Use the infinity norm in the PCGM tests.

[octave.git] / optimization / preconditioned_conjugate_gradient_method.m
diff --git a/optimization/preconditioned_conjugate_gradient_method.m b/optimization/preconditioned_conjugate_gradient_method.m

index 14421948a0f23f70de9603d049e7e88890bf1572..cd886084a9766e1d785df59185c358ae69d75cc7 100644 (file)
--- a/optimization/preconditioned_conjugate_gradient_method.m
+++ b/optimization/preconditioned_conjugate_gradient_method.m
@@ -1,17 +1,17 @@
-function [x, k] = preconditioned_conjugate_gradient_method(A,
-                                                          M,
-                                                          b,
-                                                          x0,
-                                                          tolerance,
+function [x, k] = preconditioned_conjugate_gradient_method(Q, ...
+                                                          M, ...
+                                                          b, ...
+                                                          x0, ...
+                                                          tolerance, ...
                                                            max_iterations)
    %
    % Solve,
    %
-  %   Ax = b
+  %   Qx = b
    %
    % or equivalently,
    %
-  %   min [phi(x) = (1/2)*<Ax,x> + <b,x>]
+  %   min [phi(x) = (1/2)*<Qx,x> + <b,x>]
    %
    % using the preconditioned conjugate gradient method (14.56 in
    % Guler). If ``M`` is the identity matrix, we use the slightly
@@ -19,11 +19,11 @@ function [x, k] = preconditioned_conjugate_gradient_method(A,
    %
    % INPUT:
    %
-  %   - ``A`` -- The coefficient matrix of the system to solve. Must
+  %   - ``Q`` -- The coefficient matrix of the system to solve. Must
    %     be positive definite.
    %
    %   - ``M`` -- The preconditioning matrix. If the actual matrix used
-  %     to precondition ``A`` is called ``C``, i.e. ``C^(-1) * Q *
+  %     to precondition ``Q`` is called ``C``, i.e. ``C^(-1) * Q *
    %     C^(-T) == \bar{Q}``, then M=CC^T. However the matrix ``C`` is
    %     never itself needed. This is explained in Guler, section 14.9.
    %
@@ -31,7 +31,7 @@ function [x, k] = preconditioned_conjugate_gradient_method(A,
    %
    %   - ``x0`` -- The starting point for the search.
    %
-  %   - ``tolerance`` -- How close ``Ax`` has to be to ``b`` (in
+  %   - ``tolerance`` -- How close ``Qx`` has to be to ``b`` (in
    %     magnitude) before we stop.
    %
    %   - ``max_iterations`` -- The maximum number of iterations to
@@ -39,7 +39,7 @@ function [x, k] = preconditioned_conjugate_gradient_method(A,
    %
    % OUTPUT:
    %
-  %   - ``x`` - The solution to Ax=b.
+  %   - ``x`` - The computed solution to Qx=b.
    %
    %   - ``k`` - The ending value of k; that is, the number of
    %   iterations that were performed.
@@ -52,45 +52,76 @@ function [x, k] = preconditioned_conjugate_gradient_method(A,
    % Conjugate-Gradient Method", we are supposed to define
    % d_{0} = -z_{0}, not -r_{0} as written.
    %
+  % The rather verbose name of this function was chosen to avoid
+  % conflicts with other implementations.
+  %
    % REFERENCES:
    %
    %   1. Guler, Osman. Foundations of Optimization. New York, Springer,
-  %   2010.
+  %      2010.
+  %
+  %   2. Shewchuk, Jonathan Richard. An Introduction to the Conjugate
+  %      Gradient Method Without the Agonizing Pain, Edition 1.25.
+  %      August 4, 1994.
    %
-  n = length(x0);
-
-  if (isequal(M, eye(n)))
-    [x, k] = conjugate_gradient_method(A, b, x0, tolerance, max_iterations);
-    return;
-  end
  
-  zero_vector = zeros(n, 1);
+  % We use this in the inner loop.
+  sqrt_n = floor(sqrt(length(x0)));
  
+  % Set k=0 first, that way the references to xk,rk,zk,dk which
+  % immediately follow correspond (semantically) to x0,r0,z0,d0.
    k = 0;
-  x = x0; % Eschew the 'k' suffix on 'x' for simplicity.
-  rk = A*x - b; % The first residual must be computed the hard way.
+
+  xk = x0;
+  rk = Q*xk - b;
    zk = M \ rk;
    dk = -zk;
  
-  for k = [ 0 : max_iterations ]
-    if (norm(rk) < tolerance)
-       % Success.
-       return;
+  while (k <= max_iterations && norm(rk, 'inf') > tolerance)
+    % Used twice, avoid recomputation.
+    rkzk = rk' * zk;
+
+    % The term alpha_k*dk appears twice, but so does Q*dk. We can't
+    % do them both, so we precompute the more expensive operation.
+    Qdk = Q * dk;
+
+    % We're going to divide by this quantity...
+    dkQdk = dk' * Qdk;
+
+    % So if it's too close to zero, we replace it with something
+    % comparable but non-zero.
+    if (abs(dkQdk) < eps)
+      dkQdk = sign(dkQdk)*eps;
+    end
+
+    alpha_k = rkzk/dkQdk;
+    x_next = xk + (alpha_k * dk);
+
+    % The recursive definition of r_next is prone to accumulate
+    % roundoff error. When sqrt(n) divides k, we recompute the
+    % residual to minimize this error. This modification was suggested
+    % by the second reference.
+    if (mod(k, sqrt_n) == 0)
+      r_next = Q*x_next - b;
+    else
+      r_next = rk + (alpha_k * Qdk);
      end
  
-    % Unfortunately, since we don't know the matrix ``C``, it isn't
-    % easy to compute alpha_k with an existing step size function.
-    alpha_k = (rk' * zk)/(dk' * A * dk);
-    x_next = x + alpha_k*dk;
-    r_next = rk + alpha_k*A*dk;
      z_next = M \ r_next;
-    beta_next = (r_next' * z_next)/(rk' * zk);
+    beta_next = (r_next' * z_next)/rkzk;
      d_next = -z_next + beta_next*dk;
  
+    % We potentially just performed one more iteration than necessary
+    % in order to simplify the loop. Note that due to the structure of
+    % our loop, we will have k > max_iterations when we fail to
+    % converge.
      k = k + 1;
-    x = x_next;
+    xk = x_next;
      rk = r_next;
      zk = z_next;
      dk = d_next;
    end
+
+  % If we make it here, one of the two stopping conditions was met.
+  x = xk;
  end