Add a second reference for the PCGM and make it more resistant to accumulated roundof...

[octave.git] / optimization / preconditioned_conjugate_gradient_method.m
diff --git a/optimization/preconditioned_conjugate_gradient_method.m b/optimization/preconditioned_conjugate_gradient_method.m

index af70af5f14e90b819f7ef2c6b1dc980877a62d7e..4e68ccb583e7fab2bc8cf9f34ce170eeab7fe3a2 100644 (file)
--- a/optimization/preconditioned_conjugate_gradient_method.m
+++ b/optimization/preconditioned_conjugate_gradient_method.m
@@ -1,8 +1,8 @@
-function [x, k] = preconditioned_conjugate_gradient_method(Q,
-                                                          M,
-                                                          b,
-                                                          x0,
-                                                          tolerance,
+function [x, k] = preconditioned_conjugate_gradient_method(Q, ...
+                                                          M, ...
+                                                          b, ...
+                                                          x0, ...
+                                                          tolerance, ...
                                                            max_iterations)
    %
    % Solve,
@@ -13,8 +13,9 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
    %
    %   min [phi(x) = (1/2)*<Qx,x> + <b,x>]
    %
-  % using the preconditioned conjugate gradient method (14.54 in
-  % Guler).
+  % using the preconditioned conjugate gradient method (14.56 in
+  % Guler). If ``M`` is the identity matrix, we use the slightly
+  % faster implementation in conjugate_gradient_method.m.
    %
    % INPUT:
    %
@@ -23,7 +24,8 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
    %
    %   - ``M`` -- The preconditioning matrix. If the actual matrix used
    %     to precondition ``Q`` is called ``C``, i.e. ``C^(-1) * Q *
-  %     C^(-T) == \bar{Q}``, then M=CC^T.
+  %     C^(-T) == \bar{Q}``, then M=CC^T. However the matrix ``C`` is
+  %     never itself needed. This is explained in Guler, section 14.9.
    %
    %   - ``b`` -- The right-hand-side of the system to solve.
    %
@@ -37,7 +39,7 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
    %
    % OUTPUT:
    %
-  %   - ``x`` - The solution to Qx=b.
+  %   - ``x`` - The computed solution to Qx=b.
    %
    %   - ``k`` - The ending value of k; that is, the number of
    %   iterations that were performed.
@@ -46,21 +48,75 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
    %
    % All vectors are assumed to be *column* vectors.
    %
+  % The cited algorithm contains a typo; in "The Preconditioned
+  % Conjugate-Gradient Method", we are supposed to define
+  % d_{0} = -z_{0}, not -r_{0} as written.
+  %
+  % The rather verbose name of this function was chosen to avoid
+  % conflicts with other implementations.
+  %
    % REFERENCES:
    %
    %   1. Guler, Osman. Foundations of Optimization. New York, Springer,
-  %   2010.
+  %      2010.
+  %
+  %   2. Shewchuk, Jonathan Richard. An Introduction to the Conjugate
+  %      Gradient Method Without the Agonizing Pain, Edition 1.25.
+  %      August 4, 1994.
    %
  
-  Ct = chol(M);
-  C = Ct';
-  C_inv = inv(C);
-  Ct_inv = inv(Ct);
+  % We use this in the inner loop.
+  sqrt_n = floor(sqrt(length(x0)));
+
+  % Set k=0 first, that way the references to xk,rk,zk,dk which
+  % immediately follow correspond (semantically) to x0,r0,z0,d0.
+  k = 0;
+
+  xk = x0;
+  rk = Q*xk - b;
+  zk = M \ rk;
+  dk = -zk;
+
+  for k = [ 0 : max_iterations ]
+
+    if (norm(rk) < tolerance)
+      % Check our stopping condition. This should catch the k=0 case.
+      x = xk;
+      return;
+    end
+
+    % Used twice, avoid recomputation.
+    rkzk = rk' * zk;
+
+    % The term alpha_k*dk appears twice, but so does Q*dk. We can't
+    % do them both, so we precompute the more expensive operation.
+    Qdk = Q * dk;
+
+    alpha_k = rkzk/(dk' * Qdk);
+    x_next = xk + (alpha_k * dk);
+
+    % The recursive definition of r_next is prone to accumulate
+    % roundoff error. When sqrt(n) divides k, we recompute the
+    % residual to minimize this error. This modification is due to the
+    % second reference.
+    if (mod(k, sqrt_n) == 0)
+      r_next = Q*x_next - b;
+    else
+      r_next = rk + (alpha_k * Qdk);
+    end
+
+    z_next = M \ r_next;
+    beta_next = (r_next' * z_next)/rkzk;
+    d_next = -z_next + beta_next*dk;
  
-  Q_bar = C_inv * Q * Ct_inv;
-  b_bar = C_inv * b;
+    k = k + 1;
+    xk = x_next;
+    rk = r_next;
+    zk = z_next;
+    dk = d_next;
+  end
  
-  % The solution to Q_bar*x_bar == b_bar is x_bar = Ct*x.
-  [x_bar, k] = conjugate_gradient_method(Q_bar, b_bar, x0, tolerance, max_iterations);
-  x = Ct_inv * x_bar;
+  % The algorithm didn't converge, but we still want to return the
+  % terminal value of xk.
+  x = xk;
  end