-function [x, k] = preconditioned_conjugate_gradient_method(Q,
- M,
- b,
- x0,
- tolerance,
+function [x, k] = preconditioned_conjugate_gradient_method(Q, ...
+ M, ...
+ b, ...
+ x0, ...
+ tolerance, ...
max_iterations)
%
% Solve,
%
% min [phi(x) = (1/2)*<Qx,x> + <b,x>]
%
- % using the preconditioned conjugate gradient method (14.54 in
+ % using the preconditioned conjugate gradient method (14.56 in
% Guler).
%
% INPUT:
%
% - ``M`` -- The preconditioning matrix. If the actual matrix used
% to precondition ``Q`` is called ``C``, i.e. ``C^(-1) * Q *
- % C^(-T) == \bar{Q}``, then M=CC^T.
+ % C^(-T) == \bar{Q}``, then M=CC^T. However the matrix ``C`` is
+ % never itself needed. This is explained in Guler, section 14.9.
%
% - ``b`` -- The right-hand-side of the system to solve.
%
%
% OUTPUT:
%
- % - ``x`` - The solution to Qx=b.
+ % - ``x`` - The computed solution to Qx=b.
%
% - ``k`` - The ending value of k; that is, the number of
% iterations that were performed.
%
% All vectors are assumed to be *column* vectors.
%
+ % The cited algorithm contains a typo; in "The Preconditioned
+ % Conjugate-Gradient Method", we are supposed to define
+ % d_{0} = -z_{0}, not -r_{0} as written.
+ %
+ % The rather verbose name of this function was chosen to avoid
+ % conflicts with other implementations.
+ %
% REFERENCES:
%
% 1. Guler, Osman. Foundations of Optimization. New York, Springer,
- % 2010.
+ % 2010.
+ %
+ % 2. Shewchuk, Jonathan Richard. An Introduction to the Conjugate
+ % Gradient Method Without the Agonizing Pain, Edition 1.25.
+ % August 4, 1994.
%
- Ct = chol(M);
- C = Ct';
- C_inv = inv(C);
- Ct_inv = inv(Ct);
+ % We use this in the inner loop.
+ n = length(x0);
+ sqrt_n = floor(sqrt(n));
+
+ % Set k=0 first, that way the references to xk,rk,zk,dk which
+ % immediately follow correspond (semantically) to x0,r0,z0,d0.
+ k = 0;
+
+ xk = x0;
+ rk = Q*xk - b;
+ zk = M \ rk;
+ dk = -zk;
+
+ while (k <= max_iterations && norm(rk, 'inf') > tolerance)
+ % Used twice, avoid recomputation.
+ rkzk = rk' * zk;
+
+ % The term alpha_k*dk appears twice, but so does Q*dk. We can't
+ % do them both, so we precompute the more expensive operation.
+ Qdk = Q * dk;
+
+ % We're going to divide by this quantity...
+ dkQdk = dk' * Qdk;
+
+ % So if it's too close to zero, we replace it with something
+ % comparable but non-zero.
+ if (dkQdk < eps)
+ dkQdk = eps;
+ end
+
+ alpha_k = rkzk/dkQdk;
+ x_next = xk + (alpha_k * dk);
+
+ % The recursive definition of r_next is prone to accumulate
+ % roundoff error. When sqrt(n) divides k, we recompute the
+ % residual to minimize this error. This modification was suggested
+ % by the second reference.
+ if (mod(k, sqrt_n) == 0)
+ r_next = Q*x_next - b;
+ else
+ r_next = rk + (alpha_k * Qdk);
+ end
+
+ z_next = M \ r_next;
+ beta_next = (r_next' * z_next)/rkzk;
+ d_next = -z_next + beta_next*dk;
- Q_bar = C_inv * Q * Ct_inv;
- b_bar = C_inv * b;
+ % We potentially just performed one more iteration than necessary
+ % in order to simplify the loop. Note that due to the structure of
+ % our loop, we will have k > max_iterations when we fail to
+ % converge.
+ k = k + 1;
+ xk = x_next;
+ rk = r_next;
+ zk = z_next;
+ dk = d_next;
+ end
- % The solution to Q_bar*x_bar == b_bar is x_bar = Ct*x.
- [x_bar, k] = conjugate_gradient_method(Q_bar, b_bar, x0, tolerance, max_iterations);
- x = Ct_inv * x_bar;
+ % If we make it here, one of the two stopping conditions was met.
+ x = xk;
end