function [x, k] = preconditioned_conjugate_gradient_method(Q, ...
                                                           M, ...
                                                           b, ...
                                                           x0, ...
                                                           tolerance, ...
                                                           max_iterations)
  %
  % Solve,
  %
  %   Qx = b
  %
  % or equivalently,
  %
  %   min [phi(x) = (1/2)*<Qx,x> - <b,x>]
  %
  % using the preconditioned conjugate gradient method (14.56 in
  % Guler).
  %
  % INPUT:
  %
  % - ``Q`` -- The coefficient matrix of the system to solve. Must
  %   be symmetric positive definite.
  %
  % - ``M`` -- The preconditioning matrix. If the actual matrix used
  %   to precondition ``Q`` is called ``C``, i.e. ``C^(-1) * Q *
  %   C^(-T) == \bar{Q}``, then M=CC^T. However, the matrix ``C`` is
  %   never itself needed; this is explained in Guler, section 14.9.
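  %   For example, ``M = diag(diag(Q))`` is the usual Jacobi
  %   (diagonal) preconditioner, and ``M = eye(n)`` recovers the
  %   unpreconditioned conjugate gradient method.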
  %
  % - ``b`` -- The right-hand side of the system to solve.
  %
  % - ``x0`` -- The starting point for the search.
  %
  % - ``tolerance`` -- How small the infinity norm of the residual
  %   ``Qx - b`` must be before we stop iterating.
  %
  % - ``max_iterations`` -- The maximum number of iterations to
  %   perform.
  %
  % OUTPUT:
  %
  % - ``x`` -- The computed solution to Qx=b.
  %
  % - ``k`` -- The ending value of k; that is, the number of
  %   iterations that were performed.
  %
  % NOTES:
  %
  % All vectors are assumed to be *column* vectors.
  %
  % The cited algorithm contains a typo; in "The Preconditioned
  % Conjugate-Gradient Method", we are supposed to define
  % d_{0} = -z_{0}, not -r_{0} as written.
  %
  % The rather verbose name of this function was chosen to avoid
  % conflicts with other implementations.
  %
  % REFERENCES:
  %
  % 1. Guler, Osman. Foundations of Optimization. New York, Springer,
  %    2010.
  %
  % 2. Shewchuk, Jonathan Richard. An Introduction to the Conjugate
  %    Gradient Method Without the Agonizing Pain, Edition 1.25.
  %    August 4, 1994.
  %
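  % EXAMPLE:
  %
  % A minimal sketch of typical usage; the 2x2 system and the Jacobi
  % preconditioner below are illustrative assumptions, not taken from
  % the references:
  %
  %   Q = [5, 1; 1, 3];
  %   M = diag(diag(Q));    % Jacobi preconditioner
  %   b = [1; 2];
  %   x0 = zeros(2, 1);
  %   [x, k] = preconditioned_conjugate_gradient_method(Q, M, b, x0, 1e-10, 100)
  %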

  % We use sqrt_n in the residual-recomputation test inside the main
  % loop below.
  n = length(x0);
  sqrt_n = floor(sqrt(n));

  % Set k=0 first, so that the references to xk,rk,zk,dk which
  % immediately follow correspond (semantically) to x0,r0,z0,d0.
  k = 0;

  xk = x0;
  rk = Q*xk - b;
  zk = M \ rk;
  dk = -zk;
  while (k <= max_iterations && norm(rk, 'inf') > tolerance)
    % Used twice below; avoid recomputing it.
    rkzk = rk' * zk;

    % Both alpha_k*dk and Q*dk appear in the updates below, but we
    % can only cache one of the two products. We precompute Q*dk,
    % since the matrix-vector product is the more expensive operation.
    Qdk = Q * dk;

    % We're going to divide by this quantity...
    dkQdk = dk' * Qdk;

    % ...so if it's too close to zero, we replace it with something
    % comparable but non-zero.
    if (dkQdk < eps)
      dkQdk = eps;
    end

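    % In exact arithmetic, alpha_k = <rk,zk>/<dk,Q*dk> is the exact
    % line-search minimizer of phi along the direction dk.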
    alpha_k = rkzk/dkQdk;
    x_next = xk + (alpha_k * dk);

    % The recursive definition of r_next is prone to accumulating
    % roundoff error. When sqrt(n) divides k, we recompute the
    % residual directly from its definition to limit this error. This
    % modification was suggested by the second reference.
    if (mod(k, sqrt_n) == 0)
      r_next = Q*x_next - b;
    else
      r_next = rk + (alpha_k * Qdk);
    end

    z_next = M \ r_next;
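    % The coefficient beta_next is chosen so that the next search
    % direction d_next is Q-conjugate to dk (in exact arithmetic).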
    beta_next = (r_next' * z_next)/rkzk;
    d_next = -z_next + beta_next*dk;

    % We potentially just performed one more iteration than necessary
    % in order to simplify the loop. Note that due to the structure of
    % our loop, we will have k > max_iterations when we fail to
    % converge.
    k = k + 1;
    xk = x_next;
    rk = r_next;
    zk = z_next;
    dk = d_next;
  end

  % If we make it here, one of the two stopping conditions was met.
  x = xk;
end
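
% A small built-in test block; a sketch only, since the 2x2 system
% below is an assumed example rather than one from the references.
% With M = eye(2) the method reduces to plain conjugate gradient.
%!test
%! Q = [5, 1; 1, 3];
%! M = eye(2);
%! b = [1; 2];
%! x0 = zeros(2, 1);
%! [x, k] = preconditioned_conjugate_gradient_method(Q, M, b, x0, 1e-10, 100);
%! assert(norm(Q*x - b, 'inf') <= 1e-10);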