X-Git-Url: http://gitweb.michael.orlitzky.com/?p=octave.git;a=blobdiff_plain;f=optimization%2Fpreconditioned_conjugate_gradient_method.m;h=dec2eeed97418094671d0db34e327e1351820415;hp=af70af5f14e90b819f7ef2c6b1dc980877a62d7e;hb=62955524e317c9b63006ca41da8e58647d15f632;hpb=af2083885af78b1290c21f2852c6fdba25820918

diff --git a/optimization/preconditioned_conjugate_gradient_method.m b/optimization/preconditioned_conjugate_gradient_method.m
index af70af5..dec2eee 100644
--- a/optimization/preconditioned_conjugate_gradient_method.m
+++ b/optimization/preconditioned_conjugate_gradient_method.m
@@ -1,8 +1,8 @@
-function [x, k] = preconditioned_conjugate_gradient_method(Q,
-                                                           M,
-                                                           b,
-                                                           x0,
-                                                           tolerance,
+function [x, k] = preconditioned_conjugate_gradient_method(Q, ...
+                                                           M, ...
+                                                           b, ...
+                                                           x0, ...
+                                                           tolerance, ...
                                                            max_iterations)
   %
   % Solve,
@@ -13,8 +13,9 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
   %
   %   min [phi(x) = (1/2)*<Qx, x> + <b, x>]
   %
-  % using the preconditioned conjugate gradient method (14.54 in
-  % Guler).
+  % using the preconditioned conjugate gradient method (14.56 in
+  % Guler). If ``M`` is the identity matrix, we use the slightly
+  % faster implementation in conjugate_gradient_method.m.
   %
   % INPUT:
   %
@@ -23,7 +24,8 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
   %
   % - ``M`` -- The preconditioning matrix. If the actual matrix used
   %   to precondition ``Q`` is called ``C``, i.e. ``C^(-1) * Q *
-  %   C^(-T) == \bar{Q}``, then M=CC^T.
+  %   C^(-T) == \bar{Q}``, then M=CC^T. However, the matrix ``C``
+  %   itself is never needed. This is explained in Guler, section 14.9.
   %
   % - ``b`` -- The right-hand-side of the system to solve.
   %
@@ -37,7 +39,7 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
   %
   % OUTPUT:
   %
-  % - ``x`` - The solution to Qx=b.
+  % - ``x`` - The computed solution to Qx=b.
   %
   % - ``k`` - The ending value of k; that is, the number of
   %   iterations that were performed.
@@ -46,21 +48,80 @@ function [x, k] = preconditioned_conjugate_gradient_method(Q,
   %
   % All vectors are assumed to be *column* vectors.
   %
+  % The cited algorithm contains a typo; in "The Preconditioned
+  % Conjugate-Gradient Method", we are supposed to define
+  % d_{0} = -z_{0}, not -r_{0} as written.
+  %
+  % The rather verbose name of this function was chosen to avoid
+  % conflicts with other implementations.
+  %
   % REFERENCES:
   %
   %   1. Guler, Osman. Foundations of Optimization. New York, Springer,
-  %      2010.
+  %      2010.
+  %
+  %   2. Shewchuk, Jonathan Richard. An Introduction to the Conjugate
+  %      Gradient Method Without the Agonizing Pain, Edition 1.25.
+  %      August 4, 1994.
   %
-  Ct = chol(M);
-  C = Ct';
-  C_inv = inv(C);
-  Ct_inv = inv(Ct);
+  % We use this in the inner loop.
+  sqrt_n = floor(sqrt(length(x0)));
+
+  % Set k=0 first, so that the references to xk,rk,zk,dk which
+  % immediately follow correspond (semantically) to x0,r0,z0,d0.
+  k = 0;
+
+  xk = x0;
+  rk = Q*xk - b;
+  zk = M \ rk;
+  dk = -zk;
+
+  while (k <= max_iterations && norm(rk, 'inf') > tolerance)
+    % Used twice; avoid recomputation.
+    rkzk = rk' * zk;
+
+    % The product Q*dk is needed twice below, and matrix-vector
+    % multiplication is expensive, so we compute it only once.
+    Qdk = Q * dk;
+
+    % We're going to divide by this quantity...
+    dkQdk = dk' * Qdk;
+
+    % So if it's too close to zero, we replace it with something
+    % comparable but non-zero.
+    if (dkQdk < eps)
+      dkQdk = eps;
+    end
+
+    alpha_k = rkzk/dkQdk;
+    x_next = xk + (alpha_k * dk);
+
+    % The recursive definition of r_next is prone to accumulating
+    % roundoff error. When sqrt(n) divides k, we recompute the
+    % residual from scratch to minimize this error. This modification
+    % was suggested by the second reference.
+    if (mod(k, sqrt_n) == 0)
+      r_next = Q*x_next - b;
+    else
+      r_next = rk + (alpha_k * Qdk);
+    end
+
+    z_next = M \ r_next;
+    beta_next = (r_next' * z_next)/rkzk;
+    d_next = -z_next + beta_next*dk;
 
-  Q_bar = C_inv * Q * Ct_inv;
-  b_bar = C_inv * b;
+    % We potentially just performed one more iteration than necessary,
+    % but doing so simplifies the loop. Note that, due to the structure
+    % of the loop, we will have k > max_iterations when we fail to
+    % converge.
+    k = k + 1;
+    xk = x_next;
+    rk = r_next;
+    zk = z_next;
+    dk = d_next;
+  end
 
-  % The solution to Q_bar*x_bar == b_bar is x_bar = Ct*x.
-  [x_bar, k] = conjugate_gradient_method(Q_bar, b_bar, x0, tolerance, max_iterations);
-  x = Ct_inv * x_bar;
+  % If we make it here, one of the two stopping conditions was met.
+  x = xk;
 end
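
As a usage illustration (not part of the patch), here is a minimal Octave sketch, assuming the patched file and conjugate_gradient_method.m are on the path; the Jacobi preconditioner M = diag(diag(Q)) is just one common choice, picked here for demonstration rather than prescribed by the commit:

    % Build a small symmetric positive-definite system Qx = b.
    n = 100;
    A = randn(n, n);
    Q = A'*A + n*eye(n);      % SPD by construction
    b = randn(n, 1);

    % Jacobi (diagonal) preconditioner; anything SPD for which
    % M \ r is cheap to evaluate will do.
    M = diag(diag(Q));

    x0 = zeros(n, 1);
    [x, k] = preconditioned_conjugate_gradient_method(Q, M, b, x0, ...
                                                      1e-10, 1000);

    % On exit, either k > 1000 (no convergence) or the infinity-norm
    % of the residual is at most the tolerance.
    norm(Q*x - b, 'inf')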
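Continuing the sketch above, the M=CC^T relationship described in the docstring can be checked by preconditioning manually, the way the removed code did; in Octave, chol(M) returns an upper-triangular Ct with Ct'*Ct == M, so C = Ct':

    Ct = chol(M);
    C = Ct';

    % Change variables: solve Q_bar*x_bar == b_bar, where Q_bar is
    % the preconditioned matrix C^(-1) * Q * C^(-T).
    Q_bar = C \ Q / Ct;
    b_bar = C \ b;
    x_bar = Q_bar \ b_bar;

    % Map back via x_bar == Ct*x; the result should agree with the
    % direct solution of the original system.
    norm((Ct \ x_bar) - (Q \ b))   % expect something near zero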