]> gitweb.michael.orlitzky.com - sage.d.git/blob - mjo/ldlt.py
mjo/ldlt.py: delete naive implementations; fix tests.
[sage.d.git] / mjo / ldlt.py
1 from sage.all import *
2
def is_positive_semidefinite_naive(A):
    r"""
    Naively decide whether ``A`` is positive-semidefinite by checking
    that all of its eigenvalues are nonnegative. We follow the sage
    convention that positive-(semi)definite matrices must be symmetric
    or Hermitian.

    SETUP::

        sage: from mjo.ldlt import is_positive_semidefinite_naive

    TESTS:

    The trivial matrix is vacuously positive-semidefinite::

        sage: A = matrix(QQ, 0)
        sage: A
        []
        sage: is_positive_semidefinite_naive(A)
        True

    """
    if A.nrows() == 0:
        # The trivial matrix has no eigenvalues to test; it is
        # positive-semidefinite vacuously.
        return True

    # Hermitian-ness is a precondition for (semi)definiteness under
    # the sage convention; bail out before computing eigenvalues.
    if not A.is_hermitian():
        return False

    return all( v >= 0 for v in A.eigenvalues() )
27
28
def block_ldlt(A):
    r"""
    Perform a block-`LDL^{T}` factorization of the Hermitian
    matrix `A`.

    The standard `LDL^{T}` factorization of a positive-definite matrix
    `A` factors it as `A = LDL^{T}` where `L` is unit-lower-triangular
    and `D` is diagonal. If one allows row/column swaps via a
    permutation matrix `P`, then this factorization can be extended to
    some positive-semidefinite matrices `A` via the factorization
    `P^{T}AP = LDL^{T}` that places the zeros at the bottom of `D` to
    avoid division by zero. These factorizations extend easily to
    complex Hermitian matrices when one replaces the transpose by the
    conjugate-transpose.

    However, we can go one step further. If, in addition, we allow `D`
    to potentially contain `2 \times 2` blocks on its diagonal, then
    every real or complex Hermitian matrix `A` can be factored as `A =
    PLDL^{*}P^{T}`. When the row/column swaps are made intelligently,
    this process is numerically stable over inexact rings like ``RDF``.
    Bunch and Kaufman describe such a "pivot" scheme that is suitable
    for the solution of Hermitian systems, and that is how we choose
    our row and column swaps.

    OUTPUT:

    If the input matrix is Hermitian, we return a triple `(P,L,D)`
    such that `A = PLDL^{*}P^{T}` and

    * `P` is a permutation matrix,
    * `L` is unit lower-triangular,
    * `D` is a block-diagonal matrix whose blocks are of size
      one or two.

    If the input matrix is not Hermitian, the output from this function
    is undefined.

    SETUP::

        sage: from mjo.ldlt import block_ldlt

    EXAMPLES:

    This three-by-three real symmetric matrix has one positive, one
    negative, and one zero eigenvalue -- so it is not any flavor of
    (semi)definite, yet we can still factor it::

        sage: A = matrix(QQ, [[0, 1, 0],
        ....:                 [1, 1, 2],
        ....:                 [0, 2, 0]])
        sage: P,L,D = block_ldlt(A)
        sage: P
        [0 0 1]
        [1 0 0]
        [0 1 0]
        sage: L
        [  1   0   0]
        [  2   1   0]
        [  1 1/2   1]
        sage: D
        [ 1| 0| 0]
        [--+--+--]
        [ 0|-4| 0]
        [--+--+--]
        [ 0| 0| 0]
        sage: P.transpose()*A*P == L*D*L.transpose()
        True

    This two-by-two matrix has no standard factorization, but it
    constitutes its own block-factorization::

        sage: A = matrix(QQ, [ [0,1],
        ....:                  [1,0] ])
        sage: block_ldlt(A)
        (
        [1 0]  [1 0]  [0 1]
        [0 1], [0 1], [1 0]
        )

    The same is true of the following complex Hermitian matrix::

        sage: A = matrix(QQbar, [ [ 0,I],
        ....:                     [-I,0] ])
        sage: block_ldlt(A)
        (
        [1 0]  [1 0]  [ 0  I]
        [0 1], [0 1], [-I  0]
        )

    TESTS:

    All three factors should be the identity when the original matrix is::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: I = matrix.identity(QQ,n)
        sage: P,L,D = block_ldlt(I)
        sage: P == I and L == I and D == I
        True

    Ensure that a "random" real symmetric matrix is factored correctly::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: A = matrix.random(QQ, n)
        sage: A = A + A.transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.transpose()*P.transpose()
        True

    Ensure that a "random" complex Hermitian matrix is factored correctly::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: F = NumberField(x^2 +1, 'I')
        sage: A = matrix.random(F, n)
        sage: A = A + A.conjugate_transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.conjugate_transpose()*P.conjugate_transpose()
        True

    Ensure that a "random" complex positive-semidefinite matrix is
    factored correctly and that the resulting block-diagonal matrix is
    in fact diagonal::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: F = NumberField(x^2 +1, 'I')
        sage: A = matrix.random(F, n)
        sage: A = A*A.conjugate_transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.conjugate_transpose()*P.conjugate_transpose()
        True
        sage: diagonal_matrix(D.diagonal()) == D
        True

    The factorization should be a no-op on diagonal matrices::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: A = matrix.diagonal(random_vector(QQ, n))
        sage: I = matrix.identity(QQ,n)
        sage: P,L,D = block_ldlt(A)
        sage: P == I and L == I and A == D
        True

    """

    # We have to make at least one copy of the input matrix so that we
    # can change the base ring to its fraction field. Both "L" and the
    # intermediate Schur complements will potentially have entries in
    # the fraction field. However, we don't need to make *two* copies.
    # We can't store the entries of "D" and "L" in the same matrix if
    # "D" will contain any 2x2 blocks; but we can still store the
    # entries of "L" in the copy of "A" that we're going to make.
    # Contrast this with the non-block LDL^T factorization where the
    # entries of both "L" and "D" overwrite the lower-left half of "A".
    #
    # This grants us an additional speedup, since we don't have to
    # permute the rows/columns of "L" *and* "A" at each iteration.
    ring = A.base_ring().fraction_field()
    A = A.change_ring(ring)
    MS = A.matrix_space()

    # The magic constant (1 + sqrt(17))/8 used by Bunch-Kaufman to
    # decide between 1x1 and 2x2 pivots.
    alpha = (1 + ZZ(17).sqrt()) * ~ZZ(8)

    # Keep track of the permutations and diagonal blocks in a vector
    # rather than in a matrix, for efficiency. The list "p" records
    # the permutation as the images of 0..n-1; "d" collects the 1x1
    # and 2x2 diagonal blocks of "D" in order.
    n = A.nrows()
    p = list(range(n))
    d = []

    def swap_rows_columns(M, k, s):
        r"""
        Swap rows/columns ``k`` and ``s`` of the matrix ``M``, and update
        the list ``p`` accordingly.
        """
        if s > k:
            # s == k would swap row/column k with itself, and we don't
            # actually want to perform the identity permutation. If
            # you work out the recursive factorization by hand, you'll
            # notice that the rows/columns of "L" need to be permuted
            # as well. A nice side effect of storing "L" within "A"
            # itself is that we can skip that step. The first column
            # of "L" is hit by all of the transpositions in
            # succession, and the second column is hit by all but the
            # first transposition, and so on.
            M.swap_columns(k,s)
            M.swap_rows(k,s)

            p_k = p[k]
            p[k] = p[s]
            p[s] = p_k

        # No return value, we're only interested in the "side effects"
        # of modifying the matrix M (by reference) and the permutation
        # list p (which is in scope when this function is defined).
        return


    def pivot1x1(M, k, s):
        r"""
        Perform a 1x1 pivot swapping rows/columns `k` and `s >= k`.
        Relies on the fact that matrices are passed by reference,
        since for performance reasons this routine should overwrite
        its argument. Updates the local variables ``p`` and ``d`` as
        well.

        NOTE: the body below reads/writes the closed-over ``A``
        directly rather than ``M``; every call site passes ``A`` as
        ``M``, so the two are the same object in practice.
        """
        swap_rows_columns(M,k,s)

        # Now the pivot is in the (k,k)th position; record it as a
        # 1x1 diagonal block of "D".
        d.append( matrix(ring, 1, [[A[k,k]]]) )

        # Compute the Schur complement that we'll work on during
        # the following iteration, and store it back in the lower-
        # right-hand corner of "A". Only the lower triangle is
        # computed; the upper triangle is filled in by conjugation
        # to keep the trailing submatrix Hermitian.
        for i in range(n-k-1):
            for j in range(i+1):
                A[k+1+i,k+1+j] = ( A[k+1+i,k+1+j] -
                                   A[k+1+i,k]*A[k,k+1+j]/A[k,k] )
                A[k+1+j,k+1+i] = A[k+1+i,k+1+j].conjugate() # stay hermitian!

        for i in range(n-k-1):
            # Store the new (kth) column of "L" within the lower-
            # left-hand corner of "A".
            A[k+i+1,k] /= A[k,k]

        # No return value, only the desired side effects of updating
        # p, d, and A.
        return

    k = 0
    while k < n:
        # At each step, we're considering the k-by-k submatrix
        # contained in the lower-right half of "A", because that's
        # where we're storing the next iterate. So our indices are
        # always "k" greater than those of Higham or B&K. Note that
        # ``n == 0`` is handled by skipping this loop entirely.

        if k == (n-1):
            # Handle this trivial case manually, since otherwise the
            # algorithm's references to the e.g. "subdiagonal" are
            # meaningless. The corresponding entry of "L" will be
            # fixed later (since it's an on-diagonal element, it gets
            # set to one eventually).
            d.append( matrix(ring, 1, [[A[k,k]]]) )
            k += 1
            continue

        # Find the largest subdiagonal entry (in magnitude) in the
        # kth column. This occurs prior to Step (1) in Higham,
        # but is part of Step (1) in Bunch and Kaufman. We adopt
        # Higham's "omega" notation instead of B&K's "lambda"
        # because "lambda" can lead to some confusion.
        column_1_subdiag = [ a_ki.abs() for a_ki in A[k+1:,k].list() ]
        omega_1 = max([ a_ki for a_ki in column_1_subdiag ])

        if omega_1 == 0:
            # In this case, our matrix looks like
            #
            #   [ a 0 ]
            #   [ 0 B ]
            #
            # and we can simply skip to the next step after recording
            # the 1x1 pivot "a" in the top-left position. The entry "a"
            # will be adjusted to "1" later on to ensure that "L" is
            # (block) unit-lower-triangular.
            d.append( matrix(ring, 1, [[A[k,k]]]) )
            k += 1
            continue

        if A[k,k].abs() > alpha*omega_1:
            # This is the first case in Higham's Step (1), and B&K's
            # Step (2). Note that we have skipped the part of B&K's
            # Step (1) where we determine "r", since "r" is not yet
            # needed and we may waste some time computing it
            # otherwise. We are performing a 1x1 pivot, but the
            # rows/columns are already where we want them, so nothing
            # needs to be permuted.
            pivot1x1(A,k,k)
            k += 1
            continue

        # Now back to Step (1) of Higham, where we find the index "r"
        # that corresponds to omega_1. This is the "else" branch of
        # Higham's Step (1).
        r = k + 1 + column_1_subdiag.index(omega_1)

        # Continuing the "else" branch of Higham's Step (1), and onto
        # B&K's Step (3) where we find the largest off-diagonal entry
        # (in magnitude) in column "r". Since the matrix is Hermitian,
        # we need only look at the above-diagonal entries to find the
        # off-diagonal of maximal magnitude.
        omega_r = max( a_rj.abs() for a_rj in A[r,k:r].list() )

        if A[k,k].abs()*omega_r >= alpha*(omega_1**2):
            # Step (2) in Higham or Step (4) in B&K.
            pivot1x1(A,k,k)
            k += 1
            continue

        if A[r,r].abs() > alpha*omega_r:
            # This is Step (3) in Higham or Step (5) in B&K. Still a 1x1
            # pivot, but this time we need to swap rows/columns k and r.
            pivot1x1(A,k,r)
            k += 1
            continue

        # If we've made it this far, we're at Step (4) in Higham or
        # Step (6) in B&K, where we perform a 2x2 pivot.
        swap_rows_columns(A,k+1,r)

        # The top-left 2x2 submatrix (starting at position k,k) is now
        # our pivot.
        E = A[k:k+2,k:k+2]
        d.append(E)

        C = A[k+2:n,k:k+2]
        B = A[k+2:,k+2:]

        # We don't actually need the inverse of E, what we really need
        # is C*E.inverse(), and that can be found by setting
        #
        #   X = C*E.inverse()   <====>   XE = C.
        #
        # Then "X" can be found easily by solving a system. Note: I
        # do not actually know that sage solves the system more
        # intelligently, but this is still The Right Thing To Do.
        CE_inverse = E.solve_left(C)

        schur_complement = B - (CE_inverse*C.conjugate_transpose())

        # Compute the Schur complement that we'll work on during
        # the following iteration, and store it back in the lower-
        # right-hand corner of "A".
        for i in range(n-k-2):
            for j in range(i+1):
                A[k+2+i,k+2+j] = schur_complement[i,j]
                A[k+2+j,k+2+i] = schur_complement[j,i]

        # The on- and above-diagonal entries of "L" will be fixed
        # later, so we only need to worry about the lower-left entry
        # of the 2x2 identity matrix that belongs at the top of the
        # new column of "L".
        A[k+1,k] = 0
        for i in range(n-k-2):
            for j in range(2):
                # Store the new (k and (k+1)st) columns of "L" within
                # the lower-left-hand corner of "A".
                A[k+i+2,k+j] = CE_inverse[i,j]


        k += 2

    # Build the permutation matrix "P" from the permutation list "p":
    # P[i,j] is one exactly when p[j] == i (booleans coerce into the
    # base ring).
    MS = A.matrix_space()
    P = MS.matrix(lambda i,j: p[j] == i)

    # Warning: when n == 0, this works, but returns a matrix
    # whose (nonexistent) entries are in ZZ rather than in
    # the base ring of P and L.
    D = block_diagonal_matrix(d)

    # Overwrite the diagonal and upper-right half of "A",
    # since we're about to return it as the unit-lower-
    # triangular "L".
    for i in range(n):
        A[i,i] = 1
        for j in range(i+1,n):
            A[i,j] = 0

    return (P,A,D)