]> gitweb.michael.orlitzky.com - sage.d.git/blob - mjo/ldlt.py
682eecf2c44a5f9a6bb7855d7b1ce6e88e631af3
[sage.d.git] / mjo / ldlt.py
1 from sage.all import *
2
def is_positive_semidefinite_naive(A):
    r"""
    A naive positive-semidefinite check that tests the eigenvalues for
    nonnegativity. We follow the sage convention that positive
    (semi)definite matrices must be symmetric or Hermitian.

    SETUP::

        sage: from mjo.ldlt import is_positive_semidefinite_naive

    TESTS:

    The trivial matrix is vacuously positive-semidefinite::

        sage: A = matrix(QQ, 0)
        sage: A
        []
        sage: is_positive_semidefinite_naive(A)
        True

    """
    # The zero-by-zero matrix is positive-semidefinite vacuously:
    # there are no eigenvalues to test.
    if A.nrows() == 0:
        return True

    # Non-Hermitian matrices don't qualify, regardless of spectrum.
    if not A.is_hermitian():
        return False

    return all(ev >= 0 for ev in A.eigenvalues())
27
28
def _block_ldlt(A):
    r"""
    Perform a user-unfriendly block-`LDL^{T}` factorization of the
    Hermitian matrix `A`

    This function is used internally to compute the factorization for
    the user-friendly ``block_ldlt`` function. Whereas that function
    returns three nice matrices, this one returns

    * A list ``p`` of the first ``n`` natural numbers, permuted.
    * A matrix whose lower-triangular portion is ``L``, but whose
      (strict) upper-triangular portion is junk.
    * A list of the block-diagonal entries of ``D``

    This is mainly useful to avoid having to "undo" the construction
    of the matrix ``D`` when we don't need it. For example, it's much
    easier to compute the inertia of a matrix from the list of blocks
    than it is from the block-diagonal matrix itself, because given a
    block-diagonal matrix, you first have to figure out where the
    blocks are!

    The pivoting strategy is the Bunch-Kaufman partial-pivoting
    scheme, following Higham's presentation of it; the step numbers in
    the comments below refer to both write-ups. If the input matrix is
    not Hermitian, the output is undefined.
    """
    # Work over the fraction field so that the divisions performed
    # during elimination (by pivot entries) are exact.
    ring = A.base_ring().fraction_field()
    A = A.change_ring(ring)
    MS = A.matrix_space()  # NOTE(review): unused in this function.

    # The magic constant used by Bunch-Kaufman: (1 + sqrt(17))/8.
    alpha = (1 + ZZ(17).sqrt()) * ~ZZ(8)

    # Keep track of the permutations and diagonal blocks in a vector
    # rather than in a matrix, for efficiency.
    n = A.nrows()
    p = list(range(n))
    d = []

    def swap_rows_columns(M, k, s):
        r"""
        Swap rows/columns ``k`` and ``s`` of the matrix ``M``, and update
        the list ``p`` accordingly.

        Callers always pass ``s >= k``; ``s == k`` is a no-op.
        """
        if s > k:
            # s == k would swap row/column k with itself, and we don't
            # actually want to perform the identity permutation. If
            # you work out the recursive factorization by hand, you'll
            # notice that the rows/columns of "L" need to be permuted
            # as well. A nice side effect of storing "L" within "A"
            # itself is that we can skip that step. The first column
            # of "L" is hit by all of the transpositions in
            # succession, and the second column is hit by all but the
            # first transposition, and so on.
            M.swap_columns(k,s)
            M.swap_rows(k,s)

            # Record the transposition in the permutation list.
            p_k = p[k]
            p[k] = p[s]
            p[s] = p_k

        # No return value, we're only interested in the "side effects"
        # of modifying the matrix M (by reference) and the permutation
        # list p (which is in scope when this function is defined).
        return


    def pivot1x1(M, k, s):
        r"""
        Perform a 1x1 pivot swapping rows/columns `k` and `s >= k`.
        Relies on the fact that matrices are passed by reference,
        since for performance reasons this routine should overwrite
        its argument. Updates the local variables ``p`` and ``d`` as
        well.

        NOTE(review): apart from the swap below, the body reads and
        writes the enclosing ``A`` directly rather than ``M``; this is
        correct only because every call site passes ``A`` as ``M``.
        """
        swap_rows_columns(M,k,s)

        # Now the pivot is in the (k,k)th position; record it as the
        # next 1x1 diagonal block of "D".
        d.append( matrix(ring, 1, [[A[k,k]]]) )

        # Compute the Schur complement that we'll work on during
        # the following iteration, and store it back in the lower-
        # right-hand corner of "A". Only the lower triangle is
        # computed; the upper triangle is filled in by conjugation.
        for i in range(n-k-1):
            for j in range(i+1):
                A[k+1+i,k+1+j] = ( A[k+1+i,k+1+j] -
                                   A[k+1+i,k]*A[k,k+1+j]/A[k,k] )
                A[k+1+j,k+1+i] = A[k+1+i,k+1+j].conjugate() # stay hermitian!

        for i in range(n-k-1):
            # Store the new (kth) column of "L" within the lower-
            # left-hand corner of "A".
            A[k+i+1,k] /= A[k,k]

        # No return value, only the desired side effects of updating
        # p, d, and A.
        return

    k = 0
    while k < n:
        # At each step, we're considering the (n-k)-by-(n-k) submatrix
        # contained in the lower-right half of "A", because that's
        # where we're storing the next iterate. So our indices are
        # always "k" greater than those of Higham or B&K. Note that
        # ``n == 0`` is handled by skipping this loop entirely.

        if k == (n-1):
            # Handle this trivial case manually, since otherwise the
            # algorithm's references to the e.g. "subdiagonal" are
            # meaningless. The corresponding entry of "L" will be
            # fixed later (since it's an on-diagonal element, it gets
            # set to one eventually).
            d.append( matrix(ring, 1, [[A[k,k]]]) )
            k += 1
            continue

        # Find the largest subdiagonal entry (in magnitude) in the
        # kth column. This occurs prior to Step (1) in Higham,
        # but is part of Step (1) in Bunch and Kaufman. We adopt
        # Higham's "omega" notation instead of B&K's "lambda"
        # because "lambda" can lead to some confusion.
        column_1_subdiag = [ a_ki.abs() for a_ki in A[k+1:,k].list() ]
        omega_1 = max([ a_ki for a_ki in column_1_subdiag ])

        if omega_1 == 0:
            # In this case, our matrix looks like
            #
            #   [ a 0 ]
            #   [ 0 B ]
            #
            # and we can simply skip to the next step after recording
            # the 1x1 pivot "a" in the top-left position. The entry "a"
            # will be adjusted to "1" later on to ensure that "L" is
            # (block) unit-lower-triangular.
            d.append( matrix(ring, 1, [[A[k,k]]]) )
            k += 1
            continue

        if A[k,k].abs() > alpha*omega_1:
            # This is the first case in Higham's Step (1), and B&K's
            # Step (2). Note that we have skipped the part of B&K's
            # Step (1) where we determine "r", since "r" is not yet
            # needed and we may waste some time computing it
            # otherwise. We are performing a 1x1 pivot, but the
            # rows/columns are already where we want them, so nothing
            # needs to be permuted.
            pivot1x1(A,k,k)
            k += 1
            continue

        # Now back to Step (1) of Higham, where we find the index "r"
        # that corresponds to omega_1. This is the "else" branch of
        # Higham's Step (1).
        r = k + 1 + column_1_subdiag.index(omega_1)

        # Continuing the "else" branch of Higham's Step (1), and onto
        # B&K's Step (3) where we find the largest off-diagonal entry
        # (in magnitude) in column "r". Since the matrix is Hermitian,
        # we need only look at the above-diagonal entries to find the
        # off-diagonal of maximal magnitude.
        omega_r = max( a_rj.abs() for a_rj in A[r,k:r].list() )

        if A[k,k].abs()*omega_r >= alpha*(omega_1**2):
            # Step (2) in Higham or Step (4) in B&K.
            pivot1x1(A,k,k)
            k += 1
            continue

        if A[r,r].abs() > alpha*omega_r:
            # This is Step (3) in Higham or Step (5) in B&K. Still a 1x1
            # pivot, but this time we need to swap rows/columns k and r.
            pivot1x1(A,k,r)
            k += 1
            continue

        # If we've made it this far, we're at Step (4) in Higham or
        # Step (6) in B&K, where we perform a 2x2 pivot.
        swap_rows_columns(A,k+1,r)

        # The top-left 2x2 submatrix (starting at position k,k) is now
        # our pivot.
        E = A[k:k+2,k:k+2]
        d.append(E)

        C = A[k+2:n,k:k+2]
        B = A[k+2:,k+2:]

        # We don't actually need the inverse of E, what we really need
        # is C*E.inverse(), and that can be found by setting
        #
        #   X = C*E.inverse()   <====>   XE = C.
        #
        # Then "X" can be found easily by solving a system. Note: I
        # do not actually know that sage solves the system more
        # intelligently, but this is still The Right Thing To Do.
        CE_inverse = E.solve_left(C)

        schur_complement = B - (CE_inverse*C.conjugate_transpose())

        # Compute the Schur complement that we'll work on during
        # the following iteration, and store it back in the lower-
        # right-hand corner of "A".
        for i in range(n-k-2):
            for j in range(i+1):
                A[k+2+i,k+2+j] = schur_complement[i,j]
                A[k+2+j,k+2+i] = schur_complement[j,i]

        # The on- and above-diagonal entries of "L" will be fixed
        # later, so we only need to worry about the lower-left entry
        # of the 2x2 identity matrix that belongs at the top of the
        # new column of "L".
        A[k+1,k] = 0
        for i in range(n-k-2):
            for j in range(2):
                # Store the new (k and (k+1)st) columns of "L" within
                # the lower-left-hand corner of "A".
                A[k+i+2,k+j] = CE_inverse[i,j]


        k += 2

    for i in range(n):
        # We skipped this during the main loop, but it's necessary for
        # correctness: "L" is unit-lower-triangular, so its diagonal
        # must be all ones.
        A[i,i] = 1

    return (p,A,d)
251
def block_ldlt(A):
    r"""
    Perform a block-`LDL^{T}` factorization of the Hermitian
    matrix `A`.

    The standard `LDL^{T}` factorization of a positive-definite matrix
    `A` factors it as `A = LDL^{T}` where `L` is unit-lower-triangular
    and `D` is diagonal. If one allows row/column swaps via a
    permutation matrix `P`, then this factorization can be extended to
    some positive-semidefinite matrices `A` via the factorization
    `P^{T}AP = LDL^{T}` that places the zeros at the bottom of `D` to
    avoid division by zero. These factorizations extend easily to
    complex Hermitian matrices when one replaces the transpose by the
    conjugate-transpose.

    However, we can go one step further. If, in addition, we allow `D`
    to potentially contain `2 \times 2` blocks on its diagonal, then
    every real or complex Hermitian matrix `A` can be factored as `A =
    PLDL^{*}P^{T}`. When the row/column swaps are made intelligently,
    this process is numerically stable over inexact rings like ``RDF``.
    Bunch and Kaufman describe such a "pivot" scheme that is suitable
    for the solution of Hermitian systems, and that is how we choose
    our row and column swaps.

    OUTPUT:

    If the input matrix is Hermitian, we return a triple `(P,L,D)`
    such that `A = PLDL^{*}P^{T}` and

    * `P` is a permutation matrix,
    * `L` is unit lower-triangular,
    * `D` is a block-diagonal matrix whose blocks are of size
      one or two.

    If the input matrix is not Hermitian, the output from this function
    is undefined.

    SETUP::

        sage: from mjo.ldlt import block_ldlt

    EXAMPLES:

    This three-by-three real symmetric matrix has one positive, one
    negative, and one zero eigenvalue -- so it is not any flavor of
    (semi)definite, yet we can still factor it::

        sage: A =  matrix(QQ, [[0, 1, 0],
        ....:                  [1, 1, 2],
        ....:                  [0, 2, 0]])
        sage: P,L,D = block_ldlt(A)
        sage: P
        [0 0 1]
        [1 0 0]
        [0 1 0]
        sage: L
        [  1   0   0]
        [  2   1   0]
        [  1 1/2   1]
        sage: D
        [ 1| 0| 0]
        [--+--+--]
        [ 0|-4| 0]
        [--+--+--]
        [ 0| 0| 0]
        sage: P.transpose()*A*P == L*D*L.transpose()
        True

    This two-by-two matrix has no standard factorization, but it
    constitutes its own block-factorization::

        sage: A = matrix(QQ, [ [0,1],
        ....:                  [1,0] ])
        sage: block_ldlt(A)
        (
        [1 0]  [1 0]  [0 1]
        [0 1], [0 1], [1 0]
        )

    The same is true of the following complex Hermitian matrix::

        sage: A = matrix(QQbar, [ [ 0,I],
        ....:                     [-I,0] ])
        sage: block_ldlt(A)
        (
        [1 0]  [1 0]  [ 0  I]
        [0 1], [0 1], [-I  0]
        )

    TESTS:

    All three factors should be the identity when the original matrix is::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: I = matrix.identity(QQ,n)
        sage: P,L,D = block_ldlt(I)
        sage: P == I and L == I and D == I
        True

    Ensure that a "random" real symmetric matrix is factored correctly::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: A = matrix.random(QQ, n)
        sage: A = A + A.transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.transpose()*P.transpose()
        True

    Ensure that a "random" complex Hermitian matrix is factored correctly::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: F = NumberField(x^2 +1, 'I')
        sage: A = matrix.random(F, n)
        sage: A = A + A.conjugate_transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.conjugate_transpose()*P.conjugate_transpose()
        True

    Ensure that a "random" complex positive-semidefinite matrix is
    factored correctly and that the resulting block-diagonal matrix is
    in fact diagonal::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: F = NumberField(x^2 +1, 'I')
        sage: A = matrix.random(F, n)
        sage: A = A*A.conjugate_transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.conjugate_transpose()*P.conjugate_transpose()
        True
        sage: diagonal_matrix(D.diagonal()) == D
        True

    The factorization should be a no-op on diagonal matrices::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: A = matrix.diagonal(random_vector(QQ, n))
        sage: I = matrix.identity(QQ,n)
        sage: P,L,D = block_ldlt(A)
        sage: P == I and L == I and A == D
        True

    """

    # The heavy lifting happens in _block_ldlt(), which hands back the
    # permutation as a list, "L" stored inside a copy of "A" (with junk
    # above the diagonal), and the diagonal blocks of "D" as a list.
    # Storing "L" inside the working copy of "A" means the factorization
    # only has to permute one matrix per iteration, and only one copy of
    # the input (over its fraction field) is ever made.
    perm, L, blocks = _block_ldlt(A)

    # Assemble the permutation matrix "P" from the permuted index list:
    # column j of "P" has its one in row perm[j].
    space = L.matrix_space()
    P = space.matrix(lambda row, col: perm[col] == row)

    # Warning: when n == 0, this works, but returns a matrix
    # whose (nonexistent) entries are in ZZ rather than in
    # the base ring of P and L.
    D = block_diagonal_matrix(blocks)

    # Overwrite the (strict) upper-triangular part of "L", since a
    # priori it contains the same entries as "A" did after _block_ldlt().
    size = L.nrows()
    for row in range(size):
        for col in range(row + 1, size):
            L[row, col] = 0

    return (P, L, D)
428 return (P,L,D)