]>
gitweb.michael.orlitzky.com - sage.d.git/blob - mjo/ldlt.py
3 def is_positive_semidefinite_naive(A
):
5 A naive positive-semidefinite check that tests the eigenvalues for
6 nonnegativity. We follow the sage convention that positive
7 (semi)definite matrices must be symmetric or Hermitian.
11 sage: from mjo.ldlt import is_positive_semidefinite_naive
15 The trivial matrix is vaciously positive-semidefinite::
17 sage: A = matrix(QQ, 0)
20 sage: is_positive_semidefinite_naive(A)
25 return True # vacuously
26 return A
.is_hermitian() and all( v
>= 0 for v
in A
.eigenvalues() )
31 Perform a pivoted `LDL^{T}` factorization of the Hermitian
32 positive-semidefinite matrix `A`.
34 This is a naive, recursive implementation that is inefficient due
35 to Python's lack of tail-call optimization. The pivot strategy is
36 to choose the largest diagonal entry of the matrix at each step,
37 and to permute it into the top-left position. Ultimately this
38 results in a factorization `A = PLDL^{T}P^{T}`, where `P` is a
39 permutation matrix, `L` is unit-lower-triangular, and `D` is
40 diagonal decreasing from top-left to bottom-right.
44 The algorithm is based on the discussion in Golub and Van Loan, but with
49 A triple `(P,L,D)` such that `A = PLDL^{T}P^{T}` and where,
51 * `P` is a permutaiton matrix
52 * `L` is unit lower-triangular
53 * `D` is a diagonal matrix whose entries are decreasing from top-left
58 sage: from mjo.ldlt import ldlt_naive, is_positive_semidefinite_naive
62 All three factors should be the identity when the original matrix is::
64 sage: I = matrix.identity(QQ,4)
65 sage: P,L,D = ldlt_naive(I)
66 sage: P == I and L == I and D == I
71 Ensure that a "random" positive-semidefinite matrix is factored correctly::
73 sage: set_random_seed()
74 sage: n = ZZ.random_element(5)
75 sage: A = matrix.random(QQ, n)
76 sage: A = A*A.transpose()
77 sage: is_positive_semidefinite_naive(A)
79 sage: P,L,D = ldlt_naive(A)
80 sage: A == P*L*D*L.transpose()*P.transpose()
86 # Use the fraction field of the given matrix so that division will work
87 # when (for example) our matrix consists of integer entries.
88 ring
= A
.base_ring().fraction_field()
91 # We can get n == 0 if someone feeds us a trivial matrix.
92 P
= matrix
.identity(ring
, n
)
93 L
= matrix
.identity(ring
, n
)
97 A1
= A
.change_ring(ring
)
99 s
= diags
.index(max(diags
))
100 P1
= copy(A1
.matrix_space().identity_matrix())
105 # Golub and Van Loan mention in passing what to do here. This is
106 # only sensible if the matrix is positive-semidefinite, because we
107 # are assuming that we can set everything else to zero as soon as
108 # we hit the first on-diagonal zero.
110 P
= A1
.matrix_space().identity_matrix()
112 D
= A1
.matrix_space().zero()
118 P2
, L2
, D2
= ldlt_naive(A2
- (v1
*v1
.transpose())/alpha1
)
120 P1
= P1
*block_matrix(2,2, [[ZZ(1), ZZ(0)],
122 L1
= block_matrix(2,2, [[ZZ(1), ZZ(0)],
123 [P2
.transpose()*v1
/alpha1
, L2
]])
124 D1
= block_matrix(2,2, [[alpha1
, ZZ(0)],
133 Perform a fast, pivoted `LDL^{T}` factorization of the Hermitian
134 positive-semidefinite matrix `A`.
136 This function is much faster than ``ldlt_naive`` because the
137 tail-recursion has been unrolled into a loop.
139 ring
= A
.base_ring().fraction_field()
140 A
= A
.change_ring(ring
)
142 # Keep track of the permutations in a vector rather than in a
143 # matrix, for efficiency.
148 # We need to loop once for every diagonal entry in the
149 # matrix. So, as many times as it has rows/columns. At each
150 # step, we obtain the permutation needed to put things in the
151 # right place, then the "next" entry (alpha) of D, and finally
152 # another column of L.
153 diags
= A
.diagonal()[k
:n
]
156 # We're working *within* the matrix ``A``, so every index is
157 # offset by k. For example: after the second step, we should
158 # only be looking at the lower 3-by-3 block of a 5-by-5 matrix.
159 s
= k
+ diags
.index(alpha
)
161 # Move the largest diagonal element up into the top-left corner
162 # of the block we're working on (the one starting from index k,k).
163 # Presumably this is faster than hitting the thing with a
164 # permutation matrix.
166 # Since "L" is stored in the lower-left "half" of "A", it's a
167 # good thing that we need to permute "L," too. This is due to
168 # how P2.T appears in the recursive algorithm applied to the
169 # "current" column of L There, P2.T is computed recusively, as
170 # 1 x P3.T, and P3.T = 1 x P4.T, etc, from the bottom up. All
171 # are eventually applied to "v" in order. Here we're working
172 # from the top down, and rather than keep track of what
173 # permutations we need to perform, we just perform them as we
174 # go along. No recursion needed.
178 # Update the permutation "matrix" with the swap we just did.
183 # Now the largest diagonal is in the top-left corner of the
184 # block below and to the right of index k,k. When alpha is
185 # zero, we can just leave the rest of the D/L entries
186 # zero... which is exactly how they start out.
188 # Update the "next" block of A that we'll work on during
189 # the following iteration. I think it's faster to get the
190 # entries of a row than a column here?
191 for i
in range(n
-k
-1):
193 A
[k
+1+j
,k
+1+i
] = A
[k
+1+j
,k
+1+i
] - A
[k
,k
+1+j
]*A
[k
,k
+1+i
]/alpha
194 A
[k
+1+i
,k
+1+j
] = A
[k
+1+j
,k
+1+i
] # keep it symmetric!
196 for i
in range(n
-k
-1):
197 # Store the "new" (kth) column of L, being sure to set
198 # the lower-left "half" from the upper-right "half"
199 A
[k
+i
+1,k
] = A
[k
,k
+1+i
]/alpha
201 MS
= A
.matrix_space()
202 P
= MS
.matrix(lambda i
,j
: p
[j
] == i
)
203 D
= MS
.diagonal_matrix(A
.diagonal())
207 for j
in range(i
+1,n
):
213 def block_ldlt_naive(A
, check_hermitian
=False):
215 Perform a block-`LDL^{T}` factorization of the Hermitian
218 This is a naive, recursive implementation akin to
219 ``ldlt_naive()``, where the pivots (and resulting diagonals) are
220 either `1 \times 1` or `2 \times 2` blocks. The pivots are chosen
221 using the Bunch-Kaufmann scheme that is both fast and numerically
226 A triple `(P,L,D)` such that `A = PLDL^{T}P^{T}` and where,
228 * `P` is a permutation matrix
229 * `L` is unit lower-triangular
230 * `D` is a block-diagonal matrix whose blocks are of size
236 # Use the fraction field of the given matrix so that division will work
237 # when (for example) our matrix consists of integer entries.
238 ring
= A
.base_ring().fraction_field()
241 # We can get n == 0 if someone feeds us a trivial matrix.
242 # For block-LDLT, n=2 is a base case.
243 P
= matrix
.identity(ring
, n
)
244 L
= matrix
.identity(ring
, n
)
248 alpha
= (1 + ZZ(17).sqrt()) * ~
ZZ(8)
249 A1
= A
.change_ring(ring
)
251 # Bunch-Kaufmann step 1, Higham step "zero." We use Higham's
252 # "omega" notation instead of Bunch-Kaufman's "lamda" because
253 # lambda means other things in the same context.
254 column_1_subdiag
= [ a_i1
.abs() for a_i1
in A1
[1:,0].list() ]
255 omega_1
= max([ a_i1
for a_i1
in column_1_subdiag
])
258 # "There's nothing to do at this step of the algorithm,"
259 # which means that our matrix looks like,
264 # We could still do a pivot_one_by_one() here, but it would
265 # pointlessly subract a bunch of zeros and multiply by one.
267 one
= matrix(ring
, 1, 1, [1])
268 P2
, L2
, D2
= block_ldlt_naive(B
)
269 P1
= block_diagonal_matrix(one
, P2
)
270 L1
= block_diagonal_matrix(one
, L2
)
271 D1
= block_diagonal_matrix(one
, D2
)
274 def pivot_one_by_one(M
, c
=None):
275 # Perform a one-by-one pivot on "M," swapping row/columns "c".
276 # If "c" is None, no swap is performed.
278 P1
= copy(M
.matrix_space().identity_matrix())
282 # The top-left entry is now our 1x1 pivot.
286 P2
, L2
, D2
= block_ldlt_naive(B
- (C
*C
.transpose())/M
[0,0])
289 P1
= block_matrix(2,2, [[ZZ(1), ZZ(0)],
292 P1
= P1
*block_matrix(2,2, [[ZZ(1), ZZ(0)],
295 L1
= block_matrix(2,2, [[ZZ(1), ZZ(0)],
296 [P2
.transpose()*C
/M
[0,0], L2
]])
297 D1
= block_matrix(2,2, [[M
[0,0], ZZ(0)],
303 if A1
[0,0].abs() > alpha
*omega_1
:
304 return pivot_one_by_one(A1
)
306 r
= 1 + column_1_subdiag
.index(omega_1
)
308 # If the matrix is Hermitian, we need only look at the above-
309 # diagonal entries to find the off-diagonal of maximal magnitude.
310 omega_r
= max( a_rj
.abs() for a_rj
in A1
[:r
,r
].list() )
312 if A1
[0,0].abs()*omega_r
>= alpha
*(omega_1
**2):
313 return pivot_one_by_one(A1
)
315 if A1
[r
,r
].abs() > alpha
*omega_r
:
317 # Another 1x1 pivot, but this time swapping indices 0,r.
318 return pivot_one_by_one(A1
,r
)
321 # If we made it here, we have to do a 2x2 pivot.
322 P1
= copy(A1
.matrix_space().identity_matrix())
326 # The top-left 2x2 submatrix is now our pivot.
332 # We have a two-by-two matrix that we can do nothing
334 P
= matrix
.identity(ring
, n
)
335 L
= matrix
.identity(ring
, n
)
339 P2
, L2
, D2
= block_ldlt_naive(B
- (C
*E
.inverse()*C
.transpose()))
341 P1
= P1
*block_matrix(2,2, [[ZZ(1), ZZ(0)],
344 L1
= block_matrix(2,2, [[ZZ(1), ZZ(0)],
345 [P2
.transpose()*C
*E
.inverse(), L2
]])
346 D1
= block_diagonal_matrix(E
,D2
)
353 Perform a block-`LDL^{T}` factorization of the Hermitian
356 The standard `LDL^{T}` factorization of a positive-definite matrix
357 `A` factors it as `A = LDL^{T}` where `L` is unit-lower-triangular
358 and `D` is diagonal. If one allows row/column swaps via a
359 permutation matrix `P`, then this factorization can be extended to
360 some positive-semidefinite matrices `A` via the factorization
361 `P^{T}AP = LDL^{T}` that places the zeros at the bottom of `D` to
362 avoid division by zero. These factorizations extend easily to
363 complex Hermitian matrices when one replaces the transpose by the
366 However, we can go one step further. If, in addition, we allow `D`
367 to potentially contain `2 \times 2` blocks on its diagonal, then
368 every real or complex Hermitian matrix `A` can be factored as `A =
369 PLDL^{*}P^{T}`. When the row/column swaps are made intelligently,
370 this process is numerically stable over inexact rings like ``RDF``.
371 Bunch and Kaufman describe such a "pivot" scheme that is suitable
372 for the solution of Hermitian systems, and that is how we choose
373 our row and column swaps.
377 If the input matrix is Hermitian, we return a triple `(P,L,D)`
378 such that `A = PLDL^{*}P^{T}` and
380 * `P` is a permutation matrix,
381 * `L` is unit lower-triangular,
382 * `D` is a block-diagonal matrix whose blocks are of size
385 If the input matrix is not Hermitian, the output from this function
390 This three-by-three real symmetric matrix has one positive, one
391 negative, and one zero eigenvalue -- so it is not any flavor of
392 (semi)definite, yet we can still factor it::
394 sage: A = matrix(QQ, [[0, 1, 0],
397 sage: P,L,D = block_ldlt(A)
412 sage: P.T*A*P == L*D*L.T
415 This two-by-two matrix has no standard factorization, but it
416 constitutes its own block-factorization::
418 sage: A = matrix(QQ, [ [0,1],
426 The same is true of the following complex Hermitian matrix::
428 sage: A = matrix(QQbar, [ [ 0,I],
438 All three factors should be the identity when the original matrix is::
440 sage: set_random_seed()
441 sage: n = ZZ.random_element(6)
442 sage: I = matrix.identity(QQ,n)
443 sage: P,L,D = block_ldlt(I)
444 sage: P == I and L == I and D == I
447 Ensure that a "random" real symmetric matrix is factored correctly::
449 sage: set_random_seed()
450 sage: n = ZZ.random_element(6)
451 sage: F = NumberField(x^2 +1, 'I')
452 sage: A = matrix.random(F, n)
453 sage: A = A + A.transpose()
454 sage: P,L,D = block_ldlt(A)
455 sage: A == P*L*D*L.transpose()*P.transpose()
458 Ensure that a "random" complex Hermitian matrix is factored correctly::
460 sage: set_random_seed()
461 sage: n = ZZ.random_element(6)
462 sage: F = NumberField(x^2 +1, 'I')
463 sage: A = matrix.random(F, n)
464 sage: A = A + A.conjugate_transpose()
465 sage: P,L,D = block_ldlt(A)
466 sage: A == P*L*D*L.transpose()*P.transpose()
469 Ensure that a "random" complex positive-semidefinite matrix is
470 factored correctly and that the resulting block-diagonal matrix is
473 sage: set_random_seed()
474 sage: n = ZZ.random_element(6)
475 sage: F = NumberField(x^2 +1, 'I')
476 sage: A = matrix.random(F, n)
477 sage: A = A*A.conjugate_transpose()
478 sage: P,L,D = block_ldlt(A)
479 sage: A == P*L*D*L.transpose()*P.transpose()
481 sage: diagonal_matrix(D.diagonal()) == D
484 The factorization should be a no-op on diagonal matrices::
486 sage: set_random_seed()
487 sage: n = ZZ.random_element(6)
488 sage: A = matrix.diagonal(random_vector(QQ, n))
489 sage: I = matrix.identity(QQ,n)
490 sage: P,L,D = block_ldlt(A)
491 sage: P == I and L == I and A == D
496 # We have to make at least one copy of the input matrix so that we
497 # can change the base ring to its fraction field. Both "L" and the
498 # intermediate Schur complements will potentially have entries in
499 # the fraction field. However, we don't need to make *two* copies.
500 # We can't store the entries of "D" and "L" in the same matrix if
501 # "D" will contain any 2x2 blocks; but we can still store the
502 # entries of "L" in the copy of "A" that we're going to make.
503 # Contrast this with the non-block LDL^T factorization where the
504 # entries of both "L" and "D" overwrite the lower-left half of "A".
506 # This grants us an additional speedup, since we don't have to
507 # permute the rows/columns of "L" *and* "A" at each iteration.
508 ring
= A
.base_ring().fraction_field()
509 A
= A
.change_ring(ring
)
510 MS
= A
.matrix_space()
512 # The magic constant used by Bunch-Kaufman
513 alpha
= (1 + ZZ(17).sqrt()) * ~
ZZ(8)
515 # Keep track of the permutations and diagonal blocks in a vector
516 # rather than in a matrix, for efficiency.
521 def swap_rows_columns(M
, k
, s
):
523 Swap rows/columns ``k`` and ``s`` of the matrix ``M``, and update
524 the list ``p`` accordingly.
527 # s == k would swap row/column k with itself, and we don't
528 # actually want to perform the identity permutation. If
529 # you work out the recursive factorization by hand, you'll
530 # notice that the rows/columns of "L" need to be permuted
531 # as well. A nice side effect of storing "L" within "A"
532 # itself is that we can skip that step. The first column
533 # of "L" is hit by all of the transpositions in
534 # succession, and the second column is hit by all but the
535 # first transposition, and so on.
543 # No return value, we're only interested in the "side effects"
544 # of modifing the matrix M (by reference) and the permutation
545 # list p (which is in scope when this function is defined).
549 def pivot1x1(M
, k
, s
):
551 Perform a 1x1 pivot swapping rows/columns `k` and `s >= k`.
552 Relies on the fact that matrices are passed by reference,
553 since for performance reasons this routine should overwrite
554 its argument. Updates the local variables ``p`` and ``d`` as
557 swap_rows_columns(M
,k
,s
)
559 # Now the pivot is in the (k,k)th position.
560 d
.append( matrix(ring
, 1, [[A
[k
,k
]]]) )
562 # Compute the Schur complement that we'll work on during
563 # the following iteration, and store it back in the lower-
564 # right-hand corner of "A".
565 for i
in range(n
-k
-1):
567 A
[k
+1+i
,k
+1+j
] = ( A
[k
+1+i
,k
+1+j
] -
568 A
[k
+1+i
,k
]*A
[k
,k
+1+j
]/A
[k
,k
] )
569 A
[k
+1+j
,k
+1+i
] = A
[k
+1+i
,k
+1+j
].conjugate() # stay hermitian!
571 for i
in range(n
-k
-1):
572 # Store the new (kth) column of "L" within the lower-
573 # left-hand corner of "A".
576 # No return value, only the desired side effects of updating
582 # At each step, we're considering the k-by-k submatrix
583 # contained in the lower-right half of "A", because that's
584 # where we're storing the next iterate. So our indices are
585 # always "k" greater than those of Higham or B&K. Note that
586 # ``n == 0`` is handled by skipping this loop entirely.
589 # Handle this trivial case manually, since otherwise the
590 # algorithm's references to the e.g. "subdiagonal" are
591 # meaningless. The corresponding entry of "L" will be
592 # fixed later (since it's an on-diagonal element, it gets
593 # set to one eventually).
594 d
.append( matrix(ring
, 1, [[A
[k
,k
]]]) )
598 # Find the largest subdiagonal entry (in magnitude) in the
599 # kth column. This occurs prior to Step (1) in Higham,
600 # but is part of Step (1) in Bunch and Kaufman. We adopt
601 # Higham's "omega" notation instead of B&K's "lambda"
602 # because "lambda" can lead to some confusion.
603 column_1_subdiag
= [ a_ki
.abs() for a_ki
in A
[k
+1:,k
].list() ]
604 omega_1
= max([ a_ki
for a_ki
in column_1_subdiag
])
607 # In this case, our matrix looks like
612 # and we can simply skip to the next step after recording
613 # the 1x1 pivot "a" in the top-left position. The entry "a"
614 # will be adjusted to "1" later on to ensure that "L" is
615 # (block) unit-lower-triangular.
616 d
.append( matrix(ring
, 1, [[A
[k
,k
]]]) )
620 if A
[k
,k
].abs() > alpha
*omega_1
:
621 # This is the first case in Higham's Step (1), and B&K's
622 # Step (2). Note that we have skipped the part of B&K's
623 # Step (1) where we determine "r", since "r" is not yet
624 # needed and we may waste some time computing it
625 # otherwise. We are performing a 1x1 pivot, but the
626 # rows/columns are already where we want them, so nothing
627 # needs to be permuted.
632 # Now back to Step (1) of Higham, where we find the index "r"
633 # that corresponds to omega_1. This is the "else" branch of
635 r
= k
+ 1 + column_1_subdiag
.index(omega_1
)
637 # Continuing the "else" branch of Higham's Step (1), and onto
638 # B&K's Step (3) where we find the largest off-diagonal entry
639 # (in magniture) in column "r". Since the matrix is Hermitian,
640 # we need only look at the above-diagonal entries to find the
641 # off-diagonal of maximal magnitude.
642 omega_r
= max( a_rj
.abs() for a_rj
in A
[r
,k
:r
].list() )
644 if A
[k
,k
].abs()*omega_r
>= alpha
*(omega_1
**2):
645 # Step (2) in Higham or Step (4) in B&K.
650 if A
[r
,r
].abs() > alpha
*omega_r
:
651 # This is Step (3) in Higham or Step (5) in B&K. Still a 1x1
652 # pivot, but this time we need to swap rows/columns k and r.
657 # If we've made it this far, we're at Step (4) in Higham or
658 # Step (6) in B&K, where we perform a 2x2 pivot.
659 swap_rows_columns(A
,k
+1,r
)
661 # The top-left 2x2 submatrix (starting at position k,k) is now
669 # We don't actually need the inverse of E, what we really need
670 # is C*E.inverse(), and that can be found by setting
672 # X = C*E.inverse() <====> XE = C.
674 # Then "X" can be found easily by solving a system. Note: I
675 # do not actually know that sage solves the system more
676 # intelligently, but this is still The Right Thing To Do.
677 CE_inverse
= E
.solve_left(C
)
679 schur_complement
= B
- (CE_inverse
*C
.conjugate_transpose())
681 # Compute the Schur complement that we'll work on during
682 # the following iteration, and store it back in the lower-
683 # right-hand corner of "A".
684 for i
in range(n
-k
-2):
686 A
[k
+2+i
,k
+2+j
] = schur_complement
[i
,j
]
687 A
[k
+2+j
,k
+2+i
] = schur_complement
[j
,i
]
689 # The on- and above-diagonal entries of "L" will be fixed
690 # later, so we only need to worry about the lower-left entry
691 # of the 2x2 identity matrix that belongs at the top of the
694 for i
in range(n
-k
-2):
696 # Store the new (k and (k+1)st) columns of "L" within
697 # the lower-left-hand corner of "A".
698 A
[k
+i
+2,k
+j
] = CE_inverse
[i
,j
]
703 MS
= A
.matrix_space()
704 P
= MS
.matrix(lambda i
,j
: p
[j
] == i
)
706 # Warning: when n == 0, this works, but returns a matrix
707 # whose (nonexistent) entries are in ZZ rather than in
708 # the base ring of P and L.
709 D
= block_diagonal_matrix(d
)
711 # Overwrite the diagonal and upper-right half of "A",
712 # since we're about to return it as the unit-lower-
716 for j
in range(i
+1,n
):