]> gitweb.michael.orlitzky.com - sage.d.git/blob - mjo/ldlt.py
682eecf2c44a5f9a6bb7855d7b1ce6e88e631af3
[sage.d.git] / mjo / ldlt.py
1 from sage.all import *
2
def is_positive_semidefinite_naive(A):
    r"""
    A naive positive-semidefinite check that tests the eigenvalues for
    nonnegativity. We follow the sage convention that positive
    (semi)definite matrices must be symmetric or Hermitian.

    SETUP::

        sage: from mjo.ldlt import is_positive_semidefinite_naive

    TESTS:

    The trivial matrix is vacuously positive-semidefinite::

        sage: A = matrix(QQ, 0)
        sage: A
        []
        sage: is_positive_semidefinite_naive(A)
        True

    """
    # The zero-by-zero matrix is positive-semidefinite vacuously:
    # there are no eigenvalues to test.
    if A.nrows() == 0:
        return True

    # Non-Hermitian matrices don't qualify, regardless of spectrum.
    if not A.is_hermitian():
        return False

    return all(ev >= 0 for ev in A.eigenvalues())
27
28
def _block_ldlt(A):
    r"""
    Perform a user-unfriendly block-`LDL^{T}` factorization of the
    Hermitian matrix `A`

    This function is used internally to compute the factorization for
    the user-friendly ``block_ldlt`` function. Whereas that function
    returns three nice matrices, this one returns

    * A list ``p`` of the first ``n`` natural numbers, permuted.
    * A matrix whose lower-triangular portion is ``L``, but whose
      (strict) upper-triangular portion is junk.
    * A list of the block-diagonal entries of ``D``

    This is mainly useful to avoid having to "undo" the construction
    of the matrix ``D`` when we don't need it. For example, it's much
    easier to compute the inertia of a matrix from the list of blocks
    than it is from the block-diagonal matrix itself, because given a
    block-diagonal matrix, you first have to figure out where the
    blocks are!

    The pivoting strategy is the Bunch-Kaufman partial-pivoting
    scheme, following Higham's presentation of it; the step numbers in
    the comments below refer to both write-ups. If the input matrix is
    not Hermitian, the output is undefined.
    """
    # Work over the fraction field so that the divisions performed
    # during elimination (by pivot entries) are exact.
    ring = A.base_ring().fraction_field()
    A = A.change_ring(ring)
    MS = A.matrix_space()  # NOTE(review): unused in this function.

    # The magic constant used by Bunch-Kaufman: (1 + sqrt(17))/8.
    alpha = (1 + ZZ(17).sqrt()) * ~ZZ(8)

    # Keep track of the permutations and diagonal blocks in a vector
    # rather than in a matrix, for efficiency.
    n = A.nrows()
    p = list(range(n))
    d = []

    def swap_rows_columns(M, k, s):
        r"""
        Swap rows/columns ``k`` and ``s`` of the matrix ``M``, and update
        the list ``p`` accordingly.

        Callers always pass ``s >= k``; ``s == k`` is a no-op.
        """
        if s > k:
            # s == k would swap row/column k with itself, and we don't
            # actually want to perform the identity permutation. If
            # you work out the recursive factorization by hand, you'll
            # notice that the rows/columns of "L" need to be permuted
            # as well. A nice side effect of storing "L" within "A"
            # itself is that we can skip that step. The first column
            # of "L" is hit by all of the transpositions in
            # succession, and the second column is hit by all but the
            # first transposition, and so on.
            M.swap_columns(k,s)
            M.swap_rows(k,s)

            # Record the transposition in the permutation list.
            p_k = p[k]
            p[k] = p[s]
            p[s] = p_k

        # No return value, we're only interested in the "side effects"
        # of modifying the matrix M (by reference) and the permutation
        # list p (which is in scope when this function is defined).
        return


    def pivot1x1(M, k, s):
        r"""
        Perform a 1x1 pivot swapping rows/columns `k` and `s >= k`.
        Relies on the fact that matrices are passed by reference,
        since for performance reasons this routine should overwrite
        its argument. Updates the local variables ``p`` and ``d`` as
        well.

        NOTE(review): apart from the swap below, the body reads and
        writes the enclosing ``A`` directly rather than ``M``; this is
        correct only because every call site passes ``A`` as ``M``.
        """
        swap_rows_columns(M,k,s)

        # Now the pivot is in the (k,k)th position; record it as the
        # next 1x1 diagonal block of "D".
        d.append( matrix(ring, 1, [[A[k,k]]]) )

        # Compute the Schur complement that we'll work on during
        # the following iteration, and store it back in the lower-
        # right-hand corner of "A". Only the lower triangle is
        # computed; the upper triangle is filled in by conjugation.
        for i in range(n-k-1):
            for j in range(i+1):
                A[k+1+i,k+1+j] = ( A[k+1+i,k+1+j] -
                                   A[k+1+i,k]*A[k,k+1+j]/A[k,k] )
                A[k+1+j,k+1+i] = A[k+1+i,k+1+j].conjugate() # stay hermitian!

        for i in range(n-k-1):
            # Store the new (kth) column of "L" within the lower-
            # left-hand corner of "A".
            A[k+i+1,k] /= A[k,k]

        # No return value, only the desired side effects of updating
        # p, d, and A.
        return

    k = 0
    while k < n:
        # At each step, we're considering the (n-k)-by-(n-k) submatrix
        # contained in the lower-right half of "A", because that's
        # where we're storing the next iterate. So our indices are
        # always "k" greater than those of Higham or B&K. Note that
        # ``n == 0`` is handled by skipping this loop entirely.

        if k == (n-1):
            # Handle this trivial case manually, since otherwise the
            # algorithm's references to the e.g. "subdiagonal" are
            # meaningless. The corresponding entry of "L" will be
            # fixed later (since it's an on-diagonal element, it gets
            # set to one eventually).
            d.append( matrix(ring, 1, [[A[k,k]]]) )
            k += 1
            continue

        # Find the largest subdiagonal entry (in magnitude) in the
        # kth column. This occurs prior to Step (1) in Higham,
        # but is part of Step (1) in Bunch and Kaufman. We adopt
        # Higham's "omega" notation instead of B&K's "lambda"
        # because "lambda" can lead to some confusion.
        column_1_subdiag = [ a_ki.abs() for a_ki in A[k+1:,k].list() ]
        omega_1 = max([ a_ki for a_ki in column_1_subdiag ])

        if omega_1 == 0:
            # In this case, our matrix looks like
            #
            #   [ a 0 ]
            #   [ 0 B ]
            #
            # and we can simply skip to the next step after recording
            # the 1x1 pivot "a" in the top-left position. The entry "a"
            # will be adjusted to "1" later on to ensure that "L" is
            # (block) unit-lower-triangular.
            d.append( matrix(ring, 1, [[A[k,k]]]) )
            k += 1
            continue

        if A[k,k].abs() > alpha*omega_1:
            # This is the first case in Higham's Step (1), and B&K's
            # Step (2). Note that we have skipped the part of B&K's
            # Step (1) where we determine "r", since "r" is not yet
            # needed and we may waste some time computing it
            # otherwise. We are performing a 1x1 pivot, but the
            # rows/columns are already where we want them, so nothing
            # needs to be permuted.
            pivot1x1(A,k,k)
            k += 1
            continue

        # Now back to Step (1) of Higham, where we find the index "r"
        # that corresponds to omega_1. This is the "else" branch of
        # Higham's Step (1).
        r = k + 1 + column_1_subdiag.index(omega_1)

        # Continuing the "else" branch of Higham's Step (1), and onto
        # B&K's Step (3) where we find the largest off-diagonal entry
        # (in magnitude) in column "r". Since the matrix is Hermitian,
        # we need only look at the above-diagonal entries to find the
        # off-diagonal of maximal magnitude.
        omega_r = max( a_rj.abs() for a_rj in A[r,k:r].list() )

        if A[k,k].abs()*omega_r >= alpha*(omega_1**2):
            # Step (2) in Higham or Step (4) in B&K.
            pivot1x1(A,k,k)
            k += 1
            continue

        if A[r,r].abs() > alpha*omega_r:
            # This is Step (3) in Higham or Step (5) in B&K. Still a 1x1
            # pivot, but this time we need to swap rows/columns k and r.
            pivot1x1(A,k,r)
            k += 1
            continue

        # If we've made it this far, we're at Step (4) in Higham or
        # Step (6) in B&K, where we perform a 2x2 pivot.
        swap_rows_columns(A,k+1,r)

        # The top-left 2x2 submatrix (starting at position k,k) is now
        # our pivot.
        E = A[k:k+2,k:k+2]
        d.append(E)

        C = A[k+2:n,k:k+2]
        B = A[k+2:,k+2:]

        # We don't actually need the inverse of E, what we really need
        # is C*E.inverse(), and that can be found by setting
        #
        #   X = C*E.inverse()   <====>   XE = C.
        #
        # Then "X" can be found easily by solving a system. Note: I
        # do not actually know that sage solves the system more
        # intelligently, but this is still The Right Thing To Do.
        CE_inverse = E.solve_left(C)

        schur_complement = B - (CE_inverse*C.conjugate_transpose())

        # Compute the Schur complement that we'll work on during
        # the following iteration, and store it back in the lower-
        # right-hand corner of "A".
        for i in range(n-k-2):
            for j in range(i+1):
                A[k+2+i,k+2+j] = schur_complement[i,j]
                A[k+2+j,k+2+i] = schur_complement[j,i]

        # The on- and above-diagonal entries of "L" will be fixed
        # later, so we only need to worry about the lower-left entry
        # of the 2x2 identity matrix that belongs at the top of the
        # new column of "L".
        A[k+1,k] = 0
        for i in range(n-k-2):
            for j in range(2):
                # Store the new (k and (k+1)st) columns of "L" within
                # the lower-left-hand corner of "A".
                A[k+i+2,k+j] = CE_inverse[i,j]


        k += 2

    for i in range(n):
        # We skipped this during the main loop, but it's necessary for
        # correctness: "L" is unit-lower-triangular, so its diagonal
        # must be all ones.
        A[i,i] = 1

    return (p,A,d)
251
def block_ldlt(A):
    r"""
    Perform a block-`LDL^{T}` factorization of the Hermitian
    matrix `A`.

    The standard `LDL^{T}` factorization of a positive-definite matrix
    `A` factors it as `A = LDL^{T}` where `L` is unit-lower-triangular
    and `D` is diagonal. If one allows row/column swaps via a
    permutation matrix `P`, then this factorization can be extended to
    some positive-semidefinite matrices `A` via the factorization
    `P^{T}AP = LDL^{T}` that places the zeros at the bottom of `D` to
    avoid division by zero. These factorizations extend easily to
    complex Hermitian matrices when one replaces the transpose by the
    conjugate-transpose.

    However, we can go one step further. If, in addition, we allow `D`
    to potentially contain `2 \times 2` blocks on its diagonal, then
    every real or complex Hermitian matrix `A` can be factored as `A =
    PLDL^{*}P^{T}`. When the row/column swaps are made intelligently,
    this process is numerically stable over inexact rings like ``RDF``.
    Bunch and Kaufman describe such a "pivot" scheme that is suitable
    for the solution of Hermitian systems, and that is how we choose
    our row and column swaps.

    OUTPUT:

    If the input matrix is Hermitian, we return a triple `(P,L,D)`
    such that `A = PLDL^{*}P^{T}` and

    * `P` is a permutation matrix,
    * `L` is unit lower-triangular,
    * `D` is a block-diagonal matrix whose blocks are of size
      one or two.

    If the input matrix is not Hermitian, the output from this function
    is undefined.

    SETUP::

        sage: from mjo.ldlt import block_ldlt

    EXAMPLES:

    This three-by-three real symmetric matrix has one positive, one
    negative, and one zero eigenvalue -- so it is not any flavor of
    (semi)definite, yet we can still factor it::

        sage: A =  matrix(QQ, [[0, 1, 0],
        ....:                  [1, 1, 2],
        ....:                  [0, 2, 0]])
        sage: P,L,D = block_ldlt(A)
        sage: P
        [0 0 1]
        [1 0 0]
        [0 1 0]
        sage: L
        [  1   0   0]
        [  2   1   0]
        [  1 1/2   1]
        sage: D
        [ 1| 0| 0]
        [--+--+--]
        [ 0|-4| 0]
        [--+--+--]
        [ 0| 0| 0]
        sage: P.transpose()*A*P == L*D*L.transpose()
        True

    This two-by-two matrix has no standard factorization, but it
    constitutes its own block-factorization::

        sage: A = matrix(QQ, [ [0,1],
        ....:                  [1,0] ])
        sage: block_ldlt(A)
        (
        [1 0]  [1 0]  [0 1]
        [0 1], [0 1], [1 0]
        )

    The same is true of the following complex Hermitian matrix::

        sage: A = matrix(QQbar, [ [ 0,I],
        ....:                     [-I,0] ])
        sage: block_ldlt(A)
        (
        [1 0]  [1 0]  [ 0  I]
        [0 1], [0 1], [-I  0]
        )

    TESTS:

    All three factors should be the identity when the original matrix is::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: I = matrix.identity(QQ,n)
        sage: P,L,D = block_ldlt(I)
        sage: P == I and L == I and D == I
        True

    Ensure that a "random" real symmetric matrix is factored correctly::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: A = matrix.random(QQ, n)
        sage: A = A + A.transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.transpose()*P.transpose()
        True

    Ensure that a "random" complex Hermitian matrix is factored correctly::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: F = NumberField(x^2 +1, 'I')
        sage: A = matrix.random(F, n)
        sage: A = A + A.conjugate_transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.conjugate_transpose()*P.conjugate_transpose()
        True

    Ensure that a "random" complex positive-semidefinite matrix is
    factored correctly and that the resulting block-diagonal matrix is
    in fact diagonal::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: F = NumberField(x^2 +1, 'I')
        sage: A = matrix.random(F, n)
        sage: A = A*A.conjugate_transpose()
        sage: P,L,D = block_ldlt(A)
        sage: A == P*L*D*L.conjugate_transpose()*P.conjugate_transpose()
        True
        sage: diagonal_matrix(D.diagonal()) == D
        True

    The factorization should be a no-op on diagonal matrices::

        sage: set_random_seed()
        sage: n = ZZ.random_element(6)
        sage: A = matrix.diagonal(random_vector(QQ, n))
        sage: I = matrix.identity(QQ,n)
        sage: P,L,D = block_ldlt(A)
        sage: P == I and L == I and A == D
        True

    """

    # The heavy lifting happens in _block_ldlt(), which hands back the
    # permutation as a list, "L" stored inside a copy of "A" (with junk
    # above the diagonal), and the diagonal blocks of "D" as a list.
    # Storing "L" inside the working copy of "A" means the factorization
    # only has to permute one matrix per iteration, and only one copy of
    # the input (over its fraction field) is ever made.
    perm, L, blocks = _block_ldlt(A)

    # Assemble the permutation matrix "P" from the permuted index list:
    # column j of "P" has its one in row perm[j].
    space = L.matrix_space()
    P = space.matrix(lambda row, col: perm[col] == row)

    # Warning: when n == 0, this works, but returns a matrix
    # whose (nonexistent) entries are in ZZ rather than in
    # the base ring of P and L.
    D = block_diagonal_matrix(blocks)

    # Overwrite the (strict) upper-triangular part of "L", since a
    # priori it contains the same entries as "A" did after _block_ldlt().
    size = L.nrows()
    for row in range(size):
        for col in range(row + 1, size):
            L[row, col] = 0

    return (P, L, D)
428 return (P,L,D)