/* * Solve a system of linear equations A*X = B with general M-by-N * matrix A using the QR factorization computed by DecomposeQR(). * * If flags&TRANS != 0: * find the minimum norm solution of an overdetermined system A.T * X = B. * i.e min ||X|| s.t A.T*X = B * * Otherwise: * find the least squares solution of an overdetermined system, i.e., * solve the least squares problem: min || B - A*X ||. * * Arguments: * B On entry, the right hand side N-by-P matrix B. On exit, the solution matrix X. * * A The elements on and above the diagonal contain the min(M,N)-by-N upper * trapezoidal matrix R. The elements below the diagonal with the vector 'tau', * represent the ortogonal matrix Q as product of elementary reflectors. * Matrix A and T are as returned by DecomposeQR() * * tau The vector of N scalar coefficients that together with trilu(A) define * the ortogonal matrix Q as Q = H(1)H(2)...H(N) * * W Workspace, P-by-nb matrix used for work space in blocked invocations. * * flags Indicator flag * * nb The block size used in blocked invocations. If nb is zero or P < nb * unblocked algorithm is used. * * Compatible with lapack.GELS (the m >= n part) */ func SolveQR(B, A, tau, W *matrix.FloatMatrix, flags Flags, nb int) error { var err error = nil var R, BT matrix.FloatMatrix if flags&TRANS != 0 { // Solve overdetermined system A.T*X = B // B' = R.-1*B A.SubMatrix(&R, 0, 0, A.Cols(), A.Cols()) B.SubMatrix(&BT, 0, 0, A.Cols(), B.Cols()) err = SolveTrm(&BT, &R, 1.0, LEFT|UPPER|TRANSA) // Clear bottom part of B B.SubMatrixOf(&BT, A.Cols(), 0) BT.SetIndexes(0.0) // X = Q*B' err = MultQ(B, A, tau, W, LEFT, nb) } else { // solve least square problem min ||A*X - B|| // B' = Q.T*B err = MultQ(B, A, tau, W, LEFT|TRANS, nb) if err != nil { return err } // X = R.-1*B' A.SubMatrix(&R, 0, 0, A.Cols(), A.Cols()) B.SubMatrix(&BT, 0, 0, A.Cols(), B.Cols()) err = SolveTrm(&BT, &R, 1.0, LEFT|UPPER) } return err }
func mNormInf(A *matrix.FloatMatrix) float64 { var amax float64 = 0.0 var row matrix.FloatMatrix for k := 0; k < A.Rows(); k++ { row.SubMatrixOf(A, k, 0, A.Cols(), 1) rmax := ASum(&row) if rmax > amax { amax = rmax } } return amax }
func mNorm1(A *matrix.FloatMatrix) float64 { var amax float64 = 0.0 var col matrix.FloatMatrix for k := 0; k < A.Cols(); k++ { col.SubMatrixOf(A, 0, k, A.Rows(), 1) cmax := ASum(&col) if cmax > amax { amax = cmax } } return amax }
// Make A tridiagonal, lower, non-unit matrix by clearing the strictly upper part // of the matrix. func TriL(A *matrix.FloatMatrix) *matrix.FloatMatrix { var Ac matrix.FloatMatrix mlen := imin(A.Rows(), A.Cols()) for k := 1; k < mlen; k++ { Ac.SubMatrixOf(A, 0, k, k, 1) Ac.SetIndexes(0.0) } if A.Cols() > A.Rows() { Ac.SubMatrixOf(A, 0, A.Rows()) Ac.SetIndexes(0.0) } return A }
func _TestViewUpdate(t *testing.T) { Adata2 := [][]float64{ []float64{4.0, 2.0, 2.0}, []float64{6.0, 4.0, 2.0}, []float64{4.0, 6.0, 1.0}, } A := matrix.FloatMatrixFromTable(Adata2, matrix.RowOrder) N := A.Rows() // simple LU decomposition without pivoting var A11, a10, a01, a00 matrix.FloatMatrix for k := 1; k < N; k++ { a00.SubMatrixOf(A, k-1, k-1, 1, 1) a01.SubMatrixOf(A, k-1, k, 1, A.Cols()-k) a10.SubMatrixOf(A, k, k-1, A.Rows()-k, 1) A11.SubMatrixOf(A, k, k) //t.Logf("A11: %v a01: %v\n", A11, a01) a10.Scale(1.0 / a00.Float()) MVRankUpdate(&A11, &a10, &a01, -1.0) } Ld := TriLU(A.Copy()) Ud := TriU(A) t.Logf("Ld:\n%v\nUd:\n%v\n", Ld, Ud) An := matrix.FloatZeros(N, N) Mult(An, Ld, Ud, 1.0, 1.0, NOTRANS) t.Logf("A == Ld*Ud: %v\n", An.AllClose(An)) }
// Make A tridiagonal, upper, non-unit matrix by clearing the strictly lower part // of the matrix. func TriU(A *matrix.FloatMatrix) *matrix.FloatMatrix { var Ac matrix.FloatMatrix var k int mlen := imin(A.Rows(), A.Cols()) for k = 0; k < mlen; k++ { Ac.SubMatrixOf(A, k+1, k, A.Rows()-k-1, 1) Ac.SetIndexes(0.0) } if A.Cols() < A.Rows() { Ac.SubMatrixOf(A, A.Cols(), 0) Ac.SetIndexes(0.0) } return A }
/* * Generate an M-by-N real matrix Q with ortonormal columns, which is * defined as the product of k elementary reflectors and block reflector T * * Q = H(1) H(2) . . . H(k) * * generated using the compact WY representaion as returned by DecomposeQRT(). * * Arguments: * A On entry, QR factorization as returned by DecomposeQRT() where the lower * trapezoidal part holds the elementary reflectors. On exit, the M-by-N * matrix Q. * * T The block reflector computed from elementary reflectors as returned by * DecomposeQRT() or computed from elementary reflectors and scalar coefficients * by BuildT() * * W Workspace, size A.Cols()-by-nb. * * nb Blocksize for blocked invocations. If nb == 0 default value T.Cols() * is used. * * Compatible with lapack.DORGQR */ func BuildQT(A, T, W *matrix.FloatMatrix, nb int) (*matrix.FloatMatrix, error) { var err error = nil if nb == 0 { nb = A.Cols() } // default is from LEFT if nb != 0 && (W.Cols() < nb || W.Rows() < A.Cols()) { return nil, errors.New("workspace too small") } var Wrk matrix.FloatMatrix Wrk.SubMatrixOf(W, 0, 0, A.Cols(), nb) err = blockedBuildQT(A, T, &Wrk, nb) return A, err }
/* * Compute QR factorization of a M-by-N matrix A using compact WY transformation: A = Q * R, * where Q = I - Y*T*Y.T, T is block reflector and Y holds elementary reflectors as lower * trapezoidal matrix saved below diagonal elements of the matrix A. * * Arguments: * A On entry, the M-by-N matrix A. On exit, the elements on and above * the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R. * The elements below the diagonal with the matrix 'T', represent * the ortogonal matrix Q as product of elementary reflectors. * * T On exit, the block reflector which, together with trilu(A) represent * the ortogonal matrix Q as Q = I - Y*T*Y.T where Y = trilu(A). * * W Workspace, N-by-nb matrix used for work space in blocked invocations. * * nb The block size used in blocked invocations. If nb is zero on N <= nb * unblocked algorithm is used. * * Returns: * Decomposed matrix A and error indicator. * * DecomposeQRT is compatible with lapack.DGEQRT */ func DecomposeQRT(A, T, W *matrix.FloatMatrix, nb int) (*matrix.FloatMatrix, error) { var err error = nil if nb == 0 || A.Cols() <= nb { unblockedQRT(A, T) } else { if W == nil { W = matrix.FloatZeros(A.Cols(), nb) } else if W.Cols() < nb || W.Rows() < A.Cols() { return nil, errors.New("work space too small") } var Wrk matrix.FloatMatrix Wrk.SubMatrixOf(W, 0, 0, A.Cols(), nb) blockedQRT(A, T, &Wrk, nb) } return A, err }
// Make A tridiagonal, upper, unit matrix by clearing the strictly lower part // of the matrix and setting diagonal elements to one. func TriUU(A *matrix.FloatMatrix) *matrix.FloatMatrix { var Ac matrix.FloatMatrix var k int mlen := imin(A.Rows(), A.Cols()) for k = 0; k < mlen; k++ { Ac.SubMatrixOf(A, k+1, k, A.Rows()-k-1, 1) Ac.SetIndexes(0.0) A.SetAt(k, k, 1.0) } // last element on diagonal A.SetAt(k, k, 1.0) if A.Cols() < A.Rows() { Ac.SubMatrixOf(A, A.Cols(), 0) Ac.SetIndexes(0.0) } return A }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors. * * Q = H(1) H(2) . . . H(k) * * as returned by DecomposeQR(). * * Arguments: * C On entry, the M-by-N matrix C. On exit C is overwritten by Q*C or Q.T*C. * * A QR factorization as returne by DecomposeQR() where the lower trapezoidal * part holds the elementary reflectors. * * tau The scalar factors of the elementary reflectors. * * W Workspace, used for blocked invocations. Size C.Cols()-by-nb. * * nb Blocksize for blocked invocations. If C.Cols() <= nb unblocked algorithm * is used. * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS, NOTRANS * * Compatible with lapack.DORMQR */ func MultQ(C, A, tau, W *matrix.FloatMatrix, flags Flags, nb int) error { var err error = nil if nb != 0 && W == nil { return errors.New("workspace not defined") } if flags&RIGHT != 0 { // from right; C*A or C*A.T if C.Cols() != A.Rows() { return errors.New("C*Q: C.Cols != A.Rows") } if nb != 0 && (W.Cols() < nb || W.Rows() < C.Rows()) { return errors.New("workspace too small") } } else { // default is from LEFT; A*C or A.T*C /* if C.Rows() != A.Rows() { return errors.New("Q*C: C.Rows != A.Rows") } */ if nb != 0 && (W.Cols() < nb || W.Rows() < C.Cols()) { return errors.New("workspace too small") } } if nb == 0 { if flags&RIGHT != 0 { w := matrix.FloatZeros(C.Rows(), 1) unblockedMultQRight(C, A, tau, w, flags) } else { w := matrix.FloatZeros(1, C.Cols()) unblockedMultQLeft(C, A, tau, w, flags) } } else { var Wrk matrix.FloatMatrix if flags&RIGHT != 0 { Wrk.SubMatrixOf(W, 0, 0, C.Rows(), nb) blockedMultQRight(C, A, tau, &Wrk, nb, flags) } else { Wrk.SubMatrixOf(W, 0, 0, C.Cols(), nb) blockedMultQLeft(C, A, tau, &Wrk, nb, flags) } } return err }
/* * Generate an M-by-N real matrix Q with ortonormal columns, which is * defined as the product of k elementary reflectors and block reflector T * * Q = H(1) H(2) . . . H(k) * * as returned by DecomposeQRT(). * * Arguments: * A On entry, QR factorization as returned by DecomposeQRT() where the lower * trapezoidal part holds the elementary reflectors. On exit, the M-by-N * matrix Q. * * tau The scalar factors of elementary reflectors as returned by DecomposeQR() * * W Workspace, size A.Cols()-by-nb. * * nb Blocksize for blocked invocations. If nb == 0 unblocked algorith is used * * Compatible with lapack.DORGQR */ func BuildQ(A, tau, W *matrix.FloatMatrix, nb int) (*matrix.FloatMatrix, error) { var err error = nil if nb != 0 && W == nil { return nil, errors.New("workspace not defined") } // default is from LEFT if nb != 0 && (W.Cols() < nb || W.Rows() < A.Cols()) { return nil, errors.New("workspace too small") } if nb == 0 { w := matrix.FloatZeros(1, A.Cols()) err = unblockedBuildQ(A, tau, w, 0) } else { var Wrk matrix.FloatMatrix Wrk.SubMatrixOf(W, 0, 0, A.Cols(), nb) err = blockedBuildQ(A, tau, &Wrk, nb) } return A, err }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors and block reflector T * * Q = H(1) H(2) . . . H(k) * * as returned by DecomposeQRT(). * * Arguments: * C On entry, the M-by-N matrix C. On exit C is overwritten by Q*C or Q.T*C. * * A QR factorization as returned by DecomposeQRT() where the lower trapezoidal * part holds the elementary reflectors. * * T The block reflector computed from elementary reflectors as returned by * DecomposeQRT() or computed from elementary reflectors and scalar coefficients * by BuildT() * * W Workspace, size C.Cols()-by-nb or C.Rows()-by-nb * * nb Blocksize for blocked invocations. If nb == 0 default value T.Cols() * is used. * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS, NOTRANS * * Compatible with lapack.DGEMQRT */ func MultQT(C, A, T, W *matrix.FloatMatrix, flags Flags, nb int) error { var err error = nil if nb == 0 { nb = T.Cols() } if W == nil { return errors.New("workspace not defined") } if flags&RIGHT != 0 { // from right; C*A or C*A.T if C.Cols() != A.Rows() { return errors.New("C*Q: C.Cols != A.Rows") } if W.Cols() < nb || W.Rows() < C.Rows() { return errors.New("workspace too small") } } else { // default is from LEFT; A*C or A.T*C /* if C.Rows() != A.Rows() { return errors.New("Q*C: C.Rows != A.Rows") } */ if W.Cols() < nb || W.Rows() < C.Cols() { return errors.New("workspace too small") } } var Wrk matrix.FloatMatrix if flags&RIGHT != 0 { Wrk.SubMatrixOf(W, 0, 0, C.Rows(), nb) blockedMultQTRight(C, A, T, &Wrk, nb, flags) } else { Wrk.SubMatrixOf(W, 0, 0, C.Cols(), nb) blockedMultQTLeft(C, A, T, &Wrk, nb, flags) } return err }
// _TestPartition2D walks a 4x4 view (As) of a 6x6 zero matrix with the
// 2x2 -> 3x3 repartitioning helpers, modifying the a11 and a21 blocks at each
// step, and logs the resulting parent matrix. It makes no assertions. The
// leading underscore keeps it from being picked up by "go test".
//
// NOTE(review): partition2x2, repartition2x2to3x3 and continue3x3to2x2 are
// called here with fewer arguments than at the other call sites in this
// package (no start/direction argument) - confirm this still compiles before
// re-enabling the test.
func _TestPartition2D(t *testing.T) {
	var ATL, ATR, ABL, ABR, As matrix.FloatMatrix
	var A00, a01, A02, a10, a11, a12, A20, a21, A22 matrix.FloatMatrix

	A := matrix.FloatZeros(6, 6)
	// As is the inner 4x4 block of A; fill it with ones so the view is
	// distinguishable from the surrounding zeros in the final log.
	As.SubMatrixOf(A, 1, 1, 4, 4)
	As.SetIndexes(1.0)
	partition2x2(&ATL, &ATR, &ABL, &ABR, &As, 0)
	t.Logf("ATL:\n%v\n", &ATL)

	// Iterate until the top-left block covers all rows of the view.
	for ATL.Rows() < As.Rows() {
		repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			&a10, &a11, &a12,
			&A20, &a21, &A22, &As, 1)
		t.Logf("m(a12)=%d [%d], m(a11)=%d\n", a12.Cols(), a12.NumElements(), a11.NumElements())
		// Writes go through the views into A.
		a11.Add(1.0)
		a21.Add(-2.0)
		continue3x3to2x2(&ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, &As)
	}
	t.Logf("A:\n%v\n", A)
}
/* * Blocked version for computing C = C*Q and C = C*Q.T from elementary reflectors * and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block reflector T. * Matrix C is updated by applying block reflector T using compact WY algorithm. */ func blockedMultQRight(C, A, tau, W *matrix.FloatMatrix, nb int, flags Flags) { var ATL, ATR, ABL, ABR, AL matrix.FloatMatrix var A00, A10, A11, A20, A21, A22 matrix.FloatMatrix var CL, CR, C0, C1, C2 matrix.FloatMatrix var tT, tB matrix.FloatMatrix var t0, tau1, t2 matrix.FloatMatrix var Wrk, Tw matrix.FloatMatrix var Aref *matrix.FloatMatrix var pAdir, pAstart, pDir, pStart, pCstart, pCdir pDirection var bsz, cb, mb int // partitioning start and direction if flags&TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = pBOTTOMRIGHT pAdir = pTOPLEFT pStart = pBOTTOM pDir = pTOP pCstart = pRIGHT pCdir = pLEFT mb = A.Rows() - A.Cols() cb = C.Cols() - A.Cols() Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = pTOPLEFT pAdir = pBOTTOMRIGHT pStart = pTOP pDir = pBOTTOM pCstart = pLEFT pCdir = pRIGHT mb = 0 cb = 0 Aref = &ABR } Twork := matrix.FloatZeros(nb, nb) partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) partition1x2( &CL, &CR, C, cb, pCstart) partition2x1( &tT, &tB, tau, 0, pStart) transpose := flags&TRANS != 0 for Aref.Rows() > 0 && Aref.Cols() > 0 { repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, nb, pDir) bsz = A11.Cols() // C1 block size must match A11 repartition1x2to1x3(&CL, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- // build block reflector from current block merge2x1(&AL, &A11, &A21) Tw.SubMatrixOf(Twork, 0, 0, bsz, bsz) unblkQRBlockReflector(&Tw, &AL, &tau1) // compute: C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T // C*Q == C - C*Y*T*Y.T 
Wrk.SubMatrixOf(W, 0, 0, C1.Rows(), bsz) updateWithQTRight(&C1, &C2, &A11, &A21, &Tw, &Wrk, nb, transpose) // -------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) continue1x3to1x2( &CL, &CR, &C0, &C1, C, pCdir) continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
// unblocked LU decomposition with pivots: FLAME LU variant 3 func unblockedLUpiv(A *matrix.FloatMatrix, p *pPivots) error { var err error var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a01, A02, a10, a11, a12, A20, a21, A22 matrix.FloatMatrix var AL, AR, A0, a1, A2, aB1, AB0 matrix.FloatMatrix var pT, pB, p0, p1, p2 pPivots err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) partition1x2( &AL, &AR, A, 0, pLEFT) partitionPivot2x1( &pT, &pB, p, 0, pTOP) for ATL.Rows() < A.Rows() && ATL.Cols() < A.Cols() { repartition2x2to3x3(&ATL, &A00, &a01, &A02, &a10, &a11, &a12, &A20, &a21, &A22 /**/, A, 1, pBOTTOMRIGHT) repartition1x2to1x3(&AL, &A0, &a1, &A2 /**/, A, 1, pRIGHT) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, p, 1, pBOTTOM) // apply previously computed pivots applyPivots(&a1, &p0) // a01 = trilu(A00) \ a01 (TRSV) MVSolveTrm(&a01, &A00, 1.0, LOWER|UNIT) // a11 = a11 - a10 *a01 a11.Add(Dot(&a10, &a01, -1.0)) // a21 = a21 -A20*a01 MVMult(&a21, &A20, &a01, -1.0, 1.0, NOTRANS) // pivot index on current column [a11, a21].T aB1.SubMatrixOf(&ABR, 0, 0, ABR.Rows(), 1) pivotIndex(&aB1, &p1) // pivots to current column applyPivots(&aB1, &p1) // a21 = a21 / a11 InvScale(&a21, a11.Float()) // apply pivots to previous columns AB0.SubMatrixOf(&ABL, 0, 0) applyPivots(&AB0, &p1) // scale last pivots to origin matrix row numbers p1.pivots[0] += ATL.Rows() continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pBOTTOMRIGHT) continue1x3to1x2( &AL, &AR, &A0, &a1, A, pRIGHT) contPivot3x1to2x1( &pT, &pB, &p0, &p1, p, pBOTTOM) } if ATL.Cols() < A.Cols() { applyPivots(&ATR, p) SolveTrm(&ATR, &ATL, 1.0, LEFT|UNIT|LOWER) } return err }
/* * Blocked version for computing C = Q*C and C = Q.T*C from elementary reflectors * and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block reflector T. * Matrix C is updated by applying block reflector T using compact WY algorithm. */ func blockedMultQLeft(C, A, tau, W *matrix.FloatMatrix, nb int, flags Flags) { var ATL, ATR, ABL, ABR, AL matrix.FloatMatrix var A00, A10, A11, A20, A21, A22 matrix.FloatMatrix var CT, CB, C0, C1, C2 matrix.FloatMatrix var tT, tB matrix.FloatMatrix var t0, tau1, t2 matrix.FloatMatrix var Wrk, Tw matrix.FloatMatrix var Aref *matrix.FloatMatrix var pAdir, pAstart, pDir, pStart pDirection var bsz, mb int // partitioning start and direction if flags&TRANS != 0 || nb == A.Cols() { // from top-left to bottom-right to produce transposed sequence (Q.T*C) pAstart = pTOPLEFT pAdir = pBOTTOMRIGHT pStart = pTOP pDir = pBOTTOM mb = 0 Aref = &ABR } else { // from bottom-right to top-left to produce normal sequence (Q*C) pAstart = pBOTTOMRIGHT pAdir = pTOPLEFT pStart = pBOTTOM pDir = pTOP mb = A.Rows() - A.Cols() Aref = &ATL } Twork := matrix.FloatZeros(nb, nb) partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) partition2x1( &CT, &CB, C, mb, pStart) partition2x1( &tT, &tB, tau, 0, pStart) transpose := flags&TRANS != 0 for Aref.Rows() > 0 && Aref.Cols() > 0 { repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, nb, pDir) bsz = A11.Cols() repartition2x1to3x1(&CT, &C0, &C1, &C2, C, bsz, pDir) // -------------------------------------------------------- // build block reflector from current block merge2x1(&AL, &A11, &A21) Tw.SubMatrixOf(Twork, 0, 0, bsz, bsz) unblkQRBlockReflector(&Tw, &AL, &tau1) // compute: Q*T.C == C - Y*(C.T*Y*T).T transpose == true // Q*C == C - C*Y*T*Y.T transpose == false Wrk.SubMatrixOf(W, 0, 0, C1.Cols(), bsz) updateWithQT(&C1, &C2, &A11, &A21, &Tw, &Wrk, nb, transpose) // 
-------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) continue3x1to2x1( &CT, &CB, &C0, &C1, C, pDir) continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
// blocked LU decomposition with pivots: FLAME LU variant 3 func blockedLUpiv(A *matrix.FloatMatrix, p *pPivots, nb int) error { var err error var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 matrix.FloatMatrix var AL, AR, A0, A1, A2, AB1, AB0 matrix.FloatMatrix var pT, pB, p0, p1, p2 pPivots err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) partition1x2( &AL, &AR, A, 0, pLEFT) partitionPivot2x1( &pT, &pB, p, 0, pTOP) for ATL.Rows() < A.Rows() && ATL.Cols() < A.Cols() { repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, pBOTTOMRIGHT) repartition1x2to1x3(&AL, &A0, &A1, &A2 /**/, A, nb, pRIGHT) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, p, nb, pBOTTOM) // apply previously computed pivots applyPivots(&A1, &p0) // a01 = trilu(A00) \ a01 (TRSV) SolveTrm(&A01, &A00, 1.0, LOWER|UNIT) // A11 = A11 - A10*A01 Mult(&A11, &A10, &A01, -1.0, 1.0, NOTRANS) // A21 = A21 - A20*A01 Mult(&A21, &A20, &A01, -1.0, 1.0, NOTRANS) // LU_piv(AB1, p1) AB1.SubMatrixOf(&ABR, 0, 0, ABR.Rows(), A11.Cols()) unblockedLUpiv(&AB1, &p1) // apply pivots to previous columns AB0.SubMatrixOf(&ABL, 0, 0) applyPivots(&AB0, &p1) // scale last pivots to origin matrix row numbers for k, _ := range p1.pivots { p1.pivots[k] += ATL.Rows() } continue3x3to2x2( &ATL, &ATR, &ABL, &ABR /**/, &A00, &A11, &A22, A, pBOTTOMRIGHT) continue1x3to1x2( &AL, &AR /**/, &A0, &A1, A, pRIGHT) contPivot3x1to2x1( &pT, &pB /**/, &p0, &p1, p, pBOTTOM) } if ATL.Cols() < A.Cols() { applyPivots(&ATR, p) SolveTrm(&ATR, &ATL, 1.0, LEFT|UNIT|LOWER) } return err }
/* * Blocked version for computing C = C*Q and C = C*Q.T with block reflector. * */ func blockedMultQTRight(C, A, T, W *matrix.FloatMatrix, nb int, flags Flags) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, A10, A11, A20, A21, A22 matrix.FloatMatrix var CL, CR, C0, C1, C2 matrix.FloatMatrix var TTL, TTR, TBL, TBR matrix.FloatMatrix var T00, T01, T02, T11, T12, T22 matrix.FloatMatrix var Aref *matrix.FloatMatrix var pAdir, pAstart, pCstart, pCdir pDirection var bsz, cb, mb int // partitioning start and direction if flags&TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = pBOTTOMRIGHT pAdir = pTOPLEFT pCstart = pRIGHT pCdir = pLEFT mb = A.Rows() - A.Cols() cb = C.Cols() - A.Cols() Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = pTOPLEFT pAdir = pBOTTOMRIGHT pCstart = pLEFT pCdir = pRIGHT mb = 0 cb = 0 Aref = &ABR } partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, pAstart) partition1x2( &CL, &CR, C, cb, pCstart) transpose := flags&TRANS != 0 for Aref.Rows() > 0 && Aref.Cols() > 0 { repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) repartition2x2to3x3(&TTL, &T00, &T01, &T02, nil, &T11, &T12, nil, nil, &T22, T, nb, pAdir) bsz = A11.Cols() repartition1x2to1x3(&CL, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- // compute: C*Q.T == C - C*Y*T.T*Y.T transpose == true // C*Q == C - C*Y*T*Y.T transpose == false var Wrk matrix.FloatMatrix Wrk.SubMatrixOf(W, 0, 0, C1.Rows(), bsz) updateWithQTRight(&C1, &C2, &A11, &A21, &T11, &Wrk, nb, transpose) // -------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &T11, &T22, T, pAdir) continue1x3to1x2( &CL, &CR, &C0, &C1, C, pCdir) } }
func TestMultMVTransASmall(t *testing.T) { data6 := [][]float64{ []float64{-1.59e+00, 6.56e-02, 2.14e-01, 6.79e-01, 2.93e-01, 5.24e-01}, []float64{4.28e-01, 1.57e-01, 3.81e-01, 2.19e-01, 2.97e-01, 2.83e-02}, []float64{3.02e-01, 9.70e-02, 3.18e-01, 2.03e-01, 7.53e-01, 1.58e-01}, []float64{1.99e-01, 3.01e-01, 4.69e-01, 3.61e-01, 2.07e-01, 6.07e-01}, []float64{1.93e-01, 5.15e-01, 2.83e-01, 5.71e-01, 8.65e-01, 9.75e-01}, []float64{3.13e-01, 8.14e-01, 2.93e-01, 8.62e-01, 6.97e-01, 7.95e-02}} data5 := [][]float64{ []float64{1.57e-01, 3.81e-01, 2.19e-01, 2.97e-01, 2.83e-02}, []float64{9.70e-02, 3.18e-01, 2.03e-01, 7.53e-01, 1.58e-01}, []float64{3.01e-01, 4.69e-01, 3.61e-01, 2.07e-01, 6.07e-01}, []float64{5.15e-01, 2.83e-01, 5.71e-01, 8.65e-01, 9.75e-01}, []float64{8.14e-01, 2.93e-01, 8.62e-01, 6.97e-01, 7.95e-02}} data2 := []float64{4.28e-01, 3.02e-01, 1.99e-01, 1.93e-01, 3.13e-01} bM := 5 bN := 4 nb := 2 //A := matrix.FloatNormal(bN, bM) //X := matrix.FloatWithValue(bN, 1, 1.0) A := matrix.FloatMatrixFromTable(data5, matrix.RowOrder) X := matrix.FloatNew(5, 1, data2) bM = A.Rows() bN = A.Cols() Ym := matrix.FloatZeros(3, bM) Y1 := matrix.FloatZeros(bM, 1) Y0 := matrix.FloatZeros(bM, 1) Ar := A.FloatArray() Xr := X.FloatArray() Y1r := Y1.FloatArray() blas.GemvFloat(A, X, Y0, 1.0, 1.0, linalg.OptTrans) DMultMV(Y1r, Ar, Xr, 1.0, 1.0, TRANSA, 1, A.LeadingIndex(), 1, 0, bN, 0, bM, nb, nb) ok := Y0.AllClose(Y1) t.Logf("Y0 == Y1: %v\n", ok) if ok || !ok { t.Logf("blas: Y=A.T*X\n%v\n", Y0) t.Logf("Y1: Y1 = A*X\n%v\n", Y1) } // zero Y0, Y1 Y0.Scale(0.0) Y1.Scale(0.0) // test with matrix view; A is view var A0 matrix.FloatMatrix A6 := matrix.FloatMatrixFromTable(data6, matrix.RowOrder) A0.SubMatrixOf(A6, 1, 1) blas.GemvFloat(&A0, X, Y0, 1.0, 1.0, linalg.OptTrans) Ar = A0.FloatArray() DMultMV(Y1r, Ar, Xr, 1.0, 1.0, TRANSA, 1, A0.LeadingIndex(), 1, 0, bN, 0, bM, nb, nb) ok = Y0.AllClose(Y1) t.Logf("lda>rows: Y0 == Y1: %v\n", ok) if ok || !ok { t.Logf("blas: Y=A.T*X\n%v\n", Y0) 
t.Logf("Y1: Y1 = A*X\n%v\n", Y1) } // Y is view too. Y1.SubMatrixOf(Ym, 0, 0, 1, bM) Y1r = Y1.FloatArray() DMultMV(Y1r, Ar, Xr, 1.0, 1.0, TRANSA, Y1.LeadingIndex(), A0.LeadingIndex(), 1, 0, bN, 0, bM, nb, nb) ok = Y0.AllClose(Y1.Transpose()) t.Logf("Y0 == Y1 row: %v\n", ok) t.Logf("row Y1: %v\n", Y1) }
/* * Blocked version for computing C = Q*C and C = Q.T*C with block reflector. * */ func blockedMultQTLeft(C, A, T, W *matrix.FloatMatrix, nb int, flags Flags) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, A10, A11, A20, A21, A22 matrix.FloatMatrix var CT, CB, C0, C1, C2 matrix.FloatMatrix var TTL, TTR, TBL, TBR matrix.FloatMatrix var T00, T01, T02, T11, T12, T22 matrix.FloatMatrix var Aref *matrix.FloatMatrix var pAdir, pAstart, pCdir, pCstart pDirection var bsz, mb int // partitioning start and direction if flags&TRANS != 0 { // from top-left to bottom-right to produce transposed sequence (Q.T*C) pAstart = pTOPLEFT pAdir = pBOTTOMRIGHT pCstart = pTOP pCdir = pBOTTOM mb = 0 Aref = &ABR } else { // from bottom-right to top-left to produce normal sequence (Q*C) pAstart = pBOTTOMRIGHT pAdir = pTOPLEFT pCstart = pBOTTOM pCdir = pTOP mb = A.Rows() - A.Cols() Aref = &ATL } partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, pAstart) partition2x1( &CT, &CB, C, mb, pCstart) transpose := flags&TRANS != 0 for Aref.Rows() > 0 && Aref.Cols() > 0 { repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) repartition2x2to3x3(&TTL, &T00, &T01, &T02, nil, &T11, &T12, nil, nil, &T22, T, nb, pAdir) bsz = A11.Cols() // must match A11 block size repartition2x1to3x1(&CT, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- // compute: Q.T*C == C - Y*(C.T*Y*T).T transpose == true // Q*C == C - C*Y*T*Y.T transpose == false var Wrk matrix.FloatMatrix Wrk.SubMatrixOf(W, 0, 0, C1.Cols(), bsz) updateWithQT(&C1, &C2, &A11, &A21, &T11, &Wrk, nb, transpose) // -------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &T11, &T22, T, pAdir) continue3x1to2x1( &CT, &CB, &C0, &C1, C, pCdir) } }