/*
 * Build full block reflector T from the sequence of reflectors stored in S.
 *
 * Reflectors in S are the diagonal blocks of T; off-diagonal values of the reflector
 * are computed from the elementary reflectors stored in the lower triangular part of A.
 *
 * A, T and S are traversed together in steps of conf.LB. On every step the current
 * block S01 is copied to the diagonal block T11 and updateQRTReflector() is called
 * for the off-diagonal block T01.
 *
 * Argument nc is not referenced by this function. The function always returns nil.
 *
 * NOTE(review): the loop runs only while more than conf.LB rows and columns remain
 * in ABR and the branch after the loop is empty, so the trailing block is not
 * processed here. This looks unfinished -- confirm against the callers.
 */
func buildQRTReflector(T, A, S *cmat.FloatMatrix, nc int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, T01, T02, T11, T12, T22 cmat.FloatMatrix
	var SL, SR cmat.FloatMatrix
	var S00, S01, S02 cmat.FloatMatrix

	// start with empty top-left blocks; everything is in the bottom-right/right part
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&SL, &SR, S, 0, util.PLEFT)

	nb := conf.LB
	for m(&ABR)-nb > 0 && n(&ABR)-nb > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &T01, &T02,
			nil, &T11, &T12,
			nil, nil, &T22, T, nb, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&SL,
			&S00, &S01, &S02, S, nb, util.PRIGHT)
		// --------------------------------------------------------
		// update T01: T01 = -T00*Y1.T*Y2*T11
		//  Y1 = /A10\   Y2 = /A11\
		//       \A20/        \A21/
		//
		// diagonal block of T is the reflector block stored in S
		T11.Copy(&S01)
		// S01 (not T11) is handed over as the diagonal block operand
		updateQRTReflector(&T01, &A10, &A20, &A11, &A21, &T00, &S01, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &T11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&SL, &SR, &S00, &S01, S, util.PRIGHT)
	}
	// NOTE(review): trailing block is detected but nothing is done with it.
	if m(&ABR) > 0 && n(&ABR) > 0 {
	}
	return nil
}
func blockedQRT(A, T, W *cmat.FloatMatrix, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR, AL, AR cmat.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix var TL, TR, W2 cmat.FloatMatrix var T00, T01, T02 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition1x2( &TL, &TR, T, 0, util.PLEFT) nb := conf.LB for m(&ABR)-nb > 0 && n(&ABR)-nb > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&TL, &T00, &T01, &T02, T, nb, util.PRIGHT) util.Partition1x2( &AL, &AR, &ABR, nb, util.PLEFT) // -------------------------------------------------------- // decompose left side AL == /A11\ // \A21/ unblockedQRT(&AL, &T01, W) // update A'tail i.e. A12 and A22 with (I - Y*T*Y.T).T * A'tail // compute: Q*T.C == C - Y*(C.T*Y*T).T ar, ac := A12.Size() W2.SubMatrix(W, 0, 0, ac, ar) updateWithQTLeft(&A12, &A22, &A11, &A21, &T01, &W2, true, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &TL, &TR, &T00, &T01, T, util.PRIGHT) } if m(&ABR) > 0 && n(&ABR) > 0 { T01.SubMatrix(&TR, 0, 0, n(&ABR), n(&ABR)) unblockedQRT(&ABR, &T01, W) } return err }
/* * Computes upper Hessenberg reduction of N-by-N matrix A using unblocked * algorithm as described in (1). * * Hessengerg reduction: A = Q.T*B*Q, Q unitary, B upper Hessenberg * Q = H(0)*H(1)*...*H(k) where H(k) is k'th Householder reflector. * * Compatible with lapack.DGEHD2. */ func unblkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, row int) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a11, a21, A22 cmat.FloatMatrix var AL, AR, A0, a1, A2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w12, v1 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, row, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) util.Partition2x1( &tT, &tB, Tvec, 0, util.PTOP) v1.SubMatrix(W, 0, 0, m(A), 1) for m(&ABR) > 1 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &a11, nil, nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &a1, &A2, A, 1, util.PRIGHT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, Tvec, 1, util.PBOTTOM) // ------------------------------------------------------ // a21 = [beta; H(k)].T computeHouseholderVec(&a21, &tau1) tauval := tau1.Get(0, 0) beta := a21.Get(0, 0) a21.Set(0, 0, 1.0) // v1 := A2*a21 blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE) // A2 := A2 - tau*v1*a21 (A2 := A2*H(k)) blasd.MVUpdate(&A2, &v1, &a21, -tauval) w12.SubMatrix(W, 0, 0, n(&A22), 1) // w12 := a21.T*A22 = A22.T*a21 blasd.MVMult(&w12, &A22, &a21, 1.0, 0.0, gomas.TRANS) // A22 := A22 - tau*a21*w12 (A22 := H(k)*A22) blasd.MVUpdate(&A22, &a21, &w12, -tauval) a21.Set(0, 0, beta) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR, &A0, &a1, A, util.PRIGHT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, Tvec, util.PBOTTOM) } }
/*
 * Build the reduction block for the blocked Hessenberg algorithm as described in (1).
 *
 * One column of A is processed per iteration, n(V) iterations in total.
 *
 * A. update next column
 *    a10        [(U00)     (U00)  ]   [(a10)    (V00)            ]
 *    a11 := I - [(u10)*T00*(u10).T] * [(a11)  - (v01) * T00 * a10]
 *    a12        [(U20)     (U20)  ]   [(a12)    (V02)            ]
 *
 * B. compute Householder reflector for updated column
 *    a21, t11 := Householder(a21)
 *
 * C. update intermediate reductions (one column of V)
 *    v10      A02*a21
 *    v11  :=  a12*a21
 *    v12      A22*a21
 *
 * D. update block reflector
 *    t01 :=  -t11*T00*A20.T*a21
 *    t11 :=  t11
 *
 * While a reflector is in use its first element in A is overwritten with 1.0; the
 * original value (beta) is written back on the next iteration and, for the last
 * reflector, after the loop.
 *
 * Argument W is not referenced by this function. Always returns nil.
 */
func unblkBuildHessGQvdG(A, T, V, W *cmat.FloatMatrix) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, t11, T22 cmat.FloatMatrix
	var VL, VR, V0, v1, V2, Y0 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition1x2(
		&VL, &VR, V, 0, util.PLEFT)

	// first element of the latest reflector; carried over to the next iteration
	var beta float64

	for n(&VR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, nil,
			nil, &t11, nil,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition1x2to1x3(&VL,
			&V0, &v1, &V2, V, 1, util.PRIGHT)
		// ------------------------------------------------------
		// Compute Hessenberg update for next column of A; skipped on the
		// first iteration when there are no previous reflectors (V0 is empty)
		if n(&V0) > 0 {
			// y10 := T00*a10  (use t01 as workspace?)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
			// a1 := a1 - V0*T00*a10
			blasd.MVMult(&a1, &V0, &t01, -1.0, 1.0, gomas.NONE)

			// update a1 := (I - Y*T*Y.T).T*a1 (here t01 as workspace)
			Y0.SubMatrix(A, 1, 0, n(&A00), n(&A00))
			updateVecLeftWY2(&a1, &Y0, &A20, &T00, &t01, gomas.TRANS)
			// write back beta of the previous reflector; presumably column
			// index -1 addresses the last element of a10 -- TODO confirm
			a10.Set(0, -1, beta)
		}

		// Compute Householder reflector
		computeHouseholderVec(&a21, &t11)
		// save the first element and replace it with one for the updates below
		beta = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// update T; t01 is left untouched when the scalar factor is zero
		tauval := t11.Get(0, 0)
		if tauval != 0.0 {
			// t01 := -tauval*A20.T*a21
			blasd.MVMult(&t01, &A20, &a21, -tauval, 0.0, gomas.TRANS)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue1x3to1x2(
			&VL, &VR, &V0, &v1, V, util.PRIGHT)
	}
	// write back beta of the last reflector, at row n(V) and column n(V)-1
	A.Set(n(V), n(V)-1, beta)
	return nil
}
/* * Blocked version for computing C = C*Q and C = C*Q.T from elementary reflectors * and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block reflector T. * Matrix C is updated by applying block reflector T using compact WY algorithm. */ func blockedMultQRight(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var W0, Wrk, Tw, Twork cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCstart, pCdir util.Direction var bsz, cb, mb int // partitioning start and direction if flags&gomas.TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = imax(0, m(A)-n(A)) cb = n(C) - n(A) Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = 0 cb = 0 Aref = &ABR } // intermediate reflector at start of workspace Twork.SetBuf(nb, nb, nb, W.Data()) W0.SetBuf(m(C), nb, m(C), W.Data()[Twork.Len():]) util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) util.Partition1x2( &CL, &CR, C, cb, pCstart) util.Partition2x1( &tT, &tB, tau, 0, pStart) transpose := flags&gomas.TRANS != 0 for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, nb, pDir) bsz = n(&A11) // C1 block size must match A11 util.Repartition1x2to1x3(&CL, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- // clear & build block reflector from current 
block util.Merge2x1(&AL, &A11, &A21) Tw.SubMatrix(&Twork, 0, 0, bsz, bsz) blasd.Scale(&Tw, 0.0) unblkQRBlockReflector(&Tw, &AL, &tau1) // compute: C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T // C*Q == C - C*Y*T*Y.T Wrk.SubMatrix(&W0, 0, 0, m(&C1), bsz) updateWithQTRight(&C1, &C2, &A11, &A21, &Tw, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR, &C0, &C1, C, pCdir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
/* * Unblocked algorith for computing C = C*Q.T and C = C*Q. * * Q = H(1)H(2)...H(k) where elementary reflectors H(i) are stored on i'th column * below diagonal in A. * * Q.T = (H1(1)*H(2)*...*H(k)).T * = H(k).T*...*H(2).T*H(1).T * = H(k)...H(2)H(1) * * Progressing A from top-left to bottom-right i.e from smaller column numbers * to larger, produces C*H(1)H(2)...H(k) == C*Q. * * Progressing from bottom-right to top-left produces C*H(k)...H(2)H(1) == C*Q.T. */ func unblockedMultQRight(C, A, tau, w *cmat.FloatMatrix, flags int) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix var CL, CR, C0, c1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w1 cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCstart, pCdir util.Direction var cb, mb, tb, nb int // partitioning start and direction if flags&gomas.TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) cb = imax(0, n(C)-n(A)) tb = imax(0, tau.Len()-n(A)) Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = 0 cb = 0 tb = 0 nb = 0 Aref = &ABR } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, nb, pAstart) util.Partition1x2( &CL, &CR, C, cb, pCstart) util.Partition2x1( &tT, &tB, tau, tb, pStart) for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22, A, 1, pAdir) util.Repartition1x2to1x3(&CL, &C0, &c1, &C2, C, 1, pCdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, 1, pDir) // -------------------------------------------------------- w1.SubMatrix(w, 0, 0, c1.Len(), 1) applyHouseholder2x1(&tau1, &a21, 
&c1, &C2, &w1, gomas.RIGHT) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR, &C0, &c1, C, pCdir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
func blkMultRightQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) { var ATL, ABR, AL cmat.FloatMatrix var A00, A01, A11, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var T0, T, W0, Wrk cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCdir, pCstart util.Direction var mb, tb, nb, cb int // partitioning start and direction if flags&gomas.TRANS != 0 { // from top-left to bottom-right to produce transpose sequence (C*Q.T) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) cb = imax(0, n(C)-n(A)) tb = imax(0, tau.Len()-n(A)) Aref = &ABR } else { // A from bottom-right to top-left to produce normal sequence (C*Q) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = 0 tb = 0 nb = 0 cb = 0 Aref = &ATL } util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, mb, nb, pAstart) util.Partition1x2( &CL, &CR /**/, C, cb, pCstart) util.Partition2x1( &tT, &tB /**/, tau, tb, pStart) transpose := flags&gomas.TRANS != 0 // divide workspace for block reflector and temporary work matrix T0.SetBuf(lb, lb, lb, W.Data()) W0.SetBuf(m(C), lb, m(C), W.Data()[T0.Len():]) for n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, nil, &A11, nil, nil, nil, &A22 /**/, A, lb, pAdir) bsz := n(&A11) util.Repartition1x2to1x3(&CL, &C0, &C1, &C2 /**/, C, bsz, pCdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2 /**/, tau, bsz, pDir) // -------------------------------------------------------- util.Merge2x1(&AL, &A01, &A11) T.SubMatrix(&T0, 0, 0, bsz, bsz) blasd.Scale(&T, 0.0) unblkQLBlockReflector(&T, &AL, &tau1) Wrk.SubMatrix(&W0, 0, 0, m(C), bsz) updateQLRight(&C1, &C0, &A11, &A01, &T, &Wrk, transpose, conf) // -------------------------------------------------------- 
util.Continue3x3to2x2( &ATL, nil, nil, &ABR /**/, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR /**/, &C0, &C1, C, pCdir) util.Continue3x1to2x1( &tT, &tB /**/, &t0, &tau1, tau, pDir) } }
/* * Blocked version for computing C = C*Q and C = C*Q.T from elementary * reflectors and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block * reflector T. Matrix C is updated by applying block reflector T using * compact WY algorithm. */ func blockedMultRQRight(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) { var ATL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var W0, Wrk, Tw, Twork cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCstart, pCdir util.Direction var bsz, cb, mb, nb, tb int var transpose bool // partitioning start and direction if flags&gomas.TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = 0 nb = 0 cb = 0 tb = 0 Aref = &ATL transpose = false } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) cb = imax(0, n(C)-m(A)) tb = imax(0, tau.Len()-m(A)) Aref = &ABR transpose = true } // intermediate reflector at start of workspace Twork.SetBuf(lb, lb, lb, W.Data()) W0.SetBuf(m(C), lb, m(C), W.Data()[Twork.Len():]) util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, mb, nb, pAstart) util.Partition1x2( &CL, &CR /**/, C, cb, pCstart) util.Partition2x1( &tT, &tB /**/, tau, tb, pStart) for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, nil, nil, &A22 /**/, A, lb, pAdir) bsz = m(&A11) // C1 block size must match A11 util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2 /**/, tau, bsz, pDir) util.Repartition1x2to1x3(&CL, &C0, &C1, &C2 /**/, C, bsz, 
pCdir) // -------------------------------------------------------- // clear & build block reflector from current block util.Merge1x2(&AL, &A10, &A11) Tw.SubMatrix(&Twork, 0, 0, bsz, bsz) blasd.Scale(&Tw, 0.0) unblkBlockReflectorRQ(&Tw, &AL, &tau1) Wrk.SubMatrix(&W0, 0, 0, m(&C1), bsz) updateRightRQ(&C1, &C0, &A11, &A10, &Tw, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR /**/, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR /**/, &C0, &C1, C, pCdir) util.Continue3x1to2x1( &tT, &tB /**/, &t0, &tau1, tau, pDir) } }
/* * Blocked version for computing C = Q*C and C = Q.T*C with block reflector. * * Block reflector T is [T(0), T(1), ... T(k-1)], conf.LB*n(A) matrix where * where each T(n), expect T(k-1), is conf.LB*conf.LB. T(k-1) is IB*IB where * IB = imin(LB, K%LB) */ func blockedMultQTLeft(C, A, T, W *cmat.FloatMatrix, flags int, conf *gomas.Config) *gomas.Error { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix var CT, CB, C0, C1, C2 cmat.FloatMatrix var TL, TR, T00, T01, T02 cmat.FloatMatrix var Wrk cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pCdir, pCstart, pTstart, pTdir util.Direction var bsz, mb, tb int lb := conf.LB if conf.LB == 0 { lb = m(T) } transpose := flags&gomas.TRANS != 0 nb := lb //W0 := cmat.MakeMatrix(n(C), conf.LB, W.Data()) // partitioning start and direction if flags&gomas.TRANS != 0 { // from top-left to bottom-right to produce transposed sequence (Q.T*C) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pCstart = util.PTOP pCdir = util.PBOTTOM pTstart = util.PLEFT pTdir = util.PRIGHT mb = 0 tb = nb Aref = &ABR } else { // from bottom-right to top-left to produce normal sequence (Q*C) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pCstart = util.PBOTTOM pCdir = util.PTOP pTstart = util.PRIGHT pTdir = util.PLEFT mb = m(A) - n(A) Aref = &ATL // if N%LB != 0 then the last T is not of size LB and we need // adjust first repartitioning accordingly. 
tb = n(A) % lb } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) util.Partition1x2( &TL, &TR, T, 0, pTstart) util.Partition2x1( &CT, &CB, C, mb, pCstart) nb = tb for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) util.Repartition1x2to1x3(&TL, &T00, &T01, &T02, T, nb, pTdir) bsz = n(&A11) // must match A11 block size util.Repartition2x1to3x1(&CT, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- tr, tc := T01.Size() // compute: Q.T*C == C - Y*(C.T*Y*T).T transpose == true // Q*C == C - C*Y*T*Y.T transpose == false Wrk.SubMatrix(W, 0, 0, n(&C1), bsz) if tr != tc { // this happens when n(A) not multiple of LB var Tmp cmat.FloatMatrix Tmp.SubMatrix(&T01, 0, 0, tc, tc) updateWithQTLeft(&C1, &C2, &A11, &A21, &Tmp, &Wrk, transpose, conf) } else { updateWithQTLeft(&C1, &C2, &A11, &A21, &T01, &Wrk, transpose, conf) } // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &TL, &TR, &T00, &T01, T, pTdir) util.Continue3x1to2x1( &CT, &CB, &C0, &C1, C, pCdir) nb = lb } return nil }
/* * Blocked version for computing C = C*Q and C = C*Q.T with block reflector. * * Block reflector T is [T(0), T(1), ... T(k-1)], conf.LB*n(A) matrix where * where each T(n), expect T(k-1), is conf.LB*conf.LB. T(k-1) is IB*IB where * IB = imin(LB, K%LB) */ func blockedMultQTRight(C, A, T, W *cmat.FloatMatrix, flags int, conf *gomas.Config) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix //var TTL, TTR, TBL, TBR cmat.FloatMatrix //var T00, T01, T02, T11, T12, T22 cmat.FloatMatrix var TL, TR, T00, T01, T02 cmat.FloatMatrix var Wrk cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pCstart, pCdir, pTstart, pTdir util.Direction var bsz, cb, mb, tb int lb := conf.LB if conf.LB == 0 { lb = m(T) } transpose := flags&gomas.TRANS != 0 nb := lb // partitioning start and direction if flags&gomas.TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pCstart = util.PRIGHT pCdir = util.PLEFT pTstart = util.PRIGHT pTdir = util.PLEFT mb = imax(0, m(A)-n(A)) cb = imax(0, n(C)-n(A)) tb = n(A) % lb Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pCstart = util.PLEFT pCdir = util.PRIGHT pTstart = util.PLEFT pTdir = util.PRIGHT mb = 0 cb = 0 tb = nb Aref = &ABR } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) util.Partition1x2( &TL, &TR, T, 0, pTstart) util.Partition1x2( &CL, &CR, C, cb, pCstart) nb = tb for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) util.Repartition1x2to1x3(&TL, &T00, &T01, &T02, T, nb, pTdir) bsz = n(&A11) util.Repartition1x2to1x3(&CL, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- tr, tc := T01.Size() // compute: C*Q.T == C - C*Y*T.T*Y.T transpose == true // C*Q == C 
- C*Y*T*Y.T transpose == false Wrk.SubMatrix(W, 0, 0, m(&C1), bsz) if tr != tc { // this happens when n(A) not multiple of LB var Tmp cmat.FloatMatrix Tmp.SubMatrix(&T01, 0, 0, tc, tc) updateWithQTRight(&C1, &C2, &A11, &A21, &Tmp, &Wrk, transpose, conf) } else { updateWithQTRight(&C1, &C2, &A11, &A21, &T01, &Wrk, transpose, conf) } // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &TL, &TR, &T00, &T01, T, pTdir) util.Continue1x3to1x2( &CL, &CR, &C0, &C1, C, pCdir) nb = lb } }
// unblocked LU decomposition with pivots: FLAME LU variant 3; Left-looking func unblockedLUpiv(A *cmat.FloatMatrix, p *Pivots, offset int, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix var AL, AR, A0, a1, A2, aB1, AB0 cmat.FloatMatrix var pT, pB, p0, p1, p2 Pivots err = nil util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) partitionPivot2x1( &pT, &pB, *p, 0, util.PTOP) for m(&ATL) < m(A) && n(&ATL) < n(A) { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, &a10, &a11, &a12, &A20, &a21, &A22 /**/, A, 1, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &a1, &A2 /**/, A, 1, util.PRIGHT) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, *p, 1, util.PBOTTOM) // apply previously computed pivots on current column applyPivots(&a1, p0) // a01 = trilu(A00) \ a01 (TRSV) blasd.MVSolveTrm(&a01, &A00, 1.0, gomas.LOWER|gomas.UNIT) // a11 = a11 - a10 *a01 aval := a11.Get(0, 0) - blasd.Dot(&a10, &a01) a11.Set(0, 0, aval) // a21 = a21 -A20*a01 blasd.MVMult(&a21, &A20, &a01, -1.0, 1.0, gomas.NONE) // pivot index on current column [a11, a21].T aB1.Column(&ABR, 0) p1[0] = pivotIndex(&aB1) // pivots to current column applyPivots(&aB1, p1) // a21 = a21 / a11 if aval == 0.0 { if err == nil { ij := m(&ATL) + p1[0] - 1 err = gomas.NewError(gomas.ESINGULAR, "DecomposeLU", ij) } } else { blasd.InvScale(&a21, a11.Get(0, 0)) } // apply pivots to previous columns AB0.SubMatrix(&ABL, 0, 0) applyPivots(&AB0, p1) // scale last pivots to origin matrix row numbers p1[0] += m(&ATL) util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR, &A0, &a1, A, util.PRIGHT) contPivot3x1to2x1( &pT, &pB, p0, p1, *p, util.PBOTTOM) } if n(&ATL) < n(A) { applyPivots(&ATR, *p) blasd.SolveTrm(&ATR, &ATL, 1.0, gomas.LEFT|gomas.UNIT|gomas.LOWER, conf) } return err }
// blocked LU decomposition with pivots: FLAME LU variant 3; left-looking version func blockedLUpiv(A *cmat.FloatMatrix, p *Pivots, nb int, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix var AL, AR, A0, A1, A2, AB1, AB0 cmat.FloatMatrix var pT, pB, p0, p1, p2 Pivots util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) partitionPivot2x1( &pT, &pB, *p, 0, util.PTOP) for m(&ATL) < m(A) && n(&ATL) < n(A) { util.Repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22 /**/, A, nb, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &A1, &A2 /**/, A, nb, util.PRIGHT) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, *p, nb, util.PBOTTOM) // apply previously computed pivots applyPivots(&A1, p0) // a01 = trilu(A00) \ a01 (TRSV) blasd.SolveTrm(&A01, &A00, 1.0, gomas.LOWER|gomas.UNIT) // A11 = A11 - A10*A01 blasd.Mult(&A11, &A10, &A01, -1.0, 1.0, gomas.NONE) // A21 = A21 - A20*A01 blasd.Mult(&A21, &A20, &A01, -1.0, 1.0, gomas.NONE) // LU_piv(AB1, p1) AB1.SubMatrix(&ABR, 0, 0, m(&ABR), n(&A11)) unblockedLUpiv(&AB1, &p1, m(&ATL), conf) // apply pivots to previous columns AB0.SubMatrix(&ABL, 0, 0) applyPivots(&AB0, p1) // scale last pivots to origin matrix row numbers for k, _ := range p1 { p1[k] += m(&ATL) } util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR /**/, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR /**/, &A0, &A1, A, util.PRIGHT) contPivot3x1to2x1( &pT, &pB /**/, p0, p1, *p, util.PBOTTOM) } if n(&ATL) < n(A) { applyPivots(&ATR, *p) blasd.SolveTrm(&ATR, &ATL, 1.0, gomas.LEFT|gomas.UNIT|gomas.LOWER) } return err }