/* * Blocked LQ decomposition with compact WY transform. As implemented * in lapack.DGELQF subroutine. */ func blockedLQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AR cmat.FloatMatrix var A00, A11, A12, A21, A22 cmat.FloatMatrix var TT, TB cmat.FloatMatrix var t0, tau, t2 cmat.FloatMatrix var Wrk, w1 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x1( &TT, &TB, Tvec, 0, util.PTOP) //nb := conf.LB for m(&ABR)-lb > 0 && n(&ABR)-lb > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &A11, &A12, nil, &A21, &A22, A, lb, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&TT, &t0, &tau, &t2, Tvec, lb, util.PBOTTOM) // current block size cb, rb := A11.Size() if rb < cb { cb = rb } // -------------------------------------------------------- // decompose left side AL == /A11\ // \A21/ w1.SubMatrix(W, 0, 0, cb, 1) util.Merge1x2(&AR, &A11, &A12) unblockedLQ(&AR, &tau, &w1) // build block reflector unblkBlockReflectorLQ(Twork, &AR, &tau) // update A'tail i.e. A21 and A22 with A'*(I - Y*T*Y.T).T // compute: C - Y*(C.T*Y*T).T ar, ac := A21.Size() Wrk.SubMatrix(W, 0, 0, ar, ac) updateRightLQ(&A21, &A22, &A11, &A12, Twork, &Wrk, true, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &TT, &TB, &t0, &tau, Tvec, util.PBOTTOM) } // last block with unblocked if m(&ABR) > 0 && n(&ABR) > 0 { w1.SubMatrix(W, 0, 0, m(&ABR), 1) unblockedLQ(&ABR, &t2, &w1) } }
/* * Blocked RQ decomposition with compact WY transform. As implemented * in lapack.DGERQF subroutine. */ func blockedRQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) { var ATL, ABR, AL cmat.FloatMatrix var A00, A01, A10, A11, A22 cmat.FloatMatrix var TT, TB cmat.FloatMatrix var t0, tau, t2 cmat.FloatMatrix var Wrk, w1 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &TT, &TB /**/, Tvec, 0, util.PBOTTOM) for m(&ATL)-lb > 0 && n(&ATL)-lb > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, &A10, &A11, nil, nil, nil, &A22 /**/, A, lb, util.PTOPLEFT) util.Repartition2x1to3x1(&TT, &t0, &tau, &t2 /**/, Tvec, n(&A11), util.PTOP) // current block size cb, rb := A11.Size() if rb < cb { cb = rb } // -------------------------------------------------------- // decompose left side AL == ( A10 A11 ) w1.SubMatrix(W, 0, 0, cb, 1) util.Merge1x2(&AL, &A10, &A11) unblockedRQ(&AL, &tau, &w1) // build block reflector unblkBlockReflectorRQ(Twork, &AL, &tau) // compute: (A00 A01)(I - Y*T*Y.T) ar, ac := A01.Size() Wrk.SubMatrix(W, 0, 0, ar, ac) updateRightRQ(&A01, &A00, &A11, &A10, Twork, &Wrk, false, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &TT, &TB, &t0, &tau, Tvec, util.PTOP) } // last block with unblocked if m(&ATL) > 0 && n(&ATL) > 0 { w1.SubMatrix(W, 0, 0, m(&ATL), 1) unblockedRQ(&ATL, &TT, &w1) } }
func blkBuildLQ(A, Tvec, Twork, W *cmat.FloatMatrix, K, lb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A12, A21, A22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau, t2, Wrk, D, T cmat.FloatMatrix nk := n(A) - K mk := m(A) - K uk := K % lb util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mk+uk, nk+uk, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB, Tvec, mk+uk, util.PBOTTOM) // zero the bottom part __CHECK HERE: nk? or mk? if nk+uk > 0 { blasd.Scale(&ABL, 0.0) if uk > 0 { // number of reflectors is not multiple of blocking factor // do the first part with unblocked code. unblkBuildLQ(&ABR, &tB, W, m(&ABR)-uk, n(&ABR)-uk, true) } else { // blocking factor is multiple of K blasd.Scale(&ABR, 0.0) D.Diag(&ABR) blasd.Add(&D, 1.0) } } for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, &A12, nil, &A21, &A22, A, lb, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau, &t2, Tvec, lb, util.PTOP) // ------------------------------------------------------ util.Merge1x2(&AL, &A11, &A12) // build block reflector T.SubMatrix(Twork, 0, 0, n(&A11), n(&A11)) unblkBlockReflectorLQ(&T, &AL, &tau) // update A21 and A22 with (I - Y*T*Y.T) from right ar, ac := A21.Size() Wrk.SubMatrix(W, 0, 0, ar, ac) updateRightLQ(&A21, &A22, &A11, &A12, &T, &Wrk, false, conf) // update current block unblkBuildLQ(&AL, &tau, W, 0, n(&A12), false) // zero top rows blasd.Scale(&A10, 0.0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau, Tvec, util.PTOP) } }
/* * Blocked version for computing C = C*Q and C = C*Q.T from elementary * reflectors and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block * reflector T. Matrix C is updated by applying block reflector T using * compact WY algorithm. */ func blockedMultRQRight(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) { var ATL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var W0, Wrk, Tw, Twork cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCstart, pCdir util.Direction var bsz, cb, mb, nb, tb int var transpose bool // partitioning start and direction if flags&gomas.TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = 0 nb = 0 cb = 0 tb = 0 Aref = &ATL transpose = false } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) cb = imax(0, n(C)-m(A)) tb = imax(0, tau.Len()-m(A)) Aref = &ABR transpose = true } // intermediate reflector at start of workspace Twork.SetBuf(lb, lb, lb, W.Data()) W0.SetBuf(m(C), lb, m(C), W.Data()[Twork.Len():]) util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, mb, nb, pAstart) util.Partition1x2( &CL, &CR /**/, C, cb, pCstart) util.Partition2x1( &tT, &tB /**/, tau, tb, pStart) for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, nil, nil, &A22 /**/, A, lb, pAdir) bsz = m(&A11) // C1 block size must match A11 util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2 /**/, tau, bsz, pDir) util.Repartition1x2to1x3(&CL, &C0, &C1, &C2 /**/, C, bsz, pCdir) // -------------------------------------------------------- // clear & build block reflector from current block util.Merge1x2(&AL, &A10, &A11) Tw.SubMatrix(&Twork, 0, 0, bsz, bsz) blasd.Scale(&Tw, 0.0) unblkBlockReflectorRQ(&Tw, &AL, &tau1) Wrk.SubMatrix(&W0, 0, 0, m(&C1), bsz) updateRightRQ(&C1, &C0, &A11, &A10, &Tw, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR /**/, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR /**/, &C0, &C1, C, pCdir) util.Continue3x1to2x1( &tT, &tB /**/, &t0, &tau1, tau, pDir) } }
/* * Blocked version for computing C = Q*C and C = Q.T*C from elementary * reflectors and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block * reflector T. Matrix C is updated by applying block reflector T using * compact WY algorithm. */ func blockedMultLQLeft(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AR cmat.FloatMatrix var A00, A11, A12, A22 cmat.FloatMatrix var CT, CB, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var Wrk, W0, Tw, Twork cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart util.Direction var bsz, mb, nb, cb, tb int // partitioning start and direction if flags&gomas.TRANS != 0 || lb == n(A) { // from bottom-right to top-left to produce transposed sequence (Q.T*C) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) cb = imax(0, m(C)-m(A)) tb = imax(0, tau.Len()-m(A)) Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (Q*C) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM mb = 0 nb = 0 cb = 0 tb = 0 Aref = &ABR } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, nb, pAstart) util.Partition2x1( &CT, &CB, C, cb, pStart) util.Partition2x1( &tT, &tB, tau, tb, pStart) transpose := flags&gomas.TRANS == 0 // intermediate reflector at start of workspace Twork.SetBuf(lb, lb, lb, W.Data()) W0.SetBuf(n(C), lb, n(C), W.Data()[Twork.Len():]) for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &A11, &A12, nil, nil, &A22, A, lb, pAdir) bsz = m(&A11) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, bsz, pDir) util.Repartition2x1to3x1(&CT, &C0, &C1, &C2, C, bsz, pDir) // -------------------------------------------------------- // clear & build block reflector from current block util.Merge1x2(&AR, &A11, &A12) Tw.SubMatrix(&Twork, 0, 0, bsz, bsz) blasd.Scale(&Tw, 0.0) unblkBlockReflectorLQ(&Tw, &AR, &tau1) // compute: Q*T.C == C - Y*(C.T*Y*T).T transpose == true // Q*C == C - C*Y*T*Y.T transpose == false Wrk.SubMatrix(&W0, 0, 0, n(C), bsz) updateLeftLQ(&C1, &C2, &A11, &A12, &Tw, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) util.Continue3x1to2x1( &CT, &CB, &C0, &C1, C, pDir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }