/* * like LAPACK/dlafrt.f * * Build block reflector T from HH reflector stored in TriLU(A) and coefficients * in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T 0 | z = -tau*T*Y.T*v * | z c | c = tau * * Q = H(1)H(2)...H(k) building forward here. */ func unblkBlockReflectorRQ(T, A, tau *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a10, A20, a11, a21, A22 cmat.FloatMatrix var TTL, TBR cmat.FloatMatrix var T00, t11, t21, T22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x2( &TTL, nil, nil, &TBR /**/, T, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB /**/, tau, 0, util.PBOTTOM) for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22 /**/, A, 1, util.PTOPLEFT) util.Repartition2x2to3x3(&TTL, &T00, nil, nil, nil, &t11, nil, nil, &t21, &T22 /**/, T, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2 /**/, tau, 1, util.PTOP) // -------------------------------------------------- // t11 := tau tauval := tau1.Get(0, 0) if tauval != 0.0 { t11.Set(0, 0, tauval) // t21 := -tauval*(a21 + A20*a10) blasd.Axpby(&t21, &a21, 1.0, 0.0) blasd.MVMult(&t21, &A20, &a10, -tauval, -tauval, gomas.NONE) // t21 := T22*t21 blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER) } // -------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR /**/, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x3to2x2( &TTL, nil, nil, &TBR /**/, &T00, &t11, &T22, T, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB /**/, &t0, &tau1, tau, util.PTOP) } }
/* * like LAPACK/dlafrt.f * * Build block reflector T from HH reflector stored in TriLU(A) and coefficients * in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T z | z = -tau*T*Y.T*v * | 0 c | c = tau * * Q = H(1)H(2)...H(k) building forward here. */ func unblkQLBlockReflector(T, A, tau *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a01, a11, A02, a12, A22 cmat.FloatMatrix var TTL, TBR cmat.FloatMatrix var T00, t11, t21, T22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x2( &TTL, nil, nil, &TBR, T, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB, tau, 0, util.PBOTTOM) for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, util.PTOPLEFT) util.Repartition2x2to3x3(&TTL, &T00, nil, nil, nil, &t11, nil, nil, &t21, &T22, T, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, 1, util.PTOP) // -------------------------------------------------- // t11 := tau tauval := tau1.Get(0, 0) if tauval != 0.0 { t11.Set(0, 0, tauval) // t21 := -tauval*(a12.T + &A02.T*a12) blasd.Axpby(&t21, &a12, 1.0, 0.0) blasd.MVMult(&t21, &A02, &a01, -tauval, -tauval, gomas.TRANSA) // t21 := T22*t01 blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER) } // -------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x3to2x2( &TTL, nil, nil, &TBR, &T00, &t11, &T22, T, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, util.PTOP) } }
/* * like LAPACK/dlafrt.f * * Build block reflector T from HH reflector stored in TriLU(A) and coefficients * in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T z | z = -tau*T*Y.T*v * | 0 c | c = tau * * Q = H(1)H(2)...H(k) building forward here. */ func unblkBlockReflectorLQ(T, A, tau *cmat.FloatMatrix) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix var TTL, TTR, TBL, TBR cmat.FloatMatrix var T00, t01, T02, t11, t12, T22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, util.PTOPLEFT) util.Partition2x1( &tT, &tB, tau, 0, util.PTOP) for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&TTL, &T00, &t01, &T02, nil, &t11, &t12, nil, nil, &T22, T, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, 1, util.PBOTTOM) // -------------------------------------------------- // t11 := tau tauval := tau1.Get(0, 0) if tauval != 0.0 { t11.Set(0, 0, tauval) // t01 := -tauval*(a01 + A02*a12) blasd.Axpby(&t01, &a01, 1.0, 0.0) blasd.MVMult(&t01, &A02, &a12, -tauval, -tauval, gomas.NONE) // t01 := T00*t01 blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER) } // -------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, util.PBOTTOM) } }
/* * Unblocked QR decomposition with block reflector T. */ func unblockedQRT(A, T, W *cmat.FloatMatrix) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix var TTL, TTR, TBL, TBR cmat.FloatMatrix var T00, t01, T02, t11, t12, T22, w12 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, util.PTOPLEFT) for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, &a12, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&TTL, &T00, &t01, &T02, nil, &t11, &t12, nil, nil, &T22, T, 1, util.PBOTTOMRIGHT) // ------------------------------------------------------ computeHouseholder(&a11, &a21, &t11) // H*[a12 A22].T w12.SubMatrix(W, 0, 0, a12.Len(), 1) applyHouseholder2x1(&t11, &a21, &a12, &A22, &w12, gomas.LEFT) // update T tauval := t11.Get(0, 0) if tauval != 0.0 { // t01 := -tauval*(a10.T + &A20.T*a21) //a10.CopyTo(&t01) blasd.Axpby(&t01, &a10, 1.0, 0.0) blasd.MVMult(&t01, &A20, &a21, -tauval, -tauval, gomas.TRANSA) // t01 := T00*t01 blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER) } // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT) } return err }
/* * Build full block reflect T for nc columns from sequence of reflector stored in S. * Reflectors in S are the diagonal of T, off-diagonal values of reflector are computed * from elementary reflector store in lower triangular part of A. */ func buildQRTReflector(T, A, S *cmat.FloatMatrix, nc int, conf *gomas.Config) *gomas.Error { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix var TTL, TTR, TBL, TBR cmat.FloatMatrix var T00, T01, T02, T11, T12, T22 cmat.FloatMatrix var SL, SR cmat.FloatMatrix var S00, S01, S02 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, util.PTOPLEFT) util.Partition1x2( &SL, &SR, S, 0, util.PLEFT) nb := conf.LB for m(&ABR)-nb > 0 && n(&ABR)-nb > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&TTL, &T00, &T01, &T02, nil, &T11, &T12, nil, nil, &T22, T, nb, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&SL, &S00, &S01, &S02, S, nb, util.PRIGHT) // -------------------------------------------------------- // update T01: T01 = -T00*Y1.T*Y2*T11 // Y1 = /A10\ Y2 = /A11\ // \A20/ \A21/ // T11.Copy(&S01) updateQRTReflector(&T01, &A10, &A20, &A11, &A21, &T00, &S01, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &T11, &T22, T, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &SL, &SR, &S00, &S01, S, util.PRIGHT) } if m(&ABR) > 0 && n(&ABR) > 0 { } return nil }
/* * Blocked LQ decomposition with compact WY transform. As implemented * in lapack.DGELQF subroutine. */ func blockedLQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AR cmat.FloatMatrix var A00, A11, A12, A21, A22 cmat.FloatMatrix var TT, TB cmat.FloatMatrix var t0, tau, t2 cmat.FloatMatrix var Wrk, w1 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x1( &TT, &TB, Tvec, 0, util.PTOP) //nb := conf.LB for m(&ABR)-lb > 0 && n(&ABR)-lb > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &A11, &A12, nil, &A21, &A22, A, lb, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&TT, &t0, &tau, &t2, Tvec, lb, util.PBOTTOM) // current block size cb, rb := A11.Size() if rb < cb { cb = rb } // -------------------------------------------------------- // decompose left side AL == /A11\ // \A21/ w1.SubMatrix(W, 0, 0, cb, 1) util.Merge1x2(&AR, &A11, &A12) unblockedLQ(&AR, &tau, &w1) // build block reflector unblkBlockReflectorLQ(Twork, &AR, &tau) // update A'tail i.e. A21 and A22 with A'*(I - Y*T*Y.T).T // compute: C - Y*(C.T*Y*T).T ar, ac := A21.Size() Wrk.SubMatrix(W, 0, 0, ar, ac) updateRightLQ(&A21, &A22, &A11, &A12, Twork, &Wrk, true, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &TT, &TB, &t0, &tau, Tvec, util.PBOTTOM) } // last block with unblocked if m(&ABR) > 0 && n(&ABR) > 0 { w1.SubMatrix(W, 0, 0, m(&ABR), 1) unblockedLQ(&ABR, &t2, &w1) } }
// blocked LU decomposition w/o pivots, FLAME LU nopivots variant 5 func blockedLUnoPiv(A *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) for m(&ATL) < m(A)-nb { util.Repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT) // A00 = LU(A00) unblockedLUnoPiv(&A11, conf) // A12 = trilu(A00)*A12.-1 (TRSM) blasd.SolveTrm(&A12, &A11, 1.0, gomas.LEFT|gomas.LOWER|gomas.UNIT) // A21 = A21.-1*triu(A00) (TRSM) blasd.SolveTrm(&A21, &A11, 1.0, gomas.RIGHT|gomas.UPPER) // A22 = A22 - A21*A12 blasd.Mult(&A22, &A21, &A12, -1.0, 1.0, gomas.NONE) util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) } // last block if m(&ATL) < m(A) { unblockedLUnoPiv(&ABR, conf) } return err }
/* * Blocked QR decomposition with compact WY transform. * * Compatible with lapack.DGEQRF. */ func blockedQL(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix var A00, A01, A10, A11, A22 cmat.FloatMatrix var TT, TB cmat.FloatMatrix var t0, tau, t2 cmat.FloatMatrix var Wrk, w1 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &TT, &TB, Tvec, 0, util.PBOTTOM) nb := lb for m(&ATL)-nb > 0 && n(&ATL)-nb > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, &A10, &A11, nil, nil, nil, &A22, A, nb, util.PTOPLEFT) util.Repartition2x1to3x1(&TT, &t0, &tau, &t2, Tvec, nb, util.PTOP) // current block size cb, rb := A11.Size() if rb < cb { cb = rb } // -------------------------------------------------------- // decompose righ side AL == /A01\ // \A11/ w1.SubMatrix(W, 0, 0, cb, 1) util.Merge2x1(&AL, &A01, &A11) unblockedQL(&AL, &tau, &w1) // build block reflector unblkQLBlockReflector(Twork, &AL, &tau) // update A'tail i.e. A10 and A00 with (I - Y*T*Y.T).T * A'tail // compute: C - Y*(C.T*Y*T).T ar, ac := A10.Size() Wrk.SubMatrix(W, 0, 0, ac, ar) updateQLLeft(&A10, &A00, &A11, &A01, Twork, &Wrk, true, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &TT, &TB, &t0, &tau, Tvec, util.PTOP) } // last block with unblocked if m(&ATL) > 0 && n(&ATL) > 0 { w1.SubMatrix(W, 0, 0, n(&ATL), 1) unblockedQL(&ATL, &t0, &w1) } }
// unblocked LU decomposition w/o pivots, FLAME LU nopivots variant 5 func unblockedLUnoPiv(A *cmat.FloatMatrix, conf *gomas.Config) *gomas.Error { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix var err *gomas.Error = nil util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) for m(&ATL) < m(A) { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, &a10, &a11, &a12, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) // a21 = a21/a11 blasd.InvScale(&a21, a11.Get(0, 0)) // A22 = A22 - a21*a12 blasd.MVUpdate(&A22, &a21, &a12, -1.0) util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) } return err }
/* * Reduce upper triangular matrix to tridiagonal. * * Elementary reflectors Q = H(n-1)...H(2)H(1) are stored on upper * triangular part of A. Reflector H(n-1) saved at column A(n) and * scalar multiplier to tau[n-1]. If parameter `tail` is true then * this function is used to reduce tail part of partially reduced * matrix and tau-vector partitioning is starting from last position. */ func unblkReduceTridiagUpper(A, tauq, W *cmat.FloatMatrix, tail bool) { var ATL, ABR cmat.FloatMatrix var A00, a01, a11, A22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var y21 cmat.FloatMatrix var v0 float64 toff := 1 if tail { toff = 0 } util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tqT, &tqB, tauq, toff, util.PBOTTOM) for n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, nil, nil, &a11, nil, nil, nil, &A22, A, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PTOP) // set temp vectors for this round y21.SetBuf(n(&A00), 1, n(&A00), W.Data()) // ------------------------------------------------------ // Compute householder to zero super-diagonal entries computeHouseholderRev(&a01, &tauq1) tauqv := tauq1.Get(0, 0) // set superdiagonal to unit v0 = a01.Get(-1, 0) a01.Set(-1, 0, 1.0) // y21 := A22*a12t blasd.MVMultSym(&y21, &A00, &a01, tauqv, 0.0, gomas.UPPER) // beta := tauq*a12t*y21 beta := tauqv * blasd.Dot(&a01, &y21) // y21 := y21 - 0.5*beta*a125 blasd.Axpy(&y21, &a01, -0.5*beta) // A22 := A22 - a12t*y21.T - y21*a12.T blasd.MVUpdate2Sym(&A00, &a01, &y21, -1.0, gomas.UPPER) // restore superdiagonal value a01.Set(-1, 0, v0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PTOP) } }
func blkReduceTridiagUpper(A, tauq, Y, W *cmat.FloatMatrix, lb int, conf *gomas.Config) { var ATL, ABR cmat.FloatMatrix var A00, A01, A11, A22 cmat.FloatMatrix var YT, YB, Y0, Y1, Y2 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var v0 float64 util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &YT, &YB, Y, 0, util.PBOTTOM) util.Partition2x1( &tqT, &tqB, tauq, 1, util.PBOTTOM) for m(&ATL)-lb > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, nil, &A11, nil, nil, nil, &A22, A, lb, util.PTOPLEFT) util.Repartition2x1to3x1(&YT, &Y0, &Y1, &Y2, Y, lb, util.PTOP) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, lb, util.PTOP) // ------------------------------------------------------ unblkBuildTridiagUpper(&ATL, &tauq1, &YT, W) // set subdiagonal entry to unit v0 = A01.Get(-1, 0) A01.Set(-1, 0, 1.0) // A22 := A22 - A01*Y0.T - Y0*A01.T blasd.Update2Sym(&A00, &A01, &Y0, -1.0, 1.0, gomas.UPPER, conf) // restore subdiagonal entry A01.Set(-1, 0, v0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &YT, &YB, &Y0, &Y1, Y, util.PTOP) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PTOP) } if m(&ATL) > 0 { unblkReduceTridiagUpper(&ATL, &tqT, W, true) } }
/* * Blocked RQ decomposition with compact WY transform. As implemented * in lapack.DGERQF subroutine. */ func blockedRQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) { var ATL, ABR, AL cmat.FloatMatrix var A00, A01, A10, A11, A22 cmat.FloatMatrix var TT, TB cmat.FloatMatrix var t0, tau, t2 cmat.FloatMatrix var Wrk, w1 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &TT, &TB /**/, Tvec, 0, util.PBOTTOM) for m(&ATL)-lb > 0 && n(&ATL)-lb > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, &A10, &A11, nil, nil, nil, &A22 /**/, A, lb, util.PTOPLEFT) util.Repartition2x1to3x1(&TT, &t0, &tau, &t2 /**/, Tvec, n(&A11), util.PTOP) // current block size cb, rb := A11.Size() if rb < cb { cb = rb } // -------------------------------------------------------- // decompose left side AL == ( A10 A11 ) w1.SubMatrix(W, 0, 0, cb, 1) util.Merge1x2(&AL, &A10, &A11) unblockedRQ(&AL, &tau, &w1) // build block reflector unblkBlockReflectorRQ(Twork, &AL, &tau) // compute: (A00 A01)(I - Y*T*Y.T) ar, ac := A01.Size() Wrk.SubMatrix(W, 0, 0, ar, ac) updateRightRQ(&A01, &A00, &A11, &A10, Twork, &Wrk, false, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &TT, &TB, &t0, &tau, Tvec, util.PTOP) } // last block with unblocked if m(&ATL) > 0 && n(&ATL) > 0 { w1.SubMatrix(W, 0, 0, m(&ATL), 1) unblockedRQ(&ATL, &TT, &w1) } }
/* * Tridiagonal reduction of LOWER triangular symmetric matrix, zero elements below 1st * subdiagonal: * * A = (1 - tau*u*u.t)*A*(1 - tau*u*u.T) * = (I - tau*( 0 0 )) (a11 a12) (I - tau*( 0 0 )) * ( ( 0 u*u.t)) (a21 A22) ( ( 0 u*u.t)) * * a11, a12, a21 not affected * * from LEFT: * A22 = A22 - tau*u*u.T*A22 * from RIGHT: * A22 = A22 - tau*A22*u.u.T * * LEFT and RIGHT: * A22 = A22 - tau*u*u.T*A22 - tau*(A22 - tau*u*u.T*A22)*u*u.T * = A22 - tau*u*u.T*A22 - tau*A22*u*u.T + tau*tau*u*u.T*A22*u*u.T * [x = tau*A22*u (vector)] (SYMV) * A22 = A22 - u*x.T - x*u.T + tau*u*u.T*x*u.T * [beta = tau*u.T*x (scalar)] (DOT) * = A22 - u*x.T - x*u.T + beta*u*u.T * = A22 - u*(x - 0.5*beta*u).T - (x - 0.5*beta*u)*u.T * [w = x - 0.5*beta*u] (AXPY) * = A22 - u*w.T - w*u.T (SYR2) * * Result of reduction for N = 5: * ( d . . . . ) * ( e d . . . ) * ( v1 e d . . ) * ( v1 v2 e d . ) * ( v1 v2 v3 e d ) */ func unblkReduceTridiagLower(A, tauq, W *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a11, a21, A22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var y21 cmat.FloatMatrix var v0 float64 util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x1( &tqT, &tqB, tauq, 0, util.PTOP) for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &a11, nil, nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PBOTTOM) // set temp vectors for this round y21.SetBuf(n(&A22), 1, n(&A22), W.Data()) // ------------------------------------------------------ // Compute householder to zero subdiagonal entries computeHouseholderVec(&a21, &tauq1) tauqv := tauq1.Get(0, 0) // set subdiagonal to unit v0 = a21.Get(0, 0) a21.Set(0, 0, 1.0) // y21 := tauq*A22*a21 blasd.MVMultSym(&y21, &A22, &a21, tauqv, 0.0, gomas.LOWER) // beta := tauq*a21.T*y21 beta := tauqv * blasd.Dot(&a21, &y21) // y21 := y21 - 0.5*beta*a21 blasd.Axpy(&y21, &a21, -0.5*beta) // A22 := A22 - a21*y21.T - y21*a21.T 
blasd.MVUpdate2Sym(&A22, &a21, &y21, -1.0, gomas.LOWER) // restore subdiagonal a21.Set(0, 0, v0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PBOTTOM) } }
func blockedCHOL(A *cmat.FloatMatrix, flags int, conf *gomas.Config) *gomas.Error { var err, firstErr *gomas.Error var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix nb := conf.LB err = nil firstErr = nil util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) for m(A)-m(&ATL) > nb { util.Repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT) if flags&gomas.LOWER != 0 { // A11 = chol(A11) err = unblockedLowerCHOL(&A11, flags, m(&ATL)) // A21 = A21 * tril(A11).-1 blasd.SolveTrm(&A21, &A11, 1.0, gomas.RIGHT|gomas.LOWER|gomas.TRANSA, conf) // A22 = A22 - A21*A21.T blasd.UpdateSym(&A22, &A21, -1.0, 1.0, gomas.LOWER, conf) } else { // A11 = chol(A11) err = unblockedUpperCHOL(&A11, flags, m(&ATL)) // A12 = triu(A11).-1 * A12 blasd.SolveTrm(&A12, &A11, 1.0, gomas.UPPER|gomas.TRANSA, conf) // A22 = A22 - A12.T*A12 blasd.UpdateSym(&A22, &A12, -1.0, 1.0, gomas.UPPER|gomas.TRANSA, conf) } if err != nil && firstErr == nil { firstErr = err } util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) } if m(&ATL) < m(A) { // last block if flags&gomas.LOWER != 0 { unblockedLowerCHOL(&ABR, flags, 0) } else { unblockedUpperCHOL(&ABR, flags, 0) } } return firstErr }
/* * Unblocked code for generating M by N matrix Q with orthogonal columns which * are defined as the last N columns of the product of K first elementary * reflectors. * * Parameter nk is last nk elementary reflectors that are not used in computing * the matrix Q. Parameter mk length of the first unused elementary reflectors * First nk columns are zeroed and subdiagonal mk-nk is set to unit. * * Compatible with lapack.DORG2L subroutine. */ func unblkBuildQL(A, Tvec, W *cmat.FloatMatrix, mk, nk int, mayClear bool) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, a10, a11, a21, A22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w12, D cmat.FloatMatrix // (mk, nk) = (rows, columns) of upper left partition util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mk, nk, util.PTOPLEFT) util.Partition2x1( &tT, &tB, Tvec, nk, util.PTOP) // zero the left side if nk > 0 && mayClear { blasd.Scale(&ABL, 0.0) blasd.Scale(&ATL, 0.0) D.Diag(&ATL, nk-mk) blasd.Add(&D, 1.0) } for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, nil, &a10, &a11, nil, nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, Tvec, 1, util.PBOTTOM) // ------------------------------------------------------ w12.SubMatrix(W, 0, 0, a10.Len(), 1) applyHouseholder2x1(&tau1, &a01, &a10, &A00, &w12, gomas.LEFT) blasd.Scale(&a01, -tau1.Get(0, 0)) a11.Set(0, 0, 1.0-tau1.Get(0, 0)) // zero bottom elements blasd.Scale(&a21, 0.0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, Tvec, util.PBOTTOM) } }
/* * Unblocked code for generating M by N matrix Q with orthogonal columns which * are defined as the first N columns of the product of K first elementary * reflectors. * * Parameters nk = n(A)-K, mk = m(A)-K define the initial partitioning of * matrix A. * * Q = H(k)H(k-1)...H(1) , 0 < k <= M, where H(i) = I - tau*v*v.T * * Computation is ordered as H(k)*H(k-1)...*H(1)*I ie. from bottom to top. * * If k < M rows k+1:M are cleared and diagonal entries [k+1:M,k+1:M] are * set to unit. Then the matrix Q is generated by right multiplying elements below * of i'th elementary reflector H(i). * * Compatible to lapack.xORG2L subroutine. */ func unblkBuildLQ(A, Tvec, W *cmat.FloatMatrix, mk, nk int, mayClear bool) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, a12, a21, A22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w12, D cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mk, nk, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB, Tvec, mk, util.PBOTTOM) // zero the bottom part if mk > 0 && mayClear { blasd.Scale(&ABL, 0.0) blasd.Scale(&ABR, 0.0) D.Diag(&ABR) blasd.Add(&D, 1.0) } for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, &a12, nil, &a21, &A22, A, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, Tvec, 1, util.PTOP) // ------------------------------------------------------ w12.SubMatrix(W, 0, 0, a21.Len(), 1) applyHouseholder2x1(&tau1, &a12, &a21, &A22, &w12, gomas.RIGHT) blasd.Scale(&a12, -tau1.Get(0, 0)) a11.Set(0, 0, 1.0-tau1.Get(0, 0)) // zero blasd.Scale(&a10, 0.0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, Tvec, util.PTOP) } }
func blockedQRT(A, T, W *cmat.FloatMatrix, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR, AL, AR cmat.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix var TL, TR, W2 cmat.FloatMatrix var T00, T01, T02 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition1x2( &TL, &TR, T, 0, util.PLEFT) nb := conf.LB for m(&ABR)-nb > 0 && n(&ABR)-nb > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&TL, &T00, &T01, &T02, T, nb, util.PRIGHT) util.Partition1x2( &AL, &AR, &ABR, nb, util.PLEFT) // -------------------------------------------------------- // decompose left side AL == /A11\ // \A21/ unblockedQRT(&AL, &T01, W) // update A'tail i.e. A12 and A22 with (I - Y*T*Y.T).T * A'tail // compute: Q*T.C == C - Y*(C.T*Y*T).T ar, ac := A12.Size() W2.SubMatrix(W, 0, 0, ac, ar) updateWithQTLeft(&A12, &A22, &A11, &A21, &T01, &W2, true, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &TL, &TR, &T00, &T01, T, util.PRIGHT) } if m(&ABR) > 0 && n(&ABR) > 0 { T01.SubMatrix(&TR, 0, 0, n(&ABR), n(&ABR)) unblockedQRT(&ABR, &T01, W) } return err }
func trdsecEigenBuildInplace(Q, z *cmat.FloatMatrix) { var QTL, QBR, Q00, q11, q12, q21, Q22, qi cmat.FloatMatrix var zk0, zk1, dk0, dk1 float64 util.Partition2x2( &QTL, nil, nil, &QBR /**/, Q, 0, 0, util.PTOPLEFT) for m(&QBR) > 0 { util.Repartition2x2to3x3(&QTL, &Q00, nil, nil, nil, &q11, &q12, nil, &q21, &Q22 /**/, Q, 1, util.PBOTTOMRIGHT) //--------------------------------------------------------------- k := m(&Q00) zk0 = z.GetAt(k) dk0 = q11.Get(0, 0) q11.Set(0, 0, zk0/dk0) for i := 0; i < q12.Len(); i++ { zk1 = z.GetAt(k + i + 1) dk0 = q12.GetAt(i) dk1 = q21.GetAt(i) q12.SetAt(i, zk0/dk1) q21.SetAt(i, zk1/dk0) } //--------------------------------------------------------------- util.Continue3x3to2x2( &QTL, nil, nil, &QBR /**/, &Q00, &q11, &Q22 /**/, Q, util.PBOTTOMRIGHT) } // scale column eigenvectors for k := 0; k < z.Len(); k++ { qi.Column(Q, k) t := blasd.Nrm2(&qi) blasd.InvScale(&qi, t) } }
func unblockedLowerCHOL(A *cmat.FloatMatrix, flags int, nr int) (err *gomas.Error) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix err = nil util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) for m(&ATL) < m(A) { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) // a11 = sqrt(a11) aval := a11.Get(0, 0) if aval < 0.0 { if err == nil { err = gomas.NewError(gomas.ENEGATIVE, "DecomposeCHOL", m(&ATL)+nr) } } else { a11.Set(0, 0, math.Sqrt(aval)) } // a21 = a21/a11 blasd.InvScale(&a21, a11.Get(0, 0)) // A22 = A22 - a21*a21' (SYR) blasd.MVUpdateSym(&A22, &a21, -1.0, gomas.LOWER) util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) } return }
/* * Unblocked RQ decomposition. As implemented * in lapack.DGERQ2 subroutine. */ func unblockedRQ(A, Tvec, W *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a11, a01, a10, A22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w12 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB, Tvec, 0, util.PBOTTOM) for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, nil, &a10, &a11, nil, nil, nil, &A22, A, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, Tvec, 1, util.PTOP) // ------------------------------------------------------ computeHouseholder(&a11, &a10, &tau1) w12.SubMatrix(W, 0, 0, a01.Len(), 1) applyHouseholder2x1(&tau1, &a10, &a01, &A00, &w12, gomas.RIGHT) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, Tvec, util.PTOP) } }
/* * Blocked version for computing C = C*Q and C = C*Q.T from elementary reflectors * and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block reflector T. * Matrix C is updated by applying block reflector T using compact WY algorithm. */ func blockedMultQRight(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var W0, Wrk, Tw, Twork cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCstart, pCdir util.Direction var bsz, cb, mb int // partitioning start and direction if flags&gomas.TRANS != 0 { // from bottom-right to top-left to produce transpose sequence (C*Q.T) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = imax(0, m(A)-n(A)) cb = n(C) - n(A) Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (C*Q) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = 0 cb = 0 Aref = &ABR } // intermediate reflector at start of workspace Twork.SetBuf(nb, nb, nb, W.Data()) W0.SetBuf(m(C), nb, m(C), W.Data()[Twork.Len():]) util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) util.Partition1x2( &CL, &CR, C, cb, pCstart) util.Partition2x1( &tT, &tB, tau, 0, pStart) transpose := flags&gomas.TRANS != 0 for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, nb, pDir) bsz = n(&A11) // C1 block size must match A11 util.Repartition1x2to1x3(&CL, &C0, &C1, &C2, C, bsz, pCdir) // -------------------------------------------------------- // clear & build block reflector from current 
block util.Merge2x1(&AL, &A11, &A21) Tw.SubMatrix(&Twork, 0, 0, bsz, bsz) blasd.Scale(&Tw, 0.0) unblkQRBlockReflector(&Tw, &AL, &tau1) // compute: C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T // C*Q == C - C*Y*T*Y.T Wrk.SubMatrix(&W0, 0, 0, m(&C1), bsz) updateWithQTRight(&C1, &C2, &A11, &A21, &Tw, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR, &C0, &C1, C, pCdir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
/* * Computes upper Hessenberg reduction of N-by-N matrix A using unblocked * algorithm as described in (1). * * Hessengerg reduction: A = Q.T*B*Q, Q unitary, B upper Hessenberg * Q = H(0)*H(1)*...*H(k) where H(k) is k'th Householder reflector. * * Compatible with lapack.DGEHD2. */ func unblkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, row int) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a11, a21, A22 cmat.FloatMatrix var AL, AR, A0, a1, A2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w12, v1 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, row, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) util.Partition2x1( &tT, &tB, Tvec, 0, util.PTOP) v1.SubMatrix(W, 0, 0, m(A), 1) for m(&ABR) > 1 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &a11, nil, nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &a1, &A2, A, 1, util.PRIGHT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, Tvec, 1, util.PBOTTOM) // ------------------------------------------------------ // a21 = [beta; H(k)].T computeHouseholderVec(&a21, &tau1) tauval := tau1.Get(0, 0) beta := a21.Get(0, 0) a21.Set(0, 0, 1.0) // v1 := A2*a21 blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE) // A2 := A2 - tau*v1*a21 (A2 := A2*H(k)) blasd.MVUpdate(&A2, &v1, &a21, -tauval) w12.SubMatrix(W, 0, 0, n(&A22), 1) // w12 := a21.T*A22 = A22.T*a21 blasd.MVMult(&w12, &A22, &a21, 1.0, 0.0, gomas.TRANS) // A22 := A22 - tau*a21*w12 (A22 := H(k)*A22) blasd.MVUpdate(&A22, &a21, &w12, -tauval) a21.Set(0, 0, beta) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR, &A0, &a1, A, util.PRIGHT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, Tvec, util.PBOTTOM) } }
/*
 * Blocked version of Hessenberg reduction algorithm as presented in (1). This
 * version uses compact-WY transformation.
 *
 * Some notes:
 *
 * Elementary reflectors stored in [A11; A21].T are not on diagonal of A11. Update of
 * a block aligned with A11; A21 is as follows
 *
 * 1. Update from left Q(k)*C:
 *                                         c0   0                            c0
 * (I - Y*T*Y.T).T*C = C - Y*(C.T*Y)*T.T = C1 - Y1 * (C1.T.Y1+C2.T*Y2)*T.T = C1-Y1*W
 *                                         C2   Y2                           C2-Y2*W
 *
 * where W = (C1.T*Y1+C2.T*Y2)*T.T and first row of C is not affected by update
 *
 * 2. Update from right C*Q(k):
 *                                       0
 * C - C*Y*T*Y.T = c0;C1;C2 - c0;C1;C2 * Y1 *T*(0;Y1;Y2) = c0; C1-W*Y1; C2-W*Y2
 *                                       Y2
 * where W = (C1*Y1 + C2*Y2)*T and first column of C is not affected
 *
 * Arguments:
 *   A     matrix being reduced; updated in place
 *   Tvec  receives, block by block, a copy of the diagonal of T
 *   W     workspace; its top conf.LB rows hold the block reflector T and the
 *         rows below that hold the m(A)-by-conf.LB intermediate matrix V
 *   nb    block size used for the partitioning of A and Tvec
 *   conf  passed on to the blocked update kernels
 *
 * NOTE(review): the workspace views are sized with conf.LB while the loop
 * advances by nb; this looks like it assumes nb == conf.LB -- confirm at the
 * call sites.
 *
 * Always returns nil.
 */
func blkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A11, A12, A21, A22, A2 cmat.FloatMatrix
	var tT, tB, td cmat.FloatMatrix
	var t0, t1, t2, T cmat.FloatMatrix
	var V, VT, VB /*V0, V1, V2,*/, Y1, Y2, W0 cmat.FloatMatrix

	//fmt.Printf("blkHessGQvdG...\n")
	// carve the block reflector T and the intermediate matrix V out of W;
	// td views the diagonal of T
	T.SubMatrix(W, 0, 0, conf.LB, conf.LB)
	V.SubMatrix(W, conf.LB, 0, m(A), conf.LB)
	td.Diag(&T)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	for m(&ABR) > nb+1 && n(&ABR) > nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, Tvec, nb, util.PBOTTOM)

		// split V at the current row offset of the trailing block
		util.Partition2x1(
			&VT,
			&VB, &V, m(&ATL), util.PTOP)
		// ------------------------------------------------------

		// build the reduction block in ABR together with T and VB, then
		// copy the diagonal of T into the current segment of Tvec
		unblkBuildHessGQvdG(&ABR, &T, &VB, nil)
		blasd.Copy(&t1, &td)

		// m(Y) == m(ABR)-1, n(Y) == n(A11)
		Y1.SubMatrix(&ABR, 1, 0, n(&A11), n(&A11))
		Y2.SubMatrix(&ABR, 1+n(&A11), 0, m(&A21)-1, n(&A11))

		// [A01; A02] == ATR := ATR*(I - Y*T*Y.T)
		updateHessRightWY(&ATR, &Y1, &Y2, &T, &VT, conf)

		// A2 = [A12; A22].T
		util.Merge2x1(&A2, &A12, &A22)

		// A2 := A2 - VB*T*A21.T
		// The element A21(0, -1) is saved, set to one for the multiplication
		// and restored afterwards (the negative index presumably addresses
		// the last column -- confirm against cmat).
		be := A21.Get(0, -1)
		A21.Set(0, -1, 1.0)
		blasd.MultTrm(&VB, &T, 1.0, gomas.UPPER|gomas.RIGHT)
		blasd.Mult(&A2, &VB, &A21, -1.0, 1.0, gomas.TRANSB, conf)
		A21.Set(0, -1, be)

		// A2 := (I - Y*T*Y.T).T * A2
		// W0 reuses the top of V as scratch for the left update
		W0.SubMatrix(&V, 0, 0, n(&A2), n(&Y2))
		updateHessLeftWY(&A2, &Y1, &Y2, &T, &W0, conf)
		// ------------------------------------------------------

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, Tvec, util.PBOTTOM)
	}

	if m(&ABR) > 1 {
		// do the rest with unblocked
		// W0 is re-pointed at the start of W as an m(A)-by-1 scratch vector
		util.Merge2x1(&A2, &ATR, &ABR)
		W0.SetBuf(m(A), 1, m(A), W.Data())
		unblkHessGQvdG(&A2, &tB, &W0, m(&ATR))
	}
	return nil
}
/*
 *
 * Building reduction block for blocked algorithm as described in (1).
 *
 * A. update next column
 *    a10        [(U00)     (U00)  ]   [(a10)    (V00)            ]
 *    a11 :=  I -[(u10)*T00*(u10).T] * [(a11)  - (v01) * T00 * a10]
 *    a12        [(U20)     (U20)  ]   [(a12)    (V02)            ]
 *
 * B. compute Householder reflector for updated column
 *    a21, t11 := Householder(a21)
 *
 * C. update intermediate reductions
 *    v10      A02*a21
 *    v11  :=  a12*a21
 *    v12      A22*a21
 *
 * D. update block reflector
 *    t01 :=  A20*a21
 *    t11 :=  t11
 *
 * One iteration is run per column of V. A, T and V are updated in place; the
 * argument W is not referenced. Always returns nil.
 */
func unblkBuildHessGQvdG(A, T, V, W *cmat.FloatMatrix) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, t11, T22 cmat.FloatMatrix
	var VL, VR, V0, v1, V2, Y0 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition1x2(
		&VL, &VR, V, 0, util.PLEFT)

	// leading element of the most recent reflector; it is overwritten with
	// one below and written back one iteration later (or after the loop)
	var beta float64

	for n(&VR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, nil,
			nil, &t11, nil,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition1x2to1x3(&VL,
			&V0, &v1, &V2, V, 1, util.PRIGHT)
		// ------------------------------------------------------
		// Compute Hessenberg update for next column of A:
		// (skipped on the first pass, when no column of V has been built yet)
		if n(&V0) > 0 {
			// y10 := T00*a10  (use t01 as workspace?)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)

			// a1 := a1 - V0*T00*a10
			blasd.MVMult(&a1, &V0, &t01, -1.0, 1.0, gomas.NONE)

			// update a1 := (I - Y*T*Y.T).T*a1 (here t01 as workspace)
			Y0.SubMatrix(A, 1, 0, n(&A00), n(&A00))
			updateVecLeftWY2(&a1, &Y0, &A20, &T00, &t01, gomas.TRANS)
			// write back the value saved from the previous reflector
			a10.Set(0, -1, beta)
		}

		// Compute Householder reflector
		// save its leading element in beta and replace it with one
		computeHouseholderVec(&a21, &t11)
		beta = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// update T
		// (t01 is left untouched when the scalar factor is zero)
		tauval := t11.Get(0, 0)
		if tauval != 0.0 {
			// t01 := -tauval*A20.T*a21
			blasd.MVMult(&t01, &A20, &a21, -tauval, 0.0, gomas.TRANS)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue1x3to1x2(
			&VL, &VR, &V0, &v1, V, util.PRIGHT)
	}
	// write back the value saved from the last reflector
	A.Set(n(V), n(V)-1, beta)
	return nil
}
func blkBuildLQ(A, Tvec, Twork, W *cmat.FloatMatrix, K, lb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A12, A21, A22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau, t2, Wrk, D, T cmat.FloatMatrix nk := n(A) - K mk := m(A) - K uk := K % lb util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mk+uk, nk+uk, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB, Tvec, mk+uk, util.PBOTTOM) // zero the bottom part __CHECK HERE: nk? or mk? if nk+uk > 0 { blasd.Scale(&ABL, 0.0) if uk > 0 { // number of reflectors is not multiple of blocking factor // do the first part with unblocked code. unblkBuildLQ(&ABR, &tB, W, m(&ABR)-uk, n(&ABR)-uk, true) } else { // blocking factor is multiple of K blasd.Scale(&ABR, 0.0) D.Diag(&ABR) blasd.Add(&D, 1.0) } } for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, &A12, nil, &A21, &A22, A, lb, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau, &t2, Tvec, lb, util.PTOP) // ------------------------------------------------------ util.Merge1x2(&AL, &A11, &A12) // build block reflector T.SubMatrix(Twork, 0, 0, n(&A11), n(&A11)) unblkBlockReflectorLQ(&T, &AL, &tau) // update A21 and A22 with (I - Y*T*Y.T) from right ar, ac := A21.Size() Wrk.SubMatrix(W, 0, 0, ar, ac) updateRightLQ(&A21, &A22, &A11, &A12, &T, &Wrk, false, conf) // update current block unblkBuildLQ(&AL, &tau, W, 0, n(&A12), false) // zero top rows blasd.Scale(&A10, 0.0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau, Tvec, util.PTOP) } }
/* * This is adaptation of TRIRED_LAZY_UNB algorithm from (1). */ func unblkBuildTridiagUpper(A, tauq, Y, W *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix var YTL, YBR cmat.FloatMatrix var Y00, y01, Y02, y11, y12, Y22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var w12 cmat.FloatMatrix var v0 float64 util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x2( &YTL, nil, nil, &YBR, Y, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tqT, &tqB, tauq, 0, util.PBOTTOM) k := 0 for k < n(Y) { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, util.PTOPLEFT) util.Repartition2x2to3x3(&YTL, &Y00, &y01, &Y02, nil, &y11, &y12, nil, nil, &Y22, Y, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PTOP) // set temp vectors for this round w12.SubMatrix(Y, -1, 0, 1, n(&Y02)) // ------------------------------------------------------ if n(&Y02) > 0 { aa := blasd.Dot(&a12, &y12) aa += blasd.Dot(&y12, &a12) a11.Set(0, 0, a11.Get(0, 0)-aa) // a01 := a01 - A02*y12 blasd.MVMult(&a01, &A02, &y12, -1.0, 1.0, gomas.NONE) // a01 := a01 - Y02*a12 blasd.MVMult(&a01, &Y02, &a12, -1.0, 1.0, gomas.NONE) // restore superdiagonal value a12.Set(0, 0, v0) } // Compute householder to zero subdiagonal entries computeHouseholderRev(&a01, &tauq1) tauqv := tauq1.Get(0, 0) // set sub&iagonal to unit v0 = a01.Get(-1, 0) a01.Set(-1, 0, 1.0) // y01 := tauq*A00*a01 blasd.MVMultSym(&y01, &A00, &a01, tauqv, 0.0, gomas.UPPER) // w12 := A02.T*a01 blasd.MVMult(&w12, &A02, &a01, 1.0, 0.0, gomas.TRANS) // y01 := y01 - Y02*(A02.T*a01) blasd.MVMult(&y01, &Y02, &w12, -tauqv, 1.0, gomas.NONE) // w12 := Y02.T*a01 blasd.MVMult(&w12, &Y02, &a01, 1.0, 0.0, gomas.TRANS) // y01 := y01 - A02*(Y02.T*a01) blasd.MVMult(&y01, &A02, &w12, -tauqv, 1.0, gomas.NONE) // beta := tauq*a01.T*y01 beta := tauqv * blasd.Dot(&a01, &y01) // y01 := y01 - 0.5*beta*a01 
blasd.Axpy(&y01, &a01, -0.5*beta) // ------------------------------------------------------ k += 1 util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x3to2x2( &YTL, nil, nil, &YBR, &Y00, &y11, &Y22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PTOP) } // restore superdiagonal value A.Set(m(&ATL)-1, n(&ATL), v0) }
/* * Unblocked algorith for computing C = Q.T*C and C = Q*C. * * Q = H(1)H(2)...H(k) where elementary reflectors H(i) are stored on i'th column * below diagonal in A. * * Progressing A from top-left to bottom-right i.e from smaller column numbers * to larger, produces H(k)...H(2)H(1) == Q.T. and C = Q.T*C * * Progressing from bottom-right to top-left produces H(1)H(2)...H(k) == Q and C = Q*C */ func unblockedMultQLeft(C, A, tau, w *cmat.FloatMatrix, flags int) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix var CT, CB, C0, c1t, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w1 cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart util.Direction var mb, tb, nb int // partitioning start and direction if flags&gomas.TRANS != 0 { // from top-left to bottom-right to produce transposed sequence (Q.T*C) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM mb = 0 tb = 0 nb = 0 Aref = &ABR } else { // from bottom-right to top-left to produce normal sequence (Q*C) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) tb = imax(0, tau.Len()-n(A)) Aref = &ATL } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, nb, pAstart) util.Partition2x1( &CT, &CB, C, mb, pStart) util.Partition2x1( &tT, &tB, tau, tb, pStart) for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22, A, 1, pAdir) util.Repartition2x1to3x1(&CT, &C0, &c1t, &C2, C, 1, pDir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, 1, pDir) // -------------------------------------------------------- w1.SubMatrix(w, 0, 0, c1t.Len(), 1) applyHouseholder2x1(&tau1, &a21, &c1t, &C2, &w1, gomas.LEFT) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pAdir) util.Continue3x1to2x1( &CT, &CB, 
&C0, &c1t, C, pDir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
/* * Blocked version for computing C = Q*C and C = Q.T*C from elementary reflectors * and scalar coefficients. * * Elementary reflectors and scalar coefficients are used to build block reflector T. * Matrix C is updated by applying block reflector T using compact WY algorithm. */ func blockedMultQLeft(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) { var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix var CT, CB, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var Wrk, W0, Tw, Twork cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart util.Direction var bsz, mb int // partitioning start and direction if flags&gomas.TRANS != 0 || nb == n(A) { // from top-left to bottom-right to produce transposed sequence (Q.T*C) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM mb = 0 Aref = &ABR } else { // from bottom-right to top-left to produce normal sequence (Q*C) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP mb = imax(0, m(A)-n(A)) Aref = &ATL } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, mb, 0, pAstart) util.Partition2x1( &CT, &CB, C, mb, pStart) util.Partition2x1( &tT, &tB, tau, 0, pStart) transpose := flags&gomas.TRANS != 0 // intermediate reflector at start of workspace Twork.SetBuf(nb, nb, nb, W.Data()) W0.SetBuf(n(C), nb, n(C), W.Data()[Twork.Len():]) for m(Aref) > 0 && n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &A10, &A11, nil, &A20, &A21, &A22, A, nb, pAdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, nb, pDir) bsz = n(&A11) util.Repartition2x1to3x1(&CT, &C0, &C1, &C2, C, bsz, pDir) // -------------------------------------------------------- // clear & build block reflector from current block util.Merge2x1(&AL, &A11, &A21) Tw.SubMatrix(&Twork, 0, 0, bsz, bsz) blasd.Scale(&Tw, 0.0) unblkQRBlockReflector(&Tw, &AL, &tau1) // compute: 
Q*T.C == C - Y*(C.T*Y*T).T transpose == true // Q*C == C - C*Y*T*Y.T transpose == false Wrk.SubMatrix(&W0, 0, 0, n(&C1), bsz) updateWithQTLeft(&C1, &C2, &A11, &A21, &Tw, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pAdir) util.Continue3x1to2x1( &CT, &CB, &C0, &C1, C, pDir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
func blkMultLeftQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) { var ATL /*ATR, ABL,*/, ABR, AL cmat.FloatMatrix var A00, A01, A11, A22 cmat.FloatMatrix var CT, CB, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var T0, T, W0, Wrk cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart util.Direction var mb, tb, nb int // partitioning start and direction if flags&gomas.TRANS != 0 { // A from bottom-right to top-left to produce transposed sequence (Q.T*C) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP mb = 0 tb = 0 nb = 0 Aref = &ATL } else { // from top-left to bottom-right to produce normal sequence (Q*C) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) tb = imax(0, tau.Len()-n(A)) Aref = &ABR } util.Partition2x2( &ATL, nil, nil, &ABR, A, mb, nb, pAstart) util.Partition2x1( &CT, &CB, C, mb, pStart) util.Partition2x1( &tT, &tB, tau, tb, pStart) transpose := flags&gomas.TRANS != 0 // divide workspace for block reflector and temporart space T0.SetBuf(lb, lb, lb, W.Data()) W0.SetBuf(n(C), lb, n(C), W.Data()[T0.Len():]) for n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, nil, &A11, nil, nil, nil, &A22, A, lb, pAdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, lb, pDir) bsz := n(&A11) util.Repartition2x1to3x1(&CT, &C0, &C1, &C2, C, bsz, pDir) // -------------------------------------------------------- // build block reflector for current block util.Merge2x1(&AL, &A01, &A11) T.SubMatrix(&T0, 0, 0, bsz, bsz) blasd.Scale(&T, 0.0) unblkQLBlockReflector(&T, &AL, &tau1) // update with (I - Y*T*Y.T) or (I - Y*T*Y.T).T Wrk.SubMatrix(&W0, 0, 0, n(&C1), bsz) updateQLLeft(&C1, &C0, &A11, &A01, &T, &Wrk, transpose, conf) // -------------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &A11, &A22, A, 
pAdir) util.Continue3x1to2x1( &CT, &CB, &C0, &C1, C, pDir) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, pDir) } }
func blkMultRightQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) { var ATL, ABR, AL cmat.FloatMatrix var A00, A01, A11, A22 cmat.FloatMatrix var CL, CR, C0, C1, C2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix var T0, T, W0, Wrk cmat.FloatMatrix var Aref *cmat.FloatMatrix var pAdir, pAstart, pDir, pStart, pCdir, pCstart util.Direction var mb, tb, nb, cb int // partitioning start and direction if flags&gomas.TRANS != 0 { // from top-left to bottom-right to produce transpose sequence (C*Q.T) pAstart = util.PTOPLEFT pAdir = util.PBOTTOMRIGHT pStart = util.PTOP pDir = util.PBOTTOM pCstart = util.PLEFT pCdir = util.PRIGHT mb = imax(0, m(A)-n(A)) nb = imax(0, n(A)-m(A)) cb = imax(0, n(C)-n(A)) tb = imax(0, tau.Len()-n(A)) Aref = &ABR } else { // A from bottom-right to top-left to produce normal sequence (C*Q) pAstart = util.PBOTTOMRIGHT pAdir = util.PTOPLEFT pStart = util.PBOTTOM pDir = util.PTOP pCstart = util.PRIGHT pCdir = util.PLEFT mb = 0 tb = 0 nb = 0 cb = 0 Aref = &ATL } util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, mb, nb, pAstart) util.Partition1x2( &CL, &CR /**/, C, cb, pCstart) util.Partition2x1( &tT, &tB /**/, tau, tb, pStart) transpose := flags&gomas.TRANS != 0 // divide workspace for block reflector and temporary work matrix T0.SetBuf(lb, lb, lb, W.Data()) W0.SetBuf(m(C), lb, m(C), W.Data()[T0.Len():]) for n(Aref) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &A01, nil, nil, &A11, nil, nil, nil, &A22 /**/, A, lb, pAdir) bsz := n(&A11) util.Repartition1x2to1x3(&CL, &C0, &C1, &C2 /**/, C, bsz, pCdir) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2 /**/, tau, bsz, pDir) // -------------------------------------------------------- util.Merge2x1(&AL, &A01, &A11) T.SubMatrix(&T0, 0, 0, bsz, bsz) blasd.Scale(&T, 0.0) unblkQLBlockReflector(&T, &AL, &tau1) Wrk.SubMatrix(&W0, 0, 0, m(C), bsz) updateQLRight(&C1, &C0, &A11, &A01, &T, &Wrk, transpose, conf) // -------------------------------------------------------- 
util.Continue3x3to2x2( &ATL, nil, nil, &ABR /**/, &A00, &A11, &A22, A, pAdir) util.Continue1x3to1x2( &CL, &CR /**/, &C0, &C1, C, pCdir) util.Continue3x1to2x1( &tT, &tB /**/, &t0, &tau1, tau, pDir) } }