/* * Applies a real elementary reflector H to a real m by n matrix A, * from either the left or the right. H is represented in the form * * H = I - tau * ( 1 ) * ( 1 v.T ) * ( v ) * * where tau is a real scalar and v is a real vector. * * If tau = 0, then H is taken to be the unit cmat. * * A is /a1\ a1 := a1 - w1 * \A2/ A2 := A2 - v*w1 * w1 := tau*(a1 + A2.T*v) if side == LEFT * := tau*(a1 + A2*v) if side == RIGHT * * Intermediate work space w1 required as parameter, no allocation. */ func applyHouseholder2x1(tau, v, a1, A2, w1 *cmat.FloatMatrix, flags int) *gomas.Error { var err *gomas.Error = nil tval := tau.Get(0, 0) if tval == 0.0 { return err } // shape oblivious vector copy. blasd.Axpby(w1, a1, 1.0, 0.0) if flags&gomas.LEFT != 0 { // w1 = a1 + A2.T*v err = blasd.MVMult(w1, A2, v, 1.0, 1.0, gomas.TRANSA) } else { // w1 = a1 + A2*v err = blasd.MVMult(w1, A2, v, 1.0, 1.0, gomas.NONE) } // w1 = tau*w1 blasd.Scale(w1, tval) // a1 = a1 - w1 blasd.Axpy(a1, w1, -1.0) // A2 = A2 - v*w1 if flags&gomas.LEFT != 0 { err = blasd.MVUpdate(A2, v, w1, -1.0) } else { err = blasd.MVUpdate(A2, w1, v, -1.0) } return err }
/* * like LAPACK/dlafrt.f * * Build block reflector T from HH reflector stored in TriLU(A) and coefficients * in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T z | z = -tau*T*Y.T*v * | 0 c | c = tau * * Q = H(1)H(2)...H(k) building forward here. */ func unblkQLBlockReflector(T, A, tau *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a01, a11, A02, a12, A22 cmat.FloatMatrix var TTL, TBR cmat.FloatMatrix var T00, t11, t21, T22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x2( &TTL, nil, nil, &TBR, T, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB, tau, 0, util.PBOTTOM) for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, util.PTOPLEFT) util.Repartition2x2to3x3(&TTL, &T00, nil, nil, nil, &t11, nil, nil, &t21, &T22, T, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, 1, util.PTOP) // -------------------------------------------------- // t11 := tau tauval := tau1.Get(0, 0) if tauval != 0.0 { t11.Set(0, 0, tauval) // t21 := -tauval*(a12.T + &A02.T*a12) blasd.Axpby(&t21, &a12, 1.0, 0.0) blasd.MVMult(&t21, &A02, &a01, -tauval, -tauval, gomas.TRANSA) // t21 := T22*t01 blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER) } // -------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x3to2x2( &TTL, nil, nil, &TBR, &T00, &t11, &T22, T, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, util.PTOP) } }
/* * like LAPACK/dlafrt.f * * Build block reflector T from HH reflector stored in TriLU(A) and coefficients * in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T 0 | z = -tau*T*Y.T*v * | z c | c = tau * * Q = H(1)H(2)...H(k) building forward here. */ func unblkBlockReflectorRQ(T, A, tau *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a10, A20, a11, a21, A22 cmat.FloatMatrix var TTL, TBR cmat.FloatMatrix var T00, t11, t21, T22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix util.Partition2x2( &ATL, nil, nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x2( &TTL, nil, nil, &TBR /**/, T, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tT, &tB /**/, tau, 0, util.PBOTTOM) for m(&ATL) > 0 && n(&ATL) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22 /**/, A, 1, util.PTOPLEFT) util.Repartition2x2to3x3(&TTL, &T00, nil, nil, nil, &t11, nil, nil, &t21, &T22 /**/, T, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2 /**/, tau, 1, util.PTOP) // -------------------------------------------------- // t11 := tau tauval := tau1.Get(0, 0) if tauval != 0.0 { t11.Set(0, 0, tauval) // t21 := -tauval*(a21 + A20*a10) blasd.Axpby(&t21, &a21, 1.0, 0.0) blasd.MVMult(&t21, &A20, &a10, -tauval, -tauval, gomas.NONE) // t21 := T22*t21 blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER) } // -------------------------------------------------- util.Continue3x3to2x2( &ATL, nil, nil, &ABR /**/, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x3to2x2( &TTL, nil, nil, &TBR /**/, &T00, &t11, &T22, T, util.PTOPLEFT) util.Continue3x1to2x1( &tT, &tB /**/, &t0, &tau1, tau, util.PTOP) } }
/* * like LAPACK/dlafrt.f * * Build block reflector T from HH reflector stored in TriLU(A) and coefficients * in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T z | z = -tau*T*Y.T*v * | 0 c | c = tau * * Q = H(1)H(2)...H(k) building forward here. */ func unblkBlockReflectorLQ(T, A, tau *cmat.FloatMatrix) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix var TTL, TTR, TBL, TBR cmat.FloatMatrix var T00, t01, T02, t11, t12, T22 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, util.PTOPLEFT) util.Partition2x1( &tT, &tB, tau, 0, util.PTOP) for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&TTL, &T00, &t01, &T02, nil, &t11, &t12, nil, nil, &T22, T, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, tau, 1, util.PBOTTOM) // -------------------------------------------------- // t11 := tau tauval := tau1.Get(0, 0) if tauval != 0.0 { t11.Set(0, 0, tauval) // t01 := -tauval*(a01 + A02*a12) blasd.Axpby(&t01, &a01, 1.0, 0.0) blasd.MVMult(&t01, &A02, &a12, -tauval, -tauval, gomas.NONE) // t01 := T00*t01 blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER) } // -------------------------------------------------- util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, tau, util.PBOTTOM) } }
/* * Unblocked QR decomposition with block reflector T. */ func unblockedQRT(A, T, W *cmat.FloatMatrix) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix var TTL, TTR, TBL, TBR cmat.FloatMatrix var T00, t01, T02, t11, t12, T22, w12 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, util.PTOPLEFT) for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, &a12, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&TTL, &T00, &t01, &T02, nil, &t11, &t12, nil, nil, &T22, T, 1, util.PBOTTOMRIGHT) // ------------------------------------------------------ computeHouseholder(&a11, &a21, &t11) // H*[a12 A22].T w12.SubMatrix(W, 0, 0, a12.Len(), 1) applyHouseholder2x1(&t11, &a21, &a12, &A22, &w12, gomas.LEFT) // update T tauval := t11.Get(0, 0) if tauval != 0.0 { // t01 := -tauval*(a10.T + &A20.T*a21) //a10.CopyTo(&t01) blasd.Axpby(&t01, &a10, 1.0, 0.0) blasd.MVMult(&t01, &A20, &a21, -tauval, -tauval, gomas.TRANSA) // t01 := T00*t01 blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER) } // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT) } return err }
/* * Update vector with compact WY Householder block * (I - Y*T*Y.T)*v = v - Y*T*Y.T*v * * LEFT: * 1 | 0 * v0 = v0 = v0 * 0 | Q v1 Q*v1 = v1 - Y*T*Y.T*v1 * * 1 | 0 * v0 = v0 = v0 * 0 | Q.T v1 Q.T*v1 = v1 - Y*T.T*Y.T*v1 * * RIGHT: * v0 | v1 * 1 | 0 = v0 | v1*Q = v0 | v1 - v1*Y*T*Y.T * 0 | Q * * v0 | v1 * 1 | 0 = v0 | v1*Q.T = v0 | v1 - v1*Y*T.T*Y.T * 0 | Q.T */ func updateVecLeftWY2(v, Y1, Y2, T, w *cmat.FloatMatrix, bits int) { var v1, v2 cmat.FloatMatrix var w0 cmat.FloatMatrix v1.SubMatrix(v, 1, 0, n(Y1), 1) v2.SubMatrix(v, n(Y1)+1, 0, m(Y2), 1) w0.SubMatrix(w, 0, 0, m(Y1), 1) // w0 := Y1.T*v1 + Y2.T*v2 blasd.Copy(&w0, &v1) blasd.MVMultTrm(&w0, Y1, 1.0, gomas.LOWER|gomas.UNIT|gomas.TRANS) blasd.MVMult(&w0, Y2, &v2, 1.0, 1.0, gomas.TRANS) // w0 := op(T)*w0 blasd.MVMultTrm(&w0, T, 1.0, bits|gomas.UPPER) // v2 := v2 - Y2*w0 blasd.MVMult(&v2, Y2, &w0, -1.0, 1.0, gomas.NONE) // v1 := v1 - Y1*w0 blasd.MVMultTrm(&w0, Y1, 1.0, gomas.LOWER|gomas.UNIT) blasd.Axpy(&v1, &w0, -1.0) }
/* * Apply elementary Householder reflector v to matrix A2. * * H = I - tau*v*v.t; * * RIGHT: A = A*H = A - tau*A*v*v.T = A - tau*w1*v.T * LEFT: A = H*A = A - tau*v*v.T*A = A - tau*v*A.T*v = A - tau*v*w1 */ func applyHouseholder1x1(tau, v, A2, w1 *cmat.FloatMatrix, flags int) *gomas.Error { var err *gomas.Error = nil tval := tau.Get(0, 0) if tval == 0.0 { return nil } if flags&gomas.LEFT != 0 { // w1 = A2.T*v err = blasd.MVMult(w1, A2, v, 1.0, 0.0, gomas.TRANSA) if err == nil { // A2 = A2 - tau*v*w1; m(A2) == len(v) && n(A2) == len(w1) err = blasd.MVUpdate(A2, v, w1, -tval) } } else { // w1 = A2*v err = blasd.MVMult(w1, A2, v, 1.0, 0.0, gomas.NONE) if err == nil { // A2 = A2 - tau*w1*v; m(A2) == len(w1) && n(A2) == len(v) err = blasd.MVUpdate(A2, w1, v, -tval) } } return err }
/* From LAPACK/dlarf.f * * Applies a real elementary reflector H to a real m by n matrix A, * from either the left or the right. H is represented in the form * * H = I - tau * ( 1 ) * ( 1 v.T ) * ( v ) * * where tau is a real scalar and v is a real vector. * * If tau = 0, then H is taken to be the unit cmat. * * A is /a1\ a1 := a1 - w1 * \A2/ A2 := A2 - v*w1 * w1 := tau*(a1 + A2.T*v) if side == LEFT * := tau*(a1 + A2*v) if side == RIGHT * * Allocates/frees intermediate work space matrix w1. */ func applyHouseholder(tau, v, a1, A2 *cmat.FloatMatrix, flags int) { tval := tau.Get(0, 0) if tval == 0.0 { return } w1 := cmat.NewCopy(a1) if flags&gomas.LEFT != 0 { // w1 = a1 + A2.T*v blasd.MVMult(w1, A2, v, 1.0, 1.0, gomas.TRANSA) } else { // w1 = a1 + A2*v blasd.MVMult(w1, A2, v, 1.0, 1.0, gomas.NONE) } // w1 = tau*w1 blasd.Scale(w1, tval) // a1 = a1 - w1 blasd.Axpy(a1, w1, -1.0) // A2 = A2 - v*w1 blasd.MVUpdate(A2, v, w1, -1.0) }
// unblocked LU decomposition with pivots: FLAME LU variant 3; Left-looking func unblockedLUpiv(A *cmat.FloatMatrix, p *Pivots, offset int, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix var AL, AR, A0, a1, A2, aB1, AB0 cmat.FloatMatrix var pT, pB, p0, p1, p2 Pivots err = nil util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) partitionPivot2x1( &pT, &pB, *p, 0, util.PTOP) for m(&ATL) < m(A) && n(&ATL) < n(A) { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, &a10, &a11, &a12, &A20, &a21, &A22 /**/, A, 1, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &a1, &A2 /**/, A, 1, util.PRIGHT) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, *p, 1, util.PBOTTOM) // apply previously computed pivots on current column applyPivots(&a1, p0) // a01 = trilu(A00) \ a01 (TRSV) blasd.MVSolveTrm(&a01, &A00, 1.0, gomas.LOWER|gomas.UNIT) // a11 = a11 - a10 *a01 aval := a11.Get(0, 0) - blasd.Dot(&a10, &a01) a11.Set(0, 0, aval) // a21 = a21 -A20*a01 blasd.MVMult(&a21, &A20, &a01, -1.0, 1.0, gomas.NONE) // pivot index on current column [a11, a21].T aB1.Column(&ABR, 0) p1[0] = pivotIndex(&aB1) // pivots to current column applyPivots(&aB1, p1) // a21 = a21 / a11 if aval == 0.0 { if err == nil { ij := m(&ATL) + p1[0] - 1 err = gomas.NewError(gomas.ESINGULAR, "DecomposeLU", ij) } } else { blasd.InvScale(&a21, a11.Get(0, 0)) } // apply pivots to previous columns AB0.SubMatrix(&ABL, 0, 0) applyPivots(&AB0, p1) // scale last pivots to origin matrix row numbers p1[0] += m(&ATL) util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR, &A0, &a1, A, util.PRIGHT) contPivot3x1to2x1( &pT, &pB, p0, p1, *p, util.PBOTTOM) } if n(&ATL) < n(A) { applyPivots(&ATR, *p) blasd.SolveTrm(&ATR, &ATL, 1.0, gomas.LEFT|gomas.UNIT|gomas.LOWER, conf) } return err }
/* * Computes upper Hessenberg reduction of N-by-N matrix A using unblocked * algorithm as described in (1). * * Hessengerg reduction: A = Q.T*B*Q, Q unitary, B upper Hessenberg * Q = H(0)*H(1)*...*H(k) where H(k) is k'th Householder reflector. * * Compatible with lapack.DGEHD2. */ func unblkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, row int) { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a11, a21, A22 cmat.FloatMatrix var AL, AR, A0, a1, A2 cmat.FloatMatrix var tT, tB cmat.FloatMatrix var t0, tau1, t2, w12, v1 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, row, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) util.Partition2x1( &tT, &tB, Tvec, 0, util.PTOP) v1.SubMatrix(W, 0, 0, m(A), 1) for m(&ABR) > 1 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &a11, nil, nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &a1, &A2, A, 1, util.PRIGHT) util.Repartition2x1to3x1(&tT, &t0, &tau1, &t2, Tvec, 1, util.PBOTTOM) // ------------------------------------------------------ // a21 = [beta; H(k)].T computeHouseholderVec(&a21, &tau1) tauval := tau1.Get(0, 0) beta := a21.Get(0, 0) a21.Set(0, 0, 1.0) // v1 := A2*a21 blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE) // A2 := A2 - tau*v1*a21 (A2 := A2*H(k)) blasd.MVUpdate(&A2, &v1, &a21, -tauval) w12.SubMatrix(W, 0, 0, n(&A22), 1) // w12 := a21.T*A22 = A22.T*a21 blasd.MVMult(&w12, &A22, &a21, 1.0, 0.0, gomas.TRANS) // A22 := A22 - tau*a21*w12 (A22 := H(k)*A22) blasd.MVUpdate(&A22, &a21, &w12, -tauval) a21.Set(0, 0, beta) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR, &A0, &a1, A, util.PRIGHT) util.Continue3x1to2x1( &tT, &tB, &t0, &tau1, Tvec, util.PBOTTOM) } }
/* * * Building reduction block for blocked algorithm as described in (1). * * A. update next column * a10 [(U00) (U00) ] [(a10) (V00) ] * a11 := I -[(u10)*T00*(u10).T] * [(a11) - (v01) * T00 * a10] * a12 [(U20) (U20) ] [(a12) (V02) ] * * B. compute Householder reflector for updated column * a21, t11 := Householder(a21) * * C. update intermediate reductions * v10 A02*a21 * v11 := a12*a21 * v12 A22*a21 * * D. update block reflector * t01 := A20*a21 * t11 := t11 */ func unblkBuildHessGQvdG(A, T, V, W *cmat.FloatMatrix) *gomas.Error { var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix var AL, AR, A0, a1, A2 cmat.FloatMatrix var TTL, TTR, TBL, TBR cmat.FloatMatrix var T00, t01, t11, T22 cmat.FloatMatrix var VL, VR, V0, v1, V2, Y0 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &TTL, &TTR, &TBL, &TBR, T, 0, 0, util.PTOPLEFT) util.Partition1x2( &AL, &AR, A, 0, util.PLEFT) util.Partition1x2( &VL, &VR, V, 0, util.PLEFT) var beta float64 for n(&VR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&TTL, &T00, &t01, nil, nil, &t11, nil, nil, nil, &T22, T, 1, util.PBOTTOMRIGHT) util.Repartition1x2to1x3(&AL, &A0, &a1, &A2, A, 1, util.PRIGHT) util.Repartition1x2to1x3(&VL, &V0, &v1, &V2, V, 1, util.PRIGHT) // ------------------------------------------------------ // Compute Hessenberg update for next column of A: if n(&V0) > 0 { // y10 := T00*a10 (use t01 as workspace?) blasd.Axpby(&t01, &a10, 1.0, 0.0) blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER) // a1 := a1 - V0*T00*a10 blasd.MVMult(&a1, &V0, &t01, -1.0, 1.0, gomas.NONE) // update a1 := (I - Y*T*Y.T).T*a1 (here t01 as workspace) Y0.SubMatrix(A, 1, 0, n(&A00), n(&A00)) updateVecLeftWY2(&a1, &Y0, &A20, &T00, &t01, gomas.TRANS) a10.Set(0, -1, beta) } // Compute Householder reflector computeHouseholderVec(&a21, &t11) beta = a21.Get(0, 0) a21.Set(0, 0, 1.0) // v1 := A2*a21 blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE) // update T tauval := t11.Get(0, 0) if tauval != 0.0 { // t01 := -tauval*A20.T*a21 blasd.MVMult(&t01, &A20, &a21, -tauval, 0.0, gomas.TRANS) // t01 := T00*t01 blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER) } // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &TTL, &TTR, &TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT) util.Continue1x3to1x2( &AL, &AR, &A0, &a1, A, util.PRIGHT) util.Continue1x3to1x2( &VL, &VR, &V0, &v1, V, util.PRIGHT) } A.Set(n(V), n(V)-1, beta) return nil }
/* * This is adaptation of TRIRED_LAZY_UNB algorithm from (1). */ func unblkBuildTridiagUpper(A, tauq, Y, W *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix var YTL, YBR cmat.FloatMatrix var Y00, y01, Y02, y11, y12, Y22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var w12 cmat.FloatMatrix var v0 float64 util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT) util.Partition2x2( &YTL, nil, nil, &YBR, Y, 0, 0, util.PBOTTOMRIGHT) util.Partition2x1( &tqT, &tqB, tauq, 0, util.PBOTTOM) k := 0 for k < n(Y) { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, util.PTOPLEFT) util.Repartition2x2to3x3(&YTL, &Y00, &y01, &Y02, nil, &y11, &y12, nil, nil, &Y22, Y, 1, util.PTOPLEFT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PTOP) // set temp vectors for this round w12.SubMatrix(Y, -1, 0, 1, n(&Y02)) // ------------------------------------------------------ if n(&Y02) > 0 { aa := blasd.Dot(&a12, &y12) aa += blasd.Dot(&y12, &a12) a11.Set(0, 0, a11.Get(0, 0)-aa) // a01 := a01 - A02*y12 blasd.MVMult(&a01, &A02, &y12, -1.0, 1.0, gomas.NONE) // a01 := a01 - Y02*a12 blasd.MVMult(&a01, &Y02, &a12, -1.0, 1.0, gomas.NONE) // restore superdiagonal value a12.Set(0, 0, v0) } // Compute householder to zero subdiagonal entries computeHouseholderRev(&a01, &tauq1) tauqv := tauq1.Get(0, 0) // set sub&iagonal to unit v0 = a01.Get(-1, 0) a01.Set(-1, 0, 1.0) // y01 := tauq*A00*a01 blasd.MVMultSym(&y01, &A00, &a01, tauqv, 0.0, gomas.UPPER) // w12 := A02.T*a01 blasd.MVMult(&w12, &A02, &a01, 1.0, 0.0, gomas.TRANS) // y01 := y01 - Y02*(A02.T*a01) blasd.MVMult(&y01, &Y02, &w12, -tauqv, 1.0, gomas.NONE) // w12 := Y02.T*a01 blasd.MVMult(&w12, &Y02, &a01, 1.0, 0.0, gomas.TRANS) // y01 := y01 - A02*(Y02.T*a01) blasd.MVMult(&y01, &A02, &w12, -tauqv, 1.0, gomas.NONE) // beta := tauq*a01.T*y01 beta := tauqv * blasd.Dot(&a01, &y01) // y01 := y01 - 0.5*beta*a01 blasd.Axpy(&y01, &a01, -0.5*beta) // ------------------------------------------------------ k += 1 util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT) util.Continue3x3to2x2( &YTL, nil, nil, &YBR, &Y00, &y11, &Y22, A, util.PTOPLEFT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PTOP) } // restore superdiagonal value A.Set(m(&ATL)-1, n(&ATL), v0) }
/* * This is adaptation of TRIRED_LAZY_UNB algorithm from (1). */ func unblkBuildTridiagLower(A, tauq, Y, W *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix var YTL, YBR cmat.FloatMatrix var Y00, y10, y11, Y20, y21, Y22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var w12 cmat.FloatMatrix var v0 float64 util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &YTL, nil, nil, &YBR, Y, 0, 0, util.PTOPLEFT) util.Partition2x1( &tqT, &tqB, tauq, 0, util.PTOP) k := 0 for k < n(Y) { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&YTL, &Y00, nil, nil, &y10, &y11, nil, &Y20, &y21, &Y22, Y, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PBOTTOM) // set temp vectors for this round //w12.SetBuf(y10.Len(), 1, y10.Len(), W.Data()) w12.SubMatrix(Y, 0, 0, 1, n(&Y00)) // ------------------------------------------------------ if n(&Y00) > 0 { aa := blasd.Dot(&a10, &y10) aa += blasd.Dot(&y10, &a10) a11.Set(0, 0, a11.Get(0, 0)-aa) // a21 := a21 - A20*y10 blasd.MVMult(&a21, &A20, &y10, -1.0, 1.0, gomas.NONE) // a21 := a21 - Y20*a10 blasd.MVMult(&a21, &Y20, &a10, -1.0, 1.0, gomas.NONE) // restore subdiagonal value a10.Set(0, -1, v0) } // Compute householder to zero subdiagonal entries computeHouseholderVec(&a21, &tauq1) tauqv := tauq1.Get(0, 0) // set subdiagonal to unit v0 = a21.Get(0, 0) a21.Set(0, 0, 1.0) // y21 := tauq*A22*a21 blasd.MVMultSym(&y21, &A22, &a21, tauqv, 0.0, gomas.LOWER) // w12 := A20.T*a21 blasd.MVMult(&w12, &A20, &a21, 1.0, 0.0, gomas.TRANS) // y21 := y21 - Y20*(A20.T*a21) blasd.MVMult(&y21, &Y20, &w12, -tauqv, 1.0, gomas.NONE) // w12 := Y20.T*a21 blasd.MVMult(&w12, &Y20, &a21, 1.0, 0.0, gomas.TRANS) // y21 := y21 - A20*(Y20.T*a21) blasd.MVMult(&y21, &A20, &w12, -tauqv, 1.0, gomas.NONE) // beta := tauq*a21.T*y21 beta := tauqv * blasd.Dot(&a21, &y21) // y21 := y21 - 0.5*beta*a21 blasd.Axpy(&y21, &a21, -0.5*beta) // ------------------------------------------------------ k += 1 util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &YTL, nil, nil, &YBR, &Y00, &y11, &Y22, A, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PBOTTOM) } // restore subdiagonal value A.Set(m(&ATL), n(&ATL)-1, v0) }
/* * Unblocked solve A*X = B for Bunch-Kauffman factorized symmetric real matrix. */ func unblkSolveBKUpper(B, A *cmat.FloatMatrix, p Pivots, phase int, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a11, a12t, A22 cmat.FloatMatrix var Aref *cmat.FloatMatrix var BT, BB, B0, b1, B2, Bx cmat.FloatMatrix var pT, pB, p0, p1, p2 Pivots var aStart, aDir, bStart, bDir util.Direction var nc int np := 0 if phase == 2 { aStart = util.PTOPLEFT aDir = util.PBOTTOMRIGHT bStart = util.PTOP bDir = util.PBOTTOM nc = 1 Aref = &ABR } else { aStart = util.PBOTTOMRIGHT aDir = util.PTOPLEFT bStart = util.PBOTTOM bDir = util.PTOP nc = m(A) Aref = &ATL } util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, aStart) util.Partition2x1( &BT, &BB, B, 0, bStart) partitionPivot2x1( &pT, &pB, p, 0, bStart) // phase 1: // - solve U*D*X = B, overwriting B with X // - looping from BOTTOM to TOP // phase 1: // - solve U*X = B, overwriting B with X // - looping from TOP to BOTTOM for n(Aref) > 0 { // see if next diagonal block is 1x1 or 2x2 np = 1 if p[nc-1] < 0 { np = 2 } // repartition according the pivot size util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12t, nil, nil, &A22 /**/, A, np, aDir) util.Repartition2x1to3x1(&BT, &B0, &b1, &B2 /**/, B, np, bDir) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, p, np, bDir) // ------------------------------------------------------------ switch phase { case 1: // computes D.-1*(U.-1*B); // b1 is current row, last row of BT if np == 1 { if p1[0] != nc { // swap rows on top part of B swapRows(&BT, m(&BT)-1, p1[0]-1) } // B0 = B0 - a01*b1 blasd.MVUpdate(&B0, &a01, &b1, -1.0) // b1 = b1/d1 blasd.InvScale(&b1, a11.Get(0, 0)) nc -= 1 } else if np == 2 { if p1[0] != -nc { // swap rows on top part of B swapRows(&BT, m(&BT)-2, -p1[0]-1) } b := a11.Get(0, 1) apb := a11.Get(0, 0) / b dpb := a11.Get(1, 1) / b // (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2 scale := apb*dpb - 1.0 scale *= b // B0 = B0 - a01*b1 blasd.Mult(&B0, &a01, &b1, -1.0, 1.0, gomas.NONE, conf) // b1 = a11.-1*b1.T //(2x2 block, no subroutine for doing this in-place) for k := 0; k < n(&b1); k++ { s0 := b1.Get(0, k) s1 := b1.Get(1, k) b1.Set(0, k, (dpb*s0-s1)/scale) b1.Set(1, k, (apb*s1-s0)/scale) } nc -= 2 } case 2: // compute X = U.-T*B if np == 1 { blasd.MVMult(&b1, &B0, &a01, -1.0, 1.0, gomas.TRANS) if p1[0] != nc { // swap rows on bottom part of B util.Merge2x1(&Bx, &B0, &b1) swapRows(&Bx, m(&Bx)-1, p1[0]-1) } nc += 1 } else if np == 2 { blasd.Mult(&b1, &a01, &B0, -1.0, 1.0, gomas.TRANSA, conf) if p1[0] != -nc { // swap rows on bottom part of B util.Merge2x1(&Bx, &B0, &b1) swapRows(&Bx, m(&Bx)-2, -p1[0]-1) } nc += 2 } } // ------------------------------------------------------------ util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, aDir) util.Continue3x1to2x1( &BT, &BB, &B0, &b1, B, bDir) contPivot3x1to2x1( &pT, &pB, p0, p1, p, bDir) } return err }
/* * Find diagonal pivot and build incrementaly updated block. * * d x r2 x x x c1 | x x kp1 k | w w * d r2 x x x c1 | x x kp1 k | w w * r2 r2 r2 r2 c1 | x x kp1 k | w w * d x x c1 | x x kp1 k | w w * d x c1 | x x kp1 k | w w * d c1 | x x kp1 k | w w * c1 | x x kp1 k | w w * -------------------------- ------------- * (AL) (AR) (WL) (WR) * * Matrix AL contains the unfactored part of the matrix and AR the already * factored columns. Matrix WR is updated values of factored part ie. * w(i) = l(i)d(i). Matrix WL will have updated values for next column. * Column WL(k) contains updated AL(c1) and WL(kp1) possible pivot row AL(r2). * * On exit, for 1x1 diagonal the rightmost column of WL (k) holds the updated * value of AL(c1). If pivoting this required the WL(k) holds the actual pivoted * column/row. * * For 2x2 diagonal blocks WL(k) holds the updated AL(c1) and WL(kp1) holds * actual values of pivot column/row AL(r2), without the diagonal pivots. */ func findAndBuildBKPivotUpper(AL, AR, WL, WR *cmat.FloatMatrix, k int) (int, int) { var r, q int var rcol, qrow, src, wk, wkp1, wrow cmat.FloatMatrix lc := n(AL) - 1 wc := n(WL) - 1 lr := m(AL) - 1 // Copy AL[:,lc] to WL[:,wc] and update with WR[0:] src.SubMatrix(AL, 0, lc, m(AL), 1) wk.SubMatrix(WL, 0, wc, m(AL), 1) blasd.Copy(&wk, &src) if k > 0 { wrow.SubMatrix(WR, lr, 0, 1, n(WR)) blasd.MVMult(&wk, AR, &wrow, -1.0, 1.0, gomas.NONE) } if m(AL) == 1 { return -1, 1 } // amax is on-diagonal element of current column amax := math.Abs(WL.Get(lr, wc)) // find max off-diagonal on first column. rcol.SubMatrix(WL, 0, wc, lr, 1) // r is row index and rmax is its absolute value r = blasd.IAmax(&rcol) rmax := math.Abs(rcol.Get(r, 0)) if amax >= bkALPHA*rmax { // no pivoting, 1x1 diagonal return -1, 1 } // Now we need to copy row r to WL[:,wc-1] and update it wkp1.SubMatrix(WL, 0, wc-1, m(AL), 1) if r > 0 { // above the diagonal part of AL qrow.SubMatrix(AL, 0, r, r, 1) blasd.Copy(&wkp1, &qrow) } var wkr cmat.FloatMatrix qrow.SubMatrix(AL, r, r, 1, m(AL)-r) wkr.SubMatrix(&wkp1, r, 0, m(AL)-r, 1) blasd.Copy(&wkr, &qrow) if k > 0 { // update wkp1 wrow.SubMatrix(WR, r, 0, 1, n(WR)) blasd.MVMult(&wkp1, AR, &wrow, -1.0, 1.0, gomas.NONE) } // set on-diagonal entry to zero to avoid hitting it. p1 := wkp1.Get(r, 0) wkp1.Set(r, 0, 0.0) // max off-diagonal on r'th column/row at index q q = blasd.IAmax(&wkp1) qmax := math.Abs(wkp1.Get(q, 0)) wkp1.Set(r, 0, p1) if amax >= bkALPHA*rmax*(rmax/qmax) { // no pivoting, 1x1 diagonal return -1, 1 } // if q == r then qmax is not off-diagonal, qmax == WR[r,1] and // we get 1x1 pivot as following is always true if math.Abs(WL.Get(r, wc-1)) >= bkALPHA*qmax { // 1x1 pivoting and interchange with k, r // pivot row in column WL[:,-2] to WL[:,-1] src.SubMatrix(WL, 0, wc-1, m(AL), 1) wkp1.SubMatrix(WL, 0, wc, m(AL), 1) blasd.Copy(&wkp1, &src) wkp1.Set(-1, 0, src.Get(r, 0)) wkp1.Set(r, 0, src.Get(-1, 0)) return r, 1 } else { // 2x2 pivoting and interchange with k+1, r return r, 2 } return -1, 1 }
/* * Find diagonal pivot and build incrementaly updated block. * * (AL) (AR) (WL) (WR) * -------------------------- ---------- k'th row in W * x x | c1 w w | k kp1 * x x | c1 d w w | k kp1 * x x | c1 x d w w | k kp1 * x x | c1 x x d w w | k kp1 * x x | c1 r2 r2 r2 r2 w w | k kp1 * x x | c1 x x x r2 d w w | k kp1 * x x | c1 x x x r2 x d w w | k kp1 * * Matrix AR contains the unfactored part of the matrix and AL the already * factored columns. Matrix WL is updated values of factored part ie. * w(i) = l(i)d(i). Matrix WR will have updated values for next column. * Column WR(k) contains updated AR(c1) and WR(kp1) possible pivot row AR(r2). */ func findAndBuildBKPivotLower(AL, AR, WL, WR *cmat.FloatMatrix, k int) (int, int) { var r, q int var rcol, qrow, src, wk, wkp1, wrow cmat.FloatMatrix // Copy AR column 0 to WR column 0 and update with WL[0:] src.SubMatrix(AR, 0, 0, m(AR), 1) wk.SubMatrix(WR, 0, 0, m(AR), 1) wk.Copy(&src) if k > 0 { wrow.SubMatrix(WL, 0, 0, 1, n(WL)) blasd.MVMult(&wk, AL, &wrow, -1.0, 1.0, gomas.NONE) } if m(AR) == 1 { return 0, 1 } amax := math.Abs(WR.Get(0, 0)) // find max off-diagonal on first column. rcol.SubMatrix(WR, 1, 0, m(AR)-1, 1) // r is row index and rmax is its absolute value r = blasd.IAmax(&rcol) + 1 rmax := math.Abs(rcol.Get(r-1, 0)) if amax >= bkALPHA*rmax { // no pivoting, 1x1 diagonal return 0, 1 } // Now we need to copy row r to WR[:,1] and update it wkp1.SubMatrix(WR, 0, 1, m(AR), 1) qrow.SubMatrix(AR, r, 0, 1, r+1) blasd.Copy(&wkp1, &qrow) if r < m(AR)-1 { var wkr cmat.FloatMatrix qrow.SubMatrix(AR, r, r, m(AR)-r, 1) wkr.SubMatrix(&wkp1, r, 0, m(&wkp1)-r, 1) blasd.Copy(&wkr, &qrow) } if k > 0 { // update wkp1 wrow.SubMatrix(WL, r, 0, 1, n(WL)) blasd.MVMult(&wkp1, AL, &wrow, -1.0, 1.0, gomas.NONE) } // set on-diagonal entry to zero to avoid finding it p1 := wkp1.Get(r, 0) wkp1.Set(r, 0, 0.0) // max off-diagonal on r'th column/row at index q q = blasd.IAmax(&wkp1) qmax := math.Abs(wkp1.Get(q, 0)) // restore on-diagonal entry wkp1.Set(r, 0, p1) if amax >= bkALPHA*rmax*(rmax/qmax) { // no pivoting, 1x1 diagonal return 0, 1 } // if q == r then qmax is not off-diagonal, qmax == WR[r,1] and // we get 1x1 pivot as following is always true if math.Abs(WR.Get(r, 1)) >= bkALPHA*qmax { // 1x1 pivoting and interchange with k, r // pivot row in column WR[:,1] to W[:,0] src.SubMatrix(WR, 0, 1, m(AR), 1) wkp1.SubMatrix(WR, 0, 0, m(AR), 1) blasd.Copy(&wkp1, &src) wkp1.Set(0, 0, src.Get(r, 0)) wkp1.Set(r, 0, src.Get(0, 0)) return r, 1 } else { // 2x2 pivoting and interchange with k+1, r return r, 2 } return 0, 1 }
/* * Compute unblocked bidiagonal reduction for A when M >= N * * Diagonal and first super/sub diagonal are overwritten with the * upper/lower bidiagonal matrix B. * * This computing (1-tauq*v*v.T)*A*(1-taup*u.u.T) from left to right. */ func unblkReduceBidiagLeft(A, tauq, taup, W *cmat.FloatMatrix) { var ATL, ABR cmat.FloatMatrix var A00, a11, a12t, a21, A22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var tpT, tpB, tp0, taup1, tp2 cmat.FloatMatrix var y21, z21 cmat.FloatMatrix var v0 float64 util.Partition2x2( &ATL, nil, nil, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x1( &tqT, &tqB, tauq, 0, util.PTOP) util.Partition2x1( &tpT, &tpB, taup, 0, util.PTOP) for m(&ABR) > 0 && n(&ABR) > 0 { util.Repartition2x2to3x3(&ATL, &A00, nil, nil, nil, &a11, &a12t, nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PBOTTOM) util.Repartition2x1to3x1(&tpT, &tp0, &taup1, &tp2, taup, 1, util.PBOTTOM) // set temp vectors for this round y21.SetBuf(n(&a12t), 1, n(&a12t), W.Data()) z21.SetBuf(m(&a21), 1, m(&a21), W.Data()[y21.Len():]) // ------------------------------------------------------ // Compute householder to zero subdiagonal entries computeHouseholder(&a11, &a21, &tauq1) // y21 := a12 + A22.T*a21 blasd.Axpby(&y21, &a12t, 1.0, 0.0) blasd.MVMult(&y21, &A22, &a21, 1.0, 1.0, gomas.TRANSA) // a12t := a12t - tauq*y21 tauqv := tauq1.Get(0, 0) blasd.Axpy(&a12t, &y21, -tauqv) // Compute householder to zero elements above 1st superdiagonal computeHouseholderVec(&a12t, &taup1) v0 = a12t.Get(0, 0) a12t.Set(0, 0, 1.0) taupv := taup1.Get(0, 0) // [v == a12t, u == a21] beta := blasd.Dot(&y21, &a12t) // z21 := tauq*beta*u blasd.Axpby(&z21, &a21, tauqv*beta, 0.0) // z21 := A22*v - z21 blasd.MVMult(&z21, &A22, &a12t, 1.0, -1.0, gomas.NONE) // A22 := A22 - tauq*u*y21 blasd.MVUpdate(&A22, &a21, &y21, -tauqv) // A22 := A22 - taup*z21*v blasd.MVUpdate(&A22, &z21, &a12t, -taupv) a12t.Set(0, 0, v0) // ------------------------------------------------------ util.Continue3x3to2x2( &ATL, nil, nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PBOTTOM) util.Continue3x1to2x1( &tpT, &tpB, &tp0, &taup1, taup, util.PBOTTOM) } }
/* * This is adaptation of BIRED_LAZY_UNB algorithm from (1). * * Z matrix accumulates updates of row transformations i.e. first * Householder that zeros off diagonal entries on row. Vector z21 * is updates for current round, Z20 are already accumulated updates. * Vector z21 updates a12 before next transformation. * * Y matrix accumulates updates on column tranformations ie Householder * that zeros elements below sub-diagonal. Vector y21 is updates for current * round, Y20 are already accumulated updates. Vector y21 updates * a21 befor next transformation. * * Z, Y matrices upper trigonal part is not needed, temporary vector * w00 that has maximum length of n(Y) is placed on the last column of * Z matrix on each iteration. */ func unblkBuildBidiagRight(A, tauq, taup, Y, Z *cmat.FloatMatrix) { var ATL, ABL, ABR cmat.FloatMatrix var A00, a01, A02, a10, a11, a12t, A20, a21, A22 cmat.FloatMatrix var YTL, YBR, ZTL, ZBR cmat.FloatMatrix var Y00, y10, Y20, y11, y21, Y22 cmat.FloatMatrix var Z00, z10, Z20, z11, z21, Z22 cmat.FloatMatrix var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix var tpT, tpB, tp0, taup1, tp2 cmat.FloatMatrix var w00 cmat.FloatMatrix var v0 float64 // Y is workspace for building updates for first Householder. // And Z is space for build updates for second Householder // Y is n(A)-2,nb and Z is m(A)-1,nb util.Partition2x2( &ATL, nil, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) util.Partition2x2( &YTL, nil, nil, &YBR, Y, 0, 0, util.PTOPLEFT) util.Partition2x2( &ZTL, nil, nil, &ZBR, Z, 0, 0, util.PTOPLEFT) util.Partition2x1( &tqT, &tqB, tauq, 0, util.PTOP) util.Partition2x1( &tpT, &tpB, taup, 0, util.PTOP) k := 0 for k < n(Y) { util.Repartition2x2to3x3(&ATL, &A00, &a01, &A02, &a10, &a11, &a12t, &A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&YTL, &Y00, nil, nil, &y10, &y11, nil, &Y20, &y21, &Y22, Y, 1, util.PBOTTOMRIGHT) util.Repartition2x2to3x3(&ZTL, &Z00, nil, nil, &z10, &z11, nil, &Z20, &z21, &Z22, Z, 1, util.PBOTTOMRIGHT) util.Repartition2x1to3x1(&tqT, &tq0, &tauq1, &tq2, tauq, 1, util.PBOTTOM) util.Repartition2x1to3x1(&tpT, &tp0, &taup1, &tp2, taup, 1, util.PBOTTOM) // set temp vectors for this round, w00.SubMatrix(Z, 0, n(Z)-1, m(&A02), 1) // ------------------------------------------------------ // u10 == a10, U20 == A20, u21 == a21, // v10 == a01, V20 == A02, v21 == a12t if n(&Y20) > 0 { // a11 := a11 - u10t*z10 - y10*v10 aa := blasd.Dot(&a10, &z10) aa += blasd.Dot(&y10, &a01) a11.Set(0, 0, a11.Get(0, 0)-aa) // a12t := a12t - V20*z10 - Z20*u10 blasd.MVMult(&a12t, &A02, &y10, -1.0, 1.0, gomas.TRANS) blasd.MVMult(&a12t, &Z20, &a10, -1.0, 1.0, gomas.NONE) // a21 := a21 - Y20*v10 - U20*z10 blasd.MVMult(&a21, &Y20, &a01, -1.0, 1.0, gomas.NONE) blasd.MVMult(&a21, &A20, &z10, -1.0, 1.0, gomas.NONE) // here restore bidiagonal entry a10.Set(0, -1, v0) } // Compute householder to zero superdiagonal entries computeHouseholder(&a11, &a12t, &taup1) taupv := taup1.Get(0, 0) // y21 := a21 + A22*v21 - Y20*U20.T*v21 - V20*Z20.T*v21 blasd.Axpby(&y21, &a21, 1.0, 0.0) blasd.MVMult(&y21, &A22, &a12t, 1.0, 1.0, gomas.NONE) // w00 := U20.T*v21 [= A02*a12t] blasd.MVMult(&w00, &A02, &a12t, 1.0, 0.0, gomas.NONE) // y21 := y21 - U20*w00 [U20 == A20] blasd.MVMult(&y21, &Y20, &w00, -1.0, 1.0, gomas.NONE) // w00 := Z20.T*v21 blasd.MVMult(&w00, &Z20, &a12t, 1.0, 0.0, gomas.TRANS) // y21 := y21 - V20*w00 [V20 == A02.T] blasd.MVMult(&y21, &A20, &w00, -1.0, 1.0, gomas.NONE) // a21 := a21 - taup*y21 blasd.Scale(&y21, taupv) blasd.Axpy(&a21, &y21, -1.0) // Compute householder to zero elements below 1st subdiagonal computeHouseholderVec(&a21, &tauq1) v0 = a21.Get(0, 0) a21.Set(0, 0, 1.0) tauqv := tauq1.Get(0, 0) // z21 := tauq*(A22*y - V20*Y20.T*u - Z20*U20.T*u - beta*v) // [v == a12t, u == a21] beta := blasd.Dot(&y21, &a21) // z21 := beta*v blasd.Axpby(&z21, &a12t, beta, 0.0) // w00 = Y20.T*u blasd.MVMult(&w00, &Y20, &a21, 1.0, 0.0, gomas.TRANS) // z21 = z21 + V20*w00 == A02.T*w00 blasd.MVMult(&z21, &A02, &w00, 1.0, 1.0, gomas.TRANS) // w00 := U20.T*u (U20.T == A20.T) blasd.MVMult(&w00, &A20, &a21, 1.0, 0.0, gomas.TRANS) // z21 := z21 + Z20*w00 blasd.MVMult(&z21, &Z20, &w00, 1.0, 1.0, gomas.NONE) // z21 := -tauq*z21 + tauq*A22*v blasd.MVMult(&z21, &A22, &a21, tauqv, -tauqv, gomas.TRANS) // ------------------------------------------------------ k += 1 util.Continue3x3to2x2( &ATL, nil, &ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &YTL, nil, nil, &YBR, &Y00, &y11, &Y22, Y, util.PBOTTOMRIGHT) util.Continue3x3to2x2( &ZTL, nil, nil, &ZBR, &Z00, &z11, &Z22, Z, util.PBOTTOMRIGHT) util.Continue3x1to2x1( &tqT, &tqB, &tq0, &tauq1, tauq, util.PBOTTOM) util.Continue3x1to2x1( &tpT, &tpB, &tp0, &taup1, taup, util.PBOTTOM) } // restore ABL.Set(0, -1, v0) }