Esempio n. 1
0
/*
 * Applies a real elementary reflector H to a real m by n matrix A,
 * from either the left or the right. H is represented in the form
 *
 *       H = I - tau * ( 1 ) * ( 1 v.T )
 *                     ( v )
 *
 * where tau is a real scalar and v is a real vector.
 *
 * If tau = 0, then H is taken to be the unit cmat.
 *
 * A is /a1\   a1 := a1 - w1
 *      \A2/   A2 := A2 - v*w1
 *             w1 := tau*(a1 + A2.T*v) if side == LEFT
 *                := tau*(a1 + A2*v)   if side == RIGHT
 *
 * Intermediate work space w1 required as parameter, no allocation.
 */
func applyHouseholder2x1(tau, v, a1, A2, w1 *cmat.FloatMatrix, flags int) *gomas.Error {
	var err *gomas.Error = nil
	tval := tau.Get(0, 0)
	if tval == 0.0 {
		return err
	}

	// shape oblivious vector copy.
	blasd.Axpby(w1, a1, 1.0, 0.0)
	if flags&gomas.LEFT != 0 {
		// w1 = a1 + A2.T*v
		err = blasd.MVMult(w1, A2, v, 1.0, 1.0, gomas.TRANSA)
	} else {
		// w1 = a1 + A2*v
		err = blasd.MVMult(w1, A2, v, 1.0, 1.0, gomas.NONE)
	}
	// w1 = tau*w1
	blasd.Scale(w1, tval)

	// a1 = a1 - w1
	blasd.Axpy(a1, w1, -1.0)

	// A2 = A2 - v*w1
	if flags&gomas.LEFT != 0 {
		err = blasd.MVUpdate(A2, v, w1, -1.0)
	} else {
		err = blasd.MVUpdate(A2, w1, v, -1.0)
	}
	return err
}
Esempio n. 2
0
File: rq.go Progetto: hrautila/gomas
/*
 * like LAPACK/dlafrt.f
 *
 * Build block reflector T from HH reflector stored in TriLU(A) and coefficients
 * in tau.
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * T = | T  0 |   z = -tau*T*Y.T*v
 *     | z  c |   c = tau
 *
 * Q = H(1)H(2)...H(k) building forward here.
 */
func unblkBlockReflectorRQ(T, A, tau *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a10, A20, a11, a21, A22 cmat.FloatMatrix
	var TTL, TBR cmat.FloatMatrix
	var T00, t11, t21, T22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x2(
		&TTL, nil,
		nil, &TBR /**/, T, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tT,
		&tB /**/, tau, 0, util.PBOTTOM)

	for m(&ATL) > 0 && n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22 /**/, A, 1, util.PTOPLEFT)
		util.Repartition2x2to3x3(&TTL,
			&T00, nil, nil,
			nil, &t11, nil,
			nil, &t21, &T22 /**/, T, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2 /**/, tau, 1, util.PTOP)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.Get(0, 0)
		if tauval != 0.0 {
			t11.Set(0, 0, tauval)

			// t21 := -tauval*(a21 + A20*a10)
			blasd.Axpby(&t21, &a21, 1.0, 0.0)
			blasd.MVMult(&t21, &A20, &a10, -tauval, -tauval, gomas.NONE)
			// t21 := T22*t21
			blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER)
		}

		// --------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR /**/, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x3to2x2(
			&TTL, nil,
			nil, &TBR /**/, &T00, &t11, &T22, T, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tT,
			&tB /**/, &t0, &tau1, tau, util.PTOP)
	}
}
Esempio n. 3
0
File: ql.go Progetto: hrautila/gomas
/*
 * like LAPACK/dlafrt.f
 *
 * Build block reflector T from HH reflector stored in TriLU(A) and coefficients
 * in tau.
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * T = | T  z |   z = -tau*T*Y.T*v
 *     | 0  c |   c = tau
 *
 * Q = H(1)H(2)...H(k) building forward here.
 */
func unblkQLBlockReflector(T, A, tau *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a01, a11, A02, a12, A22 cmat.FloatMatrix
	var TTL, TBR cmat.FloatMatrix
	var T00, t11, t21, T22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x2(
		&TTL, nil,
		nil, &TBR, T, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, util.PBOTTOM)

	for m(&ATL) > 0 && n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x2to3x3(&TTL,
			&T00, nil, nil,
			nil, &t11, nil,
			nil, &t21, &T22, T, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, 1, util.PTOP)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.Get(0, 0)
		if tauval != 0.0 {
			t11.Set(0, 0, tauval)

			// t21 := -tauval*(a12.T + &A02.T*a12)
			blasd.Axpby(&t21, &a12, 1.0, 0.0)
			blasd.MVMult(&t21, &A02, &a01, -tauval, -tauval, gomas.TRANSA)
			// t21 := T22*t01
			blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER)
		}

		// --------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x3to2x2(
			&TTL, nil,
			nil, &TBR, &T00, &t11, &T22, T, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, util.PTOP)
	}
}
Esempio n. 4
0
File: lq.go Progetto: hrautila/gomas
/*
 * like LAPACK/dlafrt.f
 *
 * Build block reflector T from HH reflector stored in TriLU(A) and coefficients
 * in tau.
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * T = | T  z |   z = -tau*T*Y.T*v
 *     | 0  c |   c = tau
 *
 * Q = H(1)H(2)...H(k) building forward here.
 */
func unblkBlockReflectorLQ(T, A, tau *cmat.FloatMatrix) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, T02, t11, t12, T22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, util.PTOP)

	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, &T02,
			nil, &t11, &t12,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, 1, util.PBOTTOM)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.Get(0, 0)
		if tauval != 0.0 {
			t11.Set(0, 0, tauval)

			// t01 := -tauval*(a01 + A02*a12)
			blasd.Axpby(&t01, &a01, 1.0, 0.0)
			blasd.MVMult(&t01, &A02, &a12, -tauval, -tauval, gomas.NONE)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}

		// --------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, util.PBOTTOM)
	}
}
Esempio n. 5
0
/*
 * Unblocked QR decomposition with block reflector T.
 */
func unblockedQRT(A, T, W *cmat.FloatMatrix) *gomas.Error {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, T02, t11, t12, T22, w12 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)

	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, &a12,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, &T02,
			nil, &t11, &t12,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)

		// ------------------------------------------------------

		computeHouseholder(&a11, &a21, &t11)

		// H*[a12 A22].T
		w12.SubMatrix(W, 0, 0, a12.Len(), 1)
		applyHouseholder2x1(&t11, &a21, &a12, &A22, &w12, gomas.LEFT)

		// update T
		tauval := t11.Get(0, 0)
		if tauval != 0.0 {
			// t01 := -tauval*(a10.T + &A20.T*a21)
			//a10.CopyTo(&t01)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMult(&t01, &A20, &a21, -tauval, -tauval, gomas.TRANSA)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}

		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
	}
	return err
}
Esempio n. 6
0
/*
 *
 *  Building reduction block for blocked algorithm as described in (1).
 *
 *  A. update next column
 *    a10        [(U00)     (U00)  ]   [(a10)    (V00)            ]
 *    a11 :=  I -[(u10)*T00*(u10).T] * [(a11)  - (v01) * T00 * a10]
 *    a12        [(U20)     (U20)  ]   [(a12)    (V02)            ]
 *
 *  B. compute Householder reflector for updated column
 *    a21, t11 := Householder(a21)
 *
 *  C. update intermediate reductions
 *    v10      A02*a21
 *    v11  :=  a12*a21
 *    v12      A22*a21
 *
 *  D. update block reflector
 *    t01 :=  A20*a21
 *    t11 :=  t11
 */
func unblkBuildHessGQvdG(A, T, V, W *cmat.FloatMatrix) *gomas.Error {

	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, t11, T22 cmat.FloatMatrix
	var VL, VR, V0, v1, V2, Y0 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition1x2(
		&VL, &VR, V, 0, util.PLEFT)

	var beta float64

	for n(&VR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, nil,
			nil, &t11, nil,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition1x2to1x3(&VL,
			&V0, &v1, &V2, V, 1, util.PRIGHT)

		// ------------------------------------------------------
		// Compute Hessenberg update for next column of A:
		if n(&V0) > 0 {
			// y10 := T00*a10  (use t01 as workspace?)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)

			// a1 := a1 - V0*T00*a10
			blasd.MVMult(&a1, &V0, &t01, -1.0, 1.0, gomas.NONE)

			// update a1 := (I - Y*T*Y.T).T*a1 (here t01 as workspace)
			Y0.SubMatrix(A, 1, 0, n(&A00), n(&A00))
			updateVecLeftWY2(&a1, &Y0, &A20, &T00, &t01, gomas.TRANS)
			a10.Set(0, -1, beta)
		}

		// Compute Householder reflector
		computeHouseholderVec(&a21, &t11)
		beta = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// update T
		tauval := t11.Get(0, 0)
		if tauval != 0.0 {
			// t01 := -tauval*A20.T*a21
			blasd.MVMult(&t01, &A20, &a21, -tauval, 0.0, gomas.TRANS)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue1x3to1x2(
			&VL, &VR, &V0, &v1, V, util.PRIGHT)
	}
	A.Set(n(V), n(V)-1, beta)
	return nil
}
Esempio n. 7
0
/*
 * Compute unblocked bidiagonal reduction for A when M >= N
 *
 * Diagonal and first super/sub diagonal are overwritten with the
 * upper/lower bidiagonal matrix B.
 *
 * This computing (1-tauq*v*v.T)*A*(1-taup*u.u.T) from left to right.
 */
func unblkReduceBidiagLeft(A, tauq, taup, W *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a11, a12t, a21, A22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var tpT, tpB, tp0, taup1, tp2 cmat.FloatMatrix
	var y21, z21 cmat.FloatMatrix
	var v0 float64

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, 0, util.PTOP)
	util.Partition2x1(
		&tpT,
		&tpB, taup, 0, util.PTOP)

	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &a11, &a12t,
			nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PBOTTOM)
		util.Repartition2x1to3x1(&tpT,
			&tp0,
			&taup1,
			&tp2, taup, 1, util.PBOTTOM)

		// set temp vectors for this round
		y21.SetBuf(n(&a12t), 1, n(&a12t), W.Data())
		z21.SetBuf(m(&a21), 1, m(&a21), W.Data()[y21.Len():])
		// ------------------------------------------------------

		// Compute householder to zero subdiagonal entries
		computeHouseholder(&a11, &a21, &tauq1)

		// y21 := a12 + A22.T*a21
		blasd.Axpby(&y21, &a12t, 1.0, 0.0)
		blasd.MVMult(&y21, &A22, &a21, 1.0, 1.0, gomas.TRANSA)

		// a12t := a12t - tauq*y21
		tauqv := tauq1.Get(0, 0)
		blasd.Axpy(&a12t, &y21, -tauqv)

		// Compute householder to zero elements above 1st superdiagonal
		computeHouseholderVec(&a12t, &taup1)
		v0 = a12t.Get(0, 0)
		a12t.Set(0, 0, 1.0)
		taupv := taup1.Get(0, 0)

		// [v == a12t, u == a21]
		beta := blasd.Dot(&y21, &a12t)
		// z21 := tauq*beta*u
		blasd.Axpby(&z21, &a21, tauqv*beta, 0.0)
		// z21 := A22*v - z21
		blasd.MVMult(&z21, &A22, &a12t, 1.0, -1.0, gomas.NONE)
		// A22 := A22 - tauq*u*y21
		blasd.MVUpdate(&A22, &a21, &y21, -tauqv)
		// A22 := A22 - taup*z21*v
		blasd.MVUpdate(&A22, &z21, &a12t, -taupv)

		a12t.Set(0, 0, v0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PBOTTOM)
		util.Continue3x1to2x1(
			&tpT,
			&tpB, &tp0, &taup1, taup, util.PBOTTOM)
	}
}
Esempio n. 8
0
/*
 * This is adaptation of BIRED_LAZY_UNB algorithm from (1).
 *
 * Z matrix accumulates updates of row transformations i.e. first
 * Householder that zeros off diagonal entries on row. Vector z21
 * is updates for current round, Z20 are already accumulated updates.
 * Vector z21 updates a12 before next transformation.
 *
 * Y matrix accumulates updates on column tranformations ie Householder
 * that zeros elements below sub-diagonal. Vector y21 is updates for current
 * round, Y20 are already accumulated updates.  Vector y21 updates
 * a21 befor next transformation.
 *
 * Z, Y matrices upper trigonal part is not needed, temporary vector
 * w00 that has maximum length of n(Y) is placed on the last column of
 * Z matrix on each iteration.
 */
func unblkBuildBidiagRight(A, tauq, taup, Y, Z *cmat.FloatMatrix) {
	var ATL, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a10, a11, a12t, A20, a21, A22 cmat.FloatMatrix
	var YTL, YBR, ZTL, ZBR cmat.FloatMatrix
	var Y00, y10, Y20, y11, y21, Y22 cmat.FloatMatrix
	var Z00, z10, Z20, z11, z21, Z22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var tpT, tpB, tp0, taup1, tp2 cmat.FloatMatrix
	var w00 cmat.FloatMatrix
	var v0 float64

	// Y is workspace for building updates for first Householder.
	// And Z is space for build updates for second Householder
	// Y is n(A)-2,nb and Z is m(A)-1,nb

	util.Partition2x2(
		&ATL, nil,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&YTL, nil,
		nil, &YBR, Y, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&ZTL, nil,
		nil, &ZBR, Z, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, 0, util.PTOP)
	util.Partition2x1(
		&tpT,
		&tpB, taup, 0, util.PTOP)

	k := 0
	for k < n(Y) {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			&a10, &a11, &a12t,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&YTL,
			&Y00, nil, nil,
			&y10, &y11, nil,
			&Y20, &y21, &Y22, Y, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&ZTL,
			&Z00, nil, nil,
			&z10, &z11, nil,
			&Z20, &z21, &Z22, Z, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PBOTTOM)
		util.Repartition2x1to3x1(&tpT,
			&tp0,
			&taup1,
			&tp2, taup, 1, util.PBOTTOM)

		// set temp vectors for this round,
		w00.SubMatrix(Z, 0, n(Z)-1, m(&A02), 1)
		// ------------------------------------------------------
		// u10 == a10, U20 == A20, u21 == a21,
		// v10 == a01, V20 == A02, v21 == a12t
		if n(&Y20) > 0 {
			// a11 := a11 - u10t*z10 - y10*v10
			aa := blasd.Dot(&a10, &z10)
			aa += blasd.Dot(&y10, &a01)
			a11.Set(0, 0, a11.Get(0, 0)-aa)
			// a12t := a12t - V20*z10 - Z20*u10
			blasd.MVMult(&a12t, &A02, &y10, -1.0, 1.0, gomas.TRANS)
			blasd.MVMult(&a12t, &Z20, &a10, -1.0, 1.0, gomas.NONE)
			// a21 := a21 - Y20*v10 - U20*z10
			blasd.MVMult(&a21, &Y20, &a01, -1.0, 1.0, gomas.NONE)
			blasd.MVMult(&a21, &A20, &z10, -1.0, 1.0, gomas.NONE)
			// here restore bidiagonal entry
			a10.Set(0, -1, v0)
		}
		// Compute householder to zero superdiagonal entries
		computeHouseholder(&a11, &a12t, &taup1)
		taupv := taup1.Get(0, 0)

		// y21 := a21 + A22*v21 - Y20*U20.T*v21 - V20*Z20.T*v21
		blasd.Axpby(&y21, &a21, 1.0, 0.0)
		blasd.MVMult(&y21, &A22, &a12t, 1.0, 1.0, gomas.NONE)

		// w00 := U20.T*v21 [= A02*a12t]
		blasd.MVMult(&w00, &A02, &a12t, 1.0, 0.0, gomas.NONE)
		// y21 := y21 - U20*w00 [U20 == A20]
		blasd.MVMult(&y21, &Y20, &w00, -1.0, 1.0, gomas.NONE)
		// w00 := Z20.T*v21
		blasd.MVMult(&w00, &Z20, &a12t, 1.0, 0.0, gomas.TRANS)
		// y21 := y21 - V20*w00  [V20 == A02.T]
		blasd.MVMult(&y21, &A20, &w00, -1.0, 1.0, gomas.NONE)

		// a21 := a21 - taup*y21
		blasd.Scale(&y21, taupv)
		blasd.Axpy(&a21, &y21, -1.0)

		// Compute householder to zero elements below 1st subdiagonal
		computeHouseholderVec(&a21, &tauq1)
		v0 = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)
		tauqv := tauq1.Get(0, 0)

		// z21 := tauq*(A22*y - V20*Y20.T*u - Z20*U20.T*u - beta*v)
		// [v == a12t, u == a21]
		beta := blasd.Dot(&y21, &a21)
		// z21 := beta*v
		blasd.Axpby(&z21, &a12t, beta, 0.0)
		// w00 = Y20.T*u
		blasd.MVMult(&w00, &Y20, &a21, 1.0, 0.0, gomas.TRANS)
		// z21 = z21 + V20*w00 == A02.T*w00
		blasd.MVMult(&z21, &A02, &w00, 1.0, 1.0, gomas.TRANS)
		// w00 := U20.T*u  (U20.T == A20.T)
		blasd.MVMult(&w00, &A20, &a21, 1.0, 0.0, gomas.TRANS)
		// z21 := z21 + Z20*w00
		blasd.MVMult(&z21, &Z20, &w00, 1.0, 1.0, gomas.NONE)
		// z21 := -tauq*z21 + tauq*A22*v
		blasd.MVMult(&z21, &A22, &a21, tauqv, -tauqv, gomas.TRANS)
		// ------------------------------------------------------
		k += 1
		util.Continue3x3to2x2(
			&ATL, nil,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&YTL, nil,
			nil, &YBR, &Y00, &y11, &Y22, Y, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&ZTL, nil,
			nil, &ZBR, &Z00, &z11, &Z22, Z, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PBOTTOM)
		util.Continue3x1to2x1(
			&tpT,
			&tpB, &tp0, &taup1, taup, util.PBOTTOM)
	}
	// restore
	ABL.Set(0, -1, v0)
}