Пример #1
0
/*
 * Build the block reflector T for the RQ factorization from the elementary
 * Householder reflectors stored in A and the scalar coefficients in tau.
 * Modelled on LAPACK DLARFT (dlarft.f).
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * A, T and tau are walked from the bottom-right (bottom) corner towards the
 * top-left (top), one row/column per iteration. Each step fills
 *
 *   | t11  0  |   t11 = tau
 *   | t21 T22 |   t21 = -tau*T22*(a21 + A20*a10)
 *
 * i.e. the diagonal entry and the column below it; T22 is used as a LOWER
 * triangular operand. When the current tau is zero nothing is written to T
 * for that step.
 */
func unblkBlockReflectorRQ(T, A, tau *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a10, A20, a11, a21, A22 cmat.FloatMatrix
	var TTL, TBR cmat.FloatMatrix
	var T00, t11, t21, T22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2 cmat.FloatMatrix

	// partitioning starts at the bottom-right corner; the loop below runs
	// until the top-left blocks are exhausted
	util.Partition2x2(
		&ATL, nil,
		nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x2(
		&TTL, nil,
		nil, &TBR /**/, T, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tT,
		&tB /**/, tau, 0, util.PBOTTOM)

	for m(&ATL) > 0 && n(&ATL) > 0 {
		// expose the next diagonal element and its neighbours in A, T and tau
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22 /**/, A, 1, util.PTOPLEFT)
		util.Repartition2x2to3x3(&TTL,
			&T00, nil, nil,
			nil, &t11, nil,
			nil, &t21, &T22 /**/, T, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2 /**/, tau, 1, util.PTOP)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.Get(0, 0)
		if tauval != 0.0 {
			t11.Set(0, 0, tauval)

			// t21 := -tauval*(a21 + A20*a10)
			// (first call loads a21 into t21, second accumulates A20*a10)
			blasd.Axpby(&t21, &a21, 1.0, 0.0)
			blasd.MVMult(&t21, &A20, &a10, -tauval, -tauval, gomas.NONE)
			// t21 := T22*t21
			blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER)
		}

		// --------------------------------------------------
		// move the processed row/column into the bottom-right blocks
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR /**/, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x3to2x2(
			&TTL, nil,
			nil, &TBR /**/, &T00, &t11, &T22, T, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tT,
			&tB /**/, &t0, &tau1, tau, util.PTOP)
	}
}
Пример #2
0
/*
 * Build the block reflector T for the QL factorization from the elementary
 * Householder reflectors stored in A and the scalar coefficients in tau.
 * Modelled on LAPACK DLARFT (dlarft.f).
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * A, T and tau are walked from the bottom-right (bottom) corner towards the
 * top-left (top), one row/column per iteration. Each step fills
 *
 *   | t11  0  |   t11 = tau
 *   | t21 T22 |   t21 = -tau*T22*(a12.T + A02.T*a01)
 *
 * i.e. the diagonal entry and the column below it; T22 is used as a LOWER
 * triangular operand. When the current tau is zero nothing is written to T
 * for that step.
 */
func unblkQLBlockReflector(T, A, tau *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a01, a11, A02, a12, A22 cmat.FloatMatrix
	var TTL, TBR cmat.FloatMatrix
	var T00, t11, t21, T22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2 cmat.FloatMatrix

	// partitioning starts at the bottom-right corner; the loop below runs
	// until the top-left blocks are exhausted
	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x2(
		&TTL, nil,
		nil, &TBR, T, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, util.PBOTTOM)

	for m(&ATL) > 0 && n(&ATL) > 0 {
		// expose the next diagonal element and its neighbours in A, T and tau
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x2to3x3(&TTL,
			&T00, nil, nil,
			nil, &t11, nil,
			nil, &t21, &T22, T, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, 1, util.PTOP)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.Get(0, 0)
		if tauval != 0.0 {
			t11.Set(0, 0, tauval)

			// t21 := -tauval*(a12.T + A02.T*a01)
			// (first call loads a12 into t21, second accumulates A02.T*a01)
			blasd.Axpby(&t21, &a12, 1.0, 0.0)
			blasd.MVMult(&t21, &A02, &a01, -tauval, -tauval, gomas.TRANSA)
			// t21 := T22*t21
			blasd.MVMultTrm(&t21, &T22, 1.0, gomas.LOWER)
		}

		// --------------------------------------------------
		// move the processed row/column into the bottom-right blocks
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x3to2x2(
			&TTL, nil,
			nil, &TBR, &T00, &t11, &T22, T, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, util.PTOP)
	}
}
Пример #3
0
/*
 * Build the block reflector T for the LQ factorization from the elementary
 * Householder reflectors stored in A and the scalar coefficients in tau.
 * Modelled on LAPACK DLARFT (dlarft.f).
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * A, T and tau are walked from the top-left (top) corner towards the
 * bottom-right (bottom), one row/column per iteration. Each step fills
 *
 *   | T00 t01 |   t01 = -tau*T00*(a01 + A02*a12)
 *   |  0  t11 |   t11 = tau
 *
 * i.e. the diagonal entry and the column above it; T00 is used as an UPPER
 * triangular operand. When the current tau is zero nothing is written to T
 * for that step.
 */
func unblkBlockReflectorLQ(T, A, tau *cmat.FloatMatrix) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, T02, t11, t12, T22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2 cmat.FloatMatrix

	// partitioning starts at the top-left corner; the loop below runs until
	// the bottom-right block is exhausted
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, util.PTOP)

	for m(&ABR) > 0 && n(&ABR) > 0 {
		// expose the next diagonal element and its neighbours in A, T and tau
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, &T02,
			nil, &t11, &t12,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, 1, util.PBOTTOM)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.Get(0, 0)
		if tauval != 0.0 {
			t11.Set(0, 0, tauval)

			// t01 := -tauval*(a01 + A02*a12)
			// (first call loads a01 into t01, second accumulates A02*a12)
			blasd.Axpby(&t01, &a01, 1.0, 0.0)
			blasd.MVMult(&t01, &A02, &a12, -tauval, -tauval, gomas.NONE)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}

		// --------------------------------------------------
		// move the processed row/column into the top-left blocks
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, util.PBOTTOM)
	}
}
Пример #4
0
/*
 * Unblocked QR factorization of A that accumulates the block reflector T
 * while it proceeds.
 *
 * A and T are traversed from the top-left corner, one column per step. For
 * every column a Householder reflector is computed (its scalar is stored on
 * the diagonal of T), applied from the left to the remaining columns, and the
 * column of T above the diagonal is updated. W supplies scratch space for
 * applying the reflector. The function always returns nil.
 */
func unblockedQRT(A, T, W *cmat.FloatMatrix) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, a12 cmat.FloatMatrix
	var A20, a21, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, T02, t11, t12, T22 cmat.FloatMatrix
	var work cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)

	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, &a12,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, &T02,
			nil, &t11, &t12,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)

		// reflector for the current column; scalar lands in t11
		computeHouseholder(&a11, &a21, &t11)

		// apply H from the left to the trailing columns [a12; A22]
		work.SubMatrix(W, 0, 0, a12.Len(), 1)
		applyHouseholder2x1(&t11, &a21, &a12, &A22, &work, gomas.LEFT)

		// extend T by one column unless the reflector is the identity
		if tval := t11.Get(0, 0); tval != 0.0 {
			// t01 := -tval*(a10.T + A20.T*a21)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMult(&t01, &A20, &a21, -tval, -tval, gomas.TRANSA)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
	}
	return nil
}
Пример #5
0
/*
 * Build full block reflector T from a sequence of block reflectors stored in S.
 * Reflectors in S become the diagonal blocks of T; off-diagonal blocks are computed
 * from the elementary reflectors stored in the lower triangular part of A.
 *
 * A, T and S are traversed in steps of conf.LB columns starting from the
 * top-left (left) side. The function always returns nil.
 *
 * NOTE(review): parameter nc is never read in this function.
 * NOTE(review): the loop stops while more than conf.LB rows/columns remain and
 * the trailing `if` below is empty, so the last block is not processed here --
 * confirm whether that is intentional.
 */
func buildQRTReflector(T, A, S *cmat.FloatMatrix, nc int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, T01, T02, T11, T12, T22 cmat.FloatMatrix
	var SL, SR cmat.FloatMatrix
	var S00, S01, S02 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&SL, &SR, S, 0, util.PLEFT)

	// block size of one step
	nb := conf.LB
	for m(&ABR)-nb > 0 && n(&ABR)-nb > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &T01, &T02,
			nil, &T11, &T12,
			nil, nil, &T22, T, nb, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&SL,
			&S00, &S01, &S02, S, nb, util.PRIGHT)
		// --------------------------------------------------------
		// update T01: T01 = -T00*Y1.T*Y2*T11
		//  Y1 = /A10\   Y2 = /A11\
		//       \A20/        \A21/
		//
		// diagonal block of T is taken from the current block of S
		T11.Copy(&S01)
		updateQRTReflector(&T01, &A10, &A20, &A11, &A21, &T00, &S01, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &T11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&SL, &SR, &S00, &S01, S, util.PRIGHT)
	}
	// NOTE(review): empty branch -- remaining block is left untouched
	if m(&ABR) > 0 && n(&ABR) > 0 {
	}
	return nil
}
Пример #6
0
/*
 * Blocked LQ decomposition with compact WY transform. As implemented
 * in lapack.DGELQF subroutine.
 *
 * A is factored in place in row panels of lb rows, proceeding from the
 * top-left corner. Tvec receives the scalar coefficients of the elementary
 * reflectors, Twork holds the block reflector of the current panel and W is
 * scratch space. The trailing block that is not larger than lb in both
 * dimensions is factored with the unblocked algorithm.
 */
func blockedLQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR, AR cmat.FloatMatrix
	var A00, A11, A12, A21, A22 cmat.FloatMatrix
	var TT, TB cmat.FloatMatrix
	var t0, tau, t2 cmat.FloatMatrix
	var Wrk, w1 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&TT,
		&TB, Tvec, 0, util.PTOP)

	for m(&ABR)-lb > 0 && n(&ABR)-lb > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, lb, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&TT,
			&t0,
			&tau,
			&t2, Tvec, lb, util.PBOTTOM)

		// current block size: smaller dimension of A11
		cb, rb := A11.Size()
		if rb < cb {
			cb = rb
		}
		// --------------------------------------------------------
		// decompose top row panel AR == ( A11 A12 )
		w1.SubMatrix(W, 0, 0, cb, 1)
		util.Merge1x2(&AR, &A11, &A12)
		unblockedLQ(&AR, &tau, &w1)

		// build block reflector
		unblkBlockReflectorLQ(Twork, &AR, &tau)

		// update A'tail i.e. A21 and A22 with A'*(I - Y*T*Y.T).T
		// compute: C - Y*(C.T*Y*T).T
		ar, ac := A21.Size()
		Wrk.SubMatrix(W, 0, 0, ar, ac)
		updateRightLQ(&A21, &A22, &A11, &A12, Twork, &Wrk, true, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&TT,
			&TB, &t0, &tau, Tvec, util.PBOTTOM)
	}

	// last block with unblocked
	if m(&ABR) > 0 && n(&ABR) > 0 {
		w1.SubMatrix(W, 0, 0, m(&ABR), 1)
		// TB is the not yet used bottom part of Tvec. It is always a valid
		// view, also when the loop above did not run a single iteration
		// (t2 would then still be an unset zero-value matrix).
		unblockedLQ(&ABR, &TB, &w1)
	}
}
Пример #7
0
// blockedLUnoPiv computes the LU decomposition of A in place without
// pivoting, FLAME LU nopivots variant 5, in diagonal blocks of nb rows and
// columns. The trailing block is factored with the unblocked algorithm.
//
// conf is forwarded to the unblocked panel factorization and to the blocked
// BLAS calls, so the caller's configuration is honoured for the triangular
// solves and the matrix product. The first error reported by a panel
// factorization is returned; nil otherwise.
func blockedLUnoPiv(A *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)

	for m(&ATL) < m(A)-nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, &A02,
			&A10, &A11, &A12,
			&A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT)

		// A11 = LU(A11)
		if e := unblockedLUnoPiv(&A11, conf); e != nil && err == nil {
			err = e
		}
		// A12 = trilu(A11).-1*A12  (TRSM)
		blasd.SolveTrm(&A12, &A11, 1.0, gomas.LEFT|gomas.LOWER|gomas.UNIT, conf)
		// A21 = A21*triu(A11).-1 (TRSM)
		blasd.SolveTrm(&A21, &A11, 1.0, gomas.RIGHT|gomas.UPPER, conf)
		// A22 = A22 - A21*A12
		blasd.Mult(&A22, &A21, &A12, -1.0, 1.0, gomas.NONE, conf)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
	}
	// last block
	if m(&ATL) < m(A) {
		if e := unblockedLUnoPiv(&ABR, conf); e != nil && err == nil {
			err = e
		}
	}
	return err
}
Пример #8
0
/*
 * Blocked QL decomposition with compact WY transform.
 *
 * Compatible with lapack.DGEQLF.
 *
 * A is factored in place in column panels of lb columns, proceeding from the
 * bottom-right corner towards the top-left. Tvec receives the scalar
 * coefficients of the elementary reflectors, Twork holds the block reflector
 * of the current panel and W is scratch space. The remaining top-left block
 * that is not larger than lb in both dimensions is factored with the
 * unblocked algorithm.
 */
func blockedQL(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix
	var A00, A01, A10, A11, A22 cmat.FloatMatrix
	var TT, TB cmat.FloatMatrix
	var t0, tau, t2 cmat.FloatMatrix
	var Wrk, w1 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&TT,
		&TB, Tvec, 0, util.PBOTTOM)

	nb := lb
	for m(&ATL)-nb > 0 && n(&ATL)-nb > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			&A10, &A11, nil,
			nil, nil, &A22, A, nb, util.PTOPLEFT)
		util.Repartition2x1to3x1(&TT,
			&t0,
			&tau,
			&t2, Tvec, nb, util.PTOP)

		// current block size: smaller dimension of A11
		cb, rb := A11.Size()
		if rb < cb {
			cb = rb
		}
		// --------------------------------------------------------
		// decompose right side AL == /A01\
		//                            \A11/
		w1.SubMatrix(W, 0, 0, cb, 1)
		util.Merge2x1(&AL, &A01, &A11)
		unblockedQL(&AL, &tau, &w1)

		// build block reflector
		unblkQLBlockReflector(Twork, &AL, &tau)

		// update A'tail i.e. A10 and A00 with (I - Y*T*Y.T).T * A'tail
		// compute: C - Y*(C.T*Y*T).T
		ar, ac := A10.Size()
		Wrk.SubMatrix(W, 0, 0, ac, ar)
		updateQLLeft(&A10, &A00, &A11, &A01, Twork, &Wrk, true, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&TT,
			&TB, &t0, &tau, Tvec, util.PTOP)
	}

	// last block with unblocked
	if m(&ATL) > 0 && n(&ATL) > 0 {
		w1.SubMatrix(W, 0, 0, n(&ATL), 1)
		// TT is the not yet used top part of Tvec. It is always a valid
		// view, also when the loop above did not run a single iteration
		// (t0 would then still be an unset zero-value matrix).
		unblockedQL(&ATL, &TT, &w1)
	}
}
Пример #9
0
// unblockedLUnoPiv factors A in place into L and U without pivoting
// (FLAME LU nopivots variant 5). One diagonal element is processed per step:
// the column below it is divided by the diagonal value and the trailing
// submatrix receives a rank-1 update. conf is not used by this routine and
// the returned error is always nil.
func unblockedLUnoPiv(A *cmat.FloatMatrix, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02 cmat.FloatMatrix
	var a10, a11, a12 cmat.FloatMatrix
	var A20, a21, A22 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)

	for rows := m(A); m(&ATL) < rows; {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			&a10, &a11, &a12,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)

		// scale the column under the pivot: a21 = a21/a11
		pivot := a11.Get(0, 0)
		blasd.InvScale(&a21, pivot)
		// rank-1 update of the trailing block: A22 = A22 - a21*a12
		blasd.MVUpdate(&A22, &a21, &a12, -1.0)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
	}
	return nil
}
Пример #10
0
/*
 * Reduce symmetric matrix stored in the upper triangular part of A to
 * tridiagonal form.
 *
 * Elementary reflectors Q = H(n-1)...H(2)H(1) are stored on upper
 * triangular part of A. Reflector H(n-1) saved at column A(n) and
 * scalar multiplier to tau[n-1]. If parameter `tail` is true then
 * this function is used to reduce tail part of partially reduced
 * matrix and tau-vector partitioning is starting from last position.
 *
 * The matrix is traversed from the bottom-right corner towards the top-left,
 * one column per step. W provides the storage for the temporary vector y21.
 */
func unblkReduceTridiagUpper(A, tauq, W *cmat.FloatMatrix, tail bool) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a01, a11, A22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var y21 cmat.FloatMatrix
	var v0 float64

	// initial size of the bottom part of tauq: one element, or none when
	// reducing the tail of a partially reduced matrix
	toff := 1
	if tail {
		toff = 0
	}
	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, toff, util.PBOTTOM)

	for n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, nil,
			nil, &a11, nil,
			nil, nil, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PTOP)
		// set temp vectors for this round
		y21.SetBuf(n(&A00), 1, n(&A00), W.Data())
		// ------------------------------------------------------

		// Compute householder to zero super-diagonal entries
		computeHouseholderRev(&a01, &tauq1)
		tauqv := tauq1.Get(0, 0)

		// set superdiagonal to unit
		// (index -1 presumably addresses the last element of a01 -- confirm)
		v0 = a01.Get(-1, 0)
		a01.Set(-1, 0, 1.0)

		// y21 := tauq*A00*a01
		blasd.MVMultSym(&y21, &A00, &a01, tauqv, 0.0, gomas.UPPER)
		// beta := tauq*a01.T*y21
		beta := tauqv * blasd.Dot(&a01, &y21)
		// y21  := y21 - 0.5*beta*a01
		blasd.Axpy(&y21, &a01, -0.5*beta)
		// A00 := A00 - a01*y21.T - y21*a01.T
		blasd.MVUpdate2Sym(&A00, &a01, &y21, -1.0, gomas.UPPER)

		// restore superdiagonal value
		a01.Set(-1, 0, v0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PTOP)
	}
}
Пример #11
0
/*
 * Blocked reduction to tridiagonal form of a symmetric matrix stored in the
 * upper triangular part of A.
 *
 * A, Y and tauq are traversed from the bottom-right (bottom) towards the
 * top-left (top) in steps of lb. For each step unblkBuildTridiagUpper is
 * called on the current top-left part, and the leading block A00 then
 * receives a symmetric rank-2k update with A01 and Y0. The remaining
 * top-left block is reduced with the unblocked algorithm. W is scratch
 * space handed to the unblocked routines.
 */
func blkReduceTridiagUpper(A, tauq, Y, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
	var ATL, ABR cmat.FloatMatrix
	var A00, A01, A11, A22 cmat.FloatMatrix
	var YT, YB, Y0, Y1, Y2 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var v0 float64

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&YT,
		&YB, Y, 0, util.PBOTTOM)
	// bottom part of tauq starts with one element
	util.Partition2x1(
		&tqT,
		&tqB, tauq, 1, util.PBOTTOM)

	for m(&ATL)-lb > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			nil, &A11, nil,
			nil, nil, &A22, A, lb, util.PTOPLEFT)
		util.Repartition2x1to3x1(&YT,
			&Y0,
			&Y1,
			&Y2, Y, lb, util.PTOP)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, lb, util.PTOP)
		// ------------------------------------------------------
		unblkBuildTridiagUpper(&ATL, &tauq1, &YT, W)

		// set last entry of the first column of A01 to unit
		// (index -1 presumably addresses the last row -- confirm)
		v0 = A01.Get(-1, 0)
		A01.Set(-1, 0, 1.0)

		// A00 := A00 - A01*Y0.T - Y0*A01.T
		blasd.Update2Sym(&A00, &A01, &Y0, -1.0, 1.0, gomas.UPPER, conf)

		// restore the saved entry
		A01.Set(-1, 0, v0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&YT,
			&YB, &Y0, &Y1, Y, util.PTOP)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PTOP)
	}

	// remaining top-left block with unblocked code, tau partitioning in
	// `tail` mode
	if m(&ATL) > 0 {
		unblkReduceTridiagUpper(&ATL, &tqT, W, true)
	}
}
Пример #12
0
/*
 * Blocked RQ factorization using the compact WY representation, following
 * the lapack.DGERQF subroutine.
 *
 * A is processed from the bottom-right corner towards the top-left in row
 * panels of lb rows. Tvec receives the reflector scalars, Twork the block
 * reflector of the current panel, and W is scratch space. What remains in
 * the top-left corner is factored with the unblocked algorithm.
 */
func blockedRQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
	var ATL, ABR cmat.FloatMatrix
	var A00, A01, A10, A11, A22 cmat.FloatMatrix
	var TT, TB, t0, tau, t2 cmat.FloatMatrix
	var panel, updWork, hhWork cmat.FloatMatrix

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&TT,
		&TB, Tvec, 0, util.PBOTTOM)

	for m(&ATL) > lb && n(&ATL) > lb {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			&A10, &A11, nil,
			nil, nil, &A22, A, lb, util.PTOPLEFT)
		util.Repartition2x1to3x1(&TT,
			&t0,
			&tau,
			&t2, Tvec, n(&A11), util.PTOP)

		// scratch vector length: the smaller dimension of A11
		wlen, other := A11.Size()
		if other < wlen {
			wlen = other
		}

		// factor the row panel ( A10 A11 )
		hhWork.SubMatrix(W, 0, 0, wlen, 1)
		util.Merge1x2(&panel, &A10, &A11)
		unblockedRQ(&panel, &tau, &hhWork)

		// block reflector of the panel
		unblkBlockReflectorRQ(Twork, &panel, &tau)

		// apply it to the rows above: (A00 A01)(I - Y*T*Y.T)
		rows, cols := A01.Size()
		updWork.SubMatrix(W, 0, 0, rows, cols)
		updateRightRQ(&A01, &A00, &A11, &A10, Twork, &updWork, false, conf)

		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&TT,
			&TB, &t0, &tau, Tvec, util.PTOP)
	}

	// leftover top-left block: unblocked factorization
	if m(&ATL) > 0 && n(&ATL) > 0 {
		hhWork.SubMatrix(W, 0, 0, m(&ATL), 1)
		unblockedRQ(&ATL, &TT, &hhWork)
	}
}
Пример #13
0
/*
 * Tridiagonal reduction of LOWER triangular symmetric matrix, zero elements below 1st
 * subdiagonal:
 *
 *   A =  (1 - tau*u*u.t)*A*(1 - tau*u*u.T)
 *     =  (I - tau*( 0   0   )) (a11 a12) (I - tau*( 0  0   ))
 *        (        ( 0  u*u.t)) (a21 A22) (        ( 0 u*u.t))
 *
 *  a11, a12, a21 not affected
 *
 *  from LEFT:
 *    A22 = A22 - tau*u*u.T*A22
 *  from RIGHT:
 *    A22 = A22 - tau*A22*u.u.T
 *
 *  LEFT and RIGHT:
 *    A22   = A22 - tau*u*u.T*A22 - tau*(A22 - tau*u*u.T*A22)*u*u.T
 *          = A22 - tau*u*u.T*A22 - tau*A22*u*u.T + tau*tau*u*u.T*A22*u*u.T
 *    [x    = tau*A22*u (vector)]  (SYMV)
 *    A22   = A22 - u*x.T - x*u.T + tau*u*u.T*x*u.T
 *    [beta = tau*u.T*x (scalar)]  (DOT)
 *          = A22 - u*x.T - x*u.T + beta*u*u.T
 *          = A22 - u*(x - 0.5*beta*u).T - (x - 0.5*beta*u)*u.T
 *    [w    = x - 0.5*beta*u]      (AXPY)
 *          = A22 - u*w.T - w*u.T  (SYR2)
 *
 * Result of reduction for N = 5:
 *    ( d  .  .  . . )
 *    ( e  d  .  . . )
 *    ( v1 e  d  . . )
 *    ( v1 v2 e  d . )
 *    ( v1 v2 v3 e d )
 *
 * Parameters: A is reduced in place, traversed from the top-left corner one
 * column per step; tauq receives the scalar coefficient of each reflector;
 * W provides the storage for the temporary vector y21.
 */
func unblkReduceTridiagLower(A, tauq, W *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a11, a21, A22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var y21 cmat.FloatMatrix
	var v0 float64

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, 0, util.PTOP)

	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &a11, nil,
			nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PBOTTOM)
		// set temp vectors for this round
		y21.SetBuf(n(&A22), 1, n(&A22), W.Data())
		// ------------------------------------------------------

		// Compute householder to zero subdiagonal entries
		computeHouseholderVec(&a21, &tauq1)
		tauqv := tauq1.Get(0, 0)

		// set subdiagonal to unit; original value is restored after the update
		v0 = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// y21 := tauq*A22*a21
		blasd.MVMultSym(&y21, &A22, &a21, tauqv, 0.0, gomas.LOWER)
		// beta := tauq*a21.T*y21
		beta := tauqv * blasd.Dot(&a21, &y21)
		// y21  := y21 - 0.5*beta*a21
		blasd.Axpy(&y21, &a21, -0.5*beta)
		// A22 := A22 - a21*y21.T - y21*a21.T
		blasd.MVUpdate2Sym(&A22, &a21, &y21, -1.0, gomas.LOWER)

		// restore subdiagonal
		a21.Set(0, 0, v0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PBOTTOM)
	}
}
Пример #14
0
// blockedCHOL computes the Cholesky factorization of A in place using
// diagonal blocks of conf.LB rows and columns; the trailing block is factored
// with the unblocked algorithm.
//
// If flags has gomas.LOWER set the lower triangular factor is computed,
// otherwise the upper triangular one. The first error reported by any of the
// diagonal block factorizations, including the last block, is returned. Each
// block is given its row offset within A so that the index carried by the
// error refers to A and not to the block.
func blockedCHOL(A *cmat.FloatMatrix, flags int, conf *gomas.Config) *gomas.Error {
	var err, firstErr *gomas.Error
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix

	nb := conf.LB
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)

	for m(A)-m(&ATL) > nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, &A02,
			&A10, &A11, &A12,
			&A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT)

		if flags&gomas.LOWER != 0 {
			// A11 = chol(A11)
			err = unblockedLowerCHOL(&A11, flags, m(&ATL))
			// A21 = A21 * tril(A11).-T
			blasd.SolveTrm(&A21, &A11, 1.0, gomas.RIGHT|gomas.LOWER|gomas.TRANSA, conf)
			// A22 = A22 - A21*A21.T
			blasd.UpdateSym(&A22, &A21, -1.0, 1.0, gomas.LOWER, conf)
		} else {
			// A11 = chol(A11)
			err = unblockedUpperCHOL(&A11, flags, m(&ATL))
			// A12 = triu(A11).-T * A12
			blasd.SolveTrm(&A12, &A11, 1.0, gomas.UPPER|gomas.TRANSA, conf)
			// A22 = A22 - A12.T*A12
			blasd.UpdateSym(&A22, &A12, -1.0, 1.0, gomas.UPPER|gomas.TRANSA, conf)
		}
		if err != nil && firstErr == nil {
			firstErr = err
		}

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
	}

	if m(&ATL) < m(A) {
		// last block; its failure must be reported like any other block's
		if flags&gomas.LOWER != 0 {
			err = unblockedLowerCHOL(&ABR, flags, m(&ATL))
		} else {
			err = unblockedUpperCHOL(&ABR, flags, m(&ATL))
		}
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
Пример #15
0
/*
 * Unblocked code for generating M by N matrix Q with orthogonal columns which
 * are defined as the last N columns of the product of K first elementary
 * reflectors.
 *
 * Parameter nk is last nk elementary reflectors that are not used in computing
 * the matrix Q. Parameter mk length of the first unused elementary reflectors
 * First nk columns are zeroed and subdiagonal mk-nk is set to unit.
 *
 * Compatible with lapack.DORG2L subroutine.
 *
 * A is overwritten, proceeding from the (mk, nk) top-left partition towards
 * the bottom-right one column per step. Tvec holds the scalar coefficients
 * of the reflectors and W is scratch space for applying them. The left side
 * is cleared only when nk > 0 and mayClear is true.
 */
func unblkBuildQL(A, Tvec, W *cmat.FloatMatrix, mk, nk int, mayClear bool) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, a10, a11, a21, A22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2, w12, D cmat.FloatMatrix

	// (mk, nk) = (rows, columns) of upper left partition
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mk, nk, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, nk, util.PTOP)

	// zero the left side
	if nk > 0 && mayClear {
		blasd.Scale(&ABL, 0.0)
		blasd.Scale(&ATL, 0.0)
		// add one to the entries of diagonal nk-mk of the cleared ATL
		D.Diag(&ATL, nk-mk)
		blasd.Add(&D, 1.0)
	}

	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, nil,
			&a10, &a11, nil,
			nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, Tvec, 1, util.PBOTTOM)
		// ------------------------------------------------------
		// apply the current reflector (vector a01, scalar tau1) from the left
		w12.SubMatrix(W, 0, 0, a10.Len(), 1)
		applyHouseholder2x1(&tau1, &a01, &a10, &A00, &w12, gomas.LEFT)

		// current column: a01 := -tau*a01, a11 := 1 - tau
		blasd.Scale(&a01, -tau1.Get(0, 0))
		a11.Set(0, 0, 1.0-tau1.Get(0, 0))

		// zero bottom elements
		blasd.Scale(&a21, 0.0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, Tvec, util.PBOTTOM)
	}
}
Пример #16
0
/*
 * Unblocked code for generating M by N matrix Q with orthogonal columns which
 * are defined as the first N columns of the product of K first elementary
 * reflectors.
 *
 * Parameters nk = n(A)-K, mk = m(A)-K define the initial partitioning of
 * matrix A.
 *
 *  Q = H(k)H(k-1)...H(1)  , 0 < k <= M, where H(i) = I - tau*v*v.T
 *
 * Computation is ordered as H(k)*H(k-1)...*H(1)*I ie. from bottom to top.
 *
 * If k < M rows k+1:M are cleared and diagonal entries [k+1:M,k+1:M] are
 * set to unit. Then the matrix Q is generated by right multiplying elements below
 * of i'th elementary reflector H(i).
 *
 * Compatible to lapack.xORG2L subroutine.
 *
 * NOTE(review): parts of this header look copied from the QR/QL variants;
 * the LAPACK counterpart for the LQ case is presumably xORGL2 (Q with
 * orthonormal rows) -- confirm.
 *
 * A is overwritten, proceeding from the (mk, nk) bottom-right partition
 * towards the top-left one row/column per step. Tvec holds the scalar
 * coefficients of the reflectors and W is scratch space for applying them.
 * The bottom part is cleared only when mk > 0 and mayClear is true.
 */
func unblkBuildLQ(A, Tvec, W *cmat.FloatMatrix, mk, nk int, mayClear bool) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, a12, a21, A22 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2, w12, D cmat.FloatMatrix

	// (mk, nk) = (rows, columns) of bottom right partition
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mk, nk, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, mk, util.PBOTTOM)

	// zero the bottom part
	if mk > 0 && mayClear {
		blasd.Scale(&ABL, 0.0)
		blasd.Scale(&ABR, 0.0)
		// add one to the diagonal entries of the cleared ABR
		D.Diag(&ABR)
		blasd.Add(&D, 1.0)
	}

	for m(&ATL) > 0 && n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, &a12,
			nil, &a21, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, Tvec, 1, util.PTOP)
		// ------------------------------------------------------

		// apply the current reflector (vector a12, scalar tau1) from the right
		w12.SubMatrix(W, 0, 0, a21.Len(), 1)
		applyHouseholder2x1(&tau1, &a12, &a21, &A22, &w12, gomas.RIGHT)

		// current row: a12 := -tau*a12, a11 := 1 - tau
		blasd.Scale(&a12, -tau1.Get(0, 0))
		a11.Set(0, 0, 1.0-tau1.Get(0, 0))

		// zero the row entries left of the diagonal
		blasd.Scale(&a10, 0.0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, Tvec, util.PTOP)
	}
}
Пример #17
0
/*
 * Blocked QR factorization of A that stores the block reflector of every
 * column panel in T.
 *
 * A is traversed from the top-left corner in panels of conf.LB columns and T
 * from left to right in matching column blocks. Each panel is factored with
 * unblockedQRT and the columns to its right are then updated with the
 * panel's block reflector. The trailing block is factored with the unblocked
 * algorithm. W is scratch space. The function always returns nil.
 */
func blockedQRT(A, T, W *cmat.FloatMatrix, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A01, A02 cmat.FloatMatrix
	var A10, A11, A12 cmat.FloatMatrix
	var A20, A21, A22 cmat.FloatMatrix
	var panel, rest cmat.FloatMatrix
	var TL, TR, T00, T01, T02 cmat.FloatMatrix
	var updWork cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&TL, &TR, T, 0, util.PLEFT)

	blk := conf.LB
	for m(&ABR) > blk && n(&ABR) > blk {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, &A02,
			&A10, &A11, &A12,
			&A20, &A21, &A22, A, blk, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&TL,
			&T00, &T01, &T02, T, blk, util.PRIGHT)
		util.Partition1x2(
			&panel, &rest, &ABR, blk, util.PLEFT)

		// factor the column panel [A11; A21], block reflector goes to T01
		unblockedQRT(&panel, &T01, W)

		// update the columns right of the panel, [A12; A22], with the
		// transposed block reflector: C - Y*(C.T*Y*T).T
		rows, cols := A12.Size()
		updWork.SubMatrix(W, 0, 0, cols, rows)
		updateWithQTLeft(&A12, &A22, &A11, &A21, &T01, &updWork, true, conf)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&TL, &TR, &T00, &T01, T, util.PRIGHT)
	}

	// trailing block: unblocked, reflector in the leading square of TR
	if m(&ABR) > 0 && n(&ABR) > 0 {
		k := n(&ABR)
		T01.SubMatrix(&TR, 0, 0, k, k)
		unblockedQRT(&ABR, &T01, W)
	}
	return nil
}
Пример #18
0
/*
 * Overwrite Q in place, element by element, and then normalize its columns.
 *
 * With Q' denoting the result before normalization, every entry becomes
 *
 *   Q'(r, c) = z(r) / Q(c, r)
 *
 * i.e. each off-diagonal pair (r, c), (c, r) is swapped while dividing and
 * diagonal entries become z(k)/Q(k, k). Afterwards the first z.Len() columns
 * of Q are each divided by their 2-norm.
 */
func trdsecEigenBuildInplace(Q, z *cmat.FloatMatrix) {
	var QTL, QBR cmat.FloatMatrix
	var Q00, q11, q12, q21, Q22 cmat.FloatMatrix
	var col cmat.FloatMatrix

	util.Partition2x2(
		&QTL, nil,
		nil, &QBR, Q, 0, 0, util.PTOPLEFT)

	for m(&QBR) > 0 {
		util.Repartition2x2to3x3(&QTL,
			&Q00, nil, nil,
			nil, &q11, &q12,
			nil, &q21, &Q22, Q, 1, util.PBOTTOMRIGHT)

		// index of the current diagonal element
		pos := m(&Q00)
		zdiag := z.GetAt(pos)
		q11.Set(0, 0, zdiag/q11.Get(0, 0))

		// row right of the diagonal and column below it exchange roles
		for j, cnt := 0, q12.Len(); j < cnt; j++ {
			zbelow := z.GetAt(pos + j + 1)
			right := q12.GetAt(j)
			below := q21.GetAt(j)
			q12.SetAt(j, zdiag/below)
			q21.SetAt(j, zbelow/right)
		}

		util.Continue3x3to2x2(
			&QTL, nil,
			nil, &QBR, &Q00, &q11, &Q22, Q, util.PBOTTOMRIGHT)
	}

	// scale every column to unit 2-norm
	for j, cnt := 0, z.Len(); j < cnt; j++ {
		col.Column(Q, j)
		blasd.InvScale(&col, blasd.Nrm2(&col))
	}
}
Пример #19
0
/*
 * Unblocked Cholesky factorization of the lower triangular part of A,
 * computed in place one diagonal element at a time.
 *
 * A negative diagonal value is left as it is and recorded as a
 * gomas.ENEGATIVE error carrying its row index plus the offset nr; only the
 * first such error is kept and the factorization continues regardless.
 * flags is not used by this routine.
 */
func unblockedLowerCHOL(A *cmat.FloatMatrix, flags int, nr int) (err *gomas.Error) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11 cmat.FloatMatrix
	var A20, a21, A22 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)

	for m(&ATL) < m(A) {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)

		// diagonal: a11 = sqrt(a11) unless it is negative
		if diag := a11.Get(0, 0); !(diag < 0.0) {
			a11.Set(0, 0, math.Sqrt(diag))
		} else if err == nil {
			err = gomas.NewError(gomas.ENEGATIVE, "DecomposeCHOL", m(&ATL)+nr)
		}

		// column below the diagonal: a21 = a21/a11
		blasd.InvScale(&a21, a11.Get(0, 0))
		// trailing block: A22 = A22 - a21*a21' (SYR)
		blasd.MVUpdateSym(&A22, &a21, -1.0, gomas.LOWER)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
	}
	return err
}
Пример #20
0
/*
 * Unblocked RQ factorization, following the lapack.DGERQ2 subroutine.
 *
 * A is processed in place from the bottom-right corner towards the top-left,
 * one row per step: a Householder reflector is computed from the diagonal
 * element and the row part to its left, its scalar is stored in Tvec, and the
 * reflector is applied from the right to the rows above. W is scratch space
 * for that application.
 */
func unblockedRQ(A, Tvec, W *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a01, a10, a11, A22 cmat.FloatMatrix
	var tT, tB, t0, tau1, t2 cmat.FloatMatrix
	var work cmat.FloatMatrix

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PBOTTOM)

	for m(&ATL) > 0 && n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, nil,
			&a10, &a11, nil,
			nil, nil, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, Tvec, 1, util.PTOP)

		// reflector for the current row
		computeHouseholder(&a11, &a10, &tau1)

		// apply it from the right to the rows above: [A00 a01]
		work.SubMatrix(W, 0, 0, a01.Len(), 1)
		applyHouseholder2x1(&tau1, &a10, &a01, &A00, &work, gomas.RIGHT)

		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, Tvec, util.PTOP)
	}
}
Пример #21
0
/*
 * Blocked computation of C = C*Q or C = C*Q.T where Q is given as elementary
 * reflectors stored in A and scalar coefficients in tau.
 *
 * For each block the triangular block reflector T is built with
 * unblkQRBlockReflector and C is updated with updateWithQTRight (compact WY
 * form). The TRANS bit of flags selects C*Q.T, otherwise C*Q is computed.
 * Blocks are requested with width nb; the actual width is read back from A11.
 *
 * Workspace W holds the nb-by-nb block reflector at its start, followed by an
 * m(C)-by-nb work matrix.
 */
func blockedMultQRight(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22, panel cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var tauT, tauB, tau0, tau1, tau2 cmat.FloatMatrix
	var tBuf, tBlk, wBuf, wBlk cmat.FloatMatrix

	var remaining *cmat.FloatMatrix
	var startA, dirA, startT, dirT, startC, dirC util.Direction
	var rowOff, colOff int

	transpose := flags&gomas.TRANS != 0

	// select where the partitioning starts and which way it moves
	switch {
	case transpose:
		// bottom-right to top-left gives the transposed sequence (C*Q.T)
		startA, dirA = util.PBOTTOMRIGHT, util.PTOPLEFT
		startT, dirT = util.PBOTTOM, util.PTOP
		startC, dirC = util.PRIGHT, util.PLEFT
		rowOff = imax(0, m(A)-n(A))
		colOff = n(C) - n(A)
		remaining = &ATL
	default:
		// top-left to bottom-right gives the normal sequence (C*Q);
		// offsets stay at zero
		startA, dirA = util.PTOPLEFT, util.PBOTTOMRIGHT
		startT, dirT = util.PTOP, util.PBOTTOM
		startC, dirC = util.PLEFT, util.PRIGHT
		remaining = &ABR
	}

	// block reflector first in the workspace, work matrix right after it
	tBuf.SetBuf(nb, nb, nb, W.Data())
	wBuf.SetBuf(m(C), nb, m(C), W.Data()[tBuf.Len():])

	util.Partition2x2(&ATL, &ATR, &ABL, &ABR, A, rowOff, 0, startA)
	util.Partition1x2(&CL, &CR, C, colOff, startC)
	util.Partition2x1(&tauT, &tauB, tau, 0, startT)

	for m(remaining) > 0 && n(remaining) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, dirA)
		util.Repartition2x1to3x1(&tauT, &tau0, &tau1, &tau2, tau, nb, dirT)

		// the column block of C has to be as wide as A11
		width := n(&A11)
		util.Repartition1x2to1x3(&CL, &C0, &C1, &C2, C, width, dirC)

		// zero the reflector storage and build T for [A11; A21]
		util.Merge2x1(&panel, &A11, &A21)
		tBlk.SubMatrix(&tBuf, 0, 0, width, width)
		blasd.Scale(&tBlk, 0.0)
		unblkQRBlockReflector(&tBlk, &panel, &tau1)

		// C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T
		// C*Q   == C - C*Y*T*Y.T
		wBlk.SubMatrix(&wBuf, 0, 0, m(&C1), width)
		updateWithQTRight(&C1, &C2, &A11, &A21, &tBlk, &wBlk, transpose, conf)

		util.Continue3x3to2x2(&ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, dirA)
		util.Continue1x3to1x2(&CL, &CR, &C0, &C1, C, dirC)
		util.Continue3x1to2x1(&tauT, &tauB, &tau0, &tau1, tau, dirT)
	}
}
Пример #22
0
/*
 * Computes upper Hessenberg reduction of N-by-N matrix A using unblocked
 * algorithm as described in (1).
 *
 * Hessenberg reduction: A = Q.T*B*Q, Q unitary, B upper Hessenberg
 *  Q = H(0)*H(1)*...*H(k) where H(k) is k'th Householder reflector.
 *
 * Compatible with lapack.DGEHD2.
 *
 * Arguments as used below:
 *  A     matrix to reduce, updated in place
 *  Tvec  vector of scalar coefficients; one element is visited per reflector
 *  W     workspace; v1 (m(A) elements) and w12 (n(A22) elements) are both
 *        views starting at the first element of W, so they share storage
 *  row   size argument for the initial 2x2 partitioning of A
 */
func unblkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, row int) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a11, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2, w12, v1 cmat.FloatMatrix

	// A is partitioned twice: as 2x2 blocks for the diagonal walk and as
	// column blocks (AL, AR) to reach all rows of the trailing columns.
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, row, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	// workspace for the product A2*a21
	v1.SubMatrix(W, 0, 0, m(A), 1)

	for m(&ABR) > 1 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &a11, nil,
			nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, Tvec, 1, util.PBOTTOM)

		// ------------------------------------------------------
		// a21 = [beta; H(k)].T
		computeHouseholderVec(&a21, &tau1)
		tauval := tau1.Get(0, 0)
		// save first element of a21 and replace it with one for the
		// duration of the updates; it is restored at the end of the step
		beta := a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// A2 := A2 - tau*v1*a21   (A2 := A2*H(k))
		blasd.MVUpdate(&A2, &v1, &a21, -tauval)

		// w12 views the same start of W as v1; v1 is not read after the
		// update of A2 above
		w12.SubMatrix(W, 0, 0, n(&A22), 1)
		// w12 := a21.T*A22 = A22.T*a21
		blasd.MVMult(&w12, &A22, &a21, 1.0, 0.0, gomas.TRANS)
		// A22 := A22 - tau*a21*w12   (A22 := H(k)*A22)
		blasd.MVUpdate(&A22, &a21, &w12, -tauval)

		// put the saved value back to the first element of a21
		a21.Set(0, 0, beta)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, Tvec, util.PBOTTOM)
	}
}
Пример #23
0
/*
 * Blocked version of Hessenberg reduction algorithm as presented in (1). This
 * version uses compact-WY transformation.
 *
 * Some notes:
 *
 * Elementary reflectors stored in [A11; A21].T are not on diagonal of A11. Update of
 * a block aligned with A11; A21 is as follow
 *
 * 1. Update from left Q(k)*C:
 *                                         c0   0                            c0
 * (I - Y*T*Y.T).T*C = C - Y*(C.T*Y)*T.T = C1 - Y1 * (C1.T.Y1+C2.T*Y2)*T.T = C1-Y1*W
 *                                         C2   Y2                           C2-Y2*W
 *
 * where W = (C1.T*Y1+C2.T*Y2)*T.T and first row of C is not affected by update
 *
 * 2. Update from right C*Q(k):
 *                                       0
 * C - C*Y*T*Y.T = c0;C1;C2 - c0;C1;C2 * Y1 *T*(0;Y1;Y2) = c0; C1-W*Y1; C2-W*Y2
 *                                       Y2
 * where  W = (C1*Y1 + C2*Y2)*T and first column of C is not affected
 *
 * Arguments as used below:
 *  A     matrix to reduce, updated in place
 *  Tvec  vector of scalar coefficients; filled block by block from the
 *        diagonal of T
 *  W     workspace; T is the conf.LB-by-conf.LB view at its start and V the
 *        m(A)-by-conf.LB view starting at row conf.LB
 *  nb    block size used for the partitioning
 *        NOTE(review): T and V are sized with conf.LB while the loop steps
 *        by nb -- presumably callers pass nb == conf.LB; confirm.
 *  conf  configuration passed on to the blocked BLAS and update calls
 *
 * Always returns nil.
 */
func blkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A11, A12, A21, A22, A2 cmat.FloatMatrix
	var tT, tB, td cmat.FloatMatrix
	var t0, t1, t2, T cmat.FloatMatrix
	var V, VT, VB /*V0, V1, V2,*/, Y1, Y2, W0 cmat.FloatMatrix

	//fmt.Printf("blkHessGQvdG...\n")
	// carve block reflector T and intermediate matrix V out of the workspace;
	// td is the diagonal of T
	T.SubMatrix(W, 0, 0, conf.LB, conf.LB)
	V.SubMatrix(W, conf.LB, 0, m(A), conf.LB)
	td.Diag(&T)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	for m(&ABR) > nb+1 && n(&ABR) > nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, Tvec, nb, util.PBOTTOM)

		// split V at the row where the current diagonal block starts
		util.Partition2x1(
			&VT,
			&VB, &V, m(&ATL), util.PTOP)
		// ------------------------------------------------------

		// build the reduction block for the current panel; the returned
		// error value is not inspected here
		unblkBuildHessGQvdG(&ABR, &T, &VB, nil)
		// copy diagonal of T to the current part of Tvec
		blasd.Copy(&t1, &td)

		// m(Y) == m(ABR)-1, n(Y) == n(A11)
		Y1.SubMatrix(&ABR, 1, 0, n(&A11), n(&A11))
		Y2.SubMatrix(&ABR, 1+n(&A11), 0, m(&A21)-1, n(&A11))

		// [A01; A02] == ATR := ATR*(I - Y*T*Y.T)
		updateHessRightWY(&ATR, &Y1, &Y2, &T, &VT, conf)

		// A2 = [A12; A22].T
		util.Merge2x1(&A2, &A12, &A22)

		// A2 := A2 - VB*T*A21.T
		// element (0, -1) of A21 is saved, set to one for the update and
		// restored afterwards
		// NOTE(review): -1 presumably addresses the last column; confirm
		// against cmat indexing rules.
		be := A21.Get(0, -1)
		A21.Set(0, -1, 1.0)
		blasd.MultTrm(&VB, &T, 1.0, gomas.UPPER|gomas.RIGHT)
		blasd.Mult(&A2, &VB, &A21, -1.0, 1.0, gomas.TRANSB, conf)
		A21.Set(0, -1, be)

		// A2 := (I - Y*T*Y.T).T * A2
		// W0 is a view on the start of V and serves as workspace here
		W0.SubMatrix(&V, 0, 0, n(&A2), n(&Y2))
		updateHessLeftWY(&A2, &Y1, &Y2, &T, &W0, conf)

		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, Tvec, util.PBOTTOM)
	}

	if m(&ABR) > 1 {
		// do the rest with unblocked
		// the remaining columns [ATR; ABR] are handed over together with the
		// unprocessed part of Tvec; W0 is reset to an m(A)-by-1 view over the
		// start of the workspace buffer
		util.Merge2x1(&A2, &ATR, &ABR)
		W0.SetBuf(m(A), 1, m(A), W.Data())
		unblkHessGQvdG(&A2, &tB, &W0, m(&ATR))
	}
	return nil
}
Пример #24
0
/*
 *
 *  Building reduction block for blocked algorithm as described in (1).
 *
 *  A. update next column
 *    a10        [(U00)     (U00)  ]   [(a10)    (V00)            ]
 *    a11 :=  I -[(u10)*T00*(u10).T] * [(a11)  - (v01) * T00 * a10]
 *    a12        [(U20)     (U20)  ]   [(a12)    (V02)            ]
 *
 *  B. compute Householder reflector for updated column
 *    a21, t11 := Householder(a21)
 *
 *  C. update intermediate reductions
 *    v10      A02*a21
 *    v11  :=  a12*a21
 *    v12      A22*a21
 *
 *  D. update block reflector
 *    t01 :=  A20*a21
 *    t11 :=  t11
 *
 *  Arguments as used below:
 *    A  panel to reduce, updated in place
 *    T  block reflector; column t01 and diagonal element t11 are written on
 *       each step, t01 also serves as temporary storage
 *    V  intermediate products; one column is computed per step and the
 *       number of columns of V sets the number of steps
 *    W  not referenced in this function
 *
 *  Always returns nil.
 */
func unblkBuildHessGQvdG(A, T, V, W *cmat.FloatMatrix) *gomas.Error {

	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, t11, T22 cmat.FloatMatrix
	var VL, VR, V0, v1, V2, Y0 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition1x2(
		&VL, &VR, V, 0, util.PLEFT)

	// value replaced by one in the previous step's reflector vector; carried
	// over the iterations and written back one step later
	var beta float64

	for n(&VR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, nil,
			nil, &t11, nil,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition1x2to1x3(&VL,
			&V0, &v1, &V2, V, 1, util.PRIGHT)

		// ------------------------------------------------------
		// Compute Hessenberg update for next column of A:
		// (skipped on the first step when no columns have been processed)
		if n(&V0) > 0 {
			// y10 := T00*a10  (use t01 as workspace?)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)

			// a1 := a1 - V0*T00*a10
			blasd.MVMult(&a1, &V0, &t01, -1.0, 1.0, gomas.NONE)

			// update a1 := (I - Y*T*Y.T).T*a1 (here t01 as workspace)
			Y0.SubMatrix(A, 1, 0, n(&A00), n(&A00))
			updateVecLeftWY2(&a1, &Y0, &A20, &T00, &t01, gomas.TRANS)
			// write the value saved on the previous step to element
			// (0, -1) of a10
			// NOTE(review): -1 presumably addresses the last element;
			// confirm against cmat indexing rules.
			a10.Set(0, -1, beta)
		}

		// Compute Householder reflector
		computeHouseholderVec(&a21, &t11)
		// save first element of a21 and replace it with one; it is written
		// back on the next step or after the loop
		beta = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// update T
		// (t01 is left untouched when the coefficient is zero)
		tauval := t11.Get(0, 0)
		if tauval != 0.0 {
			// t01 := -tauval*A20.T*a21
			blasd.MVMult(&t01, &A20, &a21, -tauval, 0.0, gomas.TRANS)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue1x3to1x2(
			&VL, &VR, &V0, &v1, V, util.PRIGHT)
	}
	// write back the value saved on the last step
	A.Set(n(V), n(V)-1, beta)
	return nil
}
Пример #25
0
/*
 * Blocked build step for the LQ case. K is the number of reflectors and lb
 * the blocking factor.
 *
 * The partitioning of A starts from the bottom-right. When K is not a multiple
 * of lb the K % lb odd reflectors are handled first by the unblocked code,
 * otherwise the bottom-right block is set to identity. After that A is walked
 * towards the top-left in blocks of lb: for each row block [A11 A12] a block
 * reflector is built to Twork with unblkBlockReflectorLQ, the blocks below
 * (A21, A22) are updated from the right with updateRightLQ, the row block
 * itself is processed with unblkBuildLQ and the block to its left (A10) is
 * zeroed.
 */
func blkBuildLQ(A, Tvec, Twork, W *cmat.FloatMatrix, K, lb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A12, A21, A22 cmat.FloatMatrix
	var rowBlk, diag, refl, work cmat.FloatMatrix
	var tauT, tauB, tau0, tau1, tau2 cmat.FloatMatrix

	colsLeft := n(A) - K
	rowsLeft := m(A) - K
	odd := K % lb
	rowOff, colOff := rowsLeft+odd, colsLeft+odd

	util.Partition2x2(&ATL, &ATR, &ABL, &ABR, A, rowOff, colOff, util.PBOTTOMRIGHT)
	util.Partition2x1(&tauT, &tauB, Tvec, rowOff, util.PBOTTOM)

	// clear the bottom part __CHECK HERE: column or row offset?
	if colOff > 0 {
		blasd.Scale(&ABL, 0.0)
		if odd <= 0 {
			// K is a multiple of the blocking factor: bottom-right block
			// becomes identity
			blasd.Scale(&ABR, 0.0)
			diag.Diag(&ABR)
			blasd.Add(&diag, 1.0)
		} else {
			// odd reflectors at the end are handled with unblocked code
			unblkBuildLQ(&ABR, &tauB, W, m(&ABR)-odd, n(&ABR)-odd, true)
		}
	}

	for m(&ATL) > 0 && n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, &A12,
			nil, &A21, &A22, A, lb, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tauT, &tau0, &tau1, &tau2, Tvec, lb, util.PTOP)

		// current row block [A11 A12] and its block reflector
		util.Merge1x2(&rowBlk, &A11, &A12)
		refl.SubMatrix(Twork, 0, 0, n(&A11), n(&A11))
		unblkBlockReflectorLQ(&refl, &rowBlk, &tau1)

		// apply (I - Y*T*Y.T) from the right to A21 and A22
		rows, cols := A21.Size()
		work.SubMatrix(W, 0, 0, rows, cols)
		updateRightLQ(&A21, &A22, &A11, &A12, &refl, &work, false, conf)

		// process the row block itself
		unblkBuildLQ(&rowBlk, &tau1, W, 0, n(&A12), false)

		// clear the block left of A11
		blasd.Scale(&A10, 0.0)

		util.Continue3x3to2x2(&ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(&tauT, &tauB, &tau0, &tau1, Tvec, util.PTOP)
	}
}
Пример #26
0
/*
 * This is adaptation of TRIRED_LAZY_UNB algorithm from (1).
 *
 * Works on the upper triangular storage of A (A00 is referenced with
 * gomas.UPPER) and walks A and Y in lockstep from bottom-right to top-left,
 * one column per step, for n(Y) steps.
 *
 * Arguments as used below:
 *  A     matrix being reduced; the reflector vectors overwrite its columns
 *        above the diagonal
 *  tauq  vector of scalar coefficients, one element is visited per step
 *  Y     accumulated update vectors; column y01 is computed on each step.
 *        Its row -1 is also used as temporary vector w12.
 *        NOTE(review): -1 presumably addresses the last row; confirm
 *        against cmat indexing rules.
 *  W     not referenced in this function
 *
 * On each step element -1 of a01 is saved to v0 and replaced with one to make
 * the reflector vector explicit. The saved value is written back on the
 * following step (through a12) or, for the last reflector, after the loop.
 */
func unblkBuildTridiagUpper(A, tauq, Y, W *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12, A22 cmat.FloatMatrix
	var YTL, YBR cmat.FloatMatrix
	var Y00, y01, Y02, y11, y12, Y22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var w12 cmat.FloatMatrix
	var v0 float64

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x2(
		&YTL, nil,
		nil, &YBR, Y, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, 0, util.PBOTTOM)

	for k := 0; k < n(Y); k++ {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x2to3x3(&YTL,
			&Y00, &y01, &Y02,
			nil, &y11, &y12,
			nil, nil, &Y22, Y, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PTOP)

		// set temp vectors for this round
		w12.SubMatrix(Y, -1, 0, 1, n(&Y02))
		// ------------------------------------------------------

		// apply the updates accumulated so far to the current column;
		// nothing to do on the first step when Y02 is empty
		if n(&Y02) > 0 {
			// a11 := a11 - a12*y12 - y12*a12
			aa := blasd.Dot(&a12, &y12)
			aa += blasd.Dot(&y12, &a12)
			a11.Set(0, 0, a11.Get(0, 0)-aa)

			// a01 := a01 - A02*y12
			blasd.MVMult(&a01, &A02, &y12, -1.0, 1.0, gomas.NONE)
			// a01 := a01 - Y02*a12
			blasd.MVMult(&a01, &Y02, &a12, -1.0, 1.0, gomas.NONE)

			// restore superdiagonal value saved on the previous step
			a12.Set(0, 0, v0)
		}
		// Compute householder to zero the entries of the current column
		computeHouseholderRev(&a01, &tauq1)
		tauqv := tauq1.Get(0, 0)

		// save last element of a01 and set it to unit
		v0 = a01.Get(-1, 0)
		a01.Set(-1, 0, 1.0)

		// y01 := tauq*A00*a01
		blasd.MVMultSym(&y01, &A00, &a01, tauqv, 0.0, gomas.UPPER)
		// w12 := A02.T*a01
		blasd.MVMult(&w12, &A02, &a01, 1.0, 0.0, gomas.TRANS)
		// y01 := y01 - Y02*(A02.T*a01)
		blasd.MVMult(&y01, &Y02, &w12, -tauqv, 1.0, gomas.NONE)
		// w12 := Y02.T*a01
		blasd.MVMult(&w12, &Y02, &a01, 1.0, 0.0, gomas.TRANS)
		// y01 := y01 - A02*(Y02.T*a01)
		blasd.MVMult(&y01, &A02, &w12, -tauqv, 1.0, gomas.NONE)

		// beta := tauq*a01.T*y01
		beta := tauqv * blasd.Dot(&a01, &y01)
		// y01  := y01 - 0.5*beta*a01
		blasd.Axpy(&y01, &a01, -0.5*beta)

		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		// continue the partitioning of Y against Y itself, matching the
		// Partition/Repartition calls above
		util.Continue3x3to2x2(
			&YTL, nil,
			nil, &YBR, &Y00, &y11, &Y22, Y, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PTOP)
	}
	// restore superdiagonal value of the last reflector
	A.Set(m(&ATL)-1, n(&ATL), v0)
}
Пример #27
0
/*
 * Unblocked algorithm for computing C = Q.T*C and C = Q*C.
 *
 * Q = H(1)H(2)...H(k) where the elementary reflector H(i) is stored on the
 * i'th column of A below the diagonal and its scalar coefficient in tau.
 *
 * With the TRANS bit set in flags A is traversed from top-left to
 * bottom-right, i.e. from smaller column numbers to larger, which applies
 * H(k)...H(2)H(1) == Q.T and gives C = Q.T*C.
 *
 * Without it A is traversed from bottom-right to top-left, which applies
 * H(1)H(2)...H(k) == Q and gives C = Q*C.
 *
 * Vector w is workspace; a view with as many elements as the current row of
 * C is taken from its start on every step.
 */
func unblockedMultQLeft(C, A, tau, w *cmat.FloatMatrix, flags int) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var CT, CB, C0, c1, C2 cmat.FloatMatrix
	var tauT, tauB, tau0, tau1, tau2 cmat.FloatMatrix
	var work cmat.FloatMatrix

	var remaining *cmat.FloatMatrix
	var startA, dirA, startV, dirV util.Direction
	var rowOff, colOff, tauOff int

	// select where the partitioning starts and which way it moves
	if flags&gomas.TRANS == 0 {
		// bottom-right to top-left: normal sequence (Q*C)
		startA, dirA = util.PBOTTOMRIGHT, util.PTOPLEFT
		startV, dirV = util.PBOTTOM, util.PTOP
		rowOff = imax(0, m(A)-n(A))
		colOff = imax(0, n(A)-m(A))
		tauOff = imax(0, tau.Len()-n(A))
		remaining = &ATL
	} else {
		// top-left to bottom-right: transposed sequence (Q.T*C);
		// all offsets stay at zero
		startA, dirA = util.PTOPLEFT, util.PBOTTOMRIGHT
		startV, dirV = util.PTOP, util.PBOTTOM
		remaining = &ABR
	}

	util.Partition2x2(&ATL, &ATR, &ABL, &ABR, A, rowOff, colOff, startA)
	util.Partition2x1(&CT, &CB, C, rowOff, startV)
	util.Partition2x1(&tauT, &tauB, tau, tauOff, startV)

	for m(remaining) > 0 && n(remaining) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, dirA)
		util.Repartition2x1to3x1(&CT, &C0, &c1, &C2, C, 1, dirV)
		util.Repartition2x1to3x1(&tauT, &tau0, &tau1, &tau2, tau, 1, dirV)

		// apply the current reflector from the left to [c1; C2]
		work.SubMatrix(w, 0, 0, c1.Len(), 1)
		applyHouseholder2x1(&tau1, &a21, &c1, &C2, &work, gomas.LEFT)

		util.Continue3x3to2x2(&ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, dirA)
		util.Continue3x1to2x1(&CT, &CB, &C0, &c1, C, dirV)
		util.Continue3x1to2x1(&tauT, &tauB, &tau0, &tau1, tau, dirV)
	}
}
Пример #28
0
/*
 * Blocked computation of C = Q*C or C = Q.T*C where Q is given as elementary
 * reflectors stored in A and scalar coefficients in tau.
 *
 * For each block the triangular block reflector T is built with
 * unblkQRBlockReflector and C is updated with updateWithQTLeft (compact WY
 * form). The TRANS bit of flags selects Q.T*C, otherwise Q*C is computed.
 * The top-left to bottom-right traversal is used when TRANS is set and also
 * when nb equals the column count of A.
 *
 * Workspace W holds the nb-by-nb block reflector at its start, followed by an
 * n(C)-by-nb work matrix.
 */
func blockedMultQLeft(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22, panel cmat.FloatMatrix
	var CT, CB, C0, C1, C2 cmat.FloatMatrix
	var tauT, tauB, tau0, tau1, tau2 cmat.FloatMatrix
	var tBuf, tBlk, wBuf, wBlk cmat.FloatMatrix

	var remaining *cmat.FloatMatrix
	var startA, dirA, startV, dirV util.Direction
	var rowOff int

	transpose := flags&gomas.TRANS != 0

	// select where the partitioning starts and which way it moves
	if transpose || nb == n(A) {
		// top-left to bottom-right: transposed sequence (Q.T*C);
		// row offset stays at zero
		startA, dirA = util.PTOPLEFT, util.PBOTTOMRIGHT
		startV, dirV = util.PTOP, util.PBOTTOM
		remaining = &ABR
	} else {
		// bottom-right to top-left: normal sequence (Q*C)
		startA, dirA = util.PBOTTOMRIGHT, util.PTOPLEFT
		startV, dirV = util.PBOTTOM, util.PTOP
		rowOff = imax(0, m(A)-n(A))
		remaining = &ATL
	}

	util.Partition2x2(&ATL, &ATR, &ABL, &ABR, A, rowOff, 0, startA)
	util.Partition2x1(&CT, &CB, C, rowOff, startV)
	util.Partition2x1(&tauT, &tauB, tau, 0, startV)

	// block reflector first in the workspace, work matrix right after it
	tBuf.SetBuf(nb, nb, nb, W.Data())
	wBuf.SetBuf(n(C), nb, n(C), W.Data()[tBuf.Len():])

	for m(remaining) > 0 && n(remaining) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, dirA)
		util.Repartition2x1to3x1(&tauT, &tau0, &tau1, &tau2, tau, nb, dirV)

		// the row block of C has as many rows as A11 has columns
		width := n(&A11)
		util.Repartition2x1to3x1(&CT, &C0, &C1, &C2, C, width, dirV)

		// zero the reflector storage and build T for [A11; A21]
		util.Merge2x1(&panel, &A11, &A21)
		tBlk.SubMatrix(&tBuf, 0, 0, width, width)
		blasd.Scale(&tBlk, 0.0)
		unblkQRBlockReflector(&tBlk, &panel, &tau1)

		// update [C1; C2] with the transposed block reflector when
		// transpose is set, with the plain one otherwise
		wBlk.SubMatrix(&wBuf, 0, 0, n(&C1), width)
		updateWithQTLeft(&C1, &C2, &A11, &A21, &tBlk, &wBlk, transpose, conf)

		util.Continue3x3to2x2(&ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, dirA)
		util.Continue3x1to2x1(&CT, &CB, &C0, &C1, C, dirV)
		util.Continue3x1to2x1(&tauT, &tauB, &tau0, &tau1, tau, dirV)
	}
}
Пример #29
0
/*
 * Blocked computation of C = Q*C or C = Q.T*C for the QL case; reflectors
 * are read from A and scalar coefficients from tau.
 *
 * For each block the block reflector T is built from [A01; A11] with
 * unblkQLBlockReflector and the blocks C1 and C0 are updated with
 * updateQLLeft. The TRANS bit of flags selects the transposed update, in
 * which case A is traversed from bottom-right to top-left; otherwise the
 * traversal goes from top-left to bottom-right.
 *
 * Workspace W holds the lb-by-lb block reflector at its start, followed by
 * an n(C)-by-lb work matrix.
 */
func blkMultLeftQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) {
	var ATL, ABR, A00, A01, A11, A22, panel cmat.FloatMatrix
	var CT, CB, C0, C1, C2 cmat.FloatMatrix
	var tauT, tauB, tau0, tau1, tau2 cmat.FloatMatrix
	var tBuf, tBlk, wBuf, wBlk cmat.FloatMatrix

	var remaining *cmat.FloatMatrix
	var startA, dirA, startV, dirV util.Direction
	var rowOff, colOff, tauOff int

	transpose := flags&gomas.TRANS != 0

	// select where the partitioning starts and which way it moves
	switch {
	case transpose:
		// bottom-right to top-left: transposed sequence (Q.T*C);
		// all offsets stay at zero
		startA, dirA = util.PBOTTOMRIGHT, util.PTOPLEFT
		startV, dirV = util.PBOTTOM, util.PTOP
		remaining = &ATL
	default:
		// top-left to bottom-right: normal sequence (Q*C)
		startA, dirA = util.PTOPLEFT, util.PBOTTOMRIGHT
		startV, dirV = util.PTOP, util.PBOTTOM
		rowOff = imax(0, m(A)-n(A))
		colOff = imax(0, n(A)-m(A))
		tauOff = imax(0, tau.Len()-n(A))
		remaining = &ABR
	}

	util.Partition2x2(&ATL, nil, nil, &ABR, A, rowOff, colOff, startA)
	util.Partition2x1(&CT, &CB, C, rowOff, startV)
	util.Partition2x1(&tauT, &tauB, tau, tauOff, startV)

	// split workspace: block reflector first, temporary space after it
	tBuf.SetBuf(lb, lb, lb, W.Data())
	wBuf.SetBuf(n(C), lb, n(C), W.Data()[tBuf.Len():])

	for n(remaining) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			nil, &A11, nil,
			nil, nil, &A22, A, lb, dirA)
		util.Repartition2x1to3x1(&tauT, &tau0, &tau1, &tau2, tau, lb, dirV)

		width := n(&A11)
		util.Repartition2x1to3x1(&CT, &C0, &C1, &C2, C, width, dirV)

		// zero the reflector storage and build T for [A01; A11]
		util.Merge2x1(&panel, &A01, &A11)
		tBlk.SubMatrix(&tBuf, 0, 0, width, width)
		blasd.Scale(&tBlk, 0.0)
		unblkQLBlockReflector(&tBlk, &panel, &tau1)

		// update with (I - Y*T*Y.T) or (I - Y*T*Y.T).T
		wBlk.SubMatrix(&wBuf, 0, 0, n(&C1), width)
		updateQLLeft(&C1, &C0, &A11, &A01, &tBlk, &wBlk, transpose, conf)

		util.Continue3x3to2x2(&ATL, nil, nil, &ABR, &A00, &A11, &A22, A, dirA)
		util.Continue3x1to2x1(&CT, &CB, &C0, &C1, C, dirV)
		util.Continue3x1to2x1(&tauT, &tauB, &tau0, &tau1, tau, dirV)
	}
}
Пример #30
0
/*
 * Blocked computation of C = C*Q or C = C*Q.T for the QL case; reflectors
 * are read from A and scalar coefficients from tau.
 *
 * For each block the block reflector T is built from [A01; A11] with
 * unblkQLBlockReflector and the blocks C1 and C0 are updated with
 * updateQLRight. The TRANS bit of flags selects the transposed update, in
 * which case A is traversed from top-left to bottom-right; otherwise the
 * traversal goes from bottom-right to top-left.
 *
 * Workspace W holds the lb-by-lb block reflector at its start, followed by
 * an m(C)-by-lb work matrix.
 */
func blkMultRightQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) {
	var ATL, ABR, A00, A01, A11, A22, panel cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var tauT, tauB, tau0, tau1, tau2 cmat.FloatMatrix
	var tBuf, tBlk, wBuf, wBlk cmat.FloatMatrix

	var remaining *cmat.FloatMatrix
	var startA, dirA, startT, dirT, startC, dirC util.Direction
	var rowOff, colOff, tauOff, cOff int

	transpose := flags&gomas.TRANS != 0

	// select where the partitioning starts and which way it moves
	if !transpose {
		// bottom-right to top-left: normal sequence (C*Q);
		// all offsets stay at zero
		startA, dirA = util.PBOTTOMRIGHT, util.PTOPLEFT
		startT, dirT = util.PBOTTOM, util.PTOP
		startC, dirC = util.PRIGHT, util.PLEFT
		remaining = &ATL
	} else {
		// top-left to bottom-right: transposed sequence (C*Q.T)
		startA, dirA = util.PTOPLEFT, util.PBOTTOMRIGHT
		startT, dirT = util.PTOP, util.PBOTTOM
		startC, dirC = util.PLEFT, util.PRIGHT
		rowOff = imax(0, m(A)-n(A))
		colOff = imax(0, n(A)-m(A))
		cOff = imax(0, n(C)-n(A))
		tauOff = imax(0, tau.Len()-n(A))
		remaining = &ABR
	}

	util.Partition2x2(&ATL, nil, nil, &ABR, A, rowOff, colOff, startA)
	util.Partition1x2(&CL, &CR, C, cOff, startC)
	util.Partition2x1(&tauT, &tauB, tau, tauOff, startT)

	// split workspace: block reflector first, temporary work matrix after it
	tBuf.SetBuf(lb, lb, lb, W.Data())
	wBuf.SetBuf(m(C), lb, m(C), W.Data()[tBuf.Len():])

	for n(remaining) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			nil, &A11, nil,
			nil, nil, &A22, A, lb, dirA)

		// C and tau are stepped with the actual width of A11
		width := n(&A11)
		util.Repartition1x2to1x3(&CL, &C0, &C1, &C2, C, width, dirC)
		util.Repartition2x1to3x1(&tauT, &tau0, &tau1, &tau2, tau, width, dirT)

		// zero the reflector storage and build T for [A01; A11]
		util.Merge2x1(&panel, &A01, &A11)
		tBlk.SubMatrix(&tBuf, 0, 0, width, width)
		blasd.Scale(&tBlk, 0.0)
		unblkQLBlockReflector(&tBlk, &panel, &tau1)

		// update C1 and C0 from the right
		wBlk.SubMatrix(&wBuf, 0, 0, m(C), width)
		updateQLRight(&C1, &C0, &A11, &A01, &tBlk, &wBlk, transpose, conf)

		util.Continue3x3to2x2(&ATL, nil, nil, &ABR, &A00, &A11, &A22, A, dirA)
		util.Continue1x3to1x2(&CL, &CR, &C0, &C1, C, dirC)
		util.Continue3x1to2x1(&tauT, &tauB, &tau0, &tau1, tau, dirT)
	}
}