예제 #1
0
파일: qrt.go 프로젝트: hrautila/gomas
/*
 * Build full block reflector T for nc columns from the sequence of reflectors stored in S.
 *
 * Reflector blocks in S become the diagonal blocks of T; the off-diagonal blocks of T
 * are computed from the elementary reflectors stored in the lower triangular part of A.
 * A, T and S are traversed together in steps of conf.LB.
 *
 * NOTE(review): parameter nc is not referenced anywhere in the body.
 * NOTE(review): the block that remains when fewer than conf.LB+1 rows or columns are
 * left is detected by the final if-statement but nothing is done with it -- confirm
 * whether the tail handling is missing or intentionally empty.
 *
 * The function always returns nil.
 */
func buildQRTReflector(T, A, S *cmat.FloatMatrix, nc int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, T01, T02, T11, T12, T22 cmat.FloatMatrix
	var SL, SR cmat.FloatMatrix
	var S00, S01, S02 cmat.FloatMatrix

	// all three matrices start with an empty leading partition
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&SL, &SR, S, 0, util.PLEFT)

	nb := conf.LB
	// iterate while more than nb rows and more than nb columns remain in ABR
	for m(&ABR)-nb > 0 && n(&ABR)-nb > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &T01, &T02,
			nil, &T11, &T12,
			nil, nil, &T22, T, nb, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&SL,
			&S00, &S01, &S02, S, nb, util.PRIGHT)
		// --------------------------------------------------------
		// update T01: T01 = -T00*Y1.T*Y2*T11
		//  Y1 = /A10\   Y2 = /A11\
		//       \A20/        \A21/
		//
		// current diagonal block of T is the reflector block stored in S
		T11.Copy(&S01)
		updateQRTReflector(&T01, &A10, &A20, &A11, &A21, &T00, &S01, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &T11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&SL, &SR, &S00, &S01, S, util.PRIGHT)
	}
	// NOTE(review): remaining trailing block is recognized here but not processed
	if m(&ABR) > 0 && n(&ABR) > 0 {
	}
	return nil
}
예제 #2
0
파일: qrt.go 프로젝트: hrautila/gomas
/*
 * Blocked QR factorization that also stores the block reflectors.
 *
 * A is swept from top-left to bottom-right in panels of conf.LB columns. Each panel
 * [A11; A21] is factored with unblockedQRT, which writes the panel's block reflector
 * into the matching column block of T, and the columns to the right of the panel
 * (A12, A22) are then updated with that reflector. The part of A that is left when
 * no more than conf.LB rows or columns remain is factored with a single unblocked call.
 *
 * W is workspace; the leading n(A12)-by-m(A12) part of it is used for the update.
 * The function always returns nil.
 */
func blockedQRT(A, T, W *cmat.FloatMatrix, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix
	var AL, AR, W2 cmat.FloatMatrix
	var TL, TR, T00, T01, T02 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&TL, &TR, T, 0, util.PLEFT)

	blk := conf.LB
	for m(&ABR) > blk && n(&ABR) > blk {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, &A02,
			&A10, &A11, &A12,
			&A20, &A21, &A22, A, blk, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&TL,
			&T00, &T01, &T02, T, blk, util.PRIGHT)
		util.Partition1x2(
			&AL, &AR, &ABR, blk, util.PLEFT)

		// factor the current panel AL == [A11; A21]; reflector goes to T01
		unblockedQRT(&AL, &T01, W)

		// apply the panel's reflector to the trailing columns A12 and A22:
		// (I - Y*T*Y.T).T * C == C - Y*(C.T*Y*T).T
		rows, cols := A12.Size()
		W2.SubMatrix(W, 0, 0, cols, rows)
		updateWithQTLeft(&A12, &A22, &A11, &A21, &T01, &W2, true, conf)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&TL, &TR, &T00, &T01, T, util.PRIGHT)
	}

	// nothing left to factor
	if m(&ABR) <= 0 || n(&ABR) <= 0 {
		return nil
	}

	// last block: its reflector is the leading square part of the rest of T
	last := n(&ABR)
	T01.SubMatrix(&TR, 0, 0, last, last)
	unblockedQRT(&ABR, &T01, W)
	return nil
}
예제 #3
0
파일: hess.go 프로젝트: hrautila/gomas
/*
 * Computes upper Hessenberg reduction of N-by-N matrix A using unblocked
 * algorithm as described in (1).
 *
 * Hessenberg reduction: A = Q.T*B*Q, Q unitary, B upper Hessenberg
 *  Q = H(0)*H(1)*...*H(k) where H(k) is k'th Householder reflector.
 *
 * Arguments:
 *  A     matrix to reduce; updated in place. The Householder vectors are left in
 *        the a21 part of each processed column.
 *  Tvec  receives the scalar coefficient of each reflector, one per step.
 *  W     workspace; its first column is used for both the m(A)-length vector v1
 *        and the n(A22)-length vector w12.
 *  row   number of rows in the initial top partition of A; the initial left
 *        partition has zero columns.
 *
 * Compatible with lapack.DGEHD2.
 */
func unblkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, row int) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a11, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var tT, tB cmat.FloatMatrix
	var t0, tau1, t2, w12, v1 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, row, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	// v1 is a full-height column vector at the start of the workspace
	v1.SubMatrix(W, 0, 0, m(A), 1)

	for m(&ABR) > 1 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &a11, nil,
			nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, Tvec, 1, util.PBOTTOM)

		// ------------------------------------------------------
		// a21 = [beta; H(k)].T
		computeHouseholderVec(&a21, &tau1)
		tauval := tau1.Get(0, 0)
		// save first element of a21 and replace it with 1.0 for the duration
		// of the updates below; it is restored at the end of the iteration
		beta := a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// A2 := A2 - tau*v1*a21   (A2 := A2*H(k))
		blasd.MVUpdate(&A2, &v1, &a21, -tauval)

		// w12 starts at the same workspace location as v1; v1 is not used
		// after the update above
		w12.SubMatrix(W, 0, 0, n(&A22), 1)
		// w12 := a21.T*A22 = A22.T*a21
		blasd.MVMult(&w12, &A22, &a21, 1.0, 0.0, gomas.TRANS)
		// A22 := A22 - tau*a21*w12   (A22 := H(k)*A22)
		blasd.MVUpdate(&A22, &a21, &w12, -tauval)

		// restore the saved first element of a21
		a21.Set(0, 0, beta)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, Tvec, util.PBOTTOM)
	}
}
예제 #4
0
파일: hess.go 프로젝트: hrautila/gomas
/*
 *
 *  Building reduction block for blocked algorithm as described in (1).
 *
 *  A. update next column
 *    a10        [(U00)     (U00)  ]   [(a10)    (V00)            ]
 *    a11 :=  I -[(u10)*T00*(u10).T] * [(a11)  - (v01) * T00 * a10]
 *    a12        [(U20)     (U20)  ]   [(a12)    (V02)            ]
 *
 *  B. compute Householder reflector for updated column
 *    a21, t11 := Householder(a21)
 *
 *  C. update intermediate reductions
 *    v10      A02*a21
 *    v11  :=  a12*a21
 *    v12      A22*a21
 *
 *  D. update block reflector
 *    t01 :=  A20*a21
 *    t11 :=  t11
 *
 *  The loop runs once per column of V. On each step the first element of the
 *  new Householder vector a21 is saved in beta and overwritten with 1.0; the
 *  saved value is written back on the following step, and after the loop for
 *  the last column.
 *
 *  The function always returns nil.
 */
func unblkBuildHessGQvdG(A, T, V, W *cmat.FloatMatrix) *gomas.Error {

	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2 cmat.FloatMatrix
	var TTL, TTR, TBL, TBR cmat.FloatMatrix
	var T00, t01, t11, T22 cmat.FloatMatrix
	var VL, VR, V0, v1, V2, Y0 cmat.FloatMatrix

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	util.Partition1x2(
		&VL, &VR, V, 0, util.PLEFT)

	// first element of the most recent Householder vector, saved while that
	// element is temporarily set to 1.0
	var beta float64

	for n(&VR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x2to3x3(&TTL,
			&T00, &t01, nil,
			nil, &t11, nil,
			nil, nil, &T22, T, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2, A, 1, util.PRIGHT)
		util.Repartition1x2to1x3(&VL,
			&V0, &v1, &V2, V, 1, util.PRIGHT)

		// ------------------------------------------------------
		// Compute Hessenberg update for next column of A:
		// (skipped on the first step, when no reflectors exist yet)
		if n(&V0) > 0 {
			// y10 := T00*a10  (use t01 as workspace?)
			blasd.Axpby(&t01, &a10, 1.0, 0.0)
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)

			// a1 := a1 - V0*T00*a10
			blasd.MVMult(&a1, &V0, &t01, -1.0, 1.0, gomas.NONE)

			// update a1 := (I - Y*T*Y.T).T*a1 (here t01 as workspace)
			// Y0 is the square n(A00)-by-n(A00) block of A starting at row 1, column 0
			Y0.SubMatrix(A, 1, 0, n(&A00), n(&A00))
			updateVecLeftWY2(&a1, &Y0, &A20, &T00, &t01, gomas.TRANS)
			// write back the value saved on the previous step
			// NOTE(review): column index -1 presumably addresses the last element
			// of a10 -- confirm against the cmat indexing rules
			a10.Set(0, -1, beta)
		}

		// Compute Householder reflector
		computeHouseholderVec(&a21, &t11)
		// save first element and use 1.0 in its place for the products below
		beta = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// v1 := A2*a21
		blasd.MVMult(&v1, &A2, &a21, 1.0, 0.0, gomas.NONE)

		// update T
		tauval := t11.Get(0, 0)
		if tauval != 0.0 {
			// t01 := -tauval*A20.T*a21
			blasd.MVMult(&t01, &A20, &a21, -tauval, 0.0, gomas.TRANS)
			// t01 := T00*t01
			blasd.MVMultTrm(&t01, &T00, 1.0, gomas.UPPER)
		}
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		util.Continue1x3to1x2(
			&VL, &VR, &V0, &v1, V, util.PRIGHT)
	}
	// write back the value saved on the last step, at row n(V), column n(V)-1 of A
	A.Set(n(V), n(V)-1, beta)
	return nil
}
예제 #5
0
파일: qrmult.go 프로젝트: hrautila/gomas
/*
 * Blocked computation of C = C*Q or C = C*Q.T (flags has gomas.TRANS set), where Q
 * is given as elementary reflectors in A and scalar coefficients in tau.
 *
 * A is processed in panels of nb columns. For every panel the block reflector T is
 * built into the start of workspace W and C is updated with it using the compact
 * WY form. The rest of W, taken as an m(C)-by-nb matrix, is scratch space for the
 * update.
 *
 *   C*Q   is produced by sweeping A from top-left to bottom-right,
 *   C*Q.T is produced by sweeping A from bottom-right to top-left.
 */
func blockedMultQRight(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var tT, tB, t0, tau1, t2 cmat.FloatMatrix
	var Twork, Tw, W0, Wrk cmat.FloatMatrix
	var pAstart, pAdir, pStart, pDir, pCstart, pCdir util.Direction

	transpose := flags&gomas.TRANS != 0

	// normal sequence (C*Q): start at top-left, no initial offsets
	pAstart, pAdir = util.PTOPLEFT, util.PBOTTOMRIGHT
	pStart, pDir = util.PTOP, util.PBOTTOM
	pCstart, pCdir = util.PLEFT, util.PRIGHT
	mb, cb := 0, 0
	Aref := &ABR
	if transpose {
		// transposed sequence (C*Q.T): start at bottom-right and skip the rows
		// of A and columns of C that lie beyond the reflectors
		pAstart, pAdir = util.PBOTTOMRIGHT, util.PTOPLEFT
		pStart, pDir = util.PBOTTOM, util.PTOP
		pCstart, pCdir = util.PRIGHT, util.PLEFT
		mb = imax(0, m(A)-n(A))
		cb = n(C) - n(A)
		Aref = &ATL
	}

	// workspace layout: nb-by-nb block reflector first, update scratch after it
	Twork.SetBuf(nb, nb, nb, W.Data())
	W0.SetBuf(m(C), nb, m(C), W.Data()[Twork.Len():])

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mb, 0, pAstart)
	util.Partition1x2(
		&CL, &CR, C, cb, pCstart)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, pStart)

	for m(Aref) > 0 && n(Aref) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, pAdir)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, nb, pDir)

		// column block of C must be as wide as the panel actually obtained
		width := n(&A11)
		util.Repartition1x2to1x3(&CL,
			&C0, &C1, &C2, C, width, pCdir)

		// zero the reflector storage and build T for the panel [A11; A21]
		util.Merge2x1(&AL, &A11, &A21)
		Tw.SubMatrix(&Twork, 0, 0, width, width)
		blasd.Scale(&Tw, 0.0)
		unblkQRBlockReflector(&Tw, &AL, &tau1)

		// C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T
		// C*Q   == C - C*Y*T*Y.T
		Wrk.SubMatrix(&W0, 0, 0, m(&C1), width)
		updateWithQTRight(&C1, &C2, &A11, &A21, &Tw, &Wrk, transpose, conf)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, pAdir)
		util.Continue1x3to1x2(
			&CL, &CR, &C0, &C1, C, pCdir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, pDir)
	}
}
예제 #6
0
파일: qrmult.go 프로젝트: hrautila/gomas
/*
 * Unblocked algorithm for computing C = C*Q.T and C = C*Q.
 *
 * Q = H(1)H(2)...H(k) where the elementary reflector H(i) is stored in the i'th
 * column of A below the diagonal, with its scalar coefficient in tau.
 *
 *     Q.T = (H(1)*H(2)*...*H(k)).T
 *         = H(k).T*...*H(2).T*H(1).T
 *         = H(k)...H(2)H(1)
 *
 * Walking A from top-left to bottom-right, i.e. from smaller column numbers to
 * larger, applies H(1), H(2), ..., H(k) in that order and gives C*Q.
 *
 * Walking A from bottom-right to top-left (flags has gomas.TRANS set) applies
 * H(k), ..., H(2), H(1) and gives C*Q.T.
 *
 * w is workspace; its first c1.Len() elements are used on every step.
 */
func unblockedMultQRight(C, A, tau, w *cmat.FloatMatrix, flags int) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10, a11, A20, a21, A22 cmat.FloatMatrix
	var CL, CR, C0, c1, C2 cmat.FloatMatrix
	var tT, tB, t0, tau1, t2 cmat.FloatMatrix
	var w1 cmat.FloatMatrix
	var pAstart, pAdir, pStart, pDir, pCstart, pCdir util.Direction

	// normal sequence (C*Q): top-left to bottom-right, no initial offsets
	pAstart, pAdir = util.PTOPLEFT, util.PBOTTOMRIGHT
	pStart, pDir = util.PTOP, util.PBOTTOM
	pCstart, pCdir = util.PLEFT, util.PRIGHT
	mb, nb, cb, tb := 0, 0, 0, 0
	Aref := &ABR
	if flags&gomas.TRANS != 0 {
		// transposed sequence (C*Q.T): bottom-right to top-left; the initial
		// partitions skip whatever lies beyond the n(A) reflectors
		pAstart, pAdir = util.PBOTTOMRIGHT, util.PTOPLEFT
		pStart, pDir = util.PBOTTOM, util.PTOP
		pCstart, pCdir = util.PRIGHT, util.PLEFT
		mb = imax(0, m(A)-n(A))
		nb = imax(0, n(A)-m(A))
		cb = imax(0, n(C)-n(A))
		tb = imax(0, tau.Len()-n(A))
		Aref = &ATL
	}

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mb, nb, pAstart)
	util.Partition1x2(
		&CL, &CR, C, cb, pCstart)
	util.Partition2x1(
		&tT,
		&tB, tau, tb, pStart)

	for m(Aref) > 0 && n(Aref) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, pAdir)
		util.Repartition1x2to1x3(&CL,
			&C0, &c1, &C2, C, 1, pCdir)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, 1, pDir)

		// apply the current reflector (a21, tau1) from the right to [c1 C2]
		w1.SubMatrix(w, 0, 0, c1.Len(), 1)
		applyHouseholder2x1(&tau1, &a21, &c1, &C2, &w1, gomas.RIGHT)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, pAdir)
		util.Continue1x3to1x2(
			&CL, &CR, &C0, &c1, C, pCdir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, pDir)
	}
}
예제 #7
0
파일: qlmult.go 프로젝트: hrautila/gomas
/*
 * Blocked computation of C = C*Q or C = C*Q.T (flags has gomas.TRANS set) for the
 * QL factorization: reflectors are read from the blocks [A01; A11] of A and the
 * scalar coefficients from tau.
 *
 * A is processed in panels of lb columns. For every panel a block reflector is
 * built into the start of workspace W; the rest of W, taken as an m(C)-by-lb
 * matrix, is scratch space for the update of C.
 *
 *   C*Q   is produced by sweeping A from bottom-right to top-left,
 *   C*Q.T is produced by sweeping A from top-left to bottom-right.
 */
func blkMultRightQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) {
	var ATL, ABR, AL cmat.FloatMatrix
	var A00, A01, A11, A22 cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var tT, tB, t0, tau1, t2 cmat.FloatMatrix
	var T0, T, W0, Wrk cmat.FloatMatrix
	var pAstart, pAdir, pStart, pDir, pCstart, pCdir util.Direction

	transpose := flags&gomas.TRANS != 0

	// normal sequence (C*Q): bottom-right to top-left, no initial offsets
	pAstart, pAdir = util.PBOTTOMRIGHT, util.PTOPLEFT
	pStart, pDir = util.PBOTTOM, util.PTOP
	pCstart, pCdir = util.PRIGHT, util.PLEFT
	mb, nb, cb, tb := 0, 0, 0, 0
	Aref := &ATL
	if transpose {
		// transposed sequence (C*Q.T): top-left to bottom-right, starting
		// past the leading rows/columns that are not covered by reflectors
		pAstart, pAdir = util.PTOPLEFT, util.PBOTTOMRIGHT
		pStart, pDir = util.PTOP, util.PBOTTOM
		pCstart, pCdir = util.PLEFT, util.PRIGHT
		mb = imax(0, m(A)-n(A))
		nb = imax(0, n(A)-m(A))
		cb = imax(0, n(C)-n(A))
		tb = imax(0, tau.Len()-n(A))
		Aref = &ABR
	}

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, mb, nb, pAstart)
	util.Partition1x2(
		&CL, &CR, C, cb, pCstart)
	util.Partition2x1(
		&tT,
		&tB, tau, tb, pStart)

	// workspace layout: lb-by-lb block reflector first, update scratch after it
	T0.SetBuf(lb, lb, lb, W.Data())
	W0.SetBuf(m(C), lb, m(C), W.Data()[T0.Len():])

	for n(Aref) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			nil, &A11, nil,
			nil, nil, &A22, A, lb, pAdir)
		// blocks of C and tau follow the width of the panel actually obtained
		width := n(&A11)
		util.Repartition1x2to1x3(&CL,
			&C0, &C1, &C2, C, width, pCdir)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, width, pDir)

		// zero the reflector storage and build it for the panel [A01; A11]
		util.Merge2x1(&AL, &A01, &A11)
		T.SubMatrix(&T0, 0, 0, width, width)
		blasd.Scale(&T, 0.0)
		unblkQLBlockReflector(&T, &AL, &tau1)

		// update [C0 C1] with the block reflector
		Wrk.SubMatrix(&W0, 0, 0, m(C), width)
		updateQLRight(&C1, &C0, &A11, &A01, &T, &Wrk, transpose, conf)

		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &A11, &A22, A, pAdir)
		util.Continue1x3to1x2(
			&CL, &CR, &C0, &C1, C, pCdir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, pDir)
	}
}
예제 #8
0
파일: rqmult.go 프로젝트: hrautila/gomas
/*
 * Blocked computation of C = C*Q or C = C*Q.T (flags has gomas.TRANS set) for the
 * RQ factorization: reflectors are read from the row blocks [A10 A11] of A and the
 * scalar coefficients from tau.
 *
 * A is processed in blocks of lb rows. For every block the block reflector T is
 * built into the start of workspace W and C is updated with it using the compact
 * WY form; the rest of W, taken as an m(C)-by-lb matrix, is scratch space.
 *
 *   C*Q   is produced by sweeping A from top-left to bottom-right,
 *   C*Q.T is produced by sweeping A from bottom-right to top-left.
 *
 * Note that the flag handed to updateRightRQ is the inverse of the TRANS bit:
 * true for C*Q and false for C*Q.T.
 */
func blockedMultRQRight(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) {
	var ATL, ABR, AL cmat.FloatMatrix
	var A00, A10, A11, A22 cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var tT, tB, t0, tau1, t2 cmat.FloatMatrix
	var Twork, Tw, W0, Wrk cmat.FloatMatrix
	var pAstart, pAdir, pStart, pDir, pCstart, pCdir util.Direction

	// transposed sequence (C*Q.T): bottom-right to top-left, no initial offsets
	pAstart, pAdir = util.PBOTTOMRIGHT, util.PTOPLEFT
	pStart, pDir = util.PBOTTOM, util.PTOP
	pCstart, pCdir = util.PRIGHT, util.PLEFT
	mb, nb, cb, tb := 0, 0, 0, 0
	Aref := &ATL
	transpose := false
	if flags&gomas.TRANS == 0 {
		// normal sequence (C*Q): top-left to bottom-right, starting past the
		// leading part that is not covered by the m(A) reflectors
		pAstart, pAdir = util.PTOPLEFT, util.PBOTTOMRIGHT
		pStart, pDir = util.PTOP, util.PBOTTOM
		pCstart, pCdir = util.PLEFT, util.PRIGHT
		mb = imax(0, m(A)-n(A))
		nb = imax(0, n(A)-m(A))
		cb = imax(0, n(C)-m(A))
		tb = imax(0, tau.Len()-m(A))
		Aref = &ABR
		transpose = true
	}

	// workspace layout: lb-by-lb block reflector first, update scratch after it
	Twork.SetBuf(lb, lb, lb, W.Data())
	W0.SetBuf(m(C), lb, m(C), W.Data()[Twork.Len():])

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, mb, nb, pAstart)
	util.Partition1x2(
		&CL, &CR, C, cb, pCstart)
	util.Partition2x1(
		&tT,
		&tB, tau, tb, pStart)

	for m(Aref) > 0 && n(Aref) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			nil, nil, &A22, A, lb, pAdir)
		// blocks of tau and C follow the height of the block actually obtained
		height := m(&A11)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, height, pDir)
		util.Repartition1x2to1x3(&CL,
			&C0, &C1, &C2, C, height, pCdir)

		// zero the reflector storage and build T for the block [A10 A11]
		util.Merge1x2(&AL, &A10, &A11)
		Tw.SubMatrix(&Twork, 0, 0, height, height)
		blasd.Scale(&Tw, 0.0)
		unblkBlockReflectorRQ(&Tw, &AL, &tau1)

		// update [C0 C1] with the block reflector
		Wrk.SubMatrix(&W0, 0, 0, m(&C1), height)
		updateRightRQ(&C1, &C0, &A11, &A10, &Tw, &Wrk, transpose, conf)

		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &A11, &A22, A, pAdir)
		util.Continue1x3to1x2(
			&CL, &CR, &C0, &C1, C, pCdir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, pDir)
	}
}
예제 #9
0
파일: qrtmult.go 프로젝트: hrautila/gomas
/*
 * Blocked version for computing C = Q*C and C = Q.T*C with block reflector.
 *
 * Block reflector T is [T(0), T(1), ... T(k-1)], a conf.LB*n(A) matrix where
 * each T(n), except T(k-1), is conf.LB*conf.LB. T(k-1) is IB*IB where
 * IB = K%LB, or LB when K is a multiple of LB.
 *
 * If conf.LB is zero the row count of T is used as the block size.
 *
 *   Q.T*C (flags has gomas.TRANS set) sweeps A from top-left to bottom-right,
 *   Q*C   sweeps A from bottom-right to top-left; that sweep starts with the
 *         last, possibly narrower, block T(k-1).
 *
 * W is workspace; its leading n(C1)-by-blocksize part is used on every step.
 * The function always returns nil.
 */
func blockedMultQTLeft(C, A, T, W *cmat.FloatMatrix, flags int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var CT, CB, C0, C1, C2 cmat.FloatMatrix
	var TL, TR, T00, T01, T02 cmat.FloatMatrix
	var Wrk cmat.FloatMatrix

	var Aref *cmat.FloatMatrix
	var pAdir, pAstart, pCdir, pCstart, pTstart, pTdir util.Direction
	var bsz, mb, tb int

	lb := conf.LB
	if conf.LB == 0 {
		lb = m(T)
	}
	transpose := flags&gomas.TRANS != 0
	nb := lb

	// partitioning start and direction
	if transpose {
		// from top-left to bottom-right to produce transposed sequence (Q.T*C)
		pAstart = util.PTOPLEFT
		pAdir = util.PBOTTOMRIGHT
		pCstart = util.PTOP
		pCdir = util.PBOTTOM
		pTstart = util.PLEFT
		pTdir = util.PRIGHT
		mb = 0
		tb = nb
		Aref = &ABR
	} else {
		// from bottom-right to top-left to produce normal sequence (Q*C)
		pAstart = util.PBOTTOMRIGHT
		pAdir = util.PTOPLEFT
		pCstart = util.PBOTTOM
		pCdir = util.PTOP
		pTstart = util.PRIGHT
		pTdir = util.PLEFT
		mb = m(A) - n(A)
		Aref = &ATL
		// if N%LB != 0 then the last T is not of size LB and we need
		// adjust first repartitioning accordingly.
		tb = n(A) % lb
		if tb == 0 {
			// N is a multiple of LB: the last T is a full block. Starting
			// with size zero would run one pass over empty blocks.
			tb = lb
		}
	}

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mb, 0, pAstart)
	util.Partition1x2(
		&TL, &TR, T, 0, pTstart)
	util.Partition2x1(
		&CT,
		&CB, C, mb, pCstart)

	// first step uses the (possibly shorter) start block, later steps use lb
	nb = tb
	for m(Aref) > 0 && n(Aref) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, pAdir)
		util.Repartition1x2to1x3(&TL,
			&T00, &T01, &T02, T, nb, pTdir)

		bsz = n(&A11) // must match A11 block size
		util.Repartition2x1to3x1(&CT,
			&C0,
			&C1,
			&C2, C, bsz, pCdir)
		// --------------------------------------------------------
		tr, tc := T01.Size()
		// compute: Q.T*C == C - Y*(C.T*Y*T).T  transpose == true
		//          Q*C   == C - Y*T*Y.T*C      transpose == false
		Wrk.SubMatrix(W, 0, 0, n(&C1), bsz)
		if tr != tc {
			// this happens when n(A) not multiple of LB; use only the
			// leading square part of the reflector block
			var Tmp cmat.FloatMatrix
			Tmp.SubMatrix(&T01, 0, 0, tc, tc)
			updateWithQTLeft(&C1, &C2, &A11, &A21, &Tmp, &Wrk, transpose, conf)
		} else {
			updateWithQTLeft(&C1, &C2, &A11, &A21, &T01, &Wrk, transpose, conf)
		}

		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, pAdir)
		util.Continue1x3to1x2(
			&TL, &TR, &T00, &T01, T, pTdir)
		util.Continue3x1to2x1(
			&CT,
			&CB, &C0, &C1, C, pCdir)

		nb = lb
	}
	return nil
}
예제 #10
0
파일: qrtmult.go 프로젝트: hrautila/gomas
/*
 * Blocked version for computing C = C*Q and C = C*Q.T with block reflector.
 *
 * Block reflector T is [T(0), T(1), ... T(k-1)], a conf.LB*n(A) matrix where
 * each T(n), except T(k-1), is conf.LB*conf.LB. T(k-1) is IB*IB where
 * IB = K%LB, or LB when K is a multiple of LB.
 *
 * If conf.LB is zero the row count of T is used as the block size.
 *
 *   C*Q   sweeps A from top-left to bottom-right,
 *   C*Q.T (flags has gomas.TRANS set) sweeps A from bottom-right to top-left;
 *         that sweep starts with the last, possibly narrower, block T(k-1).
 *
 * W is workspace; its leading m(C1)-by-blocksize part is used on every step.
 */
func blockedMultQTRight(C, A, T, W *cmat.FloatMatrix, flags int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var TL, TR, T00, T01, T02 cmat.FloatMatrix
	var Wrk cmat.FloatMatrix

	var Aref *cmat.FloatMatrix
	var pAdir, pAstart, pCstart, pCdir, pTstart, pTdir util.Direction
	var bsz, cb, mb, tb int

	lb := conf.LB
	if conf.LB == 0 {
		lb = m(T)
	}
	transpose := flags&gomas.TRANS != 0
	nb := lb

	// partitioning start and direction
	if transpose {
		// from bottom-right to top-left to produce transpose sequence (C*Q.T)
		pAstart = util.PBOTTOMRIGHT
		pAdir = util.PTOPLEFT
		pCstart = util.PRIGHT
		pCdir = util.PLEFT
		pTstart = util.PRIGHT
		pTdir = util.PLEFT
		mb = imax(0, m(A)-n(A))
		cb = imax(0, n(C)-n(A))
		// if N%LB != 0 then the last T is not of size LB and the first
		// repartitioning must use the shorter size.
		tb = n(A) % lb
		if tb == 0 {
			// N is a multiple of LB: the last T is a full block. Starting
			// with size zero would run one pass over empty blocks.
			tb = lb
		}
		Aref = &ATL
	} else {
		// from top-left to bottom-right to produce normal sequence (C*Q)
		pAstart = util.PTOPLEFT
		pAdir = util.PBOTTOMRIGHT
		pCstart = util.PLEFT
		pCdir = util.PRIGHT
		pTstart = util.PLEFT
		pTdir = util.PRIGHT
		mb = 0
		cb = 0
		tb = nb
		Aref = &ABR
	}

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mb, 0, pAstart)
	util.Partition1x2(
		&TL, &TR, T, 0, pTstart)
	util.Partition1x2(
		&CL, &CR, C, cb, pCstart)

	// first step uses the (possibly shorter) start block, later steps use lb
	nb = tb
	for m(Aref) > 0 && n(Aref) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, pAdir)
		util.Repartition1x2to1x3(&TL,
			&T00, &T01, &T02, T, nb, pTdir)

		bsz = n(&A11) // must match A11 block size
		util.Repartition1x2to1x3(&CL,
			&C0, &C1, &C2, C, bsz, pCdir)
		// --------------------------------------------------------
		tr, tc := T01.Size()
		// compute: C*Q.T == C - C*Y*T.T*Y.T   transpose == true
		//          C*Q   == C - C*Y*T*Y.T     transpose == false
		Wrk.SubMatrix(W, 0, 0, m(&C1), bsz)
		if tr != tc {
			// this happens when n(A) not multiple of LB; use only the
			// leading square part of the reflector block
			var Tmp cmat.FloatMatrix
			Tmp.SubMatrix(&T01, 0, 0, tc, tc)
			updateWithQTRight(&C1, &C2, &A11, &A21, &Tmp, &Wrk, transpose, conf)
		} else {
			updateWithQTRight(&C1, &C2, &A11, &A21, &T01, &Wrk, transpose, conf)
		}
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, pAdir)
		util.Continue1x3to1x2(
			&TL, &TR, &T00, &T01, T, pTdir)
		util.Continue1x3to1x2(
			&CL, &CR, &C0, &C1, C, pCdir)

		nb = lb
	}
}
예제 #11
0
파일: lu.go 프로젝트: hrautila/gomas
// unblocked LU decomposition with pivots: FLAME LU variant 3; Left-looking
//
// A is overwritten with its factors and p receives the pivot row indexes, expressed
// as row numbers of A. Columns are processed one at a time: earlier pivots are
// applied to the column, the column is updated with the factors computed so far,
// the pivot row is selected and swapped in, and the part below the diagonal is
// scaled with the pivot element.
//
// A zero pivot element is reported with a gomas.ESINGULAR error. Only the first such
// error is kept and the decomposition continues over the remaining columns. The test
// is made on the diagonal element after the pivot rows are swapped, so a column with
// a zero on the diagonal but a non-zero entry below it is not treated as singular.
//
// If A has more columns than rows the extra columns are pivoted and solved against
// the unit lower triangular factor at the end; conf is used only for that solve.
//
// NOTE(review): parameter offset is not referenced in the body.
func unblockedLUpiv(A *cmat.FloatMatrix, p *Pivots, offset int, conf *gomas.Config) *gomas.Error {
	var err *gomas.Error
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a10, a11, a12, A20, a21, A22 cmat.FloatMatrix
	var AL, AR, A0, a1, A2, aB1, AB0 cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	partitionPivot2x1(
		&pT,
		&pB, *p, 0, util.PTOP)

	for m(&ATL) < m(A) && n(&ATL) < n(A) {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			&a10, &a11, &a12,
			&A20, &a21, &A22 /**/, A, 1, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &a1, &A2 /**/, A, 1, util.PRIGHT)
		repartPivot2x1to3x1(&pT,
			&p0, &p1, &p2 /**/, *p, 1, util.PBOTTOM)

		// apply previously computed pivots on current column
		applyPivots(&a1, p0)

		// a01 = trilu(A00) \ a01 (TRSV)
		blasd.MVSolveTrm(&a01, &A00, 1.0, gomas.LOWER|gomas.UNIT)
		// a11 = a11 - a10 *a01
		aval := a11.Get(0, 0) - blasd.Dot(&a10, &a01)
		a11.Set(0, 0, aval)
		// a21 = a21 -A20*a01
		blasd.MVMult(&a21, &A20, &a01, -1.0, 1.0, gomas.NONE)

		// pivot index on current column [a11, a21].T
		aB1.Column(&ABR, 0)
		p1[0] = pivotIndex(&aB1)
		// pivots to current column
		applyPivots(&aB1, p1)

		// a21 = a21 / a11
		// The pivot swap may have replaced a11, so the singularity test and the
		// scaling both use the value that is on the diagonal now.
		pivot := a11.Get(0, 0)
		if pivot == 0.0 {
			if err == nil {
				ij := m(&ATL) + p1[0] - 1
				err = gomas.NewError(gomas.ESINGULAR, "DecomposeLU", ij)
			}
		} else {
			blasd.InvScale(&a21, pivot)
		}

		// apply pivots to previous columns
		AB0.SubMatrix(&ABL, 0, 0)
		applyPivots(&AB0, p1)
		// scale last pivots to origin matrix row numbers
		p1[0] += m(&ATL)

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR, &A0, &a1, A, util.PRIGHT)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, *p, util.PBOTTOM)
	}
	if n(&ATL) < n(A) {
		// columns beyond the square part: pivot them and solve with the unit
		// lower triangular factor
		applyPivots(&ATR, *p)
		blasd.SolveTrm(&ATR, &ATL, 1.0, gomas.LEFT|gomas.UNIT|gomas.LOWER, conf)
	}
	return err
}
예제 #12
0
파일: lu.go 프로젝트: hrautila/gomas
// blocked LU decomposition with pivots: FLAME LU variant 3; left-looking version
//
// A is overwritten with its factors and p receives the pivot row indexes, expressed
// as row numbers of A. Columns are processed in panels of nb columns: earlier pivots
// are applied to the panel, the panel is updated with the factors computed so far,
// the panel is factored with unblockedLUpiv, and the new pivots are applied to the
// columns on the left.
//
// The first error reported by unblockedLUpiv for any panel is returned; the
// decomposition continues over the remaining panels in the same way the unblocked
// version continues after a zero pivot.
// NOTE(review): the index carried by that error is computed inside the panel call --
// confirm whether callers expect it relative to the panel or to A.
//
// If A has more columns than rows the extra columns are pivoted and solved against
// the unit lower triangular factor at the end.
func blockedLUpiv(A *cmat.FloatMatrix, p *Pivots, nb int, conf *gomas.Config) *gomas.Error {
	var err *gomas.Error
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix
	var AL, AR, A0, A1, A2, AB1, AB0 cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition1x2(
		&AL, &AR, A, 0, util.PLEFT)
	partitionPivot2x1(
		&pT,
		&pB, *p, 0, util.PTOP)

	for m(&ATL) < m(A) && n(&ATL) < n(A) {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, &A02,
			&A10, &A11, &A12,
			&A20, &A21, &A22 /**/, A, nb, util.PBOTTOMRIGHT)
		util.Repartition1x2to1x3(&AL,
			&A0, &A1, &A2 /**/, A, nb, util.PRIGHT)
		repartPivot2x1to3x1(&pT,
			&p0, &p1, &p2 /**/, *p, nb, util.PBOTTOM)

		// apply previously computed pivots
		applyPivots(&A1, p0)

		// A01 = trilu(A00) \ A01 (TRSM)
		blasd.SolveTrm(&A01, &A00, 1.0, gomas.LOWER|gomas.UNIT)
		// A11 = A11 - A10*A01
		blasd.Mult(&A11, &A10, &A01, -1.0, 1.0, gomas.NONE)
		// A21 = A21 - A20*A01
		blasd.Mult(&A21, &A20, &A01, -1.0, 1.0, gomas.NONE)

		// LU_piv(AB1, p1); keep the first error, do not drop it
		AB1.SubMatrix(&ABR, 0, 0, m(&ABR), n(&A11))
		if e := unblockedLUpiv(&AB1, &p1, m(&ATL), conf); e != nil && err == nil {
			err = e
		}

		// apply pivots to previous columns
		AB0.SubMatrix(&ABL, 0, 0)
		applyPivots(&AB0, p1)
		// scale last pivots to origin matrix row numbers
		for k := range p1 {
			p1[k] += m(&ATL)
		}

		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR /**/, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue1x3to1x2(
			&AL, &AR /**/, &A0, &A1, A, util.PRIGHT)
		contPivot3x1to2x1(
			&pT,
			&pB /**/, p0, p1, *p, util.PBOTTOM)
	}
	if n(&ATL) < n(A) {
		// columns beyond the square part: pivot them and solve with the unit
		// lower triangular factor
		applyPivots(&ATR, *p)
		blasd.SolveTrm(&ATR, &ATL, 1.0, gomas.LEFT|gomas.UNIT|gomas.LOWER)
	}
	return err
}