Exemplo n.º 1
0
/*
 * Compute LDL^T factorization of real symmetric matrix.
 *
 * Computes the factorization of a real symmetric matrix A using the Bunch-Kaufman
 * pivoting method. The form of factorization is
 *
 *    A = L*D*L.T  or A = U*D*U.T
 *
 * where L (or U) is product of permutation and unit lower (or upper) triangular matrix
 * and D is block diagonal symmetric matrix with 1x1 and 2x2 blocks.
 *
 * Arguments
 *  A     On entry, the N-by-N symmetric matrix A. If flags bit LOWER (or UPPER) is set then
 *        lower (or upper) triangular matrix and strictly upper (or lower) part is not
 *        accessed. On exit, the block diagonal matrix D and lower (or upper) triangular
 *        product matrix L (or U).
 *
 *  W     Workspace, size as returned by BKFactorWork(). Must not be nil.
 *
 *  ipiv  Pivot vector. All entries are reset to zero on entry, also when the call
 *        fails on the workspace check. On exit details of interchanges and the block
 *        structure of D. If ipiv[k] > 0 then D[k,k] is 1x1 and rows and columns k and
 *        ipiv[k]-1 were changed. If ipiv[k] == ipiv[k+1] < 0 then D[k,k] is 2x2. If A is
 *        lower then rows and columns k+1 and ipiv[k]-1 were changed. And if A is upper
 *        then rows and columns k and ipiv[k]-1 were changed.
 *
 *  flags Indicator bits, LOWER or UPPER. If neither bit is set nothing is factored
 *        and nil is returned.
 *
 *  confs Optional blocking configuration. If not provided then the configuration
 *        returned by gomas.CurrentConf() is used.
 *
 * Returns
 *  EWORK error if the workspace is nil or smaller than BKFactorWork() requires,
 *  otherwise the error indicator of the selected factorization routine.
 *
 *  Unblocked algorithm is used if blocking configuration LB is zero or if N < LB.
 *
 *  Compatible with lapack.SYTRF.
 */
func BKFactor(A, W *cmat.FloatMatrix, ipiv Pivots, flags int, confs ...*gomas.Config) *gomas.Error {
	var err *gomas.Error
	conf := gomas.CurrentConf(confs...)

	// clear stale pivot information before doing anything else
	for k := range ipiv {
		ipiv[k] = 0
	}
	wsz := BKFactorWork(A, conf)
	// a nil workspace is rejected the same way as a too small one
	if W == nil || W.Len() < wsz {
		return gomas.NewError(gomas.EWORK, "DecomposeBK", wsz)
	}

	var Wrk cmat.FloatMatrix
	if n(A) < conf.LB || conf.LB == 0 {
		// make workspace rows(A)*2 matrix
		Wrk.SetBuf(m(A), 2, m(A), W.Data())
		if flags&gomas.LOWER != 0 {
			err, _ = unblkDecompBKLower(A, &Wrk, ipiv, conf)
		} else if flags&gomas.UPPER != 0 {
			err, _ = unblkDecompBKUpper(A, &Wrk, ipiv, conf)
		}
	} else {
		// make workspace rows(A)*(LB+1) matrix
		Wrk.SetBuf(m(A), conf.LB+1, m(A), W.Data())
		if flags&gomas.LOWER != 0 {
			err = blkDecompBKLower(A, &Wrk, &ipiv, conf)
		} else if flags&gomas.UPPER != 0 {
			err = blkDecompBKUpper(A, &Wrk, &ipiv, conf)
		}
	}
	return err
}
Exemplo n.º 2
0
Arquivo: rq.go Projeto: hrautila/gomas
/*
 * Compute RQ factorization of a M-by-N matrix A: A = R*Q
 *
 * Arguments:
 *  A    On entry, the M-by-N matrix A, M <= N. On exit, upper triangular matrix R
 *       and the orthogonal matrix Q as product of elementary reflectors.
 *
 * tau  On exit, the scalar factors of the elementary reflectors.
 *
 * W    Workspace, M-by-nb matrix used for work space in blocked invocations.
 *      Must not be nil and must hold at least wsRQ(A, 0) elements.
 *
 * conf The blocking configuration. If nil then default blocking configuration
 *      is used. Member conf.LB defines blocking size of blocked algorithms.
 *      If it is zero then unblocked algorithm is used. The blocking size is
 *      further limited to what fits into the provided workspace.
 *
 * Returns:
 *      Error indicator: ESIZE if M > N, EWORK if workspace is missing or too small,
 *      nil otherwise.
 *
 * Additional information
 *
 *  Orthogonal matrix Q is product of elementary reflectors H(k)
 *
 *    Q = H(0)H(1),...,H(K-1), where K = min(M,N)
 *
 *  Elementary reflector H(k) is stored on row k of A right of the diagonal with
 *  implicit unit value on diagonal entry. The vector TAU holds scalar factors of
 *  the elementary reflectors.
 *
 *  Contents of matrix A after factorization is as follow:
 *
 *    ( v0 v0 r  r  r  r )  M=4, N=6
 *    ( v1 v1 v1 r  r  r )
 *    ( v2 v2 v2 v2 r  r )
 *    ( v3 v3 v3 v3 v3 r )
 *
 *  where r is element of R, vk is element of H(k).
 *
 *  RQFactor is compatible with lapack.DGERQF
 */
func RQFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
	conf := gomas.CurrentConf(confs...)

	// must have: M <= N
	if m(A) > n(A) {
		return gomas.NewError(gomas.ESIZE, "RQFactor")
	}

	// minimum workspace is sized with the same RQ function that drives the
	// blocking estimate below
	wsmin := wsRQ(A, 0)
	if W == nil || W.Len() < wsmin {
		return gomas.NewError(gomas.EWORK, "RQFactor", wsmin)
	}
	// blocking factor: what the workspace allows, capped by configuration
	lb := estimateLB(A, W.Len(), wsRQ)
	lb = imin(lb, conf.LB)
	if lb == 0 || m(A) <= lb {
		unblockedRQ(A, tau, W)
	} else {
		var Twork, Wrk cmat.FloatMatrix
		// block reflector T in first lb*lb elements in workspace
		// the rest, (m(A)-lb)*lb elements, is workspace for intermediate matrix operands
		Twork.SetBuf(lb, lb, lb, W.Data())
		Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():])
		blockedRQ(A, tau, &Twork, &Wrk, lb, conf)
	}
	return nil
}
Exemplo n.º 3
0
/*
 * Generate the M by N matrix Q with orthogonal rows which
 * are defined as the first M rows of the product of K first elementary
 * reflectors.
 *
 * Arguments
 *   A     On entry, the elementary reflectors as returned by LQFactor().
 *         stored right of diagonal of the M by N matrix A.
 *         On exit, the orthogonal matrix Q
 *
 *   tau   Scalar coefficients of elementary reflectors
 *
 *   W     Workspace, must not be nil and must hold at least wsBuildLQ(A, 0)
 *         elements.
 *
 *   K     The number of elementary reflector whose product define the matrix Q.
 *         Must satisfy 0 < K <= cols(A).
 *
 *   conf  Optional blocking configuration. The blocking factor conf.LB is reduced
 *         to what fits into the provided workspace.
 *
 * Returns
 *   EVALUE error for an invalid K, EWORK error for missing or too small workspace,
 *   nil otherwise.
 *
 * Compatible with lapack.DORGLQ.
 */
func LQBuild(A, tau, W *cmat.FloatMatrix, K int, confs ...*gomas.Config) *gomas.Error {
	conf := gomas.CurrentConf(confs...)
	if K <= 0 || K > n(A) {
		return gomas.NewError(gomas.EVALUE, "LQBuild", K)
	}
	wsz := wsBuildLQ(A, 0)
	if W == nil || W.Len() < wsz {
		return gomas.NewError(gomas.EWORK, "LQBuild", wsz)
	}

	// adjust blocking factor for workspace size; never exceed the configured
	// factor and never use more than the workspace can hold, otherwise the
	// Twork/Wrk views below would run past the end of W
	lb := estimateLB(A, W.Len(), wsBuildLQ)
	if conf.LB < lb {
		lb = conf.LB
	}
	if lb == 0 || m(A) <= lb {
		unblkBuildLQ(A, tau, W, m(A)-K, n(A)-K, true)
	} else {
		var Twork, Wrk cmat.FloatMatrix
		// block reflector in first lb*lb elements, rest is intermediate workspace
		Twork.SetBuf(lb, lb, lb, W.Data())
		Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():])
		blkBuildLQ(A, tau, &Twork, &Wrk, K, lb, conf)
	}
	return nil
}
Exemplo n.º 4
0
Arquivo: ql.go Projeto: hrautila/gomas
/*
 * Compute QL factorization of a M-by-N matrix A: A = Q * L.
 *
 * Arguments:
 *  A    On entry, the M-by-N matrix A, M >= N. On exit, lower triangular matrix L
 *       and the orthogonal matrix Q as product of elementary reflectors.
 *
 * tau  On exit, the scalar factors of the elementary reflectors. Must hold at
 *      least N elements.
 *
 * W    Workspace, N-by-nb matrix used for work space in blocked invocations.
 *      Must not be nil and must hold at least wsQL(A, 0) elements.
 *
 * conf The blocking configuration. If nil then default blocking configuration
 *      is used. Member conf.LB defines blocking size of blocked algorithms.
 *      If it is zero then unblocked algorithm is used. The blocking size is
 *      further limited to what fits into the provided workspace.
 *
 * Returns:
 *      Error indicator: ESIZE if M < N or tau is too short, EWORK if workspace
 *      is missing or too small, nil otherwise.
 *
 * Additional information
 *
 *  Orthogonal matrix Q is product of elementary reflectors H(k)
 *
 *    Q = H(K-1)...H(1)H(0), where K = min(M,N)
 *
 *  Elementary reflector H(k) is stored on column k of A above the diagonal with
 *  implicit unit value on diagonal entry. The vector TAU holds scalar factors
 *  of the elementary reflectors.
 *
 *  Contents of matrix A after factorization is as follow:
 *
 *    ( v0 v1 v2 v3 )   for M=6, N=4
 *    ( v0 v1 v2 v3 )
 *    ( l  v1 v2 v3 )
 *    ( l  l  v2 v3 )
 *    ( l  l  l  v3 )
 *    ( l  l  l  l  )
 *
 *  where l is element of L, vk is element of H(k).
 *
 * QLFactor is compatible with lapack.DGEQLF
 */
func QLFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
	var tauh cmat.FloatMatrix
	conf := gomas.CurrentConf(confs...)

	if m(A) < n(A) {
		return gomas.NewError(gomas.ESIZE, "QLFactor")
	}
	wsmin := wsQL(A, 0)
	if W == nil || W.Len() < wsmin {
		return gomas.NewError(gomas.EWORK, "QLFactor", wsmin)
	}
	if tau.Len() < n(A) {
		return gomas.NewError(gomas.ESIZE, "QLFactor")
	}
	// only the first n(A) scalar factors are used
	tauh.SubMatrix(tau, 0, 0, n(A), 1)
	// blocking factor: what the workspace allows, capped by configuration
	lb := estimateLB(A, W.Len(), wsQL)
	lb = imin(lb, conf.LB)

	if lb == 0 || n(A) <= lb {
		unblockedQL(A, &tauh, W)
	} else {
		var Twork, Wrk cmat.FloatMatrix
		// block reflector T in first lb*lb elements in workspace
		// the rest, (n(A)-lb)*lb elements, is workspace for intermediate matrix
		// operands; buffers are shaped with the effective blocking factor lb, the
		// same value handed to blockedQL, so they never exceed the workspace
		Twork.SetBuf(lb, lb, -1, W.Data())
		Wrk.SetBuf(n(A)-lb, lb, -1, W.Data()[Twork.Len():])
		blockedQL(A, &tauh, &Twork, &Wrk, lb, conf)
	}
	return nil
}
Exemplo n.º 5
0
/*
 * Compute QR factorization of a M-by-N matrix A using compact WY transformation: A = Q * R,
 * where Q = I - Y*T*Y.T, T is block reflector and Y holds elementary reflectors as lower
 * trapezoidal matrix saved below diagonal elements of the matrix A.
 *
 * Arguments:
 *  A    On entry, the M-by-N matrix A, M >= N. On exit, the elements on and above
 *       the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R.
 *       The elements below the diagonal with the matrix 'T', represent
 *       the orthogonal matrix Q as product of elementary reflectors.
 *
 * T     On exit, the block reflectors which, together with trilu(A) represent
 *       the orthogonal matrix Q as Q = I - Y*T*Y.T where Y = trilu(A).
 *       For blocked invocation T must be a LB-by-N matrix holding the sequence
 *       T = [T(0) T(1) .. T(K-1)] of block reflectors, K = ceiling(N/LB); each
 *       T(n) is LB*LB except the last one which may be smaller.
 *       If LB is zero or LB > N then T must be a N-by-N matrix.
 *
 * W     Workspace, required size returned by QRTFactorWork().
 *
 * conf  Optional blocking configuration. If not provided then default configuration
 *       is used.
 *
 * Returns:
 *      Error indicator: ESIZE if M < N, EWORK if workspace is missing or too small,
 *      ESMALL if T does not have the expected shape, otherwise the result of the
 *      selected factorization routine.
 *
 * QRTFactor is compatible with lapack.DGEQRT
 */
func QRTFactor(A, T, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
	conf := gomas.CurrentConf(confs...)

	if m(A) < n(A) {
		return gomas.NewError(gomas.ESIZE, "QRTFactor")
	}
	if need := QRTFactorWork(A, conf); W == nil || W.Len() < need {
		return gomas.NewError(gomas.EWORK, "QRTFactor", need)
	}

	// expected shape of T: LB-by-N for blocked invocation, N-by-N otherwise
	trows, tcols := T.Size()
	wantRows, wantCols := conf.LB, n(A)
	if conf.LB == 0 || conf.LB > n(A) {
		wantRows = n(A)
	}
	if trows != wantRows || tcols != wantCols {
		return gomas.NewError(gomas.ESMALL, "QRTFactor", wantRows*wantCols)
	}

	if conf.LB == 0 || n(A) <= conf.LB {
		return unblockedQRT(A, T, W)
	}
	// blocked version works on workspace shaped as N-by-LB matrix
	return blockedQRT(A, T, cmat.MakeMatrix(n(A), conf.LB, W.Data()), conf)
}
Exemplo n.º 6
0
// axpby hands vectors Y and X with scalars alpha and beta to the C vector
// kernels. When beta is exactly 1.0 the kernel __d_vec_axpy is called (beta is
// not passed on), otherwise __d_vec_axpby. N is forwarded as the element count.
// Presumably computes Y = beta*Y + alpha*X -- confirm against the C sources.
func axpby(Y, X *cmat.FloatMatrix, alpha, beta float64, N int) {
	// a matrix with a single row is stepped through with its stride,
	// everything else is treated as contiguous
	xinc := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		xinc = C.int(X.Stride())
	}
	yinc := C.int(1)
	if rows, _ := Y.Size(); rows == 1 {
		yinc = C.int(Y.Stride())
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: xinc}
	yv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&Y.Data()[0])), inc: yinc}

	if beta != 1.0 {
		C.__d_vec_axpby(
			(*C.mvec_t)(unsafe.Pointer(&yv)),
			(*C.mvec_t)(unsafe.Pointer(&xv)),
			C.double(alpha), C.double(beta), C.int(N))
		return
	}
	C.__d_vec_axpy(
		(*C.mvec_t)(unsafe.Pointer(&yv)),
		(*C.mvec_t)(unsafe.Pointer(&xv)),
		C.double(alpha), C.int(N))
}
Exemplo n.º 7
0
/*
 * Reduce general matrix A to upper Hessenberg form H by similarity
 * transformation H = Q.T*A*Q.
 *
 * Arguments:
 *  A    On entry, the general matrix A. On exit, the elements on and
 *       above the first subdiagonal contain the reduced matrix H.
 *       The elements below the first subdiagonal with the vector tau
 *       represent the orthogonal matrix Q as product of elementary reflectors.
 *
 *  tau  On exit, the scalar factors of the elementary reflectors.
 *
 *  W    Workspace, as defined by HessReduceWork(). Must not be nil and must hold
 *       at least rows(A) elements. If it is smaller than HessReduceWork() reports
 *       the blocking factor is re-estimated for the available size.
 *
 *  conf The blocking configuration.
 *
 * Returns:
 *  EWORK error if workspace is missing or too small, otherwise the error
 *  indicator of the blocked reduction or nil for the unblocked one.
 *
 * HessReduce is compatible with lapack.DGEHRD.
 */
func HessReduce(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
	conf := gomas.CurrentConf(confs...)

	wmin := m(A)
	wopt := HessReduceWork(A, conf)
	if W == nil || W.Len() < wmin {
		return gomas.NewError(gomas.EWORK, "ReduceHess", wmin)
	}
	wsz := W.Len()
	// use configured blocking factor when workspace is big enough for it,
	// otherwise estimate a blocking factor that fits the workspace
	lb := conf.LB
	if wsz < wopt {
		lb = estimateLB(A, wsz, wsHess)
	}
	if lb == 0 || n(A) <= lb {
		unblkHessGQvdG(A, tau, W, 0)
		return nil
	}
	// blocked version
	var W0 cmat.FloatMatrix
	// shape workspace for blocked algorithm
	W0.SetBuf(m(A)+lb, lb, m(A)+lb, W.Data())
	// hand the blocked routine's error indicator to the caller
	return blkHessGQvdG(A, tau, &W0, lb, conf)
}
Exemplo n.º 8
0
/*
 * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix
 * defined as the product of k elementary reflectors and block reflector T
 *
 *    Q = H(1) H(2) . . . H(k)
 *
 * as returned by QRTFactor().
 *
 * Arguments:
 *  C     On entry, the M-by-N matrix C. On exit C is overwritten by the product.
 *
 *  A     QR factorization as returned by QRTFactor() where the lower trapezoidal
 *        part holds the elementary reflectors.
 *
 *  T     The block reflector that belongs to the elementary reflectors in A.
 *
 *  W     Workspace, size as returned by QRTMultWork()
 *
 *  flags Indicators. Valid indicators LEFT, RIGHT, TRANS, NOTRANS
 *
 *  confs Optional blocking configuration
 *
 * Checks made by this function:
 *   - workspace is not nil and holds at least QRTMultWork() elements (EWORK)
 *   - if RIGHT, cols(C) >= rows(A); otherwise rows(C) >= cols(A) (ESIZE)
 *
 * Preconditions on T are not verified here:
 *   a.   cols(A) == cols(T),
 *          columns A define number of elementary reflector, must match order of block reflector.
 *   b.   if conf.LB == 0, cols(T) == rows(T)
 *   c.   if conf.LB != 0, rows(T) == conf.LB
 *
 * Compatible with lapack.DGEMQRT
 */
func QRTMult(C, A, T, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error {
	conf := gomas.CurrentConf(confs...)

	if need := QRTMultWork(C, T, flags, conf); W == nil || W.Len() < need {
		return gomas.NewError(gomas.EWORK, "QRTMult", need)
	}

	// dimension of C on the multiplication side must cover the reflectors
	var fits bool
	if flags&gomas.RIGHT == gomas.RIGHT {
		fits = n(C) >= m(A)
	} else {
		fits = m(C) >= n(A)
	}
	if !fits {
		return gomas.NewError(gomas.ESIZE, "QRTMult")
	}

	var Wrk cmat.FloatMatrix
	if flags&gomas.RIGHT == 0 {
		// multiplication from left, workspace is cols(C)-by-LB
		Wrk.SetBuf(n(C), conf.LB, n(C), W.Data())
		blockedMultQTLeft(C, A, T, &Wrk, flags, conf)
		return nil
	}
	// multiplication from right, workspace is rows(C)-by-LB
	Wrk.SetBuf(m(C), conf.LB, m(C), W.Data())
	blockedMultQTRight(C, A, T, &Wrk, flags, conf)
	return nil
}
Exemplo n.º 9
0
/*
 * Reduce upper triangular matrix to tridiagonal.
 *
 * Elementary reflectors Q = H(n-1)...H(2)H(1) are stored on upper
 * triangular part of A. Reflector H(n-1) saved at column A(n) and
 * scalar multiplier to tau[n-1]. If parameter `tail` is true then
 * this function is used to reduce tail part of partially reduced
 * matrix and tau-vector partitioning is starting from last position.
 *
 * Arguments
 *  A     Matrix to reduce; traversed from bottom-right to top-left, one
 *        column per iteration.
 *  tauq  Vector receiving the scalar factors, one element per iteration.
 *  W     Workspace; its buffer is used for the temporary vector y21 which has
 *        as many elements as the current top-left block A00 has columns.
 *  tail  Selects the initial split of tauq: offset 0 from bottom if true,
 *        offset 1 otherwise.
 */
func unblkReduceTridiagUpper(A, tauq, W *cmat.FloatMatrix, tail bool) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a01, a11, A22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var y21 cmat.FloatMatrix
	var v0 float64

	// initial offset into tauq from its bottom end
	toff := 1
	if tail {
		toff = 0
	}
	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, toff, util.PBOTTOM)

	// loop until the top-left block is exhausted
	for n(&ATL) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, nil,
			nil, &a11, nil,
			nil, nil, &A22, A, 1, util.PTOPLEFT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PTOP)
		// set temp vectors for this round
		y21.SetBuf(n(&A00), 1, n(&A00), W.Data())
		// ------------------------------------------------------

		// Compute householder to zero super-diagonal entries
		computeHouseholderRev(&a01, &tauq1)
		tauqv := tauq1.Get(0, 0)

		// set superdiagonal to unit; original value is saved to v0
		v0 = a01.Get(-1, 0)
		a01.Set(-1, 0, 1.0)

		// y21 := tauq*A00*a01
		blasd.MVMultSym(&y21, &A00, &a01, tauqv, 0.0, gomas.UPPER)
		// beta := tauq*a01.T*y21
		beta := tauqv * blasd.Dot(&a01, &y21)
		// y21  := y21 - 0.5*beta*a01
		blasd.Axpy(&y21, &a01, -0.5*beta)
		// A00 := A00 - a01*y21.T - y21*a01.T
		blasd.MVUpdate2Sym(&A00, &a01, &y21, -1.0, gomas.UPPER)

		// restore superdiagonal value
		a01.Set(-1, 0, v0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PTOP)
	}
}
Exemplo n.º 10
0
// minvscale passes matrix A, scalar alpha and the dimensions M, N to the C
// kernel __d_blk_invscale. Presumably divides the M-by-N leading block of A
// by alpha in place -- confirm against the C sources.
func minvscale(A *cmat.FloatMatrix, alpha float64, M, N int) {
	// describe A for the C side: pointer to first element and column stride
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&A.Data()[0])),
		step: C.int(A.Stride()),
	}
	C.__d_blk_invscale(
		(*C.mdata_t)(unsafe.Pointer(&am)), C.double(alpha), C.int(M), C.int(N))
}
Exemplo n.º 11
0
/*
 * Tridiagonal reduction of LOWER triangular symmetric matrix, zero elements below 1st
 * subdiagonal:
 *
 *   A =  (I - tau*u*u.T)*A*(I - tau*u*u.T)
 *     =  (I - tau*( 0   0   )) (a11 a12) (I - tau*( 0  0   ))
 *        (        ( 0  u*u.T)) (a21 A22) (        ( 0 u*u.T))
 *
 *  a11, a12, a21 not affected
 *
 *  from LEFT:
 *    A22 = A22 - tau*u*u.T*A22
 *  from RIGHT:
 *    A22 = A22 - tau*A22*u*u.T
 *
 *  LEFT and RIGHT:
 *    A22   = A22 - tau*u*u.T*A22 - tau*(A22 - tau*u*u.T*A22)*u*u.T
 *          = A22 - tau*u*u.T*A22 - tau*A22*u*u.T + tau*tau*u*u.T*A22*u*u.T
 *    [x    = tau*A22*u (vector)]  (SYMV)
 *    A22   = A22 - u*x.T - x*u.T + tau*u*u.T*x*u.T
 *    [beta = tau*u.T*x (scalar)]  (DOT)
 *          = A22 - u*x.T - x*u.T + beta*u*u.T
 *          = A22 - u*(x - 0.5*beta*u).T - (x - 0.5*beta*u)*u.T
 *    [w    = x - 0.5*beta*u]      (AXPY)
 *          = A22 - u*w.T - w*u.T  (SYR2)
 *
 * Result of reduction for N = 5:
 *    ( d  .  .  . . )
 *    ( e  d  .  . . )
 *    ( v1 e  d  . . )
 *    ( v1 v2 e  d . )
 *    ( v1 v2 v3 e d )
 *
 * Arguments
 *  A     Matrix to reduce; traversed from top-left to bottom-right, one
 *        column per iteration.
 *  tauq  Vector receiving the scalar factors, one element per iteration.
 *  W     Workspace; its buffer is used for the temporary vector y21 which has
 *        as many elements as the current trailing block A22 has columns.
 */
func unblkReduceTridiagLower(A, tauq, W *cmat.FloatMatrix) {
	var ATL, ABR cmat.FloatMatrix
	var A00, a11, a21, A22 cmat.FloatMatrix
	var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
	var y21 cmat.FloatMatrix
	var v0 float64

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tqT,
		&tqB, tauq, 0, util.PTOP)

	// loop until the bottom-right block is exhausted
	for m(&ABR) > 0 && n(&ABR) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &a11, nil,
			nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tqT,
			&tq0,
			&tauq1,
			&tq2, tauq, 1, util.PBOTTOM)
		// set temp vectors for this round
		y21.SetBuf(n(&A22), 1, n(&A22), W.Data())
		// ------------------------------------------------------

		// Compute householder to zero subdiagonal entries
		computeHouseholderVec(&a21, &tauq1)
		tauqv := tauq1.Get(0, 0)

		// set subdiagonal to unit; original value is saved to v0
		v0 = a21.Get(0, 0)
		a21.Set(0, 0, 1.0)

		// y21 := tauq*A22*a21
		blasd.MVMultSym(&y21, &A22, &a21, tauqv, 0.0, gomas.LOWER)
		// beta := tauq*a21.T*y21
		beta := tauqv * blasd.Dot(&a21, &y21)
		// y21  := y21 - 0.5*beta*a21
		blasd.Axpy(&y21, &a21, -0.5*beta)
		// A22 := A22 - a21*y21.T - y21*a21.T
		blasd.MVUpdate2Sym(&A22, &a21, &y21, -1.0, gomas.LOWER)

		// restore subdiagonal
		a21.Set(0, 0, v0)
		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tqT,
			&tqB, &tq0, &tauq1, tauq, util.PBOTTOM)
	}
}
Exemplo n.º 12
0
// vinvscal passes vector X, scalar alpha and element count N to the C kernel
// __d_vec_invscal. Presumably divides the N first elements of X by alpha in
// place -- confirm against the C sources.
func vinvscal(X *cmat.FloatMatrix, alpha float64, N int) {
	// a single-row matrix is stepped through with its stride, otherwise
	// elements are taken as contiguous
	step := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		step = C.int(X.Stride())
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: step}
	C.__d_vec_invscal(
		(*C.mvec_t)(unsafe.Pointer(&xv)), C.double(alpha), C.int(N))
}
Exemplo n.º 13
0
// plus passes matrices Ac and Bc, scalars alpha and beta, the flag bits and
// the index arguments S, L, R, E unchanged to the C kernel __d_scale_plus.
// NOTE(review): the meaning of S, L, R and E is defined by the C kernel and
// is not visible here.
func plus(Ac, Bc *cmat.FloatMatrix, alpha, beta float64, bits, S, L, R, E int) {
	// describe both operands for the C side: first element and column stride
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Ac.Data()[0])),
		step: C.int(Ac.Stride()),
	}
	bm := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Bc.Data()[0])),
		step: C.int(Bc.Stride()),
	}

	C.__d_scale_plus(
		(*C.mdata_t)(unsafe.Pointer(&am)),
		(*C.mdata_t)(unsafe.Pointer(&bm)),
		C.double(alpha), C.double(beta), C.int(bits),
		C.int(S), C.int(L), C.int(R), C.int(E))
}
Exemplo n.º 14
0
// sum passes vector X and element count N to the C kernel
// __d_vec_sum_recursive and returns its result converted to float64.
func sum(X *cmat.FloatMatrix, N int) float64 {
	// a single-row matrix is stepped through with its stride, otherwise
	// elements are taken as contiguous
	step := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		step = C.int(X.Stride())
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: step}
	return float64(C.__d_vec_sum_recursive(
		(*C.mvec_t)(unsafe.Pointer(&xv)), C.int(N)))
}
Exemplo n.º 15
0
// iamax passes vector X and element count N to the C kernel __d_vec_iamax and
// returns its result converted to int. Presumably the index of the element
// with largest absolute value -- confirm against the C sources.
func iamax(X *cmat.FloatMatrix, N int) int {
	// a single-row matrix is stepped through with its stride, otherwise
	// elements are taken as contiguous
	step := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		step = C.int(X.Stride())
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: step}
	return int(C.__d_vec_iamax(
		(*C.mvec_t)(unsafe.Pointer(&xv)), C.int(N)))
}
Exemplo n.º 16
0
// mtranspose passes matrices A and B with dimensions M and N to the C kernel
// __d_blk_transpose. Nothing is done when either dimension is zero.
// NOTE(review): which operand is source and which is destination is decided
// by the C kernel -- confirm there.
func mtranspose(A, B *cmat.FloatMatrix, M, N int) {
	if M == 0 || N == 0 {
		// empty block, the kernel is not called at all
		return
	}

	// describe both operands for the C side: first element and column stride
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&A.Data()[0])),
		step: C.int(A.Stride()),
	}
	bm := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&B.Data()[0])),
		step: C.int(B.Stride()),
	}
	C.__d_blk_transpose(
		(*C.mdata_t)(unsafe.Pointer(&am)),
		(*C.mdata_t)(unsafe.Pointer(&bm)), C.int(M), C.int(N))
}
Exemplo n.º 17
0
/*
 * Driver that reduces symmetric matrix A to tridiagonal form with TRDReduce()
 * and passes the tridiagonal matrix to TRDEigen(). If WANTV is set, TRDBuild()
 * is called in between and A is handed to TRDEigen() as well.
 * Presumably D holds the eigenvalues and A the eigenvectors on exit -- confirm
 * against TRDEigen().
 *
 * Arguments
 *  D      Vector of N elements; receives a copy of the diagonal of the reduced
 *         matrix and is then passed to TRDEigen().
 *
 *  A      The N-by-N symmetric matrix. Overwritten by the reduction.
 *
 *  W      Workspace. Laid out as N-1 elements for the off-diagonal, N elements
 *         for the scalar factors of the reduction and the rest as workspace for
 *         the called routines. If WANTV is set at least 3*N elements are required.
 *
 *  bits   Indicator bits: LOWER or UPPER selects the stored triangular part
 *         (LOWER if neither is set); WANTV requests TRDBuild().
 *
 *  confs  Optional blocking configuration.
 *
 * Returns
 *  ESIZE error if A is not square or D does not have N elements, EWORK error if
 *  WANTV is set and workspace is smaller than 3*N, otherwise the error of the
 *  first failing sub-step updated with "EigenSym", or nil.
 */
func EigenSym(D, A, W *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (err *gomas.Error) {

	var sD, sE, E, tau, Wred cmat.FloatMatrix
	var vv *cmat.FloatMatrix

	conf := gomas.CurrentConf(confs...)

	if m(A) != n(A) || D.Len() != m(A) {
		return gomas.NewError(gomas.ESIZE, "EigenSym")
	}
	if bits&gomas.WANTV != 0 && W.Len() < 3*n(A) {
		// report the required size like the other drivers do
		return gomas.NewError(gomas.EWORK, "EigenSym", 3*n(A))
	}

	if bits&(gomas.LOWER|gomas.UPPER) == 0 {
		bits = bits | gomas.LOWER
	}
	// off-diagonal of reduced matrix: first subdiagonal if lower, else first
	// superdiagonal
	ioff := 1
	if bits&gomas.LOWER != 0 {
		ioff = -1
	}
	// workspace layout: [ E (N-1) | tau (N) | rest ]
	eoff := n(A) - 1
	toff := 2*n(A) - 1
	E.SetBuf(n(A)-1, 1, n(A)-1, W.Data())
	tau.SetBuf(n(A), 1, n(A), W.Data()[eoff:])
	// everything behind E and tau is free: Len - (2N-1) elements
	wrl := W.Len() - toff
	Wred.SetBuf(wrl, 1, wrl, W.Data()[toff:])

	// reduce to tridiagonal
	if err = TRDReduce(A, &tau, &Wred, bits, conf); err != nil {
		err.Update("EigenSym")
		return err
	}
	sD.Diag(A)
	sE.Diag(A, ioff)
	blasd.Copy(D, &sD)
	blasd.Copy(&E, &sE)

	if bits&gomas.WANTV != 0 {
		if err = TRDBuild(A, &tau, &Wred, n(A), bits, conf); err != nil {
			err.Update("EigenSym")
			return err
		}
		vv = A
	}

	// resize workspace; tau is not needed any more and only E is kept, so
	// Len - (N-1) elements are free
	wrl = W.Len() - eoff
	Wred.SetBuf(wrl, 1, wrl, W.Data()[eoff:])

	if err = TRDEigen(D, &E, vv, &Wred, bits, conf); err != nil {
		err.Update("EigenSym")
		return err
	}
	return nil
}
Exemplo n.º 18
0
// syrk passes matrices Cc and Ac, scalars alpha and beta, the flag bits, the
// index arguments P, S, E and the blocking parameters KB, NB, MB of conf to
// the C kernel __d_rank_blk. Always returns nil.
// NOTE(review): the meaning of P, S and E is defined by the C kernel and is
// not visible here.
func syrk(Cc, Ac *cmat.FloatMatrix, alpha, beta float64, bits, P, S, E int, conf *gomas.Config) error {
	// describe both operands for the C side: first element and column stride
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Ac.Data()[0])),
		step: C.int(Ac.Stride()),
	}
	cm := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Cc.Data()[0])),
		step: C.int(Cc.Stride()),
	}

	C.__d_rank_blk(
		(*C.mdata_t)(unsafe.Pointer(&cm)),
		(*C.mdata_t)(unsafe.Pointer(&am)),
		C.double(alpha), C.double(beta), C.int(bits),
		C.int(P), C.int(S), C.int(E),
		C.int(conf.KB), C.int(conf.NB), C.int(conf.MB))
	return nil
}
Exemplo n.º 19
0
// trsm passes matrices Bc and Ac, scalar alpha, the flag bits, the index
// arguments N, S, E and the blocking parameters KB, NB, MB of conf to the C
// kernel __d_solve_blocked. Always returns nil.
// NOTE(review): the meaning of N, S and E is defined by the C kernel and is
// not visible here.
func trsm(Bc, Ac *cmat.FloatMatrix, alpha float64, bits, N, S, E int, conf *gomas.Config) error {
	// describe both operands for the C side: first element and column stride
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Ac.Data()[0])),
		step: C.int(Ac.Stride()),
	}
	bm := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Bc.Data()[0])),
		step: C.int(Bc.Stride()),
	}

	C.__d_solve_blocked(
		(*C.mdata_t)(unsafe.Pointer(&bm)),
		(*C.mdata_t)(unsafe.Pointer(&am)),
		C.double(alpha), C.int(bits),
		C.int(N), C.int(S), C.int(E),
		C.int(conf.KB), C.int(conf.NB), C.int(conf.MB))

	return nil
}
Exemplo n.º 20
0
// trmv passes vector X, matrix A, scalar alpha, the flag bits and the size N
// to the C kernel __d_trmv_unb. Always returns nil.
func trmv(X, A *cmat.FloatMatrix, alpha float64, bits, N int) error {
	// if X is a row vector (single row) elements are one stride apart,
	// otherwise they are contiguous
	xinc := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		xinc = C.int(X.Stride())
	}
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&A.Data()[0])),
		step: C.int(A.Stride()),
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: xinc}

	C.__d_trmv_unb(
		(*C.mvec_t)(unsafe.Pointer(&xv)),
		(*C.mdata_t)(unsafe.Pointer(&am)),
		C.double(alpha), C.int(bits), C.int(N))
	return nil
}
Exemplo n.º 21
0
// vswap passes vectors X and Y and element count N to the C kernel
// __d_vec_swap.
func vswap(X, Y *cmat.FloatMatrix, N int) {
	// a single-row matrix is stepped through with its stride, otherwise
	// elements are taken as contiguous
	xinc := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		xinc = C.int(X.Stride())
	}
	yinc := C.int(1)
	if rows, _ := Y.Size(); rows == 1 {
		yinc = C.int(Y.Stride())
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: xinc}
	yv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&Y.Data()[0])), inc: yinc}

	C.__d_vec_swap(
		(*C.mvec_t)(unsafe.Pointer(&xv)),
		(*C.mvec_t)(unsafe.Pointer(&yv)), C.int(N))
}
Exemplo n.º 22
0
// dot passes vectors X and Y and element count N to the C kernel
// __d_vec_dot_recursive and returns its result converted to float64.
func dot(X, Y *cmat.FloatMatrix, N int) float64 {
	// a single-row matrix is stepped through with its stride, otherwise
	// elements are taken as contiguous
	xinc := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		xinc = C.int(X.Stride())
	}
	yinc := C.int(1)
	if rows, _ := Y.Size(); rows == 1 {
		yinc = C.int(Y.Stride())
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: xinc}
	yv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&Y.Data()[0])), inc: yinc}

	return float64(C.__d_vec_dot_recursive(
		(*C.mvec_t)(unsafe.Pointer(&xv)),
		(*C.mvec_t)(unsafe.Pointer(&yv)), C.int(N)))
}
Exemplo n.º 23
0
// gemv passes vectors Y and X, matrix A, scalar alpha, the flag bits and the
// index arguments S, L, R, E to the C kernel __d_gemv_unb.
// NOTE(review): beta is accepted but not forwarded to the kernel (the argument
// is commented out in the call) -- confirm whether callers scale Y themselves.
func gemv(Y, A, X *cmat.FloatMatrix, alpha, beta float64, bits, S, L, R, E int) {
	// if row vectors, elements are one stride apart; otherwise contiguous
	xinc := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		xinc = C.int(X.Stride())
	}
	yinc := C.int(1)
	if rows, _ := Y.Size(); rows == 1 {
		yinc = C.int(Y.Stride())
	}

	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&A.Data()[0])),
		step: C.int(A.Stride()),
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: xinc}
	yv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&Y.Data()[0])), inc: yinc}

	C.__d_gemv_unb(
		(*C.mvec_t)(unsafe.Pointer(&yv)),
		(*C.mdata_t)(unsafe.Pointer(&am)),
		(*C.mvec_t)(unsafe.Pointer(&xv)),
		C.double(alpha),
		/*C.double(beta),*/
		C.int(bits),
		C.int(S), C.int(L), C.int(R), C.int(E))
}
Exemplo n.º 24
0
// updtrmv passes matrix A, vectors X and Y, scalar alpha, the flag bits and
// the sizes N, M to the C kernel __d_update_trmv_unb. Always returns nil.
func updtrmv(A, X, Y *cmat.FloatMatrix, alpha float64, bits, N, M int) error {
	// if row vectors, elements are one stride apart; otherwise contiguous
	xinc := C.int(1)
	if rows, _ := X.Size(); rows == 1 {
		xinc = C.int(X.Stride())
	}
	yinc := C.int(1)
	if rows, _ := Y.Size(); rows == 1 {
		yinc = C.int(Y.Stride())
	}

	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&A.Data()[0])),
		step: C.int(A.Stride()),
	}
	xv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&X.Data()[0])), inc: xinc}
	yv := C.mvec_t{md: (*C.double)(unsafe.Pointer(&Y.Data()[0])), inc: yinc}

	C.__d_update_trmv_unb(
		(*C.mdata_t)(unsafe.Pointer(&am)),
		(*C.mvec_t)(unsafe.Pointer(&xv)),
		(*C.mvec_t)(unsafe.Pointer(&yv)),
		C.double(alpha), C.int(bits), C.int(N), C.int(M))
	return nil
}
Exemplo n.º 25
0
/*
 * Reduce symmetric matrix to tridiagonal form by similarity transformation A = Q*T*Q.T
 *
 * Arguments
 *  A      On entry, symmetric matrix with elements stored in upper (lower) triangular
 *         part. On exit, diagonal and first super (sub) diagonals hold matrix T. The upper
 *         (lower) triangular part above (below) first super(sub)diagonal is used to store
 *         orthogonal matrix Q.
 *
 *  tau    Scalar coefficients of elementary reflectors, at least N elements.
 *
 *  W      Workspace, at least wsTridiag(A, 0) elements.
 *
 *  flags  LOWER or UPPER; LOWER is assumed if neither is set.
 *
 *  confs  Optional blocking configuration. Unblocked algorithm is used if
 *         conf.LB is zero or N-1 < conf.LB.
 *
 * Returns
 *  ESIZE error if A is not square or tau is too short, EWORK error if workspace
 *  is too small, nil otherwise.
 *
 * If LOWER, then the matrix Q is represented as product of elementary reflectors
 *
 *   Q = H(1)H(2)...H(n-1).
 *
 * If UPPER, then the matrix Q is represented as product
 *
 *   Q = H(n-1)...H(2)H(1).
 *
 * Each H(k) has form I - tau*v*v.T.
 *
 * The contents of A on exit is as follow for N = 5.
 *
 *  LOWER                    UPPER
 *   ( d  .  .  .  . )         ( d  e  v1 v2 v3 )
 *   ( e  d  .  .  . )         ( .  d  e  v2 v3 )
 *   ( v1 e  d  .  . )         ( .  .  d  e  v3 )
 *   ( v1 v2 e  d  . )         ( .  .  .  d  e  )
 *   ( v1 v2 v3 e  d )         ( .  .  .  .  d  )
 */
func TRDReduce(A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error {
	// default to lower triangular if uplo not defined
	if flags&(gomas.LOWER|gomas.UPPER) == 0 {
		flags |= gomas.LOWER
	}
	if m(A) != n(A) || tau.Len() < n(A) {
		return gomas.NewError(gomas.ESIZE, "ReduceTridiag")
	}

	conf := gomas.CurrentConf(confs...)
	if need := wsTridiag(A, 0); W.Len() < need {
		return gomas.NewError(gomas.EWORK, "ReduceTridiag", need)
	}

	lb := conf.LB
	lower := flags&gomas.LOWER != 0

	// unblocked algorithm when blocking is disabled or matrix is too small
	if lb == 0 || n(A)-1 < lb {
		if lower {
			unblkReduceTridiagLower(A, tau, W)
		} else {
			unblkReduceTridiagUpper(A, tau, W, false)
		}
		return nil
	}

	// blocked algorithm; Y is N-by-LB matrix on top of the workspace buffer
	var Y cmat.FloatMatrix
	Y.SetBuf(m(A), lb, m(A), W.Data())
	if lower {
		blkReduceTridiagLower(A, tau, &Y, W, lb, conf)
	} else {
		blkReduceTridiagUpper(A, tau, &Y, W, lb, conf)
	}
	return nil
}
Exemplo n.º 26
0
// gemm passes matrices Cc, Ac and Bc, scalars alpha and beta, the flag bits,
// the index arguments P, S, L, R, E and the blocking parameters KB, NB, MB of
// conf to the C kernel __d_gemm_inner.
// NOTE(review): the meaning of P, S, L, R and E is defined by the C kernel and
// is not visible here.
func gemm(Cc, Ac, Bc *cmat.FloatMatrix, alpha, beta float64, bits, P, S, L, R, E int, conf *gomas.Config) {
	// describe the operands for the C side: first element and column stride
	am := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Ac.Data()[0])),
		step: C.int(Ac.Stride()),
	}
	bm := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Bc.Data()[0])),
		step: C.int(Bc.Stride()),
	}
	cm := C.mdata_t{
		md:   (*C.double)(unsafe.Pointer(&Cc.Data()[0])),
		step: C.int(Cc.Stride()),
	}

	C.__d_gemm_inner(
		(*C.mdata_t)(unsafe.Pointer(&cm)),
		(*C.mdata_t)(unsafe.Pointer(&am)),
		(*C.mdata_t)(unsafe.Pointer(&bm)),
		C.double(alpha), C.double(beta), C.int(bits),
		C.int(P), C.int(S), C.int(L), C.int(R), C.int(E),
		C.int(conf.KB), C.int(conf.NB), C.int(conf.MB))
}
Exemplo n.º 27
0
/*
 * Blocked version of Hessenberg reduction algorithm as presented in (1). This
 * version uses compact-WY transformation.
 *
 * Arguments
 *  A     Matrix to reduce, processed nb columns at a time from top-left.
 *  Tvec  Vector receiving the scalar factors; for every block the diagonal of
 *        the block reflector T is copied into it.
 *  W     Workspace matrix with nb columns and at least nb+rows(A) rows. The
 *        first nb rows hold the block reflector T, the next rows(A) rows the
 *        intermediate matrix V. Its buffer is reused as a rows(A) vector for
 *        the trailing unblocked part.
 *  nb    Blocking factor; also defines the layout of W.
 *  conf  Blocking configuration passed on to matrix operations.
 *
 * Returns nil.
 *
 * Some notes:
 *
 * Elementary reflectors stored in [A11; A21].T are not on diagonal of A11. Update of
 * a block aligned with A11; A21 is as follow
 *
 * 1. Update from left Q(k)*C:
 *                                         c0   0                            c0
 * (I - Y*T*Y.T).T*C = C - Y*(C.T*Y)*T.T = C1 - Y1 * (C1.T.Y1+C2.T*Y2)*T.T = C1-Y1*W
 *                                         C2   Y2                           C2-Y2*W
 *
 * where W = (C1.T*Y1+C2.T*Y2)*T.T and first row of C is not affected by update
 *
 * 2. Update from right C*Q(k):
 *                                       0
 * C - C*Y*T*Y.T = c0;C1;C2 - c0;C1;C2 * Y1 *T*(0;Y1;Y2) = c0; C1-W*Y1; C2-W*Y2
 *                                       Y2
 * where  W = (C1*Y1 + C2*Y2)*T and first column of C is not affected
 *
 */
func blkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A11, A12, A21, A22, A2 cmat.FloatMatrix
	var tT, tB, td cmat.FloatMatrix
	var t0, t1, t2, T cmat.FloatMatrix
	var V, VT, VB /*V0, V1, V2,*/, Y1, Y2, W0 cmat.FloatMatrix

	// carve T and V out of the workspace with the effective blocking factor nb;
	// the caller shapes W for nb which may be smaller than conf.LB when the
	// workspace is limited
	T.SubMatrix(W, 0, 0, nb, nb)
	V.SubMatrix(W, nb, 0, m(A), nb)
	td.Diag(&T)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	for m(&ABR) > nb+1 && n(&ABR) > nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, Tvec, nb, util.PBOTTOM)

		util.Partition2x1(
			&VT,
			&VB, &V, m(&ATL), util.PTOP)
		// ------------------------------------------------------

		unblkBuildHessGQvdG(&ABR, &T, &VB, nil)
		// scalar factors of this block are on the diagonal of T
		blasd.Copy(&t1, &td)

		// m(Y) == m(ABR)-1, n(Y) == n(A11)
		Y1.SubMatrix(&ABR, 1, 0, n(&A11), n(&A11))
		Y2.SubMatrix(&ABR, 1+n(&A11), 0, m(&A21)-1, n(&A11))

		// [A01; A02] == ATR := ATR*(I - Y*T*Y.T)
		updateHessRightWY(&ATR, &Y1, &Y2, &T, &VT, conf)

		// A2 = [A12; A22].T
		util.Merge2x1(&A2, &A12, &A22)

		// A2 := A2 - VB*T*A21.T
		// top-right element of A21 is temporarily set to one and restored after
		be := A21.Get(0, -1)
		A21.Set(0, -1, 1.0)
		blasd.MultTrm(&VB, &T, 1.0, gomas.UPPER|gomas.RIGHT)
		blasd.Mult(&A2, &VB, &A21, -1.0, 1.0, gomas.TRANSB, conf)
		A21.Set(0, -1, be)

		// A2 := (I - Y*T*Y.T).T * A2
		W0.SubMatrix(&V, 0, 0, n(&A2), n(&Y2))
		updateHessLeftWY(&A2, &Y1, &Y2, &T, &W0, conf)

		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, Tvec, util.PBOTTOM)
	}

	if m(&ABR) > 1 {
		// do the rest with unblocked
		util.Merge2x1(&A2, &ATR, &ABR)
		W0.SetBuf(m(A), 1, m(A), W.Data())
		unblkHessGQvdG(&A2, &tB, &W0, m(&ATR))
	}
	return nil
}
Exemplo n.º 28
0
/*
 * Blocked computation of C = C*Q or C = C*Q.T from elementary reflectors and
 * scalar coefficients.
 *
 * A is walked along its diagonal in blocks of nb columns. For every block a block
 * reflector T is built from the elementary reflectors and their scalar coefficients
 * and the matching columns of C are updated with it using the compact WY algorithm.
 *
 * Arguments
 *  C      Matrix to update.
 *  A      Elementary reflectors.
 *  tau    Scalar coefficients of the elementary reflectors.
 *  W      Workspace; the start holds the nb-by-nb block reflector and the remainder
 *         is used as an m(C)-by-nb scratch matrix.
 *  flags  If bit gomas.TRANS is set then C*Q.T is computed, otherwise C*Q.
 *  nb     Blocking factor.
 *  conf   Blocking configuration handed on to the update kernel.
 */
func blockedMultQRight(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var CL, CR, C0, C1, C2 cmat.FloatMatrix
	var tT, tB, t0, t1, t2 cmat.FloatMatrix
	var Ablk, Tbuf, Tblk, Wbuf, Wblk cmat.FloatMatrix

	var rest *cmat.FloatMatrix
	var aStart, aDir, vStart, vDir, cStart, cDir util.Direction
	// initial partition offsets; both stay zero for the normal sequence
	var rowOff, colOff int

	transpose := flags&gomas.TRANS != 0
	if !transpose {
		// normal sequence (C*Q): walk from top-left to bottom-right
		aStart, aDir = util.PTOPLEFT, util.PBOTTOMRIGHT
		vStart, vDir = util.PTOP, util.PBOTTOM
		cStart, cDir = util.PLEFT, util.PRIGHT
		rest = &ABR
	} else {
		// transpose sequence (C*Q.T): walk from bottom-right to top-left
		aStart, aDir = util.PBOTTOMRIGHT, util.PTOPLEFT
		vStart, vDir = util.PBOTTOM, util.PTOP
		cStart, cDir = util.PRIGHT, util.PLEFT
		rowOff = imax(0, m(A)-n(A))
		colOff = n(C) - n(A)
		rest = &ATL
	}

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, rowOff, 0, aStart)
	util.Partition1x2(
		&CL, &CR, C, colOff, cStart)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, vStart)

	// block reflector lives at the start of the workspace, scratch space follows it
	wdata := W.Data()
	Tbuf.SetBuf(nb, nb, nb, wdata)
	Wbuf.SetBuf(m(C), nb, m(C), wdata[Tbuf.Len():])

	for m(rest) > 0 && n(rest) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, aDir)

		// width of the C1 block has to agree with the width of A11
		kb := n(&A11)
		util.Repartition1x2to1x3(&CL,
			&C0, &C1, &C2, C, kb, cDir)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, tau, nb, vDir)
		// --------------------------------------------------------
		// zero the reflector block, then build it from the current column block
		Tblk.SubMatrix(&Tbuf, 0, 0, kb, kb)
		blasd.Scale(&Tblk, 0.0)
		util.Merge2x1(&Ablk, &A11, &A21)
		unblkQRBlockReflector(&Tblk, &Ablk, &t1)

		// compute: C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T
		//          C*Q   == C - C*Y*T*Y.T
		Wblk.SubMatrix(&Wbuf, 0, 0, m(&C1), kb)
		updateWithQTRight(&C1, &C2, &A11, &A21, &Tblk, &Wblk, transpose, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, aDir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, tau, vDir)
		util.Continue1x3to1x2(
			&CL, &CR, &C0, &C1, C, cDir)
	}
}
Exemplo n.º 29
0
/*
 * Blocked computation of C = Q*C or C = Q.T*C from elementary reflectors and
 * scalar coefficients.
 *
 * A is walked along its diagonal in blocks of nb columns. For every block a block
 * reflector T is built from the elementary reflectors and their scalar coefficients
 * and the matching rows of C are updated with it using the compact WY algorithm.
 *
 * Arguments
 *  C      Matrix to update.
 *  A      Elementary reflectors.
 *  tau    Scalar coefficients of the elementary reflectors.
 *  W      Workspace; the start holds the nb-by-nb block reflector and the remainder
 *         is used as an n(C)-by-nb scratch matrix.
 *  flags  If bit gomas.TRANS is set then Q.T*C is computed, otherwise Q*C.
 *  nb     Blocking factor.
 *  conf   Blocking configuration handed on to the update kernel.
 */
func blockedMultQLeft(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
	var CT, CB, C0, C1, C2 cmat.FloatMatrix
	var tT, tB, t0, t1, t2 cmat.FloatMatrix
	var Ablk, Tbuf, Tblk, Wbuf, Wblk cmat.FloatMatrix

	var rest *cmat.FloatMatrix
	var aStart, aDir, vStart, vDir util.Direction
	// initial row offset; stays zero for the top-left to bottom-right walk
	var rowOff int

	transpose := flags&gomas.TRANS != 0
	// the top-left to bottom-right walk is also taken when nb equals the column count of A
	forward := transpose || nb == n(A)
	if !forward {
		// normal sequence (Q*C): walk from bottom-right to top-left
		aStart, aDir = util.PBOTTOMRIGHT, util.PTOPLEFT
		vStart, vDir = util.PBOTTOM, util.PTOP
		rowOff = imax(0, m(A)-n(A))
		rest = &ATL
	} else {
		// transposed sequence (Q.T*C): walk from top-left to bottom-right
		aStart, aDir = util.PTOPLEFT, util.PBOTTOMRIGHT
		vStart, vDir = util.PTOP, util.PBOTTOM
		rest = &ABR
	}

	// block reflector lives at the start of the workspace, scratch space follows it
	wdata := W.Data()
	Tbuf.SetBuf(nb, nb, nb, wdata)
	Wbuf.SetBuf(n(C), nb, n(C), wdata[Tbuf.Len():])

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, rowOff, 0, aStart)
	util.Partition2x1(
		&CT,
		&CB, C, rowOff, vStart)
	util.Partition2x1(
		&tT,
		&tB, tau, 0, vStart)

	for m(rest) > 0 && n(rest) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&A10, &A11, nil,
			&A20, &A21, &A22, A, nb, aDir)

		// height of the C1 block follows the width of A11
		kb := n(&A11)
		util.Repartition2x1to3x1(&CT,
			&C0,
			&C1,
			&C2, C, kb, vDir)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, tau, nb, vDir)
		// --------------------------------------------------------
		// zero the reflector block, then build it from the current column block
		Tblk.SubMatrix(&Tbuf, 0, 0, kb, kb)
		blasd.Scale(&Tblk, 0.0)
		util.Merge2x1(&Ablk, &A11, &A21)
		unblkQRBlockReflector(&Tblk, &Ablk, &t1)

		// with Q = I - Y*T*Y.T the update is, algebraically,
		//   Q.T*C == C - Y*(C.T*Y*T).T      transpose == true
		//   Q*C   == C - Y*(C.T*Y*T.T).T    transpose == false
		// NOTE(review): the kernel is defined elsewhere; confirm it uses this form.
		Wblk.SubMatrix(&Wbuf, 0, 0, n(&C1), kb)
		updateWithQTLeft(&C1, &C2, &A11, &A21, &Tblk, &Wblk, transpose, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, aDir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, tau, vDir)
		util.Continue3x1to2x1(
			&CT,
			&CB, &C0, &C1, C, vDir)
	}
}
Exemplo n.º 30
0
/*
 * Blocked computation of C = Q*C or C = Q.T*C from elementary reflectors and
 * scalar coefficients.
 *
 * A is walked along its diagonal in blocks of lb columns. For every block a block
 * reflector T is built with unblkQLBlockReflector from the column block [A01; A11]
 * and the matching scalar coefficients, and C is updated with updateQLLeft.
 *
 * Arguments
 *  C      Matrix to update.
 *  A      Elementary reflectors.
 *  tau    Scalar coefficients of the elementary reflectors.
 *  W      Workspace; the start holds the lb-by-lb block reflector and the remainder
 *         is used as an n(C)-by-lb scratch matrix.
 *  flags  If bit gomas.TRANS is set then Q.T*C is computed, otherwise Q*C.
 *  lb     Blocking factor.
 *  conf   Blocking configuration handed on to the update kernel.
 */
func blkMultLeftQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) {
	var ATL, ABR, Ablk cmat.FloatMatrix
	var A00, A01, A11, A22 cmat.FloatMatrix
	var CT, CB, C0, C1, C2 cmat.FloatMatrix
	var tT, tB, t0, t1, t2 cmat.FloatMatrix
	var Tbuf, Tblk, Wbuf, Wblk cmat.FloatMatrix

	var rest *cmat.FloatMatrix
	var aStart, aDir, vStart, vDir util.Direction
	// initial partition sizes; all stay zero for the transposed sequence
	var rowOff, colOff, tauOff int

	transpose := flags&gomas.TRANS != 0
	if !transpose {
		// normal sequence (Q*C): walk from top-left to bottom-right
		aStart, aDir = util.PTOPLEFT, util.PBOTTOMRIGHT
		vStart, vDir = util.PTOP, util.PBOTTOM
		rowOff = imax(0, m(A)-n(A))
		colOff = imax(0, n(A)-m(A))
		tauOff = imax(0, tau.Len()-n(A))
		rest = &ABR
	} else {
		// transposed sequence (Q.T*C): walk A from bottom-right to top-left
		aStart, aDir = util.PBOTTOMRIGHT, util.PTOPLEFT
		vStart, vDir = util.PBOTTOM, util.PTOP
		rest = &ATL
	}

	// divide workspace into the block reflector and temporary space
	wdata := W.Data()
	Tbuf.SetBuf(lb, lb, lb, wdata)
	Wbuf.SetBuf(n(C), lb, n(C), wdata[Tbuf.Len():])

	util.Partition2x2(
		&ATL, nil,
		nil, &ABR, A, rowOff, colOff, aStart)
	util.Partition2x1(
		&CT,
		&CB, C, rowOff, vStart)
	util.Partition2x1(
		&tT,
		&tB, tau, tauOff, vStart)

	for n(rest) > 0 {
		util.Repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			nil, &A11, nil,
			nil, nil, &A22, A, lb, aDir)

		// height of the C1 block follows the width of A11
		kb := n(&A11)
		util.Repartition2x1to3x1(&CT,
			&C0,
			&C1,
			&C2, C, kb, vDir)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, tau, lb, vDir)
		// --------------------------------------------------------
		// zero the reflector block, then build it from the current column block
		Tblk.SubMatrix(&Tbuf, 0, 0, kb, kb)
		blasd.Scale(&Tblk, 0.0)
		util.Merge2x1(&Ablk, &A01, &A11)
		unblkQLBlockReflector(&Tblk, &Ablk, &t1)

		// update with (I - Y*T*Y.T) or (I - Y*T*Y.T).T
		Wblk.SubMatrix(&Wbuf, 0, 0, n(&C1), kb)
		updateQLLeft(&C1, &C0, &A11, &A01, &Tblk, &Wblk, transpose, conf)
		// --------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, nil,
			nil, &ABR, &A00, &A11, &A22, A, aDir)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, tau, vDir)
		util.Continue3x1to2x1(
			&CT,
			&CB, &C0, &C1, C, vDir)
	}
}