/*
 * Compute the LDL^T factorization of a real symmetric matrix.
 *
 * Computes the factorization of a real symmetric matrix A using the Bunch-Kaufman
 * pivoting method. The form of the factorization is
 *
 *    A = L*D*L.T  or  A = U*D*U.T
 *
 * where L (or U) is a product of permutation and unit lower (or upper) triangular
 * matrices and D is a block diagonal symmetric matrix with 1x1 and 2x2 blocks.
 *
 * Arguments
 *   A      On entry, the N-by-N symmetric matrix A. If flags bit LOWER (or UPPER) is
 *          set then the lower (or upper) triangular part is referenced and the strictly
 *          upper (or lower) part is not accessed. On exit, the block diagonal matrix D
 *          and the lower (or upper) triangular product matrix L (or U).
 *
 *   W      Workspace, size as returned by BKFactorWork().
 *
 *   ipiv   Pivot vector. On exit, details of the interchanges and of the block structure
 *          of D. If ipiv[k] > 0 then D[k,k] is 1x1 and rows and columns k and ipiv[k]-1
 *          were interchanged. If ipiv[k] == ipiv[k+1] < 0 then D[k,k] is 2x2; if A is
 *          lower then rows and columns k+1 and ipiv[k]-1 were interchanged, and if A is
 *          upper then rows and columns k and ipiv[k]-1 were interchanged.
 *
 *   flags  Indicator bits, LOWER or UPPER.
 *
 *   confs  Optional blocking configuration. If not provided then the default blocking
 *          configuration as returned by DefaultConf() is used.
 *
 * The unblocked algorithm is used if the blocking configuration LB is zero or if N < LB.
 *
 * Compatible with lapack.SYTRF.
 */
func BKFactor(A, W *cmat.FloatMatrix, ipiv Pivots, flags int, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)

    for k := range ipiv {
        ipiv[k] = 0
    }
    wsz := BKFactorWork(A, conf)
    if W.Len() < wsz {
        return gomas.NewError(gomas.EWORK, "BKFactor", wsz)
    }

    var Wrk cmat.FloatMatrix
    if n(A) < conf.LB || conf.LB == 0 {
        // make workspace rows(A)*2 matrix
        Wrk.SetBuf(m(A), 2, m(A), W.Data())
        if flags&gomas.LOWER != 0 {
            err, _ = unblkDecompBKLower(A, &Wrk, ipiv, conf)
        } else if flags&gomas.UPPER != 0 {
            err, _ = unblkDecompBKUpper(A, &Wrk, ipiv, conf)
        }
    } else {
        // make workspace rows(A)*(LB+1) matrix
        Wrk.SetBuf(m(A), conf.LB+1, m(A), W.Data())
        if flags&gomas.LOWER != 0 {
            err = blkDecompBKLower(A, &Wrk, &ipiv, conf)
        } else if flags&gomas.UPPER != 0 {
            err = blkDecompBKUpper(A, &Wrk, &ipiv, conf)
        }
    }
    return err
}
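/*
 * Example (not part of the library API): a minimal sketch of factoring the lower
 * triangular part of a symmetric matrix with BKFactor. The workspace is sized with
 * BKFactorWork(), exactly as BKFactor itself requires. The sketch assumes Pivots is
 * an integer slice type (as the ranged loop in BKFactor implies) and wraps plain
 * float64 slices with cmat.MakeMatrix.
 */
func exampleBKFactorLower(A *cmat.FloatMatrix) (Pivots, *gomas.Error) {
    conf := gomas.CurrentConf()
    // one pivot entry per column of A
    ipiv := make(Pivots, n(A))
    // workspace of the size reported by BKFactorWork()
    wsz := BKFactorWork(A, conf)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    // on return A holds the block diagonal D and the unit lower triangular factor L
    err := BKFactor(A, W, ipiv, gomas.LOWER, conf)
    return ipiv, err
}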
/*
 * Compute the RQ factorization of an M-by-N matrix A: A = R*Q
 *
 * Arguments:
 *   A     On entry, the M-by-N matrix A, M <= N. On exit, the upper triangular matrix R
 *         and the orthogonal matrix Q as a product of elementary reflectors.
 *
 *   tau   On exit, the scalar factors of the elementary reflectors.
 *
 *   W     Workspace, an M-by-nb matrix used as work space in blocked invocations.
 *
 *   conf  The blocking configuration. If nil then the default blocking configuration
 *         is used. Member conf.LB defines the blocking size of blocked algorithms.
 *         If it is zero then the unblocked algorithm is used.
 *
 * Returns:
 *   Error indicator.
 *
 * Additional information
 *
 * The orthogonal matrix Q is a product of elementary reflectors H(k)
 *
 *    Q = H(0)H(1)...H(K-1), where K = min(M,N)
 *
 * Elementary reflector H(k) is stored on row k of A, left of the diagonal of R, with
 * an implicit unit value on the diagonal entry. The vector tau holds the scalar
 * factors of the elementary reflectors.
 *
 * The contents of matrix A after factorization are as follows:
 *
 *    ( v0 v0 r  r  r  r )   M=4, N=6
 *    ( v1 v1 v1 r  r  r )
 *    ( v2 v2 v2 v2 r  r )
 *    ( v3 v3 v3 v3 v3 r )
 *
 * where r is an element of R and vk is an element of H(k).
 *
 * RQFactor is compatible with lapack.DGERQF
 */
func RQFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)

    // must have: M <= N
    if m(A) > n(A) {
        return gomas.NewError(gomas.ESIZE, "RQFactor")
    }
    wsmin := wsRQ(A, 0)
    if W == nil || W.Len() < wsmin {
        return gomas.NewError(gomas.EWORK, "RQFactor", wsmin)
    }
    lb := estimateLB(A, W.Len(), wsRQ)
    lb = imin(lb, conf.LB)

    if lb == 0 || m(A) <= lb {
        unblockedRQ(A, tau, W)
    } else {
        var Twork, Wrk cmat.FloatMatrix
        // block reflector T in the first LB*LB elements of the workspace;
        // the rest is an (M-LB)-by-LB work area for intermediate matrix operands
        Twork.SetBuf(lb, lb, lb, W.Data())
        Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():])
        blockedRQ(A, tau, &Twork, &Wrk, lb, conf)
    }
    return err
}
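/*
 * Example (not part of the library API): a minimal sketch of an RQ factorization.
 * The workspace is sized as an M-by-(LB+1) matrix, which matches the M-by-nb work
 * space described above; the library's exact worksize query is not shown in this
 * excerpt, so treat the sizing as an assumption.
 */
func exampleRQFactor(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    // A must have m(A) <= n(A); K = min(M, N) = M reflectors
    M := m(A)
    tau := cmat.MakeMatrix(M, 1, make([]float64, M))
    // conservatively sized workspace (assumed sufficient)
    wsz := M * (conf.LB + 1)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    err := RQFactor(A, tau, W, conf)
    return tau, err
}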
/*
 * Generate the M-by-N matrix Q with orthogonal rows which are defined as the first M
 * rows of the product of the K first elementary reflectors.
 *
 * Arguments
 *   A     On entry, the elementary reflectors as returned by LQFactor(), stored right
 *         of the diagonal of the M-by-N matrix A. On exit, the orthogonal matrix Q.
 *
 *   tau   Scalar coefficients of the elementary reflectors.
 *
 *   W     Workspace.
 *
 *   K     The number of elementary reflectors whose product defines the matrix Q.
 *
 *   conf  Optional blocking configuration.
 *
 * Compatible with lapack.DORGLQ.
 */
func LQBuild(A, tau, W *cmat.FloatMatrix, K int, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)

    if K <= 0 || K > n(A) {
        return gomas.NewError(gomas.EVALUE, "LQBuild", K)
    }
    wsz := wsBuildLQ(A, 0)
    if W == nil || W.Len() < wsz {
        return gomas.NewError(gomas.EWORK, "LQBuild", wsz)
    }

    // adjust blocking factor for workspace size
    lb := estimateLB(A, W.Len(), wsBuildLQ)
    //lb = imin(lb, conf.LB)
    lb = conf.LB
    if lb == 0 || m(A) <= lb {
        unblkBuildLQ(A, tau, W, m(A)-K, n(A)-K, true)
    } else {
        var Twork, Wrk cmat.FloatMatrix
        Twork.SetBuf(lb, lb, lb, W.Data())
        Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():])
        blkBuildLQ(A, tau, &Twork, &Wrk, K, lb, conf)
    }
    return err
}
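/*
 * Example (not part of the library API): a minimal sketch of forming Q explicitly from
 * an LQ factorization already stored in A and tau. The workspace size here is a
 * conservative assumption, max(M,N)*(LB+1) elements, since the library's exact worksize
 * helper for LQBuild is not shown in this excerpt.
 */
func exampleLQBuild(A, tau *cmat.FloatMatrix) *gomas.Error {
    conf := gomas.CurrentConf()
    wsz := imax(m(A), n(A)) * (conf.LB + 1)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    // build Q from all m(A) reflectors; A is overwritten with Q
    return LQBuild(A, tau, W, m(A), conf)
}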
/*
 * Compute the QL factorization of an M-by-N matrix A: A = Q * L.
 *
 * Arguments:
 *   A     On entry, the M-by-N matrix A, M >= N. On exit, the lower triangular matrix L
 *         and the orthogonal matrix Q as a product of elementary reflectors.
 *
 *   tau   On exit, the scalar factors of the elementary reflectors.
 *
 *   W     Workspace, an N-by-nb matrix used as work space in blocked invocations.
 *
 *   conf  The blocking configuration. If nil then the default blocking configuration
 *         is used. Member conf.LB defines the blocking size of blocked algorithms.
 *         If it is zero then the unblocked algorithm is used.
 *
 * Returns:
 *   Error indicator.
 *
 * Additional information
 *
 * The orthogonal matrix Q is a product of elementary reflectors H(k)
 *
 *    Q = H(K-1)...H(1)H(0), where K = min(M,N)
 *
 * Elementary reflector H(k) is stored on column k of A above the diagonal with an
 * implicit unit value on the diagonal entry. The vector tau holds the scalar factors
 * of the elementary reflectors.
 *
 * The contents of matrix A after factorization are as follows:
 *
 *    ( v0 v1 v2 v3 )   for M=6, N=4
 *    ( v0 v1 v2 v3 )
 *    ( l  v1 v2 v3 )
 *    ( l  l  v2 v3 )
 *    ( l  l  l  v3 )
 *    ( l  l  l  l  )
 *
 * where l is an element of L and vk is an element of H(k).
 *
 * QLFactor is compatible with lapack.DGEQLF
 */
func QLFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    var tauh cmat.FloatMatrix
    conf := gomas.CurrentConf(confs...)

    if m(A) < n(A) {
        return gomas.NewError(gomas.ESIZE, "QLFactor")
    }
    wsmin := wsQL(A, 0)
    if W == nil || W.Len() < wsmin {
        return gomas.NewError(gomas.EWORK, "QLFactor", wsmin)
    }
    if tau.Len() < n(A) {
        return gomas.NewError(gomas.ESIZE, "QLFactor")
    }
    tauh.SubMatrix(tau, 0, 0, n(A), 1)
    lb := estimateLB(A, W.Len(), wsQL)
    lb = imin(lb, conf.LB)

    if lb == 0 || n(A) <= lb {
        unblockedQL(A, &tauh, W)
    } else {
        var Twork, Wrk cmat.FloatMatrix
        // block reflector T in the first LB*LB elements of the workspace;
        // the rest is an (N-LB)-by-LB work area for intermediate matrix operands
        Twork.SetBuf(lb, lb, -1, W.Data())
        Wrk.SetBuf(n(A)-lb, lb, -1, W.Data()[Twork.Len():])
        blockedQL(A, &tauh, &Twork, &Wrk, lb, conf)
    }
    return err
}
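/*
 * Example (not part of the library API): a minimal sketch of a QL factorization. tau is
 * allocated with n(A) entries as QLFactor requires; the workspace is sized as the
 * N-by-(LB+1) matrix described above, which is an assumption since the library's exact
 * worksize helper is not shown in this excerpt.
 */
func exampleQLFactor(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    // A must have m(A) >= n(A)
    N := n(A)
    tau := cmat.MakeMatrix(N, 1, make([]float64, N))
    wsz := N * (conf.LB + 1)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    err := QLFactor(A, tau, W, conf)
    return tau, err
}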
/*
 * Compute the QR factorization of an M-by-N matrix A using the compact WY transformation:
 * A = Q * R, where Q = I - Y*T*Y.T, T is a block reflector and Y holds the elementary
 * reflectors as a lower trapezoidal matrix stored below the diagonal elements of A.
 *
 * Arguments:
 *   A     On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal
 *         contain the min(M,N)-by-N upper trapezoidal matrix R. The elements below the
 *         diagonal, together with the matrix T, represent the orthogonal matrix Q as a
 *         product of elementary reflectors.
 *
 *   T     On exit, the K block reflectors which, together with trilu(A), represent the
 *         orthogonal matrix Q as Q = I - Y*T*Y.T where Y = trilu(A). K is ceiling(N/LB)
 *         where LB is the blocking size from the blocking configuration used. The matrix
 *         T is an LB-by-N augmented matrix of K block reflectors, T = [T(0) T(1) .. T(K-1)].
 *         Each block reflector T(i) is an LB-by-LB matrix, except the last reflector
 *         T(K-1) which is an IB-by-IB matrix with IB = N - (K-1)*LB.
 *
 *   W     Workspace, required size returned by QRTFactorWork().
 *
 *   conf  Optional blocking configuration. If not provided then the default configuration
 *         is used.
 *
 * Returns:
 *   Error indicator.
 *
 * QRTFactor is compatible with lapack.DGEQRT
 */
func QRTFactor(A, T, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)
    ok := false
    rsize := 0

    if m(A) < n(A) {
        return gomas.NewError(gomas.ESIZE, "QRTFactor")
    }
    wsz := QRTFactorWork(A, conf)
    if W == nil || W.Len() < wsz {
        return gomas.NewError(gomas.EWORK, "QRTFactor", wsz)
    }

    tr, tc := T.Size()
    if conf.LB == 0 || conf.LB > n(A) {
        ok = tr == tc && tr == n(A)
        rsize = n(A) * n(A)
    } else {
        ok = tr == conf.LB && tc == n(A)
        rsize = conf.LB * n(A)
    }
    if !ok {
        return gomas.NewError(gomas.ESMALL, "QRTFactor", rsize)
    }

    if conf.LB == 0 || n(A) <= conf.LB {
        err = unblockedQRT(A, T, W)
    } else {
        Wrk := cmat.MakeMatrix(n(A), conf.LB, W.Data())
        err = blockedQRT(A, T, Wrk, conf)
    }
    return err
}
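/*
 * Example (not part of the library API): a minimal sketch of a compact WY QR
 * factorization. The block reflector matrix T is shaped LB-by-N for a blocked run and
 * N-by-N otherwise, matching the size check in QRTFactor, and the workspace is sized
 * with QRTFactorWork(). A must have m(A) >= n(A).
 */
func exampleQRTFactor(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    N := n(A)
    tr := conf.LB
    if tr == 0 || tr > N {
        tr = N
    }
    T := cmat.MakeMatrix(tr, N, make([]float64, tr*N))
    wsz := QRTFactorWork(A, conf)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    err := QRTFactor(A, T, W, conf)
    return T, err
}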
func axpby(Y, X *cmat.FloatMatrix, alpha, beta float64, N int) {
    var x, y C.mvec_t

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    yr, _ := Y.Size()
    y.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    y.inc = C.int(1)
    if yr == 1 {
        y.inc = C.int(Y.Stride())
    }
    if beta == 1.0 {
        C.__d_vec_axpy(
            (*C.mvec_t)(unsafe.Pointer(&y)),
            (*C.mvec_t)(unsafe.Pointer(&x)),
            C.double(alpha), C.int(N))
    } else {
        C.__d_vec_axpby(
            (*C.mvec_t)(unsafe.Pointer(&y)),
            (*C.mvec_t)(unsafe.Pointer(&x)),
            C.double(alpha), C.double(beta), C.int(N))
    }
    return
}
/*
 * Reduce a general matrix A to upper Hessenberg form H by the similarity
 * transformation H = Q.T*A*Q.
 *
 * Arguments:
 *   A     On entry, the general matrix A. On exit, the elements on and above the first
 *         subdiagonal contain the reduced matrix H. The elements below the first
 *         subdiagonal, together with the vector tau, represent the orthogonal matrix Q
 *         as a product of elementary reflectors.
 *
 *   tau   On exit, the scalar factors of the elementary reflectors.
 *
 *   W     Workspace, as defined by HessReduceWork().
 *
 *   conf  The blocking configuration.
 *
 * HessReduce is compatible with lapack.DGEHRD.
 */
func HessReduce(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)

    wmin := m(A)
    wopt := HessReduceWork(A, conf)
    wsz := W.Len()
    if wsz < wmin {
        return gomas.NewError(gomas.EWORK, "HessReduce", wmin)
    }
    // adjust blocking factor if the workspace is smaller than optimal
    lb := conf.LB
    if wsz < wopt {
        lb = estimateLB(A, wsz, wsHess)
    }
    if lb == 0 || n(A) <= lb {
        unblkHessGQvdG(A, tau, W, 0)
    } else {
        // blocked version
        var W0 cmat.FloatMatrix
        // shape workspace for the blocked algorithm
        W0.SetBuf(m(A)+lb, lb, m(A)+lb, W.Data())
        blkHessGQvdG(A, tau, &W0, lb, conf)
    }
    return err
}
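/*
 * Example (not part of the library API): a minimal sketch of reducing a square matrix to
 * upper Hessenberg form. The workspace is sized with HessReduceWork(); tau is allocated
 * with one entry per column, which is at least as long as the number of reflectors.
 */
func exampleHessReduce(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    N := n(A)
    tau := cmat.MakeMatrix(N, 1, make([]float64, N))
    wsz := HessReduceWork(A, conf)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    err := HessReduce(A, tau, W, conf)
    return tau, err
}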
/*
 * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix
 * defined as the product of k elementary reflectors and the block reflector T
 *
 *    Q = H(1) H(2) ... H(k)
 *
 * as returned by QRTFactor().
 *
 * Arguments:
 *   C      On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q.T*C.
 *
 *   A      QR factorization as returned by QRTFactor() where the lower trapezoidal
 *          part holds the elementary reflectors.
 *
 *   T      The block reflector computed from the elementary reflectors as returned by
 *          QRTFactor(), or computed from the elementary reflectors and scalar
 *          coefficients by BuildT().
 *
 *   W      Workspace, size as returned by QRTMultWork().
 *
 *   conf   Blocking configuration.
 *
 *   flags  Indicators. Valid indicators are LEFT, RIGHT, TRANS, NOTRANS.
 *
 * Preconditions:
 *   a. cols(A) == cols(T),
 *        the columns of A define the number of elementary reflectors and must match
 *        the order of the block reflector.
 *   b. if conf.LB == 0, cols(T) == rows(T)
 *        unblocked invocation, block reflector T is upper triangular
 *   c. if conf.LB != 0, rows(T) == conf.LB
 *        blocked invocation, T is a sequence of triangular block reflectors of order LB
 *   d. if LEFT, rows(C) >= cols(A) && cols(C) >= rows(A)
 *   e. if RIGHT, cols(C) >= cols(A) && rows(C) >= rows(A)
 *
 * Compatible with lapack.DGEMQRT
 */
func QRTMult(C, A, T, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)

    wsz := QRTMultWork(C, T, flags, conf)
    if W == nil || W.Len() < wsz {
        return gomas.NewError(gomas.EWORK, "QRTMult", wsz)
    }
    ok := false
    switch flags & gomas.RIGHT {
    case gomas.RIGHT:
        ok = n(C) >= m(A)
    default:
        ok = m(C) >= n(A)
    }
    if !ok {
        return gomas.NewError(gomas.ESIZE, "QRTMult")
    }

    var Wrk cmat.FloatMatrix
    if flags&gomas.RIGHT != 0 {
        Wrk.SetBuf(m(C), conf.LB, m(C), W.Data())
        blockedMultQTRight(C, A, T, &Wrk, flags, conf)
    } else {
        Wrk.SetBuf(n(C), conf.LB, n(C), W.Data())
        blockedMultQTLeft(C, A, T, &Wrk, flags, conf)
    }
    return err
}
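/*
 * Example (not part of the library API): a minimal sketch of applying Q.T from the left
 * to a right hand side matrix B, using a factorization produced by QRTFactor() above
 * (A holds the reflectors, T the block reflectors). The workspace is sized with
 * QRTMultWork().
 */
func exampleQRTMultLeft(B, A, T *cmat.FloatMatrix) *gomas.Error {
    conf := gomas.CurrentConf()
    wsz := QRTMultWork(B, T, gomas.LEFT|gomas.TRANS, conf)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    // B := Q.T*B
    return QRTMult(B, A, T, W, gomas.LEFT|gomas.TRANS, conf)
}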
/*
 * Reduce an upper triangular matrix to tridiagonal form.
 *
 * Elementary reflectors Q = H(n-1)...H(2)H(1) are stored in the upper triangular part
 * of A. Reflector H(n-1) is saved in column A(n) and its scalar multiplier in tau[n-1].
 * If parameter `tail` is true then this function is used to reduce the tail part of a
 * partially reduced matrix and the tau-vector partitioning starts from the last position.
 */
func unblkReduceTridiagUpper(A, tauq, W *cmat.FloatMatrix, tail bool) {
    var ATL, ABR cmat.FloatMatrix
    var A00, a01, a11, A22 cmat.FloatMatrix
    var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
    var y21 cmat.FloatMatrix
    var v0 float64

    toff := 1
    if tail {
        toff = 0
    }
    util.Partition2x2(
        &ATL, nil,
        nil, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
    util.Partition2x1(
        &tqT,
        &tqB, tauq, toff, util.PBOTTOM)

    for n(&ATL) > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, &a01, nil,
            nil, &a11, nil,
            nil, nil, &A22, A, 1, util.PTOPLEFT)
        util.Repartition2x1to3x1(&tqT,
            &tq0,
            &tauq1,
            &tq2, tauq, 1, util.PTOP)

        // set temp vectors for this round
        y21.SetBuf(n(&A00), 1, n(&A00), W.Data())
        // ------------------------------------------------------
        // compute householder to zero superdiagonal entries
        computeHouseholderRev(&a01, &tauq1)
        tauqv := tauq1.Get(0, 0)

        // set superdiagonal to unit
        v0 = a01.Get(-1, 0)
        a01.Set(-1, 0, 1.0)

        // y21 := tauq*A00*a01
        blasd.MVMultSym(&y21, &A00, &a01, tauqv, 0.0, gomas.UPPER)
        // beta := tauq*a01.T*y21
        beta := tauqv * blasd.Dot(&a01, &y21)
        // y21 := y21 - 0.5*beta*a01
        blasd.Axpy(&y21, &a01, -0.5*beta)
        // A00 := A00 - a01*y21.T - y21*a01.T
        blasd.MVUpdate2Sym(&A00, &a01, &y21, -1.0, gomas.UPPER)

        // restore superdiagonal value
        a01.Set(-1, 0, v0)
        // ------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, nil,
            nil, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
        util.Continue3x1to2x1(
            &tqT,
            &tqB, &tq0, &tauq1, tauq, util.PTOP)
    }
}
func minvscale(A *cmat.FloatMatrix, alpha float64, M, N int) {
    var a C.mdata_t

    a.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    a.step = C.int(A.Stride())
    C.__d_blk_invscale(
        (*C.mdata_t)(unsafe.Pointer(&a)),
        C.double(alpha), C.int(M), C.int(N))
    return
}
/*
 * Tridiagonal reduction of a LOWER triangular symmetric matrix; zeroes the elements
 * below the first subdiagonal:
 *
 *   A = (I - tau*u*u.T)*A*(I - tau*u*u.T)
 *
 *     = (I - tau*( 0   0   )) (a11 a12) (I - tau*( 0   0   ))
 *       (        ( 0 u*u.T )) (a21 A22) (        ( 0 u*u.T ))
 *
 *   a11, a12, a21 are not affected.
 *
 *   from LEFT:
 *     A22 = A22 - tau*u*u.T*A22
 *   from RIGHT:
 *     A22 = A22 - tau*A22*u*u.T
 *
 *   LEFT and RIGHT:
 *     A22 = A22 - tau*u*u.T*A22 - tau*(A22 - tau*u*u.T*A22)*u*u.T
 *         = A22 - tau*u*u.T*A22 - tau*A22*u*u.T + tau*tau*u*u.T*A22*u*u.T
 *     [x = tau*A22*u (vector)]                                    (SYMV)
 *     A22 = A22 - u*x.T - x*u.T + tau*u*u.T*x*u.T
 *     [beta = tau*u.T*x (scalar)]                                 (DOT)
 *         = A22 - u*x.T - x*u.T + beta*u*u.T
 *         = A22 - u*(x - 0.5*beta*u).T - (x - 0.5*beta*u)*u.T
 *     [w = x - 0.5*beta*u]                                        (AXPY)
 *         = A22 - u*w.T - w*u.T                                   (SYR2)
 *
 * Result of the reduction for N = 5:
 *
 *    ( d  .  .  .  . )
 *    ( e  d  .  .  . )
 *    ( v1 e  d  .  . )
 *    ( v1 v2 e  d  . )
 *    ( v1 v2 v3 e  d )
 */
func unblkReduceTridiagLower(A, tauq, W *cmat.FloatMatrix) {
    var ATL, ABR cmat.FloatMatrix
    var A00, a11, a21, A22 cmat.FloatMatrix
    var tqT, tqB, tq0, tauq1, tq2 cmat.FloatMatrix
    var y21 cmat.FloatMatrix
    var v0 float64

    util.Partition2x2(
        &ATL, nil,
        nil, &ABR, A, 0, 0, util.PTOPLEFT)
    util.Partition2x1(
        &tqT,
        &tqB, tauq, 0, util.PTOP)

    for m(&ABR) > 0 && n(&ABR) > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, nil, nil,
            nil, &a11, nil,
            nil, &a21, &A22, A, 1, util.PBOTTOMRIGHT)
        util.Repartition2x1to3x1(&tqT,
            &tq0,
            &tauq1,
            &tq2, tauq, 1, util.PBOTTOM)

        // set temp vectors for this round
        y21.SetBuf(n(&A22), 1, n(&A22), W.Data())
        // ------------------------------------------------------
        // compute householder to zero subdiagonal entries
        computeHouseholderVec(&a21, &tauq1)
        tauqv := tauq1.Get(0, 0)

        // set subdiagonal to unit
        v0 = a21.Get(0, 0)
        a21.Set(0, 0, 1.0)

        // y21 := tauq*A22*a21
        blasd.MVMultSym(&y21, &A22, &a21, tauqv, 0.0, gomas.LOWER)
        // beta := tauq*a21.T*y21
        beta := tauqv * blasd.Dot(&a21, &y21)
        // y21 := y21 - 0.5*beta*a21
        blasd.Axpy(&y21, &a21, -0.5*beta)
        // A22 := A22 - a21*y21.T - y21*a21.T
        blasd.MVUpdate2Sym(&A22, &a21, &y21, -1.0, gomas.LOWER)

        // restore subdiagonal
        a21.Set(0, 0, v0)
        // ------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, nil,
            nil, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
        util.Continue3x1to2x1(
            &tqT,
            &tqB, &tq0, &tauq1, tauq, util.PBOTTOM)
    }
}
func vinvscal(X *cmat.FloatMatrix, alpha float64, N int) {
    var x C.mvec_t

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    C.__d_vec_invscal(
        (*C.mvec_t)(unsafe.Pointer(&x)),
        C.double(alpha), C.int(N))
    return
}
func plus(Ac, Bc *cmat.FloatMatrix, alpha, beta float64, bits, S, L, R, E int) {
    var Am, Bm C.mdata_t

    Am.md = (*C.double)(unsafe.Pointer(&Ac.Data()[0]))
    Am.step = C.int(Ac.Stride())
    Bm.md = (*C.double)(unsafe.Pointer(&Bc.Data()[0]))
    Bm.step = C.int(Bc.Stride())
    C.__d_scale_plus(
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        (*C.mdata_t)(unsafe.Pointer(&Bm)),
        C.double(alpha), C.double(beta), C.int(bits),
        C.int(S), C.int(L), C.int(R), C.int(E))
}
func sum(X *cmat.FloatMatrix, N int) float64 {
    var x C.mvec_t
    var dc C.double

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    dc = C.__d_vec_sum_recursive(
        (*C.mvec_t)(unsafe.Pointer(&x)), C.int(N))
    return float64(dc)
}
func iamax(X *cmat.FloatMatrix, N int) int {
    var x C.mvec_t
    var ix C.int

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    ix = C.__d_vec_iamax(
        (*C.mvec_t)(unsafe.Pointer(&x)), C.int(N))
    return int(ix)
}
func mtranspose(A, B *cmat.FloatMatrix, M, N int) {
    var a, b C.mdata_t

    if M == 0 || N == 0 {
        return
    }
    a.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    a.step = C.int(A.Stride())
    b.md = (*C.double)(unsafe.Pointer(&B.Data()[0]))
    b.step = C.int(B.Stride())
    C.__d_blk_transpose(
        (*C.mdata_t)(unsafe.Pointer(&a)),
        (*C.mdata_t)(unsafe.Pointer(&b)),
        C.int(M), C.int(N))
    return
}
func EigenSym(D, A, W *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (err *gomas.Error) {
    var sD, sE, E, tau, Wred cmat.FloatMatrix
    var vv *cmat.FloatMatrix

    err = nil
    vv = nil
    conf := gomas.CurrentConf(confs...)
    if m(A) != n(A) || D.Len() != m(A) {
        err = gomas.NewError(gomas.ESIZE, "EigenSym")
        return
    }
    if bits&gomas.WANTV != 0 && W.Len() < 3*n(A) {
        err = gomas.NewError(gomas.EWORK, "EigenSym")
        return
    }

    if bits&(gomas.LOWER|gomas.UPPER) == 0 {
        bits = bits | gomas.LOWER
    }
    ioff := 1
    if bits&gomas.LOWER != 0 {
        ioff = -1
    }
    E.SetBuf(n(A)-1, 1, n(A)-1, W.Data())
    tau.SetBuf(n(A), 1, n(A), W.Data()[n(A)-1:])
    wrl := W.Len() - 2*n(A) - 1
    Wred.SetBuf(wrl, 1, wrl, W.Data()[2*n(A)-1:])

    // reduce to tridiagonal
    if err = TRDReduce(A, &tau, &Wred, bits, conf); err != nil {
        err.Update("EigenSym")
        return
    }
    sD.Diag(A)
    sE.Diag(A, ioff)
    blasd.Copy(D, &sD)
    blasd.Copy(&E, &sE)

    if bits&gomas.WANTV != 0 {
        if err = TRDBuild(A, &tau, &Wred, n(A), bits, conf); err != nil {
            err.Update("EigenSym")
            return
        }
        vv = A
    }
    // resize workspace
    wrl = W.Len() - n(A) - 1
    Wred.SetBuf(wrl, 1, wrl, W.Data()[n(A)-1:])

    if err = TRDEigen(D, &E, vv, &Wred, bits, conf); err != nil {
        err.Update("EigenSym")
        return
    }
    return
}
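/*
 * Example (not part of the library API): a minimal sketch of computing the eigenvalues
 * and eigenvectors of a symmetric matrix with EigenSym. The workspace length 3*N matches
 * the check EigenSym makes when eigenvectors are requested (WANTV); a larger workspace
 * lets the intermediate reductions run their blocked code paths.
 */
func exampleEigenSym(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    N := n(A)
    D := cmat.MakeMatrix(N, 1, make([]float64, N))
    W := cmat.MakeMatrix(3*N, 1, make([]float64, 3*N))
    // eigenvalues into D; eigenvectors overwrite A because WANTV is set
    err := EigenSym(D, A, W, gomas.LOWER|gomas.WANTV, conf)
    return D, err
}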
func syrk(Cc, Ac *cmat.FloatMatrix, alpha, beta float64, bits, P, S, E int, conf *gomas.Config) error {
    var Am, Cm C.mdata_t

    Am.md = (*C.double)(unsafe.Pointer(&Ac.Data()[0]))
    Am.step = C.int(Ac.Stride())
    Cm.md = (*C.double)(unsafe.Pointer(&Cc.Data()[0]))
    Cm.step = C.int(Cc.Stride())

    C.__d_rank_blk(
        (*C.mdata_t)(unsafe.Pointer(&Cm)),
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        C.double(alpha), C.double(beta), C.int(bits),
        C.int(P), C.int(S), C.int(E),
        C.int(conf.KB), C.int(conf.NB), C.int(conf.MB))
    return nil
}
func trsm(Bc, Ac *cmat.FloatMatrix, alpha float64, bits, N, S, E int, conf *gomas.Config) error {
    var Am, Bm C.mdata_t

    Am.md = (*C.double)(unsafe.Pointer(&Ac.Data()[0]))
    Am.step = C.int(Ac.Stride())
    Bm.md = (*C.double)(unsafe.Pointer(&Bc.Data()[0]))
    Bm.step = C.int(Bc.Stride())

    C.__d_solve_blocked(
        (*C.mdata_t)(unsafe.Pointer(&Bm)),
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        C.double(alpha), C.int(bits),
        C.int(N), C.int(S), C.int(E),
        C.int(conf.KB), C.int(conf.NB), C.int(conf.MB))
    return nil
}
func trmv(X, A *cmat.FloatMatrix, alpha float64, bits, N int) error {
    var Am C.mdata_t
    var Xm C.mvec_t

    xr, _ := X.Size()
    Am.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    Am.step = C.int(A.Stride())
    Xm.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    Xm.inc = C.int(1)
    // if row vectors, change increment
    if xr == 1 {
        Xm.inc = C.int(X.Stride())
    }
    C.__d_trmv_unb(
        (*C.mvec_t)(unsafe.Pointer(&Xm)),
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        C.double(alpha), C.int(bits), C.int(N))
    return nil
}
func vswap(X, Y *cmat.FloatMatrix, N int) {
    var x, y C.mvec_t

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    yr, _ := Y.Size()
    y.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    y.inc = C.int(1)
    if yr == 1 {
        y.inc = C.int(Y.Stride())
    }
    C.__d_vec_swap(
        (*C.mvec_t)(unsafe.Pointer(&x)),
        (*C.mvec_t)(unsafe.Pointer(&y)), C.int(N))
    return
}
func dot(X, Y *cmat.FloatMatrix, N int) float64 {
    var x, y C.mvec_t
    var dc C.double

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    yr, _ := Y.Size()
    y.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    y.inc = C.int(1)
    if yr == 1 {
        y.inc = C.int(Y.Stride())
    }
    dc = C.__d_vec_dot_recursive(
        (*C.mvec_t)(unsafe.Pointer(&x)),
        (*C.mvec_t)(unsafe.Pointer(&y)), C.int(N))
    return float64(dc)
}
func gemv(Y, A, X *cmat.FloatMatrix, alpha, beta float64, bits, S, L, R, E int) {
    var Am C.mdata_t
    var Xm, Ym C.mvec_t

    xr, _ := X.Size()
    yr, _ := Y.Size()
    Am.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    Am.step = C.int(A.Stride())
    Xm.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    Ym.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    Ym.inc = C.int(1)
    Xm.inc = C.int(1)
    // if row vectors, change increment
    if xr == 1 {
        Xm.inc = C.int(X.Stride())
    }
    if yr == 1 {
        Ym.inc = C.int(Y.Stride())
    }
    C.__d_gemv_unb(
        (*C.mvec_t)(unsafe.Pointer(&Ym)),
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        (*C.mvec_t)(unsafe.Pointer(&Xm)),
        C.double(alpha), /*C.double(beta),*/ C.int(bits),
        C.int(S), C.int(L), C.int(R), C.int(E))
}
func updtrmv(A, X, Y *cmat.FloatMatrix, alpha float64, bits, N, M int) error {
    var Am C.mdata_t
    var Xm, Ym C.mvec_t

    xr, _ := X.Size()
    yr, _ := Y.Size()
    Am.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    Am.step = C.int(A.Stride())
    Xm.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    Ym.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    Ym.inc = C.int(1)
    Xm.inc = C.int(1)
    // if row vectors, change increment
    if xr == 1 {
        Xm.inc = C.int(X.Stride())
    }
    if yr == 1 {
        Ym.inc = C.int(Y.Stride())
    }
    C.__d_update_trmv_unb(
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        (*C.mvec_t)(unsafe.Pointer(&Xm)),
        (*C.mvec_t)(unsafe.Pointer(&Ym)),
        C.double(alpha), C.int(bits), C.int(N), C.int(M))
    return nil
}
/*
 * Reduce a symmetric matrix to tridiagonal form by the similarity transformation
 * A = Q*T*Q.T.
 *
 * Arguments
 *   A      On entry, a symmetric matrix with elements stored in the upper (lower)
 *          triangular part. On exit, the diagonal and the first super (sub) diagonal
 *          hold the matrix T. The upper (lower) triangular part above (below) the first
 *          super (sub) diagonal is used to store the orthogonal matrix Q.
 *
 *   tau    Scalar coefficients of the elementary reflectors.
 *
 *   W      Workspace.
 *
 *   flags  LOWER or UPPER.
 *
 *   confs  Optional blocking configuration.
 *
 * If LOWER, then the matrix Q is represented as a product of elementary reflectors
 *
 *    Q = H(1)H(2)...H(n-1).
 *
 * If UPPER, then the matrix Q is represented as a product
 *
 *    Q = H(n-1)...H(2)H(1).
 *
 * Each H(k) has the form I - tau*v*v.T.
 *
 * The contents of A on exit are as follows for N = 5:
 *
 *    LOWER                      UPPER
 *    ( d  .  .  .  . )          ( d  e  v1 v2 v3 )
 *    ( e  d  .  .  . )          ( .  d  e  v2 v3 )
 *    ( v1 e  d  .  . )          ( .  .  d  e  v3 )
 *    ( v1 v2 e  d  . )          ( .  .  .  d  e  )
 *    ( v1 v2 v3 e  d )          ( .  .  .  .  d  )
 */
func TRDReduce(A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    var Y cmat.FloatMatrix

    // default to lower triangular if uplo not defined
    if flags&(gomas.LOWER|gomas.UPPER) == 0 {
        flags = flags | gomas.LOWER
    }
    ok := m(A) == n(A) && tau.Len() >= n(A)
    if !ok {
        return gomas.NewError(gomas.ESIZE, "TRDReduce")
    }
    conf := gomas.CurrentConf(confs...)
    lb := conf.LB
    wsmin := wsTridiag(A, 0)
    if W.Len() < wsmin {
        return gomas.NewError(gomas.EWORK, "TRDReduce", wsmin)
    }

    if flags&gomas.LOWER != 0 {
        if lb == 0 || n(A)-1 < lb {
            unblkReduceTridiagLower(A, tau, W)
        } else {
            Y.SetBuf(m(A), lb, m(A), W.Data())
            blkReduceTridiagLower(A, tau, &Y, W, lb, conf)
        }
    } else {
        if lb == 0 || n(A)-1 < lb {
            unblkReduceTridiagUpper(A, tau, W, false)
        } else {
            Y.SetBuf(m(A), lb, m(A), W.Data())
            blkReduceTridiagUpper(A, tau, &Y, W, lb, conf)
        }
    }
    return err
}
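/*
 * Example (not part of the library API): a minimal sketch of reducing the lower
 * triangular part of a symmetric matrix to tridiagonal form. tau has n(A) entries, as
 * the size check in TRDReduce requires; the workspace size is a conservative assumption
 * of an N-by-(LB+2) matrix, since the library's worksize helper is not shown in this
 * excerpt.
 */
func exampleTRDReduceLower(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    N := n(A)
    tau := cmat.MakeMatrix(N, 1, make([]float64, N))
    wsz := N * (conf.LB + 2)
    W := cmat.MakeMatrix(wsz, 1, make([]float64, wsz))
    err := TRDReduce(A, tau, W, gomas.LOWER, conf)
    return tau, err
}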
func gemm(Cc, Ac, Bc *cmat.FloatMatrix, alpha, beta float64, bits, P, S, L, R, E int, conf *gomas.Config) {
    var Am, Cm, Bm C.mdata_t

    Am.md = (*C.double)(unsafe.Pointer(&Ac.Data()[0]))
    Am.step = C.int(Ac.Stride())
    Bm.md = (*C.double)(unsafe.Pointer(&Bc.Data()[0]))
    Bm.step = C.int(Bc.Stride())
    Cm.md = (*C.double)(unsafe.Pointer(&Cc.Data()[0]))
    Cm.step = C.int(Cc.Stride())

    C.__d_gemm_inner(
        (*C.mdata_t)(unsafe.Pointer(&Cm)),
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        (*C.mdata_t)(unsafe.Pointer(&Bm)),
        C.double(alpha), C.double(beta), C.int(bits),
        C.int(P), C.int(S), C.int(L), C.int(R), C.int(E),
        C.int(conf.KB), C.int(conf.NB), C.int(conf.MB))
}
/*
 * Blocked version of the Hessenberg reduction algorithm as presented in (1). This
 * version uses the compact WY transformation.
 *
 * Some notes:
 *
 * Elementary reflectors stored in [A11; A21].T start one row below the diagonal of A11.
 * The update of a block aligned with [A11; A21] is as follows.
 *
 * 1. Update from left, Q(k)*C:
 *
 *                                            ( c0 )   ( 0  )                            ( c0      )
 *    (I - Y*T*Y.T).T*C = C - Y*(C.T*Y)*T.T = ( C1 ) - ( Y1 )*(C1.T*Y1 + C2.T*Y2)*T.T  = ( C1-Y1*W )
 *                                            ( C2 )   ( Y2 )                            ( C2-Y2*W )
 *
 *    where W = (C1.T*Y1 + C2.T*Y2)*T.T and the first row of C is not affected by the update.
 *
 * 2. Update from right, C*Q(k):
 *
 *                                                  ( 0  )
 *    C - C*Y*T*Y.T = (c0;C1;C2) - (c0;C1;C2) * ( Y1 )*T*(0;Y1;Y2) = (c0; C1-W*Y1; C2-W*Y2)
 *                                                  ( Y2 )
 *
 *    where W = (C1*Y1 + C2*Y2)*T and the first column of C is not affected.
 */
func blkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
    var ATL, ATR, ABL, ABR cmat.FloatMatrix
    var A00, A11, A12, A21, A22, A2 cmat.FloatMatrix
    var tT, tB, td cmat.FloatMatrix
    var t0, t1, t2, T cmat.FloatMatrix
    var V, VT, VB, Y1, Y2, W0 cmat.FloatMatrix

    T.SubMatrix(W, 0, 0, conf.LB, conf.LB)
    V.SubMatrix(W, conf.LB, 0, m(A), conf.LB)
    td.Diag(&T)
    util.Partition2x2(
        &ATL, &ATR,
        &ABL, &ABR, A, 0, 0, util.PTOPLEFT)
    util.Partition2x1(
        &tT,
        &tB, Tvec, 0, util.PTOP)

    for m(&ABR) > nb+1 && n(&ABR) > nb {
        util.Repartition2x2to3x3(&ATL,
            &A00, nil, nil,
            nil, &A11, &A12,
            nil, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
        util.Repartition2x1to3x1(&tT,
            &t0,
            &t1,
            &t2, Tvec, nb, util.PBOTTOM)
        util.Partition2x1(
            &VT,
            &VB, &V, m(&ATL), util.PTOP)
        // ------------------------------------------------------
        unblkBuildHessGQvdG(&ABR, &T, &VB, nil)
        blasd.Copy(&t1, &td)

        // m(Y) == m(ABR)-1, n(Y) == n(A11)
        Y1.SubMatrix(&ABR, 1, 0, n(&A11), n(&A11))
        Y2.SubMatrix(&ABR, 1+n(&A11), 0, m(&A21)-1, n(&A11))

        // [A01; A02] == ATR := ATR*(I - Y*T*Y.T)
        updateHessRightWY(&ATR, &Y1, &Y2, &T, &VT, conf)

        // A2 = [A12; A22].T
        util.Merge2x1(&A2, &A12, &A22)

        // A2 := A2 - VB*T*A21.T
        be := A21.Get(0, -1)
        A21.Set(0, -1, 1.0)
        blasd.MultTrm(&VB, &T, 1.0, gomas.UPPER|gomas.RIGHT)
        blasd.Mult(&A2, &VB, &A21, -1.0, 1.0, gomas.TRANSB, conf)
        A21.Set(0, -1, be)

        // A2 := (I - Y*T*Y.T).T * A2
        W0.SubMatrix(&V, 0, 0, n(&A2), n(&Y2))
        updateHessLeftWY(&A2, &Y1, &Y2, &T, &W0, conf)
        // ------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, &ATR,
            &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
        util.Continue3x1to2x1(
            &tT,
            &tB, &t0, &t1, Tvec, util.PBOTTOM)
    }

    if m(&ABR) > 1 {
        // do the rest with the unblocked algorithm
        util.Merge2x1(&A2, &ATR, &ABR)
        W0.SetBuf(m(A), 1, m(A), W.Data())
        unblkHessGQvdG(&A2, &tB, &W0, m(&ATR))
    }
    return nil
}
/*
 * Blocked version for computing C = C*Q and C = C*Q.T from elementary reflectors
 * and scalar coefficients.
 *
 * Elementary reflectors and scalar coefficients are used to build the block reflector T.
 * Matrix C is updated by applying block reflector T using the compact WY algorithm.
 */
func blockedMultQRight(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
    var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix
    var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
    var CL, CR, C0, C1, C2 cmat.FloatMatrix
    var tT, tB cmat.FloatMatrix
    var t0, tau1, t2 cmat.FloatMatrix
    var W0, Wrk, Tw, Twork cmat.FloatMatrix
    var Aref *cmat.FloatMatrix
    var pAdir, pAstart, pDir, pStart, pCstart, pCdir util.Direction
    var bsz, cb, mb int

    // partitioning start and direction
    if flags&gomas.TRANS != 0 {
        // from bottom-right to top-left to produce transposed sequence (C*Q.T)
        pAstart = util.PBOTTOMRIGHT
        pAdir = util.PTOPLEFT
        pStart = util.PBOTTOM
        pDir = util.PTOP
        pCstart = util.PRIGHT
        pCdir = util.PLEFT
        mb = imax(0, m(A)-n(A))
        cb = n(C) - n(A)
        Aref = &ATL
    } else {
        // from top-left to bottom-right to produce normal sequence (C*Q)
        pAstart = util.PTOPLEFT
        pAdir = util.PBOTTOMRIGHT
        pStart = util.PTOP
        pDir = util.PBOTTOM
        pCstart = util.PLEFT
        pCdir = util.PRIGHT
        mb = 0
        cb = 0
        Aref = &ABR
    }

    // intermediate reflector at start of workspace
    Twork.SetBuf(nb, nb, nb, W.Data())
    W0.SetBuf(m(C), nb, m(C), W.Data()[Twork.Len():])

    util.Partition2x2(
        &ATL, &ATR,
        &ABL, &ABR, A, mb, 0, pAstart)
    util.Partition1x2(
        &CL, &CR, C, cb, pCstart)
    util.Partition2x1(
        &tT,
        &tB, tau, 0, pStart)

    transpose := flags&gomas.TRANS != 0
    for m(Aref) > 0 && n(Aref) > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, nil, nil,
            &A10, &A11, nil,
            &A20, &A21, &A22, A, nb, pAdir)
        util.Repartition2x1to3x1(&tT,
            &t0,
            &tau1,
            &t2, tau, nb, pDir)
        bsz = n(&A11) // C1 block size must match A11
        util.Repartition1x2to1x3(&CL,
            &C0, &C1, &C2, C, bsz, pCdir)
        // --------------------------------------------------------
        // clear & build block reflector from current block
        util.Merge2x1(&AL, &A11, &A21)
        Tw.SubMatrix(&Twork, 0, 0, bsz, bsz)
        blasd.Scale(&Tw, 0.0)
        unblkQRBlockReflector(&Tw, &AL, &tau1)

        // compute: C*Q.T == C - C*(Y*T*Y.T).T = C - C*Y*T.T*Y.T
        //          C*Q   == C - C*Y*T*Y.T
        Wrk.SubMatrix(&W0, 0, 0, m(&C1), bsz)
        updateWithQTRight(&C1, &C2, &A11, &A21, &Tw, &Wrk, transpose, conf)
        // --------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, &ATR,
            &ABL, &ABR, &A00, &A11, &A22, A, pAdir)
        util.Continue1x3to1x2(
            &CL, &CR, &C0, &C1, C, pCdir)
        util.Continue3x1to2x1(
            &tT,
            &tB, &t0, &tau1, tau, pDir)
    }
}
/*
 * Blocked version for computing C = Q*C and C = Q.T*C from elementary reflectors
 * and scalar coefficients.
 *
 * Elementary reflectors and scalar coefficients are used to build the block reflector T.
 * Matrix C is updated by applying block reflector T using the compact WY algorithm.
 */
func blockedMultQLeft(C, A, tau, W *cmat.FloatMatrix, flags, nb int, conf *gomas.Config) {
    var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix
    var A00, A10, A11, A20, A21, A22 cmat.FloatMatrix
    var CT, CB, C0, C1, C2 cmat.FloatMatrix
    var tT, tB cmat.FloatMatrix
    var t0, tau1, t2 cmat.FloatMatrix
    var Wrk, W0, Tw, Twork cmat.FloatMatrix
    var Aref *cmat.FloatMatrix
    var pAdir, pAstart, pDir, pStart util.Direction
    var bsz, mb int

    // partitioning start and direction
    if flags&gomas.TRANS != 0 || nb == n(A) {
        // from top-left to bottom-right to produce transposed sequence (Q.T*C)
        pAstart = util.PTOPLEFT
        pAdir = util.PBOTTOMRIGHT
        pStart = util.PTOP
        pDir = util.PBOTTOM
        mb = 0
        Aref = &ABR
    } else {
        // from bottom-right to top-left to produce normal sequence (Q*C)
        pAstart = util.PBOTTOMRIGHT
        pAdir = util.PTOPLEFT
        pStart = util.PBOTTOM
        pDir = util.PTOP
        mb = imax(0, m(A)-n(A))
        Aref = &ATL
    }

    util.Partition2x2(
        &ATL, &ATR,
        &ABL, &ABR, A, mb, 0, pAstart)
    util.Partition2x1(
        &CT,
        &CB, C, mb, pStart)
    util.Partition2x1(
        &tT,
        &tB, tau, 0, pStart)

    transpose := flags&gomas.TRANS != 0

    // intermediate reflector at start of workspace
    Twork.SetBuf(nb, nb, nb, W.Data())
    W0.SetBuf(n(C), nb, n(C), W.Data()[Twork.Len():])

    for m(Aref) > 0 && n(Aref) > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, nil, nil,
            &A10, &A11, nil,
            &A20, &A21, &A22, A, nb, pAdir)
        util.Repartition2x1to3x1(&tT,
            &t0,
            &tau1,
            &t2, tau, nb, pDir)
        bsz = n(&A11)
        util.Repartition2x1to3x1(&CT,
            &C0,
            &C1,
            &C2, C, bsz, pDir)
        // --------------------------------------------------------
        // clear & build block reflector from current block
        util.Merge2x1(&AL, &A11, &A21)
        Tw.SubMatrix(&Twork, 0, 0, bsz, bsz)
        blasd.Scale(&Tw, 0.0)
        unblkQRBlockReflector(&Tw, &AL, &tau1)

        // compute: Q.T*C == C - Y*(C.T*Y*T).T   when transpose == true
        //          Q*C   == C - Y*T*Y.T*C       when transpose == false
        Wrk.SubMatrix(&W0, 0, 0, n(&C1), bsz)
        updateWithQTLeft(&C1, &C2, &A11, &A21, &Tw, &Wrk, transpose, conf)
        // --------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, &ATR,
            &ABL, &ABR, &A00, &A11, &A22, A, pAdir)
        util.Continue3x1to2x1(
            &CT,
            &CB, &C0, &C1, C, pDir)
        util.Continue3x1to2x1(
            &tT,
            &tB, &t0, &tau1, tau, pDir)
    }
}
func blkMultLeftQL(C, A, tau, W *cmat.FloatMatrix, flags, lb int, conf *gomas.Config) {
    var ATL, ABR, AL cmat.FloatMatrix
    var A00, A01, A11, A22 cmat.FloatMatrix
    var CT, CB, C0, C1, C2 cmat.FloatMatrix
    var tT, tB cmat.FloatMatrix
    var t0, tau1, t2 cmat.FloatMatrix
    var T0, T, W0, Wrk cmat.FloatMatrix
    var Aref *cmat.FloatMatrix
    var pAdir, pAstart, pDir, pStart util.Direction
    var mb, tb, nb int

    // partitioning start and direction
    if flags&gomas.TRANS != 0 {
        // A from bottom-right to top-left to produce transposed sequence (Q.T*C)
        pAstart = util.PBOTTOMRIGHT
        pAdir = util.PTOPLEFT
        pStart = util.PBOTTOM
        pDir = util.PTOP
        mb = 0
        tb = 0
        nb = 0
        Aref = &ATL
    } else {
        // from top-left to bottom-right to produce normal sequence (Q*C)
        pAstart = util.PTOPLEFT
        pAdir = util.PBOTTOMRIGHT
        pStart = util.PTOP
        pDir = util.PBOTTOM
        mb = imax(0, m(A)-n(A))
        nb = imax(0, n(A)-m(A))
        tb = imax(0, tau.Len()-n(A))
        Aref = &ABR
    }

    util.Partition2x2(
        &ATL, nil,
        nil, &ABR, A, mb, nb, pAstart)
    util.Partition2x1(
        &CT,
        &CB, C, mb, pStart)
    util.Partition2x1(
        &tT,
        &tB, tau, tb, pStart)

    transpose := flags&gomas.TRANS != 0

    // divide workspace between the block reflector and temporary space
    T0.SetBuf(lb, lb, lb, W.Data())
    W0.SetBuf(n(C), lb, n(C), W.Data()[T0.Len():])

    for n(Aref) > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, &A01, nil,
            nil, &A11, nil,
            nil, nil, &A22, A, lb, pAdir)
        util.Repartition2x1to3x1(&tT,
            &t0,
            &tau1,
            &t2, tau, lb, pDir)
        bsz := n(&A11)
        util.Repartition2x1to3x1(&CT,
            &C0,
            &C1,
            &C2, C, bsz, pDir)
        // --------------------------------------------------------
        // build block reflector for current block
        util.Merge2x1(&AL, &A01, &A11)
        T.SubMatrix(&T0, 0, 0, bsz, bsz)
        blasd.Scale(&T, 0.0)
        unblkQLBlockReflector(&T, &AL, &tau1)

        // update with (I - Y*T*Y.T) or (I - Y*T*Y.T).T
        Wrk.SubMatrix(&W0, 0, 0, n(&C1), bsz)
        updateQLLeft(&C1, &C0, &A11, &A01, &T, &Wrk, transpose, conf)
        // --------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, nil,
            nil, &ABR, &A00, &A11, &A22, A, pAdir)
        util.Continue3x1to2x1(
            &CT,
            &CB, &C0, &C1, C, pDir)
        util.Continue3x1to2x1(
            &tT,
            &tB, &t0, &tau1, tau, pDir)
    }
}