/* * Compute QR factorization of a M-by-N matrix A using compact WY transformation: A = Q * R, * where Q = I - Y*T*Y.T, T is block reflector and Y holds elementary reflectors as lower * trapezoidal matrix saved below diagonal elements of the matrix A. * * Arguments: * A On entry, the M-by-N matrix A. On exit, the elements on and above * the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R. * The elements below the diagonal with the matrix 'T', represent * the ortogonal matrix Q as product of elementary reflectors. * * T On exit, the K block reflectors which, together with trilu(A) represent * the ortogonal matrix Q as Q = I - Y*T*Y.T where Y = trilu(A). * K is ceiling(N/LB) where LB is blocking size from used blocking configuration. * The matrix T is LB*N augmented matrix of K block reflectors, * T = [T(0) T(1) .. T(K-1)]. Block reflector T(n) is LB*LB matrix, expect * reflector T(K-1) that is IB*IB matrix where IB = min(LB, K % LB) * * W Workspace, required size returned by QRTFactorWork(). * * conf Optional blocking configuration. If not provided then default configuration * is used. * * Returns: * Error indicator. * * QRTFactor is compatible with lapack.DGEQRT */ func QRTFactor(A, T, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) ok := false rsize := 0 if m(A) < n(A) { return gomas.NewError(gomas.ESIZE, "QRTFactor") } wsz := QRTFactorWork(A, conf) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "QRTFactor", wsz) } tr, tc := T.Size() if conf.LB == 0 || conf.LB > n(A) { ok = tr == tc && tr == n(A) rsize = n(A) * n(A) } else { ok = tr == conf.LB && tc == n(A) rsize = conf.LB * n(A) } if !ok { return gomas.NewError(gomas.ESMALL, "QRTFactor", rsize) } if conf.LB == 0 || n(A) <= conf.LB { err = unblockedQRT(A, T, W) } else { Wrk := cmat.MakeMatrix(n(A), conf.LB, W.Data()) err = blockedQRT(A, T, Wrk, conf) } return err }
/* * Solve a system of linear equations A*X = B or A.T*X = B with general N-by-N * matrix A using the LU factorization computed by LUFactor(). * * Arguments: * B On entry, the right hand side matrix B. On exit, the solution matrix X. * * A The factor L and U from the factorization A = P*L*U as computed by * LUFactor() * * pivots The pivot indices from LUFactor(). * * flags The indicator of the form of the system of equations. * If flags&TRANSA then system is transposed. All other values * indicate non transposed system. * * Compatible with lapack.DGETRS. */ func LUSolve(B, A *cmat.FloatMatrix, pivots Pivots, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.DefaultConf() if len(confs) > 0 { conf = confs[0] } ar, ac := A.Size() br, _ := B.Size() if ar != ac { return gomas.NewError(gomas.ENOTSQUARE, "SolveLU") } if br != ac { return gomas.NewError(gomas.ESIZE, "SolveLU") } if pivots != nil { applyPivots(B, pivots) } if flags&gomas.TRANSA != 0 { // transposed X = A.-1*B == (L.T*U.T).-1*B == U.-T*(L.-T*B) blasd.SolveTrm(B, A, 1.0, gomas.LOWER|gomas.UNIT|gomas.TRANSA, conf) blasd.SolveTrm(B, A, 1.0, gomas.UPPER|gomas.TRANSA, conf) } else { // non-transposed X = A.-1*B == (L*U).-1*B == U.-1*(L.-1*B) blasd.SolveTrm(B, A, 1.0, gomas.LOWER|gomas.UNIT, conf) blasd.SolveTrm(B, A, 1.0, gomas.UPPER, conf) } return err }
func MVMult(Y, A, X *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error { ok := true yr, yc := Y.Size() ar, ac := A.Size() xr, xc := X.Size() if ar*ac == 0 { return nil } if yr != 1 && yc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVMult") } if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVMult") } nx := X.Len() ny := Y.Len() if bits&gomas.TRANSA != 0 { bits |= gomas.TRANS } if bits&gomas.TRANS != 0 { ok = ny == ac && nx == ar } else { ok = ny == ar && nx == ac } if !ok { return gomas.NewError(gomas.ESIZE, "MVMult") } if beta != 1.0 { vscal(Y, beta, ny) } gemv(Y, A, X, alpha, beta, bits, 0, nx, 0, ny) return nil }
/* * Generate the M by N matrix Q with orthogonal rows which * are defined as the first M rows of the product of K first elementary * reflectors. * * Arguments * A On entry, the elementary reflectors as returned by LQFactor(). * stored right of diagonal of the M by N matrix A. * On exit, the orthogonal matrix Q * * tau Scalar coefficents of elementary reflectors * * W Workspace * * K The number of elementary reflector whose product define the matrix Q * * conf Optional blocking configuration. * * Compatible with lapackd.ORGLQ. */ func LQBuild(A, tau, W *cmat.FloatMatrix, K int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) if K <= 0 || K > n(A) { return gomas.NewError(gomas.EVALUE, "LQBuild", K) } wsz := wsBuildLQ(A, 0) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "LQBuild", wsz) } // adjust blocking factor for workspace size lb := estimateLB(A, W.Len(), wsBuildLQ) //lb = imin(lb, conf.LB) lb = conf.LB if lb == 0 || m(A) <= lb { unblkBuildLQ(A, tau, W, m(A)-K, n(A)-K, true) } else { var Twork, Wrk cmat.FloatMatrix Twork.SetBuf(lb, lb, lb, W.Data()) Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():]) blkBuildLQ(A, tau, &Twork, &Wrk, K, lb, conf) } return err }
/* * Symmetric matrix-vector multiplication. Y = beta*Y + alpha*A*X */ func MVMultSym(Y, A, X *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error { ok := true yr, yc := Y.Size() ar, ac := A.Size() xr, xc := X.Size() if ar*ac == 0 { return nil } if yr != 1 && yc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVMultSym") } if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVMultSym") } nx := X.Len() ny := Y.Len() ok = ny == ar && nx == ac && ac == ar if !ok { return gomas.NewError(gomas.ESIZE, "MVMultSym") } if beta != 1.0 { vscal(Y, beta, ny) } symv(Y, A, X, alpha, bits, nx) return nil }
/* * Compute RQ factorization of a M-by-N matrix A: A = R*Q * * Arguments: * A On entry, the M-by-N matrix A, M <= N. On exit, upper triangular matrix R * and the orthogonal matrix Q as product of elementary reflectors. * * tau On exit, the scalar factors of the elementary reflectors. * * W Workspace, M-by-nb matrix used for work space in blocked invocations. * * conf The blocking configuration. If nil then default blocking configuration * is used. Member conf.LB defines blocking size of blocked algorithms. * If it is zero then unblocked algorithm is used. * * Returns: * Error indicator. * * Additional information * * Ortogonal matrix Q is product of elementary reflectors H(k) * * Q = H(0)H(1),...,H(K-1), where K = min(M,N) * * Elementary reflector H(k) is stored on row k of A right of the diagonal with * implicit unit value on diagonal entry. The vector TAU holds scalar factors of * the elementary reflectors. * * Contents of matrix A after factorization is as follow: * * ( v0 v0 r r r r ) M=4, N=6 * ( v1 v1 v1 r r r ) * ( v2 v2 v2 v2 r r ) * ( v3 v3 v3 v3 v3 r ) * * where l is element of L, vk is element of H(k). * * RQFactor is compatible with lapack.DGERQF */ func RQFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) // must have: M <= N if m(A) > n(A) { return gomas.NewError(gomas.ESIZE, "RQFactor") } wsmin := wsLQ(A, 0) if W == nil || W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "RQFactor", wsmin) } lb := estimateLB(A, W.Len(), wsRQ) lb = imin(lb, conf.LB) if lb == 0 || m(A) <= lb { unblockedRQ(A, tau, W) } else { var Twork, Wrk cmat.FloatMatrix // block reflector T in first LB*LB elements in workspace // the rest, m(A)-LB*LB, is workspace for intermediate matrix operands Twork.SetBuf(lb, lb, lb, W.Data()) Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():]) blockedRQ(A, tau, &Twork, &Wrk, lb, conf) } return err }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors and block reflector T * * Q = H(1) H(2) . . . H(k) * * as returned by DecomposeQRT(). * * Arguments: * C On entry, the M-by-N matrix C. On exit C is overwritten by Q*C or Q.T*C. * * A QR factorization as returned by QRTFactor() where the lower trapezoidal * part holds the elementary reflectors. * * T The block reflector computed from elementary reflectors as returned by * DecomposeQRT() or computed from elementary reflectors and scalar coefficients * by BuildT() * * W Workspace, size as returned by QRTMultWork() * * conf Blocking configuration * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS, NOTRANS * * Preconditions: * a. cols(A) == cols(T), * columns A define number of elementary reflector, must match order of block reflector. * b. if conf.LB == 0, cols(T) == rows(T) * unblocked invocation, block reflector T is upper triangular * c. if conf.LB != 0, rows(T) == conf.LB * blocked invocation, T is sequence of triangular block reflectors of order LB * d. if LEFT, rows(C) >= cols(A) && cols(C) >= rows(A) * * e. if RIGHT, cols(C) >= cols(A) && rows(C) >= rows(A) * * Compatible with lapack.DGEMQRT */ func QRTMult(C, A, T, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) wsz := QRTMultWork(C, T, flags, conf) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "QRTMult", wsz) } ok := false switch flags & gomas.RIGHT { case gomas.RIGHT: ok = n(C) >= m(A) default: ok = m(C) >= n(A) } if !ok { return gomas.NewError(gomas.ESIZE, "QRTMult") } var Wrk cmat.FloatMatrix if flags&gomas.RIGHT != 0 { Wrk.SetBuf(m(C), conf.LB, m(C), W.Data()) blockedMultQTRight(C, A, T, &Wrk, flags, conf) } else { Wrk.SetBuf(n(C), conf.LB, n(C), W.Data()) blockedMultQTLeft(C, A, T, &Wrk, flags, conf) } return err }
/* * Compute * B = B*diag(D).-1 flags & RIGHT == true * B = diag(D).-1*B flags & LEFT == true * * If flags is LEFT (RIGHT) then element-wise divides columns (rows) of B with vector D. * * Arguments: * B M-by-N matrix if flags&RIGHT == true or N-by-M matrix if flags&LEFT == true * * D N element column or row vector or N-by-N matrix * * flags Indicator bits, LEFT or RIGHT */ func SolveDiag(B, D *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var c, d0 cmat.FloatMatrix var d *cmat.FloatMatrix conf := gomas.CurrentConf(confs...) d = D if !D.IsVector() { d0.Diag(D) d = &d0 } dn := d0.Len() br, bc := B.Size() switch flags & (gomas.LEFT | gomas.RIGHT) { case gomas.LEFT: if br != dn { return gomas.NewError(gomas.ESIZE, "SolveDiag") } // scale rows; for k := 0; k < dn; k++ { c.Row(B, k) blasd.InvScale(&c, d.GetAt(k), conf) } case gomas.RIGHT: if bc != dn { return gomas.NewError(gomas.ESIZE, "SolveDiag") } // scale columns for k := 0; k < dn; k++ { c.Column(B, k) blasd.InvScale(&c, d.GetAt(k), conf) } } return nil }
// Solve secular function arising in symmetric eigenproblems. On exit 'Y' contains new // eigenvalues and 'V' the rank-one update vector corresponding new eigenvalues. // The matrix Qd holds for each eigenvalue then computed deltas as row vectors. // On entry 'D' holds original eigenvalues and 'Z' is the rank-one update vector. func TRDSecularSolveAll(y, v, Qd, d, z *cmat.FloatMatrix, rho float64, confs ...*gomas.Config) (err *gomas.Error) { var delta cmat.FloatMatrix var lmbda float64 var e, ei int ei = 0 err = nil if y.Len() != d.Len() || z.Len() != d.Len() || m(Qd) != n(Qd) || m(Qd) != d.Len() { err = gomas.NewError(gomas.ESIZE, "TRDSecularSolveAll") return } for i := 0; i < d.Len(); i++ { delta.Row(Qd, i) lmbda, e = trdsecRoot(d, z, &delta, i, rho) if e < 0 && ei == 0 { ei = -(i + 1) } y.SetAt(i, lmbda) } if ei == 0 { trdsecUpdateVecDelta(v, Qd, d, rho) } else { err = gomas.NewError(gomas.ECONVERGE, "TRDSecularSolveAll", ei) } return }
/* * Compute QL factorization of a M-by-N matrix A: A = Q * L. * * Arguments: * A On entry, the M-by-N matrix A, M >= N. On exit, lower triangular matrix L * and the orthogonal matrix Q as product of elementary reflectors. * * tau On exit, the scalar factors of the elemenentary reflectors. * * W Workspace, N-by-nb matrix used for work space in blocked invocations. * * conf The blocking configuration. If nil then default blocking configuration * is used. Member conf.LB defines blocking size of blocked algorithms. * If it is zero then unblocked algorithm is used. * * Returns: * Error indicator. * * Additional information * * Ortogonal matrix Q is product of elementary reflectors H(k) * * Q = H(K-1)...H(1)H(0), where K = min(M,N) * * Elementary reflector H(k) is stored on column k of A above the diagonal with * implicit unit value on diagonal entry. The vector TAU holds scalar factors * of the elementary reflectors. * * Contents of matrix A after factorization is as follow: * * ( v0 v1 v2 v3 ) for M=6, N=4 * ( v0 v1 v2 v3 ) * ( l v1 v2 v3 ) * ( l l v2 v3 ) * ( l l l v3 ) * ( l l l l ) * * where l is element of L, vk is element of H(k). * * DecomposeQL is compatible with lapack.DGEQLF */ func QLFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var tauh cmat.FloatMatrix conf := gomas.CurrentConf(confs...) if m(A) < n(A) { return gomas.NewError(gomas.ESIZE, "QLFactor") } wsmin := wsQL(A, 0) if W == nil || W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "QLFactor", wsmin) } if tau.Len() < n(A) { return gomas.NewError(gomas.ESIZE, "QLFactor") } tauh.SubMatrix(tau, 0, 0, n(A), 1) lb := estimateLB(A, W.Len(), wsQL) lb = imin(lb, conf.LB) if lb == 0 || n(A) <= lb { unblockedQL(A, &tauh, W) } else { var Twork, Wrk cmat.FloatMatrix // block reflector T in first LB*LB elements in workspace // the rest, n(A)-LB*LB, is workspace for intermediate matrix operands Twork.SetBuf(conf.LB, conf.LB, -1, W.Data()) Wrk.SetBuf(n(A)-conf.LB, conf.LB, -1, W.Data()[Twork.Len():]) blockedQL(A, &tauh, &Twork, &Wrk, lb, conf) } return err }
func EigenSym(D, A, W *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (err *gomas.Error) { var sD, sE, E, tau, Wred cmat.FloatMatrix var vv *cmat.FloatMatrix err = nil vv = nil conf := gomas.CurrentConf(confs...) if m(A) != n(A) || D.Len() != m(A) { err = gomas.NewError(gomas.ESIZE, "EigenSym") return } if bits&gomas.WANTV != 0 && W.Len() < 3*n(A) { err = gomas.NewError(gomas.EWORK, "EigenSym") return } if bits&(gomas.LOWER|gomas.UPPER) == 0 { bits = bits | gomas.LOWER } ioff := 1 if bits&gomas.LOWER != 0 { ioff = -1 } E.SetBuf(n(A)-1, 1, n(A)-1, W.Data()) tau.SetBuf(n(A), 1, n(A), W.Data()[n(A)-1:]) wrl := W.Len() - 2*n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[2*n(A)-1:]) // reduce to tridiagonal if err = TRDReduce(A, &tau, &Wred, bits, conf); err != nil { err.Update("EigenSym") return } sD.Diag(A) sE.Diag(A, ioff) blasd.Copy(D, &sD) blasd.Copy(&E, &sE) if bits&gomas.WANTV != 0 { if err = TRDBuild(A, &tau, &Wred, n(A), bits, conf); err != nil { err.Update("EigenSym") return } vv = A } // resize workspace wrl = W.Len() - n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[n(A)-1:]) if err = TRDEigen(D, &E, vv, &Wred, bits, conf); err != nil { err.Update("EigenSym") return } return }
// Computes eigenvectors corresponding the updated eigenvalues and rank-one update vector. // The matrix Qd holds precomputed deltas as returned by TRDSecularSolveAll(). If Qd is nil or // Qd same as the matrix Q then computation is in-place and Q is assumed to hold precomputed // deltas. On exit, Q holds the column eigenvectors. func TRDSecularEigen(Q, v, Qd *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { if m(Q) != n(Q) || (Qd != nil && (m(Qd) != n(Qd) || m(Qd) != m(Q))) { return gomas.NewError(gomas.ESIZE, "TRDSecularEigen") } if m(Q) != v.Len() { return gomas.NewError(gomas.ESIZE, "TRDSecularEigen") } if Qd == nil || Qd == Q { trdsecEigenBuildInplace(Q, v) } else { trdsecEigenBuild(Q, v, Qd) } return nil }
func Swap(X, Y *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { xr, xc := X.Size() yr, yc := Y.Size() if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "Swap") } if yr != 1 && yc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "Swap") } if X.Len() != Y.Len() { return gomas.NewError(gomas.ESIZE, "Swap") } vswap(X, Y, X.Len()) return nil }
/* * Reduce general matrix A to upper Hessenberg form H by similiarity * transformation H = Q.T*A*Q. * * Arguments: * A On entry, the general matrix A. On exit, the elements on and * above the first subdiagonal contain the reduced matrix H. * The elements below the first subdiagonal with the vector tau * represent the ortogonal matrix A as product of elementary reflectors. * * tau On exit, the scalar factors of the elementary reflectors. * * W Workspace, as defined by HessReduceWork() * * conf The blocking configration. * * HessReduce is compatible with lapack.DGEHRD. */ func HessReduce(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) wmin := m(A) wopt := HessReduceWork(A, conf) wsz := W.Len() if wsz < wmin { return gomas.NewError(gomas.EWORK, "ReduceHess", wmin) } // use blocked version if workspace big enough for blocksize 4 lb := conf.LB if wsz < wopt { lb = estimateLB(A, wsz, wsHess) } if lb == 0 || n(A) <= lb { unblkHessGQvdG(A, tau, W, 0) } else { // blocked version var W0 cmat.FloatMatrix // shape workspace for blocked algorithm W0.SetBuf(m(A)+lb, lb, m(A)+lb, W.Data()) blkHessGQvdG(A, tau, &W0, lb, conf) } return err }
/* * Reduce a general M-by-N matrix A to upper or lower bidiagonal form B * by an ortogonal transformation A = Q*B*P.T, B = Q.T*A*P * * * Arguments * A On entry, the real M-by-N matrix. On exit the upper/lower * bidiagonal matrix and ortogonal matrices Q and P. * * tauq Scalar factors for elementary reflector forming the * ortogonal matrix Q. * * taup Scalar factors for elementary reflector forming the * ortogonal matrix P. * * W Workspace needed for reduction. * * conf Current blocking configuration. Optional. * * * Details * * Matrices Q and P are products of elementary reflectors H(k) and G(k) * * If M > N: * Q = H(1)*H(2)*...*H(N) and P = G(1)*G(2)*...*G(N-1) * * where H(k) = 1 - tauq*u*u.T and G(k) = 1 - taup*v*v.T * * Elementary reflector H(k) are stored on columns of A below the diagonal with * implicit unit value on diagonal entry. Vector TAUQ holds corresponding scalar * factors. Reflector G(k) are stored on rows of A right of first superdiagonal * with implicit unit value on superdiagonal. Corresponding scalar factors are * stored on vector TAUP. * * If M < N: * Q = H(1)*H(2)*...*H(N-1) and P = G(1)*G(2)*...*G(N) * * where H(k) = 1 - tauq*u*u.T and G(k) = 1 - taup*v*v.T * * Elementary reflector H(k) are stored on columns of A below the first sub diagonal * with implicit unit value on sub diagonal entry. Vector TAUQ holds corresponding * scalar factors. Reflector G(k) are sotre on rows of A right of diagonal with * implicit unit value on superdiagonal. Corresponding scalar factors are stored * on vector TAUP. * * Contents of matrix A after reductions are as follows. * * M = 6 and N = 5: M = 5 and N = 6: * * ( d e v1 v1 v1 ) ( d v1 v1 v1 v1 v1 ) * ( u1 d e v2 v2 ) ( e d v2 v2 v2 v2 ) * ( u1 u2 d e v3 ) ( u1 e d v3 v3 v3 ) * ( u1 u2 u3 d e ) ( u1 u2 e d v4 v4 ) * ( u1 u2 u3 u4 d ) ( u1 u2 u3 e d v5 ) * ( u1 u2 u3 u4 u5 ) */ func BDReduce(A, tauq, taup, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) _ = conf wmin := wsBired(A, 0) wsz := W.Len() if wsz < wmin { return gomas.NewError(gomas.EWORK, "ReduceBidiag", wmin) } lb := conf.LB wneed := wsBired(A, lb) if wneed > wsz { lb = estimateLB(A, wsz, wsBired) } if m(A) >= n(A) { if lb > 0 && n(A) > lb { blkBidiagLeft(A, tauq, taup, W, lb, conf) } else { unblkReduceBidiagLeft(A, tauq, taup, W) } } else { if lb > 0 && m(A) > lb { blkBidiagRight(A, tauq, taup, W, lb, conf) } else { unblkReduceBidiagRight(A, tauq, taup, W) } } return err }
/* * Compute LDL^T factorization of real symmetric matrix. * * Computes of a real symmetric matrix A using Bunch-Kauffman pivoting method. * The form of factorization is * * A = L*D*L.T or A = U*D*U.T * * where L (or U) is product of permutation and unit lower (or upper) triangular matrix * and D is block diagonal symmetric matrix with 1x1 and 2x2 blocks. * * Arguments * A On entry, the N-by-N symmetric matrix A. If flags bit LOWER (or UPPER) is set then * lower (or upper) triangular matrix and strictly upper (or lower) part is not * accessed. On exit, the block diagonal matrix D and lower (or upper) triangular * product matrix L (or U). * * W Workspace, size as returned by WorksizeBK(). * * ipiv Pivot vector. On exit details of interchanges and the block structure of D. If * ipiv[k] > 0 then D[k,k] is 1x1 and rows and columns k and ipiv[k]-1 were changed. * If ipiv[k] == ipiv[k+1] < 0 then D[k,k] is 2x2. If A is lower then rows and * columns k+1 and ipiv[k]-1 were changed. And if A is upper then rows and columns * k and ipvk[k]-1 were changed. * * flags Indicator bits, LOWER or UPPER. * * confs Optional blocking configuration. If not provided then default blocking * as returned by DefaultConf() is used. * * Unblocked algorithm is used if blocking configuration LB is zero or if N < LB. * * Compatible with lapack.SYTRF. */ func BKFactor(A, W *cmat.FloatMatrix, ipiv Pivots, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) for k, _ := range ipiv { ipiv[k] = 0 } wsz := BKFactorWork(A, conf) if W.Len() < wsz { return gomas.NewError(gomas.EWORK, "DecomposeBK", wsz) } var Wrk cmat.FloatMatrix if n(A) < conf.LB || conf.LB == 0 { // make workspace rows(A)*2 matrix Wrk.SetBuf(m(A), 2, m(A), W.Data()) if flags&gomas.LOWER != 0 { err, _ = unblkDecompBKLower(A, &Wrk, ipiv, conf) } else if flags&gomas.UPPER != 0 { err, _ = unblkDecompBKUpper(A, &Wrk, ipiv, conf) } } else { // make workspace rows(A)*(LB+1) matrix Wrk.SetBuf(m(A), conf.LB+1, m(A), W.Data()) if flags&gomas.LOWER != 0 { err = blkDecompBKLower(A, &Wrk, &ipiv, conf) } else if flags&gomas.UPPER != 0 { err = blkDecompBKUpper(A, &Wrk, &ipiv, conf) } } return err }
func MVSolveTrm(X, A *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error { ar, ac := A.Size() xr, xc := X.Size() if ar*ac == 0 { return nil } if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVSolveTrm") } nx := X.Len() if ac != nx || ar != ac { return gomas.NewError(gomas.ESIZE, "MVSolveTrm") } trsv(X, A, alpha, bits, nx) return nil }
func MVUpdateSym(A, X *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error { ar, ac := A.Size() xr, xc := X.Size() if ar*ac == 0 { return nil } if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVUpdateSym") } nx := X.Len() if ac != nx || ar != ac { return gomas.NewError(gomas.ESIZE, "MVUpdateSym") } updtrmv(A, X, X, alpha, bits, nx, nx) return nil }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors. * * Q = H(0) H(1) . . . H(K-1) * * as returned by QRFactor(). * * Arguments: * C On entry, the M-by-N matrix C or if flag bit RIGHT is set then N-by-M matrix * On exit C is overwritten by Q*C or Q.T*C. If bit RIGHT is set then C is * overwritten by C*Q or C*Q.T * * A QR factorization as returned by QRFactor() where the lower trapezoidal * part holds the elementary reflectors. * * tau The scalar factors of the elementary reflectors. * * W Workspace matrix, required size is returned by WorksizeMultQ(). * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS * * conf Blocking configuration. Field LB defines block size. If it is zero * unblocked invocation is assumed. Actual blocking size is adjusted * to available workspace size and minimum of configured block size and * block size implied by workspace is used. * * Compatible with lapack.DORMQR */ func QRMult(C, A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) // default to multiply from left if side not defined if flags&(gomas.LEFT|gomas.RIGHT) == 0 { flags = flags | gomas.LEFT } // n(A) is number of elementary reflectors defining the Q matrix ok := false wsizer := wsMultQLeft switch flags & gomas.RIGHT { case gomas.RIGHT: ok = n(C) == m(A) wsizer = wsMultQRight default: ok = m(C) == m(A) } if !ok { return gomas.NewError(gomas.ESIZE, "QRMult") } // minimum workspace size wsz := wsizer(C, 0) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "QRMult", wsz) } // estimate blocking factor for current workspace lb := estimateLB(C, W.Len(), wsizer) lb = imin(lb, conf.LB) if lb == 0 || n(A) <= lb { if flags&gomas.RIGHT != 0 { unblockedMultQRight(C, A, tau, W, flags) } else { unblockedMultQLeft(C, A, tau, W, flags) } } else { if flags&gomas.RIGHT != 0 { blockedMultQRight(C, A, tau, W, flags, lb, conf) } else { blockedMultQLeft(C, A, tau, W, flags, lb, conf) } } return err }
/* * Build block reflector T from Householder elementary reflectors stored in TriLU(A) * and scalar factors in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T z | z = -tau*T*Y.T*v * | 0 c | c = tau * * Compatible with lapack.DLAFRT */ func QRReflector(T, A, tau *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { if n(T) < n(A) || m(T) < n(A) { return gomas.NewError(gomas.ESIZE, "QRReflector") } unblkQRBlockReflector(T, A, tau) return nil }
func Copy(A, B *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { ar, ac := A.Size() br, bc := B.Size() avec := ar == 1 || ac == 1 bvec := br == 1 || bc == 1 if avec && bvec { if A.Len() < B.Len() { return gomas.NewError(gomas.ESIZE, "Copy") } vcopy(A, B, A.Len()) return nil } if ar != br || ac != bc { return gomas.NewError(gomas.ESIZE, "Copy") } mcopy(A, B, ar, ac) return nil }
func Transpose(A, B *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { ar, ac := A.Size() br, bc := B.Size() if ar != bc || ac != br { return gomas.NewError(gomas.ESIZE, "Transpose") } mtranspose(A, B, br, bc) return nil }
/* * Build block reflector T from Householder elementary reflectors stored in A * and scalar factors in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T 0 | z = -tau*T*Yt*v * | z c | c = tau * * Compatible with lapack.DLAFRT */ func RQReflector(T, A, tau *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { if n(T) < m(A) || m(T) < m(A) { return gomas.NewError(gomas.ESIZE, "BuildLQT") } unblkBlockReflectorRQ(T, A, tau) return nil }
func Axpby(Y, X *cmat.FloatMatrix, alpha, beta float64, confs ...*gomas.Config) *gomas.Error { if X.Len() == 0 || Y.Len() == 0 { return nil } xr, xc := X.Size() if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "Axpby") } yr, yc := Y.Size() if yr != 1 && yc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "Axpby") } nx := X.Len() if nx != Y.Len() { return gomas.NewError(gomas.ESIZE, "Axpby") } axpby(Y, X, alpha, beta, nx) return nil }
/* * Triangular matrix multiplication. */ func MultTrm(B, A *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error { conf := gomas.DefaultConf() if len(confs) > 0 { conf = confs[0] } if B.Len() == 0 || A.Len() == 0 { return nil } ok := true ar, ac := A.Size() br, bc := B.Size() P := ac E := bc switch { case bits&gomas.RIGHT != 0: ok = bc == ar && ar == ac E = br case bits&gomas.LEFT != 0: fallthrough default: ok = ac == br && ar == ac } if !ok { return gomas.NewError(gomas.ESIZE, "MultTrm") } // single threaded if conf.NProc == 1 || conf.WB <= 0 || E < conf.WB/2 { trmm(B, A, alpha, bits, P, 0, E, conf) return nil } // parallelized wait := make(chan int, 4) _, nN := blocking(0, E, conf.WB/2) nT := 0 for j := 0; j < nN; j++ { jS := blockIndex(j, nN, conf.WB/2, E) jL := blockIndex(j+1, nN, conf.WB/2, E) task := func(q chan int) { trmm(B, A, alpha, bits, P, jS, jL, conf) q <- 1 } conf.Sched.Schedule(gomas.NewTask(task, wait)) nT += 1 } for nT > 0 { <-wait nT -= 1 } return nil }
/* * Build block reflector for QL factorized matrix. */ func QLReflector(T, A, tau *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var tauh cmat.FloatMatrix if n(T) < n(A) || m(T) < n(A) { return gomas.NewError(gomas.ESIZE, "QLReflector") } tauh.SubMatrix(tau, 0, 0, imin(m(A), n(A)), 1) unblkQLBlockReflector(T, A, &tauh) return nil }
func Update2Sym(Cc, A, B *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error { conf := gomas.DefaultConf() if len(confs) > 0 { conf = confs[0] } ok := true cr, cc := Cc.Size() ar, ac := A.Size() br, bc := B.Size() if cr*cc == 0 { return nil } P := ac E := cr if bits&gomas.TRANS != 0 && bits&gomas.TRANSA == 0 { bits |= gomas.TRANSA } switch { case bits&gomas.TRANSA != 0: ok = cr == cc && cr == ac && bc == ac && br == ar P = ar default: ok = cr == cc && cr == ar && br == ar && bc == ac } if !ok { return gomas.NewError(gomas.ESIZE, "Update2Sym") } if conf.NProc == 1 || conf.WB <= 0 || E <= conf.WB { syr2k(Cc, A, B, alpha, beta, bits, P, 0, E, conf) return nil } // parallelized wait := make(chan int, 4) _, nN := blocking(0, E, conf.WB) nT := 0 for j := 0; j < nN; j++ { jS := blockIndex(j, nN, conf.WB, E) jE := blockIndex(j+1, nN, conf.WB, E) task := func(q chan int) { syr2k(Cc, A, B, alpha, beta, bits, P, jS, jE, conf) q <- 1 } conf.Sched.Schedule(gomas.NewTask(task, wait)) nT += 1 } for nT > 0 { <-wait nT -= 1 } return nil }
// Solve secular function arising in symmetric eigenproblems. On exit 'Y' contains new // eigenvalues On entry 'D' holds original eigenvalues and 'Z' is the rank-one update vector. // Parameter 'delta' is workspace needed for computation. func TRDSecularSolve(Y, D, Z, delta *cmat.FloatMatrix, rho float64, confs ...*gomas.Config) (err *gomas.Error) { var lmbda float64 var e, ei int ei = 0 err = nil if delta.Len() != D.Len() || Y.Len() != D.Len() || Z.Len() != D.Len() { err = gomas.NewError(gomas.ESIZE, "TRDSecularSolve") return } for i := 0; i < D.Len(); i++ { lmbda, e = trdsecRoot(D, Z, delta, i, rho) if e < 0 && ei == 0 { ei = -(i + 1) } Y.SetAt(i, lmbda) } if ei != 0 { err = gomas.NewError(gomas.ECONVERGE, "TRDSecularSolve", ei) } return }
func MVMultTrm(X, A *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error { ar, ac := A.Size() xr, xc := X.Size() if ar*ac == 0 { return nil } if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVMultTrm") } nx := X.Len() if ac != nx || ar != ac { return gomas.NewError(gomas.ESIZE, "MVMultTrm") } if ar == 1 { vscal(X, alpha*A.Get(0, 0), nx) } else { trmv(X, A, alpha, bits, nx) } return nil }
func MVUpdateTrm(A, X, Y *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error { ar, ac := A.Size() yr, yc := Y.Size() xr, xc := X.Size() if ar*ac == 0 { return nil } if yr != 1 && yc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVUpdateTrm") } if xr != 1 && xc != 1 { return gomas.NewError(gomas.ENEED_VECTOR, "MVUpdateTrm") } nx := X.Len() ny := Y.Len() if ac != ny || ar != nx { return gomas.NewError(gomas.ESIZE, "MVUpdateTrm") } updtrmv(A, X, Y, alpha, bits, ny, nx) return nil }