/* * Reduce a general M-by-N matrix A to upper or lower bidiagonal form B * by an ortogonal transformation A = Q*B*P.T, B = Q.T*A*P * * * Arguments * A On entry, the real M-by-N matrix. On exit the upper/lower * bidiagonal matrix and ortogonal matrices Q and P. * * tauq Scalar factors for elementary reflector forming the * ortogonal matrix Q. * * taup Scalar factors for elementary reflector forming the * ortogonal matrix P. * * W Workspace needed for reduction. * * conf Current blocking configuration. Optional. * * * Details * * Matrices Q and P are products of elementary reflectors H(k) and G(k) * * If M > N: * Q = H(1)*H(2)*...*H(N) and P = G(1)*G(2)*...*G(N-1) * * where H(k) = 1 - tauq*u*u.T and G(k) = 1 - taup*v*v.T * * Elementary reflector H(k) are stored on columns of A below the diagonal with * implicit unit value on diagonal entry. Vector TAUQ holds corresponding scalar * factors. Reflector G(k) are stored on rows of A right of first superdiagonal * with implicit unit value on superdiagonal. Corresponding scalar factors are * stored on vector TAUP. * * If M < N: * Q = H(1)*H(2)*...*H(N-1) and P = G(1)*G(2)*...*G(N) * * where H(k) = 1 - tauq*u*u.T and G(k) = 1 - taup*v*v.T * * Elementary reflector H(k) are stored on columns of A below the first sub diagonal * with implicit unit value on sub diagonal entry. Vector TAUQ holds corresponding * scalar factors. Reflector G(k) are sotre on rows of A right of diagonal with * implicit unit value on superdiagonal. Corresponding scalar factors are stored * on vector TAUP. * * Contents of matrix A after reductions are as follows. * * M = 6 and N = 5: M = 5 and N = 6: * * ( d e v1 v1 v1 ) ( d v1 v1 v1 v1 v1 ) * ( u1 d e v2 v2 ) ( e d v2 v2 v2 v2 ) * ( u1 u2 d e v3 ) ( u1 e d v3 v3 v3 ) * ( u1 u2 u3 d e ) ( u1 u2 e d v4 v4 ) * ( u1 u2 u3 u4 d ) ( u1 u2 u3 e d v5 ) * ( u1 u2 u3 u4 u5 ) */ func BDReduce(A, tauq, taup, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) _ = conf wmin := wsBired(A, 0) wsz := W.Len() if wsz < wmin { return gomas.NewError(gomas.EWORK, "ReduceBidiag", wmin) } lb := conf.LB wneed := wsBired(A, lb) if wneed > wsz { lb = estimateLB(A, wsz, wsBired) } if m(A) >= n(A) { if lb > 0 && n(A) > lb { blkBidiagLeft(A, tauq, taup, W, lb, conf) } else { unblkReduceBidiagLeft(A, tauq, taup, W) } } else { if lb > 0 && m(A) > lb { blkBidiagRight(A, tauq, taup, W, lb, conf) } else { unblkReduceBidiagRight(A, tauq, taup, W) } } return err }
/* * Compute RQ factorization of a M-by-N matrix A: A = R*Q * * Arguments: * A On entry, the M-by-N matrix A, M <= N. On exit, upper triangular matrix R * and the orthogonal matrix Q as product of elementary reflectors. * * tau On exit, the scalar factors of the elementary reflectors. * * W Workspace, M-by-nb matrix used for work space in blocked invocations. * * conf The blocking configuration. If nil then default blocking configuration * is used. Member conf.LB defines blocking size of blocked algorithms. * If it is zero then unblocked algorithm is used. * * Returns: * Error indicator. * * Additional information * * Ortogonal matrix Q is product of elementary reflectors H(k) * * Q = H(0)H(1),...,H(K-1), where K = min(M,N) * * Elementary reflector H(k) is stored on row k of A right of the diagonal with * implicit unit value on diagonal entry. The vector TAU holds scalar factors of * the elementary reflectors. * * Contents of matrix A after factorization is as follow: * * ( v0 v0 r r r r ) M=4, N=6 * ( v1 v1 v1 r r r ) * ( v2 v2 v2 v2 r r ) * ( v3 v3 v3 v3 v3 r ) * * where l is element of L, vk is element of H(k). * * RQFactor is compatible with lapack.DGERQF */ func RQFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) // must have: M <= N if m(A) > n(A) { return gomas.NewError(gomas.ESIZE, "RQFactor") } wsmin := wsLQ(A, 0) if W == nil || W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "RQFactor", wsmin) } lb := estimateLB(A, W.Len(), wsRQ) lb = imin(lb, conf.LB) if lb == 0 || m(A) <= lb { unblockedRQ(A, tau, W) } else { var Twork, Wrk cmat.FloatMatrix // block reflector T in first LB*LB elements in workspace // the rest, m(A)-LB*LB, is workspace for intermediate matrix operands Twork.SetBuf(lb, lb, lb, W.Data()) Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():]) blockedRQ(A, tau, &Twork, &Wrk, lb, conf) } return err }
/* * Generate the M by N matrix Q with orthogonal rows which * are defined as the first M rows of the product of K first elementary * reflectors. * * Arguments * A On entry, the elementary reflectors as returned by LQFactor(). * stored right of diagonal of the M by N matrix A. * On exit, the orthogonal matrix Q * * tau Scalar coefficents of elementary reflectors * * W Workspace * * K The number of elementary reflector whose product define the matrix Q * * conf Optional blocking configuration. * * Compatible with lapackd.ORGLQ. */ func LQBuild(A, tau, W *cmat.FloatMatrix, K int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) if K <= 0 || K > n(A) { return gomas.NewError(gomas.EVALUE, "LQBuild", K) } wsz := wsBuildLQ(A, 0) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "LQBuild", wsz) } // adjust blocking factor for workspace size lb := estimateLB(A, W.Len(), wsBuildLQ) //lb = imin(lb, conf.LB) lb = conf.LB if lb == 0 || m(A) <= lb { unblkBuildLQ(A, tau, W, m(A)-K, n(A)-K, true) } else { var Twork, Wrk cmat.FloatMatrix Twork.SetBuf(lb, lb, lb, W.Data()) Wrk.SetBuf(m(A)-lb, lb, m(A)-lb, W.Data()[Twork.Len():]) blkBuildLQ(A, tau, &Twork, &Wrk, K, lb, conf) } return err }
/* * Reduce general matrix A to upper Hessenberg form H by similiarity * transformation H = Q.T*A*Q. * * Arguments: * A On entry, the general matrix A. On exit, the elements on and * above the first subdiagonal contain the reduced matrix H. * The elements below the first subdiagonal with the vector tau * represent the ortogonal matrix A as product of elementary reflectors. * * tau On exit, the scalar factors of the elementary reflectors. * * W Workspace, as defined by HessReduceWork() * * conf The blocking configration. * * HessReduce is compatible with lapack.DGEHRD. */ func HessReduce(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) wmin := m(A) wopt := HessReduceWork(A, conf) wsz := W.Len() if wsz < wmin { return gomas.NewError(gomas.EWORK, "ReduceHess", wmin) } // use blocked version if workspace big enough for blocksize 4 lb := conf.LB if wsz < wopt { lb = estimateLB(A, wsz, wsHess) } if lb == 0 || n(A) <= lb { unblkHessGQvdG(A, tau, W, 0) } else { // blocked version var W0 cmat.FloatMatrix // shape workspace for blocked algorithm W0.SetBuf(m(A)+lb, lb, m(A)+lb, W.Data()) blkHessGQvdG(A, tau, &W0, lb, conf) } return err }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors and block reflector T * * Q = H(1) H(2) . . . H(k) * * as returned by DecomposeQRT(). * * Arguments: * C On entry, the M-by-N matrix C. On exit C is overwritten by Q*C or Q.T*C. * * A QR factorization as returned by QRTFactor() where the lower trapezoidal * part holds the elementary reflectors. * * T The block reflector computed from elementary reflectors as returned by * DecomposeQRT() or computed from elementary reflectors and scalar coefficients * by BuildT() * * W Workspace, size as returned by QRTMultWork() * * conf Blocking configuration * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS, NOTRANS * * Preconditions: * a. cols(A) == cols(T), * columns A define number of elementary reflector, must match order of block reflector. * b. if conf.LB == 0, cols(T) == rows(T) * unblocked invocation, block reflector T is upper triangular * c. if conf.LB != 0, rows(T) == conf.LB * blocked invocation, T is sequence of triangular block reflectors of order LB * d. if LEFT, rows(C) >= cols(A) && cols(C) >= rows(A) * * e. if RIGHT, cols(C) >= cols(A) && rows(C) >= rows(A) * * Compatible with lapack.DGEMQRT */ func QRTMult(C, A, T, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) wsz := QRTMultWork(C, T, flags, conf) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "QRTMult", wsz) } ok := false switch flags & gomas.RIGHT { case gomas.RIGHT: ok = n(C) >= m(A) default: ok = m(C) >= n(A) } if !ok { return gomas.NewError(gomas.ESIZE, "QRTMult") } var Wrk cmat.FloatMatrix if flags&gomas.RIGHT != 0 { Wrk.SetBuf(m(C), conf.LB, m(C), W.Data()) blockedMultQTRight(C, A, T, &Wrk, flags, conf) } else { Wrk.SetBuf(n(C), conf.LB, n(C), W.Data()) blockedMultQTLeft(C, A, T, &Wrk, flags, conf) } return err }
/* * Solve a system of linear equations A*X = B with general M-by-N * matrix A using the QR factorization computed by DecomposeQRT(). * * If flags&gomas.TRANS != 0: * find the minimum norm solution of an overdetermined system A.T * X = B. * i.e min ||X|| s.t A.T*X = B * * Otherwise: * find the least squares solution of an overdetermined system, i.e., * solve the least squares problem: min || B - A*X ||. * * Arguments: * B On entry, the right hand side N-by-P matrix B. On exit, the solution matrix X. * * A The elements on and above the diagonal contain the min(M,N)-by-N upper * trapezoidal matrix R. The elements below the diagonal with the matrix 'T', * represent the ortogonal matrix Q as product of elementary reflectors. * Matrix A and T are as returned by DecomposeQRT() * * T The block reflector computed from elementary reflectors as returned by * DecomposeQRT() or computed from elementary reflectors and scalar coefficients * by BuildT() * * W Workspace, size as returned by WorkspaceMultQT() * * flags Indicator flag * * conf Blocking configuration * * Compatible with lapack.GELS (the m >= n part) */ func QRTSolve(B, A, T, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var R, BT cmat.FloatMatrix conf := gomas.CurrentConf(confs...) if flags&gomas.TRANS != 0 { // Solve overdetermined system A.T*X = B // B' = R.-1*B R.SubMatrix(A, 0, 0, n(A), n(A)) BT.SubMatrix(B, 0, 0, n(A), n(B)) err = blasd.SolveTrm(&BT, &R, 1.0, gomas.LEFT|gomas.UPPER|gomas.TRANSA, conf) // Clear bottom part of B BT.SubMatrix(B, n(A), 0) BT.SetFrom(cmat.NewFloatConstSource(0.0)) // X = Q*B' err = QRTMult(B, A, T, W, gomas.LEFT, conf) } else { // solve least square problem min ||A*X - B|| // B' = Q.T*B err = QRTMult(B, A, T, W, gomas.LEFT|gomas.TRANS, conf) if err != nil { return err } // X = R.-1*B' R.SubMatrix(A, 0, 0, n(A), n(A)) BT.SubMatrix(B, 0, 0, n(A), n(B)) err = blasd.SolveTrm(&BT, &R, 1.0, gomas.LEFT|gomas.UPPER, conf) } return err }
/* * Compute LDL^T factorization of real symmetric matrix. * * Computes of a real symmetric matrix A using Bunch-Kauffman pivoting method. * The form of factorization is * * A = L*D*L.T or A = U*D*U.T * * where L (or U) is product of permutation and unit lower (or upper) triangular matrix * and D is block diagonal symmetric matrix with 1x1 and 2x2 blocks. * * Arguments * A On entry, the N-by-N symmetric matrix A. If flags bit LOWER (or UPPER) is set then * lower (or upper) triangular matrix and strictly upper (or lower) part is not * accessed. On exit, the block diagonal matrix D and lower (or upper) triangular * product matrix L (or U). * * W Workspace, size as returned by WorksizeBK(). * * ipiv Pivot vector. On exit details of interchanges and the block structure of D. If * ipiv[k] > 0 then D[k,k] is 1x1 and rows and columns k and ipiv[k]-1 were changed. * If ipiv[k] == ipiv[k+1] < 0 then D[k,k] is 2x2. If A is lower then rows and * columns k+1 and ipiv[k]-1 were changed. And if A is upper then rows and columns * k and ipvk[k]-1 were changed. * * flags Indicator bits, LOWER or UPPER. * * confs Optional blocking configuration. If not provided then default blocking * as returned by DefaultConf() is used. * * Unblocked algorithm is used if blocking configuration LB is zero or if N < LB. * * Compatible with lapack.SYTRF. */ func BKFactor(A, W *cmat.FloatMatrix, ipiv Pivots, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) for k, _ := range ipiv { ipiv[k] = 0 } wsz := BKFactorWork(A, conf) if W.Len() < wsz { return gomas.NewError(gomas.EWORK, "DecomposeBK", wsz) } var Wrk cmat.FloatMatrix if n(A) < conf.LB || conf.LB == 0 { // make workspace rows(A)*2 matrix Wrk.SetBuf(m(A), 2, m(A), W.Data()) if flags&gomas.LOWER != 0 { err, _ = unblkDecompBKLower(A, &Wrk, ipiv, conf) } else if flags&gomas.UPPER != 0 { err, _ = unblkDecompBKUpper(A, &Wrk, ipiv, conf) } } else { // make workspace rows(A)*(LB+1) matrix Wrk.SetBuf(m(A), conf.LB+1, m(A), W.Data()) if flags&gomas.LOWER != 0 { err = blkDecompBKLower(A, &Wrk, &ipiv, conf) } else if flags&gomas.UPPER != 0 { err = blkDecompBKUpper(A, &Wrk, &ipiv, conf) } } return err }
/* * Compute * B = B*diag(D).-1 flags & RIGHT == true * B = diag(D).-1*B flags & LEFT == true * * If flags is LEFT (RIGHT) then element-wise divides columns (rows) of B with vector D. * * Arguments: * B M-by-N matrix if flags&RIGHT == true or N-by-M matrix if flags&LEFT == true * * D N element column or row vector or N-by-N matrix * * flags Indicator bits, LEFT or RIGHT */ func SolveDiag(B, D *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var c, d0 cmat.FloatMatrix var d *cmat.FloatMatrix conf := gomas.CurrentConf(confs...) d = D if !D.IsVector() { d0.Diag(D) d = &d0 } dn := d0.Len() br, bc := B.Size() switch flags & (gomas.LEFT | gomas.RIGHT) { case gomas.LEFT: if br != dn { return gomas.NewError(gomas.ESIZE, "SolveDiag") } // scale rows; for k := 0; k < dn; k++ { c.Row(B, k) blasd.InvScale(&c, d.GetAt(k), conf) } case gomas.RIGHT: if bc != dn { return gomas.NewError(gomas.ESIZE, "SolveDiag") } // scale columns for k := 0; k < dn; k++ { c.Column(B, k) blasd.InvScale(&c, d.GetAt(k), conf) } } return nil }
/* * Compute QL factorization of a M-by-N matrix A: A = Q * L. * * Arguments: * A On entry, the M-by-N matrix A, M >= N. On exit, lower triangular matrix L * and the orthogonal matrix Q as product of elementary reflectors. * * tau On exit, the scalar factors of the elemenentary reflectors. * * W Workspace, N-by-nb matrix used for work space in blocked invocations. * * conf The blocking configuration. If nil then default blocking configuration * is used. Member conf.LB defines blocking size of blocked algorithms. * If it is zero then unblocked algorithm is used. * * Returns: * Error indicator. * * Additional information * * Ortogonal matrix Q is product of elementary reflectors H(k) * * Q = H(K-1)...H(1)H(0), where K = min(M,N) * * Elementary reflector H(k) is stored on column k of A above the diagonal with * implicit unit value on diagonal entry. The vector TAU holds scalar factors * of the elementary reflectors. * * Contents of matrix A after factorization is as follow: * * ( v0 v1 v2 v3 ) for M=6, N=4 * ( v0 v1 v2 v3 ) * ( l v1 v2 v3 ) * ( l l v2 v3 ) * ( l l l v3 ) * ( l l l l ) * * where l is element of L, vk is element of H(k). * * DecomposeQL is compatible with lapack.DGEQLF */ func QLFactor(A, tau, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var tauh cmat.FloatMatrix conf := gomas.CurrentConf(confs...) if m(A) < n(A) { return gomas.NewError(gomas.ESIZE, "QLFactor") } wsmin := wsQL(A, 0) if W == nil || W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "QLFactor", wsmin) } if tau.Len() < n(A) { return gomas.NewError(gomas.ESIZE, "QLFactor") } tauh.SubMatrix(tau, 0, 0, n(A), 1) lb := estimateLB(A, W.Len(), wsQL) lb = imin(lb, conf.LB) if lb == 0 || n(A) <= lb { unblockedQL(A, &tauh, W) } else { var Twork, Wrk cmat.FloatMatrix // block reflector T in first LB*LB elements in workspace // the rest, n(A)-LB*LB, is workspace for intermediate matrix operands Twork.SetBuf(conf.LB, conf.LB, -1, W.Data()) Wrk.SetBuf(n(A)-conf.LB, conf.LB, -1, W.Data()[Twork.Len():]) blockedQL(A, &tauh, &Twork, &Wrk, lb, conf) } return err }
/* * Compute QR factorization of a M-by-N matrix A using compact WY transformation: A = Q * R, * where Q = I - Y*T*Y.T, T is block reflector and Y holds elementary reflectors as lower * trapezoidal matrix saved below diagonal elements of the matrix A. * * Arguments: * A On entry, the M-by-N matrix A. On exit, the elements on and above * the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R. * The elements below the diagonal with the matrix 'T', represent * the ortogonal matrix Q as product of elementary reflectors. * * T On exit, the K block reflectors which, together with trilu(A) represent * the ortogonal matrix Q as Q = I - Y*T*Y.T where Y = trilu(A). * K is ceiling(N/LB) where LB is blocking size from used blocking configuration. * The matrix T is LB*N augmented matrix of K block reflectors, * T = [T(0) T(1) .. T(K-1)]. Block reflector T(n) is LB*LB matrix, expect * reflector T(K-1) that is IB*IB matrix where IB = min(LB, K % LB) * * W Workspace, required size returned by QRTFactorWork(). * * conf Optional blocking configuration. If not provided then default configuration * is used. * * Returns: * Error indicator. * * QRTFactor is compatible with lapack.DGEQRT */ func QRTFactor(A, T, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) ok := false rsize := 0 if m(A) < n(A) { return gomas.NewError(gomas.ESIZE, "QRTFactor") } wsz := QRTFactorWork(A, conf) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "QRTFactor", wsz) } tr, tc := T.Size() if conf.LB == 0 || conf.LB > n(A) { ok = tr == tc && tr == n(A) rsize = n(A) * n(A) } else { ok = tr == conf.LB && tc == n(A) rsize = conf.LB * n(A) } if !ok { return gomas.NewError(gomas.ESMALL, "QRTFactor", rsize) } if conf.LB == 0 || n(A) <= conf.LB { err = unblockedQRT(A, T, W) } else { Wrk := cmat.MakeMatrix(n(A), conf.LB, W.Data()) err = blockedQRT(A, T, Wrk, conf) } return err }
/* * Calculate required workspace to decompose matrix A using compact WY transformation. * If blocking configuration is not provided then default configuation will be used. * * Returns size of workspace as number of elements. */ func QRTFactorWork(A *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) sz := n(A) if conf.LB > 0 && n(A) > conf.LB { sz *= conf.LB } return sz }
func EigenSym(D, A, W *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (err *gomas.Error) { var sD, sE, E, tau, Wred cmat.FloatMatrix var vv *cmat.FloatMatrix err = nil vv = nil conf := gomas.CurrentConf(confs...) if m(A) != n(A) || D.Len() != m(A) { err = gomas.NewError(gomas.ESIZE, "EigenSym") return } if bits&gomas.WANTV != 0 && W.Len() < 3*n(A) { err = gomas.NewError(gomas.EWORK, "EigenSym") return } if bits&(gomas.LOWER|gomas.UPPER) == 0 { bits = bits | gomas.LOWER } ioff := 1 if bits&gomas.LOWER != 0 { ioff = -1 } E.SetBuf(n(A)-1, 1, n(A)-1, W.Data()) tau.SetBuf(n(A), 1, n(A), W.Data()[n(A)-1:]) wrl := W.Len() - 2*n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[2*n(A)-1:]) // reduce to tridiagonal if err = TRDReduce(A, &tau, &Wred, bits, conf); err != nil { err.Update("EigenSym") return } sD.Diag(A) sE.Diag(A, ioff) blasd.Copy(D, &sD) blasd.Copy(&E, &sE) if bits&gomas.WANTV != 0 { if err = TRDBuild(A, &tau, &Wred, n(A), bits, conf); err != nil { err.Update("EigenSym") return } vv = A } // resize workspace wrl = W.Len() - n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[n(A)-1:]) if err = TRDEigen(D, &E, vv, &Wred, bits, conf); err != nil { err.Update("EigenSym") return } return }
func BDMultWork(A *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) nl := wsMultBidiagLeft(A, conf.LB) nr := wsMultBidiagRight(A, conf.LB) if nl > nr { return nl } return nr }
/* * Calculate workspace size needed to compute C*Q or Q*C with QR decomposition * computed with DecomposeQR(). */ func QRMultWork(C *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (sz int) { conf := gomas.CurrentConf(confs...) switch bits & gomas.RIGHT { case gomas.RIGHT: sz = wsMultQRight(C, conf.LB) default: sz = wsMultQLeft(C, conf.LB) } return }
func main() { flag.Parse() M := N + N/10 conf := gomas.CurrentConf() A := cmat.NewMatrix(M, N) A0 := cmat.NewCopy(A) tau := cmat.NewMatrix(N, 1) W := lapackd.Workspace(lapackd.QRFactorWork(A, conf)) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean) cumtime := 0.0 mintime := 0.0 maxtime := 0.0 for i := 0; i < count; i++ { flushCache() t1 := time.Now() // ---------------------------------------------- lapackd.QRFactor(A, tau, W, conf) // ---------------------------------------------- t2 := time.Now() tm := t2.Sub(t1) if mintime == 0.0 || tm.Seconds() < mintime { mintime = tm.Seconds() } if maxtime == 0.0 || tm.Seconds() > maxtime { maxtime = tm.Seconds() } cumtime += tm.Seconds() if verbose { fmt.Printf("%3d %12.4f msec, %9.4f gflops\n", i, 1e+3*tm.Seconds(), gflops(M, N, tm.Seconds())) } blasd.Copy(A, A0) } cumtime /= float64(count) minflops := gflops(M, N, maxtime) avgflops := gflops(M, N, cumtime) maxflops := gflops(M, N, mintime) fmt.Printf("%5d %5d %3d %9.4f %9.4f %9.4f Gflops\n", M, N, conf.LB, minflops, avgflops, maxflops) }
/* * Calculate workspace size needed to compute C*Q or Q*C with QR decomposition * computed with DecomposeQRT(). */ func QRTMultWork(C, T *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (sz int) { conf := gomas.CurrentConf(confs...) switch bits & gomas.RIGHT { case gomas.RIGHT: sz = m(C) default: sz = n(C) } if conf.LB > 0 { // add space for intermediate reflector and account // for blocking factor sz = (sz + conf.LB) * conf.LB } return }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors. * * Q = H(0) H(1) . . . H(K-1) * * as returned by QRFactor(). * * Arguments: * C On entry, the M-by-N matrix C or if flag bit RIGHT is set then N-by-M matrix * On exit C is overwritten by Q*C or Q.T*C. If bit RIGHT is set then C is * overwritten by C*Q or C*Q.T * * A QR factorization as returned by QRFactor() where the lower trapezoidal * part holds the elementary reflectors. * * tau The scalar factors of the elementary reflectors. * * W Workspace matrix, required size is returned by WorksizeMultQ(). * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS * * conf Blocking configuration. Field LB defines block size. If it is zero * unblocked invocation is assumed. Actual blocking size is adjusted * to available workspace size and minimum of configured block size and * block size implied by workspace is used. * * Compatible with lapack.DORMQR */ func QRMult(C, A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) // default to multiply from left if side not defined if flags&(gomas.LEFT|gomas.RIGHT) == 0 { flags = flags | gomas.LEFT } // n(A) is number of elementary reflectors defining the Q matrix ok := false wsizer := wsMultQLeft switch flags & gomas.RIGHT { case gomas.RIGHT: ok = n(C) == m(A) wsizer = wsMultQRight default: ok = m(C) == m(A) } if !ok { return gomas.NewError(gomas.ESIZE, "QRMult") } // minimum workspace size wsz := wsizer(C, 0) if W == nil || W.Len() < wsz { return gomas.NewError(gomas.EWORK, "QRMult", wsz) } // estimate blocking factor for current workspace lb := estimateLB(C, W.Len(), wsizer) lb = imin(lb, conf.LB) if lb == 0 || n(A) <= lb { if flags&gomas.RIGHT != 0 { unblockedMultQRight(C, A, tau, W, flags) } else { unblockedMultQLeft(C, A, tau, W, flags) } } else { if flags&gomas.RIGHT != 0 { blockedMultQRight(C, A, tau, W, flags, lb, conf) } else { blockedMultQLeft(C, A, tau, W, flags, lb, conf) } } return err }
/* * Solve A*X = B with symmetric real matrix A. * * Solves a system of linear equations A*X = B with a real symmetric matrix A using * the factorization A = U*D*U**T or A = L*D*L**T computed by BKFactor(). * * Arguments * B On entry, right hand side matrix B. On exit, the solution matrix X. * * A Block diagonal matrix D and the multipliers used to compute factor U * (or L) as returned by BKFactor(). * * ipiv Block structure of matrix D and details of interchanges. * * flags Indicator bits, LOWER or UPPER. * * confs Optional blocking configuration. * * Currently only unblocked algorightm implemented. Compatible with lapack.SYTRS. */ func BKSolve(B, A *cmat.FloatMatrix, ipiv Pivots, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) if n(A) != m(B) { return gomas.NewError(gomas.ESIZE, "SolveBK") } if flags&gomas.LOWER != 0 { // first part: Z = D.-1*(L.-1*B) err = unblkSolveBKLower(B, A, ipiv, 1, conf) // second part: X = L.-T*Z err = unblkSolveBKLower(B, A, ipiv, 2, conf) } else if flags&gomas.UPPER != 0 { // first part: Z = D.-1*(U.-1*B) err = unblkSolveBKUpper(B, A, ipiv, 1, conf) // second part: X = U.-T*Z err = unblkSolveBKUpper(B, A, ipiv, 2, conf) } return err }
/* * Solve a system of linear equations A.T*X = B with general M-by-N * matrix A using the QR factorization computed by LQFactor(). * * If flags&TRANS != 0: * find the minimum norm solution of an overdetermined system A.T * X = B. * i.e min ||X|| s.t A.T*X = B * * Otherwise: * find the least squares solution of an overdetermined system, i.e., * solve the least squares problem: min || B - A*X ||. * * Arguments: * B On entry, the right hand side N-by-P matrix B. On exit, the solution matrix X. * * A The elements on and below the diagonal contain the M-by-min(M,N) lower * trapezoidal matrix L. The elements right of the diagonal with the vector 'tau', * represent the ortogonal matrix Q as product of elementary reflectors. * Matrix A is as returned by LQFactor() * * tau The vector of N scalar coefficients that together with trilu(A) define * the ortogonal matrix Q as Q = H(N)H(N-1)...H(1) * * W Workspace, size required returned WorksizeMultLQ(). * * flags Indicator flags * * conf Optinal blocking configuration. If not given default will be used. Unblocked * invocation is indicated with conf.LB == 0. * * Compatible with lapack.GELS (the m < n part) */ func LQSolve(B, A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var L, BL cmat.FloatMatrix conf := gomas.CurrentConf(confs...) wsmin := wsMultLQLeft(B, 0) if W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "SolveLQ", wsmin) } if flags&gomas.TRANS != 0 { // solve: MIN ||A.T*X - B|| // B' = Q.T*B err = LQMult(B, A, tau, W, gomas.LEFT, conf) if err != nil { return err } // X = L.-1*B' L.SubMatrix(A, 0, 0, m(A), m(A)) BL.SubMatrix(B, 0, 0, m(A), n(B)) err = blasd.SolveTrm(&BL, &L, 1.0, gomas.LEFT|gomas.LOWER|gomas.TRANSA, conf) } else { // Solve underdetermined system A*X = B // B' = L.-1*B L.SubMatrix(A, 0, 0, m(A), m(A)) BL.SubMatrix(B, 0, 0, m(A), n(B)) err = blasd.SolveTrm(&BL, &L, 1.0, gomas.LEFT|gomas.LOWER, conf) // Clear bottom part of B BL.SubMatrix(B, m(A), 0) BL.SetFrom(cmat.NewFloatConstSource(0.0)) // X = Q.T*B' err = LQMult(B, A, tau, W, gomas.LEFT|gomas.TRANS, conf) } return err }
/* * Compute an LU factorization of a general M-by-N matrix using * partial pivoting with row interchanges. * * Arguments: * A On entry, the M-by-N matrix to be factored. On exit the factors * L and U from factorization A = P*L*U, the unit diagonal elements * of L are not stored. * * pivots On exit the pivot indices. * * nb Blocking factor for blocked invocations. If bn == 0 or * min(M,N) < nb unblocked algorithm is used. * * Returns: * LU factorization and error indicator. * * Compatible with lapack.DGETRF */ func LUFactor(A *cmat.FloatMatrix, pivots Pivots, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil conf := gomas.CurrentConf(confs...) if pivots == nil { return luFactorNoPiv(A, confs...) } mlen := imin(m(A), n(A)) if len(pivots) < mlen { return gomas.NewError(gomas.ESIZE_PIVOTS, "DecomposeLU") } // clear pivot array for k, _ := range pivots { pivots[k] = 0 } if mlen <= conf.LB || conf.LB == 0 { err = unblockedLUpiv(A, &pivots, 0, conf) } else { err = blockedLUpiv(A, &pivots, conf.LB, conf) } return err }
/* * Reduce symmetric matrix to tridiagonal form by similiarity transformation A = Q*T*Q.T * * Arguments * A On entry, symmetric matrix with elemets stored in upper (lower) triangular * part. On exit, diagonal and first super (sub) diagonals hold matrix T. The upper * (lower) triangular part above (below) first super(sub)diagonal is used to store * orthogonal matrix Q. * * tau Scalar coefficients of elementary reflectors. * * W Workspace * * flags LOWER or UPPER * * confs Optional blocking configuration * * If LOWER, then the matrix Q is represented as product of elementary reflectors * * Q = H(1)H(2)...H(n-1). * * If UPPER, then the matrix Q is represented as product * * Q = H(n-1)...H(2)H(1). * * Each H(k) has form I - tau*v*v.T. * * The contents of A on exit is as follow for N = 5. * * LOWER UPPER * ( d . . . . ) ( d e v1 v2 v3 ) * ( e d . . . ) ( . d e v2 v3 ) * ( v1 e d . . ) ( . . d e v3 ) * ( v1 v2 e d . ) ( . . . d e ) * ( v1 v2 v3 e d ) ( . . . . d ) */ func TRDReduce(A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var Y cmat.FloatMatrix // default to lower triangular if uplo not defined if flags&(gomas.LOWER|gomas.UPPER) == 0 { flags = flags | gomas.LOWER } ok := m(A) == n(A) && tau.Len() >= n(A) if !ok { return gomas.NewError(gomas.ESIZE, "ReduceTridiag") } conf := gomas.CurrentConf(confs...) lb := conf.LB wsmin := wsTridiag(A, 0) if W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "ReduceTridiag", wsmin) } if flags&gomas.LOWER != 0 { if lb == 0 || n(A)-1 < lb { unblkReduceTridiagLower(A, tau, W) } else { Y.SetBuf(m(A), lb, m(A), W.Data()) blkReduceTridiagLower(A, tau, &Y, W, lb, conf) } } else { if lb == 0 || n(A)-1 < lb { unblkReduceTridiagUpper(A, tau, W, false) } else { Y.SetBuf(m(A), lb, m(A), W.Data()) blkReduceTridiagUpper(A, tau, &Y, W, lb, conf) } } return err }
/* * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors. * * Q = H(0)H(1)...H(K-1) * * as returned by RQFactor(). * * Arguments: * C On entry, the M-by-N matrix C or if flag bit RIGHT is set then * N-by-M matrix. On exit C is overwritten by Q*C or Q.T*C. * If bit RIGHT is set then C is overwritten by C*Q or C*Q.T * * A RQ factorization as returned by RQFactor() where the upper * trapezoidal part holds the elementary reflectors. * * tau The scalar factors of the elementary reflectors. * * W Workspace matrix, required size is returned by RQMultWork(). * * flags Indicators. Valid indicators LEFT, RIGHT, TRANS * * conf Blocking configuration. Field LB defines block sized. If it is zero * unblocked invocation is assumed. * * Compatible with lapack.DORMRQ * * Notes: * m(A) is number of elementary reflectors * n(A) is the order of the Q matrix * * LEFT : m(C) >= n(Q) --> m(A) <= n(C) <= n(A) * RIGHT: n(C) >= m(Q) --> m(A) <= m(C) <= n(A) */ func RQMult(C, A, tau, W *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var wsmin int var tauval float64 var Qh, tauh cmat.FloatMatrix conf := gomas.CurrentConf(confs...) // default to multiply from left if side not defined if flags&(gomas.LEFT|gomas.RIGHT) == 0 { flags = flags | gomas.LEFT } // m(A) is number of elementary reflectors, Q is n(A)-by-n(A) matrix ok := false lb := 0 hr, hc := m(A), n(A) switch flags & gomas.RIGHT { case gomas.RIGHT: ok = n(C) <= n(A) && m(A) <= n(C) wsmin = wsMultRQRight(C, 0) hc = n(C) lb = estimateLB(C, W.Len(), wsMultRQRight) default: ok = m(C) <= n(A) && m(A) <= m(C) wsmin = wsMultRQLeft(C, 0) hc = m(C) lb = estimateLB(C, W.Len(), wsMultRQLeft) } if !ok { return gomas.NewError(gomas.ESIZE, "MultRQ") } if W == nil || W.Len() < wsmin { return gomas.NewError(gomas.EWORK, "MultRQ", wsmin) } lb = imin(lb, conf.LB) Qh.SubMatrix(A, 0, 0, hr, hc) tauh.SubMatrix(tau, 0, 0, m(A), 1) if hc == hr { // m-by-m multiplication, H(K) is unit vector // set last tauval to zero, householder functions expect this tauval = tau.Get(hc-1, 0) tau.Set(hc-1, 0, 0.0) } if lb == 0 || m(A) <= lb { if flags&gomas.RIGHT != 0 { unblockedMultRQRight(C, &Qh, &tauh, W, flags) } else { unblockedMultRQLeft(C, &Qh, &tauh, W, flags) } } else { //lb = conf.LB if flags&gomas.RIGHT != 0 { blockedMultRQRight(C, &Qh, &tauh, W, flags, lb, conf) } else { blockedMultRQLeft(C, &Qh, &tauh, W, flags, lb, conf) } } if hc == hr { // restore tau value tau.Set(hc-1, 0, tauval) } return err }
/* * Calculate required workspace to decompose matrix A with current blocking configuration. * If blocking configuration is not provided then default configuation will be used. * * Returns size of workspace as number of elements. */ func RQFactorWork(A *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) return wsRQ(A, conf.LB) }
/* * Compute worksize needed for Hessenberg reduction of matrix A with * a blocking configuration. */ func HessReduceWork(A *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) return wsHess(A, conf.LB) }
/* * \brief Compute SVD of general M-by-N matrix. * * Computes the singular values and, optionally, the left and/or right * singular vectors from the SVD of a The SVD of A has the form * * A = U*S*V.T * * where S is the diagonal matrix with singular values, U is an orthogonal * matrix of left singular vectors, and V.T is an orthogonal matrix of right * singular vectors. * * If left singular vectors are requested by setting bit gomas.WANTU and M >= N, the matrix U is * either M-by-N or M-by-M. If M < N then U is M-by-M. * * If left singular vectors are requested by setting bit gomas.WANTV and M >= N, the matrix V is * either N-by-N. If M < N then V is M-by-N or N-by-N. * */ func SVD(S, U, V, A, W *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (err *gomas.Error) { err = nil tall := m(A) >= n(A) conf := gomas.CurrentConf(confs...) if tall && S.Len() < n(A) { err = gomas.NewError(gomas.ESIZE, "SVD") return } if !tall && S.Len() < m(A) { err = gomas.NewError(gomas.ESIZE, "SVD") return } if bits&gomas.WANTU != 0 { if U == nil { err = gomas.NewError(gomas.EVALUE, "SVD") return } if tall { // if M >= N; U is either M-by-N or M-by-M if m(U) != m(A) || (n(U) != m(A) && n(U) != n(A)) { err = gomas.NewError(gomas.ESIZE, "SVD") return } } else { if m(U) != m(A) { err = gomas.NewError(gomas.ESIZE, "SVD") return } // U is square is M < N if m(U) != n(U) { err = gomas.NewError(gomas.ESIZE, "SVD") return } } } if bits&gomas.WANTV != 0 { if V == nil { err = gomas.NewError(gomas.EVALUE, "SVD") return } if tall { if n(V) != n(A) { err = gomas.NewError(gomas.ESIZE, "SVD") return } // V is square is M >= N if m(V) != n(V) { err = gomas.NewError(gomas.ESIZE, "SVD") return } } else { // if M < N; V is either M-by-N or N-by-N if n(V) != n(A) || (m(V) != m(A) && m(V) != n(A)) { err = gomas.NewError(gomas.ESIZE, "SVD") return } } } if tall { if n(A) <= 2 { err = svdSmall(S, U, V, A, W, bits, conf) } else { err = svdTall(S, U, V, A, W, bits, conf) } } else { if m(A) <= 2 { err = svdSmall(S, U, V, A, W, bits, conf) } else { err = svdWide(S, U, V, A, W, bits, conf) } } if err != nil { err.Update("SVD") } return }
func TRDReduceWork(A *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) return wsTridiag(A, conf.LB) }
func LQBuildWork(A *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) return wsBuildLQ(A, conf.LB) }
func QRSolveWork(C *cmat.FloatMatrix, confs ...*gomas.Config) int { conf := gomas.CurrentConf(confs...) return wsMultQLeft(C, conf.LB) }