/*
 * Compute
 *   B = B*diag(D).-1      flags & RIGHT == true
 *   B = diag(D).-1*B      flags & LEFT  == true
 *
 * If flags is LEFT (RIGHT) then element-wise divides rows (columns) of B with vector D.
 *
 * Arguments:
 *   B      M-by-N matrix if flags&RIGHT == true or N-by-M matrix if flags&LEFT == true
 *
 *   D      N element column or row vector or N-by-N matrix
 *
 *   flags  Indicator bits, LEFT or RIGHT
 */
func SolveDiag(B, D *cmat.FloatMatrix, flags int, confs ...*gomas.Config) *gomas.Error {
    var c, d0 cmat.FloatMatrix
    var d *cmat.FloatMatrix

    conf := gomas.CurrentConf(confs...)

    d = D
    if !D.IsVector() {
        d0.Diag(D)
        d = &d0
    }
    dn := d.Len()
    br, bc := B.Size()
    switch flags & (gomas.LEFT | gomas.RIGHT) {
    case gomas.LEFT:
        if br != dn {
            return gomas.NewError(gomas.ESIZE, "SolveDiag")
        }
        // scale rows
        for k := 0; k < dn; k++ {
            c.Row(B, k)
            blasd.InvScale(&c, d.GetAt(k), conf)
        }
    case gomas.RIGHT:
        if bc != dn {
            return gomas.NewError(gomas.ESIZE, "SolveDiag")
        }
        // scale columns
        for k := 0; k < dn; k++ {
            c.Column(B, k)
            blasd.InvScale(&c, d.GetAt(k), conf)
        }
    }
    return nil
}
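/*
 * Usage sketch for SolveDiag (illustrative, not part of the library; it assumes
 * the same package and imports as the surrounding code). Dividing each row of B
 * by the corresponding element of D computes diag(D).-1*B in place.
 */
func solveDiagExample() {
    B := cmat.NewMatrix(4, 3)
    B.SetFrom(cmat.NewFloatNormSource())
    // D as a 4 element column vector; with LEFT its length must match the rows of B
    D := cmat.NewMatrix(4, 1)
    D.SetFrom(cmat.NewFloatNormSource())
    if err := SolveDiag(B, D, gomas.LEFT); err != nil {
        panic(err)
    }
}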
/*
 * Blocked QL decomposition with compact WY transform.
 *
 * Compatible with lapack.DGEQLF.
 */
func blockedQL(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
    var ATL, ATR, ABL, ABR, AL cmat.FloatMatrix
    var A00, A01, A10, A11, A22 cmat.FloatMatrix
    var TT, TB cmat.FloatMatrix
    var t0, tau, t2 cmat.FloatMatrix
    var Wrk, w1 cmat.FloatMatrix

    util.Partition2x2(
        &ATL, &ATR,
        &ABL, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
    util.Partition2x1(
        &TT,
        &TB, Tvec, 0, util.PBOTTOM)

    nb := lb
    for m(&ATL)-nb > 0 && n(&ATL)-nb > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, &A01, nil,
            &A10, &A11, nil,
            nil, nil, &A22, A, nb, util.PTOPLEFT)
        util.Repartition2x1to3x1(&TT,
            &t0,
            &tau,
            &t2, Tvec, nb, util.PTOP)

        // current block size
        cb, rb := A11.Size()
        if rb < cb {
            cb = rb
        }

        // --------------------------------------------------------
        // decompose right side AL == /A01\
        //                            \A11/
        w1.SubMatrix(W, 0, 0, cb, 1)
        util.Merge2x1(&AL, &A01, &A11)
        unblockedQL(&AL, &tau, &w1)

        // build block reflector
        unblkQLBlockReflector(Twork, &AL, &tau)

        // update A'tail i.e. A10 and A00 with (I - Y*T*Y.T).T * A'tail
        // compute: C - Y*(C.T*Y*T).T
        ar, ac := A10.Size()
        Wrk.SubMatrix(W, 0, 0, ac, ar)
        updateQLLeft(&A10, &A00, &A11, &A01, Twork, &Wrk, true, conf)
        // --------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, &ATR,
            &ABL, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT)
        util.Continue3x1to2x1(
            &TT,
            &TB, &t0, &tau, Tvec, util.PTOP)
    }

    // last block with unblocked
    if m(&ATL) > 0 && n(&ATL) > 0 {
        w1.SubMatrix(W, 0, 0, n(&ATL), 1)
        unblockedQL(&ATL, &t0, &w1)
    }
}
/*
 * Solve a system of linear equations A*X = B or A.T*X = B with general N-by-N
 * matrix A using the LU factorization computed by LUFactor().
 *
 * Arguments:
 *   B       On entry, the right hand side matrix B. On exit, the solution matrix X.
 *
 *   A       The factors L and U from the factorization A = P*L*U as computed by
 *           LUFactor().
 *
 *   pivots  The pivot indices from LUFactor().
 *
 *   flags   The indicator of the form of the system of equations.
 *           If flags&TRANSA != 0 then the system is transposed. All other values
 *           indicate a non-transposed system.
 *
 * Compatible with lapack.DGETRS.
 */
func LUSolve(B, A *cmat.FloatMatrix, pivots Pivots, flags int, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.DefaultConf()
    if len(confs) > 0 {
        conf = confs[0]
    }

    ar, ac := A.Size()
    br, _ := B.Size()
    if ar != ac {
        return gomas.NewError(gomas.ENOTSQUARE, "LUSolve")
    }
    if br != ac {
        return gomas.NewError(gomas.ESIZE, "LUSolve")
    }

    if pivots != nil {
        applyPivots(B, pivots)
    }

    if flags&gomas.TRANSA != 0 {
        // transposed: X = A.-1*B == (L.T*U.T).-1*B == U.-T*(L.-T*B)
        blasd.SolveTrm(B, A, 1.0, gomas.LOWER|gomas.UNIT|gomas.TRANSA, conf)
        blasd.SolveTrm(B, A, 1.0, gomas.UPPER|gomas.TRANSA, conf)
    } else {
        // non-transposed: X = A.-1*B == (L*U).-1*B == U.-1*(L.-1*B)
        blasd.SolveTrm(B, A, 1.0, gomas.LOWER|gomas.UNIT, conf)
        blasd.SolveTrm(B, A, 1.0, gomas.UPPER, conf)
    }
    return err
}
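/*
 * Usage sketch for LUSolve (illustrative, not part of the library). It assumes
 * `lu` already holds the packed L and U factors computed by LUFactor() and `piv`
 * the corresponding pivot indices; only the function defined above is called.
 * The same factorization is reused to solve both A*X = B and A.T*X = B2 in place.
 */
func solveWithLU(B, B2, lu *cmat.FloatMatrix, piv Pivots) *gomas.Error {
    if err := LUSolve(B, lu, piv, 0); err != nil {
        return err
    }
    return LUSolve(B2, lu, piv, gomas.TRANSA)
}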
func updtrmv(A, X, Y *cmat.FloatMatrix, alpha float64, bits, N, M int) error {
    var Am C.mdata_t
    var Xm, Ym C.mvec_t

    xr, _ := X.Size()
    yr, _ := Y.Size()
    Am.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    Am.step = C.int(A.Stride())
    Xm.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    Ym.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    Ym.inc = C.int(1)
    Xm.inc = C.int(1)
    // if row vectors, change increment
    if xr == 1 {
        Xm.inc = C.int(X.Stride())
    }
    if yr == 1 {
        Ym.inc = C.int(Y.Stride())
    }
    C.__d_update_trmv_unb(
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        (*C.mvec_t)(unsafe.Pointer(&Xm)),
        (*C.mvec_t)(unsafe.Pointer(&Ym)),
        C.double(alpha), C.int(bits), C.int(N), C.int(M))
    return nil
}
/*
 * Blocked LQ decomposition with compact WY transform. As implemented
 * in lapack.DGELQF subroutine.
 */
func blockedLQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
    var ATL, ATR, ABL, ABR, AR cmat.FloatMatrix
    var A00, A11, A12, A21, A22 cmat.FloatMatrix
    var TT, TB cmat.FloatMatrix
    var t0, tau, t2 cmat.FloatMatrix
    var Wrk, w1 cmat.FloatMatrix

    util.Partition2x2(
        &ATL, &ATR,
        &ABL, &ABR, A, 0, 0, util.PTOPLEFT)
    util.Partition2x1(
        &TT,
        &TB, Tvec, 0, util.PTOP)

    //nb := conf.LB
    for m(&ABR)-lb > 0 && n(&ABR)-lb > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, nil, nil,
            nil, &A11, &A12,
            nil, &A21, &A22, A, lb, util.PBOTTOMRIGHT)
        util.Repartition2x1to3x1(&TT,
            &t0,
            &tau,
            &t2, Tvec, lb, util.PBOTTOM)

        // current block size
        cb, rb := A11.Size()
        if rb < cb {
            cb = rb
        }

        // --------------------------------------------------------
        // decompose current row AR == ( A11 A12 )
        w1.SubMatrix(W, 0, 0, cb, 1)
        util.Merge1x2(&AR, &A11, &A12)
        unblockedLQ(&AR, &tau, &w1)

        // build block reflector
        unblkBlockReflectorLQ(Twork, &AR, &tau)

        // update A'tail i.e. A21 and A22 with A'*(I - Y*T*Y.T).T
        // compute: C - Y*(C.T*Y*T).T
        ar, ac := A21.Size()
        Wrk.SubMatrix(W, 0, 0, ar, ac)
        updateRightLQ(&A21, &A22, &A11, &A12, Twork, &Wrk, true, conf)
        // --------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, &ATR,
            &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
        util.Continue3x1to2x1(
            &TT,
            &TB, &t0, &tau, Tvec, util.PBOTTOM)
    }

    // last block with unblocked
    if m(&ABR) > 0 && n(&ABR) > 0 {
        w1.SubMatrix(W, 0, 0, m(&ABR), 1)
        unblockedLQ(&ABR, &t2, &w1)
    }
}
func gemv(Y, A, X *cmat.FloatMatrix, alpha, beta float64, bits, S, L, R, E int) {
    var Am C.mdata_t
    var Xm, Ym C.mvec_t

    xr, _ := X.Size()
    yr, _ := Y.Size()
    Am.md = (*C.double)(unsafe.Pointer(&A.Data()[0]))
    Am.step = C.int(A.Stride())
    Xm.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    Ym.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    Ym.inc = C.int(1)
    Xm.inc = C.int(1)
    // if row vectors, change increment
    if xr == 1 {
        Xm.inc = C.int(X.Stride())
    }
    if yr == 1 {
        Ym.inc = C.int(Y.Stride())
    }
    C.__d_gemv_unb(
        (*C.mvec_t)(unsafe.Pointer(&Ym)),
        (*C.mdata_t)(unsafe.Pointer(&Am)),
        (*C.mvec_t)(unsafe.Pointer(&Xm)),
        C.double(alpha), /*C.double(beta),*/
        C.int(bits), C.int(S), C.int(L), C.int(R), C.int(E))
}
func axpby(Y, X *cmat.FloatMatrix, alpha, beta float64, N int) {
    var x, y C.mvec_t

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    yr, _ := Y.Size()
    y.md = (*C.double)(unsafe.Pointer(&Y.Data()[0]))
    y.inc = C.int(1)
    if yr == 1 {
        y.inc = C.int(Y.Stride())
    }
    if beta == 1.0 {
        C.__d_vec_axpy(
            (*C.mvec_t)(unsafe.Pointer(&y)),
            (*C.mvec_t)(unsafe.Pointer(&x)),
            C.double(alpha), C.int(N))
    } else {
        C.__d_vec_axpby(
            (*C.mvec_t)(unsafe.Pointer(&y)),
            (*C.mvec_t)(unsafe.Pointer(&x)),
            C.double(alpha), C.double(beta), C.int(N))
    }
    return
}
/*
 * Compute the QR factorization of an M-by-N matrix A using the compact WY transformation:
 * A = Q * R, where Q = I - Y*T*Y.T, T is the block reflector and Y holds the elementary
 * reflectors as a lower trapezoidal matrix stored below the diagonal elements of the matrix A.
 *
 * Arguments:
 *   A     On entry, the M-by-N matrix A. On exit, the elements on and above
 *         the diagonal contain the min(M,N)-by-N upper trapezoidal matrix R.
 *         The elements below the diagonal, together with the matrix 'T', represent
 *         the orthogonal matrix Q as a product of elementary reflectors.
 *
 *   T     On exit, the K block reflectors which, together with trilu(A), represent
 *         the orthogonal matrix Q as Q = I - Y*T*Y.T where Y = trilu(A).
 *         K is ceiling(N/LB) where LB is the blocking size from the used blocking
 *         configuration. The matrix T is an LB*N augmented matrix of K block reflectors,
 *         T = [T(0) T(1) .. T(K-1)]. Block reflector T(n) is an LB*LB matrix, except
 *         reflector T(K-1) which is an IB*IB matrix where IB = min(LB, K % LB).
 *
 *   W     Workspace, required size returned by QRTFactorWork().
 *
 *   conf  Optional blocking configuration. If not provided then the default
 *         configuration is used.
 *
 * Returns:
 *   Error indicator.
 *
 * QRTFactor is compatible with lapack.DGEQRT
 */
func QRTFactor(A, T, W *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
    var err *gomas.Error = nil
    conf := gomas.CurrentConf(confs...)
    ok := false
    rsize := 0

    if m(A) < n(A) {
        return gomas.NewError(gomas.ESIZE, "QRTFactor")
    }
    wsz := QRTFactorWork(A, conf)
    if W == nil || W.Len() < wsz {
        return gomas.NewError(gomas.EWORK, "QRTFactor", wsz)
    }

    tr, tc := T.Size()
    if conf.LB == 0 || conf.LB > n(A) {
        ok = tr == tc && tr == n(A)
        rsize = n(A) * n(A)
    } else {
        ok = tr == conf.LB && tc == n(A)
        rsize = conf.LB * n(A)
    }
    if !ok {
        return gomas.NewError(gomas.ESMALL, "QRTFactor", rsize)
    }

    if conf.LB == 0 || n(A) <= conf.LB {
        err = unblockedQRT(A, T, W)
    } else {
        Wrk := cmat.MakeMatrix(n(A), conf.LB, W.Data())
        err = blockedQRT(A, T, Wrk, conf)
    }
    return err
}
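/*
 * Usage sketch for QRTFactor (illustrative, not part of the library; it assumes
 * the same package and imports as the code above). A must have at least as many
 * rows as columns; T and the workspace are sized from the blocking configuration
 * as described in the documentation above.
 */
func qrtFactorExample(A *cmat.FloatMatrix) (*cmat.FloatMatrix, *gomas.Error) {
    conf := gomas.CurrentConf()
    lb := conf.LB
    if lb == 0 || lb > n(A) {
        // unblocked path expects an N-by-N matrix T
        lb = n(A)
    }
    T := cmat.NewMatrix(lb, n(A))
    W := cmat.NewMatrix(QRTFactorWork(A, conf), 1)
    if err := QRTFactor(A, T, W, conf); err != nil {
        return nil, err
    }
    return T, nil
}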
func TestSubMatrixGob(t *testing.T) {
    var B, As cmat.FloatMatrix
    var network bytes.Buffer

    N := 32
    A := cmat.NewMatrix(N, N)
    zeromean := cmat.NewFloatNormSource()
    A.SetFrom(zeromean)
    As.SubMatrix(A, 3, 3, N-6, N-6)

    enc := gob.NewEncoder(&network)
    dec := gob.NewDecoder(&network)

    // encode to network
    err := enc.Encode(&As)
    if err != nil {
        t.Logf("encode error: %v\n", err)
        t.FailNow()
    }
    // decode from network
    err = dec.Decode(&B)
    if err != nil {
        t.Logf("decode error: %v\n", err)
        t.FailNow()
    }

    ar, ac := As.Size()
    br, bc := B.Size()
    t.Logf("As[%d,%d] == B[%d,%d]: %v\n", ar, ac, br, bc, B.AllClose(&As))
}
/*
 * Apply diagonal pivot (row and column swapped) to symmetric matrix blocks.
 * AR[0,0] is on the diagonal, AL is the block to the left of the diagonal and
 * AR is the triangular diagonal block. Need to swap a row and a column.
 *
 * LOWER triangular; moving from top-left to bottom-right
 *
 *    d
 *    x  d |
 *    --------------------------
 *    x  x | d
 *    S1 S1| S1 P1 x  x  x  P2     -- current row/col 'srcix'
 *    x  x | x  S2 d  x  x  x
 *    x  x | x  S2 x  d  x  x
 *    x  x | x  S2 x  x  d  x
 *    D1 D1| D1 P2 D2 D2 D2 P3     -- swap with row/col 'dstix'
 *    x  x | x  S3 x  x  x  D3 d
 *    x  x | x  S3 x  x  x  D3 x  d
 *    (ABL)  (ABR)
 *
 * UPPER triangular; moving from bottom-right to top-left
 *
 *    (ATL)                  (ATR)
 *    d  x  x  D3 x  x  x  S3 x | x
 *       d  x  D3 x  x  x  S3 x | x
 *          d  D3 x  x  x  S3 x | x
 *             P3 D2 D2 D2 P2 D1| D1   -- dstinx
 *                d  x  x  S2 x | x
 *                   d  x  S2 x | x
 *                      d  S2 x | x
 *                         P1 S1| S1   -- srcinx
 *                            d | x
 *    -----------------------------
 *                              | d
 *                          (ABR)
 */
func applyPivotSym2(AL, AR *cmat.FloatMatrix, srcix, dstix int, flags int) {
    var s, d cmat.FloatMatrix

    _, lc := AL.Size()
    rr, rc := AR.Size()
    if flags&gomas.LOWER != 0 {
        // AL is [ABL]; AR is [ABR]; P1 is AR[0,0], P2 is AR[index, 0]
        // S1 -- D1
        s.SubMatrix(AL, srcix, 0, 1, lc)
        d.SubMatrix(AL, dstix, 0, 1, lc)
        blasd.Swap(&s, &d)
        if srcix > 0 {
            s.SubMatrix(AR, srcix, 0, 1, srcix)
            d.SubMatrix(AR, dstix, 0, 1, srcix)
            blasd.Swap(&s, &d)
        }
        // S2 -- D2
        s.SubMatrix(AR, srcix+1, srcix, dstix-srcix-1, 1)
        d.SubMatrix(AR, dstix, srcix+1, 1, dstix-srcix-1)
        blasd.Swap(&s, &d)
        // S3 -- D3
        s.SubMatrix(AR, dstix+1, srcix, rr-dstix-1, 1)
        d.SubMatrix(AR, dstix+1, dstix, rr-dstix-1, 1)
        blasd.Swap(&s, &d)
        // swap P1 and P3
        p1 := AR.Get(srcix, srcix)
        p3 := AR.Get(dstix, dstix)
        AR.Set(srcix, srcix, p3)
        AR.Set(dstix, dstix, p1)
        return
    }
    if flags&gomas.UPPER != 0 {
        // AL is ATL, AR is ATR; P1 is AL[srcix, srcix];
        // S1 -- D1
        s.SubMatrix(AR, srcix, 0, 1, rc)
        d.SubMatrix(AR, dstix, 0, 1, rc)
        blasd.Swap(&s, &d)
        if srcix < lc-1 {
            // not the corner element
            s.SubMatrix(AL, srcix, srcix+1, 1, srcix)
            d.SubMatrix(AL, dstix, srcix+1, 1, srcix)
            blasd.Swap(&s, &d)
        }
        // S2 -- D2
        s.SubMatrix(AL, dstix+1, srcix, srcix-dstix-1, 1)
        d.SubMatrix(AL, dstix, dstix+1, 1, srcix-dstix-1)
        blasd.Swap(&s, &d)
        // S3 -- D3
        s.SubMatrix(AL, 0, srcix, dstix, 1)
        d.SubMatrix(AL, 0, dstix, dstix, 1)
        blasd.Swap(&s, &d)
        //fmt.Printf("3, AR=%v\n", AR)
        // swap P1 and P3
        p1 := AR.Get(0, 0)
        p3 := AL.Get(dstix, dstix)
        AR.Set(srcix, srcix, p3)
        AL.Set(dstix, dstix, p1)
        return
    }
}
/*
 * Blocked RQ decomposition with compact WY transform. As implemented
 * in lapack.DGERQF subroutine.
 */
func blockedRQ(A, Tvec, Twork, W *cmat.FloatMatrix, lb int, conf *gomas.Config) {
    var ATL, ABR, AL cmat.FloatMatrix
    var A00, A01, A10, A11, A22 cmat.FloatMatrix
    var TT, TB cmat.FloatMatrix
    var t0, tau, t2 cmat.FloatMatrix
    var Wrk, w1 cmat.FloatMatrix

    util.Partition2x2(
        &ATL, nil,
        nil, &ABR /**/, A, 0, 0, util.PBOTTOMRIGHT)
    util.Partition2x1(
        &TT,
        &TB /**/, Tvec, 0, util.PBOTTOM)

    for m(&ATL)-lb > 0 && n(&ATL)-lb > 0 {
        util.Repartition2x2to3x3(&ATL,
            &A00, &A01, nil,
            &A10, &A11, nil,
            nil, nil, &A22 /**/, A, lb, util.PTOPLEFT)
        util.Repartition2x1to3x1(&TT,
            &t0,
            &tau,
            &t2 /**/, Tvec, n(&A11), util.PTOP)

        // current block size
        cb, rb := A11.Size()
        if rb < cb {
            cb = rb
        }

        // --------------------------------------------------------
        // decompose left side AL == ( A10 A11 )
        w1.SubMatrix(W, 0, 0, cb, 1)
        util.Merge1x2(&AL, &A10, &A11)
        unblockedRQ(&AL, &tau, &w1)

        // build block reflector
        unblkBlockReflectorRQ(Twork, &AL, &tau)

        // compute: (A00 A01)(I - Y*T*Y.T)
        ar, ac := A01.Size()
        Wrk.SubMatrix(W, 0, 0, ar, ac)
        updateRightRQ(&A01, &A00, &A11, &A10, Twork, &Wrk, false, conf)
        // --------------------------------------------------------
        util.Continue3x3to2x2(
            &ATL, nil,
            nil, &ABR, &A00, &A11, &A22, A, util.PTOPLEFT)
        util.Continue3x1to2x1(
            &TT,
            &TB, &t0, &tau, Tvec, util.PTOP)
    }

    // last block with unblocked
    if m(&ATL) > 0 && n(&ATL) > 0 {
        w1.SubMatrix(W, 0, 0, m(&ATL), 1)
        unblockedRQ(&ATL, &TT, &w1)
    }
}
/*
 * Merge a 1 by 2 block into a single block.
 *
 *   ABLK <-- AL | AR
 *
 */
func Merge1x2(ABLK, AL, AR *cmat.FloatMatrix) {
    lr, lc := AL.Size()
    _, rc := AR.Size()
    if lc > 0 {
        ABLK.SubMatrix(AL, 0, 0, lr, lc+rc)
    } else {
        ABLK.SubMatrix(AR, 0, 0, lr, rc)
    }
}
func Transpose(A, B *cmat.FloatMatrix, confs ...*gomas.Config) *gomas.Error {
    ar, ac := A.Size()
    br, bc := B.Size()
    if ar != bc || ac != br {
        return gomas.NewError(gomas.ESIZE, "Transpose")
    }
    mtranspose(A, B, br, bc)
    return nil
}
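/*
 * Usage sketch for Transpose (illustrative; it assumes, as with the other
 * routines in this package, that the first argument receives the result). The
 * two operands must have transposed shapes or an ESIZE error is returned.
 */
func transposeExample() {
    A := cmat.NewMatrix(3, 5)
    A.SetFrom(cmat.NewFloatNormSource())
    B := cmat.NewMatrix(5, 3)
    if err := Transpose(B, A); err != nil {
        panic(err)
    }
}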
/*
 * Merge a 2 by 1 block into a single block.
 *
 *            AT
 *   ABLK <-- --
 *            AB
 *
 */
func Merge2x1(ABLK, AT, AB *cmat.FloatMatrix) {
    tr, tc := AT.Size()
    br, _ := AB.Size()
    if tr > 0 {
        ABLK.SubMatrix(AT, 0, 0, tr+br, tc)
    } else {
        ABLK.SubMatrix(AB, 0, 0, br, tc)
    }
}
func Sum(X *cmat.FloatMatrix, confs ...*gomas.Config) float64 {
    if X.Len() == 0 {
        return 0.0
    }
    xr, xc := X.Size()
    if xr != 1 && xc != 1 {
        return 0.0
    }
    return sum(X, X.Len())
}
func swapCols(A *cmat.FloatMatrix, src, dst int) {
    var c0, c1 cmat.FloatMatrix

    ar, _ := A.Size()
    if src == dst || ar == 0 {
        return
    }
    c0.SubMatrix(A, 0, src, ar, 1)
    c1.SubMatrix(A, 0, dst, ar, 1)
    blasd.Swap(&c0, &c1)
}
func IAmax(X *cmat.FloatMatrix, confs ...*gomas.Config) int {
    if X.Len() == 0 {
        return -1
    }
    xr, xc := X.Size()
    if xr != 1 && xc != 1 {
        return -1
    }
    return iamax(X, X.Len())
}
func swapRows(A *cmat.FloatMatrix, src, dst int) {
    var r0, r1 cmat.FloatMatrix

    ar, ac := A.Size()
    if src == dst || ar == 0 {
        return
    }
    r0.SubMatrix(A, src, 0, 1, ac)
    r1.SubMatrix(A, dst, 0, 1, ac)
    blasd.Swap(&r0, &r1)
}
func Nrm2(X *cmat.FloatMatrix, confs ...*gomas.Config) float64 {
    if X.Len() == 0 {
        return 0.0
    }
    xr, xc := X.Size()
    if xr != 1 && xc != 1 {
        return 0.0
    }
    nx := X.Len()
    return vnrm2(X, nx)
}
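/*
 * Sketch tying together the vector reductions defined above (illustrative;
 * assumes the "fmt" package is imported). Sum, Nrm2 and IAmax all expect a row
 * or column vector and return a zero value (or -1) for non-vector input.
 */
func vectorReductionExample() {
    x := cmat.NewMatrix(8, 1)
    x.SetFrom(cmat.NewFloatNormSource())
    fmt.Printf("sum=%.3f nrm2=%.3f iamax=%d\n", Sum(x), Nrm2(x), IAmax(x))
}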
/*
 * Triangular matrix multiplication.
 *
 * Computes B = alpha*op(A)*B (flags&LEFT, the default) or B = alpha*B*op(A)
 * (flags&RIGHT), where A is an upper or lower triangular matrix and op(A) is
 * A or A.T as selected by the flag bits.
 */
func MultTrm(B, A *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error {
    conf := gomas.DefaultConf()
    if len(confs) > 0 {
        conf = confs[0]
    }

    if B.Len() == 0 || A.Len() == 0 {
        return nil
    }

    ok := true
    ar, ac := A.Size()
    br, bc := B.Size()
    P := ac
    E := bc
    switch {
    case bits&gomas.RIGHT != 0:
        ok = bc == ar && ar == ac
        E = br
    case bits&gomas.LEFT != 0:
        fallthrough
    default:
        ok = ac == br && ar == ac
    }
    if !ok {
        return gomas.NewError(gomas.ESIZE, "MultTrm")
    }

    // single threaded
    if conf.NProc == 1 || conf.WB <= 0 || E < conf.WB/2 {
        trmm(B, A, alpha, bits, P, 0, E, conf)
        return nil
    }

    // parallelized
    wait := make(chan int, 4)
    _, nN := blocking(0, E, conf.WB/2)
    nT := 0
    for j := 0; j < nN; j++ {
        jS := blockIndex(j, nN, conf.WB/2, E)
        jL := blockIndex(j+1, nN, conf.WB/2, E)
        task := func(q chan int) {
            trmm(B, A, alpha, bits, P, jS, jL, conf)
            q <- 1
        }
        conf.Sched.Schedule(gomas.NewTask(task, wait))
        nT += 1
    }
    for nT > 0 {
        <-wait
        nT -= 1
    }
    return nil
}
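/*
 * Usage sketch for MultTrm (illustrative; assumes the same package and imports
 * as the code above). With LOWER|UNIT|LEFT, A is treated as a unit-diagonal
 * lower triangular matrix and B is overwritten with alpha*A*B.
 */
func multTrmExample() {
    A := cmat.NewMatrix(6, 6)
    A.SetFrom(cmat.NewFloatNormSource())
    B := cmat.NewMatrix(6, 4)
    B.SetFrom(cmat.NewFloatNormSource())
    if err := MultTrm(B, A, 1.0, gomas.LOWER|gomas.UNIT|gomas.LEFT); err != nil {
        panic(err)
    }
}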
func computeHouseholderVec(x, tau *cmat.FloatMatrix) {
    var alpha, x2 cmat.FloatMatrix

    r, _ := x.Size()
    alpha.SubMatrix(x, 0, 0, 1, 1)
    if r == 1 {
        x2.SubMatrix(x, 0, 1, 1, x.Len()-1) // row vector
    } else {
        x2.SubMatrix(x, 1, 0, x.Len()-1, 1)
    }
    computeHouseholder(&alpha, &x2, tau)
}
func Scale(A *cmat.FloatMatrix, alpha float64, confs ...*gomas.Config) *gomas.Error {
    if A.Len() == 0 {
        return nil
    }
    ar, ac := A.Size()
    if ar != 1 && ac != 1 {
        mscale(A, alpha, ar, ac)
        return nil
    }
    vscal(A, alpha, A.Len())
    return nil
}
func mNorm1(A *cmat.FloatMatrix) float64 {
    var amax float64 = 0.0
    var col cmat.FloatMatrix

    _, acols := A.Size()
    for k := 0; k < acols; k++ {
        col.Column(A, k)
        cmax := blasd.ASum(&col)
        if cmax > amax {
            amax = cmax
        }
    }
    return amax
}
func vinvscal(X *cmat.FloatMatrix, alpha float64, N int) {
    var x C.mvec_t

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    C.__d_vec_invscal(
        (*C.mvec_t)(unsafe.Pointer(&x)),
        C.double(alpha), C.int(N))
    return
}
func mNormInf(A *cmat.FloatMatrix) float64 {
    var amax float64 = 0.0
    var row cmat.FloatMatrix

    arows, _ := A.Size()
    for k := 0; k < arows; k++ {
        row.Row(A, k)
        rmax := blasd.ASum(&row)
        if rmax > amax {
            amax = rmax
        }
    }
    return amax
}
/*
 * Partition A to a 2 by 1 block.
 *
 *        AT
 *  A --> --
 *        AB
 *
 * Parameter nb is the initial block size for AT (PTOP) or AB (PBOTTOM).
 */
func Partition2x1(AT, AB, A *cmat.FloatMatrix, nb int, side Direction) {
    ar, ac := A.Size()

    if nb > ar {
        nb = ar
    }
    switch side {
    case PTOP:
        AT.SubMatrix(A, 0, 0, nb, ac)
        AB.SubMatrix(A, nb, 0, ar-nb, ac)
    case PBOTTOM:
        AT.SubMatrix(A, 0, 0, ar-nb, ac)
        AB.SubMatrix(A, ar-nb, 0, nb, ac)
    }
}
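/*
 * Sketch of the partition/merge helpers (illustrative; assumes the same package
 * as the code above). Partitioning and merging only create views into the
 * original storage, so no data is copied.
 */
func partitionMergeExample(A *cmat.FloatMatrix) {
    var AT, AB, ABLK cmat.FloatMatrix
    Partition2x1(&AT, &AB, A, 2, PTOP) // AT is the first 2 rows, AB the rest
    Merge2x1(&ABLK, &AT, &AB)          // ABLK is again a view over all of A
}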
/*
 * Apply diagonal pivot (row and column swapped) to symmetric matrix blocks.
 * AR[0,0] is on the diagonal, AL is the block to the left of the diagonal and
 * AR is the triangular diagonal block. Need to swap a row and a column.
 *
 * LOWER triangular; moving from top-left to bottom-right
 *
 *    d
 *    x  d
 *    x  x  d |
 *    --------------------------
 *    S1 S1 S1 | P1 x  x  x  P2    -- current row
 *    x  x  x  | S2 d  x  x  x
 *    x  x  x  | S2 x  d  x  x
 *    x  x  x  | S2 x  x  d  x
 *    D1 D1 D1 | P2 D2 D2 D2 P3    -- swap with row 'index'
 *    x  x  x  | S3 x  x  x  D3 d
 *    x  x  x  | S3 x  x  x  D3 x  d
 *       (ABL)   (ABR)
 *
 * UPPER triangular; moving from bottom-right to top-left
 *
 *    (ATL)              (ATR)
 *    d  x  x  D3 x  x  x | S3 x  x
 *       d  x  D3 x  x  x | S3 x  x
 *          d  D3 x  x  x | S3 x  x
 *             P3 D2 D2 D2| P2 D1 D1
 *                d  x  x | S2 x  x
 *                   d  x | S2 x  x
 *                      d | S2 x  x
 *    -----------------------------
 *                        | P1 S1 S1
 *                        |    d  x
 *                        |       d
 *                          (ABR)
 */
func applyPivotSym(AL, AR *cmat.FloatMatrix, index int, flags int) {
    var s, d cmat.FloatMatrix

    lr, lc := AL.Size()
    rr, rc := AR.Size()
    if flags&gomas.LOWER != 0 {
        // AL is [ABL]; AR is [ABR]; P1 is AR[0,0], P2 is AR[index, 0]
        // S1 -- D1
        s.SubMatrix(AL, 0, 0, 1, lc)
        d.SubMatrix(AL, index, 0, 1, lc)
        blasd.Swap(&s, &d)
        // S2 -- D2
        s.SubMatrix(AR, 1, 0, index-1, 1)
        d.SubMatrix(AR, index, 1, 1, index-1)
        blasd.Swap(&s, &d)
        // S3 -- D3
        s.SubMatrix(AR, index+1, 0, rr-index-1, 1)
        d.SubMatrix(AR, index+1, index, rr-index-1, 1)
        blasd.Swap(&s, &d)
        // swap P1 and P3
        p1 := AR.Get(0, 0)
        p3 := AR.Get(index, index)
        AR.Set(0, 0, p3)
        AR.Set(index, index, p1)
        return
    }
    if flags&gomas.UPPER != 0 {
        // AL is merged from [ATL, ATR], AR is [ABR]; P1 is AR[0, 0]; P2 is AL[index, -1]
        colno := lc - rc
        // S1 -- D1; S1 is on the first row of AR
        s.SubMatrix(AR, 0, 1, 1, rc-1)
        d.SubMatrix(AL, index, colno+1, 1, rc-1)
        blasd.Swap(&s, &d)
        // S2 -- D2
        s.SubMatrix(AL, index+1, colno, lr-index-2, 1)
        d.SubMatrix(AL, index, index+1, 1, colno-index-1)
        blasd.Swap(&s, &d)
        // S3 -- D3
        s.SubMatrix(AL, 0, index, index, 1)
        d.SubMatrix(AL, 0, colno, index, 1)
        blasd.Swap(&s, &d)
        //fmt.Printf("3, AR=%v\n", AR)
        // swap P1 and P3
        p1 := AR.Get(0, 0)
        p3 := AL.Get(index, index)
        AR.Set(0, 0, p3)
        AL.Set(index, index, p1)
        return
    }
}
func iamax(X *cmat.FloatMatrix, N int) int {
    var x C.mvec_t
    var ix C.int

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    ix = C.__d_vec_iamax(
        (*C.mvec_t)(unsafe.Pointer(&x)), C.int(N))
    return int(ix)
}
/*
 * Partition A to a 1 by 2 block.
 *
 *  A --> AL | AR
 *
 * Parameter nb is the initial block size for AL (PLEFT) or AR (PRIGHT).
 */
func Partition1x2(AL, AR, A *cmat.FloatMatrix, nb int, side Direction) {
    ar, ac := A.Size()

    if nb > ac {
        nb = ac
    }
    switch side {
    case PLEFT:
        AL.SubMatrix(A, 0, 0, ar, nb)
        AR.SubMatrix(A, 0, nb, ar, ac-nb)
    case PRIGHT:
        AL.SubMatrix(A, 0, 0, ar, ac-nb)
        AR.SubMatrix(A, 0, ac-nb, ar, nb)
    }
}
func sum(X *cmat.FloatMatrix, N int) float64 {
    var x C.mvec_t
    var dc C.double

    xr, _ := X.Size()
    x.md = (*C.double)(unsafe.Pointer(&X.Data()[0]))
    x.inc = C.int(1)
    if xr == 1 {
        x.inc = C.int(X.Stride())
    }
    dc = C.__d_vec_sum_recursive(
        (*C.mvec_t)(unsafe.Pointer(&x)), C.int(N))
    return float64(dc)
}