func EigenSym(D, A, W *cmat.FloatMatrix, bits int, confs ...*gomas.Config) (err *gomas.Error) { var sD, sE, E, tau, Wred cmat.FloatMatrix var vv *cmat.FloatMatrix err = nil vv = nil conf := gomas.CurrentConf(confs...) if m(A) != n(A) || D.Len() != m(A) { err = gomas.NewError(gomas.ESIZE, "EigenSym") return } if bits&gomas.WANTV != 0 && W.Len() < 3*n(A) { err = gomas.NewError(gomas.EWORK, "EigenSym") return } if bits&(gomas.LOWER|gomas.UPPER) == 0 { bits = bits | gomas.LOWER } ioff := 1 if bits&gomas.LOWER != 0 { ioff = -1 } E.SetBuf(n(A)-1, 1, n(A)-1, W.Data()) tau.SetBuf(n(A), 1, n(A), W.Data()[n(A)-1:]) wrl := W.Len() - 2*n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[2*n(A)-1:]) // reduce to tridiagonal if err = TRDReduce(A, &tau, &Wred, bits, conf); err != nil { err.Update("EigenSym") return } sD.Diag(A) sE.Diag(A, ioff) blasd.Copy(D, &sD) blasd.Copy(&E, &sE) if bits&gomas.WANTV != 0 { if err = TRDBuild(A, &tau, &Wred, n(A), bits, conf); err != nil { err.Update("EigenSym") return } vv = A } // resize workspace wrl = W.Len() - n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[n(A)-1:]) if err = TRDEigen(D, &E, vv, &Wred, bits, conf); err != nil { err.Update("EigenSym") return } return }
/* * Generates the real orthogonal matrix Q which is defined as the product of K elementary * reflectors of order N embedded in matrix A as returned by TRDReduce(). * * A On entry tridiagonal reduction as returned by TRDReduce(). * On exit the orthogonal matrix Q. * * tau Scalar coefficients of elementary reflectors. * * W Workspace * * K Number of reflectors , 0 < K < N * * flags LOWER or UPPER * * confs Optional blocking configuration * * If flags has UPPER set then * Q = H(K)...H(1)H(0) where 0 < K < N-1 * * If flags has LOWR set then * Q = H(0)H(1)...H(K) where 0 < K < N-1 */ func TRDBuild(A, tau, W *cmat.FloatMatrix, K, flags int, confs ...*gomas.Config) *gomas.Error { var err *gomas.Error = nil var Qh, tauh cmat.FloatMatrix var s, d cmat.FloatMatrix if K > m(A)-1 { K = m(A) - 1 } switch flags & (gomas.LOWER | gomas.UPPER) { case gomas.LOWER: // Shift Q matrix embedded in A right and fill first column // unit column vector for j := m(A) - 1; j > 0; j-- { s.SubMatrix(A, j, j-1, m(A)-j, 1) d.SubMatrix(A, j, j, m(A)-j, 1) blasd.Copy(&d, &s) A.Set(0, j, 0.0) } // zero first column and set first entry to one d.Column(A, 0) blasd.Scale(&d, 0.0) d.Set(0, 0, 1.0) Qh.SubMatrix(A, 1, 1, m(A)-1, m(A)-1) tauh.SubMatrix(tau, 0, 0, m(A)-1, 1) err = QRBuild(&Qh, &tauh, W, K, confs...) case gomas.UPPER: // Shift Q matrix embedded in A left and fill last column // unit column vector for j := 1; j < m(A); j++ { s.SubMatrix(A, 0, j, j, 1) d.SubMatrix(A, 0, j-1, j, 1) blasd.Copy(&d, &s) A.Set(-1, j-1, 0.0) } // zero last column and set last entry to one d.Column(A, m(A)-1) blasd.Scale(&d, 0.0) d.Set(-1, 0, 1.0) Qh.SubMatrix(A, 0, 0, m(A)-1, m(A)-1) tauh.SubMatrix(tau, 0, 0, m(A)-1, 1) err = QLBuild(&Qh, &tauh, W, K, confs...) } if err != nil { err.Update("TRDBuild") } return err }
func TestTrdMultUpper(t *testing.T) { var dt, et, da, ea cmat.FloatMatrix N := 843 nb := 48 conf := gomas.NewConf() conf.LB = nb A := cmat.NewMatrix(N, N) tau := cmat.NewMatrix(N, 1) src := cmat.NewFloatNormSource() // create symmetric matrix A.SetFrom(src, cmat.SYMM) A0 := cmat.NewCopy(A) W := lapackd.Workspace(lapackd.TRDReduceWork(A, conf)) lapackd.TRDReduce(A, tau, W, gomas.UPPER, conf) // make tridiagonal matrix T T0 := cmat.NewMatrix(N, N) dt.Diag(T0) da.Diag(A) blasd.Copy(&dt, &da) ea.Diag(A, 1) et.Diag(T0, 1) blasd.Copy(&et, &ea) et.Diag(T0, -1) blasd.Copy(&et, &ea) T1 := cmat.NewCopy(T0) // compute Q*T*Q.T (unblocked) conf.LB = 0 lapackd.TRDMult(T0, A, tau, W, gomas.LEFT|gomas.UPPER, conf) lapackd.TRDMult(T0, A, tau, W, gomas.RIGHT|gomas.TRANS|gomas.UPPER, conf) blasd.Plus(T0, A0, 1.0, -1.0, gomas.NONE) nrm := lapackd.NormP(T0, lapackd.NORM_ONE) t.Logf("N=%d, unblk.||A - Q*T*Q.T||_1: %e\n", N, nrm) // compute Q*T*Q.T (blocked) conf.LB = nb W = lapackd.Workspace(lapackd.TRDMultWork(A, gomas.LEFT|gomas.UPPER, conf)) lapackd.TRDMult(T1, A, tau, W, gomas.LEFT|gomas.UPPER, conf) lapackd.TRDMult(T1, A, tau, W, gomas.RIGHT|gomas.TRANS|gomas.UPPER, conf) blasd.Plus(T1, A0, 1.0, -1.0, gomas.NONE) nrm = lapackd.NormP(T1, lapackd.NORM_ONE) t.Logf("N=%d, blk.||A - Q*T*Q.T||_1: %e\n", N, nrm) }
func TestBiredWide(t *testing.T) { N := 811 M := 693 nb := 32 conf := gomas.NewConf() conf.LB = 0 ediag := -1 zeromean := cmat.NewFloatNormSource() A := cmat.NewMatrix(M, N) A.SetFrom(zeromean) A0 := cmat.NewCopy(A) tauq := cmat.NewMatrix(N, 1) taup := cmat.NewMatrix(N, 1) W := lapackd.Workspace(M + N) lapackd.BDReduce(A, tauq, taup, W, conf) var D, E, Bd, Be cmat.FloatMatrix D.Diag(A) E.Diag(A, ediag) B := cmat.NewMatrix(M, N) Bd.Diag(B) Be.Diag(B, ediag) blasd.Copy(&Bd, &D) blasd.Copy(&Be, &E) Bt := cmat.NewMatrix(N, M) blasd.Transpose(Bt, B) conf.LB = nb W0 := lapackd.Workspace(lapackd.BDMultWork(B, conf)) lapackd.BDMult(B, A, tauq, W0, gomas.MULTQ|gomas.LEFT, conf) lapackd.BDMult(Bt, A, tauq, W0, gomas.MULTQ|gomas.RIGHT|gomas.TRANS, conf) lapackd.BDMult(B, A, taup, W0, gomas.MULTP|gomas.RIGHT|gomas.TRANS, conf) lapackd.BDMult(Bt, A, taup, W0, gomas.MULTP|gomas.LEFT, conf) blasd.Plus(B, A0, 1.0, -1.0, gomas.NONE) nrm := lapackd.NormP(B, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||A - Q*B*P.T||_1 : %e\n", M, N, nrm) blasd.Plus(Bt, A0, 1.0, -1.0, gomas.TRANSB) nrm = lapackd.NormP(Bt, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||A.T - P*B.T*Q.T||_1 : %e\n", M, N, nrm) }
func main() { flag.Parse() M := N + N/10 conf := gomas.CurrentConf() A := cmat.NewMatrix(M, N) A0 := cmat.NewCopy(A) tau := cmat.NewMatrix(N, 1) W := lapackd.Workspace(lapackd.QRFactorWork(A, conf)) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean) cumtime := 0.0 mintime := 0.0 maxtime := 0.0 for i := 0; i < count; i++ { flushCache() t1 := time.Now() // ---------------------------------------------- lapackd.QRFactor(A, tau, W, conf) // ---------------------------------------------- t2 := time.Now() tm := t2.Sub(t1) if mintime == 0.0 || tm.Seconds() < mintime { mintime = tm.Seconds() } if maxtime == 0.0 || tm.Seconds() > maxtime { maxtime = tm.Seconds() } cumtime += tm.Seconds() if verbose { fmt.Printf("%3d %12.4f msec, %9.4f gflops\n", i, 1e+3*tm.Seconds(), gflops(M, N, tm.Seconds())) } blasd.Copy(A, A0) } cumtime /= float64(count) minflops := gflops(M, N, maxtime) avgflops := gflops(M, N, cumtime) maxflops := gflops(M, N, mintime) fmt.Printf("%5d %5d %3d %9.4f %9.4f %9.4f Gflops\n", M, N, conf.LB, minflops, avgflops, maxflops) }
/* * Update vector with compact WY Householder block * (I - Y*T*Y.T)*v = v - Y*T*Y.T*v * * LEFT: * 1 | 0 * v0 = v0 = v0 * 0 | Q v1 Q*v1 = v1 - Y*T*Y.T*v1 * * 1 | 0 * v0 = v0 = v0 * 0 | Q.T v1 Q.T*v1 = v1 - Y*T.T*Y.T*v1 * * RIGHT: * v0 | v1 * 1 | 0 = v0 | v1*Q = v0 | v1 - v1*Y*T*Y.T * 0 | Q * * v0 | v1 * 1 | 0 = v0 | v1*Q.T = v0 | v1 - v1*Y*T.T*Y.T * 0 | Q.T */ func updateVecLeftWY2(v, Y1, Y2, T, w *cmat.FloatMatrix, bits int) { var v1, v2 cmat.FloatMatrix var w0 cmat.FloatMatrix v1.SubMatrix(v, 1, 0, n(Y1), 1) v2.SubMatrix(v, n(Y1)+1, 0, m(Y2), 1) w0.SubMatrix(w, 0, 0, m(Y1), 1) // w0 := Y1.T*v1 + Y2.T*v2 blasd.Copy(&w0, &v1) blasd.MVMultTrm(&w0, Y1, 1.0, gomas.LOWER|gomas.UNIT|gomas.TRANS) blasd.MVMult(&w0, Y2, &v2, 1.0, 1.0, gomas.TRANS) // w0 := op(T)*w0 blasd.MVMultTrm(&w0, T, 1.0, bits|gomas.UPPER) // v2 := v2 - Y2*w0 blasd.MVMult(&v2, Y2, &w0, -1.0, 1.0, gomas.NONE) // v1 := v1 - Y1*w0 blasd.MVMultTrm(&w0, Y1, 1.0, gomas.LOWER|gomas.UNIT) blasd.Axpy(&v1, &w0, -1.0) }
/*
 * Unblocked, bounded factorization step for the upper triangular storage;
 * the counterpart of unblkBoundedBKLower(). At most ncol columns are
 * factorized, working from the bottom-right corner of A towards the top-left.
 *
 *  A     Matrix to factorize; updated in place.
 *  wrk   Work matrix. Its columns receive the updated pivot columns built by
 *        findAndBuildBKPivotUpper(). A 2x2 block starting at row m(wrk)-2,
 *        column 0 is used as permanent storage for the scaled inverse of a
 *        2x2 pivot block.
 *  p     Pivot vector; entries for the factorized columns are written.
 *  ncol  Maximum number of columns to factorize; clamped to n(A).
 *  conf  Configuration passed on to blasd.Mult().
 *
 * Pivot entries are stored 1-based: for a 1x1 pivot m(ATL) when no
 * interchange was made and r+1 otherwise; for a 2x2 pivot both entries are
 * set to -(r+1).
 *
 * Returns the error value (never set to non-nil in this function) and the
 * number of columns actually factorized. The count is less than ncol when
 * a 2x2 pivot would not fit into the remaining columns.
 */
func unblkBoundedBKUpper(A, wrk *cmat.FloatMatrix, p *Pivots, ncol int, conf *gomas.Config) (*gomas.Error, int) {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12, A22, a11inv cmat.FloatMatrix
	var w00, w01, w11 cmat.FloatMatrix
	var cwrk cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	err = nil
	nc := 0
	if ncol > n(A) {
		ncol = n(A)
	}

	// permanent working space for symmetric inverse of a11; the off-diagonal
	// entries are constant, only the diagonal is rewritten for each 2x2 pivot
	a11inv.SubMatrix(wrk, m(wrk)-2, 0, 2, 2)
	a11inv.Set(0, 1, -1.0)
	a11inv.Set(1, 0, -1.0)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	partitionPivot2x1(
		&pT,
		&pB, *p, 0, util.PBOTTOM)

	for n(&ATL) > 0 && nc < ncol {

		// work matrix is partitioned in step with A; nc columns are done
		util.Partition2x2(
			&w00, &w01,
			nil, &w11, wrk, nc, nc, util.PBOTTOMRIGHT)

		// r is the pivot row (-1 when no interchange), np the pivot size
		r, np := findAndBuildBKPivotUpper(&ATL, &ATR, &w00, &w01, nc)
		if np > ncol-nc {
			// next pivot does not fit into ncol columns,
			// return with number of factorized columns
			return err, nc
		}
		cwrk.SubMatrix(&w00, 0, n(&w00)-np, m(&ATL), np)
		if r != -1 {
			// pivoting needed; do swaping here
			k := m(&ATL) - np
			applyBKPivotSymUpper(&ATL, k, r)
			// swap right hand rows to get correct updates
			swapRows(&ATR, k, r)
			swapRows(&w01, k, r)
			if np == 2 && r != k {
				/* for 2x2 blocks we need diagonal pivots.
				 *          [r, r] | [ r,-1]
				 * a11 ==   ----------------  2-by-2 pivot; rows k and r are
				 *          [-1,r] | [-1,-1]  interchanged in the last two
				 *                            columns of w00
				 */
				t0 := w00.Get(k, -1)
				tr := w00.Get(r, -1)
				w00.Set(k, -1, tr)
				w00.Set(r, -1, t0)
				t0 = w00.Get(k, -2)
				tr = w00.Get(r, -2)
				w00.Set(k, -2, tr)
				w00.Set(r, -2, t0)
			}
		}
		// repartition according the pivot size
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22 /**/, A, np, util.PTOPLEFT)
		repartPivot2x1to3x1(&pT,
			&p0, &p1, &p2 /**/, *p, np, util.PTOP)
		// ------------------------------------------------------------

		// cwrk: the part of the updated pivot column(s) in w00 that
		// corresponds to a01
		wlc := n(&w00) - np
		cwrk.SubMatrix(&w00, 0, wlc, m(&a01), n(&a01))
		if np == 1 {
			// diagonal pivot value is taken from the updated work column
			a11.Set(0, 0, w00.Get(m(&a01), wlc))
			// a01 = updated column / a11
			blasd.Copy(&a01, &cwrk)
			blasd.InvScale(&a01, a11.Get(0, 0))
			// store pivot point relative to original matrix
			if r == -1 {
				p1[0] = m(&ATL)
			} else {
				p1[0] = r + 1
			}
		} else if np == 2 {
			/*         a | b                       d/b | -1
			 *  w00 == ------  == a11 --> a11.-1 == -------- * scale
			 *         . | d                        -1 | a/b
			 */
			a := w00.Get(m(&ATL)-2, -2)
			b := w00.Get(m(&ATL)-2, -1)
			d := w00.Get(m(&ATL)-1, -1)
			a11inv.Set(0, 0, d/b)
			a11inv.Set(1, 1, a/b)
			// denominator: (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2
			scale := 1.0 / ((a/b)*(d/b) - 1.0)
			scale /= b

			// a01 = a01*a11.-1
			blasd.Mult(&a01, &cwrk, &a11inv, scale, 0.0, gomas.NONE, conf)
			// write the updated 2x2 pivot block back to A (upper part only)
			a11.Set(0, 0, a)
			a11.Set(0, 1, b)
			a11.Set(1, 1, d)

			// store pivot point relative to original matrix
			p1[0] = -(r + 1)
			p1[1] = p1[0]
		}
		// ------------------------------------------------------------
		nc += np
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, *p, util.PTOP)
	}
	return err, nc
}
/* * Find diagonal pivot and build incrementaly updated block. * * d x r2 x x x c1 | x x kp1 k | w w * d r2 x x x c1 | x x kp1 k | w w * r2 r2 r2 r2 c1 | x x kp1 k | w w * d x x c1 | x x kp1 k | w w * d x c1 | x x kp1 k | w w * d c1 | x x kp1 k | w w * c1 | x x kp1 k | w w * -------------------------- ------------- * (AL) (AR) (WL) (WR) * * Matrix AL contains the unfactored part of the matrix and AR the already * factored columns. Matrix WR is updated values of factored part ie. * w(i) = l(i)d(i). Matrix WL will have updated values for next column. * Column WL(k) contains updated AL(c1) and WL(kp1) possible pivot row AL(r2). * * On exit, for 1x1 diagonal the rightmost column of WL (k) holds the updated * value of AL(c1). If pivoting this required the WL(k) holds the actual pivoted * column/row. * * For 2x2 diagonal blocks WL(k) holds the updated AL(c1) and WL(kp1) holds * actual values of pivot column/row AL(r2), without the diagonal pivots. */ func findAndBuildBKPivotUpper(AL, AR, WL, WR *cmat.FloatMatrix, k int) (int, int) { var r, q int var rcol, qrow, src, wk, wkp1, wrow cmat.FloatMatrix lc := n(AL) - 1 wc := n(WL) - 1 lr := m(AL) - 1 // Copy AL[:,lc] to WL[:,wc] and update with WR[0:] src.SubMatrix(AL, 0, lc, m(AL), 1) wk.SubMatrix(WL, 0, wc, m(AL), 1) blasd.Copy(&wk, &src) if k > 0 { wrow.SubMatrix(WR, lr, 0, 1, n(WR)) blasd.MVMult(&wk, AR, &wrow, -1.0, 1.0, gomas.NONE) } if m(AL) == 1 { return -1, 1 } // amax is on-diagonal element of current column amax := math.Abs(WL.Get(lr, wc)) // find max off-diagonal on first column. 
rcol.SubMatrix(WL, 0, wc, lr, 1) // r is row index and rmax is its absolute value r = blasd.IAmax(&rcol) rmax := math.Abs(rcol.Get(r, 0)) if amax >= bkALPHA*rmax { // no pivoting, 1x1 diagonal return -1, 1 } // Now we need to copy row r to WL[:,wc-1] and update it wkp1.SubMatrix(WL, 0, wc-1, m(AL), 1) if r > 0 { // above the diagonal part of AL qrow.SubMatrix(AL, 0, r, r, 1) blasd.Copy(&wkp1, &qrow) } var wkr cmat.FloatMatrix qrow.SubMatrix(AL, r, r, 1, m(AL)-r) wkr.SubMatrix(&wkp1, r, 0, m(AL)-r, 1) blasd.Copy(&wkr, &qrow) if k > 0 { // update wkp1 wrow.SubMatrix(WR, r, 0, 1, n(WR)) blasd.MVMult(&wkp1, AR, &wrow, -1.0, 1.0, gomas.NONE) } // set on-diagonal entry to zero to avoid hitting it. p1 := wkp1.Get(r, 0) wkp1.Set(r, 0, 0.0) // max off-diagonal on r'th column/row at index q q = blasd.IAmax(&wkp1) qmax := math.Abs(wkp1.Get(q, 0)) wkp1.Set(r, 0, p1) if amax >= bkALPHA*rmax*(rmax/qmax) { // no pivoting, 1x1 diagonal return -1, 1 } // if q == r then qmax is not off-diagonal, qmax == WR[r,1] and // we get 1x1 pivot as following is always true if math.Abs(WL.Get(r, wc-1)) >= bkALPHA*qmax { // 1x1 pivoting and interchange with k, r // pivot row in column WL[:,-2] to WL[:,-1] src.SubMatrix(WL, 0, wc-1, m(AL), 1) wkp1.SubMatrix(WL, 0, wc, m(AL), 1) blasd.Copy(&wkp1, &src) wkp1.Set(-1, 0, src.Get(r, 0)) wkp1.Set(r, 0, src.Get(-1, 0)) return r, 1 } else { // 2x2 pivoting and interchange with k+1, r return r, 2 } return -1, 1 }
/*
 * Unblocked Bunch-Kauffman LDL factorization, upper triangular storage.
 * The matrix is processed from the bottom-right corner towards the top-left.
 *
 *  A     Matrix to factorize; updated in place.
 *  wrk   Work matrix. The 2x2 block at row 0, column n(wrk)-2 holds the
 *        scaled inverse of a 2x2 pivot block; the area starting at row 2,
 *        column 0 is used as a copy of a01 for 2x2 pivots.
 *  p     Pivot vector; written for every factorized column.
 *  conf  Configuration passed on to the blasd matrix-matrix operations.
 *
 * Pivot entries are stored 1-based: for a 1x1 pivot m(ATL) when no
 * interchange was made and r+1 otherwise; for a 2x2 pivot both entries are
 * set to -(r+1).
 *
 * Returns the error value (never set to non-nil in this function) and the
 * number of factorized columns.
 *
 * Corresponds lapack.DSYTF2
 */
func unblkDecompBKUpper(A, wrk *cmat.FloatMatrix, p Pivots, conf *gomas.Config) (*gomas.Error, int) {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12, A22, a11inv cmat.FloatMatrix
	var cwrk cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	err = nil
	nc := 0

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	partitionPivot2x1(
		&pT,
		&pB, p, 0, util.PBOTTOM)

	// permanent working space for symmetric inverse of a11; the off-diagonal
	// entries are constant, only the diagonal is rewritten for each 2x2 pivot
	a11inv.SubMatrix(wrk, 0, n(wrk)-2, 2, 2)
	a11inv.Set(1, 0, -1.0)
	a11inv.Set(0, 1, -1.0)

	for n(&ATL) > 0 {

		// nr is index of the current (last) diagonal entry of ATL
		nr := m(&ATL) - 1
		// r is the pivot row (-1 when no interchange), np the pivot size
		r, np := findBKPivotUpper(&ATL)
		if r != -1 {
			cwrk.SubMatrix(&ATL, 0, n(&ATL)-np, m(&ATL), np)
			// pivoting needed; do swaping here
			applyBKPivotSymUpper(&ATL, m(&ATL)-np, r)
			if np == 2 {
				/*          [r, r] | [r, nr]
				 * a11 ==   ----------------  2-by-2 pivot, swapping [nr-1,nr] and [r,nr]
				 *          [nr,r] | [nr,nr]  (nr is the current diagonal entry)
				 */
				t := ATL.Get(nr-1, nr)
				ATL.Set(nr-1, nr, ATL.Get(r, nr))
				ATL.Set(r, nr, t)
			}
		}

		// repartition according the pivot size
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22 /**/, A, np, util.PTOPLEFT)
		repartPivot2x1to3x1(&pT,
			&p0, &p1, &p2 /**/, p, np, util.PTOP)
		// ------------------------------------------------------------
		if np == 1 {
			// A00 = A00 - a01*a01.T/a11
			blasd.MVUpdateTrm(&A00, &a01, &a01, -1.0/a11.Get(0, 0), gomas.UPPER)
			// a01 = a01/a11
			blasd.InvScale(&a01, a11.Get(0, 0))
			// store pivot point relative to original matrix
			if r == -1 {
				p1[0] = m(&ATL)
			} else {
				p1[0] = r + 1
			}
		} else if np == 2 {
			/* see comments on unblkDecompBKLower() */
			a := a11.Get(0, 0)
			b := a11.Get(0, 1)
			d := a11.Get(1, 1)
			a11inv.Set(0, 0, d/b)
			a11inv.Set(1, 1, a/b)
			// denominator: (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2
			scale := 1.0 / ((a/b)*(d/b) - 1.0)
			scale /= b
			// cwrk = a01; the unscaled copy is needed for the update of A00
			cwrk.SubMatrix(wrk, 2, 0, m(&a01), n(&a01))
			blasd.Copy(&cwrk, &a01)
			// a01 = a01*a11.-1
			blasd.Mult(&a01, &cwrk, &a11inv, scale, 0.0, gomas.NONE, conf)
			// A00 = A00 - a01*a11.-1*a01.T = A00 - a01*cwrk.T
			blasd.UpdateTrm(&A00, &a01, &cwrk, -1.0, 1.0, gomas.UPPER|gomas.TRANSB, conf)
			// store pivot point relative to original matrix
			p1[0] = -(r + 1)
			p1[1] = p1[0]
		}
		// ------------------------------------------------------------
		nc += np
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, p, util.PTOP)
	}
	return err, nc
}
// test: M < N, U=[m,m] and V=[m,n] or V=[n,n] (square) func testWide(M, N int, square bool, t *testing.T) { var A, A0, W, S, U, Uu, V, Vv *cmat.FloatMatrix var sD cmat.FloatMatrix var s string wsize := M * N if wsize < 100 { wsize = 100 } S = cmat.NewMatrix(M, 1) A = cmat.NewMatrix(M, N) U = cmat.NewMatrix(M, M) Uu = cmat.NewMatrix(M, M) if square { V = cmat.NewMatrix(N, N) Vv = cmat.NewMatrix(N, N) } else { V = cmat.NewMatrix(M, N) Vv = cmat.NewMatrix(M, M) } src := cmat.NewFloatNormSource() A.SetFrom(src) A0 = cmat.NewCopy(A) W = cmat.NewMatrix(wsize, 1) if err := lapackd.SVD(S, U, V, A, W, gomas.WANTU|gomas.WANTV); err != nil { t.Errorf("SVD error: %v\n", err) return } // ||I - U.T*U|| sD.Diag(Uu) blasd.Mult(Uu, U, U, 1.0, 0.0, gomas.TRANSA) blasd.Add(&sD, -1.0) nrm0 := lapackd.NormP(Uu, lapackd.NORM_ONE) // ||I - V*V.T|| sD.Diag(Vv) blasd.Mult(Vv, V, V, 1.0, 0.0, gomas.TRANSB) blasd.Add(&sD, -1.0) nrm1 := lapackd.NormP(Vv, lapackd.NORM_ONE) if square { // right vectors are N-by-N Sg := cmat.NewMatrix(M, N) A1 := cmat.NewMatrix(M, N) sD.Diag(Sg) blasd.Copy(&sD, S) blasd.Mult(A1, Sg, V, 1.0, 0.0, gomas.NONE) blasd.Mult(A0, U, A1, -1.0, 1.0, gomas.NONE) s = "U=[m,m], V=[n,n]" } else { // right vectors are M-by-N lapackd.MultDiag(V, S, gomas.LEFT) blasd.Mult(A0, U, V, -1.0, 1.0, gomas.NONE) s = "U=[m,m], V=[m,n]" } nrm2 := lapackd.NormP(A0, lapackd.NORM_ONE) if N < 10 { t.Logf("A - U*S*V.T:\n%v\n", A0) } t.Logf("M=%d, N=%d, %s ||A - U*S*V.T||_1 :%e\n", M, N, s, nrm2) t.Logf(" ||I - U.T*U||_1 : %e\n", nrm0) t.Logf(" ||I - V*V.T||_1 : %e\n", nrm1) }
func svdWide(S, U, V, A, W *cmat.FloatMatrix, bits int, conf *gomas.Config) (err *gomas.Error) { var uu, vv *cmat.FloatMatrix var tauq, taup, Wred, sD, sE, L, Vm cmat.FloatMatrix if (bits & (gomas.WANTU | gomas.WANTV)) != 0 { if W.Len() < 4*n(A) { err = gomas.NewError(gomas.ESIZE, "SVD") return } } tauq.SetBuf(m(A)-1, 1, m(A)-1, W.Data()) taup.SetBuf(m(A), 1, m(A), W.Data()[tauq.Len():]) wrl := W.Len() - 2*m(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[2*m(A)-1:]) if svdCrossover(n(A), m(A)) { goto do_n_much_bigger } // reduce to bidiagonal form if err = BDReduce(A, &tauq, &taup, &Wred, conf); err != nil { return } sD.Diag(A) sE.Diag(A, -1) blasd.Copy(S, &sD) // leftt vectors if bits&gomas.WANTU != 0 { L.SubMatrix(A, 0, 0, m(A), m(A)) U.Copy(&L) cmat.TriL(U, 0) if err = BDBuild(U, &tauq, &Wred, m(U), gomas.WANTQ|gomas.LOWER, conf); err != nil { return } uu = U } // right vectors if bits&gomas.WANTV != 0 { if m(V) == m(A) { // V is M-by-N; copy and make upper triangular V.Copy(A) //cmat.TriU(V, 0) if err = BDBuild(V, &taup, &Wred, m(V), gomas.WANTP, conf); err != nil { return } } else { // V is N-by-N eye := cmat.FloatDiagonalSource{1.0} V.SetFrom(&eye, cmat.SYMM) err = BDMult(V, A, &taup, &Wred, gomas.MULTP|gomas.LEFT|gomas.TRANS, conf) if err != nil { return } } vv = V } err = BDSvd(S, &sE, uu, vv, W, bits|gomas.LOWER) return do_n_much_bigger: // here N >> M, use LQ factor first if err = LQFactor(A, &taup, &Wred, conf); err != nil { return } if bits&gomas.WANTV != 0 { if m(V) == m(A) { V.Copy(A) if err = LQBuild(V, &taup, &Wred, m(A), conf); err != nil { return } } else { // V is N-by-N eye := cmat.FloatDiagonalSource{1.0} V.SetFrom(&eye, cmat.SYMM) if err = LQMult(V, A, &taup, &Wred, gomas.RIGHT, conf); err != nil { return } } } L.SubMatrix(A, 0, 0, m(A), m(A)) cmat.TriL(&L, 0) // resize tauq/taup for UPPER bidiagonal reduction tauq.SetBuf(m(A), 1, m(A), W.Data()) taup.SetBuf(m(A)-1, 1, m(A)-1, W.Data()[tauq.Len():]) // bidiagonal reduce if err = BDReduce(&L, &tauq, 
&taup, &Wred, conf); err != nil { return } if bits&gomas.WANTV != 0 { Vm.SubMatrix(V, 0, 0, m(A), n(A)) err = BDMult(&Vm, &L, &taup, &Wred, gomas.MULTP|gomas.LEFT|gomas.TRANS, conf) if err != nil { return } vv = V } if bits&gomas.WANTU != 0 { U.Copy(&L) if err = BDBuild(U, &tauq, &Wred, m(U), gomas.WANTQ, conf); err != nil { return } uu = U } sD.Diag(A) sE.Diag(A, 1) blasd.Copy(S, &sD) err = BDSvd(S, &sE, uu, vv, W, bits|gomas.UPPER, conf) return }
/* * Find diagonal pivot and build incrementaly updated block. * * (AL) (AR) (WL) (WR) * -------------------------- ---------- k'th row in W * x x | c1 w w | k kp1 * x x | c1 d w w | k kp1 * x x | c1 x d w w | k kp1 * x x | c1 x x d w w | k kp1 * x x | c1 r2 r2 r2 r2 w w | k kp1 * x x | c1 x x x r2 d w w | k kp1 * x x | c1 x x x r2 x d w w | k kp1 * * Matrix AR contains the unfactored part of the matrix and AL the already * factored columns. Matrix WL is updated values of factored part ie. * w(i) = l(i)d(i). Matrix WR will have updated values for next column. * Column WR(k) contains updated AR(c1) and WR(kp1) possible pivot row AR(r2). */ func findAndBuildBKPivotLower(AL, AR, WL, WR *cmat.FloatMatrix, k int) (int, int) { var r, q int var rcol, qrow, src, wk, wkp1, wrow cmat.FloatMatrix // Copy AR column 0 to WR column 0 and update with WL[0:] src.SubMatrix(AR, 0, 0, m(AR), 1) wk.SubMatrix(WR, 0, 0, m(AR), 1) wk.Copy(&src) if k > 0 { wrow.SubMatrix(WL, 0, 0, 1, n(WL)) blasd.MVMult(&wk, AL, &wrow, -1.0, 1.0, gomas.NONE) } if m(AR) == 1 { return 0, 1 } amax := math.Abs(WR.Get(0, 0)) // find max off-diagonal on first column. 
rcol.SubMatrix(WR, 1, 0, m(AR)-1, 1) // r is row index and rmax is its absolute value r = blasd.IAmax(&rcol) + 1 rmax := math.Abs(rcol.Get(r-1, 0)) if amax >= bkALPHA*rmax { // no pivoting, 1x1 diagonal return 0, 1 } // Now we need to copy row r to WR[:,1] and update it wkp1.SubMatrix(WR, 0, 1, m(AR), 1) qrow.SubMatrix(AR, r, 0, 1, r+1) blasd.Copy(&wkp1, &qrow) if r < m(AR)-1 { var wkr cmat.FloatMatrix qrow.SubMatrix(AR, r, r, m(AR)-r, 1) wkr.SubMatrix(&wkp1, r, 0, m(&wkp1)-r, 1) blasd.Copy(&wkr, &qrow) } if k > 0 { // update wkp1 wrow.SubMatrix(WL, r, 0, 1, n(WL)) blasd.MVMult(&wkp1, AL, &wrow, -1.0, 1.0, gomas.NONE) } // set on-diagonal entry to zero to avoid finding it p1 := wkp1.Get(r, 0) wkp1.Set(r, 0, 0.0) // max off-diagonal on r'th column/row at index q q = blasd.IAmax(&wkp1) qmax := math.Abs(wkp1.Get(q, 0)) // restore on-diagonal entry wkp1.Set(r, 0, p1) if amax >= bkALPHA*rmax*(rmax/qmax) { // no pivoting, 1x1 diagonal return 0, 1 } // if q == r then qmax is not off-diagonal, qmax == WR[r,1] and // we get 1x1 pivot as following is always true if math.Abs(WR.Get(r, 1)) >= bkALPHA*qmax { // 1x1 pivoting and interchange with k, r // pivot row in column WR[:,1] to W[:,0] src.SubMatrix(WR, 0, 1, m(AR), 1) wkp1.SubMatrix(WR, 0, 0, m(AR), 1) blasd.Copy(&wkp1, &src) wkp1.Set(0, 0, src.Get(r, 0)) wkp1.Set(r, 0, src.Get(0, 0)) return r, 1 } else { // 2x2 pivoting and interchange with k+1, r return r, 2 } return 0, 1 }
/*
 * Unblocked Bunch-Kauffman LDL factorization, lower triangular storage.
 * The matrix is processed from the top-left corner towards the bottom-right.
 *
 *  A     Matrix to factorize; updated in place.
 *  wrk   Work matrix. The 2x2 block at row 0, column n(wrk)-2 holds the
 *        scaled inverse of a 2x2 pivot block; the area starting at row 2,
 *        column 0 is used as a copy of a21 for 2x2 pivots.
 *  p     Pivot vector; written for every factorized column.
 *  conf  Configuration passed on to the blasd matrix-matrix operations.
 *
 * Pivot entries are stored 1-based and relative to the original matrix:
 * r+m(ATL)+1 for a 1x1 pivot; for a 2x2 pivot both entries are set to
 * -(r+m(ATL)+1).
 *
 * Returns the error value (never set to non-nil in this function) and the
 * number of factorized columns.
 *
 * Corresponds lapack.DSYTF2
 */
func unblkDecompBKLower(A, wrk *cmat.FloatMatrix, p Pivots, conf *gomas.Config) (*gomas.Error, int) {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10t, a11, A20, a21, A22, a11inv cmat.FloatMatrix
	var cwrk cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	err = nil
	nc := 0

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	partitionPivot2x1(
		&pT,
		&pB, p, 0, util.PTOP)

	// permanent working space for symmetric inverse of a11; the off-diagonal
	// entries are constant, only the diagonal is rewritten for each 2x2 pivot
	a11inv.SubMatrix(wrk, 0, n(wrk)-2, 2, 2)
	a11inv.Set(1, 0, -1.0)
	a11inv.Set(0, 1, -1.0)

	for n(&ABR) > 0 {

		// r is the pivot row within ABR, np the pivot size
		r, np := findBKPivotLower(&ABR)
		// no interchange when the pivot row is already in place
		if r != 0 && r != np-1 {
			// pivoting needed; do swaping here
			applyBKPivotSymLower(&ABR, np-1, r)
			if np == 2 {
				/*          [0,0] | [r,0]
				 * a11 ==   -------------  2-by-2 pivot, swapping [1,0] and [r,0]
				 *          [r,0] | [r,r]
				 */
				t := ABR.Get(1, 0)
				ABR.Set(1, 0, ABR.Get(r, 0))
				ABR.Set(r, 0, t)
			}
		}

		// repartition according the pivot size
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10t, &a11, nil,
			&A20, &a21, &A22 /**/, A, np, util.PBOTTOMRIGHT)
		repartPivot2x1to3x1(&pT,
			&p0, &p1, &p2 /**/, p, np, util.PBOTTOM)
		// ------------------------------------------------------------
		if np == 1 {
			// A22 = A22 - a21*a21.T/a11
			blasd.MVUpdateTrm(&A22, &a21, &a21, -1.0/a11.Get(0, 0), gomas.LOWER)
			// a21 = a21/a11
			blasd.InvScale(&a21, a11.Get(0, 0))
			// store pivot point relative to original matrix
			p1[0] = r + m(&ATL) + 1
		} else if np == 2 {
			/* from Bunch-Kaufmann 1977:
			 *  (E2 C.T) = ( I2     0      )( E  0      )( I[n-2] E.-1*C.T )
			 *  (C  B  )   ( C*E.-1 I[n-2] )( 0  A[n-2] )( 0      I2       )
			 *
			 *  A[n-2] = B - C*E.-1*C.T
			 *
			 * E.-1 is inverse of a symmetric matrix, cannot use
			 * triangular solve. We calculate inverse of 2x2 matrix.
			 * Following is inspired by lapack.SYTF2
			 *
			 *      a | b       1        d | -b         b         d/b | -1
			 *  inv -----  =  ------  *  ------  =  ----------- * --------
			 *      b | d    (ad-b^2)   -b |  a     (a*d - b^2)    -1 | a/b
			 *
			 */
			a := a11.Get(0, 0)
			b := a11.Get(1, 0)
			d := a11.Get(1, 1)
			a11inv.Set(0, 0, d/b)
			a11inv.Set(1, 1, a/b)
			// denominator: (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2
			scale := 1.0 / ((a/b)*(d/b) - 1.0)
			scale /= b

			// cwrk = a21; the unscaled copy is needed for the update of A22
			cwrk.SubMatrix(wrk, 2, 0, m(&a21), n(&a21))
			blasd.Copy(&cwrk, &a21)
			// a21 = a21*a11.-1
			blasd.Mult(&a21, &cwrk, &a11inv, scale, 0.0, gomas.NONE, conf)
			// A22 = A22 - a21*a11.-1*a21.T = A22 - a21*cwrk.T
			blasd.UpdateTrm(&A22, &a21, &cwrk, -1.0, 1.0, gomas.LOWER|gomas.TRANSB, conf)

			// store pivot point relative to original matrix
			p1[0] = -(r + m(&ATL) + 1)
			p1[1] = p1[0]
		}
		// ------------------------------------------------------------
		nc += np
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, p, util.PBOTTOM)
	}
	return err, nc
}
/* * Generate one of the orthogonal matrices Q or P.T determined by BDReduce() when * reducing a real matrix A to bidiagonal form. Q and P.T are defined as products * elementary reflectors H(i) or G(i) respectively. * * Orthogonal matrix Q is generated if flag WANTQ is set. And matrix P respectively * if flag WANTP is set. */ func BDBuild(A, tau, W *cmat.FloatMatrix, K, flags int, confs ...*gomas.Config) *gomas.Error { var Qh, Ph, tauh, d, s cmat.FloatMatrix var err *gomas.Error = nil if m(A) == 0 || n(A) == 0 { return nil } if m(A) > n(A) || (m(A) == n(A) && flags&gomas.LOWER == 0) { switch flags & (gomas.WANTQ | gomas.WANTP) { case gomas.WANTQ: tauh.SubMatrix(tau, 0, 0, n(A), 1) err = QRBuild(A, &tauh, W, K, confs...) case gomas.WANTP: // Shift P matrix embedded in A down and fill first column and row // to unit vector for j := n(A) - 1; j > 0; j-- { s.SubMatrix(A, j-1, j, 1, n(A)-j) d.SubMatrix(A, j, j, 1, n(A)-j) blasd.Copy(&d, &s) A.Set(j, 0, 0.0) } // zero first row and set first entry to one d.Row(A, 0) blasd.Scale(&d, 0.0) d.Set(0, 0, 1.0) Ph.SubMatrix(A, 1, 1, n(A)-1, n(A)-1) tauh.SubMatrix(tau, 0, 0, n(A)-1, 1) if K > n(A)-1 { K = n(A) - 1 } err = LQBuild(&Ph, &tauh, W, K, confs...) } } else { switch flags & (gomas.WANTQ | gomas.WANTP) { case gomas.WANTQ: // Shift Q matrix embedded in A right and fill first column and row // to unit vector for j := m(A) - 1; j > 0; j-- { s.SubMatrix(A, j, j-1, m(A)-j, 1) d.SubMatrix(A, j, j, m(A)-j, 1) blasd.Copy(&d, &s) A.Set(0, j, 0.0) } // zero first column and set first entry to one d.Column(A, 0) blasd.Scale(&d, 0.0) d.Set(0, 0, 1.0) Qh.SubMatrix(A, 1, 1, m(A)-1, m(A)-1) tauh.SubMatrix(tau, 0, 0, m(A)-1, 1) if K > m(A)-1 { K = m(A) - 1 } err = QRBuild(&Qh, &tauh, W, K, confs...) case gomas.WANTP: tauh.SubMatrix(tau, 0, 0, m(A), 1) err = LQBuild(A, &tauh, W, K, confs...) } } if err != nil { err.Update("BDBuild") } return err }
/*
 * Unblocked, bounded Bunch-Kauffman LDL factorization for at most ncol columns.
 * At most ncol columns are factorized and trailing matrix updates are restricted
 * to ncol columns. Also original columns are accumulated to working matrix, which
 * is used by calling blocked algorithm to update the trailing matrix with BLAS3
 * update.
 *
 *  A     Matrix to factorize; updated in place.
 *  wrk   Work matrix. Its columns receive the updated pivot columns built by
 *        findAndBuildBKPivotLower(). The 2x2 block at row 0, column n(wrk)-2
 *        is permanent storage for the scaled inverse of a 2x2 pivot block.
 *  p     Pivot vector; entries for the factorized columns are written.
 *  ncol  Maximum number of columns to factorize; clamped to n(A).
 *  conf  Configuration passed on to blasd.Mult().
 *
 * Pivot entries are stored 1-based and relative to the original matrix:
 * r+m(ATL)+1 for a 1x1 pivot; for a 2x2 pivot both entries are set to
 * -(r+m(ATL)+1).
 *
 * Returns the error value (never set to non-nil in this function) and the
 * number of columns actually factorized. The count is less than ncol when
 * a 2x2 pivot would not fit into the remaining columns.
 *
 * Corresponds lapack.DLASYF
 */
func unblkBoundedBKLower(A, wrk *cmat.FloatMatrix, p *Pivots, ncol int, conf *gomas.Config) (*gomas.Error, int) {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a10t, a11, A20, a21, A22, a11inv cmat.FloatMatrix
	var w00, w10, w11 cmat.FloatMatrix
	var cwrk cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	err = nil
	nc := 0
	if ncol > n(A) {
		ncol = n(A)
	}

	// permanent working space for symmetric inverse of a11; the off-diagonal
	// entries are constant, only the diagonal is rewritten for each 2x2 pivot
	a11inv.SubMatrix(wrk, 0, n(wrk)-2, 2, 2)
	a11inv.Set(1, 0, -1.0)
	a11inv.Set(0, 1, -1.0)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	partitionPivot2x1(
		&pT,
		&pB, *p, 0, util.PTOP)

	for n(&ABR) > 0 && nc < ncol {

		// work matrix is partitioned in step with A; nc columns are done
		util.Partition2x2(
			&w00, nil,
			&w10, &w11, wrk, nc, nc, util.PTOPLEFT)

		// r is the pivot row within ABR, np the pivot size
		r, np := findAndBuildBKPivotLower(&ABL, &ABR, &w10, &w11, nc)
		if np > ncol-nc {
			// next pivot does not fit into ncol columns,
			// return with number of factorized columns
			// NOTE(review): the original comment also mentioned restoring
			// the last column; nothing is restored here - confirm intent.
			return err, nc
		}
		// no interchange when the pivot row is already in place
		if r != 0 && r != np-1 {
			// pivoting needed; do swaping here
			applyBKPivotSymLower(&ABR, np-1, r)
			// swap left hand rows to get correct updates
			swapRows(&ABL, np-1, r)
			swapRows(&w10, np-1, r)
			if np == 2 {
				/*
				 *          [0,0] | [r,0]
				 * a11 ==   -------------  2-by-2 pivot, swapping [1,0] and [r,0]
				 *          [r,0] | [r,r]
				 */
				t0 := w11.Get(1, 0)
				tr := w11.Get(r, 0)
				w11.Set(1, 0, tr)
				w11.Set(r, 0, t0)
				// interchange diagonal entries on w11[:,1]
				t0 = w11.Get(1, 1)
				tr = w11.Get(r, 1)
				w11.Set(1, 1, tr)
				w11.Set(r, 1, t0)
			}
		}

		// repartition according the pivot size
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10t, &a11, nil,
			&A20, &a21, &A22 /**/, A, np, util.PBOTTOMRIGHT)
		repartPivot2x1to3x1(&pT,
			&p0, &p1, &p2 /**/, *p, np, util.PBOTTOM)
		// ------------------------------------------------------------
		if np == 1 {
			// cwrk: the updated pivot column in w11 below the diagonal
			cwrk.SubMatrix(&w11, np, 0, m(&a21), np)
			// diagonal pivot value is taken from the updated work column
			a11.Set(0, 0, w11.Get(0, 0))
			// a21 = updated column / a11
			blasd.Copy(&a21, &cwrk)
			blasd.InvScale(&a21, a11.Get(0, 0))
			// store pivot point relative to original matrix
			p1[0] = r + m(&ATL) + 1
		} else if np == 2 {
			/*
			 * See comments for this block in unblkDecompBKLower().
			 */
			a := w11.Get(0, 0)
			b := w11.Get(1, 0)
			d := w11.Get(1, 1)
			a11inv.Set(0, 0, d/b)
			a11inv.Set(1, 1, a/b)
			// denominator: (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2
			scale := 1.0 / ((a/b)*(d/b) - 1.0)
			scale /= b

			// cwrk: the two updated pivot columns in w11 below the 2x2 block
			cwrk.SubMatrix(&w11, np, 0, m(&a21), np)
			// a21 = a21*a11.-1
			blasd.Mult(&a21, &cwrk, &a11inv, scale, 0.0, gomas.NONE, conf)
			// write the updated 2x2 pivot block back to A (lower part only)
			a11.Set(0, 0, a)
			a11.Set(1, 0, b)
			a11.Set(1, 1, d)

			// store pivot point relative to original matrix
			p1[0] = -(r + m(&ATL) + 1)
			p1[1] = p1[0]
		}
		// ------------------------------------------------------------
		nc += np
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PBOTTOMRIGHT)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, *p, util.PBOTTOM)
	}
	return err, nc
}
/*
 * Blocked version of Hessenberg reduction algorithm as presented in (1). This
 * version uses compact-WY transformation.
 *
 * Some notes:
 *
 * Elementary reflectors stored in [A11; A21].T are not on diagonal of A11. Update of
 * a block aligned with A11; A21 is as follow
 *
 * 1. Update from left Q(k)*C:
 *                                        c0     0                             c0
 *  (I - Y*T*Y.T).T*C = C - Y*(C.T*Y)*T.T = C1 - Y1 * (C1.T*Y1+C2.T*Y2)*T.T = C1-Y1*W
 *                                        C2     Y2                            C2-Y2*W
 *
 *  where W = (C1.T*Y1+C2.T*Y2)*T.T and first row of C is not affected by update
 *
 * 2. Update from right C*Q(k):
 *                                     0
 *  C - C*Y*T*Y.T = c0;C1;C2 - c0;C1;C2 * Y1 *T*(0;Y1;Y2) = c0; C1-W*Y1; C2-W*Y2
 *                                     Y2
 *  where W = (C1*Y1 + C2*Y2)*T and first column of C is not affected
 *
 * Parameters:
 *  A     Matrix to reduce; updated in place.
 *  Tvec  Receives, block by block, the diagonal of the block reflector
 *        matrix T.
 *  W     Workspace. The leading conf.LB-by-conf.LB block is used as T and
 *        the following m(A)-by-conf.LB block as V.
 *  nb    Block size used for partitioning.
 *        NOTE(review): T and V are sized with conf.LB while the loop steps
 *        with nb; presumably callers pass nb == conf.LB - confirm.
 *  conf  Blocking configuration; must not be nil as conf.LB is read.
 *
 * The remaining part, when it has more than one row, is handled by the
 * unblocked unblkHessGQvdG(). Always returns nil.
 */
func blkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A11, A12, A21, A22, A2 cmat.FloatMatrix
	var tT, tB, td cmat.FloatMatrix
	var t0, t1, t2, T cmat.FloatMatrix
	var V, VT, VB /*V0, V1, V2,*/, Y1, Y2, W0 cmat.FloatMatrix

	//fmt.Printf("blkHessGQvdG...\n")
	T.SubMatrix(W, 0, 0, conf.LB, conf.LB)
	V.SubMatrix(W, conf.LB, 0, m(A), conf.LB)
	// td is a view to the diagonal of T; copied to Tvec after each block
	td.Diag(&T)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	for m(&ABR) > nb+1 && n(&ABR) > nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, Tvec, nb, util.PBOTTOM)

		// split V at the current row offset of the trailing matrix
		util.Partition2x1(
			&VT,
			&VB, &V, m(&ATL), util.PTOP)
		// ------------------------------------------------------

		unblkBuildHessGQvdG(&ABR, &T, &VB, nil)
		blasd.Copy(&t1, &td)

		// m(Y) == m(ABR)-1, n(Y) == n(A11)
		Y1.SubMatrix(&ABR, 1, 0, n(&A11), n(&A11))
		Y2.SubMatrix(&ABR, 1+n(&A11), 0, m(&A21)-1, n(&A11))

		// [A01; A02] == ATR := ATR*(I - Y*T*Y.T)
		updateHessRightWY(&ATR, &Y1, &Y2, &T, &VT, conf)

		// A2 = [A12; A22].T
		util.Merge2x1(&A2, &A12, &A22)

		// A2 := A2 - VB*T*A21.T
		// The top right entry of A21 is temporarily set to one for the
		// multiplication and restored afterwards.
		be := A21.Get(0, -1)
		A21.Set(0, -1, 1.0)
		blasd.MultTrm(&VB, &T, 1.0, gomas.UPPER|gomas.RIGHT)
		blasd.Mult(&A2, &VB, &A21, -1.0, 1.0, gomas.TRANSB, conf)
		A21.Set(0, -1, be)

		// A2 := (I - Y*T*Y.T).T * A2
		W0.SubMatrix(&V, 0, 0, n(&A2), n(&Y2))
		updateHessLeftWY(&A2, &Y1, &Y2, &T, &W0, conf)

		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, Tvec, util.PBOTTOM)
	}

	if m(&ABR) > 1 {
		// do the rest with unblocked
		util.Merge2x1(&A2, &ATR, &ABR)
		// workspace for unblocked code is the first m(A) elements of W
		W0.SetBuf(m(A), 1, m(A), W.Data())
		unblkHessGQvdG(&A2, &tB, &W0, m(&ATR))
	}
	return nil
}
// Compute SVD when m(A) >= n(A) func svdTall(S, U, V, A, W *cmat.FloatMatrix, bits int, conf *gomas.Config) (err *gomas.Error) { var uu, vv *cmat.FloatMatrix var tauq, taup, Wred, sD, sE, R, Un cmat.FloatMatrix if (bits & (gomas.WANTU | gomas.WANTV)) != 0 { if W.Len() < 4*n(A) { err = gomas.NewError(gomas.ESIZE, "SVD") return } } tauq.SetBuf(n(A), 1, n(A), W.Data()) taup.SetBuf(n(A)-1, 1, n(A)-1, W.Data()[tauq.Len():]) wrl := W.Len() - 2*n(A) - 1 Wred.SetBuf(wrl, 1, wrl, W.Data()[2*n(A)-1:]) if svdCrossover(m(A), n(A)) { goto do_m_much_bigger } // reduce to bidiagonal form if err = BDReduce(A, &tauq, &taup, &Wred, conf); err != nil { return } sD.Diag(A) sE.Diag(A, 1) blasd.Copy(S, &sD) // left vectors if bits&gomas.WANTU != 0 { if n(U) == n(A) { // U is M-by-N; copy and make lower triangular U.Copy(A) cmat.TriL(U, 0) if err = BDBuild(U, &tauq, &Wred, n(U), gomas.WANTQ, conf); err != nil { return } } else { // U is M-by-M eye := cmat.FloatDiagonalSource{1.0} U.SetFrom(&eye, cmat.SYMM) if err = BDMult(U, A, &tauq, &Wred, gomas.MULTQ|gomas.RIGHT, conf); err != nil { return } } uu = U } // right vectors if bits&gomas.WANTV != 0 { R.SubMatrix(A, 0, 0, n(A), n(A)) V.Copy(&R) cmat.TriU(V, 0) if err = BDBuild(V, &taup, &Wred, m(V), gomas.WANTP, conf); err != nil { return } vv = V } err = BDSvd(S, &sE, uu, vv, W, bits|gomas.UPPER) return do_m_much_bigger: // M >> N here; first use QR factorization if err = QRFactor(A, &tauq, &Wred, conf); err != nil { return } if bits&gomas.WANTU != 0 { if n(U) == n(A) { U.Copy(A) if err = QRBuild(U, &tauq, &Wred, n(U), conf); err != nil { return } } else { // U is M-by-M eye := cmat.FloatDiagonalSource{1.0} U.SetFrom(&eye, cmat.SYMM) if err = QRMult(U, A, &tauq, &Wred, gomas.LEFT, conf); err != nil { return } } } R.SubMatrix(A, 0, 0, n(A), n(A)) cmat.TriU(&R, 0) // bidiagonal reduce if err = BDReduce(&R, &tauq, &taup, &Wred, conf); err != nil { return } if bits&gomas.WANTU != 0 { Un.SubMatrix(U, 0, 0, m(A), n(A)) if err = BDMult(&Un, &R, 
&tauq, &Wred, gomas.MULTQ|gomas.RIGHT, conf); err != nil { return } uu = U } if bits&gomas.WANTV != 0 { V.Copy(&R) if err = BDBuild(V, &taup, &Wred, m(V), gomas.WANTP, conf); err != nil { return } vv = V } sD.Diag(A) sE.Diag(A, 1) blasd.Copy(S, &sD) err = BDSvd(S, &sE, uu, vv, W, bits|gomas.UPPER, conf) return }