// compute: // C*Q.T = C*(I -Y*T*Y.T).T == C - C*Y*T.T*Y.T // or // C*Q = (I -Y*T*Y.T)*C == C - C*Y*T*Y.T // // // where C = ( C2 C1 ), Y = ( Y2 Y1 ) // // C1 is K*nb, C2 is K*P, Y1 is nb*nb triuu, Y2 is nb*P, T is nb*nb // W = K*nb func updateRightRQ(C1, C2, Y1t, Y2t, T, W *cmat.FloatMatrix, transpose bool, conf *gomas.Config) { // -- compute: W = C*Y = C1*Y1 + C2*Y2 // W = C1 blasd.Plus(W, C1, 0.0, 1.0, gomas.NONE) // W = C1*Y1t.T blasd.MultTrm(W, Y1t, 1.0, gomas.RIGHT|gomas.LOWER|gomas.UNIT|gomas.TRANSA, conf) // W = W + C2*Y2t.T blasd.Mult(W, C2, Y2t, 1.0, 1.0, gomas.TRANSB, conf) // --- here: W == C*Y --- tflags := gomas.RIGHT | gomas.LOWER if transpose { tflags |= gomas.TRANSA } // W = W*T or W*T.T blasd.MultTrm(W, T, 1.0, tflags, conf) // --- here: W == C*Y*T or C*Y*T.T --- // C2 = C2 - W*Y2t blasd.Mult(C2, W, Y2t, -1.0, 1.0, gomas.NONE, conf) // C1 = C1 - W*Y1t // W = W*Y1 blasd.MultTrm(W, Y1t, 1.0, gomas.RIGHT|gomas.LOWER|gomas.UNIT, conf) // C1 = C1 - W blasd.Plus(C1, W, 1.0, -1.0, gomas.NONE) // --- here: C = (I - Y*T*Y.T).T * C --- }
// compute: // Q.T*C = (I -Y*T*Y.T).T*C == C - Y*(C.T*Y*T).T // or // Q*C = (I -Y*T*Y.T)*C == C - Y*(C.T*Y*T.T).T // // // where C = ( C2 ) Y = ( Y2 Y1 ) // ( C1 ) // // C1 is nb*K, C2 is P*K, Y1 is nb*nb triuu, Y2 is nb*P, T is nb*nb // W = K*nb func updateLeftRQ(C1, C2, Y1t, Y2t, T, W *cmat.FloatMatrix, transpose bool, conf *gomas.Config) { // W = C1.T blasd.Plus(W, C1, 0.0, 1.0, gomas.TRANSB) // W = C1.T*Y1.T blasd.MultTrm(W, Y1t, 1.0, gomas.RIGHT|gomas.LOWER|gomas.UNIT|gomas.TRANSA, conf) // W = W + C2.T*Y2.T blasd.Mult(W, C2, Y2t, 1.0, 1.0, gomas.TRANSA|gomas.TRANSB, conf) // --- here: W == C.T*Y == C1.T*Y1.T + C2.T*Y2.T --- tflags := gomas.RIGHT | gomas.LOWER if !transpose { tflags |= gomas.TRANSA } // W = W*T or W*T.T blasd.MultTrm(W, T, 1.0, tflags, conf) // --- here: W == C.T*Y*T or C.T*Y*T.T --- // C2 = C2 - Y2*W.T blasd.Mult(C2, Y2t, W, -1.0, 1.0, gomas.TRANSA|gomas.TRANSB, conf) // W = Y1*W.T ==> W.T = W*Y1 blasd.MultTrm(W, Y1t, 1.0, gomas.RIGHT|gomas.LOWER|gomas.UNIT, conf) // C1 = C1 - W.T blasd.Plus(C1, W, 1.0, -1.0, gomas.TRANSB) // --- here: C = (I - Y*T*Y.T).T * C --- }
func TestDTrmmUnitUpper(t *testing.T) { var d cmat.FloatMatrix N := 563 K := 171 A := cmat.NewMatrix(N, N) B := cmat.NewMatrix(N, K) B0 := cmat.NewMatrix(N, K) C := cmat.NewMatrix(N, K) zeros := cmat.NewFloatConstSource(0.0) ones := cmat.NewFloatConstSource(1.0) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean, cmat.UPPER|cmat.UNIT) B.SetFrom(ones) B0.SetFrom(ones) // B = A*B blasd.MultTrm(B, A, 1.0, gomas.UPPER|gomas.LEFT|gomas.UNIT) d.Diag(A).SetFrom(ones) blasd.Mult(C, A, B0, 1.0, 0.0, gomas.NONE) ok := C.AllClose(B) t.Logf("trmm(B, A, L|U|N|U) == gemm(C, TriUU(A), B) : %v\n", ok) B.SetFrom(ones) // B = A.T*B d.Diag(A).SetFrom(zeros) blasd.MultTrm(B, A, 1.0, gomas.UPPER|gomas.LEFT|gomas.TRANSA|gomas.UNIT) d.Diag(A).SetFrom(ones) blasd.Mult(C, A, B0, 1.0, 0.0, gomas.TRANSA) ok = C.AllClose(B) t.Logf("trmm(B, A, L|U|T|U) == gemm(C, TriUU(A).T, B) : %v\n", ok) }
func TestDTrmmLowerRight(t *testing.T) { N := 563 K := 171 nofail := true A := cmat.NewMatrix(N, N) B := cmat.NewMatrix(K, N) B0 := cmat.NewMatrix(K, N) C := cmat.NewMatrix(K, N) ones := cmat.NewFloatConstSource(1.0) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean, cmat.LOWER) B.SetFrom(ones) B0.SetFrom(ones) // B = B*A blasd.MultTrm(B, A, 1.0, gomas.LOWER|gomas.RIGHT) blasd.Mult(C, B0, A, 1.0, 0.0, gomas.NONE) ok := C.AllClose(B) nofail = nofail && ok t.Logf("trmm(B, A, R|L|N) == gemm(C, B, TriL(A)) : %v\n", ok) B.SetFrom(ones) // B = B*A.T blasd.MultTrm(B, A, 1.0, gomas.LOWER|gomas.RIGHT|gomas.TRANSA) blasd.Mult(C, B0, A, 1.0, 0.0, gomas.TRANSB) ok = C.AllClose(B) nofail = nofail && ok t.Logf("trmm(B, A, R|L|T) == gemm(C, B, TriL(A).T) : %v\n", ok) }
func TestDTrmmUnitUpperRight(t *testing.T) { var d cmat.FloatMatrix N := 563 K := 171 A := cmat.NewMatrix(N, N) B := cmat.NewMatrix(K, N) B0 := cmat.NewMatrix(K, N) C := cmat.NewMatrix(K, N) zeros := cmat.NewFloatConstSource(0.0) ones := cmat.NewFloatConstSource(1.0) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean, cmat.UPPER|cmat.UNIT) B.SetFrom(ones) B0.SetFrom(ones) // B = B*A blasd.MultTrm(B, A, 1.0, gomas.UPPER|gomas.RIGHT|gomas.UNIT) d.Diag(A).SetFrom(ones) blasd.Mult(C, B0, A, 1.0, 0.0, gomas.NONE) ok := C.AllClose(B) t.Logf("trmm(B, A, R|U|N|U) == gemm(C, B, TriUU(A)) : %v\n", ok) B.SetFrom(ones) // B = B*A.T d.SetFrom(zeros) blasd.MultTrm(B, A, 1.0, gomas.UPPER|gomas.RIGHT|gomas.TRANSA|gomas.UNIT) d.SetFrom(ones) blasd.Mult(C, B0, A, 1.0, 0.0, gomas.TRANSB) ok = C.AllClose(B) t.Logf("trmm(B, A, R|U|T|U) == gemm(C, B, TriUU(A).T) : %v\n", ok) }
func TestDTrmmLower(t *testing.T) { N := 563 K := 171 nofail := true A := cmat.NewMatrix(N, N) B := cmat.NewMatrix(N, K) B0 := cmat.NewMatrix(N, K) C := cmat.NewMatrix(N, K) ones := cmat.NewFloatConstSource(1.0) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean, cmat.LOWER) B.SetFrom(ones) B0.SetFrom(ones) // B = A*B blasd.MultTrm(B, A, 1.0, gomas.LOWER|gomas.LEFT) blasd.Mult(C, A, B0, 1.0, 0.0, gomas.NONE) ok := C.AllClose(B) nofail = nofail && ok t.Logf("trmm(B, A, L|L|N) == gemm(C, TriL(A), B) : %v\n", ok) B.SetFrom(ones) // B = A.T*B blasd.MultTrm(B, A, 1.0, gomas.LOWER|gomas.LEFT|gomas.TRANSA) blasd.Mult(C, A, B0, 1.0, 0.0, gomas.TRANSA) ok = C.AllClose(B) nofail = nofail && ok t.Logf("trmm(B, A, L|L|T) == gemm(C, TriL(A).T, B) : %v\n", ok) }
func test_bdsvd(N, flags, kind int, verbose bool, t *testing.T) { var At, sD, sE, tmp cmat.FloatMatrix uplo := "upper" offdiag := 1 if flags&gomas.LOWER != 0 { offdiag = -1 uplo = "lower" } A0 := cmat.NewMatrix(N, N) desc := setDiagonals(A0, offdiag, kind) At.SubMatrix(A0, 0, 0, N, N) sD.Diag(A0, 0) sE.Diag(A0, offdiag) D := cmat.NewCopy(&sD) E := cmat.NewCopy(&sE) // unit singular vectors U := cmat.NewMatrix(N, N) sD.Diag(U, 0) sD.Add(1.0) V := cmat.NewMatrix(N, N) sD.Diag(V, 0) sD.Add(1.0) W := cmat.NewMatrix(4*N, 1) C := cmat.NewMatrix(N, N) lapackd.BDSvd(D, E, U, V, W, flags|gomas.WANTU|gomas.WANTV) blasd.Mult(C, U, U, 1.0, 0.0, gomas.TRANSA) sD.Diag(C) sD.Add(-1.0) nrmu := lapackd.NormP(C, lapackd.NORM_ONE) blasd.Mult(C, V, V, 1.0, 0.0, gomas.TRANSB) sD.Add(-1.0) nrmv := lapackd.NormP(C, lapackd.NORM_ONE) blasd.Mult(C, U, A0, 1.0, 0.0, gomas.TRANSA) blasd.Mult(&At, C, V, 1.0, 0.0, gomas.TRANSB) if verbose && N < 10 { t.Logf("D:\n%v\n", asRow(&tmp, D)) t.Logf("U:\n%v\n", U) t.Logf("V:\n%v\n", V) t.Logf("U.T*A*V\n%v\n", &At) } sD.Diag(&At) blasd.Axpy(&sD, D, -1.0) nrma := lapackd.NormP(&At, lapackd.NORM_ONE) t.Logf("N=%d [%s,%s] ||U.T*A*V - bdsvd(A)||_1: %e\n", N, uplo, desc, nrma) t.Logf(" ||I - U.T*U||_1: %e\n", nrmu) t.Logf(" ||I - V.T*V||_1: %e\n", nrmv) }
// test: C = C*Q.T func TestQLMultRightTrans(t *testing.T) { var d, di0, di1 cmat.FloatMatrix M := 891 N := 853 lb := 36 conf := gomas.NewConf() A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) C0 := cmat.NewMatrix(N, M) d.Diag(C0, M-N) ones := cmat.NewFloatConstSource(1.0) d.SetFrom(ones) C1 := cmat.NewCopy(C0) I0 := cmat.NewMatrix(N, N) I1 := cmat.NewCopy(I0) di0.Diag(I0) di1.Diag(I1) tau := cmat.NewMatrix(N, 1) W := cmat.NewMatrix(lb*(M+N), 1) conf.LB = lb lapackd.QLFactor(A, tau, W, conf) conf.LB = 0 lapackd.QLMult(C0, A, tau, W, gomas.RIGHT|gomas.TRANS, conf) // I = Q*Q.T - I blasd.Mult(I0, C0, C0, 1.0, 0.0, gomas.TRANSB, conf) blasd.Add(&di0, -1.0) n0 := lapackd.NormP(I0, lapackd.NORM_ONE) conf.LB = lb lapackd.QLMult(C1, A, tau, W, gomas.RIGHT|gomas.TRANS, conf) // I = Q*Q.T - I blasd.Mult(I1, C1, C1, 1.0, 0.0, gomas.TRANSB, conf) blasd.Add(&di1, -1.0) n1 := lapackd.NormP(I1, lapackd.NORM_ONE) if N < 10 { t.Logf("unblk C0*Q:\n%v\n", C0) t.Logf("blk. C2*Q:\n%v\n", C1) } blasd.Plus(C0, C1, 1.0, -1.0, gomas.NONE) n2 := lapackd.NormP(C0, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||unblk.QLMult(C) - blk.QLMult(C)||_1: %e\n", M, N, n2) t.Logf("unblk M=%d, N=%d ||I - Q*Q.T||_1: %e\n", M, N, n0) t.Logf("blk M=%d, N=%d ||I - Q*Q.T||_1: %e\n", M, N, n1) }
func TestUpperCHOL(t *testing.T) { N := 311 K := 43 nb := 0 conf := gomas.NewConf() conf.LB = nb Z := cmat.NewMatrix(N, N) A := cmat.NewMatrix(N, N) A0 := cmat.NewMatrix(N, N) B := cmat.NewMatrix(N, K) X := cmat.NewMatrix(N, K) unitrand := cmat.NewFloatUniformSource() Z.SetFrom(unitrand) blasd.Mult(A, Z, Z, 1.0, 0.0, gomas.TRANSB) A0.Copy(A) B.SetFrom(unitrand) X.Copy(B) // A = chol(A) = U.T*U t.Logf("Unblocked version: nb=%d\n", conf.LB) lapackd.CHOLFactor(A, gomas.UPPER, conf) // X = A.-1*B = U.-1*(U.-T*B) lapackd.CHOLSolve(X, A, gomas.UPPER) // B = B - A*X blasd.Mult(B, A0, X, -1.0, 1.0, gomas.NONE) // ||B - A*X||_1 nrm := lapackd.NormP(B, lapackd.NORM_ONE) t.Logf("N=%d: ||B - A*X||_1: %e\n", N, nrm) // A = chol(A) = U.T*U A.Copy(A0) B.SetFrom(unitrand) X.Copy(B) conf.LB = 16 t.Logf("Blocked version: nb=%d\n", conf.LB) lapackd.CHOLFactor(A, gomas.UPPER, conf) // X = A.-1*B = U.-1*(U.-T*B) lapackd.CHOLSolve(X, A, gomas.UPPER) // B = B - A*X blasd.Mult(B, A0, X, -1.0, 1.0, gomas.NONE) // ||B - A*X||_1 nrm = lapackd.NormP(B, lapackd.NORM_ONE) t.Logf("N=%d: ||B - A*X||_1: %e\n", N, nrm) }
func TestQLBuildwithK(t *testing.T) { var dc cmat.FloatMatrix M := 711 N := 707 K := 691 lb := 36 conf := gomas.NewConf() A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) tau := cmat.NewMatrix(N, 1) W := cmat.NewMatrix(M+N, 1) C := cmat.NewMatrix(N, N) conf.LB = lb lapackd.QLFactor(A, tau, W, conf) A1 := cmat.NewCopy(A) conf.LB = 0 lapackd.QLBuild(A, tau, W, K, conf) blasd.Mult(C, A, A, 1.0, 0.0, gomas.TRANSA, conf) dc.Diag(C) blasd.Add(&dc, -1.0) if N < 10 { t.Logf("unblk.QLBuild Q:\n%v\n", A) t.Logf("unblk.QLBuild Q.T*Q:\n%v\n", C) } n0 := lapackd.NormP(C, lapackd.NORM_ONE) conf.LB = lb W1 := lapackd.Workspace(lapackd.QLBuildWork(A1, conf)) lapackd.QLBuild(A1, tau, W1, K, conf) if N < 10 { t.Logf("blk.QLBuild Q:\n%v\n", A1) } // compute: I - Q.T*Q blasd.Mult(C, A1, A1, 1.0, 0.0, gomas.TRANSA, conf) blasd.Add(&dc, -1.0) n1 := lapackd.NormP(C, lapackd.NORM_ONE) blasd.Plus(A, A1, 1.0, -1.0, gomas.NONE) n2 := lapackd.NormP(A, lapackd.NORM_ONE) t.Logf("M=%d, N=%d, K=%d ||unblk.QLBuild(A) - blk.QLBuild(A)||_1 :%e\n", M, N, K, n2) t.Logf("unblk M=%d, N=%d, K=%d ||Q.T*Q - I||_1 : %e\n", M, N, K, n0) t.Logf("blk M=%d, N=%d, K=%d ||Q.T*Q - I||_1 : %e\n", M, N, K, n1) }
func test_trdevd(N, flags, kind int, verbose bool, t *testing.T) { var At, sD, sE, tmp cmat.FloatMatrix A0 := cmat.NewMatrix(N, N) desc := setTrdDiagonals(A0, kind) At.SubMatrix(A0, 0, 0, N, N) sD.Diag(A0, 0) sE.Diag(A0, 1) D := cmat.NewCopy(&sD) E := cmat.NewCopy(&sE) V := cmat.NewMatrix(N, N) sD.Diag(V, 0) sD.Add(1.0) W := cmat.NewMatrix(4*N, 1) C := cmat.NewMatrix(N, N) if verbose && N < 10 { t.Logf("A0:\n%v\n", A0.ToString("%6.3f")) t.Logf("V.pre:\n%v\n", V.ToString("%6.3f")) } lapackd.TRDEigen(D, E, V, W, flags|gomas.WANTV) for k := 0; k < N-1; k++ { if E.GetAt(k) != 0.0 { t.Logf("E[%d] != 0.0 (%e)\n", k, E.GetAt(k)) } } blasd.Mult(C, V, V, 1.0, 0.0, gomas.TRANSB) sD.Diag(C) sD.Add(-1.0) nrmv := lapackd.NormP(C, lapackd.NORM_ONE) blasd.Mult(C, V, A0, 1.0, 0.0, gomas.TRANSA) blasd.Mult(&At, C, V, 1.0, 0.0, gomas.NONE) if verbose && N < 10 { t.Logf("D:\n%v\n", asRow(&tmp, D).ToString("%6.3f")) t.Logf("V:\n%v\n", V.ToString("%6.3f")) t.Logf("V.T*A*V\n%v\n", At.ToString("%6.3f")) } sD.Diag(&At) blasd.Axpy(&sD, D, -1.0) nrma := lapackd.NormP(&At, lapackd.NORM_ONE) t.Logf("N=%d [%s] ||V.T*A*V - eigen(A)||_1: %e\n", N, desc, nrma) t.Logf(" ||I - V.T*V||_1: %e\n", nrmv) }
func TestLQBuild(t *testing.T) { var dc cmat.FloatMatrix M := 877 N := 913 K := 831 lb := 48 conf := gomas.NewConf() _ = lb A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) tau := cmat.NewMatrix(M, 1) W := cmat.NewMatrix(M, 1) C := cmat.NewMatrix(M, M) dc.Diag(C) conf.LB = lb lapackd.LQFactor(A, tau, W, conf) A1 := cmat.NewCopy(A) conf.LB = 0 lapackd.LQBuild(A, tau, W, K, conf) if N < 10 { t.Logf("unblk.LQBuild Q:\n%v\n", A) } blasd.Mult(C, A, A, 1.0, 0.0, gomas.TRANSB, conf) blasd.Add(&dc, -1.0) n0 := lapackd.NormP(C, lapackd.NORM_ONE) conf.LB = lb W2 := lapackd.Workspace(lapackd.LQBuildWork(A, conf)) lapackd.LQBuild(A1, tau, W2, K, conf) if N < 10 { t.Logf("blk.LQBuild Q:\n%v\n", A1) } blasd.Mult(C, A1, A1, 1.0, 0.0, gomas.TRANSB, conf) blasd.Add(&dc, -1.0) n1 := lapackd.NormP(C, lapackd.NORM_ONE) blasd.Plus(A, A1, 1.0, -1.0, gomas.NONE) n2 := lapackd.NormP(A, lapackd.NORM_ONE) t.Logf("M=%d, N=%d, K=%d ||unblk.LQBuild(A) - blk.LQBuild(A)||_1 :%e\n", M, N, K, n2) t.Logf("unblk M=%d, N=%d, K=%d ||I - Q*Q.T||_1 : %e\n", M, N, K, n0) t.Logf(" blk M=%d, N=%d, K=%d ||I - Q*Q.T||_1 : %e\n", M, N, K, n1) }
// Simple and slow LQ decomposition with Givens rotations func TestGivensLQ(t *testing.T) { var d cmat.FloatMatrix M := 149 N := 167 A := cmat.NewMatrix(M, N) A1 := cmat.NewCopy(A) ones := cmat.NewFloatConstSource(1.0) src := cmat.NewFloatNormSource() A.SetFrom(src) A0 := cmat.NewCopy(A) Qt := cmat.NewMatrix(N, N) d.Diag(Qt) d.SetFrom(ones) // R = G(n)...G(2)G(1)*A; Q = G(1).T*G(2).T...G(n).T ; Q.T = G(n)...G(2)G(1) for i := 0; i < M; i++ { // zero elements right of diagonal for j := N - 2; j >= i; j-- { c, s, r := lapackd.ComputeGivens(A.Get(i, j), A.Get(i, j+1)) A.Set(i, j, r) A.Set(i, j+1, 0.0) // apply rotation to this column starting from row i+1 lapackd.ApplyGivensRight(A, j, j+1, i+1, M-i-1, c, s) // update Qt = G(k)*Qt lapackd.ApplyGivensRight(Qt, j, j+1, 0, N, c, s) } } // A = L*Q blasd.Mult(A1, A, Qt, 1.0, 0.0, gomas.TRANSB) blasd.Plus(A0, A1, 1.0, -1.0, gomas.NONE) nrm := lapackd.NormP(A0, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||A - L*G(1)..G(n)||_1: %e\n", M, N, nrm) }
// test: min ||X|| s.t A.T*X = B func TestSolveQR(t *testing.T) { M := 799 N := 711 K := 241 nb := 32 conf := gomas.NewConf() conf.LB = nb tau := cmat.NewMatrix(N, 1) A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) A0 := cmat.NewCopy(A) B0 := cmat.NewMatrix(M, K) B0.SetFrom(src) B := cmat.NewCopy(B0) W := lapackd.Workspace(lapackd.QRFactorWork(A, conf)) lapackd.QRFactor(A, tau, W, conf) lapackd.QRSolve(B, A, tau, W, gomas.TRANS, conf) var Bmin cmat.FloatMatrix Bmin.SubMatrix(B0, 0, 0, N, K) blasd.Mult(&Bmin, A0, B, 1.0, -1.0, gomas.TRANSA, conf) nrm := lapackd.NormP(&Bmin, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||B - A.T*X||_1: %e\n", M, N, nrm) }
// blocked LU decomposition w/o pivots, FLAME LU nopivots variant 5 func blockedLUnoPiv(A *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error { var err *gomas.Error = nil var ATL, ATR, ABL, ABR cmat.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 cmat.FloatMatrix util.Partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, util.PTOPLEFT) for m(&ATL) < m(A)-nb { util.Repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, util.PBOTTOMRIGHT) // A00 = LU(A00) unblockedLUnoPiv(&A11, conf) // A12 = trilu(A00)*A12.-1 (TRSM) blasd.SolveTrm(&A12, &A11, 1.0, gomas.LEFT|gomas.LOWER|gomas.UNIT) // A21 = A21.-1*triu(A00) (TRSM) blasd.SolveTrm(&A21, &A11, 1.0, gomas.RIGHT|gomas.UPPER) // A22 = A22 - A21*A12 blasd.Mult(&A22, &A21, &A12, -1.0, 1.0, gomas.NONE) util.Continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT) } // last block if m(&ATL) < m(A) { unblockedLUnoPiv(&ABR, conf) } return err }
// test: min || B - A.T*X || func TestLeastSquaresLQ(t *testing.T) { M := 723 N := 811 K := 273 nb := 32 conf := gomas.NewConf() conf.LB = nb tau := cmat.NewMatrix(M, 1) A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) B0 := cmat.NewMatrix(M, K) B0.SetFrom(src) B := cmat.NewMatrix(N, K) // B = A.T*B0 blasd.Mult(B, A, B0, 1.0, 0.0, gomas.TRANSA, conf) W := lapackd.Workspace(lapackd.LQFactorWork(A, conf)) lapackd.LQFactor(A, tau, W, conf) // B' = A.-1*B lapackd.LQSolve(B, A, tau, W, gomas.TRANS, conf) // expect B[0:M,0:K] == B0[0:M,0:K], B[M:N,0:K] == 0 var X cmat.FloatMatrix X.SubMatrix(B, 0, 0, M, K) blasd.Plus(&X, B0, 1.0, -1.0, gomas.NONE) nrm := lapackd.NormP(&X, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||B0 - min( ||A.T*X - B0|| ) ||_1: %e\n", M, N, nrm) }
func TestBlockedDecomposeCHOL(t *testing.T) { N := 119 nb := 16 conf := gomas.NewConf() conf.LB = nb Z := cmat.NewMatrix(N, N) AL := cmat.NewMatrix(N, N) AU := cmat.NewMatrix(N, N) unitrand := cmat.NewFloatUniformSource() Z.SetFrom(unitrand) blasd.Mult(AL, Z, Z, 1.0, 0.0, gomas.TRANSB) AU.Copy(AL) eu := lapackd.CHOLFactor(AU, gomas.UPPER, conf) el := lapackd.CHOLFactor(AL, gomas.LOWER, conf) _, _ = eu, el Z.Transpose(AU) if N < 10 { t.Logf("AU.T:\n%v\n", Z) t.Logf("AL:\n%v\n", AL) } ok := AL.AllClose(Z) t.Logf("Decompose(AL) == Decompose(AU).T: %v\n", ok) }
func TestLU(t *testing.T) { N := 119 K := 41 nb := 0 A := cmat.NewMatrix(N, N) A0 := cmat.NewMatrix(N, N) B := cmat.NewMatrix(N, K) X := cmat.NewMatrix(N, K) unitrand := cmat.NewFloatUniformSource() A.SetFrom(unitrand) A0.Copy(A) B.SetFrom(unitrand) X.Copy(B) piv := lapackd.NewPivots(N) conf := gomas.DefaultConf() conf.LB = nb // R = lu(A) = P*L*U lapackd.LUFactor(A, piv, conf) // X = A.-1*B = U.-1*(L.-1*B) lapackd.LUSolve(X, A, piv, gomas.NONE) // B = B - A*X blasd.Mult(B, A0, X, -1.0, 1.0, gomas.NONE) nrm := lapackd.NormP(B, lapackd.NORM_ONE) t.Logf("Unblocked decomposition: nb=%d\n", conf.LB) t.Logf("N=%d ||B - A*X||_1: %e\n", N, nrm) // blocked conf.LB = 16 A.Copy(A0) B.SetFrom(unitrand) X.Copy(B) // lu(A) = P*L*U lapackd.LUFactor(A, piv, conf) // X = A.-1*B = U.-1*(L.-1*B) lapackd.LUSolve(X, A, piv, gomas.NONE) // B = B - A*X blasd.Mult(B, A0, X, -1.0, 1.0, gomas.NONE) nrm = lapackd.NormP(B, lapackd.NORM_ONE) t.Logf("Blocked decomposition: nb=%d\n", conf.LB) t.Logf("N=%d ||B - A*X||_1: %e\n", N, nrm) }
func TestQRBuild(t *testing.T) { var d cmat.FloatMatrix M := 911 N := 899 K := 873 lb := 36 conf := gomas.NewConf() A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) tau := cmat.NewMatrix(N, 1) W := cmat.NewMatrix(N+M, 1) C := cmat.NewMatrix(N, N) d.Diag(C) conf.LB = lb lapackd.QRFactor(A, tau, W, conf) A1 := cmat.NewCopy(A) conf.LB = 0 lapackd.QRBuild(A, tau, W, K, conf) blasd.Mult(C, A, A, 1.0, 0.0, gomas.TRANSA, conf) blasd.Add(&d, -1.0) n0 := lapackd.NormP(C, lapackd.NORM_ONE) conf.LB = lb W2 := lapackd.Workspace(lapackd.QRBuildWork(A, conf)) lapackd.QRBuild(A1, tau, W2, K, conf) blasd.Mult(C, A1, A1, 1.0, 0.0, gomas.TRANSA, conf) blasd.Add(&d, -1.0) n1 := lapackd.NormP(C, lapackd.NORM_ONE) blasd.Plus(A, A1, 1.0, -1.0, gomas.NONE) n2 := lapackd.NormP(A, lapackd.NORM_ONE) t.Logf("M=%d, N=%d, K=%d ||unblk.QRBuild(A) - blk.QRBuild(A)||_1 :%e\n", M, N, K, n2) t.Logf("unblk M=%d, N=%d, K=%d ||I - Q.T*Q||_1: %e\n", M, N, K, n0) t.Logf(" blk M=%d, N=%d, K=%d ||I - Q.T*Q||_1: %e\n", M, N, K, n1) }
func testEigen(N int, bits int, t *testing.T) { var A, A0, W, D, V *cmat.FloatMatrix var sD cmat.FloatMatrix var s string = "lower" if bits&gomas.UPPER != 0 { s = "upper" } wsize := N * N if wsize < 100 { wsize = 100 } D = cmat.NewMatrix(N, 1) A = cmat.NewMatrix(N, N) V = cmat.NewMatrix(N, N) src := cmat.NewFloatNormSource() A.SetFrom(src, cmat.SYMM) A0 = cmat.NewCopy(A) W = cmat.NewMatrix(wsize, 1) if err := lapackd.EigenSym(D, A, W, bits|gomas.WANTV); err != nil { t.Errorf("EigenSym error: %v\n", err) return } // ||I - V.T*V|| sD.Diag(V) blasd.Mult(V, A, A, 1.0, 0.0, gomas.TRANSA) blasd.Add(&sD, -1.0) nrm1 := lapackd.NormP(V, lapackd.NORM_ONE) // left vectors are M-by-N V.Copy(A) lapackd.MultDiag(V, D, gomas.RIGHT) blasd.Mult(A0, V, A, -1.0, 1.0, gomas.TRANSB) nrm2 := lapackd.NormP(A0, lapackd.NORM_ONE) t.Logf("N=%d, [%s] ||A - V*D*V.T||_1 :%e\n", N, s, nrm2) t.Logf(" ||I - V.T*V||_1 : %e\n", nrm1) }
func TestDSyrkUpper(t *testing.T) { var ok bool conf := gomas.NewConf() A := cmat.NewMatrix(N, N) A0 := cmat.NewMatrix(N, N) B := cmat.NewMatrix(N, K) Bt := cmat.NewMatrix(K, N) ones := cmat.NewFloatConstSource(1.0) zeromean := cmat.NewFloatUniformSource() _, _ = ones, zeromean A.SetFrom(ones, cmat.UPPER) A0.Copy(A) B.SetFrom(ones) Bt.Transpose(B) // B = A*B blasd.UpdateSym(A, B, 1.0, 1.0, gomas.UPPER, conf) blasd.Mult(A0, B, B, 1.0, 1.0, gomas.TRANSB) cmat.TriU(A0, cmat.NONE) ok = A0.AllClose(A) t.Logf("UpdateSym(A, B, U|N) == TriU(Mult(A, B, B.T)) : %v\n", ok) if N < 10 { t.Logf("UpdateSym(A, B)\n%v\n", A) t.Logf("Mult(A, B.T, B)\n%v\n", A0) } A.SetFrom(ones, cmat.UPPER) A0.Copy(A) blasd.UpdateSym(A, Bt, 1.0, 1.0, gomas.UPPER|gomas.TRANSA, conf) blasd.Mult(A0, Bt, Bt, 1.0, 1.0, gomas.TRANSA) cmat.TriU(A0, cmat.NONE) ok = A0.AllClose(A) t.Logf("UpdateSym(A, B, U|T) == TriU(Mult(A, B.T, B)) : %v\n", ok) if N < 10 { t.Logf("UpdateSym(A, B)\n%v\n", A) t.Logf("Mult(A, B.T, B)\n%v\n", A0) } }
/* * test2: C0 = A*B.T; C1 = B*A.T; C0 == C1.T * A[M,K], B[K,N], C[M,N] and M != N == K */ func TestDGemm2(t *testing.T) { M := 411 N := 377 K := N A := cmat.NewMatrix(M, K) B := cmat.NewMatrix(K, N) C := cmat.NewMatrix(M, N) Ct := cmat.NewMatrix(N, M) T := cmat.NewMatrix(M, N) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean) B.SetFrom(zeromean) blasd.Mult(C, A, B, 1.0, 0.0, gomas.TRANSB) blasd.Mult(Ct, B, A, 1.0, 0.0, gomas.TRANSB) T.Transpose(Ct) ok := C.AllClose(T) t.Logf("gemm(A, B.T) == transpose(gemm(B, A.T)) : %v\n", ok) }
// update T: T = -T1*Y1.T*Y2*T2 // Y1 = /Y10\ Y2 = /Y11\ // \Y20/ \Y21/ // // T = -T1 * [Y10.T*Y11 + Y20.T*Y21]*T2 // // T1 is K*K triangular upper matrix // T2 is nb*nb triangular upper matrix // T is K*nb block matrix // Y10 is nb*K block matrix // Y20 is M-K-nb*K block matrix // Y11 is nb*nb triangular lower unit diagonal matrix // Y21 is M-K-nb*nb block matrix // func updateQRTReflector(T, Y10, Y20, Y11, Y21, T1, T2 *cmat.FloatMatrix, conf *gomas.Config) { // T = Y10.T if n(Y10) == 0 { return } // T = Y10.T blasd.Plus(T, Y10, 0.0, 1.0, gomas.TRANSB) // T = Y10.T*Y11 blasd.MultTrm(T, Y11, 1.0, gomas.LOWER|gomas.UNIT|gomas.RIGHT, conf) // T = T + Y20.T*Y21 blasd.Mult(T, Y20, Y21, 1.0, 1.0, gomas.TRANSA, conf) // -- here: T == Y1.T*Y2 // T = -T1*T blasd.MultTrm(T, T1, -1.0, gomas.UPPER, conf) // T = T*T2 blasd.MultTrm(T, T2, 1.0, gomas.UPPER|gomas.RIGHT, conf) }
func main() { flag.Parse() C := cmat.NewMatrix(N, N) A := cmat.NewMatrix(N, N) B := cmat.NewMatrix(N, N) zeromean := cmat.NewFloatNormSource() A.SetFrom(zeromean) B.SetFrom(zeromean) cumtime := 0.0 mintime := 0.0 maxtime := 0.0 for i := 0; i < count; i++ { flushCache() t1 := time.Now() // ---------------------------------------------- blasd.Mult(C, A, B, 1.0, 0.0, gomas.NONE) // ---------------------------------------------- t2 := time.Now() tm := t2.Sub(t1) if mintime == 0.0 || tm.Seconds() < mintime { mintime = tm.Seconds() } if maxtime == 0.0 || tm.Seconds() > maxtime { maxtime = tm.Seconds() } cumtime += tm.Seconds() if verbose { fmt.Printf("%3d %12.4f msec, %9.4f gflops\n", i, 1e+3*tm.Seconds(), gflops(N, tm.Seconds())) } } cumtime /= float64(count) minflops := gflops(N, maxtime) avgflops := gflops(N, cumtime) maxflops := gflops(N, mintime) fmt.Printf("%5d %9.4f %9.4f %9.4f Gflops\n", N, minflops, avgflops, maxflops) }
// test: min || B - A*X || func TestLeastSquaresQR(t *testing.T) { M := 811 N := 723 K := 311 nb := 32 conf := gomas.NewConf() conf.LB = nb tau := cmat.NewMatrix(N, 1) A := cmat.NewMatrix(M, N) src := cmat.NewFloatNormSource() A.SetFrom(src) B0 := cmat.NewMatrix(N, K) B0.SetFrom(src) B := cmat.NewMatrix(M, K) // B = A*B0 blasd.Mult(B, A, B0, 1.0, 0.0, gomas.NONE, conf) W := lapackd.Workspace(lapackd.QRFactorWork(A, conf)) err := lapackd.QRFactor(A, tau, W, conf) if err != nil { t.Logf("DecomposeQR: %v\n", err) } // B' = A.-1*B err = lapackd.QRSolve(B, A, tau, W, gomas.NONE, conf) if err != nil { t.Logf("SolveQR: %v\n", err) } // expect B[0:N,0:K] == B0[0:N,0:K], B[N:M,0:K] == 0 var X cmat.FloatMatrix X.SubMatrix(B, 0, 0, N, K) blasd.Plus(&X, B0, 1.0, -1.0, gomas.NONE) nrm := lapackd.NormP(&X, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||B0 - min( ||A*X - B0|| ) ||_1: %e\n", M, N, nrm) }
// Simple and slow QR decomposition with Givens rotations func TestGivensQR(t *testing.T) { var d cmat.FloatMatrix M := 181 N := 159 A := cmat.NewMatrix(M, N) A1 := cmat.NewCopy(A) ones := cmat.NewFloatConstSource(1.0) src := cmat.NewFloatNormSource() A.SetFrom(src) A0 := cmat.NewCopy(A) Qt := cmat.NewMatrix(M, M) d.Diag(Qt) d.SetFrom(ones) // R = G(n)...G(2)G(1)*A; Q = G(1).T*G(2).T...G(n).T ; Q.T = G(n)...G(2)G(1) // for all columns ... for j := 0; j < N; j++ { // ... zero elements below diagonal, starting from bottom for i := M - 2; i >= j; i-- { c, s, r := lapackd.ComputeGivens(A.Get(i, j), A.Get(i+1, j)) A.Set(i, j, r) A.Set(i+1, j, 0.0) // apply rotations on this row starting from column j, N-j column lapackd.ApplyGivensLeft(A, i, i+1, j+1, N-j-1, c, s) // update Qt = G(k)*Qt lapackd.ApplyGivensLeft(Qt, i, i+1, 0, M, c, s) } } // check: A = Q*R blasd.Mult(A1, Qt, A, 1.0, 0.0, gomas.TRANSA) blasd.Plus(A0, A1, 1.0, -1.0, gomas.NONE) nrm := lapackd.NormP(A0, lapackd.NORM_ONE) t.Logf("M=%d, N=%d ||A - G(n)..G(1)*R||_1: %e\n", M, N, nrm) }
func test1(N int, beta float64, t *testing.T) { var sI cmat.FloatMatrix if N&0x1 != 0 { N = N + 1 } D := cmat.NewMatrix(N, 1) Z := cmat.NewMatrix(N, 1) Y := cmat.NewMatrix(N, 1) V := cmat.NewMatrix(N, 1) Q := cmat.NewMatrix(N, N) I := cmat.NewMatrix(N, N) D.SetAt(0, 1.0) Z.SetAt(0, 2.0) for i := 1; i < N-1; i++ { if i < N/2 { D.SetAt(i, 2.0-float64(N/2-i)*beta) } else { D.SetAt(i, 2.0+float64(i+1-N/2)*beta) } Z.SetAt(i, beta) } D.SetAt(N-1, 10.0/3.0) Z.SetAt(N-1, 2.0) w := blasd.Nrm2(Z) blasd.InvScale(Z, w) rho := 1.0 / (w * w) lapackd.TRDSecularSolveAll(Y, V, Q, D, Z, rho) lapackd.TRDSecularEigen(Q, V, nil) blasd.Mult(I, Q, Q, 1.0, 0.0, gomas.TRANSA) sI.Diag(I) sI.Add(-1.0) nrm := lapackd.NormP(I, lapackd.NORM_ONE) t.Logf("N=%d, beta=%e ||I - Q.T*Q||_1: %e\n", N, beta, nrm) }
/*
 * Blocked version of Hessenberg reduction algorithm as presented in (1). This
 * version uses compact-WY transformation.
 *
 * Some notes:
 *
 * Elementary reflectors stored in [A11; A21].T are not on diagonal of A11. Update of
 * a block aligned with A11; A21 is as follows
 *
 * 1. Update from left Q(k)*C:
 *                                         c0   0                            c0
 * (I - Y*T*Y.T).T*C = C - Y*(C.T*Y)*T.T = C1 - Y1 * (C1.T.Y1+C2.T*Y2)*T.T = C1-Y1*W
 *                                         C2   Y2                           C2-Y2*W
 *
 * where W = (C1.T*Y1+C2.T*Y2)*T.T and first row of C is not affected by update
 *
 * 2. Update from right C*Q(k):
 *                                       0
 * C - C*Y*T*Y.T = c0;C1;C2 - c0;C1;C2 * Y1 *T*(0;Y1;Y2) = c0; C1-W*Y1; C2-W*Y2
 *                                       Y2
 * where W = (C1*Y1 + C2*Y2)*T and first column of C is not affected
 *
 * Parameters:
 *   A     matrix reduced in place
 *   Tvec  receives, per block, a copy of the diagonal of the block reflector T
 *   W     workspace; its leading conf.LB-by-conf.LB part is used as T and the
 *         following m(A)-by-conf.LB part as V
 *   nb    block size used for stepping through A
 *   conf  configuration, forwarded to the update routines
 *
 * NOTE(review): T and V are sized from conf.LB while the loop steps by nb;
 * presumably callers always pass nb == conf.LB -- confirm.
 *
 * Always returns nil.
 */
func blkHessGQvdG(A, Tvec, W *cmat.FloatMatrix, nb int, conf *gomas.Config) *gomas.Error {
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, A11, A12, A21, A22, A2 cmat.FloatMatrix
	var tT, tB, td cmat.FloatMatrix
	var t0, t1, t2, T cmat.FloatMatrix
	var V, VT, VB /*V0, V1, V2,*/, Y1, Y2, W0 cmat.FloatMatrix

	//fmt.Printf("blkHessGQvdG...\n")
	// carve T and V out of the workspace; td views the diagonal of T
	T.SubMatrix(W, 0, 0, conf.LB, conf.LB)
	V.SubMatrix(W, conf.LB, 0, m(A), conf.LB)
	td.Diag(&T)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PTOPLEFT)
	util.Partition2x1(
		&tT,
		&tB, Tvec, 0, util.PTOP)

	// blocked part: runs while the trailing matrix ABR has more than nb+1
	// rows and more than nb columns
	for m(&ABR) > nb+1 && n(&ABR) > nb {
		util.Repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, nb, util.PBOTTOMRIGHT)
		util.Repartition2x1to3x1(&tT,
			&t0,
			&t1,
			&t2, Tvec, nb, util.PBOTTOM)

		// split V at the current row offset of the trailing matrix
		util.Partition2x1(
			&VT,
			&VB, &V, m(&ATL), util.PTOP)
		// ------------------------------------------------------

		// build the block reflector for this panel into T and VB, then
		// save the diagonal of T into the current part of Tvec
		unblkBuildHessGQvdG(&ABR, &T, &VB, nil)
		blasd.Copy(&t1, &td)

		// m(Y) == m(ABR)-1, n(Y) == n(A11)
		// Y1, Y2 start one row below the top of ABR
		Y1.SubMatrix(&ABR, 1, 0, n(&A11), n(&A11))
		Y2.SubMatrix(&ABR, 1+n(&A11), 0, m(&A21)-1, n(&A11))

		// [A01; A02] == ATR := ATR*(I - Y*T*Y.T)
		updateHessRightWY(&ATR, &Y1, &Y2, &T, &VT, conf)

		// A2 = [A12; A22].T
		util.Merge2x1(&A2, &A12, &A22)

		// A2 := A2 - VB*T*A21.T
		// the element A21(0, -1) is saved, set to 1.0 for the update and
		// restored afterwards (the -1 index presumably addresses the last
		// column -- TODO confirm)
		be := A21.Get(0, -1)
		A21.Set(0, -1, 1.0)
		blasd.MultTrm(&VB, &T, 1.0, gomas.UPPER|gomas.RIGHT)
		blasd.Mult(&A2, &VB, &A21, -1.0, 1.0, gomas.TRANSB, conf)
		A21.Set(0, -1, be)

		// A2 := (I - Y*T*Y.T).T * A2
		// W0 reuses the top of V as workspace for the left update
		W0.SubMatrix(&V, 0, 0, n(&A2), n(&Y2))
		updateHessLeftWY(&A2, &Y1, &Y2, &T, &W0, conf)

		// ------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, util.PBOTTOMRIGHT)
		util.Continue3x1to2x1(
			&tT,
			&tB, &t0, &t1, Tvec, util.PBOTTOM)
	}

	if m(&ABR) > 1 {
		// do the rest with unblocked
		util.Merge2x1(&A2, &ATR, &ABR)
		// W0 is re-pointed at the start of the workspace as an m(A)-by-1 vector
		W0.SetBuf(m(A), 1, m(A), W.Data())
		unblkHessGQvdG(&A2, &tB, &W0, m(&ATR))
	}
	return nil
}
/*
 * Unblocked solve A*X = B for Bunch-Kauffman factorized symmetric real matrix.
 *
 * B is overwritten with the result of the selected phase:
 *
 *   phase 1: computes D.-1*(U.-1*B), walking A from bottom-right to top-left
 *   phase 2: computes U.-T*B, walking A from top-left to bottom-right
 *
 * Any other phase value takes the phase 1 walking direction but performs no
 * computation, since the switch below only handles 1 and 2.
 *
 * p holds the pivots; a negative entry marks a 2x2 diagonal block, and pivot
 * row numbers are compared and used 1-based (see the nc comparisons and the
 * "-1" adjustments below).
 *
 * The returned error is always nil.
 */
func unblkSolveBKUpper(B, A *cmat.FloatMatrix, p Pivots, phase int, conf *gomas.Config) *gomas.Error {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12t, A22 cmat.FloatMatrix
	var Aref *cmat.FloatMatrix
	var BT, BB, B0, b1, B2, Bx cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots
	var aStart, aDir, bStart, bDir util.Direction
	var nc int

	np := 0

	// select walking direction; nc is the 1-based number of the current
	// diagonal element and Aref the part of A that is still to be processed
	if phase == 2 {
		aStart = util.PTOPLEFT
		aDir = util.PBOTTOMRIGHT
		bStart = util.PTOP
		bDir = util.PBOTTOM
		nc = 1
		Aref = &ABR
	} else {
		aStart = util.PBOTTOMRIGHT
		aDir = util.PTOPLEFT
		bStart = util.PBOTTOM
		bDir = util.PTOP
		nc = m(A)
		Aref = &ATL
	}
	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, aStart)
	util.Partition2x1(
		&BT,
		&BB, B, 0, bStart)
	partitionPivot2x1(
		&pT,
		&pB, p, 0, bStart)

	// phase 1:
	//   - solve U*D*X = B, overwriting B with X
	//   - looping from BOTTOM to TOP
	// phase 2:
	//   - solve U.T*X = B, overwriting B with X
	//   - looping from TOP to BOTTOM
	for n(Aref) > 0 {
		// see if next diagonal block is 1x1 or 2x2
		np = 1
		if p[nc-1] < 0 {
			np = 2
		}

		// repartition according the pivot size
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12t,
			nil, nil, &A22 /**/, A, np, aDir)
		util.Repartition2x1to3x1(&BT,
			&B0,
			&b1,
			&B2 /**/, B, np, bDir)
		repartPivot2x1to3x1(&pT,
			&p0,
			&p1,
			&p2 /**/, p, np, bDir)
		// ------------------------------------------------------------

		switch phase {
		case 1:
			// computes D.-1*(U.-1*B);
			// b1 is current row, last row of BT
			if np == 1 {
				if p1[0] != nc {
					// swap rows on top part of B
					swapRows(&BT, m(&BT)-1, p1[0]-1)
				}
				// B0 = B0 - a01*b1
				blasd.MVUpdate(&B0, &a01, &b1, -1.0)
				// b1 = b1/d1
				blasd.InvScale(&b1, a11.Get(0, 0))
				nc -= 1
			} else if np == 2 {
				if p1[0] != -nc {
					// swap rows on top part of B
					swapRows(&BT, m(&BT)-2, -p1[0]-1)
				}
				// a11 = [a b; . d]; everything is scaled by the
				// off-diagonal element b
				b := a11.Get(0, 1)
				apb := a11.Get(0, 0) / b
				dpb := a11.Get(1, 1) / b
				// (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2
				scale := apb*dpb - 1.0
				scale *= b

				// B0 = B0 - a01*b1
				blasd.Mult(&B0, &a01, &b1, -1.0, 1.0, gomas.NONE, conf)
				// b1 = a11.-1*b1.T
				//(2x2 block, no subroutine for doing this in-place)
				for k := 0; k < n(&b1); k++ {
					s0 := b1.Get(0, k)
					s1 := b1.Get(1, k)
					b1.Set(0, k, (dpb*s0-s1)/scale)
					b1.Set(1, k, (apb*s1-s0)/scale)
				}
				nc -= 2
			}
		case 2:
			// compute X = U.-T*B
			if np == 1 {
				// b1 = b1 - B0.T*a01
				blasd.MVMult(&b1, &B0, &a01, -1.0, 1.0, gomas.TRANS)
				if p1[0] != nc {
					// swap rows on bottom part of B
					util.Merge2x1(&Bx, &B0, &b1)
					swapRows(&Bx, m(&Bx)-1, p1[0]-1)
				}
				nc += 1
			} else if np == 2 {
				// b1 = b1 - a01.T*B0
				blasd.Mult(&b1, &a01, &B0, -1.0, 1.0, gomas.TRANSA, conf)
				if p1[0] != -nc {
					// swap rows on bottom part of B
					util.Merge2x1(&Bx, &B0, &b1)
					swapRows(&Bx, m(&Bx)-2, -p1[0]-1)
				}
				nc += 2
			}
		}
		// ------------------------------------------------------------
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, aDir)
		util.Continue3x1to2x1(
			&BT,
			&BB, &B0, &b1, B, bDir)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, p, bDir)
	}
	return err
}
// unblkBoundedBKUpper runs the unblocked Bunch-Kaufman factorization on the
// upper variant of A, walking from the bottom-right corner towards the
// top-left, and stops after at most ncol columns.
//
// wrk is workspace: candidate pivot columns are built into it by
// findAndBuildBKPivotUpper, and its last two rows (first two columns) hold
// the 2x2 inverse used for 2-by-2 pivots. Pivot selections are recorded in
// *p; a 2-by-2 pivot stores the same negative value in both of its entries.
//
// It returns the error (always nil here) and the number of columns actually
// factorized, which is less than ncol when the next pivot is 2-by-2 and only
// one column is left in the budget.
func unblkBoundedBKUpper(A, wrk *cmat.FloatMatrix, p *Pivots, ncol int, conf *gomas.Config) (*gomas.Error, int) {
	var err *gomas.Error = nil
	var ATL, ATR, ABL, ABR cmat.FloatMatrix
	var A00, a01, A02, a11, a12, A22, a11inv cmat.FloatMatrix
	var w00, w01, w11 cmat.FloatMatrix
	var cwrk cmat.FloatMatrix
	var pT, pB, p0, p1, p2 Pivots

	err = nil
	nc := 0
	// never factor more columns than A has
	if ncol > n(A) {
		ncol = n(A)
	}

	// permanent working space for symmetric inverse of a11
	a11inv.SubMatrix(wrk, m(wrk)-2, 0, 2, 2)
	a11inv.Set(0, 1, -1.0)
	a11inv.Set(1, 0, -1.0)

	util.Partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, util.PBOTTOMRIGHT)
	partitionPivot2x1(
		&pT,
		&pB, *p, 0, util.PBOTTOM)

	for n(&ATL) > 0 && nc < ncol {

		// split the workspace at the number of columns done so far
		util.Partition2x2(
			&w00, &w01,
			nil, &w11, wrk, nc, nc, util.PBOTTOMRIGHT)

		// r is the pivot row (-1 when no swap is needed), np the pivot size
		r, np := findAndBuildBKPivotUpper(&ATL, &ATR, &w00, &w01, nc)
		if np > ncol-nc {
			// next pivot does not fit into ncol columns,
			// return with number of factorized columns
			return err, nc
		}
		cwrk.SubMatrix(&w00, 0, n(&w00)-np, m(&ATL), np)
		if r != -1 {
			// pivoting needed; do swapping here
			k := m(&ATL) - np
			applyBKPivotSymUpper(&ATL, k, r)
			// swap right hand rows to get correct updates
			swapRows(&ATR, k, r)
			swapRows(&w01, k, r)
			if np == 2 && r != k {
				/* for 2x2 blocks we need diagonal pivots.
				 *          [r, r] | [ r,-1]
				 * a11 ==   ----------------  2-by-2 pivot, swapping [1,0] and [r,0]
				 *          [-1,r] | [-1,-1]
				 */
				// exchange rows k and r in the last two workspace columns
				t0 := w00.Get(k, -1)
				tr := w00.Get(r, -1)
				w00.Set(k, -1, tr)
				w00.Set(r, -1, t0)
				t0 = w00.Get(k, -2)
				tr = w00.Get(r, -2)
				w00.Set(k, -2, tr)
				w00.Set(r, -2, t0)
			}
		}

		// repartition according the pivot size
		util.Repartition2x2to3x3(&ATL,
			&A00, &a01, &A02,
			nil, &a11, &a12,
			nil, nil, &A22 /**/, A, np, util.PTOPLEFT)
		repartPivot2x1to3x1(&pT,
			&p0,
			&p1,
			&p2 /**/, *p, np, util.PTOP)
		// ------------------------------------------------------------

		// cwrk now views the workspace columns that correspond to a01
		wlc := n(&w00) - np
		cwrk.SubMatrix(&w00, 0, wlc, m(&a01), n(&a01))
		if np == 1 {
			// take the diagonal element from the workspace column
			a11.Set(0, 0, w00.Get(m(&a01), wlc))
			// a01 = a01/a11
			blasd.Copy(&a01, &cwrk)
			blasd.InvScale(&a01, a11.Get(0, 0))
			// store pivot point relative to original matrix
			if r == -1 {
				p1[0] = m(&ATL)
			} else {
				p1[0] = r + 1
			}
		} else if np == 2 {
			/*          a | b                       d/b | -1
			 *  w00 == ------  == a11 --> a11.-1 == -------- * scale
			 *          . | d                        -1 | a/b
			 */
			a := w00.Get(m(&ATL)-2, -2)
			b := w00.Get(m(&ATL)-2, -1)
			d := w00.Get(m(&ATL)-1, -1)
			a11inv.Set(0, 0, d/b)
			a11inv.Set(1, 1, a/b)
			// denominator: (a/b)*(d/b)-1.0 == (a*d - b^2)/b^2
			scale := 1.0 / ((a/b)*(d/b) - 1.0)
			scale /= b

			// a01 = a01*a11.-1
			blasd.Mult(&a01, &cwrk, &a11inv, scale, 0.0, gomas.NONE, conf)
			a11.Set(0, 0, a)
			a11.Set(0, 1, b)
			a11.Set(1, 1, d)

			// store pivot point relative to original matrix
			// NOTE(review): with r == -1 this stores 0, which is not
			// negative; presumably a 2-by-2 pivot always comes with
			// r != -1 -- confirm against findAndBuildBKPivotUpper.
			p1[0] = -(r + 1)
			p1[1] = p1[0]
		}
		// ------------------------------------------------------------
		nc += np
		util.Continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, util.PTOPLEFT)
		contPivot3x1to2x1(
			&pT,
			&pB, p0, p1, *p, util.PTOP)
	}
	return err, nc
}