func blockedInverseUpper(A *matrix.FloatMatrix, flags Flags, nb int) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, A01, A02, A11, A12, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) for ATL.Rows() < A.Rows() { repartition2x2to3x3(&ATL, &A00, &A01, &A02, nil, &A11, &A12, nil, nil, &A22, A, nb, pBOTTOMRIGHT) // ------------------------------------------------- // libflame, variant 1 // A01 = A00*A01 MultTrm(&A01, &A00, 1.0, flags) // A01 = -A01 / triu(A11) SolveTrm(&A01, &A11, -1.0, flags|RIGHT) // A11 = inv(A11) if flags&UNIT != 0 { unblockedInverseUnitUpper(&A11) } else { unblockedInverseUpper(&A11) } // ------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pBOTTOMRIGHT) } return }
// Compute // Y = alpha*A*X + beta*Y // Y = alpha*A.T*X + beta*Y ; flags = TRANSA // // A is M*N or N*M generic matrix, // X is row or column vector of length N // Y is row or column vector of legth M. // // MVMult is vector orientation agnostic. It does not matter if Y, X are row or // column vectors, they are always handled as if they were column vectors. func MVMult(Y, A, X *matrix.FloatMatrix, alpha, beta float64, flags Flags) error { if A.Rows() == 0 || A.Cols() == 0 { return nil } if Y.Rows() != 1 && Y.Cols() != 1 { return errors.New("Y not a vector.") } if X.Rows() != 1 && X.Cols() != 1 { return errors.New("X not a vector.") } Ar := A.FloatArray() ldA := A.LeadingIndex() Yr := Y.FloatArray() incY := 1 lenY := Y.NumElements() if Y.Rows() == 1 { // row vector incY = Y.LeadingIndex() } Xr := X.FloatArray() incX := 1 lenX := X.NumElements() if X.Rows() == 1 { // row vector incX = X.LeadingIndex() } // NOTE: This could diveded to parallel tasks by rows. calgo.DMultMV(Yr, Ar, Xr, alpha, beta, calgo.Flags(flags), incY, ldA, incX, 0, lenX, 0, lenY, vpLen, mB) return nil }
func trmvTest(t *testing.T, A *matrix.FloatMatrix, flags Flags, nb int) bool { N := A.Cols() //S := 0 //E := A.Cols() X0 := matrix.FloatWithValue(A.Rows(), 1, 2.0) X1 := X0.Copy() trans := linalg.OptNoTrans if flags&TRANS != 0 { trans = linalg.OptTrans } diag := linalg.OptNonUnit if flags&UNIT != 0 { diag = linalg.OptUnit } uplo := linalg.OptUpper if flags&LOWER != 0 { uplo = linalg.OptLower } blas.TrmvFloat(A, X0, uplo, diag, trans) Ar := A.FloatArray() Xr := X1.FloatArray() if nb == 0 { DTrimvUnblkMV(Xr, Ar, flags, 1, A.LeadingIndex(), N) } result := X0.AllClose(X1) t.Logf(" X0 == X1: %v\n", result) if !result && A.Rows() < 8 { t.Logf(" BLAS TRMV X0:\n%v\n", X0) t.Logf(" DTrmv X1:\n%v\n", X1) } return result }
// Calculate C = alpha*A*B.T + beta*C, C is M*N, A is M*P and B is N*P func MMMultTransB(C, A, B *matrix.FloatMatrix, alpha, beta float64) error { psize := int64(C.NumElements() * A.Cols()) Ar := A.FloatArray() ldA := A.LeadingIndex() Br := B.FloatArray() ldB := B.LeadingIndex() Cr := C.FloatArray() ldC := C.LeadingIndex() if nWorker <= 1 || psize <= limitOne { calgo.DMult(Cr, Ar, Br, alpha, beta, calgo.TRANSB, ldC, ldA, ldB, B.Rows(), 0, C.Cols(), 0, C.Rows(), vpLen, nB, mB) return nil } // here we have more than one worker available worker := func(cstart, cend, rstart, rend int, ready chan int) { calgo.DMult(Cr, Ar, Br, alpha, beta, calgo.TRANSB, ldC, ldA, ldB, B.Rows(), cstart, cend, rstart, rend, vpLen, nB, mB) ready <- 1 } colworks, rowworks := divideWork(C.Rows(), C.Cols(), nWorker) scheduleWork(colworks, rowworks, C.Cols(), C.Rows(), worker) //scheduleWork(colworks, rowworks, worker) return nil }
func runRefTest(A *matrix.FloatMatrix, ntest, LB int) time.Duration { var mintime time.Duration N := A.Rows() ipiv := make([]int32, N, N) lopt := linalg.OptLower if testUpper { lopt = linalg.OptUpper } fnc := func() { ERRlapack = lapack.Sytrf(A, ipiv, lopt) } A0 := A.Copy() for n := 0; n < ntest; n++ { if n > 0 { // restore original A A0.CopyTo(A) } mperf.FlushCache() time0 := mperf.Timeit(fnc) if n == 0 || time0 < mintime { mintime = time0 } } return mintime }
// Inverse NON-UNIT diagonal tridiagonal matrix func unblockedInverseUpper(A *matrix.FloatMatrix) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a01, A02, a11, a12t, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) for ATL.Rows() < A.Rows() { repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12t, nil, nil, &A22, A, 1, pBOTTOMRIGHT) // ------------------------------------------------- aval := a11.Float() // a12 = -a12/a11 InvScale(&a12t, -aval) // A02 = A02 + a01*a12 MVRankUpdate(&A02, &a01, &a12t, 1.0) // a01 = a01/a11 InvScale(&a01, aval) // a11 = 1.0/a11 a11.SetAt(0, 0, 1.0/aval) // ------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pBOTTOMRIGHT) } return }
func runRefTest(A *matrix.FloatMatrix, ntest, LB int) time.Duration { var flags matops.Flags var mintime time.Duration N := A.Rows() ipiv := make([]int, N, N) flags = matops.LOWER if testUpper { flags = matops.UPPER } W := matrix.FloatZeros(A.Rows(), LB+2) fnc := func() { _, ERRref = matops.DecomposeLDL(A, W, ipiv, flags, 0) } A0 := A.Copy() for n := 0; n < ntest; n++ { if n > 0 { // restore original A A0.CopyTo(A) } mperf.FlushCache() time0 := mperf.Timeit(fnc) if n == 0 || time0 < mintime { mintime = time0 } } return mintime }
func runTest(A *matrix.FloatMatrix, ntest, LB int) time.Duration { var flags matops.Flags var mintime time.Duration N := A.Rows() ipiv := make([]int, N, N) flags = matops.LOWER if testUpper { flags = matops.UPPER } W := matrix.FloatZeros(A.Rows(), LB+2) fnc := func() { _, ERRmatops = matops.DecomposeBK(A, W, ipiv, flags, LB) } A0 := A.Copy() for n := 0; n < ntest; n++ { if n > 0 { // restore original A A0.CopyTo(A) } mperf.FlushCache() time0 := mperf.Timeit(fnc) if n == 0 || time0 < mintime { mintime = time0 } if verbose { fmt.Printf("%.4f ms\n", time0.Seconds()*1000.0) } } return mintime }
// Inverse UNIT diagonal tridiagonal matrix func unblockedInverseUnitLower(A *matrix.FloatMatrix) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a10t, a11, A20, a21, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) for ATL.Rows() < A.Rows() { repartition2x2to3x3(&ATL, &A00, nil, nil, &a10t, &a11, nil, &A20, &a21, &A22, A, 1, pBOTTOMRIGHT) // ------------------------------------------------- // a21 = -a21 Scale(&a21, -1.0) // A20 = A20 + a21*a10.t MVRankUpdate(&A20, &a21, &a10t, 1.0) // ------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pBOTTOMRIGHT) } return }
func runTest(A *matrix.FloatMatrix, ntest, LB int) time.Duration { var mintime time.Duration M := A.Rows() N := A.Cols() nN := N if M < N { nN = M } ipiv := make([]int, nN, nN) fnc := func() { _, ERRmatops = matops.DecomposeLU(A, ipiv, LB) } A0 := A.Copy() for n := 0; n < ntest; n++ { if n > 0 { // restore original A A0.CopyTo(A) } mperf.FlushCache() time0 := mperf.Timeit(fnc) if n == 0 || time0 < mintime { mintime = time0 } if verbose { fmt.Printf("%.4f ms\n", time0.Seconds()*1000.0) } } return mintime }
/* * ( A11 a12 ) ( U11 u12 )( D1 0 )( U11.t 0 ) * ( a21 a22 ) ( 0 1 )( 0 d2 )( u12.t 1 ) * * a22 = d2 * a01 = u12*d2 => u12 = a12/d2 * A11 = u12*d2*u12.t + U11*D1*U11.t => U11 = A11 - u12*d2*u12.t */ func unblkUpperLDLnoPiv(A *matrix.FloatMatrix) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a01, A02, a11, a12, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pBOTTOMRIGHT) for ATL.Rows() > 0 { repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, pTOPLEFT) // -------------------------------------------------------- // A00 = A00 - u01*d11*u01.T = A00 - a01*a01.T/a11; triangular update err = MVUpdateTrm(&A00, &a01, &a01, -1.0/a11.Float(), UPPER) // u01 = a01/a11 InvScale(&a01, a11.Float()) // --------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pTOPLEFT) } return }
func runTest(A *matrix.FloatMatrix, ntest, LB int) time.Duration { var W *matrix.FloatMatrix = nil var mintime time.Duration N := A.Cols() tau := matrix.FloatZeros(N, 1) if LB > 0 { W = matrix.FloatZeros(A.Rows(), LB) } fnc := func() { _, ERRmatops = matops.DecomposeQR(A, tau, W, LB) } A0 := A.Copy() for n := 0; n < ntest; n++ { if n > 0 { // restore original A A0.CopyTo(A) tau.Scale(0.0) } mperf.FlushCache() time0 := mperf.Timeit(fnc) if n == 0 || time0 < mintime { mintime = time0 } if verbose { fmt.Printf("%.4f ms\n", time0.Seconds()*1000.0) } } return mintime }
func runRefTest(A *matrix.FloatMatrix, ntest, LB int) time.Duration { var mintime time.Duration M := A.Rows() N := A.Cols() nN := N if M < N { nN = M } ipiv := make([]int32, nN, nN) fnc := func() { ERRlapack = lapack.Getrf(A, ipiv) } A0 := A.Copy() for n := 0; n < ntest; n++ { if n > 0 { // restore original A A0.CopyTo(A) } mperf.FlushCache() time0 := mperf.Timeit(fnc) if n == 0 || time0 < mintime { mintime = time0 } } return mintime }
// unblocked LU decomposition w/o pivots, FLAME LU nopivots variant 5 func unblockedLUnoPiv(A *matrix.FloatMatrix) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a01, A02, a10, a11, a12, A20, a21, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) for ATL.Rows() < A.Rows() { repartition2x2to3x3(&ATL, &A00, &a01, &A02, &a10, &a11, &a12, &A20, &a21, &A22, A, 1, pBOTTOMRIGHT) // a21 = a21/a11 //a21.Scale(1.0/a11.Float()) InvScale(&a21, a11.Float()) // A22 = A22 - a21*a12 err = MVRankUpdate(&A22, &a21, &a12, -1.0) continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pBOTTOMRIGHT) } return }
// blocked LU decomposition w/o pivots, FLAME LU nopivots variant 5 func blockedLUnoPiv(A *matrix.FloatMatrix, nb int) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, A01, A02, A10, A11, A12, A20, A21, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) for ATL.Rows() < A.Rows() { repartition2x2to3x3(&ATL, &A00, &A01, &A02, &A10, &A11, &A12, &A20, &A21, &A22, A, nb, pBOTTOMRIGHT) // A00 = LU(A00) unblockedLUnoPiv(&A11) // A12 = trilu(A00)*A12.-1 (TRSM) SolveTrm(&A12, &A11, 1.0, LEFT|LOWER|UNIT) // A21 = A21.-1*triu(A00) (TRSM) SolveTrm(&A21, &A11, 1.0, RIGHT|UPPER) // A22 = A22 - A21*A12 Mult(&A22, &A21, &A12, -1.0, 1.0, NOTRANS) continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &A11, &A22, A, pBOTTOMRIGHT) } return }
// Matrix-vector triangular update A = A + alpha*X*Y.T // A is N*N matrix, // X is row or column vector of length N // Y is row or column vector of legth N. // flags is UPPER or LOWER func MVUpdateTrm(A, X, Y *matrix.FloatMatrix, alpha float64, flags Flags) error { if A.Rows() == 0 || A.Cols() == 0 { return nil } if Y.Rows() != 1 && Y.Cols() != 1 { return errors.New("Y not a vector.") } if X.Rows() != 1 && X.Cols() != 1 { return errors.New("X not a vector.") } Ar := A.FloatArray() ldA := A.LeadingIndex() Yr := Y.FloatArray() incY := 1 if Y.Rows() == 1 { // row vector incY = Y.LeadingIndex() } Xr := X.FloatArray() incX := 1 if X.Rows() == 1 { // row vector incX = X.LeadingIndex() } // NOTE: This could diveded to parallel tasks like matrix-matrix multiplication calgo.DTrmUpdMV(Ar, Xr, Yr, alpha, calgo.Flags(flags), ldA, incX, incY, 0, A.Cols(), nB) return nil }
/* * ( a11 a12 ) ( 1 0 )( d1 0 )( l l21.t ) * ( a21 A22 ) ( l21 L22 )( 0 A22 )( 0 L22.t ) * * a11 = d1 * a21 = l21*d1 => l21 = a21/d1 * A22 = l21*d1*l21.t + L22*D2*L22.t => L22 = A22 - l21*d1*l21t */ func unblkLowerLDLnoPiv(A *matrix.FloatMatrix) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a10, a11, A20, a21, A22 matrix.FloatMatrix err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pTOPLEFT) for ATL.Rows() < A.Rows() { repartition2x2to3x3(&ATL, &A00, nil, nil, &a10, &a11, nil, &A20, &a21, &A22, A, 1, pBOTTOMRIGHT) // -------------------------------------------------------- // d11 = a11; no-op // A22 = A22 - l21*d11*l21.T = A22 - a21*a21.T/a11; triangular update err = MVUpdateTrm(&A22, &a21, &a21, -1.0/a11.Float(), LOWER) // l21 = a21/a11 InvScale(&a21, a11.Float()) // --------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pBOTTOMRIGHT) } return }
// updateBlas applies a block reflector to C = (C1; C2) using reference BLAS
// calls, logging every intermediate result through t. Per the step comments
// below the net effect is C = (I - Y*T*Y.T).T * C with Y = (Y1; Y2).
//
// W is the workspace and is overwritten; C1 and C2 are updated in place.
// Panics if W.Rows() differs from C1.Cols().
func updateBlas(t *testing.T, Y1, Y2, C1, C2, T, W *matrix.FloatMatrix) {
	if W.Rows() != C1.Cols() {
		panic("W.Rows != C1.Cols")
	}
	// W = C1.T
	ScalePlus(W, C1, 0.0, 1.0, TRANSB)
	//fmt.Printf("W = C1.T:\n%v\n", W)
	// W = C1.T*Y1 (Y1 applied from the right as unit lower triangular)
	blas.TrmmFloat(Y1, W, 1.0, linalg.OptLower, linalg.OptUnit, linalg.OptRight)
	t.Logf("W = C1.T*Y1:\n%v\n", W)
	// W = W + C2.T*Y2
	blas.GemmFloat(C2, Y2, W, 1.0, 1.0, linalg.OptTransA)
	t.Logf("W = W + C2.T*Y2:\n%v\n", W)
	// --- here: W == C.T*Y ---

	// W = W*T (T applied from the right as upper triangular)
	blas.TrmmFloat(T, W, 1.0, linalg.OptUpper, linalg.OptRight)
	t.Logf("W = C.T*Y*T:\n%v\n", W)
	// --- here: W == C.T*Y*T ---

	// C2 = C2 - Y2*W.T
	blas.GemmFloat(Y2, W, C2, -1, 1.0, linalg.OptTransB)
	t.Logf("C2 = C2 - Y2*W.T:\n%v\n", C2)

	// W = Y1*W.T ==> W.T = W*Y1.T
	blas.TrmmFloat(Y1, W, 1.0, linalg.OptLower, linalg.OptUnit, linalg.OptRight, linalg.OptTrans)
	t.Logf("W.T = W*Y1.T:\n%v\n", W)

	// C1 = C1 - W.T
	ScalePlus(C1, W, 1.0, -1.0, TRANSB)
	//fmt.Printf("C1 = C1 - W.T:\n%v\n", C1)
	// --- here: C = (I - Y*T*Y.T).T * C ---
}
// blockedBuildQ builds Q in place in A from the elementary reflectors stored
// in A and the scalar factors in tau, working nb columns at a time from the
// bottom-right corner towards the top-left.
//
// Each iteration forms the block reflector T of the current panel in a local
// nb x nb scratch matrix, updates the trailing blocks with it, runs the
// unblocked builder on the panel and zeroes the block above the panel.
// W is the workspace the update and the unblocked builder draw from.
// The returned error is always nil.
func blockedBuildQ(A, tau, W *matrix.FloatMatrix, nb int) error {
	var err error = nil
	var ATL, ATR, ABL, ABR, AL matrix.FloatMatrix
	var A00, A01, A02, A10, A11, A12, A20, A21, A22 matrix.FloatMatrix
	var tT, tB matrix.FloatMatrix
	var t0, tau1, t2, Tw, Wrk matrix.FloatMatrix
	var mb int

	// Number of rows in excess of columns; the starting ABR is mb x 0.
	mb = A.Rows() - A.Cols()
	// Scratch space for the block reflector of one panel.
	Twork := matrix.FloatZeros(nb, nb)

	partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mb, 0, pBOTTOMRIGHT)
	partition2x1(
		&tT,
		&tB, tau, 0, pBOTTOM)

	// clearing of the columns of the right and setting ABR to unit diagonal
	// (only if not applying all reflectors, kb > 0)
	// NOTE(review): no such clearing is done here — presumably because all
	// reflectors are always applied; confirm.

	for ATL.Rows() > 0 && ATL.Cols() > 0 {
		repartition2x2to3x3(&ATL,
			&A00, &A01, &A02,
			&A10, &A11, &A12,
			&A20, &A21, &A22, A, nb, pTOPLEFT)
		repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, nb, pTOP)

		// --------------------------------------------------------

		// build block reflector from current block
		merge2x1(&AL, &A11, &A21)
		// the last panel may be narrower than nb, so size T by A11
		Twork.SubMatrix(&Tw, 0, 0, A11.Cols(), A11.Cols())
		unblkQRBlockReflector(&Tw, &AL, &tau1)

		// update with current block reflector (I - Y*T*Y.T)*Atrailing
		W.SubMatrix(&Wrk, 0, 0, A12.Cols(), A11.Cols())
		updateWithQT(&A12, &A22, &A11, &A21, &Tw, &Wrk, nb, false)

		// use unblocked version to compute current block
		W.SubMatrix(&Wrk, 0, 0, 1, A11.Cols())
		unblockedBuildQ(&AL, &tau1, &Wrk, 0)

		// zero upper part
		A01.SetIndexes(0.0)

		// --------------------------------------------------------
		continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, pTOPLEFT)
		continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, pTOP)
	}
	return err
}
/*
 * like LAPACK/dlarft.f
 *
 * Build block reflector T from HH reflectors stored in TriLU(A) and coefficients
 * in tau.
 *
 * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T
 *
 * T = | T  z |   z = -tau*T*Y.T*v
 *     | 0  c |   c = tau
 *
 * Q = H(1)H(2)...H(k) building forward here.
 *
 * T is written column by column; a column whose tau is zero is left untouched.
 */
func unblkQRBlockReflector(T, A, tau *matrix.FloatMatrix) {
	var ATL, ATR, ABL, ABR matrix.FloatMatrix
	var A00, a10, a11, A20, a21, A22 matrix.FloatMatrix
	var TTL, TTR, TBL, TBR matrix.FloatMatrix
	var T00, t01, T02, t11, t12, T22 matrix.FloatMatrix
	var tT, tB matrix.FloatMatrix
	var t0, tau1, t2 matrix.FloatMatrix

	partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, 0, 0, pTOPLEFT)
	partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, pTOPLEFT)
	partition2x1(
		&tT,
		&tB, tau, 0, pTOP)

	// A, T and tau are walked in lock step, one diagonal element at a time.
	for ABR.Rows() > 0 && ABR.Cols() > 0 {
		repartition2x2to3x3(&ATL,
			&A00, nil, nil,
			&a10, &a11, nil,
			&A20, &a21, &A22, A, 1, pBOTTOMRIGHT)
		repartition2x2to3x3(&TTL,
			&T00, &t01, &T02,
			nil, &t11, &t12,
			nil, nil, &T22, T, 1, pBOTTOMRIGHT)
		repartition2x1to3x1(&tT,
			&t0,
			&tau1,
			&t2, tau, 1, pBOTTOM)
		// --------------------------------------------------

		// t11 := tau
		tauval := tau1.GetAt(0, 0)
		if tauval != 0.0 {
			t11.SetAt(0, 0, tauval)

			// t01 := -tau*(a10.T + A20.T*a21); the copy supplies a10.T and
			// MVMult scales both terms by -tau
			a10.CopyTo(&t01)
			MVMult(&t01, &A20, &a21, -tauval, -tauval, TRANSA)
			// t01 := T00*t01
			MVMultTrm(&t01, &T00, UPPER)
			//t01.Scale(-tauval)
		}

		// --------------------------------------------------
		continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &a11, &A22, A, pBOTTOMRIGHT)
		continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &t11, &T22, T, pBOTTOMRIGHT)
		continue3x1to2x1(
			&tT,
			&tB, &t0, &tau1, tau, pBOTTOM)
	}
}
/*
 * Apply diagonal pivot (row and column swapped) to symmetric matrix blocks.
 * AR[0,0] is on diagonal and AL is block to the left of diagonal and AR the
 * triangular diagonal block. Need to swap row and column.
 *
 * LOWER triangular; moving from top-left to bottom-right
 *
 *    d
 *    x  d |
 *    --------------------------
 *    x  x | d
 *    S1 S1| S1 P1 x  x  x  P2     -- current row/col 'srcix'
 *    x  x | x  S2 d  x  x  x
 *    x  x | x  S2 x  d  x  x
 *    x  x | x  S2 x  x  d  x
 *    D1 D1| D1 P2 D2 D2 D2 P3     -- swap with row/col 'dstix'
 *    x  x | x  S3 x  x  x  D3 d
 *    x  x | x  S3 x  x  x  D3 x  d
 *       (ABL)          (ABR)
 *
 * UPPER triangular; moving from bottom-right to top-left
 *
 *         (ATL)                  (ATR)
 *    d  x  x  D3 x  x  x  S3 x | x
 *       d  x  D3 x  x  x  S3 x | x
 *          d  D3 x  x  x  S3 x | x
 *             P3 D2 D2 D2 P2 D1| D1  -- dstix
 *                d  x  x  S2 x | x
 *                   d  x  S2 x | x
 *                      d  S2 x | x
 *                         P1 S1| S1  -- srcix
 *                            d | x
 *    -----------------------------
 *                              | d
 *                           (ABR)
 *
 * Swaps are done in pieces: the row parts S1/D1, the column-vs-row parts
 * S2/D2, the column parts S3/D3 and finally the diagonal elements P1/P3.
 * Nothing is done when flags has neither LOWER nor UPPER set.
 *
 * NOTE(review): the sub-block sizes assume dstix > srcix for LOWER and
 * dstix < srcix for UPPER; there is no guard for other index combinations.
 */
func applyPivotSym2(AL, AR *matrix.FloatMatrix, srcix, dstix int, flags Flags) {
	var s, d matrix.FloatMatrix
	if flags&LOWER != 0 {
		// AL is [ABL]; AR is [ABR]; P1 is AR[0,0], P2 is AR[index, 0]
		// S1 -- D1: full rows of AL, then the part of the AR rows left of
		// column srcix
		AL.SubMatrix(&s, srcix, 0, 1, AL.Cols())
		AL.SubMatrix(&d, dstix, 0, 1, AL.Cols())
		Swap(&s, &d)
		if srcix > 0 {
			AR.SubMatrix(&s, srcix, 0, 1, srcix)
			AR.SubMatrix(&d, dstix, 0, 1, srcix)
			Swap(&s, &d)
		}
		// S2 -- D2: column below P1 against row left of P3
		AR.SubMatrix(&s, srcix+1, srcix, dstix-srcix-1, 1)
		AR.SubMatrix(&d, dstix, srcix+1, 1, dstix-srcix-1)
		Swap(&s, &d)
		// S3 -- D3: the two columns below row dstix
		AR.SubMatrix(&s, dstix+1, srcix, AR.Rows()-dstix-1, 1)
		AR.SubMatrix(&d, dstix+1, dstix, AR.Rows()-dstix-1, 1)
		Swap(&s, &d)
		// swap P1 and P3
		p1 := AR.GetAt(srcix, srcix)
		p3 := AR.GetAt(dstix, dstix)
		AR.SetAt(srcix, srcix, p3)
		AR.SetAt(dstix, dstix, p1)
		return
	}
	if flags&UPPER != 0 {
		// AL is ATL, AR is ATR; P1 is AL[srcix, srcix];
		// S1 -- D1; full rows of AR, then a part of the AL rows right of
		// column srcix
		AR.SubMatrix(&s, srcix, 0, 1, AR.Cols())
		AR.SubMatrix(&d, dstix, 0, 1, AR.Cols())
		Swap(&s, &d)
		if srcix < AL.Cols()-1 {
			// not the corner element
			// NOTE(review): the width used here is srcix, not the number of
			// columns remaining right of srcix (AL.Cols()-srcix-1) — verify.
			AL.SubMatrix(&s, srcix, srcix+1, 1, srcix)
			AL.SubMatrix(&d, dstix, srcix+1, 1, srcix)
			Swap(&s, &d)
		}
		// S2 -- D2: column above P1 against row right of P3
		AL.SubMatrix(&s, dstix+1, srcix, srcix-dstix-1, 1)
		AL.SubMatrix(&d, dstix, dstix+1, 1, srcix-dstix-1)
		Swap(&s, &d)
		// S3 -- D3: the two columns above row dstix
		AL.SubMatrix(&s, 0, srcix, dstix, 1)
		AL.SubMatrix(&d, 0, dstix, dstix, 1)
		Swap(&s, &d)
		//fmt.Printf("3, AR=%v\n", AR)
		// swap P1 and P3
		// NOTE(review): P1 is read from AR[0,0] but P3 is stored to
		// AR[srcix,srcix], while the comment above places P1 at
		// AL[srcix,srcix]. The read and write locations disagree, so this
		// does not look like a true swap — confirm the intended element.
		p1 := AR.GetAt(0, 0)
		p3 := AL.GetAt(dstix, dstix)
		AR.SetAt(srcix, srcix, p3)
		AL.SetAt(dstix, dstix, p1)
		return
	}
}
func swapCols(A *matrix.FloatMatrix, src, dst int) { var c0, c1 matrix.FloatMatrix if src == dst || A.Rows() == 0 { return } A.SubMatrix(&c0, 0, src, A.Rows(), 1) A.SubMatrix(&c1, 0, dst, A.Rows(), 1) Swap(&c0, &c1) }
/* * ( A11 a12 ) ( U11 u12 )( D1 0 )( U11.t 0 ) * ( a21 a22 ) ( 0 1 )( 0 d2 )( u12.t 1 ) * * a22 = d2 * a01 = u12*d2 => u12 = a12/d2 * A11 = u12*d2*u12.t + U11*D1*U11.t => U11 = A11 - u12*d2*u12.t */ func unblkUpperLDL(A *matrix.FloatMatrix, p *pPivots) (err error) { var ATL, ATR, ABL, ABR matrix.FloatMatrix var A00, a01, A02, a11, a12, A22 matrix.FloatMatrix var AL, AR, acol matrix.FloatMatrix var pT, pB, p0, p1, p2 pPivots err = nil partition2x2( &ATL, &ATR, &ABL, &ABR, A, 0, 0, pBOTTOMRIGHT) partitionPivot2x1( &pT, &pB, p, 0, pBOTTOM) for ATL.Rows() > 0 { repartition2x2to3x3(&ATL, &A00, &a01, &A02, nil, &a11, &a12, nil, nil, &A22, A, 1, pTOPLEFT) repartPivot2x1to3x1(&pT, &p0, &p1, &p2 /**/, p, 1, pTOP) // -------------------------------------------------------- // search diagonal; diag(A00;a11) ATL.Diag(&acol) //merge2x1(&acol, &a01, &a11) imax := IAMax(&acol) if imax < ATL.Rows()-1 { merge1x2(&AL, &ATL, &ATR) merge1x2(&AR, &a11, &a12) // pivot diagonal in symmetric matrix; will swap a11 and [imax,imax] applyPivotSym(&AL, &AR, imax, UPPER) p1.pivots[0] = imax + 1 } else { p1.pivots[0] = 0 } if a11.Float() == 0.0 { err = onError("zero on diagonal.") return } // A00 = A00 - u01*d11*u01.T = A00 - a01*a01.T/a11; triangular update err = MVUpdateTrm(&A00, &a01, &a01, -1.0/a11.Float(), UPPER) // u01 = a01/a11 InvScale(&a01, a11.Float()) // --------------------------------------------------------- continue3x3to2x2( &ATL, &ATR, &ABL, &ABR, &A00, &a11, &A22, A, pTOPLEFT) contPivot3x1to2x1( &pT, &pB, &p0, &p1, p, pTOP) } return }
// blockedBuildQT builds Q in place in A from the elementary reflectors stored
// in A and the already computed block reflector matrix T, working nb columns
// at a time from the bottom-right corner towards the top-left.
//
// Each iteration updates the trailing blocks with the current diagonal block
// of T, runs the unblocked builder on the panel using the diagonal of that
// block as the scalar factors, and zeroes the block above the panel.
// W is the workspace the update and the unblocked builder draw from.
// The returned error is always nil.
func blockedBuildQT(A, T, W *matrix.FloatMatrix, nb int) error {
	var err error = nil
	var ATL, ATR, ABL, ABR, AL matrix.FloatMatrix
	var A00, A01, A11, A12, A21, A22 matrix.FloatMatrix
	var TTL, TTR, TBL, TBR matrix.FloatMatrix
	var T00, T01, T02, T11, T12, T22 matrix.FloatMatrix
	var tau1, Wrk matrix.FloatMatrix
	var mb int

	// Number of rows in excess of columns; the starting ABR is mb x 0.
	mb = A.Rows() - A.Cols()

	partition2x2(
		&ATL, &ATR,
		&ABL, &ABR, A, mb, 0, pBOTTOMRIGHT)
	partition2x2(
		&TTL, &TTR,
		&TBL, &TBR, T, 0, 0, pBOTTOMRIGHT)

	// clearing of the columns of the right and setting ABR to unit diagonal
	// (only if not applying all reflectors, kb > 0)
	// NOTE(review): no such clearing is done here — presumably because all
	// reflectors are always applied; confirm.

	for ATL.Rows() > 0 && ATL.Cols() > 0 {
		repartition2x2to3x3(&ATL,
			&A00, &A01, nil,
			nil, &A11, &A12,
			nil, &A21, &A22, A, nb, pTOPLEFT)
		repartition2x2to3x3(&TTL,
			&T00, &T01, &T02,
			nil, &T11, &T12,
			nil, nil, &T22, T, nb, pTOPLEFT)

		// --------------------------------------------------------

		// update with current block reflector (I - Y*T*Y.T)*Atrailing
		W.SubMatrix(&Wrk, 0, 0, A12.Cols(), A11.Cols())
		updateWithQT(&A12, &A22, &A11, &A21, &T11, &Wrk, nb, false)

		// use unblocked version to compute current block
		W.SubMatrix(&Wrk, 0, 0, 1, A11.Cols())
		// elementary scalar coefficients on the diagonal, column vector
		T11.Diag(&tau1)
		merge2x1(&AL, &A11, &A21)
		// do an unblocked update to current block
		unblockedBuildQ(&AL, &tau1, &Wrk, 0)

		// zero upper part
		A01.SetIndexes(0.0)
		// --------------------------------------------------------
		continue3x3to2x2(
			&ATL, &ATR,
			&ABL, &ABR, &A00, &A11, &A22, A, pTOPLEFT)
		continue3x3to2x2(
			&TTL, &TTR,
			&TBL, &TBR, &T00, &T11, &T22, T, pTOPLEFT)
	}
	return err
}
func swapRows(A *matrix.FloatMatrix, src, dst int) { var r0, r1 matrix.FloatMatrix if src == dst || A.Rows() == 0 { return } A.SubMatrix(&r0, src, 0, 1, A.Cols()) A.SubMatrix(&r1, dst, 0, 1, A.Cols()) Swap(&r0, &r1) }
// Find largest absolute value on column func pivotIndex(A *matrix.FloatMatrix, p *pPivots) { max := math.Abs(A.GetAt(0, 0)) for k := 1; k < A.Rows(); k++ { v := math.Abs(A.GetAt(k, 0)) if v > max { p.pivots[0] = k max = v } } }
/* * Compute an LU factorization of a general M-by-N matrix without pivoting. * * Arguments: * A On entry, the M-by-N matrix to be factored. On exit the factors * L and U from factorization A = P*L*U, the unit diagonal elements * of L are not stored. * * nb Blocking factor for blocked invocations. If bn == 0 or * min(M,N) < nb unblocked algorithm is used. * * Returns: * LU factorization and error indicator. * * Compatible with lapack.DGETRF */ func DecomposeLUnoPiv(A *matrix.FloatMatrix, nb int) (*matrix.FloatMatrix, error) { var err error mlen := imin(A.Rows(), A.Cols()) if mlen <= nb || nb == 0 { err = unblockedLUnoPiv(A) } else { err = blockedLUnoPiv(A, nb) } return A, err }
/* * Build block reflector T from HH elementary reflectors stored in TriLU(A) and * scalar factors in tau. * * Q = I - Y*T*Y.T; Householder H = I - tau*v*v.T * * T = | T z | z = -tau*T*Y.T*v * | 0 c | c = tau * * Compatible with lapack.DLAFRT */ func BuildT(T, A, tau *matrix.FloatMatrix) (*matrix.FloatMatrix, error) { var err error = nil if T.Cols() < A.Cols() || T.Rows() < A.Cols() { return nil, errors.New("reflector matrix T too small") } unblkQRBlockReflector(T, A, tau) return T, err }
func columnDiffs(A, B *matrix.FloatMatrix) *matrix.FloatMatrix { var c matrix.FloatMatrix nrm := matrix.FloatZeros(A.Cols(), 1) A0 := A.Copy() A0.Minus(B) for k := 0; k < A.Cols(); k++ { A0.SubMatrix(&c, 0, k, A.Rows(), 1) nrm.SetAt(k, 0, matops.Norm2(&c)) } return nrm }
func rowDiffs(A, B *matrix.FloatMatrix) *matrix.FloatMatrix { var r matrix.FloatMatrix nrm := matrix.FloatZeros(A.Rows(), 1) A0 := A.Copy() A0.Minus(B) for k := 0; k < A.Rows(); k++ { A0.SubMatrix(&r, k, 0, 1, A.Cols()) nrm.SetAt(k, 0, matops.Norm2(&r)) } return nrm }