func constructH(tau []float64, v blas64.General, store lapack.StoreV, direct lapack.Direct) blas64.General { m := v.Rows k := v.Cols if store == lapack.RowWise { m, k = k, m } h := blas64.General{ Rows: m, Cols: m, Stride: m, Data: make([]float64, m*m), } for i := 0; i < m; i++ { h.Data[i*m+i] = 1 } for i := 0; i < k; i++ { vecData := make([]float64, m) if store == lapack.ColumnWise { for j := 0; j < m; j++ { vecData[j] = v.Data[j*v.Cols+i] } } else { for j := 0; j < m; j++ { vecData[j] = v.Data[i*v.Cols+j] } } vec := blas64.Vector{ Inc: 1, Data: vecData, } hi := blas64.General{ Rows: m, Cols: m, Stride: m, Data: make([]float64, m*m), } for i := 0; i < m; i++ { hi.Data[i*m+i] = 1 } // hi = I - tau * v * v^T blas64.Ger(-tau[i], vec, vec, hi) hcopy := blas64.General{ Rows: m, Cols: m, Stride: m, Data: make([]float64, m*m), } copy(hcopy.Data, h.Data) if direct == lapack.Forward { // H = H * H_I in forward mode blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hcopy, hi, 0, h) } else { // H = H_I * H in backward mode blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hi, hcopy, 0, h) } } return h }
func (c *ConvLayer) convolveR(v autofunc.RVector, in, inR linalg.Vector, out *Tensor3) { inMat := c.inputToMatrix(in) inMatR := c.inputToMatrix(inR) filterMat := blas64.General{ Rows: c.FilterCount, Cols: inMat.Cols, Stride: inMat.Stride, Data: c.FilterVar.Vector, } outMat := blas64.General{ Rows: out.Width * out.Height, Cols: out.Depth, Stride: out.Depth, Data: out.Data, } blas64.Gemm(blas.NoTrans, blas.Trans, 1, inMatR, filterMat, 0, outMat) if filterRV, ok := v[c.FilterVar]; ok { filterMatR := blas64.General{ Rows: c.FilterCount, Cols: inMat.Cols, Stride: inMat.Stride, Data: filterRV, } blas64.Gemm(blas.NoTrans, blas.Trans, 1, inMat, filterMatR, 1, outMat) } if biasRV, ok := v[c.Biases]; ok { biasVec := blas64.Vector{Inc: 1, Data: biasRV} for i := 0; i < len(out.Data); i += outMat.Cols { outRow := out.Data[i : i+outMat.Cols] outVec := blas64.Vector{Inc: 1, Data: outRow} blas64.Axpy(len(outRow), 1, biasVec, outVec) } } }
func (c *convLayerResult) propagateSingle(input, upstream, downstream linalg.Vector, grad autofunc.Gradient) { upstreamMat := blas64.General{ Rows: c.Layer.OutputWidth() * c.Layer.OutputHeight(), Cols: c.Layer.OutputDepth(), Stride: c.Layer.OutputDepth(), Data: upstream, } if downstream != nil { inDeriv := c.Layer.inputToMatrix(input) filterMat := blas64.General{ Rows: len(c.Layer.Filters), Cols: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Stride: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Data: c.Layer.FilterVar.Vector, } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, upstreamMat, filterMat, 0, inDeriv) flattened := NewTensor3Col(c.Layer.InputWidth, c.Layer.InputHeight, c.Layer.InputDepth, inDeriv.Data, c.Layer.FilterWidth, c.Layer.FilterHeight, c.Layer.Stride) copy(downstream, flattened.Data) } if filterGrad, ok := grad[c.Layer.FilterVar]; ok { inMatrix := c.Layer.inputToMatrix(input) destMat := blas64.General{ Rows: len(c.Layer.Filters), Cols: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Stride: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Data: filterGrad, } blas64.Gemm(blas.Trans, blas.NoTrans, 1, upstreamMat, inMatrix, 1, destMat) } }
func testDlarfx(t *testing.T, impl Dlarfxer, side blas.Side, m, n, extra int, rnd *rand.Rand) { const tol = 1e-13 c := randomGeneral(m, n, n+extra, rnd) cWant := randomGeneral(m, n, n+extra, rnd) tau := rnd.NormFloat64() var ( v []float64 h blas64.General ) if side == blas.Left { v = randomSlice(m, rnd) h = eye(m, m+extra) } else { v = randomSlice(n, rnd) h = eye(n, n+extra) } blas64.Ger(-tau, blas64.Vector{Inc: 1, Data: v}, blas64.Vector{Inc: 1, Data: v}, h) if side == blas.Left { blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, h, c, 0, cWant) } else { blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, c, h, 0, cWant) } var work []float64 if h.Rows > 10 { // Allocate work only if H has order > 10. if side == blas.Left { work = make([]float64, n) } else { work = make([]float64, m) } } impl.Dlarfx(side, m, n, v, tau, c.Data, c.Stride, work) prefix := fmt.Sprintf("Case side=%v, m=%v, n=%v, extra=%v", side, m, n, extra) // Check any invalid modifications of c. if !generalOutsideAllNaN(c) { t.Errorf("%v: out-of-range write to C\n%v", prefix, c.Data) } if !equalApproxGeneral(c, cWant, tol) { t.Errorf("%v: unexpected C\n%v", prefix, c.Data) } }
// Mul takes the matrix product of a and b, placing the result in the receiver. // // See the Muler interface for more information. func (m *Dense) Mul(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ac != br { panic(ErrShape) } m.reuseAs(ar, bc) var w *Dense if m != a && m != b { w = m } else { w = getWorkspace(ar, bc, false) defer func() { m.Copy(w) putWorkspace(w) }() } if a, ok := a.(RawMatrixer); ok { if b, ok := b.(RawMatrixer); ok { amat, bmat := a.RawMatrix(), b.RawMatrix() blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, amat, bmat, 0, w.Mat) return } } if a, ok := a.(Vectorer); ok { if b, ok := b.(Vectorer); ok { row := make([]float64, ac) col := make([]float64, br) for r := 0; r < ar; r++ { dataTmp := w.Mat.Data[r*w.Mat.Stride : r*w.Mat.Stride+bc] for c := 0; c < bc; c++ { dataTmp[c] = blas64.Dot(ac, blas64.Vector{Inc: 1, Data: a.Row(row, r)}, blas64.Vector{Inc: 1, Data: b.Col(col, c)}, ) } } return } } row := make([]float64, ac) for r := 0; r < ar; r++ { for i := range row { row[i] = a.At(r, i) } for c := 0; c < bc; c++ { var v float64 for i, e := range row { v += e * b.At(i, c) } w.Mat.Data[r*w.Mat.Stride+c] = v } } }
func testDorghr(t *testing.T, impl Dorghrer, n, ilo, ihi, extra int, optwork bool, rnd *rand.Rand) { const tol = 1e-14 // Construct the matrix A with elementary reflectors and scalar factors tau. a := randomGeneral(n, n, n+extra, rnd) var tau []float64 if n > 1 { tau = nanSlice(n - 1) } work := nanSlice(max(1, n)) // Minimum work for Dgehrd. impl.Dgehrd(n, ilo, ihi, a.Data, a.Stride, tau, work, len(work)) // Extract Q for later comparison. q := eye(n, n) qCopy := cloneGeneral(q) for j := ilo; j < ihi; j++ { h := eye(n, n) v := blas64.Vector{ Inc: 1, Data: make([]float64, n), } v.Data[j+1] = 1 for i := j + 2; i < ihi+1; i++ { v.Data[i] = a.Data[i*a.Stride+j] } blas64.Ger(-tau[j], v, v, h) copy(qCopy.Data, q.Data) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, h, 0, q) } if optwork { work = nanSlice(1) impl.Dorghr(n, ilo, ihi, a.Data, a.Stride, tau, work, -1) work = nanSlice(int(work[0])) } else { work = nanSlice(max(1, ihi-ilo)) } impl.Dorghr(n, ilo, ihi, a.Data, a.Stride, tau, work, len(work)) prefix := fmt.Sprintf("Case n=%v, ilo=%v, ihi=%v, extra=%v, optwork=%v", n, ilo, ihi, extra, optwork) if !generalOutsideAllNaN(a) { t.Errorf("%v: out-of-range write to A\n%v", prefix, a.Data) } if !isOrthonormal(a) { t.Errorf("%v: A is not orthogonal\n%v", prefix, a.Data) } for i := 0; i < n; i++ { for j := 0; j < n; j++ { aij := a.Data[i*a.Stride+j] qij := q.Data[i*q.Stride+j] if math.Abs(aij-qij) > tol { t.Errorf("%v: unexpected value of A[%v,%v]. want %v, got %v", prefix, i, j, qij, aij) } } } }
// QFromQR extracts the m×m orthonormal matrix Q from a QR decomposition. func (m *Dense) QFromQR(qr *QR) { r, c := qr.qr.Dims() m.reuseAs(r, r) // Set Q = I. for i := 0; i < r; i++ { for j := 0; j < i; j++ { m.mat.Data[i*m.mat.Stride+j] = 0 } m.mat.Data[i*m.mat.Stride+i] = 1 for j := i + 1; j < r; j++ { m.mat.Data[i*m.mat.Stride+j] = 0 } } // Construct Q from the elementary reflectors. h := blas64.General{ Rows: r, Cols: r, Stride: r, Data: make([]float64, r*r), } qCopy := getWorkspace(r, r, false) v := blas64.Vector{ Inc: 1, Data: make([]float64, r), } for i := 0; i < c; i++ { // Set h = I. for i := range h.Data { h.Data[i] = 0 } for j := 0; j < r; j++ { h.Data[j*r+j] = 1 } // Set the vector data as the elementary reflector. for j := 0; j < i; j++ { v.Data[j] = 0 } v.Data[i] = 1 for j := i + 1; j < r; j++ { v.Data[j] = qr.qr.mat.Data[j*qr.qr.mat.Stride+i] } // Compute the multiplication matrix. blas64.Ger(-qr.tau[i], v, v, h) qCopy.Copy(m) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy.mat, h, 0, m.mat) } }
// QFromLQ extracts the n×n orthonormal matrix Q from an LQ decomposition. func (m *Dense) QFromLQ(lq *LQ) { r, c := lq.lq.Dims() m.reuseAs(c, c) // Set Q = I. for i := 0; i < c; i++ { for j := 0; j < i; j++ { m.mat.Data[i*m.mat.Stride+j] = 0 } m.mat.Data[i*m.mat.Stride+i] = 1 for j := i + 1; j < c; j++ { m.mat.Data[i*m.mat.Stride+j] = 0 } } // Construct Q from the elementary reflectors. h := blas64.General{ Rows: c, Cols: c, Stride: c, Data: make([]float64, c*c), } qCopy := getWorkspace(c, c, false) v := blas64.Vector{ Inc: 1, Data: make([]float64, c), } for i := 0; i < r; i++ { // Set h = I. for i := range h.Data { h.Data[i] = 0 } for j := 0; j < c; j++ { h.Data[j*c+j] = 1 } // Set the vector data as the elementary reflector. for j := 0; j < i; j++ { v.Data[j] = 0 } v.Data[i] = 1 for j := i + 1; j < c; j++ { v.Data[j] = lq.lq.mat.Data[i*lq.lq.mat.Stride+j] } // Compute the multiplication matrix. blas64.Ger(-lq.tau[i], v, v, h) qCopy.Copy(m) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, h, qCopy.mat, 0, m.mat) } }
// Eval returns a matrix literal. func (m1 *Mul) Eval() MatrixLiteral { // This should be replaced with a call to Eval on each side, and then a type // switch to handle the various matrix literals. lm := m1.Left.Eval() rm := m1.Right.Eval() left := lm.AsGeneral() right := rm.AsGeneral() r, c := m1.Dims() m := blas64.General{ Rows: r, Cols: c, Stride: c, Data: make([]float64, r*c), } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, left, right, 0, m) return &General{m} }
func (c *ConvLayer) convolve(in linalg.Vector, out *Tensor3) { inMat := c.inputToMatrix(in) filterMat := blas64.General{ Rows: c.FilterCount, Cols: inMat.Cols, Stride: inMat.Stride, Data: c.FilterVar.Vector, } outMat := blas64.General{ Rows: out.Width * out.Height, Cols: out.Depth, Stride: out.Depth, Data: out.Data, } blas64.Gemm(blas.NoTrans, blas.Trans, 1, inMat, filterMat, 0, outMat) biasVec := blas64.Vector{Inc: 1, Data: c.Biases.Vector} for i := 0; i < len(out.Data); i += outMat.Cols { outRow := out.Data[i : i+outMat.Cols] outVec := blas64.Vector{Inc: 1, Data: outRow} blas64.Axpy(len(outRow), 1, biasVec, outVec) } }
// dlatrdCheckDecomposition checks that the first nb rows have been successfully // reduced. func dlatrdCheckDecomposition(t *testing.T, uplo blas.Uplo, n, nb int, e, tau, a []float64, lda int, aGen, q blas64.General) bool { // Compute Q^T * A * Q. tmp := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } ans := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } blas64.Gemm(blas.Trans, blas.NoTrans, 1, q, aGen, 0, tmp) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp, q, 0, ans) // Compare with T. if uplo == blas.Upper { for i := n - 1; i >= n-nb; i-- { for j := 0; j < n; j++ { v := ans.Data[i*ans.Stride+j] switch { case i == j: if math.Abs(v-a[i*lda+j]) > 1e-10 { return false } case i == j-1: if math.Abs(a[i*lda+j]-1) > 1e-10 { return false } if math.Abs(v-e[i]) > 1e-10 { return false } case i == j+1: default: if math.Abs(v) > 1e-10 { return false } } } } } else { for i := 0; i < nb; i++ { for j := 0; j < n; j++ { v := ans.Data[i*ans.Stride+j] switch { case i == j: if math.Abs(v-a[i*lda+j]) > 1e-10 { return false } case i == j-1: case i == j+1: if math.Abs(a[i*lda+j]-1) > 1e-10 { return false } if math.Abs(v-e[i-1]) > 1e-10 { return false } default: if math.Abs(v) > 1e-10 { return false } } } } } return true }
func DlarfbTest(t *testing.T, impl Dlarfber) { rnd := rand.New(rand.NewSource(1)) for _, store := range []lapack.StoreV{lapack.ColumnWise, lapack.RowWise} { for _, direct := range []lapack.Direct{lapack.Forward, lapack.Backward} { for _, side := range []blas.Side{blas.Left, blas.Right} { for _, trans := range []blas.Transpose{blas.Trans, blas.NoTrans} { for cas, test := range []struct { ma, na, cdim, lda, ldt, ldc int }{ {6, 6, 6, 0, 0, 0}, {6, 8, 10, 0, 0, 0}, {6, 10, 8, 0, 0, 0}, {8, 6, 10, 0, 0, 0}, {8, 10, 6, 0, 0, 0}, {10, 6, 8, 0, 0, 0}, {10, 8, 6, 0, 0, 0}, {6, 6, 6, 12, 15, 30}, {6, 8, 10, 12, 15, 30}, {6, 10, 8, 12, 15, 30}, {8, 6, 10, 12, 15, 30}, {8, 10, 6, 12, 15, 30}, {10, 6, 8, 12, 15, 30}, {10, 8, 6, 12, 15, 30}, {6, 6, 6, 15, 12, 30}, {6, 8, 10, 15, 12, 30}, {6, 10, 8, 15, 12, 30}, {8, 6, 10, 15, 12, 30}, {8, 10, 6, 15, 12, 30}, {10, 6, 8, 15, 12, 30}, {10, 8, 6, 15, 12, 30}, } { // Generate a matrix for QR ma := test.ma na := test.na lda := test.lda if lda == 0 { lda = na } a := make([]float64, ma*lda) for i := 0; i < ma; i++ { for j := 0; j < lda; j++ { a[i*lda+j] = rnd.Float64() } } k := min(ma, na) // H is always ma x ma var m, n, rowsWork int switch { default: panic("not implemented") case side == blas.Left: m = test.ma n = test.cdim rowsWork = n case side == blas.Right: m = test.cdim n = test.ma rowsWork = m } // Use dgeqr2 to find the v vectors tau := make([]float64, na) work := make([]float64, na) impl.Dgeqr2(ma, k, a, lda, tau, work) // Correct the v vectors based on the direct and store vMatTmp := extractVMat(ma, na, a, lda, lapack.Forward, lapack.ColumnWise) vMat := constructVMat(vMatTmp, store, direct) v := vMat.Data ldv := vMat.Stride // Use dlarft to find the t vector ldt := test.ldt if ldt == 0 { ldt = k } tm := make([]float64, k*ldt) impl.Dlarft(direct, store, ma, k, v, ldv, tau, tm, ldt) // Generate c matrix ldc := test.ldc if ldc == 0 { ldc = n } c := make([]float64, m*ldc) for i := 0; i < m; i++ { for j := 0; j < ldc; j++ { c[i*ldc+j] = rnd.Float64() } } cCopy := make([]float64, len(c)) copy(cCopy, c) ldwork := k work = make([]float64, rowsWork*k) // Call Dlarfb with this information impl.Dlarfb(side, trans, direct, store, m, n, k, v, ldv, tm, ldt, c, ldc, work, ldwork) h := constructH(tau, vMat, store, direct) cMat := blas64.General{ Rows: m, Cols: n, Stride: ldc, Data: make([]float64, m*ldc), } copy(cMat.Data, cCopy) ans := blas64.General{ Rows: m, Cols: n, Stride: ldc, Data: make([]float64, m*ldc), } copy(ans.Data, cMat.Data) switch { default: panic("not implemented") case side == blas.Left && trans == blas.NoTrans: blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, h, cMat, 0, ans) case side == blas.Left && trans == blas.Trans: blas64.Gemm(blas.Trans, blas.NoTrans, 1, h, cMat, 0, ans) case side == blas.Right && trans == blas.NoTrans: blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, cMat, h, 0, ans) case side == blas.Right && trans == blas.Trans: blas64.Gemm(blas.NoTrans, blas.Trans, 1, cMat, h, 0, ans) } if !floats.EqualApprox(ans.Data, c, 1e-14) { t.Errorf("Cas %v mismatch. Want %v, got %v.", cas, ans.Data, c) } } } } } } }
func DlarfTest(t *testing.T, impl Dlarfer) { for i, test := range []struct { m, n, ldc int incv, lastv int lastr, lastc int tau float64 }{ { m: 3, n: 2, ldc: 2, incv: 4, lastv: 1, lastr: 2, lastc: 1, tau: 2, }, { m: 2, n: 3, ldc: 3, incv: 4, lastv: 1, lastr: 1, lastc: 2, tau: 2, }, { m: 2, n: 3, ldc: 3, incv: 4, lastv: 1, lastr: 0, lastc: 1, tau: 2, }, { m: 2, n: 3, ldc: 3, incv: 4, lastv: 0, lastr: 0, lastc: 1, tau: 2, }, { m: 10, n: 10, ldc: 10, incv: 4, lastv: 6, lastr: 9, lastc: 8, tau: 2, }, } { // Construct a random matrix. c := make([]float64, test.ldc*test.m) for i := 0; i <= test.lastr; i++ { for j := 0; j <= test.lastc; j++ { c[i*test.ldc+j] = rand.Float64() } } cCopy := make([]float64, len(c)) copy(cCopy, c) cCopy2 := make([]float64, len(c)) copy(cCopy2, c) // Test with side right. sz := max(test.m, test.n) // so v works for both right and left side. v := make([]float64, test.incv*sz+1) // Fill with nonzero entries up until lastv. for i := 0; i <= test.lastv; i++ { v[i*test.incv] = rand.Float64() } // Construct h explicitly to compare. h := make([]float64, test.n*test.n) for i := 0; i < test.n; i++ { h[i*test.n+i] = 1 } hMat := blas64.General{ Rows: test.n, Cols: test.n, Stride: test.n, Data: h, } vVec := blas64.Vector{ Inc: test.incv, Data: v, } blas64.Ger(-test.tau, vVec, vVec, hMat) // Apply multiplication (2nd copy is to avoid aliasing). cMat := blas64.General{ Rows: test.m, Cols: test.n, Stride: test.ldc, Data: cCopy, } cMat2 := blas64.General{ Rows: test.m, Cols: test.n, Stride: test.ldc, Data: cCopy2, } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, cMat2, hMat, 0, cMat) // cMat now stores the true answer. Compare with the function call. work := make([]float64, sz) impl.Dlarf(blas.Right, test.m, test.n, v, test.incv, test.tau, c, test.ldc, work) if !floats.EqualApprox(c, cMat.Data, 1e-14) { t.Errorf("Dlarf mismatch right, case %v. Want %v, got %v", i, cMat.Data, c) } // Test on the left side. copy(c, cCopy2) copy(cCopy, c) // Construct h. h = make([]float64, test.m*test.m) for i := 0; i < test.m; i++ { h[i*test.m+i] = 1 } hMat = blas64.General{ Rows: test.m, Cols: test.m, Stride: test.m, Data: h, } blas64.Ger(-test.tau, vVec, vVec, hMat) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hMat, cMat2, 0, cMat) impl.Dlarf(blas.Left, test.m, test.n, v, test.incv, test.tau, c, test.ldc, work) if !floats.EqualApprox(c, cMat.Data, 1e-14) { t.Errorf("Dlarf mismatch left, case %v. Want %v, got %v", i, cMat.Data, c) } } }
// checkPLU checks that the PLU factorization contained in factorize matches // the original matrix contained in original. func checkPLU(t *testing.T, ok bool, m, n, lda int, ipiv []int, factorized, original []float64, tol float64, print bool) { var hasZeroDiagonal bool for i := 0; i < min(m, n); i++ { if factorized[i*lda+i] == 0 { hasZeroDiagonal = true break } } if hasZeroDiagonal && ok { t.Error("Has a zero diagonal but returned ok") } if !hasZeroDiagonal && !ok { t.Error("Non-zero diagonal but returned !ok") } // Check that the LU decomposition is correct. mn := min(m, n) l := make([]float64, m*mn) ldl := mn u := make([]float64, mn*n) ldu := n for i := 0; i < m; i++ { for j := 0; j < n; j++ { v := factorized[i*lda+j] switch { case i == j: l[i*ldl+i] = 1 u[i*ldu+i] = v case i > j: l[i*ldl+j] = v case i < j: u[i*ldu+j] = v } } } LU := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } U := blas64.General{ Rows: mn, Cols: n, Stride: ldu, Data: u, } L := blas64.General{ Rows: m, Cols: mn, Stride: ldl, Data: l, } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, L, U, 0, LU) p := make([]float64, m*m) ldp := m for i := 0; i < m; i++ { p[i*ldp+i] = 1 } for i := len(ipiv) - 1; i >= 0; i-- { v := ipiv[i] blas64.Swap(m, blas64.Vector{1, p[i*ldp:]}, blas64.Vector{1, p[v*ldp:]}) } P := blas64.General{ Rows: m, Cols: m, Stride: m, Data: p, } aComp := blas64.General{ Rows: m, Cols: n, Stride: lda, Data: make([]float64, m*lda), } copy(aComp.Data, factorized) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, P, LU, 0, aComp) if !floats.EqualApprox(aComp.Data, original, tol) { if print { t.Errorf("PLU multiplication does not match original matrix.\nWant: %v\nGot: %v", original, aComp.Data) return } t.Error("PLU multiplication does not match original matrix.") } }
func Dgelq2Test(t *testing.T, impl Dgelq2er) { for c, test := range []struct { m, n, lda int }{ {1, 1, 0}, {2, 2, 0}, {3, 2, 0}, {2, 3, 0}, {1, 12, 0}, {2, 6, 0}, {3, 4, 0}, {4, 3, 0}, {6, 2, 0}, {1, 12, 0}, {1, 1, 20}, {2, 2, 20}, {3, 2, 20}, {2, 3, 20}, {1, 12, 20}, {2, 6, 20}, {3, 4, 20}, {4, 3, 20}, {6, 2, 20}, {1, 12, 20}, } { n := test.n m := test.m lda := test.lda if lda == 0 { lda = test.n } k := min(m, n) tau := make([]float64, k) for i := range tau { tau[i] = rand.Float64() } work := make([]float64, m) for i := range work { work[i] = rand.Float64() } a := make([]float64, m*lda) for i := 0; i < m*lda; i++ { a[i] = rand.Float64() } aCopy := make([]float64, len(a)) copy(aCopy, a) impl.Dgelq2(m, n, a, lda, tau, work) Q := constructQ("LQ", m, n, a, lda, tau) // Check that Q is orthonormal for i := 0; i < Q.Rows; i++ { nrm := blas64.Nrm2(Q.Cols, blas64.Vector{Inc: 1, Data: Q.Data[i*Q.Stride:]}) if math.Abs(nrm-1) > 1e-14 { t.Errorf("Q not normal. Norm is %v", nrm) } for j := 0; j < i; j++ { dot := blas64.Dot(Q.Rows, blas64.Vector{Inc: 1, Data: Q.Data[i*Q.Stride:]}, blas64.Vector{Inc: 1, Data: Q.Data[j*Q.Stride:]}, ) if math.Abs(dot) > 1e-14 { t.Errorf("Q not orthogonal. Dot is %v", dot) } } } L := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } for i := 0; i < m; i++ { for j := 0; j <= min(i, n-1); j++ { L.Data[i*L.Stride+j] = a[i*lda+j] } } ans := blas64.General{ Rows: m, Cols: n, Stride: lda, Data: make([]float64, m*lda), } copy(ans.Data, aCopy) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, L, Q, 0, ans) if !floats.EqualApprox(aCopy, ans.Data, 1e-14) { t.Errorf("Case %v, LQ mismatch. Want %v, got %v.", c, aCopy, ans.Data) } } }
func testDgebak(t *testing.T, impl Dgebaker, job lapack.Job, side blas.Side, ilo, ihi int, v blas64.General, rnd *rand.Rand) { const tol = 1e-15 n := v.Rows m := v.Cols extra := v.Stride - v.Cols // Create D and D^{-1} by generating random scales between ilo and ihi. d := eye(n, n) dinv := eye(n, n) scale := nanSlice(n) if job == lapack.Scale || job == lapack.PermuteScale { if ilo == ihi { scale[ilo] = 1 } else { for i := ilo; i <= ihi; i++ { scale[i] = 2 * rnd.Float64() d.Data[i*d.Stride+i] = scale[i] dinv.Data[i*dinv.Stride+i] = 1 / scale[i] } } } // Create P by generating random column swaps. p := eye(n, n) if job == lapack.Permute || job == lapack.PermuteScale { // Make up some random permutations. for i := n - 1; i > ihi; i-- { scale[i] = float64(rnd.Intn(i + 1)) blas64.Swap(n, blas64.Vector{p.Stride, p.Data[i:]}, blas64.Vector{p.Stride, p.Data[int(scale[i]):]}) } for i := 0; i < ilo; i++ { scale[i] = float64(i + rnd.Intn(ihi-i+1)) blas64.Swap(n, blas64.Vector{p.Stride, p.Data[i:]}, blas64.Vector{p.Stride, p.Data[int(scale[i]):]}) } } got := cloneGeneral(v) impl.Dgebak(job, side, n, ilo, ihi, scale, m, got.Data, got.Stride) prefix := fmt.Sprintf("Case job=%v, side=%v, n=%v, ilo=%v, ihi=%v, m=%v, extra=%v", job, side, n, ilo, ihi, m, extra) if !generalOutsideAllNaN(got) { t.Errorf("%v: out-of-range write to V\n%v", prefix, got.Data) } // Compute D*V or D^{-1}*V and store into dv. dv := zeros(n, m, m) if side == blas.Right { blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, d, v, 0, dv) } else { blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, dinv, v, 0, dv) } // Compute P*D*V or P*D^{-1}*V and store into want. want := zeros(n, m, m) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, p, dv, 0, want) if !equalApproxGeneral(want, got, tol) { t.Errorf("%v: unexpected value of V", prefix) } }
func Dgeqr2Test(t *testing.T, impl Dgeqr2er) { for c, test := range []struct { m, n, lda int }{ {1, 1, 0}, {2, 2, 0}, {3, 2, 0}, {2, 3, 0}, {1, 12, 0}, {2, 6, 0}, {3, 4, 0}, {4, 3, 0}, {6, 2, 0}, {12, 1, 0}, {1, 1, 20}, {2, 2, 20}, {3, 2, 20}, {2, 3, 20}, {1, 12, 20}, {2, 6, 20}, {3, 4, 20}, {4, 3, 20}, {6, 2, 20}, {12, 1, 20}, } { n := test.n m := test.m lda := test.lda if lda == 0 { lda = test.n } a := make([]float64, m*lda) for i := range a { a[i] = rand.Float64() } aCopy := make([]float64, len(a)) k := min(m, n) tau := make([]float64, k) for i := range tau { tau[i] = rand.Float64() } work := make([]float64, n) for i := range work { work[i] = rand.Float64() } copy(aCopy, a) impl.Dgeqr2(m, n, a, lda, tau, work) // Test that the QR factorization has completed successfully. Compute // Q based on the vectors. q := constructQ("QR", m, n, a, lda, tau) // Check that q is orthonormal for i := 0; i < m; i++ { nrm := blas64.Nrm2(m, blas64.Vector{1, q.Data[i*m:]}) if math.Abs(nrm-1) > 1e-14 { t.Errorf("Case %v, q not normal", c) } for j := 0; j < i; j++ { dot := blas64.Dot(m, blas64.Vector{1, q.Data[i*m:]}, blas64.Vector{1, q.Data[j*m:]}) if math.Abs(dot) > 1e-14 { t.Errorf("Case %v, q not orthogonal", i) } } } // Check that A = Q * R r := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } for i := 0; i < m; i++ { for j := i; j < n; j++ { r.Data[i*n+j] = a[i*lda+j] } } atmp := blas64.General{ Rows: m, Cols: n, Stride: lda, Data: make([]float64, m*lda), } copy(atmp.Data, a) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, q, r, 0, atmp) if !floats.EqualApprox(atmp.Data, aCopy, 1e-14) { t.Errorf("Q*R != a") } } }
// constructQPBidiagonal constructs Q or P from the Bidiagonal decomposition // computed by dlabrd and bgebd2. func constructQPBidiagonal(vect lapack.DecompUpdate, m, n, nb int, a []float64, lda int, tau []float64) blas64.General { sz := n if vect == lapack.ApplyQ { sz = m } var ldv int var v blas64.General if vect == lapack.ApplyQ { ldv = nb v = blas64.General{ Rows: m, Cols: nb, Stride: ldv, Data: make([]float64, m*ldv), } } else { ldv = n v = blas64.General{ Rows: nb, Cols: n, Stride: ldv, Data: make([]float64, m*ldv), } } if vect == lapack.ApplyQ { if m >= n { for i := 0; i < m; i++ { for j := 0; j <= min(nb-1, i); j++ { if i == j { v.Data[i*ldv+j] = 1 continue } v.Data[i*ldv+j] = a[i*lda+j] } } } else { for i := 1; i < m; i++ { for j := 0; j <= min(nb-1, i-1); j++ { if i-1 == j { v.Data[i*ldv+j] = 1 continue } v.Data[i*ldv+j] = a[i*lda+j] } } } } else { if m < n { for i := 0; i < nb; i++ { for j := i; j < n; j++ { if i == j { v.Data[i*ldv+j] = 1 continue } v.Data[i*ldv+j] = a[i*lda+j] } } } else { for i := 0; i < nb; i++ { for j := i + 1; j < n; j++ { if j-1 == i { v.Data[i*ldv+j] = 1 continue } v.Data[i*ldv+j] = a[i*lda+j] } } } } // The variable name is a computation of Q, but the algorithm is mostly the // same for computing P (just with different data). qMat := blas64.General{ Rows: sz, Cols: sz, Stride: sz, Data: make([]float64, sz*sz), } hMat := blas64.General{ Rows: sz, Cols: sz, Stride: sz, Data: make([]float64, sz*sz), } // set Q to I for i := 0; i < sz; i++ { qMat.Data[i*qMat.Stride+i] = 1 } for i := 0; i < nb; i++ { qCopy := blas64.General{Rows: qMat.Rows, Cols: qMat.Cols, Stride: qMat.Stride, Data: make([]float64, len(qMat.Data))} copy(qCopy.Data, qMat.Data) // Set g and h to I for i := 0; i < sz; i++ { for j := 0; j < sz; j++ { if i == j { hMat.Data[i*sz+j] = 1 } else { hMat.Data[i*sz+j] = 0 } } } var vi blas64.Vector // H -= tauQ[i] * v[i] * v[i]^t if vect == lapack.ApplyQ { vi = blas64.Vector{ Inc: v.Stride, Data: v.Data[i:], } } else { vi = blas64.Vector{ Inc: 1, Data: v.Data[i*v.Stride:], } } blas64.Ger(-tau[i], vi, vi, hMat) // Q = Q * G[1] blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, hMat, 0, qMat) } return qMat }
func (c *convLayerRResult) propagateSingle(input, inputR, upstream, upstreamR, downstream, downstreamR linalg.Vector, rgrad autofunc.RGradient, grad autofunc.Gradient) { upstreamMat := blas64.General{ Rows: c.Layer.OutputWidth() * c.Layer.OutputHeight(), Cols: c.Layer.OutputDepth(), Stride: c.Layer.OutputDepth(), Data: upstream, } upstreamMatR := blas64.General{ Rows: c.Layer.OutputWidth() * c.Layer.OutputHeight(), Cols: c.Layer.OutputDepth(), Stride: c.Layer.OutputDepth(), Data: upstreamR, } if downstream != nil { inDeriv := c.Layer.inputToMatrix(input) filterMat := blas64.General{ Rows: len(c.Layer.Filters), Cols: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Stride: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Data: c.Layer.FilterVar.Vector, } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, upstreamMat, filterMat, 0, inDeriv) flattened := NewTensor3Col(c.Layer.InputWidth, c.Layer.InputHeight, c.Layer.InputDepth, inDeriv.Data, c.Layer.FilterWidth, c.Layer.FilterHeight, c.Layer.Stride) copy(downstream, flattened.Data) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, upstreamMatR, filterMat, 0, inDeriv) if c.FiltersR != nil { filterMat.Data = c.FiltersR blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, upstreamMat, filterMat, 1, inDeriv) } flattened = NewTensor3Col(c.Layer.InputWidth, c.Layer.InputHeight, c.Layer.InputDepth, inDeriv.Data, c.Layer.FilterWidth, c.Layer.FilterHeight, c.Layer.Stride) copy(downstreamR, flattened.Data) } filterGrad, hasFilterGrad := grad[c.Layer.FilterVar] filterRGrad, hasFilterRGrad := rgrad[c.Layer.FilterVar] var inMatrix blas64.General if hasFilterGrad || hasFilterRGrad { inMatrix = c.Layer.inputToMatrix(input) } if hasFilterGrad { destMat := blas64.General{ Rows: len(c.Layer.Filters), Cols: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Stride: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Data: filterGrad, } blas64.Gemm(blas.Trans, blas.NoTrans, 1, upstreamMat, inMatrix, 1, destMat) } if hasFilterRGrad { inMatrixR := c.Layer.inputToMatrix(inputR) destMat := blas64.General{ Rows: len(c.Layer.Filters), Cols: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Stride: c.Layer.FilterWidth * c.Layer.FilterHeight * c.Layer.InputDepth, Data: filterRGrad, } blas64.Gemm(blas.Trans, blas.NoTrans, 1, upstreamMatR, inMatrix, 1, destMat) blas64.Gemm(blas.Trans, blas.NoTrans, 1, upstreamMat, inMatrixR, 1, destMat) } }
// Mul takes the matrix product of a and b, placing the result in the receiver. // // See the Muler interface for more information. func (m *Dense) Mul(a, b Matrix) { ar, ac := a.Dims() br, bc := b.Dims() if ac != br { panic(ErrShape) } aU, aTrans := untranspose(a) bU, bTrans := untranspose(b) m.reuseAs(ar, bc) var restore func() if m == aU { m, restore = m.isolatedWorkspace(aU) defer restore() } else if m == bU { m, restore = m.isolatedWorkspace(bU) defer restore() } aT := blas.NoTrans if aTrans { aT = blas.Trans } bT := blas.NoTrans if bTrans { bT = blas.Trans } // Some of the cases do not have a transpose option, so create // temporary memory. // C = A^T * B = (B^T * A)^T // C^T = B^T * A. if aU, ok := aU.(RawMatrixer); ok { amat := aU.RawMatrix() if bU, ok := bU.(RawMatrixer); ok { bmat := bU.RawMatrix() blas64.Gemm(aT, bT, 1, amat, bmat, 0, m.mat) return } if bU, ok := bU.(RawSymmetricer); ok { bmat := bU.RawSymmetric() if aTrans { c := getWorkspace(ac, ar, false) blas64.Symm(blas.Left, 1, bmat, amat, 0, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } blas64.Symm(blas.Right, 1, bmat, amat, 0, m.mat) return } if bU, ok := bU.(RawTriangular); ok { // Trmm updates in place, so copy aU first. bmat := bU.RawTriangular() if aTrans { c := getWorkspace(ac, ar, false) var tmp Dense tmp.SetRawMatrix(aU.RawMatrix()) c.Copy(&tmp) bT := blas.Trans if bTrans { bT = blas.NoTrans } blas64.Trmm(blas.Left, bT, 1, bmat, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } m.Copy(a) blas64.Trmm(blas.Right, bT, 1, bmat, m.mat) return } if bU, ok := bU.(*Vector); ok { bvec := bU.RawVector() if bTrans { // {ar,1} x {1,bc}, which is not a vector. // Instead, construct B as a General. bmat := blas64.General{ Rows: bc, Cols: 1, Stride: bvec.Inc, Data: bvec.Data, } blas64.Gemm(aT, bT, 1, amat, bmat, 0, m.mat) return } cvec := blas64.Vector{ Inc: m.mat.Stride, Data: m.mat.Data, } blas64.Gemv(aT, 1, amat, bvec, 0, cvec) return } } if bU, ok := bU.(RawMatrixer); ok { bmat := bU.RawMatrix() if aU, ok := aU.(RawSymmetricer); ok { amat := aU.RawSymmetric() if bTrans { c := getWorkspace(bc, br, false) blas64.Symm(blas.Right, 1, amat, bmat, 0, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } blas64.Symm(blas.Left, 1, amat, bmat, 0, m.mat) return } if aU, ok := aU.(RawTriangular); ok { // Trmm updates in place, so copy bU first. amat := aU.RawTriangular() if bTrans { c := getWorkspace(bc, br, false) var tmp Dense tmp.SetRawMatrix(bU.RawMatrix()) c.Copy(&tmp) aT := blas.Trans if aTrans { aT = blas.NoTrans } blas64.Trmm(blas.Right, aT, 1, amat, c.mat) strictCopy(m, c.T()) putWorkspace(c) return } m.Copy(b) blas64.Trmm(blas.Left, aT, 1, amat, m.mat) return } if aU, ok := aU.(*Vector); ok { avec := aU.RawVector() if aTrans { // {1,ac} x {ac, bc} // Transpose B so that the vector is on the right. cvec := blas64.Vector{ Inc: 1, Data: m.mat.Data, } bT := blas.Trans if bTrans { bT = blas.NoTrans } blas64.Gemv(bT, 1, bmat, avec, 0, cvec) return } // {ar,1} x {1,bc} which is not a vector result. // Instead, construct A as a General. amat := blas64.General{ Rows: ar, Cols: 1, Stride: avec.Inc, Data: avec.Data, } blas64.Gemm(aT, bT, 1, amat, bmat, 0, m.mat) return } } if aU, ok := aU.(Vectorer); ok { if bU, ok := bU.(Vectorer); ok { row := make([]float64, ac) col := make([]float64, br) if aTrans { if bTrans { for r := 0; r < ar; r++ { dataTmp := m.mat.Data[r*m.mat.Stride : r*m.mat.Stride+bc] for c := 0; c < bc; c++ { dataTmp[c] = blas64.Dot(ac, blas64.Vector{Inc: 1, Data: aU.Col(row, r)}, blas64.Vector{Inc: 1, Data: bU.Row(col, c)}, ) } } return } // TODO(jonlawlor): determine if (b*a)' is more efficient for r := 0; r < ar; r++ { dataTmp := m.mat.Data[r*m.mat.Stride : r*m.mat.Stride+bc] for c := 0; c < bc; c++ { dataTmp[c] = blas64.Dot(ac, blas64.Vector{Inc: 1, Data: aU.Col(row, r)}, blas64.Vector{Inc: 1, Data: bU.Col(col, c)}, ) } } return } if bTrans { for r := 0; r < ar; r++ { dataTmp := m.mat.Data[r*m.mat.Stride : r*m.mat.Stride+bc] for c := 0; c < bc; c++ { dataTmp[c] = blas64.Dot(ac, blas64.Vector{Inc: 1, Data: aU.Row(row, r)}, blas64.Vector{Inc: 1, Data: bU.Row(col, c)}, ) } } return } for r := 0; r < ar; r++ { dataTmp := m.mat.Data[r*m.mat.Stride : r*m.mat.Stride+bc] for c := 0; c < bc; c++ { dataTmp[c] = blas64.Dot(ac, blas64.Vector{Inc: 1, Data: aU.Row(row, r)}, blas64.Vector{Inc: 1, Data: bU.Col(col, c)}, ) } } return } } row := make([]float64, ac) for r := 0; r < ar; r++ { for i := range row { row[i] = a.At(r, i) } for c := 0; c < bc; c++ { var v float64 for i, e := range row { v += e * b.At(i, c) } m.mat.Data[r*m.mat.Stride+c] = v } } }
// constructQ constructs the Q matrix from the result of dgeqrf and dgeqr2 func constructQ(kind string, m, n int, a []float64, lda int, tau []float64) blas64.General { k := min(m, n) var sz int switch kind { case "QR": sz = m case "LQ": sz = n } q := blas64.General{ Rows: sz, Cols: sz, Stride: sz, Data: make([]float64, sz*sz), } for i := 0; i < sz; i++ { q.Data[i*sz+i] = 1 } qCopy := blas64.General{ Rows: q.Rows, Cols: q.Cols, Stride: q.Stride, Data: make([]float64, len(q.Data)), } for i := 0; i < k; i++ { h := blas64.General{ Rows: sz, Cols: sz, Stride: sz, Data: make([]float64, sz*sz), } for j := 0; j < sz; j++ { h.Data[j*sz+j] = 1 } vVec := blas64.Vector{ Inc: 1, Data: make([]float64, sz), } for j := 0; j < i; j++ { vVec.Data[j] = 0 } vVec.Data[i] = 1 switch kind { case "QR": for j := i + 1; j < sz; j++ { vVec.Data[j] = a[lda*j+i] } case "LQ": for j := i + 1; j < sz; j++ { vVec.Data[j] = a[i*lda+j] } } blas64.Ger(-tau[i], vVec, vVec, h) copy(qCopy.Data, q.Data) // Mulitply q by the new h switch kind { case "QR": blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, h, 0, q) case "LQ": blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, h, qCopy, 0, q) } } return q }
func testDlaqr5(t *testing.T, impl Dlaqr5er, n, extra, kacc22 int, rnd *rand.Rand) { wantt := true wantz := true nshfts := 2 * n sr := make([]float64, nshfts) si := make([]float64, nshfts) for i := 0; i < n; i++ { re := rnd.NormFloat64() im := rnd.NormFloat64() sr[2*i], sr[2*i+1] = re, re si[2*i], si[2*i+1] = im, -im } ktop := rnd.Intn(n) kbot := rnd.Intn(n) if kbot < ktop { ktop, kbot = kbot, ktop } v := randomGeneral(nshfts/2, 3, 3+extra, rnd) u := randomGeneral(3*nshfts-3, 3*nshfts-3, 3*nshfts-3+extra, rnd) nh := n wh := randomGeneral(3*nshfts-3, n, n+extra, rnd) nv := n wv := randomGeneral(n, 3*nshfts-3, 3*nshfts-3+extra, rnd) h := randomHessenberg(n, n+extra, rnd) if ktop > 0 { h.Data[ktop*h.Stride+ktop-1] = 0 } if kbot < n-1 { h.Data[(kbot+1)*h.Stride+kbot] = 0 } hCopy := h hCopy.Data = make([]float64, len(h.Data)) copy(hCopy.Data, h.Data) z := eye(n, n+extra) impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h.Data, h.Stride, 0, n-1, z.Data, z.Stride, v.Data, v.Stride, u.Data, u.Stride, nv, wv.Data, wv.Stride, nh, wh.Data, wh.Stride) prefix := fmt.Sprintf("Case n=%v, extra=%v, kacc22=%v", n, extra, kacc22) if !generalOutsideAllNaN(h) { t.Errorf("%v: out-of-range write to H\n%v", prefix, h.Data) } if !generalOutsideAllNaN(z) { t.Errorf("%v: out-of-range write to Z\n%v", prefix, z.Data) } if !generalOutsideAllNaN(u) { t.Errorf("%v: out-of-range write to U\n%v", prefix, u.Data) } if !generalOutsideAllNaN(v) { t.Errorf("%v: out-of-range write to V\n%v", prefix, v.Data) } if !generalOutsideAllNaN(wh) { t.Errorf("%v: out-of-range write to WH\n%v", prefix, wh.Data) } if !generalOutsideAllNaN(wv) { t.Errorf("%v: out-of-range write to WV\n%v", prefix, wv.Data) } for i := 0; i < n; i++ { for j := 0; j < i-1; j++ { if h.Data[i*h.Stride+j] != 0 { t.Errorf("%v: H is not Hessenberg, H[%v,%v]!=0", prefix, i, j) } } } if !isOrthonormal(z) { t.Errorf("%v: Z is not orthogonal", prefix) } // Construct Z^T * HOrig * Z and check that it is equal to H from Dlaqr5. hz := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, hCopy, z, 0, hz) zhz := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } blas64.Gemm(blas.Trans, blas.NoTrans, 1, z, hz, 0, zhz) for i := 0; i < n; i++ { for j := 0; j < n; j++ { diff := zhz.Data[i*zhz.Stride+j] - h.Data[i*h.Stride+j] if math.Abs(diff) > 1e-13 { t.Errorf("%v: Z^T*HOrig*Z and H are not equal, diff at [%v,%v]=%v", prefix, i, j, diff) } } } }
// svdCheck checks that the singular value decomposition correctly multiplies back // to the original matrix. func svdCheck(t *testing.T, thin bool, errStr string, m, n int, s, a, u []float64, ldu int, vt []float64, ldvt int, aCopy []float64, lda int) { sigma := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } for i := 0; i < min(m, n); i++ { sigma.Data[i*sigma.Stride+i] = s[i] } uMat := blas64.General{ Rows: m, Cols: m, Stride: ldu, Data: u, } vTMat := blas64.General{ Rows: n, Cols: n, Stride: ldvt, Data: vt, } if thin { sigma.Rows = min(m, n) sigma.Cols = min(m, n) uMat.Cols = min(m, n) vTMat.Rows = min(m, n) } tmp := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } ans := blas64.General{ Rows: m, Cols: n, Stride: lda, Data: make([]float64, m*lda), } copy(ans.Data, a) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, uMat, sigma, 0, tmp) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp, vTMat, 0, ans) if !floats.EqualApprox(ans.Data, aCopy, 1e-8) { t.Errorf("Decomposition mismatch. Trim = %v, %s", thin, errStr) } if !thin { // Check that U and V are orthogonal. for i := 0; i < uMat.Rows; i++ { for j := i + 1; j < uMat.Rows; j++ { dot := blas64.Dot(uMat.Cols, blas64.Vector{Inc: 1, Data: uMat.Data[i*uMat.Stride:]}, blas64.Vector{Inc: 1, Data: uMat.Data[j*uMat.Stride:]}, ) if dot > 1e-8 { t.Errorf("U not orthogonal %s", errStr) } } } for i := 0; i < vTMat.Rows; i++ { for j := i + 1; j < vTMat.Rows; j++ { dot := blas64.Dot(vTMat.Cols, blas64.Vector{Inc: 1, Data: vTMat.Data[i*vTMat.Stride:]}, blas64.Vector{Inc: 1, Data: vTMat.Data[j*vTMat.Stride:]}, ) if dot > 1e-8 { t.Errorf("V not orthogonal %s", errStr) } } } } }
func DlarftTest(t *testing.T, impl Dlarfter) { rnd := rand.New(rand.NewSource(1)) for _, store := range []lapack.StoreV{lapack.ColumnWise, lapack.RowWise} { for _, direct := range []lapack.Direct{lapack.Forward, lapack.Backward} { for _, test := range []struct { m, n, ldv, ldt int }{ {6, 6, 0, 0}, {8, 6, 0, 0}, {6, 8, 0, 0}, {6, 6, 10, 15}, {8, 6, 10, 15}, {6, 8, 10, 15}, {6, 6, 15, 10}, {8, 6, 15, 10}, {6, 8, 15, 10}, } { // Generate a matrix m := test.m n := test.n lda := n if lda == 0 { lda = n } a := make([]float64, m*lda) for i := 0; i < m; i++ { for j := 0; j < lda; j++ { a[i*lda+j] = rnd.Float64() } } // Use dgeqr2 to find the v vectors tau := make([]float64, n) work := make([]float64, n) impl.Dgeqr2(m, n, a, lda, tau, work) // Construct H using these answers vMatTmp := extractVMat(m, n, a, lda, lapack.Forward, lapack.ColumnWise) vMat := constructVMat(vMatTmp, store, direct) v := vMat.Data ldv := vMat.Stride h := constructH(tau, vMat, store, direct) k := min(m, n) ldt := test.ldt if ldt == 0 { ldt = k } // Find T from the actual function tm := make([]float64, k*ldt) for i := range tm { tm[i] = 100 + rnd.Float64() } // The v data has been put into a. impl.Dlarft(direct, store, m, k, v, ldv, tau, tm, ldt) tData := make([]float64, len(tm)) copy(tData, tm) if direct == lapack.Forward { // Zero out the lower traingular portion. for i := 0; i < k; i++ { for j := 0; j < i; j++ { tData[i*ldt+j] = 0 } } } else { // Zero out the upper traingular portion. for i := 0; i < k; i++ { for j := i + 1; j < k; j++ { tData[i*ldt+j] = 0 } } } T := blas64.General{ Rows: k, Cols: k, Stride: ldt, Data: tData, } vMatT := blas64.General{ Rows: vMat.Cols, Cols: vMat.Rows, Stride: vMat.Rows, Data: make([]float64, vMat.Cols*vMat.Rows), } for i := 0; i < vMat.Rows; i++ { for j := 0; j < vMat.Cols; j++ { vMatT.Data[j*vMatT.Stride+i] = vMat.Data[i*vMat.Stride+j] } } var comp blas64.General if store == lapack.ColumnWise { // H = I - V * T * V^T tmp := blas64.General{ Rows: T.Rows, Cols: vMatT.Cols, Stride: vMatT.Cols, Data: make([]float64, T.Rows*vMatT.Cols), } // T * V^T blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, T, vMatT, 0, tmp) comp = blas64.General{ Rows: vMat.Rows, Cols: tmp.Cols, Stride: tmp.Cols, Data: make([]float64, vMat.Rows*tmp.Cols), } // V * (T * V^T) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, vMat, tmp, 0, comp) } else { // H = I - V^T * T * V tmp := blas64.General{ Rows: T.Rows, Cols: vMat.Cols, Stride: vMat.Cols, Data: make([]float64, T.Rows*vMat.Cols), } // T * V blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, T, vMat, 0, tmp) comp = blas64.General{ Rows: vMatT.Rows, Cols: tmp.Cols, Stride: tmp.Cols, Data: make([]float64, vMatT.Rows*tmp.Cols), } // V^T * (T * V) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, vMatT, tmp, 0, comp) } // I - V^T * T * V for i := 0; i < comp.Rows; i++ { for j := 0; j < comp.Cols; j++ { comp.Data[i*m+j] *= -1 if i == j { comp.Data[i*m+j] += 1 } } } if !floats.EqualApprox(comp.Data, h.Data, 1e-14) { t.Errorf("T does not construct proper H. Store = %v, Direct = %v.\nWant %v\ngot %v.", string(store), string(direct), h.Data, comp.Data) } } } } }
func DlatrdTest(t *testing.T, impl Dlatrder) { rnd := rand.New(rand.NewSource(1)) for _, uplo := range []blas.Uplo{blas.Upper, blas.Lower} { for _, test := range []struct { n, nb, lda, ldw int }{ {5, 2, 0, 0}, {5, 5, 0, 0}, {5, 3, 10, 11}, {5, 5, 10, 11}, } { n := test.n nb := test.nb lda := test.lda if lda == 0 { lda = n } ldw := test.ldw if ldw == 0 { ldw = nb } a := make([]float64, n*lda) for i := range a { a[i] = rnd.NormFloat64() } e := make([]float64, n-1) for i := range e { e[i] = math.NaN() } tau := make([]float64, n-1) for i := range tau { tau[i] = math.NaN() } w := make([]float64, n*ldw) for i := range w { w[i] = math.NaN() } aCopy := make([]float64, len(a)) copy(aCopy, a) impl.Dlatrd(uplo, n, nb, a, lda, e, tau, w, ldw) // Construct Q. ldq := n q := blas64.General{ Rows: n, Cols: n, Stride: ldq, Data: make([]float64, n*ldq), } for i := 0; i < n; i++ { q.Data[i*ldq+i] = 1 } if uplo == blas.Upper { for i := n - 1; i >= n-nb; i-- { if i == 0 { continue } h := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } for j := 0; j < n; j++ { h.Data[j*n+j] = 1 } v := blas64.Vector{ Inc: 1, Data: make([]float64, n), } for j := 0; j < i-1; j++ { v.Data[j] = a[j*lda+i] } v.Data[i-1] = 1 blas64.Ger(-tau[i-1], v, v, h) qTmp := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } copy(qTmp.Data, q.Data) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qTmp, h, 0, q) } } else { for i := 0; i < nb; i++ { if i == n-1 { continue } h := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } for j := 0; j < n; j++ { h.Data[j*n+j] = 1 } v := blas64.Vector{ Inc: 1, Data: make([]float64, n), } v.Data[i+1] = 1 for j := i + 2; j < n; j++ { v.Data[j] = a[j*lda+i] } blas64.Ger(-tau[i], v, v, h) qTmp := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } copy(qTmp.Data, q.Data) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qTmp, h, 0, q) } } errStr := fmt.Sprintf("isUpper = %v, n = %v, nb = %v", uplo == blas.Upper, n, nb) if !isOrthonormal(q) { t.Errorf("Q not orthonormal. %s", errStr) } aGen := genFromSym(blas64.Symmetric{N: n, Stride: lda, Uplo: uplo, Data: aCopy}) if !dlatrdCheckDecomposition(t, uplo, n, nb, e, tau, a, lda, aGen, q) { t.Errorf("Decomposition mismatch. %s", errStr) } } } }
// checkBidiagonal checks the bidiagonal decomposition from dlabrd and dgebd2. // The input to this function is the answer returned from the routines, stored // in a, d, e, tauP, and tauQ. The data of original A matrix (before // decomposition) is input in aCopy. // // checkBidiagonal constructs the V and U matrices, and from them constructs Q // and P. Using these constructions, it checks that Q^T * A * P and checks that // the result is bidiagonal. func checkBidiagonal(t *testing.T, m, n, nb int, a []float64, lda int, d, e, tauP, tauQ, aCopy []float64) { // Check the answer. // Construct V and U. qMat := constructQPBidiagonal(lapack.ApplyQ, m, n, nb, a, lda, tauQ) pMat := constructQPBidiagonal(lapack.ApplyP, m, n, nb, a, lda, tauP) // Compute Q^T * A * P aMat := blas64.General{ Rows: m, Cols: n, Stride: lda, Data: make([]float64, len(aCopy)), } copy(aMat.Data, aCopy) tmp1 := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } blas64.Gemm(blas.Trans, blas.NoTrans, 1, qMat, aMat, 0, tmp1) tmp2 := blas64.General{ Rows: m, Cols: n, Stride: n, Data: make([]float64, m*n), } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tmp1, pMat, 0, tmp2) // Check that the first nb rows and cols of tm2 are upper bidiagonal // if m >= n, and lower bidiagonal otherwise. correctDiag := true matchD := true matchE := true for i := 0; i < m; i++ { for j := 0; j < n; j++ { if i >= nb && j >= nb { continue } v := tmp2.Data[i*tmp2.Stride+j] if i == j { if math.Abs(d[i]-v) > 1e-12 { matchD = false } continue } if m >= n && i == j-1 { if math.Abs(e[j-1]-v) > 1e-12 { matchE = false } continue } if m < n && i-1 == j { if math.Abs(e[i-1]-v) > 1e-12 { matchE = false } continue } if math.Abs(v) > 1e-12 { correctDiag = false } } } if !correctDiag { t.Errorf("Updated A not bi-diagonal") } if !matchD { fmt.Println("d = ", d) t.Errorf("D Mismatch") } if !matchE { t.Errorf("E mismatch") } }
func testDlasy2(t *testing.T, impl Dlasy2er, tranl, tranr bool, isgn, n1, n2, extra int, rnd *rand.Rand) { const tol = 1e-11 tl := randomGeneral(n1, n1, n1+extra, rnd) tr := randomGeneral(n2, n2, n2+extra, rnd) b := randomGeneral(n1, n2, n2+extra, rnd) x := randomGeneral(n1, n2, n2+extra, rnd) scale, xnorm, ok := impl.Dlasy2(tranl, tranr, isgn, n1, n2, tl.Data, tl.Stride, tr.Data, tr.Stride, b.Data, b.Stride, x.Data, x.Stride) if scale > 1 { t.Errorf("invalid value of scale, want <= 1, got %v", scale) } if n1 == 0 || n2 == 0 { return } prefix := fmt.Sprintf("Case n1=%v, n2=%v, isgn=%v", n1, n2, isgn) // Check any invalid modifications of x. if !generalOutsideAllNaN(x) { t.Errorf("%v: out-of-range write to x\n%v", prefix, x.Data) } var xnormWant float64 for i := 0; i < n1; i++ { var rowsum float64 for j := 0; j < n2; j++ { rowsum += math.Abs(x.Data[i*x.Stride+j]) } if rowsum > xnormWant { xnormWant = rowsum } } if xnormWant != xnorm { t.Errorf("%v: unexpected xnorm: want %v, got %v", prefix, xnormWant, xnorm) } // Multiply b by scale to get the wanted right-hand side. for i := 0; i < n1; i++ { for j := 0; j < n2; j++ { b.Data[i*b.Stride+j] *= scale } } // Compute the wanted left-hand side. lhsWant := randomGeneral(n1, n2, n2, rnd) if tranl { blas64.Gemm(blas.Trans, blas.NoTrans, 1, tl, x, 0, lhsWant) } else { blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, tl, x, 0, lhsWant) } if tranr { blas64.Gemm(blas.NoTrans, blas.Trans, float64(isgn), x, tr, 1, lhsWant) } else { blas64.Gemm(blas.NoTrans, blas.NoTrans, float64(isgn), x, tr, 1, lhsWant) } // Compare them. for i := 0; i < n1; i++ { for j := 0; j < n2; j++ { diff := lhsWant.Data[i*lhsWant.Stride+j] - b.Data[i*b.Stride+j] if math.Abs(diff) > tol && ok { t.Errorf("%v: unexpected result, diff[%v,%v]=%v", prefix, i, j, diff) } } } }
func testDgehd2(t *testing.T, impl Dgehd2er, n, extra int, rnd *rand.Rand) { ilo := rnd.Intn(n) ihi := rnd.Intn(n) if ilo > ihi { ilo, ihi = ihi, ilo } tau := nanSlice(n - 1) work := nanSlice(n) a := randomGeneral(n, n, n+extra, rnd) // NaN out elements under the diagonal except // for the [ilo:ihi,ilo:ihi] block. for i := 1; i <= ihi; i++ { for j := 0; j < min(ilo, i); j++ { a.Data[i*a.Stride+j] = math.NaN() } } for i := ihi + 1; i < n; i++ { for j := 0; j < i; j++ { a.Data[i*a.Stride+j] = math.NaN() } } aCopy := a aCopy.Data = make([]float64, len(a.Data)) copy(aCopy.Data, a.Data) impl.Dgehd2(n, ilo, ihi, a.Data, a.Stride, tau, work) prefix := fmt.Sprintf("Case n=%v, ilo=%v, ihi=%v, extra=%v", n, ilo, ihi, extra) // Check any invalid modifications of a. if !generalOutsideAllNaN(a) { t.Errorf("%v: out-of-range write to A\n%v", prefix, a.Data) } for i := ilo; i <= ihi; i++ { for j := 0; j < min(ilo, i); j++ { if !math.IsNaN(a.Data[i*a.Stride+j]) { t.Errorf("%v: expected NaN at A[%v,%v]", prefix, i, j) } } } for i := ihi + 1; i < n; i++ { for j := 0; j < i; j++ { if !math.IsNaN(a.Data[i*a.Stride+j]) { t.Errorf("%v: expected NaN at A[%v,%v]", prefix, i, j) } } } for i := 0; i <= ilo; i++ { for j := i; j < ilo+1; j++ { if a.Data[i*a.Stride+j] != aCopy.Data[i*aCopy.Stride+j] { t.Errorf("%v: unexpected modification at A[%v,%v]", prefix, i, j) } } for j := ihi + 1; j < n; j++ { if a.Data[i*a.Stride+j] != aCopy.Data[i*aCopy.Stride+j] { t.Errorf("%v: unexpected modification at A[%v,%v]", prefix, i, j) } } } for i := ihi + 1; i < n; i++ { for j := i; j < n; j++ { if a.Data[i*a.Stride+j] != aCopy.Data[i*aCopy.Stride+j] { t.Errorf("%v: unexpected modification at A[%v,%v]", prefix, i, j) } } } // Check that tau has been assigned properly. for i, v := range tau { if i < ilo || i >= ihi { if !math.IsNaN(v) { t.Errorf("%v: expected NaN at tau[%v]", prefix, i) } } else { if math.IsNaN(v) { t.Errorf("%v: unexpected NaN at tau[%v]", prefix, i) } } } // Extract Q and check that it is orthogonal. q := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } for i := 0; i < q.Rows; i++ { q.Data[i*q.Stride+i] = 1 } qCopy := q qCopy.Data = make([]float64, len(q.Data)) for j := ilo; j < ihi; j++ { h := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } for i := 0; i < h.Rows; i++ { h.Data[i*h.Stride+i] = 1 } v := blas64.Vector{ Inc: 1, Data: make([]float64, n), } v.Data[j+1] = 1 for i := j + 2; i < ihi+1; i++ { v.Data[i] = a.Data[i*a.Stride+j] } blas64.Ger(-tau[j], v, v, h) copy(qCopy.Data, q.Data) blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, qCopy, h, 0, q) } if !isOrthonormal(q) { t.Errorf("%v: Q is not orthogonal\nQ=%v", prefix, q) } // Overwrite NaN elements of aCopy with zeros // (we will multiply with it below). for i := 1; i <= ihi; i++ { for j := 0; j < min(ilo, i); j++ { aCopy.Data[i*aCopy.Stride+j] = 0 } } for i := ihi + 1; i < n; i++ { for j := 0; j < i; j++ { aCopy.Data[i*aCopy.Stride+j] = 0 } } // Construct Q^T * AOrig * Q and check that it is // equal to A from Dgehd2. aq := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, aCopy, q, 0, aq) qaq := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } blas64.Gemm(blas.Trans, blas.NoTrans, 1, q, aq, 0, qaq) for i := ilo; i <= ihi; i++ { for j := ilo; j <= ihi; j++ { qaqij := qaq.Data[i*qaq.Stride+j] if j < i-1 { if math.Abs(qaqij) > 1e-14 { t.Errorf("%v: Q^T*A*Q is not upper Hessenberg, [%v,%v]=%v", prefix, i, j, qaqij) } continue } diff := qaqij - a.Data[i*a.Stride+j] if math.Abs(diff) > 1e-14 { t.Errorf("%v: Q^T*AOrig*Q and A are not equal, diff at [%v,%v]=%v", prefix, i, j, diff) } } } }
// TODO: Need to add tests where one is overwritten. func TestMul(t *testing.T) { for _, test := range []struct { ar int ac int br int bc int Panics bool }{ { ar: 5, ac: 5, br: 5, bc: 5, Panics: false, }, { ar: 10, ac: 5, br: 5, bc: 3, Panics: false, }, { ar: 10, ac: 5, br: 5, bc: 8, Panics: false, }, { ar: 8, ac: 10, br: 10, bc: 3, Panics: false, }, { ar: 8, ac: 3, br: 3, bc: 10, Panics: false, }, { ar: 5, ac: 8, br: 8, bc: 10, Panics: false, }, { ar: 5, ac: 12, br: 12, bc: 8, Panics: false, }, { ar: 5, ac: 7, br: 8, bc: 10, Panics: true, }, } { ar := test.ar ac := test.ac br := test.br bc := test.bc // Generate random matrices avec := make([]float64, ar*ac) randomSlice(avec) a := NewDense(ar, ac, avec) bvec := make([]float64, br*bc) randomSlice(bvec) b := NewDense(br, bc, bvec) // Check that it panics if it is supposed to if test.Panics { c := NewDense(0, 0, nil) fn := func() { c.Mul(a, b) } pan, _ := panics(fn) if !pan { t.Errorf("Mul did not panic with dimension mismatch") } continue } cvec := make([]float64, ar*bc) // Get correct matrix multiply answer from blas64.Gemm blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, a.mat, b.mat, 0, blas64.General{Rows: ar, Cols: bc, Stride: bc, Data: cvec}, ) avecCopy := append([]float64{}, avec...) bvecCopy := append([]float64{}, bvec...) cvecCopy := append([]float64{}, cvec...) acomp := matComp{r: ar, c: ac, data: avecCopy} bcomp := matComp{r: br, c: bc, data: bvecCopy} ccomp := matComp{r: ar, c: bc, data: cvecCopy} // Do normal multiply with empty dense d := NewDense(0, 0, nil) testMul(t, a, b, d, acomp, bcomp, ccomp, false, "zero receiver") // Normal multiply with existing receiver c := NewDense(ar, bc, cvec) randomSlice(cvec) testMul(t, a, b, c, acomp, bcomp, ccomp, false, "existing receiver") // Test with vectorers avm := (*basicVectorer)(a) bvm := (*basicVectorer)(b) d.Reset() testMul(t, avm, b, d, acomp, bcomp, ccomp, true, "a vectoror with zero receiver") d.Reset() testMul(t, a, bvm, d, acomp, bcomp, ccomp, true, "b vectoror with zero receiver") d.Reset() testMul(t, avm, bvm, d, acomp, bcomp, ccomp, true, "both vectoror with zero receiver") randomSlice(cvec) testMul(t, avm, b, c, acomp, bcomp, ccomp, true, "a vectoror with existing receiver") randomSlice(cvec) testMul(t, a, bvm, c, acomp, bcomp, ccomp, true, "b vectoror with existing receiver") randomSlice(cvec) testMul(t, avm, bvm, c, acomp, bcomp, ccomp, true, "both vectoror with existing receiver") // Cast a as a basic matrix am := (*basicMatrix)(a) bm := (*basicMatrix)(b) d.Reset() testMul(t, am, b, d, acomp, bcomp, ccomp, true, "a is basic, receiver is zero") d.Reset() testMul(t, a, bm, d, acomp, bcomp, ccomp, true, "b is basic, receiver is zero") d.Reset() testMul(t, am, bm, d, acomp, bcomp, ccomp, true, "both basic, receiver is zero") randomSlice(cvec) testMul(t, am, b, d, acomp, bcomp, ccomp, true, "a is basic, receiver is full") randomSlice(cvec) testMul(t, a, bm, d, acomp, bcomp, ccomp, true, "b is basic, receiver is full") randomSlice(cvec) testMul(t, am, bm, d, acomp, bcomp, ccomp, true, "both basic, receiver is full") } }
func DorgtrTest(t *testing.T, impl Dorgtrer) { rnd := rand.New(rand.NewSource(1)) for _, uplo := range []blas.Uplo{blas.Upper, blas.Lower} { for _, test := range []struct { n, lda int }{ {6, 0}, {33, 0}, {100, 0}, {6, 10}, {33, 50}, {100, 120}, } { n := test.n lda := test.lda if lda == 0 { lda = n } a := make([]float64, n*lda) for i := range a { a[i] = rnd.NormFloat64() } aCopy := make([]float64, len(a)) copy(aCopy, a) d := make([]float64, n) e := make([]float64, n-1) tau := make([]float64, n-1) work := make([]float64, 1) impl.Dsytrd(uplo, n, a, lda, d, e, tau, work, -1) work = make([]float64, int(work[0])) impl.Dsytrd(uplo, n, a, lda, d, e, tau, work, len(work)) impl.Dorgtr(uplo, n, a, lda, tau, work, -1) work = make([]float64, int(work[0])) for i := range work { work[i] = math.NaN() } impl.Dorgtr(uplo, n, a, lda, tau, work, len(work)) q := blas64.General{ Rows: n, Cols: n, Stride: lda, Data: a, } tri := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } for i := 0; i < n; i++ { tri.Data[i*tri.Stride+i] = d[i] if i != n-1 { tri.Data[i*tri.Stride+i+1] = e[i] tri.Data[(i+1)*tri.Stride+i] = e[i] } } aMat := blas64.General{ Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n), } if uplo == blas.Upper { for i := 0; i < n; i++ { for j := i; j < n; j++ { v := aCopy[i*lda+j] aMat.Data[i*aMat.Stride+j] = v aMat.Data[j*aMat.Stride+i] = v } } } else { for i := 0; i < n; i++ { for j := 0; j <= i; j++ { v := aCopy[i*lda+j] aMat.Data[i*aMat.Stride+j] = v aMat.Data[j*aMat.Stride+i] = v } } } tmp := blas64.General{Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n)} blas64.Gemm(blas.NoTrans, blas.NoTrans, 1, aMat, q, 0, tmp) ans := blas64.General{Rows: n, Cols: n, Stride: n, Data: make([]float64, n*n)} blas64.Gemm(blas.Trans, blas.NoTrans, 1, q, tmp, 0, ans) if !floats.EqualApprox(ans.Data, tri.Data, 1e-8) { t.Errorf("Recombination mismatch. n = %v, isUpper = %v", n, uplo == blas.Upper) } } } }