// Ddot computes the dot product of the two vectors // \sum_i x[i]*y[i] func (Implementation) Ddot(n int, x []float64, incX int, y []float64, incY int) float64 { if n < 0 { panic(negativeN) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if incX == 1 && incY == 1 { if len(x) < n { panic(badLenX) } if len(y) < n { panic(badLenY) } return asm.DdotUnitary(x[:n], y) } var ix, iy int if incX < 0 { ix = (-n + 1) * incX } if incY < 0 { iy = (-n + 1) * incY } if ix >= len(x) || ix+(n-1)*incX >= len(x) { panic(badLenX) } if iy >= len(y) || iy+(n-1)*incY >= len(y) { panic(badLenY) } return asm.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) }
// Cholesky calculates the Cholesky decomposition of the matrix A and returns // whether the matrix is positive definite. The returned matrix is either a // lower triangular matrix such that A = L * L^T or an upper triangular matrix // such that A = U^T * U depending on the upper parameter. func (t *Triangular) Cholesky(a *SymDense, upper bool) (ok bool) { n := a.Symmetric() if t.isZero() { t.mat = blas64.Triangular{ N: n, Stride: n, Diag: blas.NonUnit, Data: use(t.mat.Data, n*n), } } else if n != t.mat.N { panic(ErrShape) } mat := t.mat.Data stride := t.mat.Stride if upper { t.mat.Uplo = blas.Upper for j := 0; j < n; j++ { var d float64 for k := 0; k < j; k++ { s := asm.DdotInc( mat, mat, uintptr(k), uintptr(stride), uintptr(stride), uintptr(k), uintptr(j), ) s = (a.at(j, k) - s) / t.at(k, k) t.set(k, j, s) d += s * s } d = a.at(j, j) - d if d <= 0 { t.Reset() return false } t.set(j, j, math.Sqrt(math.Max(d, 0))) } } else { t.mat.Uplo = blas.Lower for j := 0; j < n; j++ { var d float64 for k := 0; k < j; k++ { s := asm.DdotUnitary(mat[k*stride:k*stride+(n-k)], mat[j*stride:j*stride+(n-k)]) s = (a.at(j, k) - s) / t.at(k, k) t.set(j, k, s) d += s * s } d = a.at(j, j) - d if d <= 0 { t.Reset() return false } t.set(j, j, math.Sqrt(math.Max(d, 0))) } } return true }
// Inner computes the generalized inner product // x^T A y // between vectors x and y with matrix A. This is only a true inner product if // A is symmetric positive definite, though the operation works for any matrix A. // // Inner panics if len(x) != m or len(y) != n when A is an m x n matrix. func Inner(x []float64, A Matrix, y []float64) float64 { m, n := A.Dims() if len(x) != m { panic(ErrShape) } if len(y) != n { panic(ErrShape) } if m == 0 || n == 0 { return 0 } var sum float64 switch b := A.(type) { case RawSymmetricer: bmat := b.RawSymmetric() for i, xi := range x { if xi != 0 { sum += xi * asm.DdotUnitary(bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n], y[i:]) } yi := y[i] if i != n-1 && yi != 0 { sum += yi * asm.DdotUnitary(bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n], x[i+1:]) } } case RawMatrixer: bmat := b.RawMatrix() for i, xi := range x { if xi != 0 { sum += xi * asm.DdotUnitary(bmat.Data[i*bmat.Stride:i*bmat.Stride+n], y) } } default: for i, xi := range x { for j, yj := range y { sum += xi * A.At(i, j) * yj } } } return sum }
// dgemmSerial where neither a is not transposed and b is func dgemmSerialNotTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { // This style is used instead of the literal [i*stride +j]) is used because // approximately 5 times faster as of go 1.3. for i := 0; i < m; i++ { atmp := a[i*lda : i*lda+k] ctmp := c[i*ldc : i*ldc+n] for j := 0; j < n; j++ { ctmp[j] += alpha * asm.DdotUnitary(atmp, b[j*ldb:j*ldb+k]) } } }
// dgemmSerial where neither a is not transposed and b is func dgemmSerialNotTrans(a, b, c general64, alpha float64) { if debug { if a.cols != b.cols { panic("inner dimension mismatch") } if a.rows != c.rows { panic("outer dimension mismatch") } if b.rows != c.cols { panic("outer dimension mismatch") } } // This style is used instead of the literal [i*stride +j]) is used because // approximately 5 times faster as of go 1.3. for i := 0; i < a.rows; i++ { atmp := a.data[i*a.stride : i*a.stride+a.cols] ctmp := c.data[i*c.stride : i*c.stride+c.cols] for j := 0; j < b.rows; j++ { ctmp[j] += alpha * asm.DdotUnitary(atmp, b.data[j*b.stride:j*b.stride+b.cols]) } } }
// Dgemv computes // y = alpha * a * x + beta * y if tA = blas.NoTrans // y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if m < 0 { panic(mLT0) } if n < 0 { panic(nLT0) } if lda < max(1, n) { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } // Quick return if possible if m == 0 || n == 0 || (alpha == 0 && beta == 1) { return } // Set up indexes lenX := m lenY := n if tA == blas.NoTrans { lenX = n lenY = m } var kx, ky int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(lenY - 1) * incY } // First form y := beta * y if incY > 0 { Implementation{}.Dscal(lenY, beta, y, incY) } else { Implementation{}.Dscal(lenY, beta, y, -incY) } if alpha == 0 { return } // Form y := alpha * A * x + y if tA == blas.NoTrans { if incX == 1 { for i := 0; i < m; i++ { y[i] += alpha * asm.DdotUnitary(a[lda*i:lda*i+n], x) } return } iy := ky for i := 0; i < m; i++ { y[iy] += alpha * asm.DdotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0) iy += incY } return } // Cases where a is not transposed. if incX == 1 { for i := 0; i < m; i++ { tmp := alpha * x[i] if tmp != 0 { asm.DaxpyUnitary(tmp, a[lda*i:lda*i+n], y, y) } } return } ix := kx for i := 0; i < m; i++ { tmp := alpha * x[ix] if tmp != 0 { asm.DaxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) } ix += incX } }
// Dtrmm performs
//  B = alpha * A * B    if tA == blas.NoTrans and side == blas.Left
//  B = alpha * A^T * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
//  B = alpha * B * A    if tA == blas.NoTrans and side == blas.Right
//  B = alpha * B * A^T  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is a k×k triangular matrix (k = m when side == blas.Left and k = n
// when side == blas.Right), B is an m×n matrix, and alpha is a scalar. The
// result overwrites B in place.
//
// ul selects which triangle of a is read, and d selects whether the diagonal
// of A is read from a (blas.NonUnit) or treated as all ones (blas.Unit).
//
// Dtrmm panics if any of s, ul, tA or d is not a recognised value, if m or n
// is negative, or if a or b is too short for the given dimensions and
// strides.
func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	// k is the order of A: it multiplies B from the left (m rows) or from
	// the right (n columns).
	var k int
	if s == blas.Left {
		k = m
	} else {
		k = n
	}
	if lda*(k-1)+k > len(a) || lda < max(1, k) {
		panic(badLdA)
	}
	if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
		panic(badLdB)
	}
	// With alpha == 0 the result is the zero matrix; a is not read.
	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}

	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				// Row i of the result needs rows i..m-1 of the original B,
				// so rows are processed top to bottom: rows below i have
				// not been overwritten yet.
				for i := 0; i < m; i++ {
					// Diagonal contribution: scale row i by alpha*A[i][i].
					tmp := alpha
					if nonUnit {
						tmp *= a[i*lda+i]
					}
					btmp := b[i*ldb : i*ldb+n]
					for j := range btmp {
						btmp[j] *= tmp
					}
					// Off-diagonal contributions from rows k > i.
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						k := ka + i + 1
						tmp := alpha * va
						if tmp != 0 {
							asm.DaxpyUnitary(tmp, b[k*ldb:k*ldb+n], btmp, btmp)
						}
					}
				}
				return
			}
			// Lower triangle: row i needs rows 0..i of the original B, so
			// rows are processed bottom to top.
			for i := m - 1; i >= 0; i-- {
				tmp := alpha
				if nonUnit {
					tmp *= a[i*lda+i]
				}
				btmp := b[i*ldb : i*ldb+n]
				for j := range btmp {
					btmp[j] *= tmp
				}
				for k, va := range a[i*lda : i*lda+i] {
					tmp := alpha * va
					if tmp != 0 {
						asm.DaxpyUnitary(tmp, b[k*ldb:k*ldb+n], btmp, btmp)
					}
				}
			}
			return
		}
		// Cases where a is transposed.
		if ul == blas.Upper {
			// Row k of a is scattered into rows i > k of B while row k of B
			// still holds its original values; row k is scaled by its
			// diagonal term only afterwards.
			for k := m - 1; k >= 0; k-- {
				btmpk := b[k*ldb : k*ldb+n]
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					i := ia + k + 1
					btmp := b[i*ldb : i*ldb+n]
					tmp := alpha * va
					if tmp != 0 {
						asm.DaxpyUnitary(tmp, btmpk, btmp, btmp)
					}
				}
				tmp := alpha
				if nonUnit {
					tmp *= a[k*lda+k]
				}
				if tmp != 1 {
					for j := 0; j < n; j++ {
						btmpk[j] *= tmp
					}
				}
			}
			return
		}
		// Lower triangle, transposed: same scheme with row k scattered into
		// rows i < k, walking k upwards.
		for k := 0; k < m; k++ {
			btmpk := b[k*ldb : k*ldb+n]
			for i, va := range a[k*lda : k*lda+k] {
				btmp := b[i*ldb : i*ldb+n]
				tmp := alpha * va
				if tmp != 0 {
					asm.DaxpyUnitary(tmp, btmpk, btmp, btmp)
				}
			}
			tmp := alpha
			if nonUnit {
				tmp *= a[k*lda+k]
			}
			if tmp != 1 {
				for j := 0; j < n; j++ {
					btmpk[j] *= tmp
				}
			}
		}
		return
	}
	// Cases where a is on the right. Each row of B is transformed
	// independently.
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				// Walk k downwards so that btmp[k] is still the original
				// value when it is scattered into the entries j > k.
				for k := n - 1; k >= 0; k-- {
					tmp := alpha * btmp[k]
					if tmp != 0 {
						btmp[k] = tmp
						if nonUnit {
							btmp[k] *= a[k*lda+k]
						}
						for ja, v := range a[k*lda+k+1 : k*lda+n] {
							j := ja + k + 1
							btmp[j] += tmp * v
						}
					}
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			// Lower triangle: walk k upwards and scatter into entries j < k.
			for k := 0; k < n; k++ {
				tmp := alpha * btmp[k]
				if tmp != 0 {
					btmp[k] = tmp
					if nonUnit {
						btmp[k] *= a[k*lda+k]
					}
					asm.DaxpyUnitary(tmp, a[k*lda:k*lda+k], btmp, btmp)
				}
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			// Entry j is a dot product of row j of a with btmp[j:], so j
			// walks upwards and the entries after j are still original.
			for j, vb := range btmp {
				tmp := vb
				if nonUnit {
					tmp *= a[j*lda+j]
				}
				tmp += asm.DdotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
				btmp[j] = alpha * tmp
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		btmp := b[i*ldb : i*ldb+n]
		// Lower triangle: entry j depends on btmp[:j+1], so j walks
		// downwards.
		for j := n - 1; j >= 0; j-- {
			tmp := btmp[j]
			if nonUnit {
				tmp *= a[j*lda+j]
			}
			tmp += asm.DdotUnitary(a[j*lda:j*lda+j], btmp[:j])
			btmp[j] = alpha * tmp
		}
	}
}
// Dsyrk performs the symmetric rank-k operation // C = alpha * A * A^T + beta*C // C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and // a k×n matrix otherwise. alpha and beta are scalars. func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { panic(badTranspose) } if n < 0 { panic(nLT0) } if k < 0 { panic(kLT0) } if ldc < n { panic(badLdC) } var row, col int if tA == blas.NoTrans { row, col = n, k } else { row, col = k, n } if lda*(row-1)+col > len(a) || lda < max(1, col) { panic(badLdA) } if ldc*(n-1)+n > len(c) || ldc < max(1, n) { panic(badLdC) } if alpha == 0 { if beta == 0 { if ul == blas.Upper { for i := 0; i < n; i++ { ctmp := c[i*ldc+i : i*ldc+n] for j := range ctmp { ctmp[j] = 0 } } return } for i := 0; i < n; i++ { ctmp := c[i*ldc : i*ldc+i+1] for j := range ctmp { ctmp[j] = 0 } } return } if ul == blas.Upper { for i := 0; i < n; i++ { ctmp := c[i*ldc+i : i*ldc+n] for j := range ctmp { ctmp[j] *= beta } } return } for i := 0; i < n; i++ { ctmp := c[i*ldc : i*ldc+i+1] for j := range ctmp { ctmp[j] *= beta } } return } if tA == blas.NoTrans { if ul == blas.Upper { for i := 0; i < n; i++ { ctmp := c[i*ldc+i : i*ldc+n] atmp := a[i*lda : i*lda+k] for jc, vc := range ctmp { j := jc + i ctmp[jc] = vc*beta + alpha*asm.DdotUnitary(atmp, a[j*lda:j*lda+k]) } } return } for i := 0; i < n; i++ { atmp := a[i*lda : i*lda+k] for j, vc := range c[i*ldc : i*ldc+i+1] { c[i*ldc+j] = vc*beta + alpha*asm.DdotUnitary(a[j*lda:j*lda+k], atmp) } } return } // Cases where a is transposed. 
if ul == blas.Upper { for i := 0; i < n; i++ { ctmp := c[i*ldc+i : i*ldc+n] if beta != 1 { for j := range ctmp { ctmp[j] *= beta } } for l := 0; l < k; l++ { tmp := alpha * a[l*lda+i] if tmp != 0 { asm.DaxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp, ctmp) } } } return } for i := 0; i < n; i++ { ctmp := c[i*ldc : i*ldc+i+1] if beta != 0 { for j := range ctmp { ctmp[j] *= beta } } for l := 0; l < k; l++ { tmp := alpha * a[l*lda+i] if tmp != 0 { asm.DaxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp, ctmp) } } } }
// Dtrsm solves // A * X = alpha * B if tA == blas.NoTrans and side == blas.Left // A^T * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left // X * A = alpha * B if tA == blas.NoTrans and side == blas.Right // X * A^T = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right // where A is an n×n triangular matrix, x is an m×n matrix, and alpha is a // scalar. // // At entry to the function, X contains the values of B, and the result is // stored in place into X. // // No check is made that A is invertible. func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { if s != blas.Left && s != blas.Right { panic(badSide) } if ul != blas.Lower && ul != blas.Upper { panic(badUplo) } if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if d != blas.NonUnit && d != blas.Unit { panic(badDiag) } if m < 0 { panic(mLT0) } if n < 0 { panic(nLT0) } if ldb < n { panic(badLdB) } var k int if s == blas.Left { k = m } else { k = n } if lda*(k-1)+k > len(a) || lda < max(1, k) { panic(badLdA) } if ldb*(m-1)+n > len(b) || ldb < max(1, n) { panic(badLdB) } if m == 0 || n == 0 { return } if alpha == 0 { for i := 0; i < m; i++ { btmp := b[i*ldb : i*ldb+n] for j := range btmp { btmp[j] = 0 } } return } nonUnit := d == blas.NonUnit if s == blas.Left { if tA == blas.NoTrans { if ul == blas.Upper { for i := m - 1; i >= 0; i-- { btmp := b[i*ldb : i*ldb+n] if alpha != 1 { for j := range btmp { btmp[j] *= alpha } } for ka, va := range a[i*lda+i+1 : i*lda+m] { k := ka + i + 1 if va != 0 { asm.DaxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp, btmp) } } if nonUnit { tmp := 1 / a[i*lda+i] for j := 0; j < n; j++ { btmp[j] *= tmp } } } return } for i := 0; i < m; i++ { btmp := b[i*ldb : i*ldb+n] if alpha != 1 { for j := 0; j < n; j++ { btmp[j] *= alpha } } for k, va := range a[i*lda : i*lda+i] { if va != 0 { asm.DaxpyUnitary(-va, 
b[k*ldb:k*ldb+n], btmp, btmp) } } if nonUnit { tmp := 1 / a[i*lda+i] for j := 0; j < n; j++ { btmp[j] *= tmp } } } return } // Cases where a is transposed if ul == blas.Upper { for k := 0; k < m; k++ { btmpk := b[k*ldb : k*ldb+n] if nonUnit { tmp := 1 / a[k*lda+k] for j := 0; j < n; j++ { btmpk[j] *= tmp } } for ia, va := range a[k*lda+k+1 : k*lda+m] { i := ia + k + 1 if va != 0 { btmp := b[i*ldb : i*ldb+n] asm.DaxpyUnitary(-va, btmpk, btmp, btmp) } } if alpha != 1 { for j := 0; j < n; j++ { btmpk[j] *= alpha } } } return } for k := m - 1; k >= 0; k-- { btmpk := b[k*ldb : k*ldb+n] if nonUnit { tmp := 1 / a[k*lda+k] for j := 0; j < n; j++ { btmpk[j] *= tmp } } for i, va := range a[k*lda : k*lda+k] { if va != 0 { btmp := b[i*ldb : i*ldb+n] asm.DaxpyUnitary(-va, btmpk, btmp, btmp) } } if alpha != 1 { for j := 0; j < n; j++ { btmpk[j] *= alpha } } } return } // Cases where a is to the right of X. if tA == blas.NoTrans { if ul == blas.Upper { for i := 0; i < m; i++ { btmp := b[i*ldb : i*ldb+n] if alpha != 1 { for j := 0; j < n; j++ { btmp[j] *= alpha } } for k, vb := range btmp { if vb != 0 { if btmp[k] != 0 { if nonUnit { btmp[k] /= a[k*lda+k] } btmpk := btmp[k+1 : n] asm.DaxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmpk, btmpk) } } } } return } for i := 0; i < m; i++ { btmp := b[i*lda : i*lda+n] if alpha != 1 { for j := 0; j < n; j++ { btmp[j] *= alpha } } for k := n - 1; k >= 0; k-- { if btmp[k] != 0 { if nonUnit { btmp[k] /= a[k*lda+k] } asm.DaxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp, btmp) } } } return } // Cases where a is transposed. if ul == blas.Upper { for i := 0; i < m; i++ { btmp := b[i*lda : i*lda+n] for j := n - 1; j >= 0; j-- { tmp := alpha*btmp[j] - asm.DdotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) if nonUnit { tmp /= a[j*lda+j] } btmp[j] = tmp } } return } for i := 0; i < m; i++ { btmp := b[i*lda : i*lda+n] for j := 0; j < n; j++ { tmp := alpha*btmp[j] - asm.DdotUnitary(a[j*lda:j*lda+j], btmp) if nonUnit { tmp /= a[j*lda+j] } btmp[j] = tmp } } }
// Dot computes the dot product of s1 and s2, i.e. // sum_{i = 1}^N s1[i]*s2[i]. // A panic will occur if lengths of arguments do not match. func Dot(s1, s2 []float64) float64 { if len(s1) != len(s2) { panic("floats: lengths of the slices do not match") } return asm.DdotUnitary(s1, s2) }
// Inner computes the generalized inner product // x^T A y // between vectors x and y with matrix A. This is only a true inner product if // A is symmetric positive definite, though the operation works for any matrix A. // // Inner panics if x.Len != m or y.Len != n when A is an m x n matrix. func Inner(x *Vector, A Matrix, y *Vector) float64 { m, n := A.Dims() if x.Len() != m { panic(matrix.ErrShape) } if y.Len() != n { panic(matrix.ErrShape) } if m == 0 || n == 0 { return 0 } var sum float64 switch b := A.(type) { case RawSymmetricer: bmat := b.RawSymmetric() if bmat.Uplo != blas.Upper { // Panic as a string not a mat64.Error. panic(badSymTriangle) } for i := 0; i < x.Len(); i++ { xi := x.at(i) if xi != 0 { if y.mat.Inc == 1 { sum += xi * asm.DdotUnitary( bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n], y.mat.Data[i:], ) } else { sum += xi * asm.DdotInc( bmat.Data[i*bmat.Stride+i:i*bmat.Stride+n], y.mat.Data[i*y.mat.Inc:], uintptr(n-i), 1, uintptr(y.mat.Inc), 0, 0, ) } } yi := y.at(i) if i != n-1 && yi != 0 { if x.mat.Inc == 1 { sum += yi * asm.DdotUnitary( bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n], x.mat.Data[i+1:], ) } else { sum += yi * asm.DdotInc( bmat.Data[i*bmat.Stride+i+1:i*bmat.Stride+n], x.mat.Data[(i+1)*x.mat.Inc:], uintptr(n-i-1), 1, uintptr(x.mat.Inc), 0, 0, ) } } } case RawMatrixer: bmat := b.RawMatrix() for i := 0; i < x.Len(); i++ { xi := x.at(i) if xi != 0 { if y.mat.Inc == 1 { sum += xi * asm.DdotUnitary( bmat.Data[i*bmat.Stride:i*bmat.Stride+n], y.mat.Data, ) } else { sum += xi * asm.DdotInc( bmat.Data[i*bmat.Stride:i*bmat.Stride+n], y.mat.Data, uintptr(n), 1, uintptr(y.mat.Inc), 0, 0, ) } } } default: for i := 0; i < x.Len(); i++ { xi := x.at(i) for j := 0; j < y.Len(); j++ { sum += xi * A.At(i, j) * y.at(j) } } } return sum }