// dgemmSerial where both are transposed func dgemmSerialTransTrans(a, b, c general64, alpha float64) { if debug { if a.rows != b.cols { panic("inner dimension mismatch") } if a.cols != c.rows { panic("outer dimension mismatch") } if b.rows != c.cols { panic("outer dimension mismatch") } } // This style is used instead of the literal [i*stride +j]) is used because // approximately 5 times faster as of go 1.3. for l := 0; l < a.rows; l++ { for i, v := range a.data[l*a.stride : l*a.stride+a.cols] { ctmp := c.data[i*c.stride : i*c.stride+c.cols] if v != 0 { tmp := alpha * v if tmp != 0 { asm.DaxpyInc(tmp, b.data[l:], ctmp, uintptr(b.rows), uintptr(b.stride), 1, 0, 0) } } } } }
// Dger performs the rank-one operation // A += alpha * x * y^T // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { // Check inputs if m < 0 { panic("m < 0") } if n < 0 { panic(negativeN) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if lda < max(1, n) { panic(badLdA) } // Quick return if possible if m == 0 || n == 0 || alpha == 0 { return } var ky, kx int if incY > 0 { ky = 0 } else { ky = -(n - 1) * incY } if incX > 0 { kx = 0 } else { kx = -(m - 1) * incX } if incX == 1 && incY == 1 { x = x[:m] y = y[:n] for i, xv := range x { tmp := alpha * xv if tmp != 0 { atmp := a[i*lda : i*lda+n] asm.DaxpyUnitary(tmp, y, atmp, atmp) } } return } ix := kx for i := 0; i < m; i++ { tmp := alpha * x[ix] if tmp != 0 { asm.DaxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0) } ix += incX } }
// dgemmSerial where both are transposed func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) { // This style is used instead of the literal [i*stride +j]) is used because // approximately 5 times faster as of go 1.3. for l := 0; l < k; l++ { for i, v := range a[l*lda : l*lda+m] { tmp := alpha * v if tmp != 0 { ctmp := c[i*ldc : i*ldc+n] asm.DaxpyInc(tmp, b[l:], ctmp, uintptr(n), uintptr(ldb), 1, 0, 0) } } } }
// Daxpy adds alpha times x to y // y[i] += alpha * x[i] for all i func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) { if n < 1 { if n == 0 { return } panic(negativeN) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) { panic(badX) } if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) { panic(badY) } if alpha == 0 { return } if incX == 1 && incY == 1 { if len(x) < n { panic(badLenX) } if len(y) < n { panic(badLenY) } asm.DaxpyUnitary(alpha, x[:n], y, y) return } var ix, iy int if incX < 0 { ix = (-n + 1) * incX } if incY < 0 { iy = (-n + 1) * incY } if ix >= len(x) || ix+(n-1)*incX >= len(x) { panic(badLenX) } if iy >= len(y) || iy+(n-1)*incY >= len(y) { panic(badLenY) } asm.DaxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) }
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver. func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) { if alpha == 1 { v.AddVec(a, b) return } if alpha == -1 { v.SubVec(a, b) return } ar := a.Len() br := b.Len() if ar != br { panic(matrix.ErrShape) } v.reuseAs(ar) switch { case alpha == 0: // v <- a v.CopyVec(a) case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v blas64.Scal(ar, alpha+1, v.mat) case v == a && v != b: // v <- v + alpha * b if v.mat.Inc == 1 && b.mat.Inc == 1 { // Fast path for a common case. asm.DaxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data) } else { asm.DaxpyInc(alpha, b.mat.Data, v.mat.Data, uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0) } default: // v <- a + alpha * b or v <- a + alpha * v if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 { // Fast path for a common case. asm.DaxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data) } else { asm.DaxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0, alpha, b.mat.Data, a.mat.Data, uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0) } } }
// Dgemv computes // y = alpha * a * x + beta * y if tA = blas.NoTrans // y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { panic(badTranspose) } if m < 0 { panic(mLT0) } if n < 0 { panic(nLT0) } if lda < max(1, n) { panic(badLdA) } if incX == 0 { panic(zeroIncX) } if incY == 0 { panic(zeroIncY) } // Quick return if possible if m == 0 || n == 0 || (alpha == 0 && beta == 1) { return } // Set up indexes lenX := m lenY := n if tA == blas.NoTrans { lenX = n lenY = m } var kx, ky int if incX > 0 { kx = 0 } else { kx = -(lenX - 1) * incX } if incY > 0 { ky = 0 } else { ky = -(lenY - 1) * incY } // First form y := beta * y if incY > 0 { Implementation{}.Dscal(lenY, beta, y, incY) } else { Implementation{}.Dscal(lenY, beta, y, -incY) } if alpha == 0 { return } // Form y := alpha * A * x + y if tA == blas.NoTrans { if incX == 1 { for i := 0; i < m; i++ { y[i] += alpha * asm.DdotUnitary(a[lda*i:lda*i+n], x) } return } iy := ky for i := 0; i < m; i++ { y[iy] += alpha * asm.DdotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0) iy += incY } return } // Cases where a is not transposed. if incX == 1 { for i := 0; i < m; i++ { tmp := alpha * x[i] if tmp != 0 { asm.DaxpyUnitary(tmp, a[lda*i:lda*i+n], y, y) } } return } ix := kx for i := 0; i < m; i++ { tmp := alpha * x[ix] if tmp != 0 { asm.DaxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky)) } ix += incX } }