예제 #1
0
// AddScaledTo performs dst = y + alpha * s, where alpha is a scalar,
// and dst, y and s are all slices.
// It panics if the lengths of dst, y, and s are not equal.
//
// At the return of the function, dst[i] = y[i] + alpha * s[i]
func AddScaledTo(dst, y []float64, alpha float64, s []float64) []float64 {
	if len(dst) != len(s) || len(dst) != len(y) {
		panic("floats: lengths of slices do not match")
	}
	asm.DaxpyUnitaryTo(dst, alpha, s, y)
	return dst
}
예제 #2
0
파일: dgemm.go 프로젝트: carmine/blas
// dgemmSerial where neither a is transposed and b is not
func dgemmSerialTransNot(a, b, c general64, alpha float64) {
	if debug {
		if a.rows != b.rows {
			fmt.Println(a.rows, b.rows)
			panic("inner dimension mismatch")
		}
		if a.cols != c.rows {
			panic("outer dimension mismatch")
		}
		if b.cols != c.cols {
			panic("outer dimension mismatch")
		}
	}

	// This style is used instead of the literal [i*stride +j]) is used because
	// approximately 5 times faster as of go 1.3.
	for l := 0; l < a.rows; l++ {
		btmp := b.data[l*b.stride : l*b.stride+b.cols]
		for i, v := range a.data[l*a.stride : l*a.stride+a.cols] {
			tmp := alpha * v
			ctmp := c.data[i*c.stride : i*c.stride+c.cols]
			if tmp != 0 {
				asm.DaxpyUnitaryTo(ctmp, tmp, btmp, ctmp)
			}
		}
	}
}
예제 #3
0
// SubTo subtracts, element-wise, the elements of t from s and
// stores the result in dst. Panics if the lengths of s, t and dst do not match.
func SubTo(dst, s, t []float64) []float64 {
	if len(s) != len(t) {
		panic("floats: length of subtractor and subtractee do not match")
	}
	if len(dst) != len(s) {
		panic("floats: length of destination does not match length of subtractor")
	}
	asm.DaxpyUnitaryTo(dst, -1, t, s)
	return dst
}
예제 #4
0
// AddTo adds, element-wise, the elements of s and t and
// stores the result in dst. Panics if the lengths of s, t and dst do not match.
func AddTo(dst, s, t []float64) []float64 {
	if len(s) != len(t) {
		panic("floats: length of adders do not match")
	}
	if len(dst) != len(s) {
		panic("floats: length of destination does not match length of adder")
	}
	asm.DaxpyUnitaryTo(dst, 1, s, t)
	return dst
}
예제 #5
0
파일: vector.go 프로젝트: lessc0de/matrix
// AddScaledVec adds the vectors a and alpha*b, placing the result in the receiver.
func (v *Vector) AddScaledVec(a *Vector, alpha float64, b *Vector) {
	if alpha == 1 {
		v.AddVec(a, b)
		return
	}
	if alpha == -1 {
		v.SubVec(a, b)
		return
	}

	ar := a.Len()
	br := b.Len()

	if ar != br {
		panic(matrix.ErrShape)
	}

	v.reuseAs(ar)

	switch {
	case alpha == 0: // v <- a
		v.CopyVec(a)
	case v == a && v == b: // v <- v + alpha * v = (alpha + 1) * v
		blas64.Scal(ar, alpha+1, v.mat)
	case v == a && v != b: // v <- v + alpha * b
		if v.mat.Inc == 1 && b.mat.Inc == 1 {
			// Fast path for a common case.
			asm.DaxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
		} else {
			asm.DaxpyInc(alpha, b.mat.Data, v.mat.Data,
				uintptr(ar), uintptr(b.mat.Inc), uintptr(v.mat.Inc), 0, 0)
		}
	default: // v <- a + alpha * b or v <- a + alpha * v
		if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
			// Fast path for a common case.
			asm.DaxpyUnitaryTo(v.mat.Data, alpha, b.mat.Data, a.mat.Data)
		} else {
			asm.DaxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
				alpha, b.mat.Data, a.mat.Data,
				uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
		}
	}
}
예제 #6
0
// dgemmSerial where neither a nor b are transposed
func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
	// This style is used instead of the literal [i*stride +j]) is used because
	// approximately 5 times faster as of go 1.3.
	for i := 0; i < m; i++ {
		ctmp := c[i*ldc : i*ldc+n]
		for l, v := range a[i*lda : i*lda+k] {
			tmp := alpha * v
			if tmp != 0 {
				asm.DaxpyUnitaryTo(ctmp, tmp, b[l*ldb:l*ldb+n], ctmp)
			}
		}
	}
}
예제 #7
0
// Daxpy adds alpha times x to y
//  y[i] += alpha * x[i] for all i
func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) {
	if incX == 0 {
		panic(zeroIncX)
	}
	if incY == 0 {
		panic(zeroIncY)
	}
	if n < 1 {
		if n == 0 {
			return
		}
		panic(negativeN)
	}
	if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
		panic(badX)
	}
	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
		panic(badY)
	}
	if alpha == 0 {
		return
	}
	if incX == 1 && incY == 1 {
		if len(x) < n {
			panic(badLenX)
		}
		if len(y) < n {
			panic(badLenY)
		}
		asm.DaxpyUnitaryTo(y, alpha, x[:n], y)
		return
	}
	var ix, iy int
	if incX < 0 {
		ix = (-n + 1) * incX
	}
	if incY < 0 {
		iy = (-n + 1) * incY
	}
	if ix >= len(x) || ix+(n-1)*incX >= len(x) {
		panic(badLenX)
	}
	if iy >= len(y) || iy+(n-1)*incY >= len(y) {
		panic(badLenY)
	}
	asm.DaxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
예제 #8
0
파일: vector.go 프로젝트: lessc0de/matrix
// SubVec subtracts the vector b from a, placing the result in the receiver.
func (v *Vector) SubVec(a, b *Vector) {
	ar := a.Len()
	br := b.Len()

	if ar != br {
		panic(matrix.ErrShape)
	}

	v.reuseAs(ar)

	if v.mat.Inc == 1 && a.mat.Inc == 1 && b.mat.Inc == 1 {
		// Fast path for a common case.
		asm.DaxpyUnitaryTo(v.mat.Data, -1, b.mat.Data, a.mat.Data)
		return
	}
	asm.DaxpyIncTo(v.mat.Data, uintptr(v.mat.Inc), 0,
		-1, b.mat.Data, a.mat.Data,
		uintptr(ar), uintptr(b.mat.Inc), uintptr(a.mat.Inc), 0, 0)
}
예제 #9
0
// Dgemv computes
//  y = alpha * a * x + beta * y if tA = blas.NoTrans
//  y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	if lda < max(1, n) {
		panic(badLdA)
	}

	if incX == 0 {
		panic(zeroIncX)
	}
	if incY == 0 {
		panic(zeroIncY)
	}
	// Set up indexes
	lenX := m
	lenY := n
	if tA == blas.NoTrans {
		lenX = n
		lenY = m
	}
	if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
		panic(badX)
	}
	if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
		panic(badY)
	}
	if lda*(m-1)+n > len(a) || lda < max(1, n) {
		panic(badLdA)
	}

	// Quick return if possible
	if m == 0 || n == 0 || (alpha == 0 && beta == 1) {
		return
	}

	var kx, ky int
	if incX > 0 {
		kx = 0
	} else {
		kx = -(lenX - 1) * incX
	}
	if incY > 0 {
		ky = 0
	} else {
		ky = -(lenY - 1) * incY
	}

	// First form y := beta * y
	if incY > 0 {
		Implementation{}.Dscal(lenY, beta, y, incY)
	} else {
		Implementation{}.Dscal(lenY, beta, y, -incY)
	}

	if alpha == 0 {
		return
	}

	// Form y := alpha * A * x + y
	if tA == blas.NoTrans {
		if incX == 1 && incY == 1 {
			for i := 0; i < m; i++ {
				y[i] += alpha * asm.DdotUnitary(a[lda*i:lda*i+n], x)
			}
			return
		}
		iy := ky
		for i := 0; i < m; i++ {
			y[iy] += alpha * asm.DdotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
			iy += incY
		}
		return
	}
	// Cases where a is transposed.
	if incX == 1 && incY == 1 {
		for i := 0; i < m; i++ {
			tmp := alpha * x[i]
			if tmp != 0 {
				asm.DaxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y)
			}
		}
		return
	}
	ix := kx
	for i := 0; i < m; i++ {
		tmp := alpha * x[ix]
		if tmp != 0 {
			asm.DaxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
		}
		ix += incX
	}
}
예제 #10
0
// Dger performs the rank-one operation
//  A += alpha * x * y^T
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) {
	// Check inputs
	if m < 0 {
		panic("m < 0")
	}
	if n < 0 {
		panic(negativeN)
	}
	if incX == 0 {
		panic(zeroIncX)
	}
	if incY == 0 {
		panic(zeroIncY)
	}
	if (incX > 0 && (m-1)*incX >= len(x)) || (incX < 0 && (1-m)*incX >= len(x)) {
		panic(badX)
	}
	if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
		panic(badY)
	}
	if lda*(m-1)+n > len(a) || lda < max(1, n) {
		panic(badLdA)
	}
	if lda < max(1, n) {
		panic(badLdA)
	}

	// Quick return if possible
	if m == 0 || n == 0 || alpha == 0 {
		return
	}

	var ky, kx int
	if incY > 0 {
		ky = 0
	} else {
		ky = -(n - 1) * incY
	}

	if incX > 0 {
		kx = 0
	} else {
		kx = -(m - 1) * incX
	}

	if incX == 1 && incY == 1 {
		x = x[:m]
		y = y[:n]
		for i, xv := range x {
			tmp := alpha * xv
			if tmp != 0 {
				atmp := a[i*lda : i*lda+n]
				asm.DaxpyUnitaryTo(atmp, tmp, y, atmp)
			}
		}
		return
	}

	ix := kx
	for i := 0; i < m; i++ {
		tmp := alpha * x[ix]
		if tmp != 0 {
			asm.DaxpyInc(tmp, y, a[i*lda:i*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
		}
		ix += incX
	}
}
예제 #11
0
// Sub subtracts, element-wise, the elements of s from dst. Panics if
// the lengths of dst and s do not match.
func Sub(dst, s []float64) {
	if len(dst) != len(s) {
		panic("floats: length of the slices do not match")
	}
	asm.DaxpyUnitaryTo(dst, -1, s, dst)
}
예제 #12
0
// AddScaled performs dst = dst + alpha * s.
// It panics if the lengths of dst and s are not equal.
func AddScaled(dst []float64, alpha float64, s []float64) {
	if len(dst) != len(s) {
		panic("floats: length of destination and source to not match")
	}
	asm.DaxpyUnitaryTo(dst, alpha, s, dst)
}
예제 #13
0
// Dtrmm performs
//  B = alpha * A * B,   if tA == blas.NoTrans and side == blas.Left,
//  B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
//  B = alpha * B * A,   if tA == blas.NoTrans and side == blas.Right,
//  B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
// where A is an n×n or m×m triangular matrix, and B is an m×n matrix.
func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	var k int
	if s == blas.Left {
		k = m
	} else {
		k = n
	}
	if lda*(k-1)+k > len(a) || lda < max(1, k) {
		panic(badLdA)
	}
	if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
		panic(badLdB)
	}
	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}

	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				for i := 0; i < m; i++ {
					tmp := alpha
					if nonUnit {
						tmp *= a[i*lda+i]
					}
					btmp := b[i*ldb : i*ldb+n]
					for j := range btmp {
						btmp[j] *= tmp
					}
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						k := ka + i + 1
						tmp := alpha * va
						if tmp != 0 {
							asm.DaxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
						}
					}
				}
				return
			}
			for i := m - 1; i >= 0; i-- {
				tmp := alpha
				if nonUnit {
					tmp *= a[i*lda+i]
				}
				btmp := b[i*ldb : i*ldb+n]
				for j := range btmp {
					btmp[j] *= tmp
				}
				for k, va := range a[i*lda : i*lda+i] {
					tmp := alpha * va
					if tmp != 0 {
						asm.DaxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
					}
				}
			}
			return
		}
		// Cases where a is transposed.
		if ul == blas.Upper {
			for k := m - 1; k >= 0; k-- {
				btmpk := b[k*ldb : k*ldb+n]
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					i := ia + k + 1
					btmp := b[i*ldb : i*ldb+n]
					tmp := alpha * va
					if tmp != 0 {
						asm.DaxpyUnitaryTo(btmp, tmp, btmpk, btmp)
					}
				}
				tmp := alpha
				if nonUnit {
					tmp *= a[k*lda+k]
				}
				if tmp != 1 {
					for j := 0; j < n; j++ {
						btmpk[j] *= tmp
					}
				}
			}
			return
		}
		for k := 0; k < m; k++ {
			btmpk := b[k*ldb : k*ldb+n]
			for i, va := range a[k*lda : k*lda+k] {
				btmp := b[i*ldb : i*ldb+n]
				tmp := alpha * va
				if tmp != 0 {
					asm.DaxpyUnitaryTo(btmp, tmp, btmpk, btmp)
				}
			}
			tmp := alpha
			if nonUnit {
				tmp *= a[k*lda+k]
			}
			if tmp != 1 {
				for j := 0; j < n; j++ {
					btmpk[j] *= tmp
				}
			}
		}
		return
	}
	// Cases where a is on the right
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				for k := n - 1; k >= 0; k-- {
					tmp := alpha * btmp[k]
					if tmp != 0 {
						btmp[k] = tmp
						if nonUnit {
							btmp[k] *= a[k*lda+k]
						}
						for ja, v := range a[k*lda+k+1 : k*lda+n] {
							j := ja + k + 1
							btmp[j] += tmp * v
						}
					}
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for k := 0; k < n; k++ {
				tmp := alpha * btmp[k]
				if tmp != 0 {
					btmp[k] = tmp
					if nonUnit {
						btmp[k] *= a[k*lda+k]
					}
					asm.DaxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
				}
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j, vb := range btmp {
				tmp := vb
				if nonUnit {
					tmp *= a[j*lda+j]
				}
				tmp += asm.DdotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
				btmp[j] = alpha * tmp
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		btmp := b[i*ldb : i*ldb+n]
		for j := n - 1; j >= 0; j-- {
			tmp := btmp[j]
			if nonUnit {
				tmp *= a[j*lda+j]
			}
			tmp += asm.DdotUnitary(a[j*lda:j*lda+j], btmp[:j])
			btmp[j] = alpha * tmp
		}
	}
}
예제 #14
0
// Dsyrk performs the symmetric rank-k operation
//  C = alpha * A * A^T + beta*C
// C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and
// a k×n matrix otherwise. alpha and beta are scalars.
func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) {
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if n < 0 {
		panic(nLT0)
	}
	if k < 0 {
		panic(kLT0)
	}
	if ldc < n {
		panic(badLdC)
	}
	var row, col int
	if tA == blas.NoTrans {
		row, col = n, k
	} else {
		row, col = k, n
	}
	if lda*(row-1)+col > len(a) || lda < max(1, col) {
		panic(badLdA)
	}
	if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
		panic(badLdC)
	}
	if alpha == 0 {
		if beta == 0 {
			if ul == blas.Upper {
				for i := 0; i < n; i++ {
					ctmp := c[i*ldc+i : i*ldc+n]
					for j := range ctmp {
						ctmp[j] = 0
					}
				}
				return
			}
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc : i*ldc+i+1]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc : i*ldc+i+1]
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		return
	}
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			for i := 0; i < n; i++ {
				ctmp := c[i*ldc+i : i*ldc+n]
				atmp := a[i*lda : i*lda+k]
				for jc, vc := range ctmp {
					j := jc + i
					ctmp[jc] = vc*beta + alpha*asm.DdotUnitary(atmp, a[j*lda:j*lda+k])
				}
			}
			return
		}
		for i := 0; i < n; i++ {
			atmp := a[i*lda : i*lda+k]
			for j, vc := range c[i*ldc : i*ldc+i+1] {
				c[i*ldc+j] = vc*beta + alpha*asm.DdotUnitary(a[j*lda:j*lda+k], atmp)
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		for i := 0; i < n; i++ {
			ctmp := c[i*ldc+i : i*ldc+n]
			if beta != 1 {
				for j := range ctmp {
					ctmp[j] *= beta
				}
			}
			for l := 0; l < k; l++ {
				tmp := alpha * a[l*lda+i]
				if tmp != 0 {
					asm.DaxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp)
				}
			}
		}
		return
	}
	for i := 0; i < n; i++ {
		ctmp := c[i*ldc : i*ldc+i+1]
		if beta != 0 {
			for j := range ctmp {
				ctmp[j] *= beta
			}
		}
		for l := 0; l < k; l++ {
			tmp := alpha * a[l*lda+i]
			if tmp != 0 {
				asm.DaxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp)
			}
		}
	}
}
예제 #15
0
// Dtrsm solves
//  A * X = alpha * B,   if tA == blas.NoTrans side == blas.Left,
//  A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
//  X * A = alpha * B,   if tA == blas.NoTrans side == blas.Right,
//  X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
// where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a
// scalar.
//
// At entry to the function, X contains the values of B, and the result is
// stored in place into X.
//
// No check is made that A is invertible.
func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
	if s != blas.Left && s != blas.Right {
		panic(badSide)
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
		panic(badTranspose)
	}
	if d != blas.NonUnit && d != blas.Unit {
		panic(badDiag)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	if ldb < n {
		panic(badLdB)
	}
	var k int
	if s == blas.Left {
		k = m
	} else {
		k = n
	}
	if lda*(k-1)+k > len(a) || lda < max(1, k) {
		panic(badLdA)
	}
	if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
		panic(badLdB)
	}

	if m == 0 || n == 0 {
		return
	}

	if alpha == 0 {
		for i := 0; i < m; i++ {
			btmp := b[i*ldb : i*ldb+n]
			for j := range btmp {
				btmp[j] = 0
			}
		}
		return
	}
	nonUnit := d == blas.NonUnit
	if s == blas.Left {
		if tA == blas.NoTrans {
			if ul == blas.Upper {
				for i := m - 1; i >= 0; i-- {
					btmp := b[i*ldb : i*ldb+n]
					if alpha != 1 {
						for j := range btmp {
							btmp[j] *= alpha
						}
					}
					for ka, va := range a[i*lda+i+1 : i*lda+m] {
						k := ka + i + 1
						if va != 0 {
							asm.DaxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
						}
					}
					if nonUnit {
						tmp := 1 / a[i*lda+i]
						for j := 0; j < n; j++ {
							btmp[j] *= tmp
						}
					}
				}
				return
			}
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				if alpha != 1 {
					for j := 0; j < n; j++ {
						btmp[j] *= alpha
					}
				}
				for k, va := range a[i*lda : i*lda+i] {
					if va != 0 {
						asm.DaxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
					}
				}
				if nonUnit {
					tmp := 1 / a[i*lda+i]
					for j := 0; j < n; j++ {
						btmp[j] *= tmp
					}
				}
			}
			return
		}
		// Cases where a is transposed
		if ul == blas.Upper {
			for k := 0; k < m; k++ {
				btmpk := b[k*ldb : k*ldb+n]
				if nonUnit {
					tmp := 1 / a[k*lda+k]
					for j := 0; j < n; j++ {
						btmpk[j] *= tmp
					}
				}
				for ia, va := range a[k*lda+k+1 : k*lda+m] {
					i := ia + k + 1
					if va != 0 {
						btmp := b[i*ldb : i*ldb+n]
						asm.DaxpyUnitaryTo(btmp, -va, btmpk, btmp)
					}
				}
				if alpha != 1 {
					for j := 0; j < n; j++ {
						btmpk[j] *= alpha
					}
				}
			}
			return
		}
		for k := m - 1; k >= 0; k-- {
			btmpk := b[k*ldb : k*ldb+n]
			if nonUnit {
				tmp := 1 / a[k*lda+k]
				for j := 0; j < n; j++ {
					btmpk[j] *= tmp
				}
			}
			for i, va := range a[k*lda : k*lda+k] {
				if va != 0 {
					btmp := b[i*ldb : i*ldb+n]
					asm.DaxpyUnitaryTo(btmp, -va, btmpk, btmp)
				}
			}
			if alpha != 1 {
				for j := 0; j < n; j++ {
					btmpk[j] *= alpha
				}
			}
		}
		return
	}
	// Cases where a is to the right of X.
	if tA == blas.NoTrans {
		if ul == blas.Upper {
			for i := 0; i < m; i++ {
				btmp := b[i*ldb : i*ldb+n]
				if alpha != 1 {
					for j := 0; j < n; j++ {
						btmp[j] *= alpha
					}
				}
				for k, vb := range btmp {
					if vb != 0 {
						if btmp[k] != 0 {
							if nonUnit {
								btmp[k] /= a[k*lda+k]
							}
							btmpk := btmp[k+1 : n]
							asm.DaxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk)
						}
					}
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			btmp := b[i*lda : i*lda+n]
			if alpha != 1 {
				for j := 0; j < n; j++ {
					btmp[j] *= alpha
				}
			}
			for k := n - 1; k >= 0; k-- {
				if btmp[k] != 0 {
					if nonUnit {
						btmp[k] /= a[k*lda+k]
					}
					asm.DaxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp)
				}
			}
		}
		return
	}
	// Cases where a is transposed.
	if ul == blas.Upper {
		for i := 0; i < m; i++ {
			btmp := b[i*lda : i*lda+n]
			for j := n - 1; j >= 0; j-- {
				tmp := alpha*btmp[j] - asm.DdotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
				if nonUnit {
					tmp /= a[j*lda+j]
				}
				btmp[j] = tmp
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		btmp := b[i*lda : i*lda+n]
		for j := 0; j < n; j++ {
			tmp := alpha*btmp[j] - asm.DdotUnitary(a[j*lda:j*lda+j], btmp)
			if nonUnit {
				tmp /= a[j*lda+j]
			}
			btmp[j] = tmp
		}
	}
}
예제 #16
0
// Dsymm performs one of
//  C = alpha * A * B + beta * C, if side == blas.Left,
//  C = alpha * B * A + beta * C, if side == blas.Right,
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
// is a scalar.
func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
	if s != blas.Right && s != blas.Left {
		panic("goblas: bad side")
	}
	if ul != blas.Lower && ul != blas.Upper {
		panic(badUplo)
	}
	if m < 0 {
		panic(mLT0)
	}
	if n < 0 {
		panic(nLT0)
	}
	var k int
	if s == blas.Left {
		k = m
	} else {
		k = n
	}
	if lda*(k-1)+k > len(a) || lda < max(1, k) {
		panic(badLdA)
	}
	if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
		panic(badLdB)
	}
	if ldc*(m-1)+n > len(c) || ldc < max(1, n) {
		panic(badLdC)
	}
	if m == 0 || n == 0 {
		return
	}
	if alpha == 0 && beta == 1 {
		return
	}
	if alpha == 0 {
		if beta == 0 {
			for i := 0; i < m; i++ {
				ctmp := c[i*ldc : i*ldc+n]
				for j := range ctmp {
					ctmp[j] = 0
				}
			}
			return
		}
		for i := 0; i < m; i++ {
			ctmp := c[i*ldc : i*ldc+n]
			for j := 0; j < n; j++ {
				ctmp[j] *= beta
			}
		}
		return
	}

	isUpper := ul == blas.Upper
	if s == blas.Left {
		for i := 0; i < m; i++ {
			atmp := alpha * a[i*lda+i]
			btmp := b[i*ldb : i*ldb+n]
			ctmp := c[i*ldc : i*ldc+n]
			for j, v := range btmp {
				ctmp[j] *= beta
				ctmp[j] += atmp * v
			}

			for k := 0; k < i; k++ {
				var atmp float64
				if isUpper {
					atmp = a[k*lda+i]
				} else {
					atmp = a[i*lda+k]
				}
				atmp *= alpha
				ctmp := c[i*ldc : i*ldc+n]
				asm.DaxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
			}
			for k := i + 1; k < m; k++ {
				var atmp float64
				if isUpper {
					atmp = a[i*lda+k]
				} else {
					atmp = a[k*lda+i]
				}
				atmp *= alpha
				ctmp := c[i*ldc : i*ldc+n]
				asm.DaxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
			}
		}
		return
	}
	if isUpper {
		for i := 0; i < m; i++ {
			for j := n - 1; j >= 0; j-- {
				tmp := alpha * b[i*ldb+j]
				var tmp2 float64
				atmp := a[j*lda+j+1 : j*lda+n]
				btmp := b[i*ldb+j+1 : i*ldb+n]
				ctmp := c[i*ldc+j+1 : i*ldc+n]
				for k, v := range atmp {
					ctmp[k] += tmp * v
					tmp2 += btmp[k] * v
				}
				c[i*ldc+j] *= beta
				c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
			}
		}
		return
	}
	for i := 0; i < m; i++ {
		for j := 0; j < n; j++ {
			tmp := alpha * b[i*ldb+j]
			var tmp2 float64
			atmp := a[j*lda : j*lda+j]
			btmp := b[i*ldb : i*ldb+j]
			ctmp := c[i*ldc : i*ldc+j]
			for k, v := range atmp {
				ctmp[k] += tmp * v
				tmp2 += btmp[k] * v
			}
			c[i*ldc+j] *= beta
			c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
		}
	}
}