Beispiel #1
0
/*
 * Update2Sym validates the operand shapes and then runs the syr2k kernel on
 * Cc, A and B, either as a single call or split into tasks handed to the
 * scheduler of the active configuration.
 *
 * Shape requirements: Cc is square and A and B have identical dimensions.
 * Without a transpose bit the row count of A must equal the order of Cc; with
 * gomas.TRANSA (gomas.TRANS is promoted to gomas.TRANSA) the column count of A
 * must equal it instead.
 *
 * The first entry of confs, when present, replaces gomas.DefaultConf().
 *
 * Returns nil on success or when Cc has no elements, and a gomas.ESIZE error
 * when the shapes do not agree.
 *
 * NOTE(review): the exact arithmetic is defined by syr2k, which is not part of
 * this excerpt; presumably a symmetric rank-2k update -- confirm.
 */
func Update2Sym(Cc, A, B *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error {
	cfg := gomas.DefaultConf()
	if len(confs) > 0 {
		cfg = confs[0]
	}

	cRows, cCols := Cc.Size()
	aRows, aCols := A.Size()
	bRows, bCols := B.Size()
	if cRows*cCols == 0 {
		// nothing to update
		return nil
	}

	// A request made with TRANS is carried forward as TRANSA; setting the bit
	// again when it is already present changes nothing.
	if bits&gomas.TRANS != 0 {
		bits |= gomas.TRANSA
	}

	inner := aCols
	extent := cRows
	var sizesOK bool
	if bits&gomas.TRANSA != 0 {
		sizesOK = cRows == cCols && cRows == aCols && bCols == aCols && bRows == aRows
		inner = aRows
	} else {
		sizesOK = cRows == cCols && cRows == aRows && bRows == aRows && bCols == aCols
	}
	if !sizesOK {
		return gomas.NewError(gomas.ESIZE, "Update2Sym")
	}

	// One direct kernel call when parallel execution is disabled or the
	// problem does not exceed a single block.
	if cfg.NProc == 1 || cfg.WB <= 0 || extent <= cfg.WB {
		syr2k(Cc, A, B, alpha, beta, bits, inner, 0, extent, cfg)
		return nil
	}

	// Otherwise schedule one task per block of the range [0, extent).
	done := make(chan int, 4)
	_, nBlocks := blocking(0, extent, cfg.WB)
	for blk := 0; blk < nBlocks; blk++ {
		// Fresh variables per iteration so each closure sees its own range.
		first := blockIndex(blk, nBlocks, cfg.WB, extent)
		last := blockIndex(blk+1, nBlocks, cfg.WB, extent)
		cfg.Sched.Schedule(gomas.NewTask(func(q chan int) {
			syr2k(Cc, A, B, alpha, beta, bits, inner, first, last, cfg)
			q <- 1
		}, done))
	}
	// Collect one completion signal per scheduled task.
	for pending := nBlocks; pending > 0; pending-- {
		<-done
	}
	return nil
}
Beispiel #2
0
/*
 * MultTrm performs triangular matrix multiplication by validating the operand
 * shapes and dispatching to the trmm kernel.
 *
 * A must be square. With gomas.RIGHT set, the column count of B must equal the
 * order of A and the work is partitioned over the rows of B; otherwise
 * (gomas.LEFT or no side bit) the row count of B must equal the order of A and
 * the work is partitioned over the columns of B.
 *
 * The first entry of confs, when present, replaces gomas.DefaultConf().
 *
 * Returns nil on success or when either operand is empty, and a gomas.ESIZE
 * error when the shapes do not agree.
 */
func MultTrm(B, A *cmat.FloatMatrix, alpha float64, bits int, confs ...*gomas.Config) *gomas.Error {
	cfg := gomas.DefaultConf()
	if len(confs) > 0 {
		cfg = confs[0]
	}

	if B.Len() == 0 || A.Len() == 0 {
		return nil
	}

	aRows, aCols := A.Size()
	bRows, bCols := B.Size()

	inner := aCols
	extent := bCols
	// Both sides require a square A; only the conformance test against B and
	// the partitioned dimension depend on the side.
	sizesOK := aRows == aCols
	if bits&gomas.RIGHT != 0 {
		sizesOK = sizesOK && bCols == aRows
		extent = bRows
	} else {
		sizesOK = sizesOK && aCols == bRows
	}
	if !sizesOK {
		return gomas.NewError(gomas.ESIZE, "MultTrm")
	}

	// Tasks are sized at half the configured block width.
	half := cfg.WB / 2

	// single threaded
	if cfg.NProc == 1 || cfg.WB <= 0 || extent < half {
		trmm(B, A, alpha, bits, inner, 0, extent, cfg)
		return nil
	}

	// parallelized: one task per block of [0, extent)
	done := make(chan int, 4)
	_, nBlocks := blocking(0, extent, half)
	for blk := 0; blk < nBlocks; blk++ {
		// Fresh variables per iteration so each closure sees its own range.
		first := blockIndex(blk, nBlocks, half, extent)
		last := blockIndex(blk+1, nBlocks, half, extent)
		cfg.Sched.Schedule(gomas.NewTask(func(q chan int) {
			trmm(B, A, alpha, bits, inner, first, last, cfg)
			q <- 1
		}, done))
	}
	// Collect one completion signal per scheduled task.
	for pending := nBlocks; pending > 0; pending-- {
		<-done
	}
	return nil
}
Beispiel #3
0
/*
 * Mult is the general matrix-matrix multiplication entry point.
 *
 * Computes C = beta*C + alpha*op(A)*op(B), where op is an optional transpose
 * selected through the bits argument: A is transposed when gomas.TRANSA is
 * set and B is transposed when gomas.TRANSB is set.
 *
 * The shapes must conform: Cc has as many rows as op(A), as many columns as
 * op(B), and the column count of op(A) equals the row count of op(B).
 *
 * The optional Config block supplies the blocking parameters; the first entry
 * of confs, when present, replaces gomas.DefaultConf(). The gemm kernel is
 * called once when NProc is 1, WB is not positive, or Cc holds fewer than
 * WB*WB elements; otherwise one task per WB-sized block of Cc is scheduled.
 *
 * Returns nil on success or when A or B is empty, and a gomas.ESIZE error
 * when the shapes do not conform.
 */
func Mult(Cc, A, B *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error {
	cfg := gomas.DefaultConf()
	if len(confs) > 0 {
		cfg = confs[0]
	}

	if A.Len() == 0 || B.Len() == 0 {
		return nil
	}

	cRows, cCols := Cc.Size()
	aRows, aCols := A.Size()
	bRows, bCols := B.Size()

	// Derive the shapes of op(A) and op(B) from the masked transpose bits.
	both := gomas.TRANSA | gomas.TRANSB
	mode := bits & both
	opARows, opACols := aRows, aCols
	if mode == gomas.TRANSA || mode == both {
		opARows, opACols = aCols, aRows
	}
	opBRows, opBCols := bRows, bCols
	if mode == gomas.TRANSB || mode == both {
		opBRows, opBCols = bCols, bRows
	}

	if !(cRows == opARows && cCols == opBCols && opACols == opBRows) {
		return gomas.NewError(gomas.ESIZE, "Mult")
	}
	// Shared inner dimension of the product.
	inner := opACols

	// single threaded
	if cfg.NProc == 1 || cfg.WB <= 0 || Cc.Len() < cfg.WB*cfg.WB {
		gemm(Cc, A, B, alpha, beta, bits, inner, 0, cCols, 0, cRows, cfg)
		return nil
	}

	// parallelized: one task per (column block, row block) pair of Cc
	done := make(chan int, 4)
	rowBlocks, colBlocks := blocking(cRows, cCols, cfg.WB)
	pending := 0
	for cb := 0; cb < colBlocks; cb++ {
		colFirst := blockIndex(cb, colBlocks, cfg.WB, cCols)
		colLast := blockIndex(cb+1, colBlocks, cfg.WB, cCols)
		for rb := 0; rb < rowBlocks; rb++ {
			// Fresh variables per iteration so each closure sees its own block.
			rowFirst := blockIndex(rb, rowBlocks, cfg.WB, cRows)
			rowLast := blockIndex(rb+1, rowBlocks, cfg.WB, cRows)
			pending++
			cfg.Sched.Schedule(gomas.NewTask(func(q chan int) {
				gemm(Cc, A, B, alpha, beta, bits, inner, colFirst, colLast, rowFirst, rowLast, cfg)
				q <- 1
			}, done))
		}
	}
	// wait for every scheduled subtask to report completion
	for ; pending > 0; pending-- {
		<-done
	}
	return nil
}
Beispiel #4
0
/*
 * UpdateTrm validates the operand shapes and runs the updtrm kernel on Cc, A
 * and B, either in one call or as scheduler tasks.
 *
 * The shape rules are those of a product op(A)*op(B) stored in Cc, where A is
 * transposed when gomas.TRANSA is set and B when gomas.TRANSB is set: Cc has
 * as many rows as op(A), as many columns as op(B), and the column count of
 * op(A) equals the row count of op(B).
 *
 * In the parallel path the work is split by row blocks when gomas.UPPER is
 * set (upper trapezoidal pieces) and by column blocks otherwise (lower
 * trapezoidal pieces).
 *
 * The first entry of confs, when present, replaces gomas.DefaultConf().
 *
 * Returns nil on success or when A or B is empty, and a gomas.ESIZE error
 * when the shapes do not conform.
 *
 * NOTE(review): the arithmetic itself is defined by updtrm, which is not part
 * of this excerpt; presumably a triangular/trapezoidal update of Cc -- confirm.
 */
func UpdateTrm(Cc, A, B *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error {
	cfg := gomas.DefaultConf()
	if len(confs) > 0 {
		cfg = confs[0]
	}
	if A.Len() == 0 || B.Len() == 0 {
		return nil
	}

	cRows, cCols := Cc.Size()
	aRows, aCols := A.Size()
	bRows, bCols := B.Size()

	// Derive the shapes of op(A) and op(B) from the masked transpose bits.
	both := gomas.TRANSA | gomas.TRANSB
	mode := bits & both
	opARows, opACols := aRows, aCols
	if mode == gomas.TRANSA || mode == both {
		opARows, opACols = aCols, aRows
	}
	opBRows, opBCols := bRows, bCols
	if mode == gomas.TRANSB || mode == both {
		opBRows, opBCols = bCols, bRows
	}

	if !(cRows == opARows && cCols == opBCols && opACols == opBRows) {
		return gomas.NewError(gomas.ESIZE, "UpdateTrm")
	}
	// Shared inner dimension of the product.
	inner := opACols

	// single threaded
	if cfg.NProc == 1 || cfg.WB <= 0 || Cc.Len() < cfg.WB*cfg.WB {
		updtrm(Cc, A, B, alpha, beta, bits, inner, 0, cCols, 0, cRows, cfg)
		return nil
	}

	// parallelized
	done := make(chan int, 4)
	rowBlocks, colBlocks := blocking(cRows, cCols, cfg.WB)
	pending := 0
	if bits&gomas.UPPER != 0 {
		// by rows; upper trapezoidal: each task takes one row block and the
		// column range from that block's first index to the last column
		for rb := 0; rb < rowBlocks; rb++ {
			rowFirst := blockIndex(rb, rowBlocks, cfg.WB, cRows)
			rowLast := blockIndex(rb+1, rowBlocks, cfg.WB, cRows)
			cfg.Sched.Schedule(gomas.NewTask(func(q chan int) {
				updtrm(Cc, A, B, alpha, beta, bits, inner, rowFirst, cCols, rowFirst, rowLast, cfg)
				q <- 1
			}, done))
			pending++
		}
	} else {
		// by columns; lower trapezoidal: each task takes one column block and
		// the row range from that block's first index to the last row
		for cb := 0; cb < colBlocks; cb++ {
			colFirst := blockIndex(cb, colBlocks, cfg.WB, cCols)
			colLast := blockIndex(cb+1, colBlocks, cfg.WB, cCols)
			cfg.Sched.Schedule(gomas.NewTask(func(q chan int) {
				updtrm(Cc, A, B, alpha, beta, bits, inner, colFirst, colLast, colFirst, cRows, cfg)
				q <- 1
			}, done))
			pending++
		}
	}
	// wait for every scheduled subtask to report completion
	for ; pending > 0; pending-- {
		<-done
	}
	return nil
}
Beispiel #5
0
/*
 * UpdateSym performs symmetric rank-k update C = beta*C + alpha*A*A.T or
 * C = beta*C + alpha*A.T*A if the gomas.TRANS (or gomas.TRANSA) bit is set.
 *
 * c must be square. In the non-transposed case the row count of a must equal
 * the order of c; in the transposed case the column count of a must.
 *
 * The first entry of confs, when present, replaces gomas.DefaultConf(). The
 * syrk kernel is called once when NProc is 1, WB is not positive, or the order
 * of c does not exceed WB. Otherwise the update is split into trapezoidal
 * blocks handled by updtrm tasks: column blocks of the lower part when
 * gomas.LOWER is set, row blocks of the upper part otherwise.
 *
 * Returns nil on success or when c has no elements, and a gomas.ESIZE error
 * when the shapes do not agree.
 */
func UpdateSym(c, a *cmat.FloatMatrix, alpha, beta float64, bits int, confs ...*gomas.Config) *gomas.Error {

	conf := gomas.DefaultConf()
	if len(confs) > 0 {
		conf = confs[0]
	}

	cr, cc := c.Size()
	ar, ac := a.Size()
	if cr*cc == 0 {
		return nil
	}

	// Normalize: a request made with TRANS is carried forward as TRANSA, so
	// TRANSA alone decides the variant from here on.
	if bits&gomas.TRANS != 0 && bits&gomas.TRANSA == 0 {
		bits |= gomas.TRANSA
	}
	transposed := bits&gomas.TRANSA != 0

	P := ac
	E := cr
	ok := cr == cc && cr == ar
	if transposed {
		ok = cr == cc && cr == ac
		P = ar
	}
	if !ok {
		return gomas.NewError(gomas.ESIZE, "UpdateSym")
	}
	if conf.NProc == 1 || conf.WB <= 0 || E <= conf.WB {
		syrk(c, a, alpha, beta, bits, P, 0, E, conf)
		return nil
	}

	// parallelized
	var sbits int
	wait := make(chan int, 4)
	nM, nN := blocking(E, E, conf.WB)
	nT := 0
	// Select the block-update variant from the same normalized TRANSA bit that
	// the dimension check above used, so both always agree on whether the
	// update is A.T*A (a transposed on the left) or A*A.T (a transposed on the
	// right).
	if transposed {
		sbits |= gomas.TRANSA
	} else {
		sbits |= gomas.TRANSB
	}
	if bits&gomas.LOWER != 0 {
		sbits |= gomas.LOWER
		for j := 0; j < nN; j++ {
			// Fresh variables per iteration so each closure sees its own block.
			jS := blockIndex(j, nN, conf.WB, E)
			jL := blockIndex(j+1, nN, conf.WB, E)
			// update lower trapezoidal/triangular blocks
			task := func(q chan int) {
				updtrm(c, a, a, alpha, beta, sbits, P, jS, jL, jS, E, conf)
				q <- 1
			}
			conf.Sched.Schedule(gomas.NewTask(task, wait))
			nT += 1
		}
	} else {
		sbits |= gomas.UPPER
		for j := 0; j < nM; j++ {
			// Fresh variables per iteration so each closure sees its own block.
			jS := blockIndex(j, nM, conf.WB, E)
			jL := blockIndex(j+1, nM, conf.WB, E)
			// update upper trapezoidal/triangular blocks
			task := func(q chan int) {
				updtrm(c, a, a, alpha, beta, sbits, P, jS, E, jS, jL, conf)
				q <- 1
			}
			conf.Sched.Schedule(gomas.NewTask(task, wait))
			nT += 1
		}
	}
	// wait for every scheduled subtask to report completion
	for nT > 0 {
		<-wait
		nT -= 1
	}
	return nil
}