Golang TrmmFloat Examples, github.com/henrylee2cn/algorithm/linalg/blas.TrmmFloat Golang Examples

Example #1

0

Show file

File: decomp_test.go Project: sguzwf/algorithm

func updateBlas(t *testing.T, Y1, Y2, C1, C2, T, W *matrix.FloatMatrix) {
	if W.Rows() != C1.Cols() {
		panic("W.Rows != C1.Cols")
	}
	// W = C1.T
	ScalePlus(W, C1, 0.0, 1.0, TRANSB)
	//fmt.Printf("W = C1.T:\n%v\n", W)
	// W = C1.T*Y1
	blas.TrmmFloat(Y1, W, 1.0, linalg.OptLower, linalg.OptUnit, linalg.OptRight)
	t.Logf("W = C1.T*Y1:\n%v\n", W)
	// W = W + C2.T*Y2
	blas.GemmFloat(C2, Y2, W, 1.0, 1.0, linalg.OptTransA)
	t.Logf("W = W + C2.T*Y2:\n%v\n", W)

	// --- here: W == C.T*Y ---
	// W = W*T
	blas.TrmmFloat(T, W, 1.0, linalg.OptUpper, linalg.OptRight)
	t.Logf("W = C.T*Y*T:\n%v\n", W)

	// --- here: W == C.T*Y*T ---
	// C2 = C2 - Y2*W.T
	blas.GemmFloat(Y2, W, C2, -1, 1.0, linalg.OptTransB)
	t.Logf("C2 = C2 - Y2*W.T:\n%v\n", C2)
	//  W = Y1*W.T ==> W.T = W*Y1.T
	blas.TrmmFloat(Y1, W, 1.0, linalg.OptLower,
		linalg.OptUnit, linalg.OptRight, linalg.OptTrans)
	t.Logf("W.T = W*Y1.T:\n%v\n", W)

	// C1 = C1 - W.T
	ScalePlus(C1, W, 1.0, -1.0, TRANSB)
	//fmt.Printf("C1 = C1 - W.T:\n%v\n", C1)

	// --- here: C = (I - Y*T*Y.T).T * C ---
}

Example #2

0

Show file

File: ttrm_test.go Project: sguzwf/algorithm

func trmmTest(t *testing.T, A *matrix.FloatMatrix, flags Flags, nb int) bool {
	var B0 *matrix.FloatMatrix
	N := A.Cols()
	S := 0
	E := A.Cols()
	side := linalg.OptLeft
	if flags&RIGHT != 0 {
		B0 = matrix.FloatWithValue(2, A.Rows(), 2.0)
		side = linalg.OptRight
		E = B0.Rows()
	} else {
		B0 = matrix.FloatWithValue(A.Rows(), 2, 2.0)
		E = B0.Cols()
	}
	B1 := B0.Copy()

	trans := linalg.OptNoTrans
	if flags&TRANSA != 0 {
		trans = linalg.OptTransA
	}
	diag := linalg.OptNonUnit
	if flags&UNIT != 0 {
		diag = linalg.OptUnit
	}
	uplo := linalg.OptUpper
	if flags&LOWER != 0 {
		uplo = linalg.OptLower
	}

	blas.TrmmFloat(A, B0, 1.0, uplo, diag, trans, side)
	if A.Rows() < 8 {
		//t.Logf("..A\n%v\n", A)
		t.Logf("  BLAS B0:\n%v\n", B0)
	}

	Ar := A.FloatArray()
	Br := B1.FloatArray()
	if nb != 0 {
		DTrmmBlk(Br, Ar, 1.0, flags, B1.LeadingIndex(), A.LeadingIndex(),
			N, S, E, nb)
	} else {
		DTrmmUnblk(Br, Ar, 1.0, flags, B1.LeadingIndex(), A.LeadingIndex(),
			N, S, E, 0)
	}
	result := B0.AllClose(B1)
	t.Logf("   B0 == B1: %v\n", result)
	if A.Rows() < 8 {
		t.Logf("  DTrmm B1:\n%v\n", B1)
	}
	return result
}

Example #3

0

Show file

File: mcsdp_test.go Project: sguzwf/algorithm

func mcsdp(w *matrix.FloatMatrix) (*Solution, error) {
	//
	// Returns solution x, z to
	//
	//    (primal)  minimize    sum(x)
	//              subject to  w + diag(x) >= 0
	//
	//    (dual)    maximize    -tr(w*z)
	//              subject to  diag(z) = 1
	//                          z >= 0.
	//
	n := w.Rows()
	G := &matrixFs{n}

	cngrnc := func(r, x *matrix.FloatMatrix, alpha float64) (err error) {
		// Congruence transformation
		//
		//    x := alpha * r'*x*r.
		//
		// r and x are square matrices.
		//
		err = nil

		// tx = matrix(x, (n,n)) is copying and reshaping
		// scale diagonal of x by 1/2, (x is (n,n))
		tx := x.Copy()
		matrix.Reshape(tx, n, n)
		tx.Diag().Scale(0.5)

		// a := tril(x)*r
		// (python: a = +r is really making a copy of r)
		a := r.Copy()

		err = blas.TrmmFloat(tx, a, 1.0, linalg.OptLeft)

		// x := alpha*(a*r' + r*a')
		err = blas.Syr2kFloat(r, a, tx, alpha, 0.0, linalg.OptTrans)

		// x[:] = tx[:]
		tx.CopyTo(x)
		return
	}

	Fkkt := func(W *sets.FloatMatrixSet) (KKTFunc, error) {

		//    Solve
		//                  -diag(z)                           = bx
		//        -diag(x) - inv(rti*rti') * z * inv(rti*rti') = bs
		//
		//    On entry, x and z contain bx and bs.
		//    On exit, they contain the solution, with z scaled
		//    (inv(rti)'*z*inv(rti) is returned instead of z).
		//
		//    We first solve
		//
		//        ((rti*rti') .* (rti*rti')) * x = bx - diag(t*bs*t)
		//
		//    and take z  = -rti' * (diag(x) + bs) * rti.

		var err error = nil
		rti := W.At("rti")[0]

		// t = rti*rti' as a nonsymmetric matrix.
		t := matrix.FloatZeros(n, n)
		err = blas.GemmFloat(rti, rti, t, 1.0, 0.0, linalg.OptTransB)
		if err != nil {
			return nil, err
		}

		// Cholesky factorization of tsq = t.*t.
		tsq := matrix.Mul(t, t)
		err = lapack.Potrf(tsq)
		if err != nil {
			return nil, err
		}

		f := func(x, y, z *matrix.FloatMatrix) (err error) {
			// tbst := t * zs * t = t * bs * t
			tbst := z.Copy()
			matrix.Reshape(tbst, n, n)
			cngrnc(t, tbst, 1.0)

			// x := x - diag(tbst) = bx - diag(rti*rti' * bs * rti*rti')
			diag := tbst.Diag().Transpose()
			x.Minus(diag)

			// x := (t.*t)^{-1} * x = (t.*t)^{-1} * (bx - diag(t*bs*t))
			err = lapack.Potrs(tsq, x)

			// z := z + diag(x) = bs + diag(x)
			// z, x are really column vectors here
			z.AddIndexes(matrix.MakeIndexSet(0, n*n, n+1), x.FloatArray())

			// z := -rti' * z * rti = -rti' * (diag(x) + bs) * rti
			cngrnc(rti, z, -1.0)
			return nil
		}
		return f, nil
	}

	c := matrix.FloatWithValue(n, 1, 1.0)

	// initial feasible x: x = 1.0 - min(lmbda(w))
	lmbda := matrix.FloatZeros(n, 1)
	wp := w.Copy()
	lapack.Syevx(wp, lmbda, nil, 0.0, nil, []int{1, 1}, linalg.OptRangeInt)
	x0 := matrix.FloatZeros(n, 1).Add(-lmbda.GetAt(0, 0) + 1.0)
	s0 := w.Copy()
	s0.Diag().Plus(x0.Transpose())
	matrix.Reshape(s0, n*n, 1)

	// initial feasible z is identity
	z0 := matrix.FloatIdentity(n)
	matrix.Reshape(z0, n*n, 1)

	dims := sets.DSetNew("l", "q", "s")
	dims.Set("s", []int{n})

	primalstart := sets.FloatSetNew("x", "s")
	dualstart := sets.FloatSetNew("z")
	primalstart.Set("x", x0)
	primalstart.Set("s", s0)
	dualstart.Set("z", z0)

	var solopts SolverOptions
	solopts.MaxIter = 30
	solopts.ShowProgress = false
	h := w.Copy()
	matrix.Reshape(h, h.NumElements(), 1)
	return ConeLpCustomMatrix(c, G, h, nil, nil, dims, Fkkt, &solopts, primalstart, dualstart)
}

Example #4

0

Show file

File: misc.go Project: sguzwf/algorithm

/*
   Applies Nesterov-Todd scaling or its inverse.

   Computes

        x := W*x        (trans is false 'N', inverse = false 'N')
        x := W^T*x      (trans is true  'T', inverse = false 'N')
        x := W^{-1}*x   (trans is false 'N', inverse = true  'T')
        x := W^{-T}*x   (trans is true  'T', inverse = true  'T').

   x is a dense float matrix.

   W is a MatrixSet with entries:

   - W['dnl']: positive vector
   - W['dnli']: componentwise inverse of W['dnl']
   - W['d']: positive vector
   - W['di']: componentwise inverse of W['d']
   - W['v']: lists of 2nd order cone vectors with unit hyperbolic norms
   - W['beta']: list of positive numbers
   - W['r']: list of square matrices
   - W['rti']: list of square matrices.  rti[k] is the inverse transpose
     of r[k].

   The 'dnl' and 'dnli' entries are optional, and only present when the
   function is called from the nonlinear solver.
*/
func scale(x *matrix.FloatMatrix, W *sets.FloatMatrixSet, trans, inverse bool) (err error) {
	/*DEBUGGED*/
	var wl []*matrix.FloatMatrix
	var w *matrix.FloatMatrix
	ind := 0
	err = nil

	// var minor int = 0
	//if ! checkpnt.MinorEmpty() {
	//	minor = checkpnt.MinorTop()
	//}

	//fmt.Printf("\n%d.%04d scaling x=\n%v\n", checkpnt.Major(), minor, x.ToString("%.17f"))

	// Scaling for nonlinear component xk is xk := dnl .* xk; inverse
	// scaling is xk ./ dnl = dnli .* xk, where dnl = W['dnl'],
	// dnli = W['dnli'].

	if wl = W.At("dnl"); wl != nil {
		if inverse {
			w = W.At("dnli")[0]
		} else {
			w = W.At("dnl")[0]
		}
		for k := 0; k < x.Cols(); k++ {
			err = blas.TbmvFloat(w, x, &la_.IOpt{"n", w.Rows()}, &la_.IOpt{"k", 0},
				&la_.IOpt{"lda", 1}, &la_.IOpt{"offsetx", k * x.Rows()})
			if err != nil {
				//fmt.Printf("1. TbmvFloat: %v\n", err)
				return
			}
		}
		ind += w.Rows()
	}

	//if ! checkpnt.MinorEmpty() {
	//    checkpnt.Check("000scale", minor)
	//}

	// Scaling for linear 'l' component xk is xk := d .* xk; inverse
	// scaling is xk ./ d = di .* xk, where d = W['d'], di = W['di'].

	if inverse {
		w = W.At("di")[0]
	} else {
		w = W.At("d")[0]
	}

	for k := 0; k < x.Cols(); k++ {
		err = blas.TbmvFloat(w, x, &la_.IOpt{"n", w.Rows()}, &la_.IOpt{"k", 0},
			&la_.IOpt{"lda", 1}, &la_.IOpt{"offsetx", k*x.Rows() + ind})
		if err != nil {
			//fmt.Printf("2. TbmvFloat: %v\n", err)
			return
		}
	}
	ind += w.Rows()

	//if ! checkpnt.MinorEmpty() {
	//	checkpnt.Check("010scale", minor)
	//}

	// Scaling for 'q' component is
	//
	//    xk := beta * (2*v*v' - J) * xk
	//        = beta * (2*v*(xk'*v)' - J*xk)
	//
	// where beta = W['beta'][k], v = W['v'][k], J = [1, 0; 0, -I].
	//
	//Inverse scaling is
	//
	//    xk := 1/beta * (2*J*v*v'*J - J) * xk
	//        = 1/beta * (-J) * (2*v*((-J*xk)'*v)' + xk).
	//wf := matrix.FloatZeros(x.Cols(), 1)
	w = matrix.FloatZeros(x.Cols(), 1)
	for k, v := range W.At("v") {
		m := v.Rows()
		if inverse {
			blas.ScalFloat(x, -1.0, &la_.IOpt{"offset", ind}, &la_.IOpt{"inc", x.Rows()})
		}
		err = blas.GemvFloat(x, v, w, 1.0, 0.0, la_.OptTrans, &la_.IOpt{"m", m},
			&la_.IOpt{"n", x.Cols()}, &la_.IOpt{"offsetA", ind},
			&la_.IOpt{"lda", x.Rows()})
		if err != nil {
			//fmt.Printf("3. GemvFloat: %v\n", err)
			return
		}

		err = blas.ScalFloat(x, -1.0, &la_.IOpt{"offset", ind}, &la_.IOpt{"inc", x.Rows()})
		if err != nil {
			return
		}

		err = blas.GerFloat(v, w, x, 2.0, &la_.IOpt{"m", m},
			&la_.IOpt{"n", x.Cols()}, &la_.IOpt{"lda", x.Rows()},
			&la_.IOpt{"offsetA", ind})
		if err != nil {
			//fmt.Printf("4. GerFloat: %v\n", err)
			return
		}

		var a float64
		if inverse {
			blas.ScalFloat(x, -1.0,
				&la_.IOpt{"offset", ind}, &la_.IOpt{"inc", x.Rows()})
			// a[i,j] := 1.0/W[i,j]
			a = 1.0 / W.At("beta")[0].GetIndex(k)
		} else {
			a = W.At("beta")[0].GetIndex(k)
		}
		for i := 0; i < x.Cols(); i++ {
			blas.ScalFloat(x, a, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind + i*x.Rows()})
		}
		ind += m
	}

	//if ! checkpnt.MinorEmpty() {
	//	checkpnt.Check("020scale", minor)
	//}

	// Scaling for 's' component xk is
	//
	//     xk := vec( r' * mat(xk) * r )  if trans = 'N'
	//     xk := vec( r * mat(xk) * r' )  if trans = 'T'.
	//
	// r is kth element of W['r'].
	//
	// Inverse scaling is
	//
	//     xk := vec( rti * mat(xk) * rti' )  if trans = 'N'
	//     xk := vec( rti' * mat(xk) * rti )  if trans = 'T'.
	//
	// rti is kth element of W['rti'].
	maxn := 0
	for _, r := range W.At("r") {
		if r.Rows() > maxn {
			maxn = r.Rows()
		}
	}
	a := matrix.FloatZeros(maxn, maxn)
	for k, v := range W.At("r") {
		t := trans
		var r *matrix.FloatMatrix
		if !inverse {
			r = v
			t = !trans
		} else {
			r = W.At("rti")[k]
		}

		n := r.Rows()
		for i := 0; i < x.Cols(); i++ {
			// scale diagonal of xk by 0.5
			blas.ScalFloat(x, 0.5, &la_.IOpt{"offset", ind + i*x.Rows()},
				&la_.IOpt{"inc", n + 1}, &la_.IOpt{"n", n})

			// a = r*tril(x) (t is 'N') or a = tril(x)*r  (t is 'T')
			blas.Copy(r, a)
			if !t {
				err = blas.TrmmFloat(x, a, 1.0, la_.OptRight, &la_.IOpt{"m", n},
					&la_.IOpt{"n", n}, &la_.IOpt{"lda", n}, &la_.IOpt{"ldb", n},
					&la_.IOpt{"offsetA", ind + i*x.Rows()})
				if err != nil {
					//fmt.Printf("5. TrmmFloat: %v\n", err)
					return
				}

				// x := (r*a' + a*r')  if t is 'N'
				err = blas.Syr2kFloat(r, a, x, 1.0, 0.0, la_.OptNoTrans, &la_.IOpt{"n", n},
					&la_.IOpt{"k", n}, &la_.IOpt{"ldb", n}, &la_.IOpt{"ldc", n},
					&la_.IOpt{"offsetC", ind + i*x.Rows()})
				if err != nil {
					//fmt.Printf("6. Syr2kFloat: %v\n", err)
					return
				}

			} else {
				err = blas.TrmmFloat(x, a, 1.0, la_.OptLeft, &la_.IOpt{"m", n},
					&la_.IOpt{"n", n}, &la_.IOpt{"lda", n}, &la_.IOpt{"ldb", n},
					&la_.IOpt{"offsetA", ind + i*x.Rows()})
				if err != nil {
					//fmt.Printf("7. TrmmFloat: %v\n", err)
					return
				}

				// x := (r'*a + a'*r)  if t is 'T'
				err = blas.Syr2kFloat(r, a, x, 1.0, 0.0, la_.OptTrans, &la_.IOpt{"n", n},
					&la_.IOpt{"k", n}, &la_.IOpt{"ldb", n}, &la_.IOpt{"ldc", n},
					&la_.IOpt{"offsetC", ind + i*x.Rows()})
				if err != nil {
					//fmt.Printf("8. Syr2kFloat: %v\n", err)
					return
				}
			}
		}
		ind += n * n
	}
	//if ! checkpnt.MinorEmpty() {
	//	checkpnt.Check("030scale", minor)
	//}
	return
}

Example #5

0

Show file

File: misc.go Project: sguzwf/algorithm

/*
   Returns the Nesterov-Todd scaling W at points s and z, and stores the
   scaled variable in lmbda.

       W * z = W^{-T} * s = lmbda.

   W is a MatrixSet with entries:

   - W['dnl']: positive vector
   - W['dnli']: componentwise inverse of W['dnl']
   - W['d']: positive vector
   - W['di']: componentwise inverse of W['d']
   - W['v']: lists of 2nd order cone vectors with unit hyperbolic norms
   - W['beta']: list of positive numbers
   - W['r']: list of square matrices
   - W['rti']: list of square matrices.  rti[k] is the inverse transpose
     of r[k].

*/
func computeScaling(s, z, lmbda *matrix.FloatMatrix, dims *sets.DimensionSet, mnl int) (W *sets.FloatMatrixSet, err error) {
	/*DEBUGGED*/
	err = nil
	W = sets.NewFloatSet("dnl", "dnli", "d", "di", "v", "beta", "r", "rti")

	// For the nonlinear block:
	//
	//     W['dnl'] = sqrt( s[:mnl] ./ z[:mnl] )
	//     W['dnli'] = sqrt( z[:mnl] ./ s[:mnl] )
	//     lambda[:mnl] = sqrt( s[:mnl] .* z[:mnl] )

	var stmp, ztmp, lmd *matrix.FloatMatrix
	if mnl > 0 {
		stmp = matrix.FloatVector(s.FloatArray()[:mnl])
		ztmp = matrix.FloatVector(z.FloatArray()[:mnl])
		//dnl := stmp.Div(ztmp)
		//dnl.Apply(dnl, math.Sqrt)
		dnl := matrix.Sqrt(matrix.Div(stmp, ztmp))
		//dnli := dnl.Copy()
		//dnli.Apply(dnli, func(a float64)float64 { return 1.0/a })
		dnli := matrix.Inv(dnl)
		W.Set("dnl", dnl)
		W.Set("dnli", dnli)
		//lmd = stmp.Mul(ztmp)
		//lmd.Apply(lmd, math.Sqrt)
		lmd = matrix.Sqrt(matrix.Mul(stmp, ztmp))
		lmbda.SetIndexesFromArray(lmd.FloatArray(), matrix.MakeIndexSet(0, mnl, 1)...)
	} else {
		// set for empty matrices
		//W.Set("dnl", matrix.FloatZeros(0, 1))
		//W.Set("dnli", matrix.FloatZeros(0, 1))
		mnl = 0
	}

	// For the 'l' block:
	//
	//     W['d'] = sqrt( sk ./ zk )
	//     W['di'] = sqrt( zk ./ sk )
	//     lambdak = sqrt( sk .* zk )
	//
	// where sk and zk are the first dims['l'] entries of s and z.
	// lambda_k is stored in the first dims['l'] positions of lmbda.

	m := dims.At("l")[0]
	//td := s.FloatArray()
	stmp = matrix.FloatVector(s.FloatArray()[mnl : mnl+m])
	//zd := z.FloatArray()
	ztmp = matrix.FloatVector(z.FloatArray()[mnl : mnl+m])
	//fmt.Printf(".Sqrt()=\n%v\n", matrix.Div(stmp, ztmp).Sqrt().ToString("%.17f"))
	//d := stmp.Div(ztmp)
	//d.Apply(d, math.Sqrt)
	d := matrix.Div(stmp, ztmp).Sqrt()
	//di := d.Copy()
	//di.Apply(di, func(a float64)float64 { return 1.0/a })
	di := matrix.Inv(d)
	//fmt.Printf("d:\n%v\n", d)
	//fmt.Printf("di:\n%v\n", di)
	W.Set("d", d)
	W.Set("di", di)
	//lmd = stmp.Mul(ztmp)
	//lmd.Apply(lmd, math.Sqrt)
	lmd = matrix.Mul(stmp, ztmp).Sqrt()
	// lmd has indexes mnl:mnl+m and length of m
	lmbda.SetIndexesFromArray(lmd.FloatArray(), matrix.MakeIndexSet(mnl, mnl+m, 1)...)
	//fmt.Printf("after l:\n%v\n", lmbda)

	/*
	   For the 'q' blocks, compute lists 'v', 'beta'.

	   The vector v[k] has unit hyperbolic norm:

	       (sqrt( v[k]' * J * v[k] ) = 1 with J = [1, 0; 0, -I]).

	   beta[k] is a positive scalar.

	   The hyperbolic Householder matrix H = 2*v[k]*v[k]' - J
	   defined by v[k] satisfies

	       (beta[k] * H) * zk  = (beta[k] * H) \ sk = lambda_k

	   where sk = s[indq[k]:indq[k+1]], zk = z[indq[k]:indq[k+1]].

	   lambda_k is stored in lmbda[indq[k]:indq[k+1]].
	*/
	ind := mnl + dims.At("l")[0]
	var beta *matrix.FloatMatrix

	for _, k := range dims.At("q") {
		W.Append("v", matrix.FloatZeros(k, 1))
	}
	beta = matrix.FloatZeros(len(dims.At("q")), 1)
	W.Set("beta", beta)
	vset := W.At("v")
	for k, m := range dims.At("q") {
		v := vset[k]
		// a = sqrt( sk' * J * sk )  where J = [1, 0; 0, -I]
		aa := jnrm2(s, m, ind)
		// b = sqrt( zk' * J * zk )
		bb := jnrm2(z, m, ind)
		// beta[k] = ( a / b )**1/2
		beta.SetIndex(k, math.Sqrt(aa/bb))
		// c = sqrt( (sk/a)' * (zk/b) + 1 ) / sqrt(2)
		c0 := blas.DotFloat(s, z, &la_.IOpt{"n", m},
			&la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind})
		cc := math.Sqrt((c0/aa/bb + 1.0) / 2.0)

		// vk = 1/(2*c) * ( (sk/a) + J * (zk/b) )
		blas.CopyFloat(z, v, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})
		blas.ScalFloat(v, -1.0/bb)
		v.SetIndex(0, -1.0*v.GetIndex(0))
		blas.AxpyFloat(s, v, 1.0/aa, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})
		blas.ScalFloat(v, 1.0/2.0/cc)

		// v[k] = 1/sqrt(2*(vk0 + 1)) * ( vk + e ),  e = [1; 0]
		v.SetIndex(0, v.GetIndex(0)+1.0)
		blas.ScalFloat(v, (1.0 / math.Sqrt(2.0*v.GetIndex(0))))
		/*
		   To get the scaled variable lambda_k

		       d =  sk0/a + zk0/b + 2*c
		       lambda_k = [ c;
		                    (c + zk0/b)/d * sk1/a + (c + sk0/a)/d * zk1/b ]
		       lambda_k *= sqrt(a * b)
		*/
		lmbda.SetIndex(ind, cc)
		dd := 2*cc + s.GetIndex(ind)/aa + z.GetIndex(ind)/bb
		blas.CopyFloat(s, lmbda, &la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1},
			&la_.IOpt{"n", m - 1})
		zz := (cc + z.GetIndex(ind)/bb) / dd / aa
		ss := (cc + s.GetIndex(ind)/aa) / dd / bb
		blas.ScalFloat(lmbda, zz, &la_.IOpt{"offset", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, lmbda, ss, &la_.IOpt{"offsetx", ind + 1},
			&la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.ScalFloat(lmbda, math.Sqrt(aa*bb), &la_.IOpt{"offset", ind}, &la_.IOpt{"n", m})

		ind += m
		//fmt.Printf("after q[%d]:\n%v\n", k, lmbda)
	}
	/*
	   For the 's' blocks: compute two lists 'r' and 'rti'.

	       r[k]' * sk^{-1} * r[k] = diag(lambda_k)^{-1}
	       r[k]' * zk * r[k] = diag(lambda_k)

	   where sk and zk are the entries inds[k] : inds[k+1] of
	   s and z, reshaped into symmetric matrices.

	   rti[k] is the inverse of r[k]', so

	       rti[k]' * sk * rti[k] = diag(lambda_k)^{-1}
	       rti[k]' * zk^{-1} * rti[k] = diag(lambda_k).

	   The vectors lambda_k are stored in

	       lmbda[ dims['l'] + sum(dims['q']) : -1 ]
	*/
	for _, k := range dims.At("s") {
		W.Append("r", matrix.FloatZeros(k, k))
		W.Append("rti", matrix.FloatZeros(k, k))
	}
	maxs := maxdim(dims.At("s"))
	work := matrix.FloatZeros(maxs*maxs, 1)
	Ls := matrix.FloatZeros(maxs*maxs, 1)
	Lz := matrix.FloatZeros(maxs*maxs, 1)
	ind2 := ind
	for k, m := range dims.At("s") {
		r := W.At("r")[k]
		rti := W.At("rti")[k]

		// Factor sk = Ls*Ls'; store Ls in ds[inds[k]:inds[k+1]].
		blas.CopyFloat(s, Ls, &la_.IOpt{"offsetx", ind2}, &la_.IOpt{"n", m * m})
		lapack.PotrfFloat(Ls, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m})

		// Factor zs[k] = Lz*Lz'; store Lz in dz[inds[k]:inds[k+1]].
		blas.CopyFloat(z, Lz, &la_.IOpt{"offsetx", ind2}, &la_.IOpt{"n", m * m})
		lapack.PotrfFloat(Lz, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m})

		// SVD Lz'*Ls = U*diag(lambda_k)*V'.  Keep U in work.
		for i := 0; i < m; i++ {
			blas.ScalFloat(Ls, 0.0, &la_.IOpt{"offset", i * m}, &la_.IOpt{"n", i})
		}
		blas.CopyFloat(Ls, work, &la_.IOpt{"n", m * m})
		blas.TrmmFloat(Lz, work, 1.0, la_.OptTransA, &la_.IOpt{"lda", m}, &la_.IOpt{"ldb", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"m", m})
		lapack.GesvdFloat(work, lmbda, nil, nil,
			la_.OptJobuO, &la_.IOpt{"lda", m}, &la_.IOpt{"offsetS", ind},
			&la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// r = Lz^{-T} * U
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})
		blas.TrsmFloat(Lz, r, 1.0, la_.OptTransA,
			&la_.IOpt{"lda", m}, &la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// rti = Lz * U
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})
		blas.TrmmFloat(Lz, rti, 1.0,
			&la_.IOpt{"lda", m}, &la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// r := r * diag(sqrt(lambda_k))
		// rti := rti * diag(1 ./ sqrt(lambda_k))
		for i := 0; i < m; i++ {
			a := math.Sqrt(lmbda.GetIndex(ind + i))
			blas.ScalFloat(r, a, &la_.IOpt{"offset", m * i}, &la_.IOpt{"n", m})
			blas.ScalFloat(rti, 1.0/a, &la_.IOpt{"offset", m * i}, &la_.IOpt{"n", m})
		}
		ind += m
		ind2 += m * m
	}
	return
}