Example #1
// Inner product of two vectors in S.
func sdot(x, y *matrix.FloatMatrix, dims *DimensionSet, mnl int) float64 {
	ind := mnl + dims.At("l")[0] + dims.Sum("q")
	a := blas.DotFloat(x, y, &la_.IOpt{"n", ind})
	for _, m := range dims.At("s") {
		a += blas.DotFloat(x, y, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind},
			&la_.IOpt{"incx", m + 1}, &la_.IOpt{"incy", m + 1}, &la_.IOpt{"n", m})
		for j := 1; j < m; j++ {
			a += 2.0 * blas.DotFloat(x, y, &la_.IOpt{"offsetx", ind + j}, &la_.IOpt{"offsety", ind + j},
				&la_.IOpt{"incx", m + 1}, &la_.IOpt{"incy", m + 1}, &la_.IOpt{"n", m - j})
		ind += m * m
	return a
Example #2
// Returns x' * J * y, where J = [1, 0; 0, -I].
func jdot(x, y *matrix.FloatMatrix, n, offsetx, offsety int) float64 {
	if n <= 0 {
		n = x.NumElements()
	a := blas.DotFloat(x, y, &la_.IOpt{"n", n - 1}, &la_.IOpt{"offsetx", offsetx + 1},
		&la_.IOpt{"offsety", offsety + 1})
	return x.GetIndex(offsetx)*y.GetIndex(offsety) - a
Example #3
func sinv(x, y *matrix.FloatMatrix, dims *DimensionSet, mnl int) (err error) {

	err = nil

	// For the nonlinear and 'l' blocks:
	//     yk o\ xk = yk .\ xk.

	ind := mnl + dims.At("l")[0]
	blas.Tbsv(y, x, &la_.IOpt{"n", ind}, &la_.IOpt{"k", 0}, &la_.IOpt{"ldA", 1})

	// For the 'q' blocks:
	//                        [ l0   -l1'              ]
	//     yk o\ xk = 1/a^2 * [                        ] * xk
	//                        [ -l1  (a*I + l1*l1')/l0 ]
	// where yk = (l0, l1) and a = l0^2 - l1'*l1.

	for _, m := range dims.At("q") {
		aa := blas.Nrm2Float(y, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offset", ind + 1})
		ee := y.GetIndex(ind)
		aa = (ee + aa) * (ee - aa)
		cc := x.GetIndex(ind)
		dd := blas.DotFloat(x, y, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offsetx", ind + 1},
			&la_.IOpt{"offsety", ind + 1})
		x.SetIndex(ind, cc*ee-dd)
		blas.ScalFloat(x, aa/ee, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offset", ind + 1})
		blas.AxpyFloat(y, x, dd/ee-cc, &la_.IOpt{"n", m - 1},
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1})
		blas.ScalFloat(x, 1.0/aa, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})
		ind += m

	// For the 's' blocks:
	//     yk o\ xk =  xk ./ gamma
	// where gammaij = .5 * (yk_i + yk_j).

	ind2 := ind
	for _, m := range dims.At("s") {
		for j := 0; j < m; j++ {
			u := matrix.FloatVector(y.FloatArray()[ind2+j : ind2+m])
			u.Add(y.GetIndex(ind2 + j))
			blas.Tbsv(u, x, &la_.IOpt{"n", m - j}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1},
				&la_.IOpt{"offsetx", ind + j*(m+1)})
		ind += m * m
		ind2 += m
Example #4
func main() {

	Sdata := [][]float64{
		[]float64{ 4e-2,  6e-3, -4e-3,   0.0 },
        []float64{ 6e-3,  1e-2,  0.0,    0.0 },
        []float64{-4e-3,  0.0,   2.5e-3, 0.0 },
        []float64{ 0.0,   0.0,   0.0,    0.0 }}

	pbar := matrix.FloatVector([]float64{.12, .10, .07, .03})
	S := matrix.FloatMatrixStacked(Sdata)
	n := pbar.Rows()
	G := matrix.FloatDiagonal(n, -1.0)
	h := matrix.FloatZeros(n, 1)
	A := matrix.FloatWithValue(1, n, 1.0)
	b := matrix.FloatNew(1,1, []float64{1.0})

	var solopts cvx.SolverOptions
	solopts.MaxIter = 30
	solopts.ShowProgress = true

	mu := 1.0
	Smu := S.Copy().Scale(mu)
	pbarNeg := pbar.Copy().Scale(-1.0)
	fmt.Printf("Smu=\n%v\n", Smu.String())
	fmt.Printf("-pbar=\n%v\n", pbarNeg.String())

	sol, err := cvx.Qp(Smu, pbarNeg, G, h, A, b, &solopts, nil)

	fmt.Printf("status: %v\n", err)
	if sol != nil && sol.Status == cvx.Optimal {
		x := sol.Result.At("x")[0]
		ret := blas.DotFloat(x, pbar)
		risk := math.Sqrt(blas.DotFloat(x, S.Times(x)))
		fmt.Printf("ret=%.3f, risk=%.3f\n", ret, risk)
		fmt.Printf("x=\n%v\n", x)
Example #5
//    Solves a pair of primal and dual cone programs
//        minimize    c'*x
//        subject to  G*x + s = h
//                    A*x = b
//                    s >= 0
//        maximize    -h'*z - b'*y
//        subject to  G'*z + A'*y + c = 0
//                    z >= 0.
//    The inequalities are with respect to a cone C defined as the Cartesian
//    product of N + M + 1 cones:
//        C = C_0 x C_1 x .... x C_N x C_{N+1} x ... x C_{N+M}.
//    The first cone C_0 is the nonnegative orthant of dimension ml.
//    The next N cones are second order cones of dimension mq[0], ...,
//    mq[N-1].  The second order cone of dimension m is defined as
//        { (u0, u1) in R x R^{m-1} | u0 >= ||u1||_2 }.
//    The next M cones are positive semidefinite cones of order ms[0], ...,
//    ms[M-1] >= 0.
func ConeLp(c, G, h, A, b *matrix.FloatMatrix, dims *DimensionSet, solopts *SolverOptions, primalstart, dualstart *FloatMatrixSet) (sol *Solution, err error) {

	err = nil
	const EXPON = 3
	const STEP = 0.99

	sol = &Solution{Unknown,
		nil, nil, nil, nil, nil,
		0.0, 0.0, 0.0, 0.0, 0.0,
		0.0, 0.0, 0.0, 0.0, 0.0, 0}

	//var primalstart *FloatMatrixSet = nil
	//var dualstart *FloatMatrixSet = nil
	var refinement int

	if solopts.Refinement > 0 {
		refinement = solopts.Refinement
	} else {
		refinement = 0
		if len(dims.At("q")) > 0 || len(dims.At("s")) > 0 {
			refinement = 1
	feasTolerance := FEASTOL
	absTolerance := ABSTOL
	relTolerance := RELTOL
	if solopts.FeasTol > 0.0 {
		feasTolerance = solopts.FeasTol
	if solopts.AbsTol > 0.0 {
		absTolerance = solopts.AbsTol
	if solopts.RelTol > 0.0 {
		relTolerance = solopts.RelTol

	solvername := solopts.KKTSolverName
	if len(solvername) == 0 {
		if dims != nil && (len(dims.At("q")) > 0 || len(dims.At("s")) > 0) {
			solvername = "qr"
		} else {
			solvername = "chol2"

	if c == nil || c.Cols() > 1 {
		err = errors.New("'c' must be matrix with 1 column")
	if h == nil || h.Cols() > 1 {
		err = errors.New("'h' must be matrix with 1 column")

	if dims == nil {
		dims = DSetNew("l", "q", "s")
		dims.Set("l", []int{h.Rows()})
	if err = checkConeLpDimensions(dims); err != nil {

	cdim := dims.Sum("l", "q") + dims.SumSquared("s")
	cdim_diag := dims.Sum("l", "q", "s")

	if h.Rows() != cdim {
		err = errors.New(fmt.Sprintf("'h' must be float matrix of size (%d,1)", cdim))

	// Data for kth 'q' constraint are found in rows indq[k]:indq[k+1] of G.
	indq := make([]int, 0, 100)
	indq = append(indq, dims.At("l")[0])
	for _, k := range dims.At("q") {
		indq = append(indq, indq[len(indq)-1]+k)

	// Data for kth 's' constraint are found in rows inds[k]:inds[k+1] of G.
	inds := make([]int, 0, 100)
	inds = append(inds, indq[len(indq)-1])
	for _, k := range dims.At("s") {
		inds = append(inds, inds[len(inds)-1]+k*k)

	if G != nil && !G.SizeMatch(cdim, c.Rows()) {
		estr := fmt.Sprintf("'G' must be of size (%d,%d)", cdim, c.Rows())
		err = errors.New(estr)
	Gf := func(x, y *matrix.FloatMatrix, alpha, beta float64, opts ...la.Option) error {
		return sgemv(G, x, y, alpha, beta, dims, opts...)

	// Check A and set defaults if it is nil
	if A == nil {
		// A with zero rows reduces Gemv to vector products
		A = matrix.FloatZeros(0, c.Rows())
	if A.Cols() != c.Rows() {
		estr := fmt.Sprintf("'A' must have %d columns", c.Rows())
		err = errors.New(estr)

	Af := func(x, y *matrix.FloatMatrix, alpha, beta float64, opts ...la.Option) error {
		return blas.GemvFloat(A, x, y, alpha, beta, opts...)

	// Check b and set defaults if it is nil
	if b == nil {
		b = matrix.FloatZeros(0, 1)
	if b.Cols() != 1 {
		estr := fmt.Sprintf("'b' must be a matrix with 1 column")
		err = errors.New(estr)
	if b.Rows() != A.Rows() {
		estr := fmt.Sprintf("'b' must have length %d", A.Rows())
		err = errors.New(estr)

	// kktsolver(W) returns a routine for solving 3x3 block KKT system
	//     [ 0   A'  G'*W^{-1} ] [ ux ]   [ bx ]
	//     [ A   0   0         ] [ uy ] = [ by ].
	//     [ G   0   -W'       ] [ uz ]   [ bz ]
	var factor kktFactor
	var kktsolver kktFactor = nil
	if kktfunc, ok := solvers[solvername]; ok {
		// The KKT function returns a problem-specific factor function.
		factor, err = kktfunc(G, dims, A, 0)
		// solver is
		kktsolver = func(W *FloatMatrixSet, H, Df *matrix.FloatMatrix) (kktFunc, error) {
			return factor(W, nil, nil)
	} else {
		err = errors.New(fmt.Sprintf("solver '%s' not known", solvername))

	// res() evaluates residual in 5x5 block KKT system
	//     [ vx   ]    [ 0         ]   [ 0   A'  G'  c ] [ ux        ]
	//     [ vy   ]    [ 0         ]   [-A   0   0   b ] [ uy        ]
	//     [ vz   ] += [ W'*us     ] - [-G   0   0   h ] [ W^{-1}*uz ]
	//     [ vtau ]    [ dg*ukappa ]   [-c' -b' -h'  0 ] [ utau/dg   ]
	//           vs += lmbda o (dz + ds)
	//       vkappa += lmbdg * (dtau + dkappa).
	ws3 := matrix.FloatZeros(cdim, 1)
	wz3 := matrix.FloatZeros(cdim, 1)

	res := func(ux, uy, uz, utau, us, ukappa, vx, vy, vz, vtau, vs, vkappa *matrix.FloatMatrix, W *FloatMatrixSet, dg float64, lmbda *matrix.FloatMatrix) (err error) {

		err = nil
		// vx := vx - A'*uy - G'*W^{-1}*uz - c*utau/dg
		Af(uy, vx, -1.0, 1.0, la.OptTrans)
		//fmt.Printf("post-Af vx=\n%v\n", vx)
		blas.Copy(uz, wz3)
		scale(wz3, W, false, true)
		Gf(wz3, vx, -1.0, 1.0, la.OptTrans)
		blas.AxpyFloat(c, vx, -utau.Float()/dg)

		// vy := vy + A*ux - b*utau/dg
		Af(ux, vy, 1.0, 1.0)
		blas.AxpyFloat(b, vy, -utau.Float()/dg)

		// vz := vz + G*ux - h*utau/dg + W'*us
		Gf(ux, vz, 1.0, 1.0)
		blas.AxpyFloat(h, vz, -utau.Float()/dg)
		blas.Copy(us, ws3)
		scale(ws3, W, true, false)
		blas.AxpyFloat(ws3, vz, 1.0)

		// vtau := vtau + c'*ux + b'*uy + h'*W^{-1}*uz + dg*ukappa
		var vtauplus float64 = dg*ukappa.Float() + blas.DotFloat(c, ux) +
			blas.DotFloat(b, uy) + sdot(h, wz3, dims, 0)
		vtau.SetValue(vtau.Float() + vtauplus)

		// vs := vs + lmbda o (uz + us)
		blas.Copy(us, ws3)
		blas.AxpyFloat(uz, ws3, 1.0)
		sprod(ws3, lmbda, dims, 0, &la.SOpt{"diag", "D"})
		blas.AxpyFloat(ws3, vs, 1.0)

		// vkappa := vkappa + lmbdag * (utau + ukappa)
		lscale := lmbda.GetIndex(lmbda.NumElements() - 1)
		var vkplus float64 = lscale * (utau.Float() + ukappa.Float())
		vkappa.SetValue(vkappa.Float() + vkplus)

	resx0 := math.Max(1.0, math.Sqrt(blas.DotFloat(c, c)))
	resy0 := math.Max(1.0, math.Sqrt(blas.DotFloat(b, b)))
	resz0 := math.Max(1.0, snrm2(h, dims, 0))

	// select initial points

	//fmt.Printf("** initial resx0=%.4f, resy0=%.4f, resz0=%.4f \n", resx0, resy0, resz0)

	x := c.Copy()
	blas.ScalFloat(x, 0.0)
	y := b.Copy()
	blas.ScalFloat(y, 0.0)
	s := matrix.FloatZeros(cdim, 1)
	z := matrix.FloatZeros(cdim, 1)
	dx := c.Copy()
	dy := b.Copy()
	ds := matrix.FloatZeros(cdim, 1)
	dz := matrix.FloatZeros(cdim, 1)
	// these are singleton matrix
	dkappa := matrix.FloatValue(0.0)
	dtau := matrix.FloatValue(0.0)

	var W *FloatMatrixSet
	var f kktFunc
	if primalstart == nil || dualstart == nil {
		// Factor
		//     [ 0   A'  G' ]
		//     [ A   0   0  ].
		//     [ G   0  -I  ]
		W = FloatSetNew("d", "di", "v", "beta", "r", "rti")
		dd := dims.At("l")[0]
		mat := matrix.FloatOnes(dd, 1)
		W.Set("d", mat)
		mat = matrix.FloatOnes(dd, 1)
		W.Set("di", mat)
		dq := len(dims.At("q"))
		W.Set("beta", matrix.FloatOnes(dq, 1))

		for _, n := range dims.At("q") {
			vm := matrix.FloatZeros(n, 1)
			vm.SetIndex(0, 1.0)
			W.Append("v", vm)
		for _, n := range dims.At("s") {
			W.Append("r", matrix.FloatIdentity(n))
			W.Append("rti", matrix.FloatIdentity(n))
		f, err = kktsolver(W, nil, nil)
		if err != nil {
			fmt.Printf("kktsolver error: %s\n", err)

	if primalstart == nil {
		// minimize    || G * x - h ||^2
		// subject to  A * x = b
		// by solving
		//     [ 0   A'  G' ]   [ x  ]   [ 0 ]
		//     [ A   0   0  ] * [ dy ] = [ b ].
		//     [ G   0  -I  ]   [ -s ]   [ h ]
		blas.ScalFloat(x, 0.0)
		blas.CopyFloat(y, dy)
		blas.CopyFloat(h, s)
		err = f(x, dy, s)
		if err != nil {
			fmt.Printf("f(x,dy,s): %s\n", err)
		blas.ScalFloat(s, -1.0)
		//fmt.Printf("** initial s:\n%v\n", s)
	} else {
		blas.Copy(primalstart.At("x")[0], x)
		blas.Copy(primalstart.At("s")[0], s)

	// ts = min{ t | s + t*e >= 0 }
	ts, _ := maxStep(s, dims, 0, nil)
	if ts >= 0 && primalstart != nil {
		err = errors.New("initial s is not positive")

	if dualstart == nil {
		// minimize   || z ||^2
		// subject to G'*z + A'*y + c = 0
		// by solving
		//     [ 0   A'  G' ] [ dx ]   [ -c ]
		//     [ A   0   0  ] [ y  ] = [  0 ].
		//     [ G   0  -I  ] [ z  ]   [  0 ]
		blas.Copy(c, dx)
		blas.ScalFloat(dx, -1.0)
		blas.ScalFloat(y, 0.0)
		err = f(dx, y, z)
		if err != nil {
			fmt.Printf("f(dx,y,z): %s\n", err)
	} else {
		if len(dualstart.At("y")) > 0 {
			blas.Copy(dualstart.At("y")[0], y)
		blas.Copy(dualstart.At("z")[0], z)

	// tz = min{ t | z + t*e >= 0 }
	tz, _ := maxStep(z, dims, 0, nil)
	if tz >= 0 && dualstart != nil {
		err = errors.New("initial z is not positive")

	nrms := snrm2(s, dims, 0)
	nrmz := snrm2(z, dims, 0)

	gap := 0.0
	pcost := 0.0
	dcost := 0.0
	relgap := 0.0

	if primalstart == nil && dualstart == nil {
		gap = sdot(s, z, dims, 0)
		pcost = blas.DotFloat(c, x)
		dcost = -blas.DotFloat(b, y) - sdot(h, z, dims, 0)
		if pcost < 0.0 {
			relgap = gap / -pcost
		} else if dcost > 0.0 {
			relgap = gap / dcost
		} else {
			relgap = math.NaN()
		if ts <= 0 && tz < 0 &&
			(gap <= absTolerance || (!math.IsNaN(relgap) && relgap <= relTolerance)) {
			// Constructed initial points happen to be feasible and optimal

			ind := dims.At("l")[0] + dims.Sum("q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				symm(z, m, ind)
				ind += m * m

			// rx = A'*y + G'*z + c
			rx := c.Copy()
			Af(y, rx, 1.0, 1.0, la.OptTrans)
			Gf(z, rx, 1.0, 1.0, la.OptTrans)
			resx := math.Sqrt(blas.Dot(rx, rx).Float())
			// ry = b - A*x
			ry := b.Copy()
			Af(x, ry, -1.0, -1.0)
			resy := math.Sqrt(blas.Dot(ry, ry).Float())
			// rz = s + G*x - h
			rz := matrix.FloatZeros(cdim, 1)
			Gf(x, rz, 1.0, 0.0)
			blas.AxpyFloat(s, rz, 1.0)
			blas.AxpyFloat(h, rz, -1.0)
			resz := snrm2(rz, dims, 0)

			pres := math.Max(resy/resy0, resz/resz0)
			dres := resx / resx0
			cx := blas.Dot(c, x).Float()
			by := blas.Dot(b, y).Float()
			hz := sdot(h, z, dims, 0)

			sol.X = x
			sol.Y = y
			sol.S = s
			sol.Z = z
			sol.Result = FloatSetNew("x", "y", "s", "x")
			sol.Result.Append("x", x)
			sol.Result.Append("y", y)
			sol.Result.Append("s", s)
			sol.Result.Append("z", z)
			sol.Status = Optimal
			sol.Gap = gap
			sol.RelativeGap = relgap
			sol.PrimalObjective = cx
			sol.DualObjective = -(by + hz)
			sol.PrimalInfeasibility = pres
			sol.DualInfeasibility = dres
			sol.PrimalSlack = -ts
			sol.DualSlack = -tz


		if ts >= -1e-8*math.Max(nrms, 1.0) {
			a := 1.0 + ts
			is := make([]int, 0)
			// indexes s[:dims['l']]
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			// indexes s[indq[:-1]]
			is = append(is, indq[:len(indq)-1]...)
			// indexes s[ind:ind+m*m:m+1] (diagonal)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			for _, k := range is {
				s.SetIndex(k, a+s.GetIndex(k))
			//fmt.Printf("scaled s=\n%v\n", s.ConvertToString())

		if tz >= -1e-8*math.Max(nrmz, 1.0) {
			a := 1.0 + tz
			is := make([]int, 0)
			// indexes z[:dims['l']]
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			// indexes z[indq[:-1]]
			is = append(is, indq[:len(indq)-1]...)
			// indexes z[ind:ind+m*m:m+1] (diagonal)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			for _, k := range is {
				z.SetIndex(k, a+z.GetIndex(k))
			//fmt.Printf("scaled z=\n%v\n", z.ConvertToString())
	} else if primalstart == nil && dualstart != nil {
		if ts >= -1e-8*math.Max(nrms, 1.0) {
			a := 1.0 + ts
			is := make([]int, 0)
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			is = append(is, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			for _, k := range is {
				s.SetIndex(k, a+s.GetIndex(k))
	} else if primalstart != nil && dualstart == nil {
		if tz >= -1e-8*math.Max(nrmz, 1.0) {
			a := 1.0 + tz
			is := make([]int, 0)
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			is = append(is, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			for _, k := range is {
				z.SetIndex(k, a+z.GetIndex(k))

	tau := matrix.FloatValue(1.0)
	kappa := matrix.FloatValue(1.0)
	wkappa3 := matrix.FloatValue(0.0)

	rx := c.Copy()
	hrx := c.Copy()
	ry := b.Copy()
	hry := b.Copy()
	rz := matrix.FloatZeros(cdim, 1)
	hrz := matrix.FloatZeros(cdim, 1)
	sigs := matrix.FloatZeros(dims.Sum("s"), 1)
	sigz := matrix.FloatZeros(dims.Sum("s"), 1)
	lmbda := matrix.FloatZeros(cdim_diag+1, 1)
	lmbdasq := matrix.FloatZeros(cdim_diag+1, 1)

	gap = sdot(s, z, dims, 0)

	var x1, y1, z1 *matrix.FloatMatrix
	var dg, dgi float64
	var th *matrix.FloatMatrix
	var WS fClosure
	var f3 kktFunc

	//fmt.Printf("preloop x=\n%v\n", x.ConvertToString())
	//fmt.Printf("preloop z=\n%v\n", z.ConvertToString())
	//fmt.Printf("preloop s=\n%v\n", s.ConvertToString())
	for iter := 0; iter < solopts.MaxIter+1; iter++ {
		// hrx = -A'*y - G'*z
		Af(y, hrx, -1.0, 0.0, la.OptTrans)
		Gf(z, hrx, -1.0, 1.0, la.OptTrans)
		hresx := math.Sqrt(blas.DotFloat(hrx, hrx))

		// rx = hrx - c*tau
		//    = -A'*y - G'*z - c*tau
		blas.Copy(hrx, rx)
		err = blas.AxpyFloat(c, rx, -tau.Float())
		resx := math.Sqrt(blas.DotFloat(rx, rx)) / tau.Float()

		// hry = A*x
		Af(x, hry, 1.0, 0.0)
		hresy := math.Sqrt(blas.DotFloat(hry, hry))

		// ry = hry - b*tau
		//    = A*x - b*tau
		blas.Copy(hry, ry)
		blas.AxpyFloat(b, ry, -tau.Float())
		resy := math.Sqrt(blas.DotFloat(ry, ry)) / tau.Float()

		// hrz = s + G*x
		Gf(x, hrz, 1.0, 0.0)
		blas.AxpyFloat(s, hrz, 1.0)
		hresz := snrm2(hrz, dims, 0)

		// rz = hrz - h*tau
		//    = s + G*x - h*tau
		blas.ScalFloat(rz, 0.0)
		blas.AxpyFloat(hrz, rz, 1.0)
		blas.AxpyFloat(h, rz, -tau.Float())
		resz := snrm2(rz, dims, 0) / tau.Float()

		// rt = kappa + c'*x + b'*y + h'*z '
		cx := blas.DotFloat(c, x)
		by := blas.DotFloat(b, y)
		hz := sdot(h, z, dims, 0)
		rt := kappa.Float() + cx + by + hz

		// Statistics for stopping criteria
		pcost = cx / tau.Float()
		dcost = -(by + hz) / tau.Float()

		if pcost < 0.0 {
			relgap = gap / -pcost
		} else if dcost > 0.0 {
			relgap = gap / dcost
		} else {
			relgap = math.NaN()

		pres := math.Max(resy/resy0, resz/resz0)
		dres := resx / resx0
		pinfres := math.NaN()
		if hz+by < 0.0 {
			pinfres = hresx / resx0 / (-hz - by)
		dinfres := math.NaN()
		if cx < 0.0 {
			dinfres = math.Max(hresy/resy0, hresz/resz0) / (-cx)

		if solopts.ShowProgress {
			if iter == 0 {
				// show headers of something
				fmt.Printf("% 10s% 12s% 10s% 8s% 7s % 5s\n",
					"pcost", "dcost", "gap", "pres", "dres", "k/t")
			// show something
			fmt.Printf("%2d: % 8.4e % 8.4e % 4.0e% 7.0e% 7.0e% 7.0e\n",
				iter, pcost, dcost, gap, pres, dres, kappa.GetIndex(0)/tau.GetIndex(0))

		if (pres <= feasTolerance && dres <= feasTolerance &&
			(gap <= absTolerance || (!math.IsNaN(relgap) && relgap <= relTolerance))) ||
			iter == solopts.MaxIter {
			// done
			blas.ScalFloat(x, 1.0/tau.Float())
			blas.ScalFloat(y, 1.0/tau.Float())
			blas.ScalFloat(s, 1.0/tau.Float())
			blas.ScalFloat(z, 1.0/tau.Float())
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				symm(z, m, ind)
				ind += m * m
			ts, _ = maxStep(s, dims, 0, nil)
			tz, _ = maxStep(z, dims, 0, nil)
			if iter == solopts.MaxIter {
				// MaxIterations exceeded
				if solopts.ShowProgress {
					fmt.Printf("No solution. Max iterations exceeded\n")
				err = errors.New("No solution. Max iterations exceeded")
				sol.X = x
				sol.Y = y
				sol.S = s
				sol.Z = z
				sol.Result = FloatSetNew("x", "y", "s", "x")
				sol.Result.Append("x", x)
				sol.Result.Append("y", y)
				sol.Result.Append("s", s)
				sol.Result.Append("z", z)
				sol.Status = Unknown
				sol.Gap = gap
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.PrimalResidualCert = pinfres
				sol.DualResidualCert = dinfres
				sol.Iterations = iter
			} else {
				// Optimal
				if solopts.ShowProgress {
					fmt.Printf("Optimal solution.\n")
				err = nil
				sol.X = x
				sol.Y = y
				sol.S = s
				sol.Z = z
				sol.Result = FloatSetNew("x", "y", "s", "x")
				sol.Result.Append("x", x)
				sol.Result.Append("y", y)
				sol.Result.Append("s", s)
				sol.Result.Append("z", z)
				sol.Status = Optimal
				sol.Gap = gap
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.PrimalResidualCert = math.NaN()
				sol.DualResidualCert = math.NaN()
				sol.Iterations = iter
		} else if !math.IsNaN(pinfres) && pinfres <= feasTolerance {
			// Primal Infeasible
			if solopts.ShowProgress {
				fmt.Printf("Primal infeasible.\n")
			err = errors.New("Primal infeasible")
			blas.ScalFloat(y, 1.0/(-hz-by))
			blas.ScalFloat(z, 1.0/(-hz-by))
			sol.X = nil
			sol.Y = nil
			sol.S = nil
			sol.Z = nil
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(z, m, ind)
				ind += m * m
			tz, _ = maxStep(z, dims, 0, nil)
			sol.Status = PrimalInfeasible
			sol.Result = FloatSetNew("x", "y", "s", "x")
			sol.Result.Append("x", nil)
			sol.Result.Append("y", nil)
			sol.Result.Append("s", nil)
			sol.Result.Append("z", nil)
			sol.Gap = math.NaN()
			sol.RelativeGap = math.NaN()
			sol.PrimalObjective = math.NaN()
			sol.DualObjective = 1.0
			sol.PrimalInfeasibility = math.NaN()
			sol.DualInfeasibility = math.NaN()
			sol.PrimalSlack = math.NaN()
			sol.DualSlack = -tz
			sol.PrimalResidualCert = pinfres
			sol.DualResidualCert = math.NaN()
			sol.Iterations = iter
		} else if !math.IsNaN(dinfres) && dinfres <= feasTolerance {
			// Dual Infeasible
			if solopts.ShowProgress {
				fmt.Printf("Dual infeasible.\n")
			err = errors.New("Primal infeasible")
			blas.ScalFloat(x, 1.0/(-cx))
			blas.ScalFloat(s, 1.0/(-cx))
			sol.X = nil
			sol.Y = nil
			sol.S = nil
			sol.Z = nil
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				ind += m * m
			ts, _ = maxStep(s, dims, 0, nil)
			sol.Status = PrimalInfeasible
			sol.Result = FloatSetNew("x", "y", "s", "x")
			sol.Result.Append("x", nil)
			sol.Result.Append("y", nil)
			sol.Result.Append("s", nil)
			sol.Result.Append("z", nil)
			sol.Gap = math.NaN()
			sol.RelativeGap = math.NaN()
			sol.PrimalObjective = 1.0
			sol.DualObjective = math.NaN()
			sol.PrimalInfeasibility = math.NaN()
			sol.DualInfeasibility = math.NaN()
			sol.PrimalSlack = -ts
			sol.DualSlack = math.NaN()
			sol.PrimalResidualCert = math.NaN()
			sol.DualResidualCert = dinfres
			sol.Iterations = iter

		// Compute initial scaling W:
		//     W * z = W^{-T} * s = lambda
		//     dg * tau = 1/dg * kappa = lambdag.
		if iter == 0 {
			W, err = computeScaling(s, z, lmbda, dims, 0)

			//     dg = sqrt( kappa / tau )
			//     dgi = sqrt( tau / kappa )
			//     lambda_g = sqrt( tau * kappa )
			// lambda_g is stored in the last position of lmbda.

			dg = math.Sqrt(kappa.Float() / tau.Float())
			dgi = math.Sqrt(float64(tau.Float() / kappa.Float()))
			lmbda.SetIndex(-1, math.Sqrt(float64(tau.Float()*kappa.Float())))
		// lmbdasq := lmbda o lmbda
		ssqr(lmbdasq, lmbda, dims, 0)
		lmbdasq.SetIndex(-1, lmbda.GetIndex(-1)*lmbda.GetIndex(-1))

		// f3(x, y, z) solves
		//     [ 0  A'  G'   ] [ ux        ]   [ bx ]
		//     [ A  0   0    ] [ uy        ] = [ by ].
		//     [ G  0  -W'*W ] [ W^{-1}*uz ]   [ bz ]
		// On entry, x, y, z contain bx, by, bz.
		// On exit, they contain ux, uy, uz.
		// Also solve
		//     [ 0   A'  G'    ] [ x1        ]          [ c ]
		//     [-A   0   0     ]*[ y1        ] = -dgi * [ b ].
		//     [-G   0   W'*W  ] [ W^{-1}*z1 ]          [ h ]

		f3, err = kktsolver(W, nil, nil)
		if err != nil {
			fmt.Printf("kktsolver error=%v\n", err)
		if iter == 0 {
			x1 = c.Copy()
			y1 = b.Copy()
			z1 = matrix.FloatZeros(cdim, 1)
		blas.Copy(c, x1)
		blas.ScalFloat(x1, -1.0)
		blas.Copy(b, y1)
		blas.Copy(h, z1)
		err = f3(x1, y1, z1)
		//fmt.Printf("f3 result: x1=\n%v\nf3 result: z1=\n%v\n", x1, z1)
		blas.ScalFloat(x1, dgi)
		blas.ScalFloat(y1, dgi)
		blas.ScalFloat(z1, dgi)

		if err != nil {
			if iter == 0 && primalstart != nil && dualstart != nil {
				err = errors.New("Rank(A) < p or Rank([G; A]) < n")
			} else {
				t_ := 1.0 / tau.Float()
				blas.ScalFloat(x, t_)
				blas.ScalFloat(y, t_)
				blas.ScalFloat(s, t_)
				blas.ScalFloat(z, t_)
				ind := dims.Sum("l", "q")
				for _, m := range dims.At("s") {
					symm(s, m, ind)
					symm(z, m, ind)
					ind += m * m
				ts, _ = maxStep(s, dims, 0, nil)
				tz, _ = maxStep(z, dims, 0, nil)
				err = errors.New("Terminated (singular KKT matrix).")
				sol.X = x
				sol.Y = y
				sol.S = s
				sol.Z = z
				sol.Result = FloatSetNew("x", "y", "s", "x")
				sol.Result.Append("x", x)
				sol.Result.Append("y", y)
				sol.Result.Append("s", s)
				sol.Result.Append("z", z)
				sol.Status = Unknown
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.Iterations = iter

		// f6_no_ir(x, y, z, tau, s, kappa) solves
		//    [ 0         ]   [  0   A'  G'  c ] [ ux        ]    [ bx   ]
		//    [ 0         ]   [ -A   0   0   b ] [ uy        ]    [ by   ]
		//    [ W'*us     ] - [ -G   0   0   h ] [ W^{-1}*uz ] = -[ bz   ]
		//    [ dg*ukappa ]   [ -c' -b' -h'  0 ] [ utau/dg   ]    [ btau ]
		//    lmbda o (uz + us) = -bs
		//    lmbdag * (utau + ukappa) = -bkappa.
		// On entry, x, y, z, tau, s, kappa contain bx, by, bz, btau,
		// bkappa.  On exit, they contain ux, uy, uz, utau, ukappa.

		// th = W^{-T} * h
		if iter == 0 {
			th = matrix.FloatZeros(cdim, 1)

		blas.Copy(h, th)
		scale(th, W, true, true)

		f6_no_ir := func(x, y, z, tau, s, kappa *matrix.FloatMatrix) (err error) {
			// Solve
			// [  0   A'  G'    0   ] [ ux        ]
			// [ -A   0   0     b   ] [ uy        ]
			// [ -G   0   W'*W  h   ] [ W^{-1}*uz ]
			// [ -c' -b' -h'    k/t ] [ utau/dg   ]
			//   [ bx                    ]
			//   [ by                    ]
			// = [ bz - W'*(lmbda o\ bs) ]
			//   [ btau - bkappa/tau     ]
			// us = -lmbda o\ bs - uz
			// ukappa = -bkappa/lmbdag - utau.

			// First solve
			// [ 0  A' G'   ] [ ux        ]   [  bx                    ]
			// [ A  0  0    ] [ uy        ] = [ -by                    ]
			// [ G  0 -W'*W ] [ W^{-1}*uz ]   [ -bz + W'*(lmbda o\ bs) ]

			err = nil
			// y := -y = -by
			blas.ScalFloat(y, -1.0)

			// s := -lmbda o\ s = -lmbda o\ bs
			err = sinv(s, lmbda, dims, 0)
			blas.ScalFloat(s, -1.0)

			// z := -(z + W'*s) = -bz + W'*(lambda o\ bs)
			blas.Copy(s, ws3)
			err = scale(ws3, W, true, false)
			blas.AxpyFloat(ws3, z, 1.0)
			blas.ScalFloat(z, -1.0)

			err = f3(x, y, z)

			// Combine with solution of
			// [ 0   A'  G'    ] [ x1         ]          [ c ]
			// [-A   0   0     ] [ y1         ] = -dgi * [ b ]
			// [-G   0   W'*W  ] [ W^{-1}*dzl ]          [ h ]
			// to satisfy
			// -c'*x - b'*y - h'*W^{-1}*z + dg*tau = btau - bkappa/tau. '

			// , kappa[0] := -kappa[0] / lmbd[-1] = -bkappa / lmbdag
			kap_ := kappa.Float()
			tau_ := tau.Float()
			kap_ = -kap_ / lmbda.GetIndex(-1)
			// tau[0] = tau[0] + kappa[0] / dgi = btau[0] - bkappa / tau
			tau_ = tau_ + kap_/dgi

			//tau[0] = dgi * ( tau[0] + xdot(c,x) + ydot(b,y) +
			//    misc.sdot(th, z, dims) ) / (1.0 + misc.sdot(z1, z1, dims))
			//tau_ = tau_ + blas.DotFloat(c, x) + blas.DotFloat(b, y) + sdot(th, z, dims, 0)
			tau_ += blas.DotFloat(c, x)
			tau_ += blas.DotFloat(b, y)
			tau_ += sdot(th, z, dims, 0)
			tau_ = dgi * tau_ / (1.0 + sdot(z1, z1, dims, 0))
			blas.AxpyFloat(x1, x, tau_)
			blas.AxpyFloat(y1, y, tau_)
			blas.AxpyFloat(z1, z, tau_)

			blas.AxpyFloat(z, s, -1.0)
			kap_ = kap_ - tau_

		// f6(x, y, z, tau, s, kappa) solves the same system as f6_no_ir,
		// but applies iterative refinement. Following variables part of f6-closure
		// and ~ 12 is the limit. We wrap them to a structure.

		if iter == 0 {
			if refinement > 0 || solopts.Debug {
				WS.wx = c.Copy()
				WS.wy = b.Copy()
				WS.wz = matrix.FloatZeros(cdim, 1)
				WS.ws = matrix.FloatZeros(cdim, 1)
				WS.wtau = matrix.FloatValue(0.0)
				WS.wkappa = matrix.FloatValue(0.0)
			if refinement > 0 {
				WS.wx2 = c.Copy()
				WS.wy2 = b.Copy()
				WS.wz2 = matrix.FloatZeros(cdim, 1)
				WS.ws2 = matrix.FloatZeros(cdim, 1)
				WS.wtau2 = matrix.FloatValue(0.0)
				WS.wkappa2 = matrix.FloatValue(0.0)

		f6 := func(x, y, z, tau, s, kappa *matrix.FloatMatrix) error {
			var err error = nil
			if refinement > 0 || solopts.Debug {
				blas.Copy(x, WS.wx)
				blas.Copy(y, WS.wy)
				blas.Copy(z, WS.wz)
				blas.Copy(s, WS.ws)
			err = f6_no_ir(x, y, z, tau, s, kappa)
			for i := 0; i < refinement; i++ {
				blas.Copy(WS.wx, WS.wx2)
				blas.Copy(WS.wy, WS.wy2)
				blas.Copy(WS.wz, WS.wz2)
				blas.Copy(WS.ws, WS.ws2)
				err = res(x, y, z, tau, s, kappa, WS.wx2, WS.wy2, WS.wz2, WS.wtau2, WS.ws2, WS.wkappa2, W, dg, lmbda)
				err = f6_no_ir(WS.wx2, WS.wy2, WS.wz2, WS.wtau2, WS.ws2, WS.wkappa2)
				blas.AxpyFloat(WS.wx2, x, 1.0)
				blas.AxpyFloat(WS.wy2, y, 1.0)
				blas.AxpyFloat(WS.wz2, z, 1.0)
				blas.AxpyFloat(WS.ws2, s, 1.0)
				tau.SetValue(tau.Float() + WS.wtau2.Float())
				kappa.SetValue(kappa.Float() + WS.wkappa2.Float())
			if solopts.Debug {
				res(x, y, z, tau, s, kappa, WS.wx, WS.wy, WS.wz, WS.wtau, WS.ws, WS.wkappa, W, dg, lmbda)
				fmt.Printf("KKT residuals\n")
			return err

		var nrm float64 = blas.Nrm2(lmbda).Float()
		mu := math.Pow(nrm, 2.0) / (1.0 + float64(cdim_diag))
		sigma := 0.0
		var step, tt, tk float64

		for i := 0; i < 2; i++ {
			// Solve
			// [ 0         ]   [  0   A'  G'  c ] [ dx        ]
			// [ 0         ]   [ -A   0   0   b ] [ dy        ]
			// [ W'*ds     ] - [ -G   0   0   h ] [ W^{-1}*dz ]
			// [ dg*dkappa ]   [ -c' -b' -h'  0 ] [ dtau/dg   ]
			//               [ rx   ]
			//               [ ry   ]
			// = - (1-sigma) [ rz   ]
			//               [ rtau ]
			// lmbda o (dz + ds) = -lmbda o lmbda + sigma*mu*e
			// lmbdag * (dtau + dkappa) = - kappa * tau + sigma*mu
			// ds = -lmbdasq if i is 0
			//    = -lmbdasq - dsa o dza + sigma*mu*e if i is 1
			// dkappa = -lambdasq[-1] if i is 0
			//        = -lambdasq[-1] - dkappaa*dtaua + sigma*mu if i is 1.
			ind := dims.Sum("l", "q")
			ind2 := ind
			blas.Copy(lmbdasq, ds, &la.IOpt{"n", ind})
			blas.ScalFloat(ds, 0.0, &la.IOpt{"offset", ind})
			for _, m := range dims.At("s") {
				blas.Copy(lmbdasq, ds, &la.IOpt{"n", m}, &la.IOpt{"offsetx", ind2},
					&la.IOpt{"offsety", ind}, &la.IOpt{"incy", m + 1})
				ind += m * m
				ind2 += m
			// dkappa[0] = lmbdasq[-1]

			if i == 1 {
				blas.AxpyFloat(ws3, ds, 1.0)
				ind = dims.Sum("l", "q")
				is := make([]int, 0)
				is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
				is = append(is, indq[:len(indq)-1]...)
				for _, m := range dims.At("s") {
					is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				for _, k := range is {
					ds.SetIndex(k, ds.GetIndex(k)-sigma*mu)

				dk_ := dkappa.Float()
				wk_ := wkappa3.Float()
				dkappa.SetValue(dk_ + wk_ - sigma*mu)
			// (dx, dy, dz, dtau) = (1-sigma)*(rx, ry, rz, rt)
			blas.Copy(rx, dx)
			blas.ScalFloat(dx, 1.0-sigma)
			blas.Copy(ry, dy)
			blas.ScalFloat(dy, 1.0-sigma)
			blas.Copy(rz, dz)
			blas.ScalFloat(dz, 1.0-sigma)
			// dtau[0] = (1.0 - sigma) * rt
			dtau.SetValue((1.0 - sigma) * rt)

			err = f6(dx, dy, dz, dtau, ds, dkappa)

			// Save ds o dz and dkappa * dtau for Mehrotra correction
			if i == 0 {
				blas.Copy(ds, ws3)
				sprod(ws3, dz, dims, 0)
				wkappa3.SetValue(dtau.Float() * dkappa.Float())

			// Maximum step to boundary.
			// If i is 1, also compute eigenvalue decomposition of the 's'
			// blocks in ds, dz.  The eigenvectors Qs, Qz are stored in
			// dsk, dzk.  The eigenvalues are stored in sigs, sigz.
			var ts, tz float64

			scale2(lmbda, ds, dims, 0, false)
			scale2(lmbda, dz, dims, 0, false)
			if i == 0 {
				ts, _ = maxStep(ds, dims, 0, nil)
				tz, _ = maxStep(dz, dims, 0, nil)
			} else {
				ts, _ = maxStep(ds, dims, 0, sigs)
				tz, _ = maxStep(dz, dims, 0, sigz)
			dt_ := dtau.Float()
			dk_ := dkappa.Float()
			tt = -dt_ / lmbda.GetIndex(-1)
			tk = -dk_ / lmbda.GetIndex(-1)
			t := maxvec([]float64{0.0, ts, tz, tt, tk})
			if t == 0.0 {
				step = 1.0
			} else {
				if i == 0 {
					step = math.Min(1.0, 1.0/t)
				} else {
					step = math.Min(1.0, STEP/t)
			if i == 0 {
				// sigma = (1 - step)^3
				sigma = (1.0 - step) * (1.0 - step) * (1.0 - step)
				//sigma = math.Pow((1.0 - step), EXPON)
		//fmt.Printf("** tau = %.17f, kappa = %.17f\n", tau.Float(), kappa.Float())
		//fmt.Printf("** step = %.17f, sigma = %.17f\n", step, sigma)

		// Update x, y
		blas.AxpyFloat(dx, x, step)
		blas.AxpyFloat(dy, y, step)

		// Replace 'l' and 'q' blocks of ds and dz with the updated
		// variables in the current scaling.
		// Replace 's' blocks of ds and dz with the factors Ls, Lz in a
		// factorization Ls*Ls', Lz*Lz' of the updated variables in the
		// current scaling.
		// ds := e + step*ds for 'l' and 'q' blocks.
		// dz := e + step*dz for 'l' and 'q' blocks.
		blas.ScalFloat(ds, step, &la.IOpt{"n", dims.Sum("l", "q")})
		blas.ScalFloat(dz, step, &la.IOpt{"n", dims.Sum("l", "q")})

		is := make([]int, 0)
		is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
		is = append(is, indq[:len(indq)-1]...)
		for _, k := range is {
			ds.SetIndex(k, 1.0+ds.GetIndex(k))
			dz.SetIndex(k, 1.0+dz.GetIndex(k))

		// ds := H(lambda)^{-1/2} * ds and dz := H(lambda)^{-1/2} * dz.
		// This replaces the 'l' and 'q' components of ds and dz with the
		// updated variables in the current scaling.
		// The 's' components of ds and dz are replaced with
		// diag(lmbda_k)^{1/2} * Qs * diag(lmbda_k)^{1/2}
		// diag(lmbda_k)^{1/2} * Qz * diag(lmbda_k)^{1/2}
		scale2(lmbda, ds, dims, 0, true)
		scale2(lmbda, dz, dims, 0, true)

		// sigs := ( e + step*sigs ) ./ lambda for 's' blocks.
		// sigz := ( e + step*sigz ) ./ lambda for 's' blocks.
		blas.ScalFloat(sigs, step)
		blas.ScalFloat(sigz, step)
		sdimsum := dims.Sum("s")
		qdimsum := dims.Sum("l", "q")
		blas.TbsvFloat(lmbda, sigs, &la.IOpt{"n", sdimsum}, &la.IOpt{"k", 0},
			&la.IOpt{"lda", 1}, &la.IOpt{"offseta", qdimsum})
		blas.TbsvFloat(lmbda, sigz, &la.IOpt{"n", sdimsum}, &la.IOpt{"k", 0},
			&la.IOpt{"lda", 1}, &la.IOpt{"offseta", qdimsum})

		ind2 := qdimsum
		ind3 := 0
		sdims := dims.At("s")
		for k := 0; k < len(sdims); k++ {
			m := sdims[k]
			for i := 0; i < m; i++ {
				a := math.Sqrt(sigs.GetIndex(ind3 + i))
				blas.ScalFloat(ds, a, &la.IOpt{"offset", ind2 + m*i}, &la.IOpt{"n", m})
				a = math.Sqrt(sigz.GetIndex(ind3 + i))
				blas.ScalFloat(dz, a, &la.IOpt{"offset", ind2 + m*i}, &la.IOpt{"n", m})
			ind2 += m * m
			ind3 += m

		err = updateScaling(W, lmbda, ds, dz)

		// For kappa, tau block:
		//     dg := sqrt( (kappa + step*dkappa) / (tau + step*dtau) )
		//         = dg * sqrt( (1 - step*tk) / (1 - step*tt) )
		//     lmbda[-1] := sqrt((tau + step*dtau) * (kappa + step*dkappa))
		//                = lmbda[-1] * sqrt(( 1 - step*tt) * (1 - step*tk))
		dg *= math.Sqrt(1.0-step*tk) / math.Sqrt(1.0-step*tt)
		dgi = 1.0 / dg
		a := math.Sqrt(1.0-step*tk) * math.Sqrt(1.0-step*tt)
		lmbda.SetIndex(-1, a*lmbda.GetIndex(-1))

		// Unscale s, z, tau, kappa (unscaled variables are used only to
		// compute feasibility residuals).
		ind := dims.Sum("l", "q")
		ind2 = ind
		blas.Copy(lmbda, s, &la.IOpt{"n", ind})
		for _, m := range dims.At("s") {
			blas.ScalFloat(s, 0.0, &la.IOpt{"offset", ind2})
			blas.Copy(lmbda, s, &la.IOpt{"offsetx", ind}, &la.IOpt{"offsety", ind2},
				&la.IOpt{"n", m}, &la.IOpt{"incy", m + 1})
			ind += m
			ind2 += m * m
		scale(s, W, true, false)

		ind = dims.Sum("l", "q")
		ind2 = ind
		blas.Copy(lmbda, z, &la.IOpt{"n", ind})
		for _, m := range dims.At("s") {
			blas.ScalFloat(z, 0.0, &la.IOpt{"offset", ind2})
			blas.Copy(lmbda, z, &la.IOpt{"offsetx", ind}, &la.IOpt{"offsety", ind2},
				&la.IOpt{"n", m}, &la.IOpt{"incy", m + 1})
			ind += m
			ind2 += m * m
		scale(z, W, false, true)

		kappa.SetValue(lmbda.GetIndex(-1) / dgi)
		tau.SetValue(lmbda.GetIndex(-1) * dgi)
		g := blas.Nrm2Float(lmbda, &la.IOpt{"n", lmbda.Rows() - 1}) / tau.Float()
		gap = g * g
		//fmt.Printf(" ** kappa=%.10f, tau=%.10f, gap=%.10f\n", kappa.Float(), tau.Float(), gap)

Example #6
// Returns the Nesterov-Todd scaling W at points s and z, and stores the
// scaled variable in lmbda.
//
//	W * z = W^{-T} * s = lmbda.
//
// W is a MatrixSet with entries:
//
//   - W['dnl']: positive vector
//   - W['dnli']: componentwise inverse of W['dnl']
//   - W['d']: positive vector
//   - W['di']: componentwise inverse of W['d']
//   - W['v']: list of 2nd order cone vectors with unit hyperbolic norms
//   - W['beta']: list of positive numbers
//   - W['r']: list of square matrices
//   - W['rti']: list of square matrices.  rti[k] is the inverse transpose
//     of r[k].

func computeScaling(s, z, lmbda *matrix.FloatMatrix, dims *DimensionSet, mnl int) (W *FloatMatrixSet, err error) {
	err = nil
	W = FloatSetNew("dnl", "dnli", "d", "di", "v", "beta", "r", "rti")

	// For the nonlinear block:
	//     W['dnl'] = sqrt( s[:mnl] ./ z[:mnl] )
	//     W['dnli'] = sqrt( z[:mnl] ./ s[:mnl] )
	//     lambda[:mnl] = sqrt( s[:mnl] .* z[:mnl] )

	var stmp, ztmp, lmd *matrix.FloatMatrix
	if mnl > 0 {
		stmp = matrix.FloatVector(s.FloatArray()[:mnl])
		ztmp = matrix.FloatVector(z.FloatArray()[:mnl])
		dnl := stmp.Div(ztmp)
		dnl.Apply(dnl, math.Sqrt)
		dnli := dnl.Copy()
		dnli.Apply(dnli, func(a float64) float64 { return 1.0 / a })
		W.Set("dnl", dnl)
		W.Set("dnli", dnli)
		lmd = stmp.Mul(ztmp)
		lmd.Apply(lmd, math.Sqrt)
		lmbda.SetIndexes(matrix.MakeIndexSet(0, mnl, 1), lmd.FloatArray())
	} else {
		mnl = 0

	// For the 'l' block:
	//     W['d'] = sqrt( sk ./ zk )
	//     W['di'] = sqrt( zk ./ sk )
	//     lambdak = sqrt( sk .* zk )
	// where sk and zk are the first dims['l'] entries of s and z.
	// lambda_k is stored in the first dims['l'] positions of lmbda.

	m := dims.At("l")[0]
	td := s.FloatArray()
	stmp = matrix.FloatVector(td[mnl : mnl+m])
	zd := z.FloatArray()
	//fmt.Printf("zdata=%v\n", zd[mnl:mnl+m])
	ztmp = matrix.FloatVector(zd[mnl : mnl+m])
	d := stmp.Div(ztmp)
	d.Apply(d, math.Sqrt)
	di := d.Copy()
	di.Apply(di, func(a float64) float64 { return 1.0 / a })
	//fmt.Printf("d:\n%v\n", d)
	//fmt.Printf("di:\n%v\n", di)
	W.Set("d", d)
	W.Set("di", di)
	lmd = stmp.Mul(ztmp)
	lmd.Apply(lmd, math.Sqrt)
	// lmd has indexes mnl:mnl+m and length of m
	lmbda.SetIndexes(matrix.MakeIndexSet(mnl, mnl+m, 1), lmd.FloatArray())
	//fmt.Printf("after l:\n%v\n", lmbda)

	   For the 'q' blocks, compute lists 'v', 'beta'.

	   The vector v[k] has unit hyperbolic norm:

	       (sqrt( v[k]' * J * v[k] ) = 1 with J = [1, 0; 0, -I]).

	   beta[k] is a positive scalar.

	   The hyperbolic Householder matrix H = 2*v[k]*v[k]' - J
	   defined by v[k] satisfies

	       (beta[k] * H) * zk  = (beta[k] * H) \ sk = lambda_k

	   where sk = s[indq[k]:indq[k+1]], zk = z[indq[k]:indq[k+1]].

	   lambda_k is stored in lmbda[indq[k]:indq[k+1]].
	ind := mnl + dims.At("l")[0]
	var beta *matrix.FloatMatrix

	for _, k := range dims.At("q") {
		W.Append("v", matrix.FloatZeros(k, 1))
	beta = matrix.FloatZeros(len(dims.At("q")), 1)
	W.Set("beta", beta)
	vset := W.At("v")
	for k, m := range dims.At("q") {
		v := vset[k]
		// a = sqrt( sk' * J * sk )  where J = [1, 0; 0, -I]
		aa := jnrm2(s, m, ind)
		// b = sqrt( zk' * J * zk )
		bb := jnrm2(z, m, ind)
		// beta[k] = ( a / b )**1/2
		beta.SetIndex(k, math.Sqrt(aa/bb))
		// c = sqrt( (sk/a)' * (zk/b) + 1 ) / sqrt(2)
		c0 := blas.DotFloat(s, z, &la_.IOpt{"n", m},
			&la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind})
		cc := math.Sqrt((c0/aa/bb + 1.0) / 2.0)

		// vk = 1/(2*c) * ( (sk/a) + J * (zk/b) )
		blas.CopyFloat(z, v, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})
		blas.ScalFloat(v, -1.0/bb)
		v.SetIndex(0, -1.0*v.GetIndex(0))
		blas.AxpyFloat(s, v, 1.0/aa, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})
		blas.ScalFloat(v, 1.0/2.0/cc)

		// v[k] = 1/sqrt(2*(vk0 + 1)) * ( vk + e ),  e = [1; 0]
		v.SetIndex(0, v.GetIndex(0)+1.0)
		blas.ScalFloat(v, (1.0 / math.Sqrt(2.0*v.GetIndex(0))))
		   To get the scaled variable lambda_k

		       d =  sk0/a + zk0/b + 2*c
		       lambda_k = [ c;
		                    (c + zk0/b)/d * sk1/a + (c + sk0/a)/d * zk1/b ]
		       lambda_k *= sqrt(a * b)
		lmbda.SetIndex(ind, cc)
		dd := 2*cc + s.GetIndex(ind)/aa + z.GetIndex(ind)/bb
		blas.CopyFloat(s, lmbda, &la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1},
			&la_.IOpt{"n", m - 1})
		zz := (cc + z.GetIndex(ind)/bb) / dd / aa
		ss := (cc + s.GetIndex(ind)/aa) / dd / bb
		blas.ScalFloat(lmbda, zz, &la_.IOpt{"offset", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, lmbda, ss, &la_.IOpt{"offsetx", ind + 1},
			&la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.ScalFloat(lmbda, math.Sqrt(aa*bb), &la_.IOpt{"offset", ind}, &la_.IOpt{"n", m})

		ind += m
		//fmt.Printf("after q[%d]:\n%v\n", k, lmbda)
	   For the 's' blocks: compute two lists 'r' and 'rti'.

	       r[k]' * sk^{-1} * r[k] = diag(lambda_k)^{-1}
	       r[k]' * zk * r[k] = diag(lambda_k)

	   where sk and zk are the entries inds[k] : inds[k+1] of
	   s and z, reshaped into symmetric matrices.

	   rti[k] is the inverse of r[k]', so

	       rti[k]' * sk * rti[k] = diag(lambda_k)^{-1}
	       rti[k]' * zk^{-1} * rti[k] = diag(lambda_k).

	   The vectors lambda_k are stored in

	       lmbda[ dims['l'] + sum(dims['q']) : -1 ]
	for _, k := range dims.At("s") {
		W.Append("r", matrix.FloatZeros(k, k))
		W.Append("rti", matrix.FloatZeros(k, k))
	maxs := maxdim(dims.At("s"))
	work := matrix.FloatZeros(maxs*maxs, 1)
	Ls := matrix.FloatZeros(maxs*maxs, 1)
	Lz := matrix.FloatZeros(maxs*maxs, 1)
	ind2 := ind
	for k, m := range dims.At("s") {
		r := W.At("r")[k]
		rti := W.At("rti")[k]

		// Factor sk = Ls*Ls'; store Ls in ds[inds[k]:inds[k+1]].
		blas.CopyFloat(s, Ls, &la_.IOpt{"offsetx", ind2}, &la_.IOpt{"n", m * m})
		lapack.PotrfFloat(Ls, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m})

		// Factor zs[k] = Lz*Lz'; store Lz in dz[inds[k]:inds[k+1]].
		blas.CopyFloat(z, Lz, &la_.IOpt{"offsetx", ind2}, &la_.IOpt{"n", m * m})
		lapack.PotrfFloat(Lz, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m})

		// SVD Lz'*Ls = U*diag(lambda_k)*V'.  Keep U in work.
		for i := 0; i < m; i++ {
			blas.ScalFloat(Ls, 0.0, &la_.IOpt{"offset", i * m}, &la_.IOpt{"n", i})
		blas.CopyFloat(Ls, work, &la_.IOpt{"n", m * m})
		blas.TrmmFloat(Lz, work, 1.0, la_.OptTransA, &la_.IOpt{"lda", m}, &la_.IOpt{"ldb", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"m", m})
		lapack.GesvdFloat(work, lmbda, nil, nil,
			la_.OptJobuO, &la_.IOpt{"lda", m}, &la_.IOpt{"offsetS", ind},
			&la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// r = Lz^{-T} * U
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})
		blas.TrsmFloat(Lz, r, 1.0, la_.OptTransA,
			&la_.IOpt{"lda", m}, &la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// rti = Lz * U
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})
		blas.TrmmFloat(Lz, rti, 1.0,
			&la_.IOpt{"lda", m}, &la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// r := r * diag(sqrt(lambda_k))
		// rti := rti * diag(1 ./ sqrt(lambda_k))
		for i := 0; i < m; i++ {
			a := math.Sqrt(lmbda.GetIndex(ind + i))
			blas.ScalFloat(r, a, &la_.IOpt{"offset", m * i}, &la_.IOpt{"n", m})
			blas.ScalFloat(rti, 1.0/a, &la_.IOpt{"offset", m * i}, &la_.IOpt{"n", m})
		ind += m
		ind2 += m * m
Example #7
func updateScaling(W *FloatMatrixSet, lmbda, s, z *matrix.FloatMatrix) (err error) {
	err = nil
	var stmp, ztmp *matrix.FloatMatrix
	   Nonlinear and 'l' blocks

	      d :=  d .* sqrt( s ./ z )
	      lmbda := lmbda .* sqrt(s) .* sqrt(z)
	mnl := 0
	dnlset := W.At("dnl")
	dnliset := W.At("dnli")
	dset := W.At("d")
	diset := W.At("di")
	beta := W.At("beta")[0]
	if dnlset != nil && dnlset[0].NumElements() > 0 {
		mnl = dnlset[0].NumElements()
	ml := dset[0].NumElements()
	m := mnl + ml
	//fmt.Printf("ml=%d, mnl=%d, m=%d'n", ml, mnl, m)

	stmp = matrix.FloatVector(s.FloatArray()[:m])
	stmp.Apply(stmp, math.Sqrt)
	s.SetIndexes(matrix.MakeIndexSet(0, m, 1), stmp.FloatArray())

	ztmp = matrix.FloatVector(z.FloatArray()[:m])
	ztmp.Apply(ztmp, math.Sqrt)
	z.SetIndexes(matrix.MakeIndexSet(0, m, 1), ztmp.FloatArray())

	// d := d .* s .* z
	if len(dnlset) > 0 {
		blas.TbmvFloat(s, dnlset[0], &la_.IOpt{"n", mnl}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})
		blas.TbsvFloat(z, dnlset[0], &la_.IOpt{"n", mnl}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})
		dnliset[0].Apply(dnlset[0], func(a float64) float64 { return 1.0 / a })
	blas.TbmvFloat(s, dset[0], &la_.IOpt{"n", ml},
		&la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1}, &la_.IOpt{"offseta", mnl})
	blas.TbsvFloat(z, dset[0], &la_.IOpt{"n", ml},
		&la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1}, &la_.IOpt{"offseta", mnl})
	diset[0].Apply(dset[0], func(a float64) float64 { return 1.0 / a })

	// lmbda := s .* z
	blas.CopyFloat(s, lmbda, &la_.IOpt{"n", m})
	blas.TbmvFloat(z, lmbda, &la_.IOpt{"n", m}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})

	//fmt.Printf("-- end of l:\nz=\n%v\nlmbda=\n%v\n", z.ConvertToString(), lmbda.ConvertToString())
	//fmt.Printf("W[d]=\n%v\n", dset[0].ConvertToString())
	//fmt.Printf("W[di]=\n%v\n", diset[0].ConvertToString())

	// 'q' blocks.
	// Let st and zt be the new variables in the old scaling:
	//     st = s_k,   zt = z_k
	// and a = sqrt(st' * J * st),  b = sqrt(zt' * J * zt).
	// 1. Compute the hyperbolic Householder transformation 2*q*q' - J
	//    that maps st/a to zt/b.
	//        c = sqrt( (1 + st'*zt/(a*b)) / 2 )
	//        q = (st/a + J*zt/b) / (2*c).
	//    The new scaling point is
	//        wk := betak * sqrt(a/b) * (2*v[k]*v[k]' - J) * q
	//    with betak = W['beta'][k].
	// 3. The scaled variable:
	//        lambda_k0 = sqrt(a*b) * c
	//        lambda_k1 = sqrt(a*b) * ( (2vk*vk' - J) * (-d*q + u/2) )_1
	//    where
	//        u = st/a - J*zt/b
	//        d = ( vk0 * (vk'*u) + u0/2 ) / (2*vk0 *(vk'*q) - q0 + 1).
	// 4. Update scaling
	//        v[k] := wk^1/2
	//              = 1 / sqrt(2*(wk0 + 1)) * (wk + e).
	//        beta[k] *=  sqrt(a/b)

	ind := m
	for k, v := range W.At("v") {
		m = v.NumElements()

		// ln = sqrt( lambda_k' * J * lambda_k ) !! NOT USED!!
		jnrm2(lmbda, m, ind) // ?? NOT USED ??

		// a = sqrt( sk' * J * sk ) = sqrt( st' * J * st )
		// s := s / a = st / a
		aa := jnrm2(s, m, ind)
		blas.ScalFloat(s, 1.0/aa, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})

		// b = sqrt( zk' * J * zk ) = sqrt( zt' * J * zt )
		// z := z / a = zt / b
		bb := jnrm2(z, m, ind)
		blas.ScalFloat(z, 1.0/bb, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})

		// c = sqrt( ( 1 + (st'*zt) / (a*b) ) / 2 )
		cc := blas.DotFloat(s, z, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind},
			&la_.IOpt{"n", m})
		cc = math.Sqrt((1.0 + cc) / 2.0)

		// vs = v' * st / a
		vs := blas.DotFloat(v, s, &la_.IOpt{"offsety", ind}, &la_.IOpt{"n", m})

		// vz = v' * J *zt / b
		vz := jdot(v, z, m, 0, ind)

		// vq = v' * q where q = (st/a + J * zt/b) / (2 * c)
		vq := (vs + vz) / 2.0 / cc

		// vq = v' * q where q = (st/a + J * zt/b) / (2 * c)
		vu := vs - vz
		// lambda_k0 = c
		lmbda.SetIndex(ind, cc)

		// wk0 = 2 * vk0 * (vk' * q) - q0
		wk0 := 2.0*v.GetIndex(0)*vq - (s.GetIndex(ind)+z.GetIndex(ind))/2.0/cc

		// d = (v[0] * (vk' * u) - u0/2) / (wk0 + 1)
		dd := (v.GetIndex(0)*vu - s.GetIndex(ind)/2.0 + z.GetIndex(ind)/2.0) / (wk0 + 1.0)

		// lambda_k1 = 2 * v_k1 * vk' * (-d*q + u/2) - d*q1 + u1/2
		blas.CopyFloat(v, lmbda, &la_.IOpt{"offsetx", 1}, &la_.IOpt{"offsety", ind + 1},
			&la_.IOpt{"n", m - 1})
		blas.ScalFloat(lmbda, (2.0 * (-dd*vq + 0.5*vu)),
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(s, lmbda, 0.5*(1.0-dd/cc),
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, lmbda, 0.5*(1.0+dd/cc),
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})

		// Scale so that sqrt(lambda_k' * J * lambda_k) = sqrt(aa*bb).
		blas.ScalFloat(lmbda, math.Sqrt(aa*bb), &la_.IOpt{"offset", ind}, &la_.IOpt{"n", m})

		// v := (2*v*v' - J) * q
		//    = 2 * (v'*q) * v' - (J* st/a + zt/b) / (2*c)
		blas.ScalFloat(v, 2.0*vq)
		v.SetIndex(0, v.GetIndex(0)-(s.GetIndex(ind)/2.0/cc))
		blas.AxpyFloat(s, v, 0.5/cc, &la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", 1},
			&la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, v, -0.5/cc, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})

		// v := v^{1/2} = 1/sqrt(2 * (v0 + 1)) * (v + e)
		v0 := v.GetIndex(0) + 1.0
		v.SetIndex(0, v0)
		blas.ScalFloat(v, 1.0/math.Sqrt(2.0*v0))

		// beta[k] *= ( aa / bb )**1/2
		bk := beta.GetIndex(k)
		beta.SetIndex(k, bk*math.Sqrt(aa/bb))

		ind += m
	//fmt.Printf("-- end of q:\nz=\n%v\nlmbda=\n%v\n", z.ConvertToString(), lmbda.ConvertToString())
	//fmt.Printf("beta=\n%v\n", beta.ConvertToString())

	// 's' blocks
	// Let st, zt be the updated variables in the old scaling:
	//     st = Ls * Ls', zt = Lz * Lz'.
	// where Ls and Lz are the 's' components of s, z.
	// 1.  SVD Lz'*Ls = Uk * lambda_k^+ * Vk'.
	// 2.  New scaling is
	//         r[k] := r[k] * Ls * Vk * diag(lambda_k^+)^{-1/2}
	//         rti[k] := r[k] * Lz * Uk * diag(lambda_k^+)^{-1/2}.

	maxr := 0
	for _, m := range W.At("r") {
		if m.Rows() > maxr {
			maxr = m.Rows()
	work := matrix.FloatZeros(maxr*maxr, 1)
	vlensum := 0
	for _, m := range W.At("v") {
		vlensum += m.NumElements()
	ind = mnl + ml + vlensum
	ind2 := ind
	ind3 := 0
	rset := W.At("r")
	rtiset := W.At("rti")

	for k, _ := range rset {
		r := rset[k]
		rti := rtiset[k]
		m = r.Rows()
		//fmt.Printf("m=%d, r=\n%v\nrti=\n%v\n", m, r.ConvertToString(), rti.ConvertToString())

		// r := r*sk = r*Ls
		blas.GemmFloat(r, s, work, 1.0, 0.0, &la_.IOpt{"m", m}, &la_.IOpt{"n", m},
			&la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("1 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})

		// rti := rti*zk = rti*Lz
		blas.GemmFloat(rti, z, work, 1.0, 0.0, &la_.IOpt{"m", m}, &la_.IOpt{"n", m},
			&la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("2 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})

		// SVD Lz'*Ls = U * lmbds^+ * V'; store U in sk and V' in zk. '
		blas.GemmFloat(z, s, work, 1.0, 0.0, la_.OptTransA, &la_.IOpt{"m", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"k", m}, &la_.IOpt{"lda", m}, &la_.IOpt{"ldb", m},
			&la_.IOpt{"ldc", m}, &la_.IOpt{"offseta", ind2}, &la_.IOpt{"offsetb", ind2})
		//fmt.Printf("3 work=\n%v\n", work.ConvertToString())

		// U = s, Vt = z
		lapack.GesvdFloat(work, lmbda, s, z, la_.OptJobuAll, la_.OptJobvtAll,
			&la_.IOpt{"m", m}, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m}, &la_.IOpt{"ldu", m},
			&la_.IOpt{"ldvt", m}, &la_.IOpt{"offsets", ind}, &la_.IOpt{"offsetu", ind2},
			&la_.IOpt{"offsetvt", ind2})

		// r := r*V
		blas.GemmFloat(r, z, work, 1.0, 0.0, la_.OptTransB, &la_.IOpt{"m", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("4 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})

		// rti := rti*U
		blas.GemmFloat(rti, s, work, 1.0, 0.0, &la_.IOpt{"m", m}, &la_.IOpt{"n", m},
			&la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("5 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})

		for i := 0; i < m; i++ {
			a := 1.0 / math.Sqrt(lmbda.GetIndex(ind+i))
			blas.ScalFloat(r, a, &la_.IOpt{"n", m}, &la_.IOpt{"offset", m * i})
			blas.ScalFloat(rti, a, &la_.IOpt{"n", m}, &la_.IOpt{"offset", m * i})
		ind += m
		ind2 += m * m
		ind3 += m // !!NOT USED: ind3!!

	//fmt.Printf("-- end of s:\nz=\n%v\nlmbda=\n%v\n", z.ConvertToString(), lmbda.ConvertToString())


Example #8

// Evaluates
//
//	x := H(lambda^{1/2}) * x   (inverse is false)
//	x := H(lambda^{-1/2}) * x  (inverse is true).
//
// H is the Hessian of the logarithmic barrier.

func scale2(lmbda, x *matrix.FloatMatrix, dims *DimensionSet, mnl int, inverse bool) (err error) {
	err = nil

	// For the nonlinear and 'l' blocks,
	//     xk := xk ./ l   (inverse is 'N')
	//     xk := xk .* l   (inverse is 'I')
	// where l is lmbda[:mnl+dims['l']].
	ind := mnl + dims.Sum("l")
	if !inverse {
		blas.TbsvFloat(lmbda, x, &la_.IOpt{"n", ind}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})
	} else {
		blas.TbmvFloat(lmbda, x, &la_.IOpt{"n", ind}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})

	// For 'q' blocks, if inverse is 'N',
	//     xk := 1/a * [ l'*J*xk;
	//         xk[1:] - (xk[0] + l'*J*xk) / (l[0] + 1) * l[1:] ].
	// If inverse is 'I',
	//     xk := a * [ l'*xk;
	//         xk[1:] + (xk[0] + l'*xk) / (l[0] + 1) * l[1:] ].
	// a = sqrt(lambda_k' * J * lambda_k), l = lambda_k / a.
	for _, m := range dims.At("q") {
		var lx, a, c, x0 float64
		a = jnrm2(lmbda, m, ind) //&la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})
		if !inverse {
			lx = jdot(lmbda, x, m, ind, ind) //&la_.IOpt{"n", m}, &la_.IOpt{"offsetx", ind},
			//&la_.IOpt{"offsety", ind})
			lx /= a
		} else {
			lx = blas.DotFloat(lmbda, x, &la_.IOpt{"n", m}, &la_.IOpt{"offsetx", ind},
				&la_.IOpt{"offsety", ind})
			lx /= a
		x0 = x.GetIndex(ind)
		x.SetIndex(ind, lx)
		c = (lx + x0) / (lmbda.GetIndex(ind)/a + 1.0) / a
		if !inverse {
			c *= -1.0
		blas.AxpyFloat(lmbda, x, c, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offsetx", ind + 1},
			&la_.IOpt{"offsety", ind + 1})
		if !inverse {
			a = 1.0 / a
		blas.ScalFloat(x, a, &la_.IOpt{"offset", ind}, &la_.IOpt{"n", m})
		ind += m
	// For the 's' blocks, if inverse is 'N',
	//     xk := vec( diag(l)^{-1/2} * mat(xk) * diag(k)^{-1/2}).
	// If inverse is true,
	//     xk := vec( diag(l)^{1/2} * mat(xk) * diag(k)^{1/2}).
	// where l is kth block of lambda.
	// We scale upper and lower triangular part of mat(xk) because the
	// inverse operation will be applied to nonsymmetric matrices.
	ind2 := ind
	sdims := dims.At("s")
	for k := 0; k < len(sdims); k++ {
		m := sdims[k]
		scaleF := func(v, x float64) float64 {
			return math.Sqrt(v) * math.Sqrt(x)
		for j := 0; j < m; j++ {
			c := matrix.FloatVector(lmbda.FloatArray()[ind2 : ind2+m])
			c.ApplyConst(c, scaleF, lmbda.GetIndex(ind2+j))
			if !inverse {
				blas.Tbsv(c, x, &la_.IOpt{"n", m}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1},
					&la_.IOpt{"offsetx", ind + j*m})
			} else {
				blas.Tbmv(c, x, &la_.IOpt{"n", m}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1},
					&la_.IOpt{"offsetx", ind + j*m})
		ind += m * m
		ind2 += m
Example #9
// The product x := (y o x).  If diag is 'D', the 's' part of y is
// diagonal and only the diagonal is stored.
func sprod(x, y *matrix.FloatMatrix, dims *DimensionSet, mnl int, opts ...la_.Option) (err error) {

	err = nil
	diag := la_.GetStringOpt("diag", "N", opts...)
	// For the nonlinear and 'l' blocks:
	//     yk o xk = yk .* xk.
	ind := mnl + dims.At("l")[0]
	err = blas.Tbmv(y, x, &la_.IOpt{"n", ind}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})
	if err != nil {
	//fmt.Printf("Sprod l:x=\n%v\n", x)

	// For 'q' blocks:
	//               [ l0   l1'  ]
	//     yk o xk = [           ] * xk
	//               [ l1   l0*I ]
	// where yk = (l0, l1).
	for _, m := range dims.At("q") {
		dd := blas.DotFloat(x, y, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind},
			&la_.IOpt{"n", m})
		//fmt.Printf("dd=%v\n", dd)
		alpha := y.GetIndex(ind)
		//fmt.Printf("scal=%v\n", alpha)
		blas.ScalFloat(x, alpha, &la_.IOpt{"offset", ind + 1}, &la_.IOpt{"n", m - 1})
		alpha = x.GetIndex(ind)
		//fmt.Printf("axpy=%v\n", alpha)
		blas.AxpyFloat(y, x, alpha, &la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1},
			&la_.IOpt{"n", m - 1})
		x.SetIndex(ind, dd)
		ind += m
	//fmt.Printf("Sprod q :x=\n%v\n", x)

	// For the 's' blocks:
	//    yk o sk = .5 * ( Yk * mat(xk) + mat(xk) * Yk )
	// where Yk = mat(yk) if diag is 'N' and Yk = diag(yk) if diag is 'D'.

	if diag[0] == 'N' {
		maxm := maxdim(dims.At("s"))
		A := matrix.FloatZeros(maxm, maxm)
		for _, m := range dims.At("s") {
			blas.Copy(x, A, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m * m})
			for i := 0; i < m-1; i++ { // i < m-1 --> i < m
				symm(A, m, 0)
				symm(y, m, ind)
			err = blas.Syr2kFloat(A, y, x, 0.5, 0.0, &la_.IOpt{"n", m}, &la_.IOpt{"k", m},
				&la_.IOpt{"lda", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
				&la_.IOpt{"offsetb", ind}, &la_.IOpt{"offsetc", ind})
			if err != nil {
			ind += m * m
		//fmt.Printf("Sprod diag=N s:x=\n%v\n", x)

	} else {
		ind2 := ind
		for _, m := range dims.At("s") {
			for i := 0; i < m; i++ {
				// original: u = 0.5 * ( y[ind2+i:ind2+m] + y[ind2+i] )
				// creates matrix of elements: [ind2+i ... ind2+m] then
				// element wisely adds y[ind2+i] and scales by 0.5
				iset := matrix.MakeIndexSet(ind2+i, ind2+m, 1)
				u := matrix.FloatVector(y.GetIndexes(iset))
				u.Add(y.GetIndex(ind2 + i))
				err = blas.Tbmv(u, x, &la_.IOpt{"n", m - i}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1},
					&la_.IOpt{"offsetx", ind + i*(m+1)})
				if err != nil {
			ind += m * m
			ind2 += m
		//fmt.Printf("Sprod diag=T s:x=\n%v\n", x)
Example #10
// Computes analytic center of A*x <= b with A m by n of rank n.
// We assume that b > 0 and the feasible set is bounded.
func Acent(A, b *matrix.FloatMatrix, niters int) (*matrix.FloatMatrix, []float64) {

	if niters <= 0 {
		niters = MAXITERS
	ntdecrs := make([]float64, 0, niters)

	if A.Rows() != b.Rows() {
		return nil, nil

	m, n := A.Size()
	x := matrix.FloatZeros(n, 1)
	H := matrix.FloatZeros(n, n)
	// Helper m*n matrix
	Dmn := matrix.FloatZeros(m, n)

	for i := 0; i < niters; i++ {

		// Gradient is g = A^T * (1.0/(b - A*x)). d = 1.0/(b - A*x)
		// d is m*1 matrix, g is n*1 matrix
		d := b.Minus(A.Times(x))
		d.Apply(d, func(a float64) float64 { return 1.0 / a })
		g := A.Transpose().Times(d)

		// Hessian is H = A^T * diag(1./(b-A*x))^2 * A.
		// in the original python code expression d[:,n*[0]] creates
		// a m*n matrix where each column is copy of column 0.
		// We do it here manually.
		for i := 0; i < n; i++ {
			Dmn.SetColumnMatrix(i, d)

		// Function mul creates element wise product of matrices.
		Asc := Dmn.Mul(A)
		blas.SyrkFloat(Asc, H, 1.0, 0.0, linalg.OptTrans)

		// Newton step is v = H^-1 * g.
		v := g.Copy().Neg()
		lapack.PosvFloat(H, v)

		// Directional derivative and Newton decrement.
		lam := blas.DotFloat(g, v)
		ntdecrs = append(ntdecrs, math.Sqrt(-lam))
		if ntdecrs[len(ntdecrs)-1] < TOL {
			fmt.Printf("last Newton decrement < TOL(%v)\n", TOL)
			return x, ntdecrs

		// Backtracking line search.
		// y = d .* A*v
		y := d.Mul(A.Times(v))
		step := 1.0
		for 1-step*y.Max() < 0 {
			step *= BETA

		for {
			// t = -step*y
			t := y.Copy().Scale(-step)
			// t = (1 + t) [e.g. t = 1 - step*y]

			// ts = sum(log(1-step*y))
			ts := t.Log().Sum()
			if -ts < ALPHA*step*lam {
				break search
			step *= BETA
		x = x.Plus(v)
	// no solution !!
	fmt.Printf("Iteration %d exhausted\n", niters)
	return x, ntdecrs
Example #11
//    Solves a pair of primal and dual convex quadratic cone programs
//        minimize    (1/2)*x'*P*x + q'*x
//        subject to  G*x + s = h
//                    A*x = b
//                    s >= 0
//        maximize    -(1/2)*(q + G'*z + A'*y)' * pinv(P) * (q + G'*z + A'*y)
//                    - h'*z - b'*y
//        subject to  q + G'*z + A'*y in range(P)
//                    z >= 0.
//    The inequalities are with respect to a cone C defined as the Cartesian
//    product of N + M + 1 cones:
//        C = C_0 x C_1 x .... x C_N x C_{N+1} x ... x C_{N+M}.
//    The first cone C_0 is the nonnegative orthant of dimension ml.
//    The next N cones are 2nd order cones of dimension mq[0], ..., mq[N-1].
//    The second order cone of dimension m is defined as
//        { (u0, u1) in R x R^{m-1} | u0 >= ||u1||_2 }.
//    The next M cones are positive semidefinite cones of order ms[0], ...,
//    ms[M-1] >= 0.
func ConeQp(P, q, G, h, A, b *matrix.FloatMatrix, dims *DimensionSet, solopts *SolverOptions, initvals *FloatMatrixSet) (sol *Solution, err error) {

	err = nil
	EXPON := 3
	STEP := 0.99

	sol = &Solution{Unknown,
		nil, nil, nil, nil, nil,
		0.0, 0.0, 0.0, 0.0, 0.0,
		0.0, 0.0, 0.0, 0.0, 0.0, 0}

	var kktsolver func(*FloatMatrixSet) (kktFunc, error) = nil
	var refinement int
	var correction bool = true

	feasTolerance := FEASTOL
	absTolerance := ABSTOL
	relTolerance := RELTOL
	if solopts.FeasTol > 0.0 {
		feasTolerance = solopts.FeasTol
	if solopts.AbsTol > 0.0 {
		absTolerance = solopts.AbsTol
	if solopts.RelTol > 0.0 {
		relTolerance = solopts.RelTol

	solvername := solopts.KKTSolverName
	if len(solvername) == 0 {
		if dims != nil && (len(dims.At("q")) > 0 || len(dims.At("s")) > 0) {
			solvername = "qr"
			//kktsolver = solvers["qr"]
		} else {
			solvername = "chol2"
			//kktsolver = solvers["chol2"]

	if q == nil || q.Cols() != 1 {
		err = errors.New("'q' must be non-nil matrix with one column")
	if P == nil || P.Rows() != q.Rows() || P.Cols() != q.Rows() {
		err = errors.New(fmt.Sprintf("'P' must be non-nil matrix of size (%d, %d)",
			q.Rows(), q.Rows()))
	fP := func(x, y *matrix.FloatMatrix, alpha, beta float64) error {
		return blas.SymvFloat(P, x, y, alpha, beta)

	if h == nil {
		h = matrix.FloatZeros(0, 1)
	if h.Cols() != 1 {
		err = errors.New("'h' must be non-nil matrix with one column")
	if dims == nil {
		dims = DSetNew("l", "q", "s")
		dims.Set("l", []int{h.Rows()})

	err = checkConeQpDimensions(dims)
	if err != nil {

	cdim := dims.Sum("l", "q") + dims.SumSquared("s")
	//cdim_pckd := dims.Sum("l", "q") + dims.SumPacked("s")
	cdim_diag := dims.Sum("l", "q", "s")

	if h.Rows() != cdim {
		err = errors.New(fmt.Sprintf("'h' must be float matrix of size (%d,1)", cdim))

	// Data for kth 'q' constraint are found in rows indq[k]:indq[k+1] of G.
	indq := make([]int, 0)
	indq = append(indq, dims.At("l")[0])
	for _, k := range dims.At("q") {
		indq = append(indq, indq[len(indq)-1]+k)

	// Data for kth 's' constraint are found in rows inds[k]:inds[k+1] of G.
	inds := make([]int, 0)
	inds = append(inds, indq[len(indq)-1])
	for _, k := range dims.At("s") {
		inds = append(inds, inds[len(inds)-1]+k*k)

	if G != nil && !G.SizeMatch(cdim, q.Rows()) {
		estr := fmt.Sprintf("'G' must be of size (%d,%d)", cdim, q.Rows())
		err = errors.New(estr)
	fG := func(x, y *matrix.FloatMatrix, alpha, beta float64, opts ...la.Option) error {
		return sgemv(G, x, y, alpha, beta, dims, opts...)

	// Check A and set defaults if it is nil
	if A == nil {
		// zeros rows reduces Gemv to vector products
		A = matrix.FloatZeros(0, q.Rows())
	if A.Cols() != q.Rows() {
		estr := fmt.Sprintf("'A' must have %d columns", q.Rows())
		err = errors.New(estr)

	fA := func(x, y *matrix.FloatMatrix, alpha, beta float64, opts ...la.Option) error {
		return blas.GemvFloat(A, x, y, alpha, beta, opts...)

	// Check b and set defaults if it is nil
	if b == nil {
		b = matrix.FloatZeros(0, 1)
	if b.Cols() != 1 {
		estr := fmt.Sprintf("'b' must be a matrix with 1 column")
		err = errors.New(estr)
	if b.Rows() != A.Rows() {
		estr := fmt.Sprintf("'b' must have length %d", A.Rows())
		err = errors.New(estr)

	// kktsolver(W) returns a routine for solving 3x3 block KKT system
	//     [ 0   A'  G'*W^{-1} ] [ ux ]   [ bx ]
	//     [ A   0   0         ] [ uy ] = [ by ].
	//     [ G   0   -W'       ] [ uz ]   [ bz ]
	var factor kktFactor
	if kkt, ok := solvers[solvername]; ok {
		if b.Rows() > q.Rows() {
			err = errors.New("1: Rank(A) < p or Rank[G; A] < n")
		if kkt == nil {
			err = errors.New(fmt.Sprintf("solver '%s' not yet implemented", solvername))
		// kkt function returns us problem spesific factor function.
		factor, err = kkt(G, dims, A, 0)
		if err != nil {
			fmt.Printf("error on factoring: %s\n", err)
		// solver is
		kktsolver = func(W *FloatMatrixSet) (kktFunc, error) {
			return factor(W, P, nil)
	} else {
		err = errors.New(fmt.Sprintf("solver '%s' not known", solvername))

	ws3 := matrix.FloatZeros(cdim, 1)
	wz3 := matrix.FloatZeros(cdim, 1)

	res := func(ux, uy, uz, us, vx, vy, vz, vs *matrix.FloatMatrix, W *FloatMatrixSet, lmbda *matrix.FloatMatrix) (err error) {
		// Evaluates residual in Newton equations:
		//      [ vx ]    [ vx ]   [ 0     ]   [ P  A'  G' ]   [ ux        ]
		//      [ vy ] := [ vy ] - [ 0     ] - [ A  0   0  ] * [ uy        ]
		//      [ vz ]    [ vz ]   [ W'*us ]   [ G  0   0  ]   [ W^{-1}*uz ]
		//      vs := vs - lmbda o (uz + us).

		// vx := vx - P*ux - A'*uy - G'*W^{-1}*uz
		fP(ux, vx, -1.0, 1.0)
		fA(uy, vx, -1.0, 1.0, la.OptTrans)
		blas.Copy(uz, wz3)
		scale(wz3, W, true, false)
		fG(wz3, vx, -1.0, 1.0, la.OptTrans)
		// vy := vy - A*ux
		fA(ux, vy, -1.0, 1.0)

		// vz := vz - G*ux - W'*us
		fG(ux, vz, -1.0, 1.0)
		blas.Copy(us, ws3)
		scale(ws3, W, true, false)
		blas.AxpyFloat(ws3, vz, -1.0)

		// vs := vs - lmbda o (uz + us)
		blas.Copy(us, ws3)
		blas.AxpyFloat(uz, ws3, 1.0)
		sprod(ws3, lmbda, dims, 0, la.OptDiag)
		blas.AxpyFloat(ws3, vs, -1.0)

	resx0 := math.Max(1.0, math.Sqrt(blas.Dot(q, q).Float()))
	resy0 := math.Max(1.0, math.Sqrt(blas.Dot(b, b).Float()))
	resz0 := math.Max(1.0, snrm2(h, dims, 0))
	//fmt.Printf("resx0: %.17f, resy0: %.17f, resz0: %.17f\n", resx0, resy0, resz0)

	var x, y, z, s, dx, dy, ds, dz, rx, ry, rz *matrix.FloatMatrix
	var lmbda, lmbdasq, sigs, sigz *matrix.FloatMatrix
	var W *FloatMatrixSet
	var f, f3 kktFunc
	var resx, resy, resz, step, sigma, mu, eta float64
	var gap, pcost, dcost, relgap, pres, dres, f0 float64

	if cdim == 0 {
		// Solve
		//     [ P  A' ] [ x ]   [ -q ]
		//     [       ] [   ] = [    ].
		//     [ A  0  ] [ y ]   [  b ]
		Wtmp := FloatSetNew("d", "di", "beta", "v", "r", "rti")
		Wtmp.Set("d", matrix.FloatZeros(0, 1))
		Wtmp.Set("di", matrix.FloatZeros(0, 1))
		f3, err = kktsolver(Wtmp)
		if err != nil {
			s := fmt.Sprintf("kkt error: %s", err)
			err = errors.New("2: Rank(A) < p or Rank(([P; A; G;]) < n : " + s)
		x = q.Copy()
		blas.ScalFloat(x, 0.0)
		y = b.Copy()
		f3(x, y, matrix.FloatZeros(0, 1))

		// dres = || P*x + q + A'*y || / resx0
		rx = q.Copy()
		fP(x, rx, 1.0, 1.0)
		pcost = 0.5 * (blas.DotFloat(x, rx) + blas.DotFloat(x, q))
		fA(y, rx, 1.0, 1.0, la.OptTrans)
		dres = math.Sqrt(blas.DotFloat(rx, rx) / resx0)

		ry = b.Copy()
		fA(x, ry, 1.0, -1.0)
		pres = math.Sqrt(blas.DotFloat(ry, ry) / resy0)

		relgap = 0.0
		if pcost == 0.0 {
			relgap = math.NaN()

		sol.Result = FloatSetNew("x", "y", "s", "z")
		sol.Result.Set("x", x)
		sol.Result.Set("y", y)
		sol.Result.Set("s", matrix.FloatZeros(0, 1))
		sol.Result.Set("z", matrix.FloatZeros(0, 1))
		sol.Status = Optimal
		sol.Gap = 0.0
		sol.RelativeGap = relgap
		sol.PrimalObjective = pcost
		sol.DualObjective = pcost
		sol.PrimalInfeasibility = pres
		sol.DualInfeasibility = dres
		sol.PrimalSlack = 0.0
		sol.DualSlack = 0.0
	x = q.Copy()
	y = b.Copy()
	s = matrix.FloatZeros(cdim, 1)
	z = matrix.FloatZeros(cdim, 1)

	var ts, tz, nrms, nrmz float64

	if initvals == nil {
		// Factor
		//     [ 0   A'  G' ]
		//     [ A   0   0  ].
		//     [ G   0  -I  ]
		W = FloatSetNew("d", "di", "v", "beta", "r", "rti")
		W.Set("d", matrix.FloatOnes(dims.At("l")[0], 1))
		W.Set("di", matrix.FloatOnes(dims.At("l")[0], 1))
		W.Set("beta", matrix.FloatOnes(len(dims.At("q")), 1))

		for _, n := range dims.At("q") {
			vm := matrix.FloatZeros(n, 1)
			vm.SetIndex(0, 1.0)
			W.Append("v", vm)
		for _, n := range dims.At("s") {
			W.Append("r", matrix.FloatIdentity(n))
			W.Append("rti", matrix.FloatIdentity(n))
		f, err = kktsolver(W)
		if err != nil {
			s := fmt.Sprintf("kkt error: %s", err)
			err = errors.New("3: Rank(A) < p or Rank([P; G; A]) < n : " + s)
		// Solve
		//     [ P   A'  G' ]   [ x ]   [ -q ]
		//     [ A   0   0  ] * [ y ] = [  b ].
		//     [ G   0  -I  ]   [ z ]   [  h ]
		x = q.Copy()
		blas.ScalFloat(x, -1.0)
		y = b.Copy()
		z = h.Copy()
		err = f(x, y, z)
		if err != nil {
			s := fmt.Sprintf("kkt error: %s", err)
			err = errors.New("4: Rank(A) < p or Rank([P; G; A]) < n : " + s)
		s = z.Copy()
		blas.ScalFloat(s, -1.0)

		nrms = snrm2(s, dims, 0)
		ts, _ = maxStep(s, dims, 0, nil)
		if ts >= -1e-8*math.Max(nrms, 1.0) {
			// a = 1.0 + ts
			a := 1.0 + ts
			is := make([]int, 0)
			// indexes s[:dims['l']]
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			// indexes s[indq[:-1]]
			is = append(is, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			// indexes s[ind:ind+m*m:m+1] (diagonal)
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				ind += m * m
			for _, k := range is {
				s.SetIndex(k, a+s.GetIndex(k))

		nrmz = snrm2(z, dims, 0)
		tz, _ = maxStep(z, dims, 0, nil)
		if tz >= -1e-8*math.Max(nrmz, 1.0) {
			a := 1.0 + tz
			is := make([]int, 0)
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			is = append(is, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				ind += m * m
			for _, k := range is {
				z.SetIndex(k, a+z.GetIndex(k))

	} else {
		ix := initvals.At("x")[0]
		if ix != nil {
			blas.Copy(ix, x)
		} else {
			blas.ScalFloat(x, 0.0)

		is := initvals.At("s")[0]
		if is != nil {
			blas.Copy(is, s)
		} else {
			iset := make([]int, 0)
			iset = append(iset, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			iset = append(iset, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				iset = append(iset, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				ind += m * m
			for _, k := range iset {
				s.SetIndex(k, 1.0)

		iy := initvals.At("y")[0]
		if iy != nil {
			blas.Copy(iy, y)
		} else {
			blas.ScalFloat(y, 0.0)

		iz := initvals.At("z")[0]
		if iz != nil {
			blas.Copy(iz, z)
		} else {
			iset := make([]int, 0)
			iset = append(iset, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			iset = append(iset, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				iset = append(iset, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				ind += m * m
			for _, k := range iset {
				z.SetIndex(k, 1.0)

	rx = q.Copy()
	ry = b.Copy()
	rz = matrix.FloatZeros(cdim, 1)
	dx = x.Copy()
	dy = y.Copy()
	dz = matrix.FloatZeros(cdim, 1)
	ds = matrix.FloatZeros(cdim, 1)
	lmbda = matrix.FloatZeros(cdim_diag, 1)
	lmbdasq = matrix.FloatZeros(cdim_diag, 1)
	sigs = matrix.FloatZeros(dims.Sum("s"), 1)
	sigz = matrix.FloatZeros(dims.Sum("s"), 1)

	var WS fClosure

	gap = sdot(s, z, dims, 0)
	for iter := 0; iter < solopts.MaxIter+1; iter++ {

		// f0 = (1/2)*x'*P*x + q'*x + r and  rx = P*x + q + A'*y + G'*z.
		blas.Copy(q, rx)
		fP(x, rx, 1.0, 1.0)
		f0 = 0.5 * (blas.DotFloat(x, rx) + blas.DotFloat(x, q))
		fA(y, rx, 1.0, 1.0, la.OptTrans)
		fG(z, rx, 1.0, 1.0, la.OptTrans)
		resx = math.Sqrt(blas.DotFloat(rx, rx))

		// ry = A*x - b
		blas.Copy(b, ry)
		fA(x, ry, 1.0, -1.0)
		resy = math.Sqrt(blas.DotFloat(ry, ry))

		// rz = s + G*x - h
		blas.Copy(s, rz)
		blas.AxpyFloat(h, rz, -1.0)
		fG(x, rz, 1.0, 1.0)
		resz = snrm2(rz, dims, 0)
		//fmt.Printf("resx: %.17f, resy: %.17f, resz: %.17f\n", resx, resy, resz)

		// Statistics for stopping criteria.

		// pcost = (1/2)*x'*P*x + q'*x
		// dcost = (1/2)*x'*P*x + q'*x + y'*(A*x-b) + z'*(G*x-h) '
		//       = (1/2)*x'*P*x + q'*x + y'*(A*x-b) + z'*(G*x-h+s) - z'*s
		//       = (1/2)*x'*P*x + q'*x + y'*ry + z'*rz - gap
		pcost = f0
		dcost = f0 + blas.DotFloat(y, ry) + sdot(z, rz, dims, 0) - gap
		if pcost < 0.0 {
			relgap = gap / -pcost
		} else if dcost > 0.0 {
			relgap = gap / dcost
		} else {
			relgap = math.NaN()
		pres = math.Max(resy/resy0, resz/resz0)
		dres = resx / resx0

		if solopts.ShowProgress {
			if iter == 0 {
				// show headers of something
				fmt.Printf("% 10s% 12s% 10s% 8s% 7s\n",
					"pcost", "dcost", "gap", "pres", "dres")
			// show something
			fmt.Printf("%2d: % 8.4e % 8.4e % 4.0e% 7.0e% 7.0e\n",
				iter, pcost, dcost, gap, pres, dres)

		if pres <= feasTolerance && dres <= feasTolerance &&
			(gap <= absTolerance || (!math.IsNaN(relgap) && relgap <= relTolerance)) ||
			iter == solopts.MaxIter {

			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				symm(z, m, ind)
				ind += m * m
			ts, _ = maxStep(s, dims, 0, nil)
			tz, _ = maxStep(z, dims, 0, nil)
			if iter == solopts.MaxIter {
				// terminated on max iterations.
				sol.Status = Unknown
				err = errors.New("Terminated (maximum iterations reached)")
				fmt.Printf("Terminated (maximum iterations reached)\n")
			// optimal solution found
			//fmt.Print("Optimal solution.\n")
			err = nil
			sol.Result = FloatSetNew("x", "y", "s", "z")
			sol.Result.Set("x", x)
			sol.Result.Set("y", y)
			sol.Result.Set("s", s)
			sol.Result.Set("z", z)
			sol.Status = Optimal
			sol.Gap = gap
			sol.RelativeGap = relgap
			sol.PrimalObjective = pcost
			sol.DualObjective = dcost
			sol.PrimalInfeasibility = pres
			sol.DualInfeasibility = dres
			sol.PrimalSlack = -ts
			sol.DualSlack = -tz
			sol.PrimalResidualCert = math.NaN()
			sol.DualResidualCert = math.NaN()
			sol.Iterations = iter

		// Compute initial scaling W and scaled iterates:
		//     W * z = W^{-T} * s = lambda.
		// lmbdasq = lambda o lambda.
		if iter == 0 {
			W, err = computeScaling(s, z, lmbda, dims, 0)
		ssqr(lmbdasq, lmbda, dims, 0)

		f3, err = kktsolver(W)
		if err != nil {
			if iter == 0 {
				s := fmt.Sprintf("kkt error: %s", err)
				err = errors.New("5: Rank(A) < p or Rank([P; A; G]) < n : " + s)
			} else {
				ind := dims.Sum("l", "q")
				for _, m := range dims.At("s") {
					symm(s, m, ind)
					symm(z, m, ind)
					ind += m * m
				ts, _ = maxStep(s, dims, 0, nil)
				tz, _ = maxStep(z, dims, 0, nil)
				// terminated (singular KKT matrix)
				fmt.Printf("Terminated (singular KKT matrix).\n")
				err = errors.New("Terminated (singular KKT matrix).")
				sol.Result = FloatSetNew("x", "y", "s", "z")
				sol.Result.Set("x", x)
				sol.Result.Set("y", y)
				sol.Result.Set("s", s)
				sol.Result.Set("z", z)
				sol.Status = Unknown
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.Iterations = iter
		// f4_no_ir(x, y, z, s) solves
		//     [ 0     ]   [ P  A'  G' ]   [ ux        ]   [ bx ]
		//     [ 0     ] + [ A  0   0  ] * [ uy        ] = [ by ]
		//     [ W'*us ]   [ G  0   0  ]   [ W^{-1}*uz ]   [ bz ]
		//     lmbda o (uz + us) = bs.
		// On entry, x, y, z, s contain bx, by, bz, bs.
		// On exit, they contain ux, uy, uz, us.

		f4_no_ir := func(x, y, z, s *matrix.FloatMatrix) error {
			// Solve
			//     [ P A' G'   ] [ ux        ]    [ bx                    ]
			//     [ A 0  0    ] [ uy        ] =  [ by                    ]
			//     [ G 0 -W'*W ] [ W^{-1}*uz ]    [ bz - W'*(lmbda o\ bs) ]
			//     us = lmbda o\ bs - uz.
			// On entry, x, y, z, s  contains bx, by, bz, bs.
			// On exit they contain x, y, z, s.

			// s := lmbda o\ s
			//    = lmbda o\ bs
			sinv(s, lmbda, dims, 0)

			// z := z - W'*s
			//    = bz - W'*(lambda o\ bs)
			blas.Copy(s, ws3)
			scale(ws3, W, true, false)
			blas.AxpyFloat(ws3, z, -1.0)

			err := f3(x, y, z)
			if err != nil {
				return err

			// s := s - z
			//    = lambda o\ bs - uz.
			blas.AxpyFloat(z, s, -1.0)
			return nil

		if iter == 0 {
			if refinement > 0 || solopts.Debug {
				WS.wx = q.Copy()
				WS.wy = y.Copy()
				WS.ws = matrix.FloatZeros(cdim, 1)
				WS.wz = matrix.FloatZeros(cdim, 1)
			if refinement > 0 {
				WS.wx2 = q.Copy()
				WS.wy2 = y.Copy()
				WS.ws2 = matrix.FloatZeros(cdim, 1)
				WS.wz2 = matrix.FloatZeros(cdim, 1)

		f4 := func(x, y, z, s *matrix.FloatMatrix) (err error) {
			err = nil
			if refinement > 0 || solopts.Debug {
				blas.Copy(x, WS.wx)
				blas.Copy(y, WS.wy)
				blas.Copy(z, WS.wz)
				blas.Copy(s, WS.ws)
			err = f4_no_ir(x, y, z, s)
			for i := 0; i < refinement; i++ {
				blas.Copy(WS.wx, WS.wx2)
				blas.Copy(WS.wy, WS.wy2)
				blas.Copy(WS.wz, WS.wz2)
				blas.Copy(WS.ws, WS.ws2)
				res(x, y, z, s, WS.wx2, WS.wy2, WS.wz2, WS.ws2, W, lmbda)
				f4_no_ir(WS.wx2, WS.wy2, WS.wz2, WS.ws2)
				blas.AxpyFloat(WS.wx2, x, 1.0)
				blas.AxpyFloat(WS.wy2, y, 1.0)
				blas.AxpyFloat(WS.wz2, z, 1.0)
				blas.AxpyFloat(WS.ws2, s, 1.0)

		//var mu, sigma, eta float64
		mu = gap / float64(dims.Sum("l", "s")+len(dims.At("q")))
		sigma, eta = 0.0, 0.0

		for i := 0; i < 2; i++ {
			// Solve
			//     [ 0     ]   [ P  A' G' ]   [ dx        ]
			//     [ 0     ] + [ A  0  0  ] * [ dy        ] = -(1 - eta) * r
			//     [ W'*ds ]   [ G  0  0  ]   [ W^{-1}*dz ]
			//     lmbda o (dz + ds) = -lmbda o lmbda + sigma*mu*e (i=0)
			//     lmbda o (dz + ds) = -lmbda o lmbda - dsa o dza
			//                         + sigma*mu*e (i=1) where dsa, dza
			//                         are the solution for i=0.

			// ds = -lmbdasq + sigma * mu * e  (if i is 0)
			//    = -lmbdasq - dsa o dza + sigma * mu * e  (if i is 1),
			//    where ds, dz are solution for i is 0.
			blas.ScalFloat(ds, 0.0)
			if correction && i == 1 {
				blas.AxpyFloat(ws3, ds, -1.0)
			blas.AxpyFloat(lmbdasq, ds, -1.0, &la.IOpt{"n", dims.Sum("l", "q")})
			ind := dims.At("l")[0]
			ds.Add(sigma*mu, matrix.MakeIndexSet(0, ind, 1)...)
			for _, m := range dims.At("q") {
				ds.SetIndex(ind, sigma*mu+ds.GetIndex(ind))
				ind += m
			ind2 := ind
			for _, m := range dims.At("s") {
				blas.AxpyFloat(lmbdasq, ds, -1.0, &la.IOpt{"n", m}, &la.IOpt{"incy", m + 1},
					&la.IOpt{"offsetx", ind2}, &la.IOpt{"offsety", ind})
				ds.Add(sigma*mu, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				ind += m * m
				ind2 += m

			// (dx, dy, dz) := -(1 - eta) * (rx, ry, rz)
			blas.ScalFloat(dx, 0.0)
			blas.AxpyFloat(rx, dx, -1.0+eta)
			blas.ScalFloat(dy, 0.0)
			blas.AxpyFloat(ry, dy, -1.0+eta)
			blas.ScalFloat(dz, 0.0)
			blas.AxpyFloat(rz, dz, -1.0+eta)

			//fmt.Printf("== Calling f4 %d\n", i)
			//fmt.Printf("dx=\n%v\n", dx.ToString("%.17f"))
			//fmt.Printf("ds=\n%v\n", ds.ToString("%.17f"))
			//fmt.Printf("dz=\n%v\n", dz.ToString("%.17f"))
			//fmt.Printf("== Entering f4 %d\n", i)
			err = f4(dx, dy, dz, ds)
			if err != nil {
				if iter == 0 {
					s := fmt.Sprintf("kkt error: %s", err)
					err = errors.New("6: Rank(A) < p or Rank([P; A; G]) < n : " + s)
				} else {
					ind = dims.Sum("l", "q")
					for _, m := range dims.At("s") {
						symm(s, m, ind)
						symm(z, m, ind)
						ind += m * m
					ts, _ = maxStep(s, dims, 0, nil)
					tz, _ = maxStep(z, dims, 0, nil)

			dsdz := sdot(ds, dz, dims, 0)
			if correction && i == 0 {
				blas.Copy(ds, ws3)
				sprod(ws3, dz, dims, 0)

			// Maximum step to boundary.
			// If i is 1, also compute eigenvalue decomposition of the 's'
			// blocks in ds, dz.  The eigenvectors Qs, Qz are stored in
			// dsk, dzk.  The eigenvalues are stored in sigs, sigz.
			scale2(lmbda, ds, dims, 0, false)
			scale2(lmbda, dz, dims, 0, false)
			if i == 0 {
				ts, _ = maxStep(ds, dims, 0, nil)
				tz, _ = maxStep(dz, dims, 0, nil)
			} else {
				ts, _ = maxStep(ds, dims, 0, sigs)
				tz, _ = maxStep(dz, dims, 0, sigz)
			t := maxvec([]float64{0.0, ts, tz})
			//fmt.Printf("== t=%.17f from %v\n", t, []float64{ts, tz})
			if t == 0.0 {
				step = 1.0
			} else {
				if i == 0 {
					step = math.Min(1.0, 1.0/t)
				} else {
					step = math.Min(1.0, STEP/t)
			if i == 0 {
				m := math.Max(0.0, 1.0-step+dsdz/gap*(step*step))
				sigma = math.Pow(math.Min(1.0, m), float64(EXPON))
				eta = 0.0
			//fmt.Printf("== step=%.17f sigma=%.17f dsdz=%.17f\n", step, sigma, dsdz)


		blas.AxpyFloat(dx, x, step)
		blas.AxpyFloat(dy, y, step)
		//fmt.Printf("x=\n%v\n", x.ConvertToString())
		//fmt.Printf("y=\n%v\n", y.ConvertToString())
		//fmt.Printf("ds=\n%v\n", ds.ConvertToString())
		//fmt.Printf("dz=\n%v\n", dz.ConvertToString())

		// We will now replace the 'l' and 'q' blocks of ds and dz with
		// the updated iterates in the current scaling.
		// We also replace the 's' blocks of ds and dz with the factors
		// Ls, Lz in a factorization Ls*Ls', Lz*Lz' of the updated variables
		// in the current scaling.

		// ds := e + step*ds for nonlinear, 'l' and 'q' blocks.
		// dz := e + step*dz for nonlinear, 'l' and 'q' blocks.
		blas.ScalFloat(ds, step, &la.IOpt{"n", dims.Sum("l", "q")})
		blas.ScalFloat(dz, step, &la.IOpt{"n", dims.Sum("l", "q")})
		ind := dims.At("l")[0]
		is := matrix.MakeIndexSet(0, ind, 1)
		ds.Add(1.0, is...)
		dz.Add(1.0, is...)
		for _, m := range dims.At("q") {
			ds.SetIndex(ind, 1.0+ds.GetIndex(ind))
			dz.SetIndex(ind, 1.0+dz.GetIndex(ind))
			ind += m

		// ds := H(lambda)^{-1/2} * ds and dz := H(lambda)^{-1/2} * dz.
		// This replaces the 'l' and 'q' components of ds and dz with the
		// updated variables in the current scaling.
		// The 's' components of ds and dz are replaced with
		// diag(lmbda_k)^{1/2} * Qs * diag(lmbda_k)^{1/2}
		// diag(lmbda_k)^{1/2} * Qz * diag(lmbda_k)^{1/2}
		scale2(lmbda, ds, dims, 0, true)
		scale2(lmbda, dz, dims, 0, true)

		// sigs := ( e + step*sigs ) ./ lambda for 's' blocks.
		// sigz := ( e + step*sigz ) ./ lambda for 's' blocks.
		blas.ScalFloat(sigs, step)
		blas.ScalFloat(sigz, step)
		sdimsum := dims.Sum("s")
		qdimsum := dims.Sum("l", "q")
		blas.TbsvFloat(lmbda, sigs, &la.IOpt{"n", sdimsum}, &la.IOpt{"k", 0},
			&la.IOpt{"lda", 1}, &la.IOpt{"offseta", qdimsum})
		blas.TbsvFloat(lmbda, sigz, &la.IOpt{"n", sdimsum}, &la.IOpt{"k", 0},
			&la.IOpt{"lda", 1}, &la.IOpt{"offseta", qdimsum})

		ind2 := qdimsum
		ind3 := 0
		sdims := dims.At("s")

		for k := 0; k < len(sdims); k++ {
			m := sdims[k]
			for i := 0; i < m; i++ {
				a := math.Sqrt(sigs.GetIndex(ind3 + i))
				blas.ScalFloat(ds, a, &la.IOpt{"offset", ind2 + m*i}, &la.IOpt{"n", m})
				a = math.Sqrt(sigz.GetIndex(ind3 + i))
				blas.ScalFloat(dz, a, &la.IOpt{"offset", ind2 + m*i}, &la.IOpt{"n", m})
			ind2 += m * m
			ind3 += m

		err = updateScaling(W, lmbda, ds, dz)

		// Unscale s, z, tau, kappa (unscaled variables are used only to
		// compute feasibility residuals).
		ind = dims.Sum("l", "q")
		ind2 = ind
		blas.Copy(lmbda, s, &la.IOpt{"n", ind})
		for _, m := range dims.At("s") {
			blas.ScalFloat(s, 0.0, &la.IOpt{"offset", ind2})
			blas.Copy(lmbda, s, &la.IOpt{"offsetx", ind}, &la.IOpt{"offsety", ind2},
				&la.IOpt{"n", m}, &la.IOpt{"incy", m + 1})
			ind += m
			ind2 += m * m
		scale(s, W, true, false)

		ind = dims.Sum("l", "q")
		ind2 = ind
		blas.Copy(lmbda, z, &la.IOpt{"n", ind})
		for _, m := range dims.At("s") {
			blas.ScalFloat(z, 0.0, &la.IOpt{"offset", ind2})
			blas.Copy(lmbda, z, &la.IOpt{"offsetx", ind}, &la.IOpt{"offsety", ind2},
				&la.IOpt{"n", m}, &la.IOpt{"incy", m + 1})
			ind += m
			ind2 += m * m
		scale(z, W, false, true)

		gap = blas.DotFloat(lmbda, lmbda)
		//fmt.Printf("== gap = %.17f\n", gap)