Beispiel #1
0
// sinv overwrites x with the inverse product (y o\ x).
//
// The vectors are laid out block by block: first a leading part of length
// mnl + dims.At("l")[0] (nonlinear and 'l' entries), then one block per 'q'
// dimension, then one block per 's' dimension. For the 's' blocks y holds
// only m diagonal entries per block while x holds the full m*m block, which
// is why two separate offsets are tracked below.
//
// The returned error is the first error reported by blas.Tbsv. x may already
// be partially updated when an error is returned.
func sinv(x, y *matrix.FloatMatrix, dims *DimensionSet, mnl int) (err error) {
	/*DEBUGGED*/

	// For the nonlinear and 'l' blocks:
	//
	//     yk o\ xk = yk .\ xk.

	ind := mnl + dims.At("l")[0]
	err = blas.Tbsv(y, x, &la_.IOpt{"n", ind}, &la_.IOpt{"k", 0}, &la_.IOpt{"ldA", 1})
	if err != nil {
		return
	}

	// For the 'q' blocks:
	//
	//                        [ l0   -l1'              ]
	//     yk o\ xk = 1/a^2 * [                        ] * xk
	//                        [ -l1  (a*I + l1*l1')/l0 ]
	//
	// where yk = (l0, l1) and a = l0^2 - l1'*l1.

	for _, m := range dims.At("q") {
		// aa starts as ||l1||_2 and becomes l0^2 - ||l1||^2, computed in
		// the factored form (l0 + ||l1||) * (l0 - ||l1||).
		aa := blas.Nrm2Float(y, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offset", ind + 1})
		ee := y.GetIndex(ind)
		aa = (ee + aa) * (ee - aa)
		// cc is the head of xk; dd is the dot product of the tails of xk
		// and yk. Both are read before x is modified.
		cc := x.GetIndex(ind)
		dd := blas.DotFloat(x, y, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offsetx", ind + 1},
			&la_.IOpt{"offsety", ind + 1})
		x.SetIndex(ind, cc*ee-dd)
		blas.ScalFloat(x, aa/ee, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offset", ind + 1})
		blas.AxpyFloat(y, x, dd/ee-cc, &la_.IOpt{"n", m - 1},
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1})
		// Final scaling of the whole block (head and tail) by 1/aa.
		blas.ScalFloat(x, 1.0/aa, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})
		ind += m
	}

	// For the 's' blocks:
	//
	//     yk o\ xk =  xk ./ gamma
	//
	// where gammaij = .5 * (yk_i + yk_j).

	// ind walks the m*m blocks of x, ind2 walks the m-entry blocks of y.
	ind2 := ind
	for _, m := range dims.At("s") {
		for j := 0; j < m; j++ {
			// u[i] = .5 * (y[ind2+j+i] + y[ind2+j]) for i = 0..m-j-1.
			// NOTE(review): this relies on matrix.FloatVector copying its
			// argument so that y itself is not modified -- confirm.
			u := matrix.FloatVector(y.FloatArray()[ind2+j : ind2+m])
			u.Add(y.GetIndex(ind2 + j))
			u.Scale(0.5)
			// Divide the m-j entries of x starting at the diagonal element
			// (j, j) of the current block by u.
			err = blas.Tbsv(u, x, &la_.IOpt{"n", m - j}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1},
				&la_.IOpt{"offsetx", ind + j*(m+1)})
			if err != nil {
				return
			}
		}
		ind += m * m
		ind2 += m
	}
	return
}
Beispiel #2
0
// jnrm2 returns sqrt(x' * J * x) where J = [1, 0; 0, -I], for a vector
// x in a second order cone.
//
// The vector is taken from x starting at index offset: the entry at offset
// is the head, and the following n-1 entries form the tail. A non-positive
// n is replaced by x.NumElements(), and a negative offset is treated as 0.
func jnrm2(x *matrix.FloatMatrix, n, offset int) float64 {
	/*DEBUGGED*/
	if offset < 0 {
		offset = 0
	}
	if n <= 0 {
		n = x.NumElements()
	}

	tailNorm := blas.Nrm2Float(x, &la_.IOpt{"n", n - 1}, &la_.IOpt{"offset", offset + 1})
	head := x.GetIndex(offset)

	// Product of the two square roots rather than sqrt(head^2 - tailNorm^2).
	lower := math.Sqrt(head - tailNorm)
	upper := math.Sqrt(head + tailNorm)
	return lower * upper
}
Beispiel #3
0
// maxStep returns min {t | x + t*e >= 0}, where e is defined as follows
//
//  - For the nonlinear and 'l' blocks: e is the vector of ones.
//  - For the 'q' blocks: e is the first unit vector.
//  - For the 's' blocks: e is the identity matrix.
//
// When called with a non-nil sigma, also returns the eigenvalues
// (in sigma) and the eigenvectors (in x) of the 's' components of x.
// When sigma is nil, each 's' block is first copied into scratch storage
// and the eigenvalue routine is run on that copy.
//
// rval is 0.0 when no block contributes a candidate. err is the first error
// reported by the eigenvalue routines; an error in one 's' block is no
// longer masked by a later block that succeeds. The remaining blocks are
// still processed after an error, so rval is computed exactly as before,
// but it should not be relied on when err is non-nil.
func maxStep(x *matrix.FloatMatrix, dims *DimensionSet, mnl int, sigma *matrix.FloatMatrix) (rval float64, err error) {
	/*DEBUGGED*/

	// t collects one candidate per block; the result is their maximum.
	t := make([]float64, 0, 10)

	// Nonlinear and 'l' part: the candidate is minus the smallest entry.
	ind := mnl + dims.Sum("l")
	if ind > 0 {
		t = append(t, -minvec(x.FloatArray()[:ind]))
	}

	// 'q' blocks: the candidate is ||tail||_2 - head.
	for _, m := range dims.At("q") {
		if m > 0 {
			v := blas.Nrm2Float(x, &la_.IOpt{"offset", ind + 1}, &la_.IOpt{"n", m - 1})
			v -= x.GetIndex(ind)
			t = append(t, v)
		}
		ind += m
	}

	// Scratch storage is only needed when the caller does not want the
	// decomposition back; it is sized for the largest 's' block.
	var Q *matrix.FloatMatrix
	var w *matrix.FloatMatrix
	ind2 := 0
	if sigma == nil && len(dims.At("s")) > 0 {
		mx := dims.Max("s")
		Q = matrix.FloatZeros(mx, mx)
		w = matrix.FloatZeros(mx, 1)
	}

	// 's' blocks: the candidate is minus the first eigenvalue returned.
	// ind indexes the m*m blocks in x, ind2 the m-entry blocks in sigma.
	for _, m := range dims.At("s") {
		var blockErr error
		if sigma == nil {
			blas.Copy(x, Q, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m * m})
			// Only the index range [1, 1] is requested (presumably the
			// smallest eigenvalue -- see the LAPACK syevr documentation).
			blockErr = lapack.SyevrFloat(Q, w, nil, 0.0, nil, []int{1, 1}, la_.OptRangeInt,
				&la_.IOpt{"n", m}, &la_.IOpt{"lda", m})
			if m > 0 {
				t = append(t, -w.GetIndex(0))
			}
		} else {
			blockErr = lapack.SyevdFloat(x, sigma, la_.OptJobZValue, &la_.IOpt{"n", m},
				&la_.IOpt{"lda", m}, &la_.IOpt{"offseta", ind}, &la_.IOpt{"offsetw", ind2})
			if m > 0 {
				t = append(t, -sigma.GetIndex(ind2))
			}
		}
		// Keep the first failure instead of letting a later block reset it.
		if blockErr != nil && err == nil {
			err = blockErr
		}
		ind += m * m
		ind2 += m
	}

	if len(t) > 0 {
		rval = maxvec(t)
	}
	return
}
Beispiel #4
0
// ssqr overwrites x with the product y o y. The 's' components of y are
// diagonal and only the diagonals of x and y are stored.
//
// The leading part has length mnl + dims.At("l")[0]; it is followed by one
// block per 'q' dimension and then dims.Sum("s") diagonal entries. The
// returned error is the result of whichever blas.Tbmv call ran last; the
// results of the other blas calls are not checked.
func ssqr(x, y *matrix.FloatMatrix, dims *DimensionSet, mnl int) (err error) {
	/*DEBUGGED*/

	// Start from x = y so that the in-place products below square y.
	blas.Copy(y, x)

	// Nonlinear and 'l' part: multiply x by y using a k = 0 band product.
	pos := mnl + dims.At("l")[0]
	if err = blas.Tbmv(y, x, &la_.IOpt{"n", pos}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1}); err != nil {
		return
	}

	// 'q' blocks: the head becomes ||yk||_2^2 and the tail is scaled by
	// twice the head of yk.
	for _, m := range dims.At("q") {
		nrm := blas.Nrm2Float(y, &la_.IOpt{"n", m}, &la_.IOpt{"offset", pos})
		x.SetIndex(pos, nrm*nrm)
		twiceHead := 2.0 * y.GetIndex(pos)
		blas.ScalFloat(x, twiceHead, &la_.IOpt{"n", m - 1}, &la_.IOpt{"offset", pos + 1})
		pos += m
	}

	// 's' part: same band product as above, applied to the stored diagonals.
	sdim := dims.Sum("s")
	return blas.Tbmv(y, x, &la_.IOpt{"n", sdim}, &la_.IOpt{"k", 0},
		&la_.IOpt{"lda", 1}, &la_.IOpt{"offseta", pos}, &la_.IOpt{"offsetx", pos})
}
Beispiel #5
0
//    Solves a pair of primal and dual cone programs
//
//        minimize    c'*x
//        subject to  G*x + s = h
//                    A*x = b
//                    s >= 0
//
//        maximize    -h'*z - b'*y
//        subject to  G'*z + A'*y + c = 0
//                    z >= 0.
//
//    The inequalities are with respect to a cone C defined as the Cartesian
//    product of N + M + 1 cones:
//
//        C = C_0 x C_1 x .... x C_N x C_{N+1} x ... x C_{N+M}.
//
//    The first cone C_0 is the nonnegative orthant of dimension ml.
//    The next N cones are second order cones of dimension mq[0], ...,
//    mq[N-1].  The second order cone of dimension m is defined as
//
//        { (u0, u1) in R x R^{m-1} | u0 >= ||u1||_2 }.
//
//    The next M cones are positive semidefinite cones of order ms[0], ...,
//    ms[M-1] >= 0.
//
func ConeLp(c, G, h, A, b *matrix.FloatMatrix, dims *DimensionSet, solopts *SolverOptions, primalstart, dualstart *FloatMatrixSet) (sol *Solution, err error) {

	err = nil
	const EXPON = 3
	const STEP = 0.99

	sol = &Solution{Unknown,
		nil, nil, nil, nil, nil,
		0.0, 0.0, 0.0, 0.0, 0.0,
		0.0, 0.0, 0.0, 0.0, 0.0, 0}

	//var primalstart *FloatMatrixSet = nil
	//var dualstart *FloatMatrixSet = nil
	var refinement int

	if solopts.Refinement > 0 {
		refinement = solopts.Refinement
	} else {
		refinement = 0
		if len(dims.At("q")) > 0 || len(dims.At("s")) > 0 {
			refinement = 1
		}
	}
	feasTolerance := FEASTOL
	absTolerance := ABSTOL
	relTolerance := RELTOL
	if solopts.FeasTol > 0.0 {
		feasTolerance = solopts.FeasTol
	}
	if solopts.AbsTol > 0.0 {
		absTolerance = solopts.AbsTol
	}
	if solopts.RelTol > 0.0 {
		relTolerance = solopts.RelTol
	}

	solvername := solopts.KKTSolverName
	if len(solvername) == 0 {
		if dims != nil && (len(dims.At("q")) > 0 || len(dims.At("s")) > 0) {
			solvername = "qr"
		} else {
			solvername = "chol2"
		}
	}

	if c == nil || c.Cols() > 1 {
		err = errors.New("'c' must be matrix with 1 column")
		return
	}
	if h == nil || h.Cols() > 1 {
		err = errors.New("'h' must be matrix with 1 column")
		return
	}

	if dims == nil {
		dims = DSetNew("l", "q", "s")
		dims.Set("l", []int{h.Rows()})
	}
	if err = checkConeLpDimensions(dims); err != nil {
		return
	}

	cdim := dims.Sum("l", "q") + dims.SumSquared("s")
	cdim_diag := dims.Sum("l", "q", "s")

	if h.Rows() != cdim {
		err = errors.New(fmt.Sprintf("'h' must be float matrix of size (%d,1)", cdim))
		return
	}

	// Data for kth 'q' constraint are found in rows indq[k]:indq[k+1] of G.
	indq := make([]int, 0, 100)
	indq = append(indq, dims.At("l")[0])
	for _, k := range dims.At("q") {
		indq = append(indq, indq[len(indq)-1]+k)
	}

	// Data for kth 's' constraint are found in rows inds[k]:inds[k+1] of G.
	inds := make([]int, 0, 100)
	inds = append(inds, indq[len(indq)-1])
	for _, k := range dims.At("s") {
		inds = append(inds, inds[len(inds)-1]+k*k)
	}

	if G != nil && !G.SizeMatch(cdim, c.Rows()) {
		estr := fmt.Sprintf("'G' must be of size (%d,%d)", cdim, c.Rows())
		err = errors.New(estr)
		return
	}
	Gf := func(x, y *matrix.FloatMatrix, alpha, beta float64, opts ...la.Option) error {
		return sgemv(G, x, y, alpha, beta, dims, opts...)
	}

	// Check A and set defaults if it is nil
	if A == nil {
		// zeros rows reduces Gemv to vector products
		A = matrix.FloatZeros(0, c.Rows())
	}
	if A.Cols() != c.Rows() {
		estr := fmt.Sprintf("'A' must have %d columns", c.Rows())
		err = errors.New(estr)
		return
	}

	Af := func(x, y *matrix.FloatMatrix, alpha, beta float64, opts ...la.Option) error {
		return blas.GemvFloat(A, x, y, alpha, beta, opts...)
	}

	// Check b and set defaults if it is nil
	if b == nil {
		b = matrix.FloatZeros(0, 1)
	}
	if b.Cols() != 1 {
		estr := fmt.Sprintf("'b' must be a matrix with 1 column")
		err = errors.New(estr)
		return
	}
	if b.Rows() != A.Rows() {
		estr := fmt.Sprintf("'b' must have length %d", A.Rows())
		err = errors.New(estr)
		return
	}

	// kktsolver(W) returns a routine for solving 3x3 block KKT system
	//
	//     [ 0   A'  G'*W^{-1} ] [ ux ]   [ bx ]
	//     [ A   0   0         ] [ uy ] = [ by ].
	//     [ G   0   -W'       ] [ uz ]   [ bz ]
	var factor kktFactor
	var kktsolver kktFactor = nil
	if kktfunc, ok := solvers[solvername]; ok {
		// kkt function returns us problem spesific factor function.
		factor, err = kktfunc(G, dims, A, 0)
		// solver is
		kktsolver = func(W *FloatMatrixSet, H, Df *matrix.FloatMatrix) (kktFunc, error) {
			return factor(W, nil, nil)
		}
	} else {
		err = errors.New(fmt.Sprintf("solver '%s' not known", solvername))
		return
	}

	// res() evaluates residual in 5x5 block KKT system
	//
	//     [ vx   ]    [ 0         ]   [ 0   A'  G'  c ] [ ux        ]
	//     [ vy   ]    [ 0         ]   [-A   0   0   b ] [ uy        ]
	//     [ vz   ] += [ W'*us     ] - [-G   0   0   h ] [ W^{-1}*uz ]
	//     [ vtau ]    [ dg*ukappa ]   [-c' -b' -h'  0 ] [ utau/dg   ]
	//
	//           vs += lmbda o (dz + ds)
	//       vkappa += lmbdg * (dtau + dkappa).
	ws3 := matrix.FloatZeros(cdim, 1)
	wz3 := matrix.FloatZeros(cdim, 1)

	//
	res := func(ux, uy, uz, utau, us, ukappa, vx, vy, vz, vtau, vs, vkappa *matrix.FloatMatrix, W *FloatMatrixSet, dg float64, lmbda *matrix.FloatMatrix) (err error) {

		err = nil
		// vx := vx - A'*uy - G'*W^{-1}*uz - c*utau/dg
		Af(uy, vx, -1.0, 1.0, la.OptTrans)
		//fmt.Printf("post-Af vx=\n%v\n", vx)
		blas.Copy(uz, wz3)
		scale(wz3, W, false, true)
		Gf(wz3, vx, -1.0, 1.0, la.OptTrans)
		blas.AxpyFloat(c, vx, -utau.Float()/dg)

		// vy := vy + A*ux - b*utau/dg
		Af(ux, vy, 1.0, 1.0)
		blas.AxpyFloat(b, vy, -utau.Float()/dg)

		// vz := vz + G*ux - h*utau/dg + W'*us
		Gf(ux, vz, 1.0, 1.0)
		blas.AxpyFloat(h, vz, -utau.Float()/dg)
		blas.Copy(us, ws3)
		scale(ws3, W, true, false)
		blas.AxpyFloat(ws3, vz, 1.0)

		// vtau := vtau + c'*ux + b'*uy + h'*W^{-1}*uz + dg*ukappa
		var vtauplus float64 = dg*ukappa.Float() + blas.DotFloat(c, ux) +
			blas.DotFloat(b, uy) + sdot(h, wz3, dims, 0)
		vtau.SetValue(vtau.Float() + vtauplus)

		// vs := vs + lmbda o (uz + us)
		blas.Copy(us, ws3)
		blas.AxpyFloat(uz, ws3, 1.0)
		sprod(ws3, lmbda, dims, 0, &la.SOpt{"diag", "D"})
		blas.AxpyFloat(ws3, vs, 1.0)

		// vkappa += vkappa + lmbdag * (utau + ukappa)
		lscale := lmbda.GetIndex(lmbda.NumElements() - 1)
		var vkplus float64 = lscale * (utau.Float() + ukappa.Float())
		vkappa.SetValue(vkappa.Float() + vkplus)
		return
	}

	resx0 := math.Max(1.0, math.Sqrt(blas.DotFloat(c, c)))
	resy0 := math.Max(1.0, math.Sqrt(blas.DotFloat(b, b)))
	resz0 := math.Max(1.0, snrm2(h, dims, 0))

	// select initial points

	//fmt.Printf("** initial resx0=%.4f, resy0=%.4f, resz0=%.4f \n", resx0, resy0, resz0)

	x := c.Copy()
	blas.ScalFloat(x, 0.0)
	y := b.Copy()
	blas.ScalFloat(y, 0.0)
	s := matrix.FloatZeros(cdim, 1)
	z := matrix.FloatZeros(cdim, 1)
	dx := c.Copy()
	dy := b.Copy()
	ds := matrix.FloatZeros(cdim, 1)
	dz := matrix.FloatZeros(cdim, 1)
	// these are singleton matrix
	dkappa := matrix.FloatValue(0.0)
	dtau := matrix.FloatValue(0.0)

	var W *FloatMatrixSet
	var f kktFunc
	if primalstart == nil || dualstart == nil {
		// Factor
		//
		//     [ 0   A'  G' ]
		//     [ A   0   0  ].
		//     [ G   0  -I  ]
		//
		W = FloatSetNew("d", "di", "v", "beta", "r", "rti")
		dd := dims.At("l")[0]
		mat := matrix.FloatOnes(dd, 1)
		W.Set("d", mat)
		mat = matrix.FloatOnes(dd, 1)
		W.Set("di", mat)
		dq := len(dims.At("q"))
		W.Set("beta", matrix.FloatOnes(dq, 1))

		for _, n := range dims.At("q") {
			vm := matrix.FloatZeros(n, 1)
			vm.SetIndex(0, 1.0)
			W.Append("v", vm)
		}
		for _, n := range dims.At("s") {
			W.Append("r", matrix.FloatIdentity(n))
			W.Append("rti", matrix.FloatIdentity(n))
		}
		f, err = kktsolver(W, nil, nil)
		if err != nil {
			fmt.Printf("kktsolver error: %s\n", err)
			return
		}
	}

	if primalstart == nil {
		// minimize    || G * x - h ||^2
		// subject to  A * x = b
		//
		// by solving
		//
		//     [ 0   A'  G' ]   [ x  ]   [ 0 ]
		//     [ A   0   0  ] * [ dy ] = [ b ].
		//     [ G   0  -I  ]   [ -s ]   [ h ]
		blas.ScalFloat(x, 0.0)
		blas.CopyFloat(y, dy)
		blas.CopyFloat(h, s)
		err = f(x, dy, s)
		if err != nil {
			fmt.Printf("f(x,dy,s): %s\n", err)
			return
		}
		blas.ScalFloat(s, -1.0)
		//fmt.Printf("** initial s:\n%v\n", s)
	} else {
		blas.Copy(primalstart.At("x")[0], x)
		blas.Copy(primalstart.At("s")[0], s)
	}

	// ts = min{ t | s + t*e >= 0 }
	ts, _ := maxStep(s, dims, 0, nil)
	if ts >= 0 && primalstart != nil {
		err = errors.New("initial s is not positive")
		return
	}

	if dualstart == nil {
		// minimize   || z ||^2
		// subject to G'*z + A'*y + c = 0
		//
		// by solving
		//
		//     [ 0   A'  G' ] [ dx ]   [ -c ]
		//     [ A   0   0  ] [ y  ] = [  0 ].
		//     [ G   0  -I  ] [ z  ]   [  0 ]
		blas.Copy(c, dx)
		blas.ScalFloat(dx, -1.0)
		blas.ScalFloat(y, 0.0)
		err = f(dx, y, z)
		if err != nil {
			fmt.Printf("f(dx,y,z): %s\n", err)
			return
		}
	} else {
		if len(dualstart.At("y")) > 0 {
			blas.Copy(dualstart.At("y")[0], y)
		}
		blas.Copy(dualstart.At("z")[0], z)
	}

	// ts = min{ t | z + t*e >= 0 }
	tz, _ := maxStep(z, dims, 0, nil)
	if tz >= 0 && dualstart != nil {
		err = errors.New("initial z is not positive")
		return
	}

	nrms := snrm2(s, dims, 0)
	nrmz := snrm2(z, dims, 0)

	gap := 0.0
	pcost := 0.0
	dcost := 0.0
	relgap := 0.0

	if primalstart == nil && dualstart == nil {
		gap = sdot(s, z, dims, 0)
		pcost = blas.DotFloat(c, x)
		dcost = -blas.DotFloat(b, y) - sdot(h, z, dims, 0)
		if pcost < 0.0 {
			relgap = gap / -pcost
		} else if dcost > 0.0 {
			relgap = gap / dcost
		} else {
			relgap = math.NaN()
		}
		if ts <= 0 && tz < 0 &&
			(gap <= absTolerance || (!math.IsNaN(relgap) && relgap <= relTolerance)) {
			// Constructed initial points happen to be feasible and optimal

			ind := dims.At("l")[0] + dims.Sum("q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				symm(z, m, ind)
				ind += m * m
			}

			// rx = A'*y + G'*z + c
			rx := c.Copy()
			Af(y, rx, 1.0, 1.0, la.OptTrans)
			Gf(z, rx, 1.0, 1.0, la.OptTrans)
			resx := math.Sqrt(blas.Dot(rx, rx).Float())
			// ry = b - A*x
			ry := b.Copy()
			Af(x, ry, -1.0, -1.0)
			resy := math.Sqrt(blas.Dot(ry, ry).Float())
			// rz = s + G*x - h
			rz := matrix.FloatZeros(cdim, 1)
			Gf(x, rz, 1.0, 0.0)
			blas.AxpyFloat(s, rz, 1.0)
			blas.AxpyFloat(h, rz, -1.0)
			resz := snrm2(rz, dims, 0)

			pres := math.Max(resy/resy0, resz/resz0)
			dres := resx / resx0
			cx := blas.Dot(c, x).Float()
			by := blas.Dot(b, y).Float()
			hz := sdot(h, z, dims, 0)

			sol.X = x
			sol.Y = y
			sol.S = s
			sol.Z = z
			sol.Result = FloatSetNew("x", "y", "s", "x")
			sol.Result.Append("x", x)
			sol.Result.Append("y", y)
			sol.Result.Append("s", s)
			sol.Result.Append("z", z)
			sol.Status = Optimal
			sol.Gap = gap
			sol.RelativeGap = relgap
			sol.PrimalObjective = cx
			sol.DualObjective = -(by + hz)
			sol.PrimalInfeasibility = pres
			sol.DualInfeasibility = dres
			sol.PrimalSlack = -ts
			sol.DualSlack = -tz

			return
		}

		if ts >= -1e-8*math.Max(nrms, 1.0) {
			a := 1.0 + ts
			is := make([]int, 0)
			// indexes s[:dims['l']]
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			// indexes s[indq[:-1]]
			is = append(is, indq[:len(indq)-1]...)
			// indexes s[ind:ind+m*m:m+1] (diagonal)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			}
			for _, k := range is {
				s.SetIndex(k, a+s.GetIndex(k))
			}
			//fmt.Printf("scaled s=\n%v\n", s.ConvertToString())
		}

		if tz >= -1e-8*math.Max(nrmz, 1.0) {
			a := 1.0 + tz
			is := make([]int, 0)
			// indexes z[:dims['l']]
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			// indexes z[indq[:-1]]
			is = append(is, indq[:len(indq)-1]...)
			// indexes z[ind:ind+m*m:m+1] (diagonal)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			}
			for _, k := range is {
				z.SetIndex(k, a+z.GetIndex(k))
			}
			//fmt.Printf("scaled z=\n%v\n", z.ConvertToString())
		}
	} else if primalstart == nil && dualstart != nil {
		if ts >= -1e-8*math.Max(nrms, 1.0) {
			a := 1.0 + ts
			is := make([]int, 0)
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			is = append(is, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			}
			for _, k := range is {
				s.SetIndex(k, a+s.GetIndex(k))
			}
		}
	} else if primalstart != nil && dualstart == nil {
		if tz >= -1e-8*math.Max(nrmz, 1.0) {
			a := 1.0 + tz
			is := make([]int, 0)
			is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
			is = append(is, indq[:len(indq)-1]...)
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
			}
			for _, k := range is {
				z.SetIndex(k, a+z.GetIndex(k))
			}
		}
	}

	tau := matrix.FloatValue(1.0)
	kappa := matrix.FloatValue(1.0)
	wkappa3 := matrix.FloatValue(0.0)

	rx := c.Copy()
	hrx := c.Copy()
	ry := b.Copy()
	hry := b.Copy()
	rz := matrix.FloatZeros(cdim, 1)
	hrz := matrix.FloatZeros(cdim, 1)
	sigs := matrix.FloatZeros(dims.Sum("s"), 1)
	sigz := matrix.FloatZeros(dims.Sum("s"), 1)
	lmbda := matrix.FloatZeros(cdim_diag+1, 1)
	lmbdasq := matrix.FloatZeros(cdim_diag+1, 1)

	gap = sdot(s, z, dims, 0)

	var x1, y1, z1 *matrix.FloatMatrix
	var dg, dgi float64
	var th *matrix.FloatMatrix
	var WS fClosure
	var f3 kktFunc

	//fmt.Printf("preloop x=\n%v\n", x.ConvertToString())
	//fmt.Printf("preloop z=\n%v\n", z.ConvertToString())
	//fmt.Printf("preloop s=\n%v\n", s.ConvertToString())
	for iter := 0; iter < solopts.MaxIter+1; iter++ {
		// hrx = -A'*y - G'*z
		Af(y, hrx, -1.0, 0.0, la.OptTrans)
		Gf(z, hrx, -1.0, 1.0, la.OptTrans)
		hresx := math.Sqrt(blas.DotFloat(hrx, hrx))

		// rx = hrx - c*tau
		//    = -A'*y - G'*z - c*tau
		blas.Copy(hrx, rx)
		err = blas.AxpyFloat(c, rx, -tau.Float())
		resx := math.Sqrt(blas.DotFloat(rx, rx)) / tau.Float()

		// hry = A*x
		Af(x, hry, 1.0, 0.0)
		hresy := math.Sqrt(blas.DotFloat(hry, hry))

		// ry = hry - b*tau
		//    = A*x - b*tau
		blas.Copy(hry, ry)
		blas.AxpyFloat(b, ry, -tau.Float())
		resy := math.Sqrt(blas.DotFloat(ry, ry)) / tau.Float()

		// hrz = s + G*x
		Gf(x, hrz, 1.0, 0.0)
		blas.AxpyFloat(s, hrz, 1.0)
		hresz := snrm2(hrz, dims, 0)

		// rz = hrz - h*tau
		//    = s + G*x - h*tau
		blas.ScalFloat(rz, 0.0)
		blas.AxpyFloat(hrz, rz, 1.0)
		blas.AxpyFloat(h, rz, -tau.Float())
		resz := snrm2(rz, dims, 0) / tau.Float()

		// rt = kappa + c'*x + b'*y + h'*z '
		cx := blas.DotFloat(c, x)
		by := blas.DotFloat(b, y)
		hz := sdot(h, z, dims, 0)
		rt := kappa.Float() + cx + by + hz

		// Statistics for stopping criteria
		pcost = cx / tau.Float()
		dcost = -(by + hz) / tau.Float()

		if pcost < 0.0 {
			relgap = gap / -pcost
		} else if dcost > 0.0 {
			relgap = gap / dcost
		} else {
			relgap = math.NaN()
		}

		pres := math.Max(resy/resy0, resz/resz0)
		dres := resx / resx0
		pinfres := math.NaN()
		if hz+by < 0.0 {
			pinfres = hresx / resx0 / (-hz - by)
		}
		dinfres := math.NaN()
		if cx < 0.0 {
			dinfres = math.Max(hresy/resy0, hresz/resz0) / (-cx)
		}

		if solopts.ShowProgress {
			if iter == 0 {
				// show headers of something
				fmt.Printf("% 10s% 12s% 10s% 8s% 7s % 5s\n",
					"pcost", "dcost", "gap", "pres", "dres", "k/t")
			}
			// show something
			fmt.Printf("%2d: % 8.4e % 8.4e % 4.0e% 7.0e% 7.0e% 7.0e\n",
				iter, pcost, dcost, gap, pres, dres, kappa.GetIndex(0)/tau.GetIndex(0))
		}

		if (pres <= feasTolerance && dres <= feasTolerance &&
			(gap <= absTolerance || (!math.IsNaN(relgap) && relgap <= relTolerance))) ||
			iter == solopts.MaxIter {
			// done
			blas.ScalFloat(x, 1.0/tau.Float())
			blas.ScalFloat(y, 1.0/tau.Float())
			blas.ScalFloat(s, 1.0/tau.Float())
			blas.ScalFloat(z, 1.0/tau.Float())
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				symm(z, m, ind)
				ind += m * m
			}
			ts, _ = maxStep(s, dims, 0, nil)
			tz, _ = maxStep(z, dims, 0, nil)
			if iter == solopts.MaxIter {
				// MaxIterations exceeded
				if solopts.ShowProgress {
					fmt.Printf("No solution. Max iterations exceeded\n")
				}
				err = errors.New("No solution. Max iterations exceeded")
				sol.X = x
				sol.Y = y
				sol.S = s
				sol.Z = z
				sol.Result = FloatSetNew("x", "y", "s", "x")
				sol.Result.Append("x", x)
				sol.Result.Append("y", y)
				sol.Result.Append("s", s)
				sol.Result.Append("z", z)
				sol.Status = Unknown
				sol.Gap = gap
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.PrimalResidualCert = pinfres
				sol.DualResidualCert = dinfres
				sol.Iterations = iter
				return
			} else {
				// Optimal
				if solopts.ShowProgress {
					fmt.Printf("Optimal solution.\n")
				}
				err = nil
				sol.X = x
				sol.Y = y
				sol.S = s
				sol.Z = z
				sol.Result = FloatSetNew("x", "y", "s", "x")
				sol.Result.Append("x", x)
				sol.Result.Append("y", y)
				sol.Result.Append("s", s)
				sol.Result.Append("z", z)
				sol.Status = Optimal
				sol.Gap = gap
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.PrimalResidualCert = math.NaN()
				sol.DualResidualCert = math.NaN()
				sol.Iterations = iter
				return
			}
		} else if !math.IsNaN(pinfres) && pinfres <= feasTolerance {
			// Primal Infeasible
			if solopts.ShowProgress {
				fmt.Printf("Primal infeasible.\n")
			}
			err = errors.New("Primal infeasible")
			blas.ScalFloat(y, 1.0/(-hz-by))
			blas.ScalFloat(z, 1.0/(-hz-by))
			sol.X = nil
			sol.Y = nil
			sol.S = nil
			sol.Z = nil
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(z, m, ind)
				ind += m * m
			}
			tz, _ = maxStep(z, dims, 0, nil)
			sol.Status = PrimalInfeasible
			sol.Result = FloatSetNew("x", "y", "s", "x")
			sol.Result.Append("x", nil)
			sol.Result.Append("y", nil)
			sol.Result.Append("s", nil)
			sol.Result.Append("z", nil)
			sol.Gap = math.NaN()
			sol.RelativeGap = math.NaN()
			sol.PrimalObjective = math.NaN()
			sol.DualObjective = 1.0
			sol.PrimalInfeasibility = math.NaN()
			sol.DualInfeasibility = math.NaN()
			sol.PrimalSlack = math.NaN()
			sol.DualSlack = -tz
			sol.PrimalResidualCert = pinfres
			sol.DualResidualCert = math.NaN()
			sol.Iterations = iter
			return
		} else if !math.IsNaN(dinfres) && dinfres <= feasTolerance {
			// Dual Infeasible
			if solopts.ShowProgress {
				fmt.Printf("Dual infeasible.\n")
			}
			err = errors.New("Primal infeasible")
			blas.ScalFloat(x, 1.0/(-cx))
			blas.ScalFloat(s, 1.0/(-cx))
			sol.X = nil
			sol.Y = nil
			sol.S = nil
			sol.Z = nil
			ind := dims.Sum("l", "q")
			for _, m := range dims.At("s") {
				symm(s, m, ind)
				ind += m * m
			}
			ts, _ = maxStep(s, dims, 0, nil)
			sol.Status = PrimalInfeasible
			sol.Result = FloatSetNew("x", "y", "s", "x")
			sol.Result.Append("x", nil)
			sol.Result.Append("y", nil)
			sol.Result.Append("s", nil)
			sol.Result.Append("z", nil)
			sol.Gap = math.NaN()
			sol.RelativeGap = math.NaN()
			sol.PrimalObjective = 1.0
			sol.DualObjective = math.NaN()
			sol.PrimalInfeasibility = math.NaN()
			sol.DualInfeasibility = math.NaN()
			sol.PrimalSlack = -ts
			sol.DualSlack = math.NaN()
			sol.PrimalResidualCert = math.NaN()
			sol.DualResidualCert = dinfres
			sol.Iterations = iter
			return
		}

		// Compute initial scaling W:
		//
		//     W * z = W^{-T} * s = lambda
		//     dg * tau = 1/dg * kappa = lambdag.
		if iter == 0 {
			W, err = computeScaling(s, z, lmbda, dims, 0)

			//     dg = sqrt( kappa / tau )
			//     dgi = sqrt( tau / kappa )
			//     lambda_g = sqrt( tau * kappa )
			//
			// lambda_g is stored in the last position of lmbda.

			dg = math.Sqrt(kappa.Float() / tau.Float())
			dgi = math.Sqrt(float64(tau.Float() / kappa.Float()))
			lmbda.SetIndex(-1, math.Sqrt(float64(tau.Float()*kappa.Float())))
		}
		// lmbdasq := lmbda o lmbda
		ssqr(lmbdasq, lmbda, dims, 0)
		lmbdasq.SetIndex(-1, lmbda.GetIndex(-1)*lmbda.GetIndex(-1))

		// f3(x, y, z) solves
		//
		//     [ 0  A'  G'   ] [ ux        ]   [ bx ]
		//     [ A  0   0    ] [ uy        ] = [ by ].
		//     [ G  0  -W'*W ] [ W^{-1}*uz ]   [ bz ]
		//
		// On entry, x, y, z contain bx, by, bz.
		// On exit, they contain ux, uy, uz.
		//
		// Also solve
		//
		//     [ 0   A'  G'    ] [ x1        ]          [ c ]
		//     [-A   0   0     ]*[ y1        ] = -dgi * [ b ].
		//     [-G   0   W'*W  ] [ W^{-1}*z1 ]          [ h ]

		f3, err = kktsolver(W, nil, nil)
		if err != nil {
			fmt.Printf("kktsolver error=%v\n", err)
			return
		}
		if iter == 0 {
			x1 = c.Copy()
			y1 = b.Copy()
			z1 = matrix.FloatZeros(cdim, 1)
		}
		blas.Copy(c, x1)
		blas.ScalFloat(x1, -1.0)
		blas.Copy(b, y1)
		blas.Copy(h, z1)
		err = f3(x1, y1, z1)
		//fmt.Printf("f3 result: x1=\n%v\nf3 result: z1=\n%v\n", x1, z1)
		blas.ScalFloat(x1, dgi)
		blas.ScalFloat(y1, dgi)
		blas.ScalFloat(z1, dgi)

		if err != nil {
			if iter == 0 && primalstart != nil && dualstart != nil {
				err = errors.New("Rank(A) < p or Rank([G; A]) < n")
				return
			} else {
				t_ := 1.0 / tau.Float()
				blas.ScalFloat(x, t_)
				blas.ScalFloat(y, t_)
				blas.ScalFloat(s, t_)
				blas.ScalFloat(z, t_)
				ind := dims.Sum("l", "q")
				for _, m := range dims.At("s") {
					symm(s, m, ind)
					symm(z, m, ind)
					ind += m * m
				}
				ts, _ = maxStep(s, dims, 0, nil)
				tz, _ = maxStep(z, dims, 0, nil)
				err = errors.New("Terminated (singular KKT matrix).")
				sol.X = x
				sol.Y = y
				sol.S = s
				sol.Z = z
				sol.Result = FloatSetNew("x", "y", "s", "x")
				sol.Result.Append("x", x)
				sol.Result.Append("y", y)
				sol.Result.Append("s", s)
				sol.Result.Append("z", z)
				sol.Status = Unknown
				sol.RelativeGap = relgap
				sol.PrimalObjective = pcost
				sol.DualObjective = dcost
				sol.PrimalInfeasibility = pres
				sol.DualInfeasibility = dres
				sol.PrimalSlack = -ts
				sol.DualSlack = -tz
				sol.Iterations = iter
				return
			}
		}

		// f6_no_ir(x, y, z, tau, s, kappa) solves
		//
		//    [ 0         ]   [  0   A'  G'  c ] [ ux        ]    [ bx   ]
		//    [ 0         ]   [ -A   0   0   b ] [ uy        ]    [ by   ]
		//    [ W'*us     ] - [ -G   0   0   h ] [ W^{-1}*uz ] = -[ bz   ]
		//    [ dg*ukappa ]   [ -c' -b' -h'  0 ] [ utau/dg   ]    [ btau ]
		//
		//    lmbda o (uz + us) = -bs
		//    lmbdag * (utau + ukappa) = -bkappa.
		//
		// On entry, x, y, z, tau, s, kappa contain bx, by, bz, btau,
		// bkappa.  On exit, they contain ux, uy, uz, utau, ukappa.

		// th = W^{-T} * h
		if iter == 0 {
			th = matrix.FloatZeros(cdim, 1)
		}

		blas.Copy(h, th)
		scale(th, W, true, true)

		f6_no_ir := func(x, y, z, tau, s, kappa *matrix.FloatMatrix) (err error) {
			// Solve
			//
			// [  0   A'  G'    0   ] [ ux        ]
			// [ -A   0   0     b   ] [ uy        ]
			// [ -G   0   W'*W  h   ] [ W^{-1}*uz ]
			// [ -c' -b' -h'    k/t ] [ utau/dg   ]
			//
			//   [ bx                    ]
			//   [ by                    ]
			// = [ bz - W'*(lmbda o\ bs) ]
			//   [ btau - bkappa/tau     ]
			//
			// us = -lmbda o\ bs - uz
			// ukappa = -bkappa/lmbdag - utau.

			// First solve
			//
			// [ 0  A' G'   ] [ ux        ]   [  bx                    ]
			// [ A  0  0    ] [ uy        ] = [ -by                    ]
			// [ G  0 -W'*W ] [ W^{-1}*uz ]   [ -bz + W'*(lmbda o\ bs) ]

			err = nil
			// y := -y = -by
			blas.ScalFloat(y, -1.0)

			// s := -lmbda o\ s = -lmbda o\ bs
			err = sinv(s, lmbda, dims, 0)
			blas.ScalFloat(s, -1.0)

			// z := -(z + W'*s) = -bz + W'*(lambda o\ bs)
			blas.Copy(s, ws3)
			err = scale(ws3, W, true, false)
			blas.AxpyFloat(ws3, z, 1.0)
			blas.ScalFloat(z, -1.0)

			err = f3(x, y, z)

			// Combine with solution of
			//
			// [ 0   A'  G'    ] [ x1         ]          [ c ]
			// [-A   0   0     ] [ y1         ] = -dgi * [ b ]
			// [-G   0   W'*W  ] [ W^{-1}*dzl ]          [ h ]
			//
			// to satisfy
			//
			// -c'*x - b'*y - h'*W^{-1}*z + dg*tau = btau - bkappa/tau. '

			// , kappa[0] := -kappa[0] / lmbd[-1] = -bkappa / lmbdag
			kap_ := kappa.Float()
			tau_ := tau.Float()
			kap_ = -kap_ / lmbda.GetIndex(-1)
			// tau[0] = tau[0] + kappa[0] / dgi = btau[0] - bkappa / tau
			tau_ = tau_ + kap_/dgi

			//tau[0] = dgi * ( tau[0] + xdot(c,x) + ydot(b,y) +
			//    misc.sdot(th, z, dims) ) / (1.0 + misc.sdot(z1, z1, dims))
			//tau_ = tau_ + blas.DotFloat(c, x) + blas.DotFloat(b, y) + sdot(th, z, dims, 0)
			tau_ += blas.DotFloat(c, x)
			tau_ += blas.DotFloat(b, y)
			tau_ += sdot(th, z, dims, 0)
			tau_ = dgi * tau_ / (1.0 + sdot(z1, z1, dims, 0))
			tau.SetValue(tau_)
			blas.AxpyFloat(x1, x, tau_)
			blas.AxpyFloat(y1, y, tau_)
			blas.AxpyFloat(z1, z, tau_)

			blas.AxpyFloat(z, s, -1.0)
			kap_ = kap_ - tau_
			kappa.SetValue(kap_)
			return
		}

		// f6(x, y, z, tau, s, kappa) solves the same system as f6_no_ir,
		// but applies iterative refinement. Following variables part of f6-closure
		// and ~ 12 is the limit. We wrap them to a structure.

		if iter == 0 {
			if refinement > 0 || solopts.Debug {
				WS.wx = c.Copy()
				WS.wy = b.Copy()
				WS.wz = matrix.FloatZeros(cdim, 1)
				WS.ws = matrix.FloatZeros(cdim, 1)
				WS.wtau = matrix.FloatValue(0.0)
				WS.wkappa = matrix.FloatValue(0.0)
			}
			if refinement > 0 {
				WS.wx2 = c.Copy()
				WS.wy2 = b.Copy()
				WS.wz2 = matrix.FloatZeros(cdim, 1)
				WS.ws2 = matrix.FloatZeros(cdim, 1)
				WS.wtau2 = matrix.FloatValue(0.0)
				WS.wkappa2 = matrix.FloatValue(0.0)
			}
		}

		f6 := func(x, y, z, tau, s, kappa *matrix.FloatMatrix) error {
			var err error = nil
			if refinement > 0 || solopts.Debug {
				blas.Copy(x, WS.wx)
				blas.Copy(y, WS.wy)
				blas.Copy(z, WS.wz)
				blas.Copy(s, WS.ws)
				WS.wtau.SetValue(tau.Float())
				WS.wkappa.SetValue(kappa.Float())
			}
			err = f6_no_ir(x, y, z, tau, s, kappa)
			for i := 0; i < refinement; i++ {
				blas.Copy(WS.wx, WS.wx2)
				blas.Copy(WS.wy, WS.wy2)
				blas.Copy(WS.wz, WS.wz2)
				blas.Copy(WS.ws, WS.ws2)
				WS.wtau2.SetValue(WS.wtau.Float())
				WS.wkappa2.SetValue(WS.wkappa.Float())
				err = res(x, y, z, tau, s, kappa, WS.wx2, WS.wy2, WS.wz2, WS.wtau2, WS.ws2, WS.wkappa2, W, dg, lmbda)
				err = f6_no_ir(WS.wx2, WS.wy2, WS.wz2, WS.wtau2, WS.ws2, WS.wkappa2)
				blas.AxpyFloat(WS.wx2, x, 1.0)
				blas.AxpyFloat(WS.wy2, y, 1.0)
				blas.AxpyFloat(WS.wz2, z, 1.0)
				blas.AxpyFloat(WS.ws2, s, 1.0)
				tau.SetValue(tau.Float() + WS.wtau2.Float())
				kappa.SetValue(kappa.Float() + WS.wkappa2.Float())
			}
			if solopts.Debug {
				res(x, y, z, tau, s, kappa, WS.wx, WS.wy, WS.wz, WS.wtau, WS.ws, WS.wkappa, W, dg, lmbda)
				fmt.Printf("KKT residuals\n")
			}
			return err
		}

		var nrm float64 = blas.Nrm2(lmbda).Float()
		mu := math.Pow(nrm, 2.0) / (1.0 + float64(cdim_diag))
		sigma := 0.0
		var step, tt, tk float64

		for i := 0; i < 2; i++ {
			// Solve
			//
			// [ 0         ]   [  0   A'  G'  c ] [ dx        ]
			// [ 0         ]   [ -A   0   0   b ] [ dy        ]
			// [ W'*ds     ] - [ -G   0   0   h ] [ W^{-1}*dz ]
			// [ dg*dkappa ]   [ -c' -b' -h'  0 ] [ dtau/dg   ]
			//
			//               [ rx   ]
			//               [ ry   ]
			// = - (1-sigma) [ rz   ]
			//               [ rtau ]
			//
			// lmbda o (dz + ds) = -lmbda o lmbda + sigma*mu*e
			// lmbdag * (dtau + dkappa) = - kappa * tau + sigma*mu
			//
			// ds = -lmbdasq if i is 0
			//    = -lmbdasq - dsa o dza + sigma*mu*e if i is 1
			// dkappa = -lmbdasq[-1] if i is 0
			//        = -lmbdasq[-1] - dkappaa*dtaua + sigma*mu if i is 1.
			ind := dims.Sum("l", "q")
			ind2 := ind
			blas.Copy(lmbdasq, ds, &la.IOpt{"n", ind})
			blas.ScalFloat(ds, 0.0, &la.IOpt{"offset", ind})
			for _, m := range dims.At("s") {
				blas.Copy(lmbdasq, ds, &la.IOpt{"n", m}, &la.IOpt{"offsetx", ind2},
					&la.IOpt{"offsety", ind}, &la.IOpt{"incy", m + 1})
				ind += m * m
				ind2 += m
			}
			// dkappa[0] = lmbdasq[-1]
			dkappa.SetValue(lmbdasq.GetIndex(-1))

			if i == 1 {
				blas.AxpyFloat(ws3, ds, 1.0)
				ind = dims.Sum("l", "q")
				is := make([]int, 0)
				is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
				is = append(is, indq[:len(indq)-1]...)
				for _, m := range dims.At("s") {
					is = append(is, matrix.MakeIndexSet(ind, ind+m*m, m+1)...)
				}
				for _, k := range is {
					ds.SetIndex(k, ds.GetIndex(k)-sigma*mu)
				}

				dk_ := dkappa.Float()
				wk_ := wkappa3.Float()
				dkappa.SetValue(dk_ + wk_ - sigma*mu)
			}
			// (dx, dy, dz, dtau) = (1-sigma)*(rx, ry, rz, rt)
			blas.Copy(rx, dx)
			blas.ScalFloat(dx, 1.0-sigma)
			blas.Copy(ry, dy)
			blas.ScalFloat(dy, 1.0-sigma)
			blas.Copy(rz, dz)
			blas.ScalFloat(dz, 1.0-sigma)
			// dtau[0] = (1.0 - sigma) * rt
			dtau.SetValue((1.0 - sigma) * rt)

			err = f6(dx, dy, dz, dtau, ds, dkappa)

			// Save ds o dz and dkappa * dtau for Mehrotra correction
			if i == 0 {
				blas.Copy(ds, ws3)
				sprod(ws3, dz, dims, 0)
				wkappa3.SetValue(dtau.Float() * dkappa.Float())
			}

			// Maximum step to boundary.
			//
			// If i is 1, also compute eigenvalue decomposition of the 's'
			// blocks in ds, dz.  The eigenvectors Qs, Qz are stored in
			// dsk, dzk.  The eigenvalues are stored in sigs, sigz.
			var ts, tz float64

			scale2(lmbda, ds, dims, 0, false)
			scale2(lmbda, dz, dims, 0, false)
			if i == 0 {
				ts, _ = maxStep(ds, dims, 0, nil)
				tz, _ = maxStep(dz, dims, 0, nil)
			} else {
				ts, _ = maxStep(ds, dims, 0, sigs)
				tz, _ = maxStep(dz, dims, 0, sigz)
			}
			dt_ := dtau.Float()
			dk_ := dkappa.Float()
			tt = -dt_ / lmbda.GetIndex(-1)
			tk = -dk_ / lmbda.GetIndex(-1)
			t := maxvec([]float64{0.0, ts, tz, tt, tk})
			if t == 0.0 {
				step = 1.0
			} else {
				if i == 0 {
					step = math.Min(1.0, 1.0/t)
				} else {
					step = math.Min(1.0, STEP/t)
				}
			}
			if i == 0 {
				// sigma = (1 - step)^3
				sigma = (1.0 - step) * (1.0 - step) * (1.0 - step)
				//sigma = math.Pow((1.0 - step), EXPON)
			}
		}
		//fmt.Printf("** tau = %.17f, kappa = %.17f\n", tau.Float(), kappa.Float())
		//fmt.Printf("** step = %.17f, sigma = %.17f\n", step, sigma)

		// Update x, y
		blas.AxpyFloat(dx, x, step)
		blas.AxpyFloat(dy, y, step)

		// Replace 'l' and 'q' blocks of ds and dz with the updated
		// variables in the current scaling.
		// Replace 's' blocks of ds and dz with the factors Ls, Lz in a
		// factorization Ls*Ls', Lz*Lz' of the updated variables in the
		// current scaling.
		//
		// ds := e + step*ds for 'l' and 'q' blocks.
		// dz := e + step*dz for 'l' and 'q' blocks.
		blas.ScalFloat(ds, step, &la.IOpt{"n", dims.Sum("l", "q")})
		blas.ScalFloat(dz, step, &la.IOpt{"n", dims.Sum("l", "q")})

		is := make([]int, 0)
		is = append(is, matrix.MakeIndexSet(0, dims.At("l")[0], 1)...)
		is = append(is, indq[:len(indq)-1]...)
		for _, k := range is {
			ds.SetIndex(k, 1.0+ds.GetIndex(k))
			dz.SetIndex(k, 1.0+dz.GetIndex(k))
		}

		// ds := H(lambda)^{-1/2} * ds and dz := H(lambda)^{-1/2} * dz.
		//
		// This replaces the 'l' and 'q' components of ds and dz with the
		// updated variables in the current scaling.
		// The 's' components of ds and dz are replaced with
		//
		// diag(lmbda_k)^{1/2} * Qs * diag(lmbda_k)^{1/2}
		// diag(lmbda_k)^{1/2} * Qz * diag(lmbda_k)^{1/2}
		scale2(lmbda, ds, dims, 0, true)
		scale2(lmbda, dz, dims, 0, true)

		// sigs := ( e + step*sigs ) ./ lambda for 's' blocks.
		// sigz := ( e + step*sigz ) ./ lambda for 's' blocks.
		blas.ScalFloat(sigs, step)
		blas.ScalFloat(sigz, step)
		sigs.Add(1.0)
		sigz.Add(1.0)
		sdimsum := dims.Sum("s")
		qdimsum := dims.Sum("l", "q")
		blas.TbsvFloat(lmbda, sigs, &la.IOpt{"n", sdimsum}, &la.IOpt{"k", 0},
			&la.IOpt{"lda", 1}, &la.IOpt{"offseta", qdimsum})
		blas.TbsvFloat(lmbda, sigz, &la.IOpt{"n", sdimsum}, &la.IOpt{"k", 0},
			&la.IOpt{"lda", 1}, &la.IOpt{"offseta", qdimsum})

		ind2 := qdimsum
		ind3 := 0
		sdims := dims.At("s")
		for k := 0; k < len(sdims); k++ {
			m := sdims[k]
			for i := 0; i < m; i++ {
				a := math.Sqrt(sigs.GetIndex(ind3 + i))
				blas.ScalFloat(ds, a, &la.IOpt{"offset", ind2 + m*i}, &la.IOpt{"n", m})
				a = math.Sqrt(sigz.GetIndex(ind3 + i))
				blas.ScalFloat(dz, a, &la.IOpt{"offset", ind2 + m*i}, &la.IOpt{"n", m})
			}
			ind2 += m * m
			ind3 += m
		}

		err = updateScaling(W, lmbda, ds, dz)

		// For kappa, tau block:
		//
		//     dg := sqrt( (kappa + step*dkappa) / (tau + step*dtau) )
		//         = dg * sqrt( (1 - step*tk) / (1 - step*tt) )
		//
		//     lmbda[-1] := sqrt((tau + step*dtau) * (kappa + step*dkappa))
		//                = lmbda[-1] * sqrt(( 1 - step*tt) * (1 - step*tk))
		dg *= math.Sqrt(1.0-step*tk) / math.Sqrt(1.0-step*tt)
		dgi = 1.0 / dg
		a := math.Sqrt(1.0-step*tk) * math.Sqrt(1.0-step*tt)
		lmbda.SetIndex(-1, a*lmbda.GetIndex(-1))

		// Unscale s, z, tau, kappa (unscaled variables are used only to
		// compute feasibility residuals).
		ind := dims.Sum("l", "q")
		ind2 = ind
		blas.Copy(lmbda, s, &la.IOpt{"n", ind})
		for _, m := range dims.At("s") {
			blas.ScalFloat(s, 0.0, &la.IOpt{"offset", ind2})
			blas.Copy(lmbda, s, &la.IOpt{"offsetx", ind}, &la.IOpt{"offsety", ind2},
				&la.IOpt{"n", m}, &la.IOpt{"incy", m + 1})
			ind += m
			ind2 += m * m
		}
		scale(s, W, true, false)

		ind = dims.Sum("l", "q")
		ind2 = ind
		blas.Copy(lmbda, z, &la.IOpt{"n", ind})
		for _, m := range dims.At("s") {
			blas.ScalFloat(z, 0.0, &la.IOpt{"offset", ind2})
			blas.Copy(lmbda, z, &la.IOpt{"offsetx", ind}, &la.IOpt{"offsety", ind2},
				&la.IOpt{"n", m}, &la.IOpt{"incy", m + 1})
			ind += m
			ind2 += m * m
		}
		scale(z, W, false, true)

		kappa.SetValue(lmbda.GetIndex(-1) / dgi)
		tau.SetValue(lmbda.GetIndex(-1) * dgi)
		g := blas.Nrm2Float(lmbda, &la.IOpt{"n", lmbda.Rows() - 1}) / tau.Float()
		gap = g * g
		//fmt.Printf(" ** kappa=%.10f, tau=%.10f, gap=%.10f\n", kappa.Float(), tau.Float(), gap)

	}
	return
}