Пример #1
0
/*
   Returns the Nesterov-Todd scaling W at points s and z, and stores the
   scaled variable in lmbda.

       W * z = W^{-T} * s = lmbda.

   W is a MatrixSet with entries:

   - W['dnl']: positive vector
   - W['dnli']: componentwise inverse of W['dnl']
   - W['d']: positive vector
   - W['di']: componentwise inverse of W['d']
   - W['v']: lists of 2nd order cone vectors with unit hyperbolic norms
   - W['beta']: list of positive numbers
   - W['r']: list of square matrices
   - W['rti']: list of square matrices.  rti[k] is the inverse transpose
     of r[k].

*/
func computeScaling(s, z, lmbda *matrix.FloatMatrix, dims *sets.DimensionSet, mnl int) (W *sets.FloatMatrixSet, err error) {
	/*DEBUGGED*/
	err = nil
	W = sets.NewFloatSet("dnl", "dnli", "d", "di", "v", "beta", "r", "rti")

	// For the nonlinear block:
	//
	//     W['dnl'] = sqrt( s[:mnl] ./ z[:mnl] )
	//     W['dnli'] = sqrt( z[:mnl] ./ s[:mnl] )
	//     lambda[:mnl] = sqrt( s[:mnl] .* z[:mnl] )

	var stmp, ztmp, lmd *matrix.FloatMatrix
	if mnl > 0 {
		stmp = matrix.FloatVector(s.FloatArray()[:mnl])
		ztmp = matrix.FloatVector(z.FloatArray()[:mnl])
		//dnl := stmp.Div(ztmp)
		//dnl.Apply(dnl, math.Sqrt)
		dnl := matrix.Sqrt(matrix.Div(stmp, ztmp))
		//dnli := dnl.Copy()
		//dnli.Apply(dnli, func(a float64)float64 { return 1.0/a })
		dnli := matrix.Inv(dnl)
		W.Set("dnl", dnl)
		W.Set("dnli", dnli)
		//lmd = stmp.Mul(ztmp)
		//lmd.Apply(lmd, math.Sqrt)
		lmd = matrix.Sqrt(matrix.Mul(stmp, ztmp))
		lmbda.SetIndexesFromArray(lmd.FloatArray(), matrix.MakeIndexSet(0, mnl, 1)...)
	} else {
		// set for empty matrices
		//W.Set("dnl", matrix.FloatZeros(0, 1))
		//W.Set("dnli", matrix.FloatZeros(0, 1))
		mnl = 0
	}

	// For the 'l' block:
	//
	//     W['d'] = sqrt( sk ./ zk )
	//     W['di'] = sqrt( zk ./ sk )
	//     lambdak = sqrt( sk .* zk )
	//
	// where sk and zk are the first dims['l'] entries of s and z.
	// lambda_k is stored in the first dims['l'] positions of lmbda.

	m := dims.At("l")[0]
	//td := s.FloatArray()
	stmp = matrix.FloatVector(s.FloatArray()[mnl : mnl+m])
	//zd := z.FloatArray()
	ztmp = matrix.FloatVector(z.FloatArray()[mnl : mnl+m])
	//fmt.Printf(".Sqrt()=\n%v\n", matrix.Div(stmp, ztmp).Sqrt().ToString("%.17f"))
	//d := stmp.Div(ztmp)
	//d.Apply(d, math.Sqrt)
	d := matrix.Div(stmp, ztmp).Sqrt()
	//di := d.Copy()
	//di.Apply(di, func(a float64)float64 { return 1.0/a })
	di := matrix.Inv(d)
	//fmt.Printf("d:\n%v\n", d)
	//fmt.Printf("di:\n%v\n", di)
	W.Set("d", d)
	W.Set("di", di)
	//lmd = stmp.Mul(ztmp)
	//lmd.Apply(lmd, math.Sqrt)
	lmd = matrix.Mul(stmp, ztmp).Sqrt()
	// lmd has indexes mnl:mnl+m and length of m
	lmbda.SetIndexesFromArray(lmd.FloatArray(), matrix.MakeIndexSet(mnl, mnl+m, 1)...)
	//fmt.Printf("after l:\n%v\n", lmbda)

	/*
	   For the 'q' blocks, compute lists 'v', 'beta'.

	   The vector v[k] has unit hyperbolic norm:

	       (sqrt( v[k]' * J * v[k] ) = 1 with J = [1, 0; 0, -I]).

	   beta[k] is a positive scalar.

	   The hyperbolic Householder matrix H = 2*v[k]*v[k]' - J
	   defined by v[k] satisfies

	       (beta[k] * H) * zk  = (beta[k] * H) \ sk = lambda_k

	   where sk = s[indq[k]:indq[k+1]], zk = z[indq[k]:indq[k+1]].

	   lambda_k is stored in lmbda[indq[k]:indq[k+1]].
	*/
	ind := mnl + dims.At("l")[0]
	var beta *matrix.FloatMatrix

	for _, k := range dims.At("q") {
		W.Append("v", matrix.FloatZeros(k, 1))
	}
	beta = matrix.FloatZeros(len(dims.At("q")), 1)
	W.Set("beta", beta)
	vset := W.At("v")
	for k, m := range dims.At("q") {
		v := vset[k]
		// a = sqrt( sk' * J * sk )  where J = [1, 0; 0, -I]
		aa := jnrm2(s, m, ind)
		// b = sqrt( zk' * J * zk )
		bb := jnrm2(z, m, ind)
		// beta[k] = ( a / b )**1/2
		beta.SetIndex(k, math.Sqrt(aa/bb))
		// c = sqrt( (sk/a)' * (zk/b) + 1 ) / sqrt(2)
		c0 := blas.DotFloat(s, z, &la_.IOpt{"n", m},
			&la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind})
		cc := math.Sqrt((c0/aa/bb + 1.0) / 2.0)

		// vk = 1/(2*c) * ( (sk/a) + J * (zk/b) )
		blas.CopyFloat(z, v, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})
		blas.ScalFloat(v, -1.0/bb)
		v.SetIndex(0, -1.0*v.GetIndex(0))
		blas.AxpyFloat(s, v, 1.0/aa, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})
		blas.ScalFloat(v, 1.0/2.0/cc)

		// v[k] = 1/sqrt(2*(vk0 + 1)) * ( vk + e ),  e = [1; 0]
		v.SetIndex(0, v.GetIndex(0)+1.0)
		blas.ScalFloat(v, (1.0 / math.Sqrt(2.0*v.GetIndex(0))))
		/*
		   To get the scaled variable lambda_k

		       d =  sk0/a + zk0/b + 2*c
		       lambda_k = [ c;
		                    (c + zk0/b)/d * sk1/a + (c + sk0/a)/d * zk1/b ]
		       lambda_k *= sqrt(a * b)
		*/
		lmbda.SetIndex(ind, cc)
		dd := 2*cc + s.GetIndex(ind)/aa + z.GetIndex(ind)/bb
		blas.CopyFloat(s, lmbda, &la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1},
			&la_.IOpt{"n", m - 1})
		zz := (cc + z.GetIndex(ind)/bb) / dd / aa
		ss := (cc + s.GetIndex(ind)/aa) / dd / bb
		blas.ScalFloat(lmbda, zz, &la_.IOpt{"offset", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, lmbda, ss, &la_.IOpt{"offsetx", ind + 1},
			&la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.ScalFloat(lmbda, math.Sqrt(aa*bb), &la_.IOpt{"offset", ind}, &la_.IOpt{"n", m})

		ind += m
		//fmt.Printf("after q[%d]:\n%v\n", k, lmbda)
	}
	/*
	   For the 's' blocks: compute two lists 'r' and 'rti'.

	       r[k]' * sk^{-1} * r[k] = diag(lambda_k)^{-1}
	       r[k]' * zk * r[k] = diag(lambda_k)

	   where sk and zk are the entries inds[k] : inds[k+1] of
	   s and z, reshaped into symmetric matrices.

	   rti[k] is the inverse of r[k]', so

	       rti[k]' * sk * rti[k] = diag(lambda_k)^{-1}
	       rti[k]' * zk^{-1} * rti[k] = diag(lambda_k).

	   The vectors lambda_k are stored in

	       lmbda[ dims['l'] + sum(dims['q']) : -1 ]
	*/
	for _, k := range dims.At("s") {
		W.Append("r", matrix.FloatZeros(k, k))
		W.Append("rti", matrix.FloatZeros(k, k))
	}
	maxs := maxdim(dims.At("s"))
	work := matrix.FloatZeros(maxs*maxs, 1)
	Ls := matrix.FloatZeros(maxs*maxs, 1)
	Lz := matrix.FloatZeros(maxs*maxs, 1)
	ind2 := ind
	for k, m := range dims.At("s") {
		r := W.At("r")[k]
		rti := W.At("rti")[k]

		// Factor sk = Ls*Ls'; store Ls in ds[inds[k]:inds[k+1]].
		blas.CopyFloat(s, Ls, &la_.IOpt{"offsetx", ind2}, &la_.IOpt{"n", m * m})
		lapack.PotrfFloat(Ls, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m})

		// Factor zs[k] = Lz*Lz'; store Lz in dz[inds[k]:inds[k+1]].
		blas.CopyFloat(z, Lz, &la_.IOpt{"offsetx", ind2}, &la_.IOpt{"n", m * m})
		lapack.PotrfFloat(Lz, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m})

		// SVD Lz'*Ls = U*diag(lambda_k)*V'.  Keep U in work.
		for i := 0; i < m; i++ {
			blas.ScalFloat(Ls, 0.0, &la_.IOpt{"offset", i * m}, &la_.IOpt{"n", i})
		}
		blas.CopyFloat(Ls, work, &la_.IOpt{"n", m * m})
		blas.TrmmFloat(Lz, work, 1.0, la_.OptTransA, &la_.IOpt{"lda", m}, &la_.IOpt{"ldb", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"m", m})
		lapack.GesvdFloat(work, lmbda, nil, nil,
			la_.OptJobuO, &la_.IOpt{"lda", m}, &la_.IOpt{"offsetS", ind},
			&la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// r = Lz^{-T} * U
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})
		blas.TrsmFloat(Lz, r, 1.0, la_.OptTransA,
			&la_.IOpt{"lda", m}, &la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// rti = Lz * U
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})
		blas.TrmmFloat(Lz, rti, 1.0,
			&la_.IOpt{"lda", m}, &la_.IOpt{"n", m}, &la_.IOpt{"m", m})

		// r := r * diag(sqrt(lambda_k))
		// rti := rti * diag(1 ./ sqrt(lambda_k))
		for i := 0; i < m; i++ {
			a := math.Sqrt(lmbda.GetIndex(ind + i))
			blas.ScalFloat(r, a, &la_.IOpt{"offset", m * i}, &la_.IOpt{"n", m})
			blas.ScalFloat(rti, 1.0/a, &la_.IOpt{"offset", m * i}, &la_.IOpt{"n", m})
		}
		ind += m
		ind2 += m * m
	}
	return
}
Пример #2
0
func updateScaling(W *sets.FloatMatrixSet, lmbda, s, z *matrix.FloatMatrix) (err error) {
	err = nil
	var stmp, ztmp *matrix.FloatMatrix
	/*
	   Nonlinear and 'l' blocks

	      d :=  d .* sqrt( s ./ z )
	      lmbda := lmbda .* sqrt(s) .* sqrt(z)
	*/
	mnl := 0
	dnlset := W.At("dnl")
	dnliset := W.At("dnli")
	dset := W.At("d")
	diset := W.At("di")
	beta := W.At("beta")[0]
	if dnlset != nil && dnlset[0].NumElements() > 0 {
		mnl = dnlset[0].NumElements()
	}
	ml := dset[0].NumElements()
	m := mnl + ml
	//fmt.Printf("ml=%d, mnl=%d, m=%d'n", ml, mnl, m)

	stmp = matrix.FloatVector(s.FloatArray()[:m])
	stmp.Apply(math.Sqrt)
	s.SetIndexesFromArray(stmp.FloatArray(), matrix.MakeIndexSet(0, m, 1)...)

	ztmp = matrix.FloatVector(z.FloatArray()[:m])
	ztmp.Apply(math.Sqrt)
	z.SetIndexesFromArray(ztmp.FloatArray(), matrix.MakeIndexSet(0, m, 1)...)

	// d := d .* s .* z
	if len(dnlset) > 0 {
		blas.TbmvFloat(s, dnlset[0], &la_.IOpt{"n", mnl}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})
		blas.TbsvFloat(z, dnlset[0], &la_.IOpt{"n", mnl}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})
		//dnliset[0].Apply(dnlset[0], func(a float64)float64 { return 1.0/a})
		//--dnliset[0] = matrix.Inv(dnlset[0])
		matrix.Set(dnliset[0], dnlset[0])
		dnliset[0].Inv()
	}
	blas.TbmvFloat(s, dset[0], &la_.IOpt{"n", ml},
		&la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1}, &la_.IOpt{"offseta", mnl})
	blas.TbsvFloat(z, dset[0], &la_.IOpt{"n", ml},
		&la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1}, &la_.IOpt{"offseta", mnl})
	//diset[0].Apply(dset[0], func(a float64)float64 { return 1.0/a})
	//--diset[0] = matrix.Inv(dset[0])
	matrix.Set(diset[0], dset[0])
	diset[0].Inv()

	// lmbda := s .* z
	blas.CopyFloat(s, lmbda, &la_.IOpt{"n", m})
	blas.TbmvFloat(z, lmbda, &la_.IOpt{"n", m}, &la_.IOpt{"k", 0}, &la_.IOpt{"lda", 1})

	// 'q' blocks.
	// Let st and zt be the new variables in the old scaling:
	//
	//     st = s_k,   zt = z_k
	//
	// and a = sqrt(st' * J * st),  b = sqrt(zt' * J * zt).
	//
	// 1. Compute the hyperbolic Householder transformation 2*q*q' - J
	//    that maps st/a to zt/b.
	//
	//        c = sqrt( (1 + st'*zt/(a*b)) / 2 )
	//        q = (st/a + J*zt/b) / (2*c).
	//
	//    The new scaling point is
	//
	//        wk := betak * sqrt(a/b) * (2*v[k]*v[k]' - J) * q
	//
	//    with betak = W['beta'][k].
	//
	// 3. The scaled variable:
	//
	//        lambda_k0 = sqrt(a*b) * c
	//        lambda_k1 = sqrt(a*b) * ( (2vk*vk' - J) * (-d*q + u/2) )_1
	//
	//    where
	//
	//        u = st/a - J*zt/b
	//        d = ( vk0 * (vk'*u) + u0/2 ) / (2*vk0 *(vk'*q) - q0 + 1).
	//
	// 4. Update scaling
	//
	//        v[k] := wk^1/2
	//              = 1 / sqrt(2*(wk0 + 1)) * (wk + e).
	//        beta[k] *=  sqrt(a/b)

	ind := m
	for k, v := range W.At("v") {
		m = v.NumElements()

		// ln = sqrt( lambda_k' * J * lambda_k ) !! NOT USED!!
		jnrm2(lmbda, m, ind) // ?? NOT USED ??

		// a = sqrt( sk' * J * sk ) = sqrt( st' * J * st )
		// s := s / a = st / a
		aa := jnrm2(s, m, ind)
		blas.ScalFloat(s, 1.0/aa, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})

		// b = sqrt( zk' * J * zk ) = sqrt( zt' * J * zt )
		// z := z / a = zt / b
		bb := jnrm2(z, m, ind)
		blas.ScalFloat(z, 1.0/bb, &la_.IOpt{"n", m}, &la_.IOpt{"offset", ind})

		// c = sqrt( ( 1 + (st'*zt) / (a*b) ) / 2 )
		cc := blas.DotFloat(s, z, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"offsety", ind},
			&la_.IOpt{"n", m})
		cc = math.Sqrt((1.0 + cc) / 2.0)

		// vs = v' * st / a
		vs := blas.DotFloat(v, s, &la_.IOpt{"offsety", ind}, &la_.IOpt{"n", m})

		// vz = v' * J *zt / b
		vz := jdot(v, z, m, 0, ind)

		// vq = v' * q where q = (st/a + J * zt/b) / (2 * c)
		vq := (vs + vz) / 2.0 / cc

		// vq = v' * q where q = (st/a + J * zt/b) / (2 * c)
		vu := vs - vz
		// lambda_k0 = c
		lmbda.SetIndex(ind, cc)

		// wk0 = 2 * vk0 * (vk' * q) - q0
		wk0 := 2.0*v.GetIndex(0)*vq - (s.GetIndex(ind)+z.GetIndex(ind))/2.0/cc

		// d = (v[0] * (vk' * u) - u0/2) / (wk0 + 1)
		dd := (v.GetIndex(0)*vu - s.GetIndex(ind)/2.0 + z.GetIndex(ind)/2.0) / (wk0 + 1.0)

		// lambda_k1 = 2 * v_k1 * vk' * (-d*q + u/2) - d*q1 + u1/2
		blas.CopyFloat(v, lmbda, &la_.IOpt{"offsetx", 1}, &la_.IOpt{"offsety", ind + 1},
			&la_.IOpt{"n", m - 1})
		blas.ScalFloat(lmbda, (2.0 * (-dd*vq + 0.5*vu)),
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(s, lmbda, 0.5*(1.0-dd/cc),
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, lmbda, 0.5*(1.0+dd/cc),
			&la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", ind + 1}, &la_.IOpt{"n", m - 1})

		// Scale so that sqrt(lambda_k' * J * lambda_k) = sqrt(aa*bb).
		blas.ScalFloat(lmbda, math.Sqrt(aa*bb), &la_.IOpt{"offset", ind}, &la_.IOpt{"n", m})

		// v := (2*v*v' - J) * q
		//    = 2 * (v'*q) * v' - (J* st/a + zt/b) / (2*c)
		blas.ScalFloat(v, 2.0*vq)
		v.SetIndex(0, v.GetIndex(0)-(s.GetIndex(ind)/2.0/cc))
		blas.AxpyFloat(s, v, 0.5/cc, &la_.IOpt{"offsetx", ind + 1}, &la_.IOpt{"offsety", 1},
			&la_.IOpt{"n", m - 1})
		blas.AxpyFloat(z, v, -0.5/cc, &la_.IOpt{"offsetx", ind}, &la_.IOpt{"n", m})

		// v := v^{1/2} = 1/sqrt(2 * (v0 + 1)) * (v + e)
		v0 := v.GetIndex(0) + 1.0
		v.SetIndex(0, v0)
		blas.ScalFloat(v, 1.0/math.Sqrt(2.0*v0))

		// beta[k] *= ( aa / bb )**1/2
		bk := beta.GetIndex(k)
		beta.SetIndex(k, bk*math.Sqrt(aa/bb))

		ind += m
	}
	//fmt.Printf("-- end of q:\nz=\n%v\nlmbda=\n%v\n", z.ConvertToString(), lmbda.ConvertToString())
	//fmt.Printf("beta=\n%v\n", beta.ConvertToString())

	// 's' blocks
	//
	// Let st, zt be the updated variables in the old scaling:
	//
	//     st = Ls * Ls', zt = Lz * Lz'.
	//
	// where Ls and Lz are the 's' components of s, z.
	//
	// 1.  SVD Lz'*Ls = Uk * lambda_k^+ * Vk'.
	//
	// 2.  New scaling is
	//
	//         r[k] := r[k] * Ls * Vk * diag(lambda_k^+)^{-1/2}
	//         rti[k] := r[k] * Lz * Uk * diag(lambda_k^+)^{-1/2}.
	//

	maxr := 0
	for _, m := range W.At("r") {
		if m.Rows() > maxr {
			maxr = m.Rows()
		}
	}
	work := matrix.FloatZeros(maxr*maxr, 1)
	vlensum := 0
	for _, m := range W.At("v") {
		vlensum += m.NumElements()
	}
	ind = mnl + ml + vlensum
	ind2 := ind
	ind3 := 0
	rset := W.At("r")
	rtiset := W.At("rti")

	for k, _ := range rset {
		r := rset[k]
		rti := rtiset[k]
		m = r.Rows()
		//fmt.Printf("m=%d, r=\n%v\nrti=\n%v\n", m, r.ConvertToString(), rti.ConvertToString())

		// r := r*sk = r*Ls
		blas.GemmFloat(r, s, work, 1.0, 0.0, &la_.IOpt{"m", m}, &la_.IOpt{"n", m},
			&la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("1 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})

		// rti := rti*zk = rti*Lz
		blas.GemmFloat(rti, z, work, 1.0, 0.0, &la_.IOpt{"m", m}, &la_.IOpt{"n", m},
			&la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("2 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})

		// SVD Lz'*Ls = U * lmbds^+ * V'; store U in sk and V' in zk. '
		blas.GemmFloat(z, s, work, 1.0, 0.0, la_.OptTransA, &la_.IOpt{"m", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"k", m}, &la_.IOpt{"lda", m}, &la_.IOpt{"ldb", m},
			&la_.IOpt{"ldc", m}, &la_.IOpt{"offseta", ind2}, &la_.IOpt{"offsetb", ind2})
		//fmt.Printf("3 work=\n%v\n", work.ConvertToString())

		// U = s, Vt = z
		lapack.GesvdFloat(work, lmbda, s, z, la_.OptJobuAll, la_.OptJobvtAll,
			&la_.IOpt{"m", m}, &la_.IOpt{"n", m}, &la_.IOpt{"lda", m}, &la_.IOpt{"ldu", m},
			&la_.IOpt{"ldvt", m}, &la_.IOpt{"offsets", ind}, &la_.IOpt{"offsetu", ind2},
			&la_.IOpt{"offsetvt", ind2})

		// r := r*V
		blas.GemmFloat(r, z, work, 1.0, 0.0, la_.OptTransB, &la_.IOpt{"m", m},
			&la_.IOpt{"n", m}, &la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("4 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, r, &la_.IOpt{"n", m * m})

		// rti := rti*U
		blas.GemmFloat(rti, s, work, 1.0, 0.0, &la_.IOpt{"m", m}, &la_.IOpt{"n", m},
			&la_.IOpt{"k", m}, &la_.IOpt{"ldb", m}, &la_.IOpt{"ldc", m},
			&la_.IOpt{"offsetb", ind2})
		//fmt.Printf("5 work=\n%v\n", work.ConvertToString())
		blas.CopyFloat(work, rti, &la_.IOpt{"n", m * m})

		for i := 0; i < m; i++ {
			a := 1.0 / math.Sqrt(lmbda.GetIndex(ind+i))
			blas.ScalFloat(r, a, &la_.IOpt{"n", m}, &la_.IOpt{"offset", m * i})
			blas.ScalFloat(rti, a, &la_.IOpt{"n", m}, &la_.IOpt{"offset", m * i})
		}
		ind += m
		ind2 += m * m
		ind3 += m // !!NOT USED: ind3!!
	}

	//fmt.Printf("-- end of s:\nz=\n%v\nlmbda=\n%v\n", z.ConvertToString(), lmbda.ConvertToString())

	return

}