Пример #1
0
// Next moves the wrapped iterator forward by the current entry of the skip
// pattern and then steps to the following entry.
//
// Nothing happens when the wrapped iterator already reports End. When the
// pattern index wraps back to 0 after the step, the wrapped iterator is moved
// forward a second time by skips[0] and the index is parked on entry 1, so
// entry 0 is only ever consumed as part of a wrap-around.
func (it *SkipIterator) Next() {
	if it.innerIterator.End() {
		return
	}

	// Consume the current pattern entry, then advance cyclically.
	it.innerIterator.Skip(it.skips[it.skipsIndex])
	it.skipsIndex = util.Mod(it.skipsIndex+1, len(it.skips))
	if it.skipsIndex != 0 {
		return
	}

	// The pattern wrapped around: apply its leading entry right away.
	it.innerIterator.Skip(it.skips[0])
	it.skipsIndex = 1
}
Пример #2
0
// NewSkipDataset builds a dataset that exposes a periodic subset of the
// instances of set.
//
// buckets describes one period of the selection pattern, in order:
//   1. a bucket with SkipMode == true skips NumInstances instances;
//   2. a bucket with SkipMode == false uses NumInstances instances.
// The whole bucket list is repeated until set is exhausted.
//
// The pattern is stored in skips as distances: skips[0] is the number of
// instances skipped before the first used one, every following entry is the
// distance from one used instance to the next, and the final entry is the
// distance from the last used instance of a period to the start of the next
// period.
//
// numInstances is the number of used instances whose position is below
// set.NumInstances(). It is 0 when set is empty or when buckets never uses an
// instance (including an empty bucket list).
func NewSkipDataset(set Dataset, buckets []SkipBucket) *skipDataset {
	skipSet := new(skipDataset)
	skipSet.innerDataset = set

	// Convert the buckets into distances between consecutive used instances.
	skip := 0
	for _, bucket := range buckets {
		for i := 0; i < bucket.NumInstances; i++ {
			if bucket.SkipMode {
				skip++
				continue
			}
			skipSet.skips = append(skipSet.skips, skip)
			skip = 1
		}
	}
	skipSet.skips = append(skipSet.skips, skip)

	skipSet.numInstances = 0

	// A single entry means the pattern never uses an instance, so there is
	// nothing to count and no entry 1 to continue with after a wrap-around.
	if len(skipSet.skips) < 2 {
		return skipSet
	}

	// Walk the positions of the used instances and count those that fall
	// inside the inner dataset.
	total := set.NumInstances()
	pos := skipSet.skips[0]
	next := 1
	for pos < total {
		skipSet.numInstances++

		pos += skipSet.skips[next]
		next = util.Mod(next+1, len(skipSet.skips))
		if next == 0 {
			// Crossing a period boundary: also apply the leading skip of
			// the next period and continue with entry 1.
			pos += skipSet.skips[0]
			next = 1
		}
	}

	return skipSet
}
Пример #3
0
// GetDeltaX takes the current point x_k and gradient g_k and returns the
// increment to apply to x, d_k = -H_k * g_k.
//
// On the first call (opt.k == 0) the optimizer's buffers are set up through
// initStruct and the plain gradient-descent direction g.Opposite() is
// returned. On later calls the newest s, y and ro entries are refreshed from
// the stored x and g history, and the result is produced by two passes over at
// most *lbfgs_history_size history slots. Every call increments opt.k.
//
// The process is terminated with log.Fatal when x and g report a different
// NumLabels().
func (opt *lbfgsOptimizer) GetDeltaX(x, g *util.Matrix) *util.Matrix {
	if x.NumLabels() != g.NumLabels() {
		log.Fatal("x和g的维度不一致")
	}

	// Allocate the working storage on the first call. A sparse x passes 0 as
	// the value count, a dense x passes its own NumValues().
	if opt.k == 0 {
		numValues := 0
		if !x.IsSparse() {
			numValues = x.NumValues()
		}
		opt.initStruct(x.NumLabels(), numValues, x.IsSparse())
	}

	historySize := *lbfgs_history_size
	slot := util.Mod(opt.k, historySize)

	// Record x_k and g_k in the slot that belongs to step k.
	opt.x[slot].DeepCopy(x)
	opt.g[slot].DeepCopy(g)

	// Step 0 has no history yet: fall back to simple gradient descent.
	if opt.k == 0 {
		opt.k++
		return g.Opposite()
	}

	prevSlot := util.Mod(opt.k-1, historySize)

	// s_(k-1) = x_k - x_(k-1)
	opt.s[prevSlot].WeightedSum(opt.x[slot], opt.x[prevSlot], 1, -1)

	// y_(k-1) = g_k - g_(k-1)
	opt.y[prevSlot].WeightedSum(opt.g[slot], opt.g[prevSlot], 1, -1)

	// ro_(k-1) = 1 / (y_(k-1) . s_(k-1))
	opt.ro.Set(prevSlot, 1.0/util.MatrixDotProduct(opt.y[prevSlot], opt.s[prevSlot]))

	// Oldest step that both passes visit, clamped at 0.
	oldest := 0
	if opt.k > historySize {
		oldest = opt.k - historySize
	}

	// First pass: newest to oldest, starting from q = g.
	opt.q.DeepCopy(g)
	for step := opt.k - 1; step >= oldest; step-- {
		h := util.Mod(step, historySize)
		opt.alpha.Set(h,
			opt.ro.Get(h)*util.MatrixDotProduct(opt.s[h], opt.q))
		opt.q.Increment(opt.y[h], -opt.alpha.Get(h))
	}

	// Second pass: oldest to newest, starting from z = q.
	opt.z.DeepCopy(opt.q)
	for step := oldest; step < opt.k; step++ {
		h := util.Mod(step, historySize)
		opt.beta.Set(h,
			opt.ro.Get(h)*util.MatrixDotProduct(opt.y[h], opt.z))
		opt.z.Increment(opt.s[h],
			opt.alpha.Get(h)-opt.beta.Get(h))
	}

	// Advance to the next step.
	opt.k++

	return opt.z.Opposite()
}