func MaxEntComputeInstanceDerivative(
	weights *util.Matrix, instance *data.Instance, instanceDerivative *util.Matrix) {
	// The instance's feature vector
	features := instance.Features

	// Dimension information: number of labels, counting the implicit label 0 whose weights are zero
	numLabels := weights.NumLabels() + 1

	// Compute z = 1 + sum_label exp(sum_i(w_i * x_i))
	label := instance.Output.Label
	z := ComputeZ(weights, features, label, instanceDerivative)
	inverseZ := float64(1) / z

	// instanceDerivative already holds exp(sum(w_i * x_i)) for each label (filled
	// in by ComputeZ); scale by 1/z and combine with the feature vector.
	for iLabel := 1; iLabel < numLabels; iLabel++ {
		vec := instanceDerivative.GetValues(iLabel - 1)
		if label != iLabel {
			// a label the instance does not carry
			vec.Multiply(inverseZ, 0, features)
		} else {
			// the instance's own label: shifted by -1
			vec.Multiply(inverseZ, -1, features)
		}
	}
}
// Compute z = 1 + sum_label exp(sum_i(w_i * x_i))
//
// The per-label values exp(sum_i(w_i * x_i)) are stored in temp
func ComputeZ(weights *util.Matrix, features *util.Vector, label int, temp *util.Matrix) float64 {
	result := float64(1.0)
	numLabels := weights.NumLabels() + 1

	for iLabel := 1; iLabel < numLabels; iLabel++ {
		exp := math.Exp(util.VecDotProduct(features, weights.GetValues(iLabel-1)))
		result += exp

		tempVec := temp.GetValues(iLabel - 1)
		if tempVec.IsSparse() {
			for _, k := range features.Keys() {
				tempVec.Set(k, exp)
			}
		} else {
			tempVec.SetAll(exp)
		}
	}
	return result
}
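To make these two routines concrete: z is the normalizer of a maximum entropy (multinomial logistic) model, so p(label=0|x) = 1/z and p(label=l|x) = exp(w_l·x)/z for the remaining labels, and the per-instance derivative is the usual p(l|x)·x with a -x shift on the instance's own label. Below is a minimal standalone sketch on dense slices, assuming a hypothetical weights[label][feature] layout; the actual code operates on util.Matrix and sparse util.Vector values.

package main

import (
	"fmt"
	"math"
)

// computeZ returns z = 1 + sum_l exp(w_l . x) plus the per-label exponentials,
// mirroring what ComputeZ stores into its temp matrix.
func computeZ(weights [][]float64, x []float64) (float64, []float64) {
	exps := make([]float64, len(weights))
	z := 1.0
	for l, w := range weights {
		dot := 0.0
		for i, xi := range x {
			dot += w[i] * xi
		}
		exps[l] = math.Exp(dot)
		z += exps[l]
	}
	return z, exps
}

// instanceDerivative returns d(-log p(label|x))/dw for every non-zero label:
// p(l|x)*x for labels the instance does not carry, (p(l|x)-1)*x for its own label.
func instanceDerivative(weights [][]float64, x []float64, label int) [][]float64 {
	z, exps := computeZ(weights, x)
	deriv := make([][]float64, len(weights))
	for l := range weights {
		p := exps[l] / z
		shift := 0.0
		if label == l+1 { // weights[l] belongs to label l+1; label 0 has no weights
			shift = -1.0
		}
		deriv[l] = make([]float64, len(x))
		for i, xi := range x {
			deriv[l][i] = (p + shift) * xi
		}
	}
	return deriv
}

func main() {
	weights := [][]float64{{0.5, -1.0}, {1.5, 0.2}} // weights for labels 1 and 2
	x := []float64{1.0, 2.0}
	z, exps := computeZ(weights, x)
	fmt.Printf("z=%.4f  p0=%.4f  p1=%.4f  p2=%.4f\n", z, 1/z, exps[0]/z, exps[1]/z)
	fmt.Println(instanceDerivative(weights, x, 2))
}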
Example #3
File: gd.go  Project: hycxa/mlf
func (opt *gdOptimizer) OptimizeWeights(
	weights *util.Matrix, derivative_func ComputeInstanceDerivativeFunc, set data.Dataset) {
	// Partial derivative (gradient) vector
	derivative := weights.Populate()

	// Learning rate calculator
	learningRate := NewLearningRate(opt.options)

	// Optimization loop
	iterator := set.CreateIterator()
	step := 0
	var learning_rate float64
	convergingSteps := 0
	oldWeights := weights.Populate()
	weightsDelta := weights.Populate()
	instanceDerivative := weights.Populate()
	log.Print("开始梯度递降优化")
	for {
		if opt.options.MaxIterations > 0 && step >= opt.options.MaxIterations {
			break
		}
		step++

		// Reset the derivative vector before each pass over the samples
		derivative.Clear()

		// Iterate over all samples, computing and accumulating the per-instance derivatives
		iterator.Start()
		instancesProcessed := 0
		for !iterator.End() {
			instance := iterator.GetInstance()
			derivative_func(weights, instance, instanceDerivative)
			derivative.Increment(instanceDerivative, 1.0/float64(set.NumInstances()))
			iterator.Next()
			instancesProcessed++

			if opt.options.GDBatchSize > 0 && instancesProcessed >= opt.options.GDBatchSize {
				// Add the regularization term
				derivative.Increment(ComputeRegularization(weights, opt.options),
					float64(instancesProcessed)/(float64(set.NumInstances())*float64(set.NumInstances())))

				// Compute the increment of the feature weights
				delta := opt.GetDeltaX(weights, derivative)

				// Update the weights with the computed learning rate
				learning_rate = learningRate.ComputeLearningRate(delta)
				weights.Increment(delta, learning_rate)

				// Reset the batch accumulator
				derivative.Clear()
				instancesProcessed = 0
			}
		}

		if instancesProcessed > 0 {
			// Process the samples remaining in the last (partial) batch
			derivative.Increment(ComputeRegularization(weights, opt.options),
				float64(instancesProcessed)/(float64(set.NumInstances())*float64(set.NumInstances())))
			delta := opt.GetDeltaX(weights, derivative)
			learning_rate = learningRate.ComputeLearningRate(delta)
			weights.Increment(delta, learning_rate)
		}

		weightsDelta.WeightedSum(weights, oldWeights, 1, -1)
		oldWeights.DeepCopy(weights)
		weightsNorm := weights.Norm()
		weightsDeltaNorm := weightsDelta.Norm()
		log.Printf("#%d |w|=%1.3g |dw|/|w|=%1.3g lr=%1.3g", step, weightsNorm, weightsDeltaNorm/weightsNorm, learning_rate)

		// Check for overflow
		if math.IsNaN(weightsNorm) {
			log.Fatal("优化失败:不收敛")
		}

		// Check for convergence
		if weightsDeltaNorm/weightsNorm < opt.options.ConvergingDeltaWeight {
			convergingSteps++
			if convergingSteps > opt.options.ConvergingSteps {
				log.Printf("收敛")
				break
			}
		}
	}
}
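The batching logic above (GDBatchSize) accumulates averaged per-instance derivatives, applies an update whenever a batch fills, and flushes whatever is left at the end of the pass; GetDeltaX below simply negates the gradient. A minimal dense sketch of that loop, assuming plain []float64 weights and a hypothetical per-instance gradient callback in place of util.Matrix and data.Dataset:

package main

import "fmt"

// sgdEpoch accumulates per-instance gradients and, every batchSize instances,
// applies the averaged step delta = -learningRate * g, as gdOptimizer does.
func sgdEpoch(w []float64, instances [][]float64, grad func(w, x []float64) []float64,
	batchSize int, learningRate float64) {
	g := make([]float64, len(w))
	processed := 0
	for _, x := range instances {
		for i, gi := range grad(w, x) {
			g[i] += gi / float64(len(instances))
		}
		processed++
		if processed >= batchSize {
			for i := range w {
				w[i] -= learningRate * g[i] // delta = -g, scaled by the learning rate
				g[i] = 0
			}
			processed = 0
		}
	}
	// flush the remaining partial batch
	if processed > 0 {
		for i := range w {
			w[i] -= learningRate * g[i]
		}
	}
}

func main() {
	// toy least-squares objective: the gradient of (w.x - 1)^2 pushes w.x toward 1
	grad := func(w, x []float64) []float64 {
		dot := 0.0
		for i := range x {
			dot += w[i] * x[i]
		}
		g := make([]float64, len(w))
		for i := range x {
			g[i] = 2 * (dot - 1) * x[i]
		}
		return g
	}
	w := []float64{0, 0}
	data := [][]float64{{1, 0}, {0, 1}, {1, 1}}
	for epoch := 0; epoch < 200; epoch++ {
		sgdEpoch(w, data, grad, 2, 0.1)
	}
	fmt.Println(w) // approaches the least-squares solution of the three constraints
}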
Example #4
File: gd.go  Project: hycxa/mlf
// Given x_k and g_k, return the increment to apply to x: d_k = -g_k
func (opt *gdOptimizer) GetDeltaX(x, g *util.Matrix) *util.Matrix {
	return g.Opposite()
}
Example #5
// Compute the regularization term to add to the derivative vector, according to the chosen regularization scheme
func ComputeRegularization(weights *util.Matrix, options OptimizerOptions) *util.Matrix {
	reg := weights.Populate()

	if options.RegularizationScheme == 1 {
		// L1 regularization
		for iLabel := 0; iLabel < weights.NumLabels(); iLabel++ {
			for _, k := range weights.GetValues(iLabel).Keys() {
				if weights.Get(iLabel, k) > 0 {
					reg.Set(iLabel, k, options.RegularizationFactor)
				} else {
					reg.Set(iLabel, k, -options.RegularizationFactor)
				}
			}
		}
	} else if options.RegularizationScheme == 2 {
		// L2 regularization
		for iLabel := 0; iLabel < weights.NumLabels(); iLabel++ {
			for _, k := range weights.GetValues(iLabel).Keys() {
				reg.Set(iLabel, k, options.RegularizationFactor*float64(2)*weights.Get(iLabel, k))
			}
		}
	}

	return reg
}
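For reference, scheme 1 adds the subgradient of the L1 penalty (sign(w) times the factor) and scheme 2 adds the gradient of the L2 penalty (2 times the factor times w). A minimal dense sketch on a plain weight slice, with the hypothetical names scheme and regFactor standing in for the OptimizerOptions fields:

package main

import "fmt"

// regGradient returns the regularization term added to the derivative:
// scheme 1 is L1 (sign(w) * factor), scheme 2 is L2 (2 * factor * w).
func regGradient(weights []float64, scheme int, regFactor float64) []float64 {
	reg := make([]float64, len(weights))
	for i, w := range weights {
		switch scheme {
		case 1:
			if w > 0 {
				reg[i] = regFactor
			} else {
				reg[i] = -regFactor
			}
		case 2:
			reg[i] = 2 * regFactor * w
		}
	}
	return reg
}

func main() {
	w := []float64{0.5, -2.0, 0.0}
	fmt.Println(regGradient(w, 1, 0.1)) // [0.1 -0.1 -0.1]
	fmt.Println(regGradient(w, 2, 0.1)) // [0.1 -0.4 0]
}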
Example #6
func (opt *lbfgsOptimizer) OptimizeWeights(
	weights *util.Matrix, derivative_func ComputeInstanceDerivativeFunc, set data.Dataset) {

	// Learning rate calculator
	learningRate := NewLearningRate(opt.options)

	// Partial derivative (gradient) vector
	derivative := weights.Populate()

	// Optimization loop
	step := 0
	convergingSteps := 0
	oldWeights := weights.Populate()
	weightsDelta := weights.Populate()

	// Allocate temporary resources for each worker goroutine
	numLbfgsThreads := *lbfgs_threads
	if numLbfgsThreads == 0 {
		numLbfgsThreads = runtime.NumCPU()
	}
	workerSet := make([]data.Dataset, numLbfgsThreads)
	workerDerivative := make([]*util.Matrix, numLbfgsThreads)
	workerInstanceDerivative := make([]*util.Matrix, numLbfgsThreads)
	for iWorker := 0; iWorker < numLbfgsThreads; iWorker++ {
		workerBuckets := []data.SkipBucket{
			{true, iWorker},
			{false, 1},
			{true, numLbfgsThreads - 1 - iWorker},
		}
		workerSet[iWorker] = data.NewSkipDataset(set, workerBuckets)
		workerDerivative[iWorker] = weights.Populate()
		workerInstanceDerivative[iWorker] = weights.Populate()
	}

	log.Print("开始L-BFGS优化")
	for {
		if opt.options.MaxIterations > 0 && step >= opt.options.MaxIterations {
			break
		}
		step++

		// Launch the worker goroutines
		workerChannel := make(chan int, numLbfgsThreads)
		for iWorker := 0; iWorker < numLbfgsThreads; iWorker++ {
			go func(iw int) {
				workerDerivative[iw].Clear()
				iterator := workerSet[iw].CreateIterator()
				iterator.Start()
				for !iterator.End() {
					instance := iterator.GetInstance()
					derivative_func(
						weights, instance, workerInstanceDerivative[iw])
					//					log.Print(workerInstanceDerivative[iw].GetValues(0))
					workerDerivative[iw].Increment(
						workerInstanceDerivative[iw], float64(1)/float64(set.NumInstances()))
					iterator.Next()
				}
				workerChannel <- iw
			}(iWorker)
		}

		derivative.Clear()

		// Wait for all worker goroutines to finish
		for iWorker := 0; iWorker < numLbfgsThreads; iWorker++ {
			<-workerChannel
		}
		for iWorker := 0; iWorker < numLbfgsThreads; iWorker++ {
			derivative.Increment(workerDerivative[iWorker], 1)
		}

		// Add the regularization term
		derivative.Increment(ComputeRegularization(weights, opt.options), 1.0/float64(set.NumInstances()))

		// Compute the increment of the feature weights
		delta := opt.GetDeltaX(weights, derivative)

		// Update the weights with the computed learning rate
		learning_rate := learningRate.ComputeLearningRate(delta)
		weights.Increment(delta, learning_rate)

		weightsDelta.WeightedSum(weights, oldWeights, 1, -1)
		oldWeights.DeepCopy(weights)
		weightsNorm := weights.Norm()
		weightsDeltaNorm := weightsDelta.Norm()
		log.Printf("#%d |dw|/|w|=%f |w|=%f lr=%1.3g", step, weightsDeltaNorm/weightsNorm, weightsNorm, learning_rate)

		// Check for overflow
		if math.IsNaN(weightsNorm) {
			log.Fatal("优化失败:不收敛")
		}

		// Check for convergence
		if weightsDeltaNorm/weightsNorm < opt.options.ConvergingDeltaWeight {
			convergingSteps++
			if convergingSteps > opt.options.ConvergingSteps {
				log.Printf("收敛")
				break
			}
		} else {
			convergingSteps = 0
		}
	}
}
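The parallel section above shards the dataset across goroutines with data.NewSkipDataset, lets each worker accumulate a partial derivative into its own matrix, and then sums the partials after draining a channel. A minimal sketch of the same fan-out/fan-in pattern on dense slices, assuming a hypothetical per-instance gradient function:

package main

import (
	"fmt"
	"runtime"
)

func parallelGradient(instances [][]float64, grad func(x []float64) []float64, dim int) []float64 {
	numWorkers := runtime.NumCPU()
	partial := make([][]float64, numWorkers)
	done := make(chan int, numWorkers)

	// each worker handles every numWorkers-th instance, like the SkipBucket sharding
	for w := 0; w < numWorkers; w++ {
		go func(iw int) {
			partial[iw] = make([]float64, dim)
			for i := iw; i < len(instances); i += numWorkers {
				for j, g := range grad(instances[i]) {
					partial[iw][j] += g / float64(len(instances))
				}
			}
			done <- iw
		}(w)
	}

	// wait for every worker, then sum the partial derivatives
	for w := 0; w < numWorkers; w++ {
		<-done
	}
	total := make([]float64, dim)
	for w := 0; w < numWorkers; w++ {
		for j, g := range partial[w] {
			total[j] += g
		}
	}
	return total
}

func main() {
	instances := [][]float64{{1, 2}, {3, 4}, {5, 6}}
	grad := func(x []float64) []float64 { return x } // identity "gradient" for the demo
	fmt.Println(parallelGradient(instances, grad, 2)) // averaged sum of the rows: [3 4]
}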
Example #7
// Given x_k and g_k, return the increment to apply to x: d_k = -H_k * g_k
func (opt *lbfgsOptimizer) GetDeltaX(x, g *util.Matrix) *util.Matrix {
	if x.NumLabels() != g.NumLabels() {
		log.Fatal("x和g的维度不一致")
	}

	// Allocate memory on the first call
	if opt.k == 0 {
		if x.IsSparse() {
			opt.initStruct(x.NumLabels(), 0, x.IsSparse())
		} else {
			opt.initStruct(x.NumLabels(), x.NumValues(), x.IsSparse())
		}
	}

	currIndex := util.Mod(opt.k, *lbfgs_history_size)

	// Update x_k
	opt.x[currIndex].DeepCopy(x)

	// Update g_k
	opt.g[currIndex].DeepCopy(g)

	// On step 0, fall back to plain gradient descent
	if opt.k == 0 {
		opt.k++
		return g.Opposite()
	}

	prevIndex := util.Mod(opt.k-1, *lbfgs_history_size)

	// Update s_(k-1)
	opt.s[prevIndex].WeightedSum(opt.x[currIndex], opt.x[prevIndex], 1, -1)

	// Update y_(k-1)
	opt.y[prevIndex].WeightedSum(opt.g[currIndex], opt.g[prevIndex], 1, -1)

	// Update rho_(k-1)
	opt.ro.Set(prevIndex, 1.0/util.MatrixDotProduct(opt.y[prevIndex], opt.s[prevIndex]))

	// Compute the lower bound shared by both loops
	lowerBound := opt.k - *lbfgs_history_size
	if lowerBound < 0 {
		lowerBound = 0
	}

	// First loop (newest to oldest)
	opt.q.DeepCopy(g)
	for i := opt.k - 1; i >= lowerBound; i-- {
		currIndex := util.Mod(i, *lbfgs_history_size)
		opt.alpha.Set(currIndex,
			opt.ro.Get(currIndex)*util.MatrixDotProduct(opt.s[currIndex], opt.q))
		opt.q.Increment(opt.y[currIndex], -opt.alpha.Get(currIndex))
	}

	// Second loop (oldest to newest)
	opt.z.DeepCopy(opt.q)
	for i := lowerBound; i <= opt.k-1; i++ {
		currIndex := util.Mod(i, *lbfgs_history_size)
		opt.beta.Set(currIndex,
			opt.ro.Get(currIndex)*util.MatrixDotProduct(opt.y[currIndex], opt.z))
		opt.z.Increment(opt.s[currIndex],
			opt.alpha.Get(currIndex)-opt.beta.Get(currIndex))
	}

	// Advance k
	opt.k++

	return opt.z.Opposite()
}
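The body of GetDeltaX is the standard L-BFGS two-loop recursion with the initial Hessian approximation H_k^0 taken to be the identity. A minimal dense sketch of just the recursion, keeping the full s_i/y_i history in slices instead of the circular buffer indexed with util.Mod above:

package main

import "fmt"

func dot(a, b []float64) float64 {
	s := 0.0
	for i := range a {
		s += a[i] * b[i]
	}
	return s
}

// twoLoop returns H_k * g using stored s_i = x_(i+1)-x_i and y_i = g_(i+1)-g_i;
// the update direction is then d_k = -(H_k * g), as returned by GetDeltaX.
func twoLoop(g []float64, s, y [][]float64) []float64 {
	n := len(s)
	alpha := make([]float64, n)
	rho := make([]float64, n)
	q := append([]float64(nil), g...)

	// first loop: newest to oldest
	for i := n - 1; i >= 0; i-- {
		rho[i] = 1.0 / dot(y[i], s[i])
		alpha[i] = rho[i] * dot(s[i], q)
		for j := range q {
			q[j] -= alpha[i] * y[i][j]
		}
	}

	// second loop: oldest to newest (H_k^0 = I, matching the code above)
	z := q
	for i := 0; i < n; i++ {
		beta := rho[i] * dot(y[i], z)
		for j := range z {
			z[j] += (alpha[i] - beta) * s[i][j]
		}
	}
	return z
}

func main() {
	g := []float64{1, 1}
	s := [][]float64{{0.5, 0}}
	y := [][]float64{{1, 0}}
	fmt.Println(twoLoop(g, s, y)) // approximates H*g; negate it for the descent step
}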