func (trainer *MaxEntClassifierTrainer) Train(set data.Dataset) Model { // 检查训练数据是否是分类问题 if !set.GetOptions().IsSupervisedLearning { log.Fatal("训练数据不是分类问题数据") } // 建立新的优化器 optimizer := optimizer.NewOptimizer(trainer.options.Optimizer) // 建立特征权重向量 featureDimension := set.GetOptions().FeatureDimension numLabels := set.GetOptions().NumLabels var weights *util.Matrix if set.GetOptions().FeatureIsSparse { weights = util.NewSparseMatrix(numLabels) } else { weights = util.NewMatrix(numLabels, featureDimension) } // 得到优化的特征权重向量 optimizer.OptimizeWeights(weights, MaxEntComputeInstanceDerivative, set) classifier := new(MaxEntClassifier) classifier.Weights = weights classifier.NumLabels = numLabels classifier.FeatureDimension = featureDimension classifier.FeatureDictionary = set.GetFeatureDictionary() classifier.LabelDictionary = set.GetLabelDictionary() return classifier }
// 初始化优化结构体 // 为结构体中的向量分配新的内存,向量的长度可能发生变化。 func (opt *lbfgsOptimizer) initStruct(labels, features int, isSparse bool) { opt.labels = labels opt.x = make([]*util.Matrix, *lbfgs_history_size) opt.g = make([]*util.Matrix, *lbfgs_history_size) opt.s = make([]*util.Matrix, *lbfgs_history_size) opt.y = make([]*util.Matrix, *lbfgs_history_size) opt.ro = util.NewVector(*lbfgs_history_size) opt.alpha = util.NewVector(*lbfgs_history_size) opt.beta = util.NewVector(*lbfgs_history_size) if !isSparse { opt.q = util.NewMatrix(labels, features) opt.z = util.NewMatrix(labels, features) for i := 0; i < *lbfgs_history_size; i++ { opt.x[i] = util.NewMatrix(labels, features) opt.g[i] = util.NewMatrix(labels, features) opt.s[i] = util.NewMatrix(labels, features) opt.y[i] = util.NewMatrix(labels, features) } } else { opt.q = util.NewSparseMatrix(labels) opt.z = util.NewSparseMatrix(labels) for i := 0; i < *lbfgs_history_size; i++ { opt.x[i] = util.NewSparseMatrix(labels) opt.g[i] = util.NewSparseMatrix(labels) opt.s[i] = util.NewSparseMatrix(labels) opt.y[i] = util.NewSparseMatrix(labels) } } }
func TestComputeInstanceDerivative(t *testing.T) { weights := util.NewMatrix(2, 3) weights.GetValues(0).SetValues([]float64{1, 2, 3}) weights.GetValues(1).SetValues([]float64{3, 4, 5}) de := util.NewMatrix(2, 3) instance := data.Instance{} instance.Features = util.NewVector(3) instance.Features.SetValues([]float64{1, 0.3, 0.4}) instance.Output = &data.InstanceOutput{Label: 0} MaxEntComputeInstanceDerivative(weights, &instance, de) util.ExpectNear(t, 0.0322, de.Get(0, 0), 0.0001) util.ExpectNear(t, 0.0096, de.Get(0, 1), 0.0001) util.ExpectNear(t, 0.0128, de.Get(0, 2), 0.0001) util.ExpectNear(t, 0.9658, de.Get(1, 0), 0.0001) util.ExpectNear(t, 0.2897, de.Get(1, 1), 0.0001) util.ExpectNear(t, 0.3863, de.Get(1, 2), 0.0001) instance.Features.SetValues([]float64{1, 0.6, 0.7}) instance.Output.Label = 1 MaxEntComputeInstanceDerivative(weights, &instance, de) util.ExpectNear(t, -0.9900, de.Get(0, 0), 0.0001) util.ExpectNear(t, -0.5940, de.Get(0, 1), 0.0001) util.ExpectNear(t, -0.6930, de.Get(0, 2), 0.0001) util.ExpectNear(t, 0.9899, de.Get(1, 0), 0.0001) util.ExpectNear(t, 0.5939, de.Get(1, 1), 0.0001) util.ExpectNear(t, 0.6929, de.Get(1, 2), 0.0001) instance.Features.SetValues([]float64{1, 0.4, 0.2}) instance.Output.Label = 2 MaxEntComputeInstanceDerivative(weights, &instance, de) util.ExpectNear(t, 0.0390, de.Get(0, 0), 0.0001) util.ExpectNear(t, 0.0156, de.Get(0, 1), 0.0001) util.ExpectNear(t, 0.0078, de.Get(0, 2), 0.0001) util.ExpectNear(t, -0.0425, de.Get(1, 0), 0.0001) util.ExpectNear(t, -0.0170, de.Get(1, 1), 0.0001) util.ExpectNear(t, -0.0085, de.Get(1, 2), 0.0001) }
func TestLbfgsOptimizer(t *testing.T) { opt := NewLbfgsOptimizer(OptimizerOptions{}) x := util.NewMatrix(1, 2) g := util.NewMatrix(1, 2) x.GetValues(0).SetValues([]float64{1, 0.3}) k := 0 for { g.GetValues(0).SetValues([]float64{4 * x.Get(0, 0) * x.Get(0, 0) * x.Get(0, 0), 4 * x.Get(0, 1) * x.Get(0, 1) * x.Get(0, 1)}) delta := opt.GetDeltaX(x, g) x.Increment(delta, 1) k++ if delta.Norm() < 0.0001 { break } } fmt.Println("==== LBFGS优化完成 ====") fmt.Println("循环数", k) fmt.Println("x = ", x) }
func TestGdOptimizer(t *testing.T) { opt := NewGdOptimizer(OptimizerOptions{}) x := util.NewMatrix(1, 2) g := util.NewMatrix(1, 2) x.GetValues(0).SetValues([]float64{1, 0.3}) learningRate := float64(0.2) k := 0 for { g.GetValues(0).SetValues([]float64{4 * x.Get(0, 0) * x.Get(0, 0) * x.Get(0, 0), 4 * x.Get(0, 1) * x.Get(0, 1) * x.Get(0, 1)}) delta := opt.GetDeltaX(x, g) x.Increment(delta, learningRate) if g.Norm() < 0.0001 { break } k++ } fmt.Println("==== GD优化完成 ====") fmt.Println("循环数", k) fmt.Println("x = ", x) }
// Train learns the RBM weights from the dataset using mini-batch
// stochastic gradient ascent. Derivatives are computed by a pool of
// worker goroutines fed through channels; the weight matrix is guarded
// by the read-write lock embedded in rbm.lock.
//
// Iteration stops when MaxIter is reached (if nonzero) or when the
// relative weight change drops to Delta (if nonzero).
func (rbm *RBM) Train(set data.Dataset) {
	featureDimension := set.GetOptions().FeatureDimension
	visibleDim := featureDimension
	// +1 appears to reserve index 0 for a bias unit — see the worker,
	// which fixes unit 0 to 1.0 and iterates hidden/visible units from 1.
	hiddenDim := rbm.options.NumHiddenUnits + 1
	log.Printf("#visible = %d, #hidden = %d", featureDimension-1, hiddenDim-1)

	// Randomize the weights uniformly in (-0.01, 0.01) under the write lock.
	rbm.lock.Lock()
	rbm.lock.weights = util.NewMatrix(hiddenDim, visibleDim)
	oldWeights := util.NewMatrix(hiddenDim, visibleDim)
	batchDerivative := util.NewMatrix(hiddenDim, visibleDim)
	for i := 0; i < hiddenDim; i++ {
		for j := 0; j < visibleDim; j++ {
			value := (rand.Float64()*2 - 1) * 0.01
			rbm.lock.weights.Set(i, j, value)
		}
	}
	rbm.lock.Unlock()

	// Start the worker goroutines that turn instances into derivatives.
	// NOTE(review): these workers loop forever on ch; nothing here stops
	// them when Train returns — confirm whether the leak is intended.
	ch := make(chan *data.Instance, rbm.options.Worker)
	out := make(chan *util.Matrix, rbm.options.Worker)
	for iWorker := 0; iWorker < rbm.options.Worker; iWorker++ {
		go rbm.derivativeWorker(ch, out, visibleDim, hiddenDim)
	}

	iteration := 0
	delta := 1.0
	// MaxIter == 0 means "no iteration cap"; Delta == 0 means "no
	// convergence threshold".
	for (rbm.options.MaxIter == 0 || iteration < rbm.options.MaxIter) && (rbm.options.Delta == 0 || delta > rbm.options.Delta) {
		iteration++
		// The feeder pushes every instance of the set into ch.
		go rbm.feeder(set, ch)
		iBatch := 0
		batchDerivative.Clear()
		numInstances := set.NumInstances()
		for it := 0; it < numInstances; it++ {
			// Results arrive out of order — one derivative per instance,
			// not necessarily matching feed order.
			derivative := <-out
			batchDerivative.Increment(derivative, rbm.options.LearningRate)
			iBatch++
			// Flush the accumulated batch into the weights either when
			// the batch is full or at the last instance of the epoch.
			if iBatch == rbm.options.BatchSize || it == numInstances-1 {
				rbm.lock.Lock()
				rbm.lock.weights.Increment(batchDerivative, 1.0)
				rbm.lock.Unlock()
				iBatch = 0
				batchDerivative.Clear()
			}
		}
		// Compute delta = |weights - oldWeights| / |weights| for the
		// convergence test; batchDerivative is reused as scratch space
		// (it was cleared by the flush above).
		rbm.lock.RLock()
		weightsNorm := rbm.lock.weights.Norm()
		batchDerivative.DeepCopy(rbm.lock.weights)
		batchDerivative.Increment(oldWeights, -1.0)
		derivativeNorm := batchDerivative.Norm()
		delta = derivativeNorm / weightsNorm
		log.Printf("iter = %d, delta = %f, |weight| = %f", iteration, delta, weightsNorm)
		oldWeights.DeepCopy(rbm.lock.weights)
		rbm.lock.RUnlock()
	}
}
// derivativeWorker consumes instances from ch and emits, on out, the
// contrastive-divergence gradient estimate for each instance:
// positive statistics minus negative statistics after NumCD Gibbs steps.
//
// The worker runs forever; it holds the read lock on rbm.lock while it
// reads the shared weights (via rbm.logistic). Unit 0 of both layers is
// pinned to 1.0, which looks like a bias unit — loops over real units
// therefore start at index 1.
func (rbm *RBM) derivativeWorker(ch chan *data.Instance, out chan *util.Matrix, visibleDim int, hiddenDim int) {
	// Visible units, reused across instances; slot 0 is the bias.
	visibleUnits := util.NewVector(visibleDim)
	visibleUnits.Set(0, 1.0)
	// Hidden units: activation probabilities, plus an optional binary
	// sample of them; slot 0 is the bias in both.
	hiddenUnitsProb := util.NewVector(hiddenDim)
	hiddenUnitsBinary := util.NewVector(hiddenDim)
	hiddenUnitsProb.Set(0, 1.0)
	hiddenUnitsBinary.Set(0, 1.0)
	for {
		instance := <-ch
		// A fresh matrix per instance: it is sent on out and owned by
		// the receiver afterwards.
		derivative := util.NewMatrix(hiddenDim, visibleDim)
		// Clamp the visible units to the instance's features.
		for j := 1; j < visibleDim; j++ {
			visibleUnits.Set(j, instance.Features.Get(j))
		}
		rbm.lock.RLock()
		// Up pass: hidden activation probabilities given the data,
		// optionally sampled to binary values.
		for i := 1; i < hiddenDim; i++ {
			prob := rbm.logistic(visibleUnits, i, true)
			hiddenUnitsProb.Set(i, prob)
			if rbm.options.UseBinaryHiddenUnits {
				hiddenUnitsBinary.Set(i, rbm.bernoulli(prob))
			}
		}
		// Positive statistics: <v_j * h_i> under the data distribution.
		for i := 0; i < hiddenDim; i++ {
			for j := 0; j < visibleDim; j++ {
				derivative.Set(i, j, visibleUnits.Get(j)*hiddenUnitsProb.Get(i))
			}
		}
		// CD-n: alternate down (reconstruct visible) and up (recompute
		// hidden) passes NumCD times.
		for nCD := 0; nCD < rbm.options.NumCD; nCD++ {
			for j := 1; j < visibleDim; j++ {
				var prob float64
				if rbm.options.UseBinaryHiddenUnits {
					prob = rbm.logistic(hiddenUnitsBinary, j, false)
				} else {
					prob = rbm.logistic(hiddenUnitsProb, j, false)
				}
				visibleUnits.Set(j, prob)
			}
			for i := 1; i < hiddenDim; i++ {
				prob := rbm.logistic(visibleUnits, i, true)
				hiddenUnitsProb.Set(i, prob)
				if rbm.options.UseBinaryHiddenUnits {
					hiddenUnitsBinary.Set(i, rbm.bernoulli(prob))
				}
			}
		}
		rbm.lock.RUnlock()
		// Negative statistics: subtract <v_j * h_i> under the model's
		// reconstruction, yielding the CD gradient estimate.
		for i := 0; i < hiddenDim; i++ {
			for j := 0; j < visibleDim; j++ {
				old := derivative.Get(i, j)
				derivative.Set(i, j, old-visibleUnits.Get(j)*hiddenUnitsProb.Get(i))
			}
		}
		out <- derivative
	}
}