func TestComputeInstanceDerivative(t *testing.T) { weights := util.NewMatrix(2, 3) weights.GetValues(0).SetValues([]float64{1, 2, 3}) weights.GetValues(1).SetValues([]float64{3, 4, 5}) de := util.NewMatrix(2, 3) instance := data.Instance{} instance.Features = util.NewVector(3) instance.Features.SetValues([]float64{1, 0.3, 0.4}) instance.Output = &data.InstanceOutput{Label: 0} MaxEntComputeInstanceDerivative(weights, &instance, de) util.ExpectNear(t, 0.0322, de.Get(0, 0), 0.0001) util.ExpectNear(t, 0.0096, de.Get(0, 1), 0.0001) util.ExpectNear(t, 0.0128, de.Get(0, 2), 0.0001) util.ExpectNear(t, 0.9658, de.Get(1, 0), 0.0001) util.ExpectNear(t, 0.2897, de.Get(1, 1), 0.0001) util.ExpectNear(t, 0.3863, de.Get(1, 2), 0.0001) instance.Features.SetValues([]float64{1, 0.6, 0.7}) instance.Output.Label = 1 MaxEntComputeInstanceDerivative(weights, &instance, de) util.ExpectNear(t, -0.9900, de.Get(0, 0), 0.0001) util.ExpectNear(t, -0.5940, de.Get(0, 1), 0.0001) util.ExpectNear(t, -0.6930, de.Get(0, 2), 0.0001) util.ExpectNear(t, 0.9899, de.Get(1, 0), 0.0001) util.ExpectNear(t, 0.5939, de.Get(1, 1), 0.0001) util.ExpectNear(t, 0.6929, de.Get(1, 2), 0.0001) instance.Features.SetValues([]float64{1, 0.4, 0.2}) instance.Output.Label = 2 MaxEntComputeInstanceDerivative(weights, &instance, de) util.ExpectNear(t, 0.0390, de.Get(0, 0), 0.0001) util.ExpectNear(t, 0.0156, de.Get(0, 1), 0.0001) util.ExpectNear(t, 0.0078, de.Get(0, 2), 0.0001) util.ExpectNear(t, -0.0425, de.Get(1, 0), 0.0001) util.ExpectNear(t, -0.0170, de.Get(1, 1), 0.0001) util.ExpectNear(t, -0.0085, de.Get(1, 2), 0.0001) }
func LoadLibSVMDataset(path string, usingSparseRepresentation bool) data.Dataset { log.Print("载入libsvm格式文件", path) content, err := ioutil.ReadFile(path) if err != nil { log.Fatalf("无法打开文件\"%v\",错误提示:%v\n", path, err) } lines := strings.Split(string(content), "\n") minFeature := 10000 maxFeature := 0 labels := make(map[string]int) labelIndex := 0 for _, l := range lines { if l == "" { continue } fields := strings.Split(l, " ") _, ok := labels[fields[0]] if !ok { labels[fields[0]] = labelIndex labelIndex++ } for i := 1; i < len(fields); i++ { if fields[i] == "" { continue } fs := strings.Split(fields[i], ":") fid, _ := strconv.Atoi(fs[0]) if fid > maxFeature { maxFeature = fid } if fid < minFeature { minFeature = fid } } } if minFeature == 0 || maxFeature < 2 { log.Fatal("文件输入格式不合法") } log.Printf("feature 数目 %d", maxFeature) log.Printf("label 数目 %d", len(labels)) set := data.NewInmemDataset() for _, l := range lines { if l == "" { continue } fields := strings.Split(l, " ") instance := new(data.Instance) instance.Output = &data.InstanceOutput{ Label: labels[fields[0]], LabelString: fields[0], } if usingSparseRepresentation { instance.NamedFeatures = make(map[string]float64) } else { instance.Features = util.NewVector(maxFeature + 1) } // 常数项 if !usingSparseRepresentation { instance.Features.Set(0, 1) } for i := 1; i < len(fields); i++ { if fields[i] == "" { continue } fs := strings.Split(fields[i], ":") fid, _ := strconv.Atoi(fs[0]) value, _ := strconv.ParseFloat(fs[1], 64) if usingSparseRepresentation { instance.NamedFeatures[fs[0]] = value } else { instance.Features.Set(fid, value) } } set.AddInstance(instance) } set.Finalize() log.Print("载入数据样本数目 ", set.NumInstances()) return set }
func TestTrain(t *testing.T) { set := data.NewInmemDataset() instance1 := new(data.Instance) instance1.Features = util.NewVector(4) instance1.Features.SetValues([]float64{1, 1, 1, 3}) instance1.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance1) instance2 := new(data.Instance) instance2.Features = util.NewVector(4) instance2.Features.SetValues([]float64{1, 3, 1, 5}) instance2.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance2) instance3 := new(data.Instance) instance3.Features = util.NewVector(4) instance3.Features.SetValues([]float64{1, 3, 4, 7}) instance3.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance3) instance4 := new(data.Instance) instance4.Features = util.NewVector(4) instance4.Features.SetValues([]float64{1, 2, 8, 6}) instance4.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance4) set.Finalize() gdTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "gd", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 0.1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, GDBatchSize: 0, // full-bath }, } gdTrainer := NewMaxEntClassifierTrainer(gdTrainerOptions) lbfgsTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "lbfgs", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, }, } lbfgsTrainer := NewMaxEntClassifierTrainer(lbfgsTrainerOptions) lbfgsTrainer.Train(set) gdTrainer.Train(set).Write("test.mlf") model := LoadModel("test.mlf") util.Expect(t, "0", model.Predict(instance1).Label) util.Expect(t, "0", model.Predict(instance2).Label) util.Expect(t, "1", model.Predict(instance3).Label) util.Expect(t, "1", model.Predict(instance4).Label) }
func TestTrainWithNamedFeatures(t *testing.T) { set := data.NewInmemDataset() instance1 := new(data.Instance) instance1.NamedFeatures = map[string]float64{ "1": 1, "2": 1, "3": 1, "4": 3, } instance1.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance1) instance2 := new(data.Instance) instance2.NamedFeatures = map[string]float64{ "1": 1, "2": 3, "3": 1, "4": 5, } instance2.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance2) instance3 := new(data.Instance) instance3.NamedFeatures = map[string]float64{ "1": 1, "2": 3, "3": 4, "4": 7, } instance3.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance3) instance4 := new(data.Instance) instance4.NamedFeatures = map[string]float64{ "1": 1, "2": 2, "3": 8, "4": 6, } instance4.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance4) set.Finalize() gdTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "gd", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 0.1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, GDBatchSize: 0, // full-bath }, } gdTrainer := NewMaxEntClassifierTrainer(gdTrainerOptions) lbfgsTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "lbfgs", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, }, } lbfgsTrainer := NewMaxEntClassifierTrainer(lbfgsTrainerOptions) lbfgsTrainer.Train(set) gdTrainer.Train(set).Write("test.mlf") model := LoadModel("test.mlf") util.Expect(t, "0", model.Predict(instance1).Label) util.Expect(t, "0", model.Predict(instance2).Label) util.Expect(t, "1", model.Predict(instance3).Label) util.Expect(t, "1", model.Predict(instance4).Label) }