func TestDictionaryJSON(t *testing.T) { dict := NewDictionary(1) dict.GetIdFromName("f1") dict.GetIdFromName("f2") dict.GetIdFromName("f3") dict.GetIdFromName("f4") dictJson, _ := json.Marshal(dict) var newDict Dictionary json.Unmarshal(dictJson, &newDict) util.Expect(t, "1", newDict.GetIdFromName("f1")) util.Expect(t, "2", newDict.GetIdFromName("f2")) util.Expect(t, "3", newDict.GetIdFromName("f3")) util.Expect(t, "4", newDict.GetIdFromName("f4")) util.Expect(t, "f1", newDict.GetNameFromId(1)) util.Expect(t, "f2", newDict.GetNameFromId(2)) util.Expect(t, "f3", newDict.GetNameFromId(3)) util.Expect(t, "f4", newDict.GetNameFromId(4)) }
func TestSkipIterator(t *testing.T) { set := NewInmemDataset() instance1 := new(Instance) instance1.Features = util.NewVector(3) instance1.Features.SetValues([]float64{1, 2, 3}) util.Expect(t, "true", set.AddInstance(instance1)) instance2 := new(Instance) instance2.Features = util.NewVector(3) instance2.Features.SetValues([]float64{3, 4, 5}) util.Expect(t, "true", set.AddInstance(instance2)) instance3 := new(Instance) instance3.Features = util.NewVector(3) instance3.Features.SetValues([]float64{7, 8, 9}) util.Expect(t, "true", set.AddInstance(instance3)) instance4 := new(Instance) instance4.Features = util.NewVector(3) instance4.Features.SetValues([]float64{30, 40, 50}) util.Expect(t, "true", set.AddInstance(instance4)) instance5 := new(Instance) instance5.Features = util.NewVector(3) instance5.Features.SetValues([]float64{70, 80, 90}) util.Expect(t, "true", set.AddInstance(instance5)) instance6 := new(Instance) instance6.Features = util.NewVector(3) instance6.Features.SetValues([]float64{31, 41, 51}) util.Expect(t, "true", set.AddInstance(instance6)) set.Finalize() iter := NewSkipIterator(set, []int{0, 4}) iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "1", iter.GetInstance().Features.Get(0)) util.Expect(t, "2", iter.GetInstance().Features.Get(1)) util.Expect(t, "3", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "70", iter.GetInstance().Features.Get(0)) util.Expect(t, "80", iter.GetInstance().Features.Get(1)) util.Expect(t, "90", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) iter = NewSkipIterator(set, []int{3, 1}) iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "30", iter.GetInstance().Features.Get(0)) util.Expect(t, "40", iter.GetInstance().Features.Get(1)) util.Expect(t, "50", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) iter = NewSkipIterator(set, []int{1, 2, 1}) iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "3", iter.GetInstance().Features.Get(0)) util.Expect(t, "4", iter.GetInstance().Features.Get(1)) util.Expect(t, "5", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "30", iter.GetInstance().Features.Get(0)) util.Expect(t, "40", iter.GetInstance().Features.Get(1)) util.Expect(t, "50", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "31", iter.GetInstance().Features.Get(0)) util.Expect(t, "41", iter.GetInstance().Features.Get(1)) util.Expect(t, "51", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) }
func TestSkipDataset(t *testing.T) { set := NewInmemDataset() instance1 := new(Instance) instance1.Features = util.NewVector(3) instance1.Features.SetValues([]float64{1, 2, 3}) util.Expect(t, "true", set.AddInstance(instance1)) instance2 := new(Instance) instance2.Features = util.NewVector(3) instance2.Features.SetValues([]float64{3, 4, 5}) util.Expect(t, "true", set.AddInstance(instance2)) instance3 := new(Instance) instance3.Features = util.NewVector(3) instance3.Features.SetValues([]float64{7, 8, 9}) util.Expect(t, "true", set.AddInstance(instance3)) instance4 := new(Instance) instance4.Features = util.NewVector(3) instance4.Features.SetValues([]float64{30, 40, 50}) util.Expect(t, "true", set.AddInstance(instance4)) instance5 := new(Instance) instance5.Features = util.NewVector(3) instance5.Features.SetValues([]float64{70, 80, 90}) util.Expect(t, "true", set.AddInstance(instance5)) instance6 := new(Instance) instance6.Features = util.NewVector(3) instance6.Features.SetValues([]float64{31, 41, 51}) util.Expect(t, "true", set.AddInstance(instance6)) set.Finalize() buckets := []SkipBucket{} buckets = append(buckets, SkipBucket{true, 0}) buckets = append(buckets, SkipBucket{false, 1}) buckets = append(buckets, SkipBucket{true, 3}) ss := NewSkipDataset(set, buckets) util.Expect(t, "2", ss.NumInstances()) iter := ss.CreateIterator() iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "1", iter.GetInstance().Features.Get(0)) util.Expect(t, "2", iter.GetInstance().Features.Get(1)) util.Expect(t, "3", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "70", iter.GetInstance().Features.Get(0)) util.Expect(t, "80", iter.GetInstance().Features.Get(1)) util.Expect(t, "90", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) buckets = []SkipBucket{} buckets = append(buckets, SkipBucket{true, 3}) buckets = append(buckets, SkipBucket{false, 1}) ss = NewSkipDataset(set, buckets) util.Expect(t, "1", ss.NumInstances()) iter = ss.CreateIterator() iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "30", iter.GetInstance().Features.Get(0)) util.Expect(t, "40", iter.GetInstance().Features.Get(1)) util.Expect(t, "50", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) buckets = []SkipBucket{} buckets = append(buckets, SkipBucket{true, 1}) buckets = append(buckets, SkipBucket{false, 1}) buckets = append(buckets, SkipBucket{true, 1}) buckets = append(buckets, SkipBucket{false, 1}) ss = NewSkipDataset(set, buckets) util.Expect(t, "3", ss.NumInstances()) iter = ss.CreateIterator() iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "3", iter.GetInstance().Features.Get(0)) util.Expect(t, "4", iter.GetInstance().Features.Get(1)) util.Expect(t, "5", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "30", iter.GetInstance().Features.Get(0)) util.Expect(t, "40", iter.GetInstance().Features.Get(1)) util.Expect(t, "50", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "31", iter.GetInstance().Features.Get(0)) util.Expect(t, "41", iter.GetInstance().Features.Get(1)) util.Expect(t, "51", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) }
func TestDictionary(t *testing.T) { dict := NewDictionary(1) util.Expect(t, "1", dict.GetIdFromName("feature1")) util.Expect(t, "2", dict.GetIdFromName("feature2")) util.Expect(t, "3", dict.GetIdFromName("feature3")) util.Expect(t, "2", dict.GetIdFromName("feature2")) util.Expect(t, "3", dict.GetIdFromName("feature3")) util.Expect(t, "1", dict.GetIdFromName("feature1")) util.Expect(t, "4", dict.GetIdFromName("feature4")) util.Expect(t, "", dict.GetNameFromId(0)) util.Expect(t, "feature1", dict.GetNameFromId(1)) util.Expect(t, "feature2", dict.GetNameFromId(2)) util.Expect(t, "feature3", dict.GetNameFromId(3)) util.Expect(t, "feature4", dict.GetNameFromId(4)) util.Expect(t, "", dict.GetNameFromId(5)) }
func TestTrain(t *testing.T) { set := data.NewInmemDataset() instance1 := new(data.Instance) instance1.Features = util.NewVector(4) instance1.Features.SetValues([]float64{1, 1, 1, 3}) instance1.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance1) instance2 := new(data.Instance) instance2.Features = util.NewVector(4) instance2.Features.SetValues([]float64{1, 3, 1, 5}) instance2.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance2) instance3 := new(data.Instance) instance3.Features = util.NewVector(4) instance3.Features.SetValues([]float64{1, 3, 4, 7}) instance3.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance3) instance4 := new(data.Instance) instance4.Features = util.NewVector(4) instance4.Features.SetValues([]float64{1, 2, 8, 6}) instance4.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance4) set.Finalize() gdTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "gd", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 0.1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, GDBatchSize: 0, // full-bath }, } gdTrainer := NewMaxEntClassifierTrainer(gdTrainerOptions) lbfgsTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "lbfgs", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, }, } lbfgsTrainer := NewMaxEntClassifierTrainer(lbfgsTrainerOptions) lbfgsTrainer.Train(set) gdTrainer.Train(set).Write("test.mlf") model := LoadModel("test.mlf") util.Expect(t, "0", model.Predict(instance1).Label) util.Expect(t, "0", model.Predict(instance2).Label) util.Expect(t, "1", model.Predict(instance3).Label) util.Expect(t, "1", model.Predict(instance4).Label) }
func TestTrainWithNamedFeatures(t *testing.T) { set := data.NewInmemDataset() instance1 := new(data.Instance) instance1.NamedFeatures = map[string]float64{ "1": 1, "2": 1, "3": 1, "4": 3, } instance1.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance1) instance2 := new(data.Instance) instance2.NamedFeatures = map[string]float64{ "1": 1, "2": 3, "3": 1, "4": 5, } instance2.Output = &data.InstanceOutput{Label: 0} set.AddInstance(instance2) instance3 := new(data.Instance) instance3.NamedFeatures = map[string]float64{ "1": 1, "2": 3, "3": 4, "4": 7, } instance3.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance3) instance4 := new(data.Instance) instance4.NamedFeatures = map[string]float64{ "1": 1, "2": 2, "3": 8, "4": 6, } instance4.Output = &data.InstanceOutput{Label: 1} set.AddInstance(instance4) set.Finalize() gdTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "gd", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 0.1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, GDBatchSize: 0, // full-bath }, } gdTrainer := NewMaxEntClassifierTrainer(gdTrainerOptions) lbfgsTrainerOptions := TrainerOptions{ Optimizer: optimizer.OptimizerOptions{ OptimizerName: "lbfgs", RegularizationScheme: 2, RegularizationFactor: 1, LearningRate: 1, ConvergingDeltaWeight: 1e-6, ConvergingSteps: 3, MaxIterations: 0, }, } lbfgsTrainer := NewMaxEntClassifierTrainer(lbfgsTrainerOptions) lbfgsTrainer.Train(set) gdTrainer.Train(set).Write("test.mlf") model := LoadModel("test.mlf") util.Expect(t, "0", model.Predict(instance1).Label) util.Expect(t, "0", model.Predict(instance2).Label) util.Expect(t, "1", model.Predict(instance3).Label) util.Expect(t, "1", model.Predict(instance4).Label) }
func TestInMemDataset(t *testing.T) { set := NewInmemDataset() instance1 := new(Instance) instance1.Features = util.NewVector(3) instance1.Features.SetValues([]float64{1, 2, 3}) instance1.Output = &InstanceOutput{Label: 1} util.Expect(t, "true", set.AddInstance(instance1)) instance2 := new(Instance) instance2.Features = util.NewVector(3) instance2.Features.SetValues([]float64{3, 4, 5}) instance2.Output = &InstanceOutput{Label: 2} util.Expect(t, "true", set.AddInstance(instance2)) instance3 := new(Instance) instance3.Features = util.NewSparseVector() instance3.Features.SetValues([]float64{3, 4, 5}) instance3.Output = &InstanceOutput{Label: 0} util.Expect(t, "false", set.AddInstance(instance3)) instance4 := new(Instance) instance4.Features = util.NewVector(4) instance4.Features.SetValues([]float64{3, 4, 5, 6}) instance4.Output = &InstanceOutput{Label: 4} util.Expect(t, "false", set.AddInstance(instance4)) instance5 := new(Instance) instance5.Features = util.NewVector(3) instance5.Features.SetValues([]float64{3, 5, 5}) util.Expect(t, "false", set.AddInstance(instance5)) set.Finalize() // 检查数据集选项 util.Expect(t, "false", set.GetOptions().FeatureIsSparse) util.Expect(t, "3", set.GetOptions().FeatureDimension) util.Expect(t, "true", set.GetOptions().IsSupervisedLearning) util.Expect(t, "3", set.GetOptions().NumLabels) util.Expect(t, "2", set.NumInstances()) iter := set.CreateIterator() iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "1", iter.GetInstance().Features.Get(0)) util.Expect(t, "2", iter.GetInstance().Features.Get(1)) util.Expect(t, "3", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "3", iter.GetInstance().Features.Get(0)) util.Expect(t, "4", iter.GetInstance().Features.Get(1)) util.Expect(t, "5", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) iter = set.CreateIterator() iter.Start() iter.Skip(2) util.Expect(t, "true", iter.End()) util.Expect(t, "2", set.NumInstances()) }
func TestLibsvmLoader(t *testing.T) { set := LoadLibSVMDataset("test.txt", false) util.Expect(t, "10", set.NumInstances()) util.Expect(t, "45", set.GetOptions().FeatureDimension) util.Expect(t, "2", set.GetOptions().NumLabels) }