예제 #1
0
// TestDictionaryJSON checks that a Dictionary survives a JSON round trip.
//
// Four names are registered on a dictionary created with NewDictionary(1),
// the dictionary is marshalled and unmarshalled into a fresh value, and the
// decoded dictionary is expected to return ids "1".."4" for those names and
// the same names back for ids 1..4.
func TestDictionaryJSON(t *testing.T) {
	dict := NewDictionary(1)
	for _, name := range []string{"f1", "f2", "f3", "f4"} {
		// Only the registration side effect matters here; the returned id is
		// verified on the decoded dictionary below.
		dict.GetIdFromName(name)
	}

	// Stop immediately on an encoding failure: the lookups below would
	// otherwise run against an empty dictionary and report misleading
	// mismatches instead of the real cause.
	encoded, err := json.Marshal(dict)
	if err != nil {
		t.Fatalf("marshalling dictionary: %v", err)
	}

	var decoded Dictionary
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		t.Fatalf("unmarshalling dictionary from %s: %v", encoded, err)
	}

	// Name -> id lookups on the decoded dictionary.
	util.Expect(t, "1", decoded.GetIdFromName("f1"))
	util.Expect(t, "2", decoded.GetIdFromName("f2"))
	util.Expect(t, "3", decoded.GetIdFromName("f3"))
	util.Expect(t, "4", decoded.GetIdFromName("f4"))

	// Id -> name lookups on the decoded dictionary.
	util.Expect(t, "f1", decoded.GetNameFromId(1))
	util.Expect(t, "f2", decoded.GetNameFromId(2))
	util.Expect(t, "f3", decoded.GetNameFromId(3))
	util.Expect(t, "f4", decoded.GetNameFromId(4))
}
예제 #2
0
// TestSkipIterator fills an in-memory dataset with six dense three-feature
// instances and, for several skip patterns, checks exactly which instances a
// skip iterator visits before reporting End.
func TestSkipIterator(t *testing.T) {
	const dim = 3

	// Feature rows, in insertion order.
	rows := [][]float64{
		{1, 2, 3},
		{3, 4, 5},
		{7, 8, 9},
		{30, 40, 50},
		{70, 80, 90},
		{31, 41, 51},
	}

	set := NewInmemDataset()
	for _, row := range rows {
		inst := new(Instance)
		inst.Features = util.NewVector(dim)
		inst.Features.SetValues(row)
		util.Expect(t, "true", set.AddInstance(inst))
	}
	set.Finalize()

	// Each case pairs a skip pattern with the feature values of the
	// instances the iterator is expected to stop on, in order.
	cases := []struct {
		skips []int
		want  [][dim]string
	}{
		{
			skips: []int{0, 4},
			want:  [][dim]string{{"1", "2", "3"}, {"70", "80", "90"}},
		},
		{
			skips: []int{3, 1},
			want:  [][dim]string{{"30", "40", "50"}},
		},
		{
			skips: []int{1, 2, 1},
			want:  [][dim]string{{"3", "4", "5"}, {"30", "40", "50"}, {"31", "41", "51"}},
		},
	}

	for _, c := range cases {
		iter := NewSkipIterator(set, c.skips)
		iter.Start()
		for _, w := range c.want {
			util.Expect(t, "false", iter.End())
			util.Expect(t, w[0], iter.GetInstance().Features.Get(0))
			util.Expect(t, w[1], iter.GetInstance().Features.Get(1))
			util.Expect(t, w[2], iter.GetInstance().Features.Get(2))
			iter.Next()
		}
		// After the last expected instance the iterator must be exhausted.
		util.Expect(t, "true", iter.End())
	}
}
예제 #3
0
// TestSkipDataset wraps a six-instance in-memory dataset in skip datasets
// built from different bucket lists and checks both the reported instance
// count and the instances produced by the wrapped dataset's iterator.
func TestSkipDataset(t *testing.T) {
	const dim = 3

	// Feature rows, in insertion order.
	rows := [][]float64{
		{1, 2, 3},
		{3, 4, 5},
		{7, 8, 9},
		{30, 40, 50},
		{70, 80, 90},
		{31, 41, 51},
	}

	set := NewInmemDataset()
	for _, row := range rows {
		inst := new(Instance)
		inst.Features = util.NewVector(dim)
		inst.Features.SetValues(row)
		util.Expect(t, "true", set.AddInstance(inst))
	}
	set.Finalize()

	// Each case lists the buckets, the expected NumInstances value, and the
	// feature values of the instances the iterator should yield, in order.
	cases := []struct {
		buckets []SkipBucket
		count   string
		want    [][dim]string
	}{
		{
			buckets: []SkipBucket{{true, 0}, {false, 1}, {true, 3}},
			count:   "2",
			want:    [][dim]string{{"1", "2", "3"}, {"70", "80", "90"}},
		},
		{
			buckets: []SkipBucket{{true, 3}, {false, 1}},
			count:   "1",
			want:    [][dim]string{{"30", "40", "50"}},
		},
		{
			buckets: []SkipBucket{{true, 1}, {false, 1}, {true, 1}, {false, 1}},
			count:   "3",
			want:    [][dim]string{{"3", "4", "5"}, {"30", "40", "50"}, {"31", "41", "51"}},
		},
	}

	for _, c := range cases {
		ss := NewSkipDataset(set, c.buckets)
		util.Expect(t, c.count, ss.NumInstances())

		iter := ss.CreateIterator()
		iter.Start()
		for _, w := range c.want {
			util.Expect(t, "false", iter.End())
			util.Expect(t, w[0], iter.GetInstance().Features.Get(0))
			util.Expect(t, w[1], iter.GetInstance().Features.Get(1))
			util.Expect(t, w[2], iter.GetInstance().Features.Get(2))
			iter.Next()
		}
		// After the last expected instance the iterator must be exhausted.
		util.Expect(t, "true", iter.End())
	}
}
예제 #4
0
// TestDictionary checks both lookup directions of a Dictionary created with
// NewDictionary(1): names receive ids "1", "2", ... in order of first use,
// repeated lookups return the id already assigned, and id lookups outside
// the assigned range return the empty string.
func TestDictionary(t *testing.T) {
	dict := NewDictionary(1)

	// Order matters: an id is assigned the first time a name is seen, and
	// the repeated names in the middle must not consume new ids.
	lookups := []struct{ id, name string }{
		{"1", "feature1"},
		{"2", "feature2"},
		{"3", "feature3"},
		{"2", "feature2"},
		{"3", "feature3"},
		{"1", "feature1"},
		{"4", "feature4"},
	}
	for _, l := range lookups {
		util.Expect(t, l.id, dict.GetIdFromName(l.name))
	}

	// Ids 0 and 5 were never assigned, so they map to no name.
	util.Expect(t, "", dict.GetNameFromId(0))
	util.Expect(t, "feature1", dict.GetNameFromId(1))
	util.Expect(t, "feature2", dict.GetNameFromId(2))
	util.Expect(t, "feature3", dict.GetNameFromId(3))
	util.Expect(t, "feature4", dict.GetNameFromId(4))
	util.Expect(t, "", dict.GetNameFromId(5))
}
예제 #5
0
// TestTrain trains max-entropy classifiers on a four-instance dense dataset
// with two labels. An L-BFGS trainer is run once (its model is not
// inspected); the gradient-descent model is written to "test.mlf", loaded
// back, and expected to predict the training labels.
func TestTrain(t *testing.T) {
	const dim = 4

	set := data.NewInmemDataset()

	// addInstance builds a dense instance with the given output and feature
	// values, adds it to the dataset, and returns it for later prediction.
	addInstance := func(output *data.InstanceOutput, values ...float64) *data.Instance {
		inst := new(data.Instance)
		inst.Features = util.NewVector(dim)
		inst.Features.SetValues(values)
		inst.Output = output
		set.AddInstance(inst)
		return inst
	}

	instance1 := addInstance(&data.InstanceOutput{Label: 0}, 1, 1, 1, 3)
	instance2 := addInstance(&data.InstanceOutput{Label: 0}, 1, 3, 1, 5)
	instance3 := addInstance(&data.InstanceOutput{Label: 1}, 1, 3, 4, 7)
	instance4 := addInstance(&data.InstanceOutput{Label: 1}, 1, 2, 8, 6)
	set.Finalize()

	gdTrainer := NewMaxEntClassifierTrainer(TrainerOptions{
		Optimizer: optimizer.OptimizerOptions{
			OptimizerName:         "gd",
			RegularizationScheme:  2,
			RegularizationFactor:  1,
			LearningRate:          0.1,
			ConvergingDeltaWeight: 1e-6,
			ConvergingSteps:       3,
			MaxIterations:         0,
			GDBatchSize:           0, // full-batch
		},
	})

	lbfgsTrainer := NewMaxEntClassifierTrainer(TrainerOptions{
		Optimizer: optimizer.OptimizerOptions{
			OptimizerName:         "lbfgs",
			RegularizationScheme:  2,
			RegularizationFactor:  1,
			LearningRate:          1,
			ConvergingDeltaWeight: 1e-6,
			ConvergingSteps:       3,
			MaxIterations:         0,
		},
	})
	// The L-BFGS run is only exercised; its resulting model is discarded.
	lbfgsTrainer.Train(set)

	// Round-trip the gradient-descent model through a file before predicting.
	gdTrainer.Train(set).Write("test.mlf")
	model := LoadModel("test.mlf")

	predictions := []struct {
		label string
		inst  *data.Instance
	}{
		{"0", instance1},
		{"0", instance2},
		{"1", instance3},
		{"1", instance4},
	}
	for _, p := range predictions {
		util.Expect(t, p.label, model.Predict(p.inst).Label)
	}
}
예제 #6
0
// TestTrainWithNamedFeatures mirrors TestTrain but describes each instance
// through NamedFeatures (a name -> value map) instead of a dense vector. An
// L-BFGS trainer is run once (its model is not inspected); the
// gradient-descent model is written to "test.mlf", loaded back, and expected
// to predict the training labels.
func TestTrainWithNamedFeatures(t *testing.T) {
	set := data.NewInmemDataset()

	// addInstance builds an instance from named features and an output, adds
	// it to the dataset, and returns it for later prediction.
	addInstance := func(output *data.InstanceOutput, named map[string]float64) *data.Instance {
		inst := new(data.Instance)
		inst.NamedFeatures = named
		inst.Output = output
		set.AddInstance(inst)
		return inst
	}

	instance1 := addInstance(&data.InstanceOutput{Label: 0}, map[string]float64{
		"1": 1,
		"2": 1,
		"3": 1,
		"4": 3,
	})
	instance2 := addInstance(&data.InstanceOutput{Label: 0}, map[string]float64{
		"1": 1,
		"2": 3,
		"3": 1,
		"4": 5,
	})
	instance3 := addInstance(&data.InstanceOutput{Label: 1}, map[string]float64{
		"1": 1,
		"2": 3,
		"3": 4,
		"4": 7,
	})
	instance4 := addInstance(&data.InstanceOutput{Label: 1}, map[string]float64{
		"1": 1,
		"2": 2,
		"3": 8,
		"4": 6,
	})
	set.Finalize()

	gdTrainer := NewMaxEntClassifierTrainer(TrainerOptions{
		Optimizer: optimizer.OptimizerOptions{
			OptimizerName:         "gd",
			RegularizationScheme:  2,
			RegularizationFactor:  1,
			LearningRate:          0.1,
			ConvergingDeltaWeight: 1e-6,
			ConvergingSteps:       3,
			MaxIterations:         0,
			GDBatchSize:           0, // full-batch
		},
	})

	lbfgsTrainer := NewMaxEntClassifierTrainer(TrainerOptions{
		Optimizer: optimizer.OptimizerOptions{
			OptimizerName:         "lbfgs",
			RegularizationScheme:  2,
			RegularizationFactor:  1,
			LearningRate:          1,
			ConvergingDeltaWeight: 1e-6,
			ConvergingSteps:       3,
			MaxIterations:         0,
		},
	})
	// The L-BFGS run is only exercised; its resulting model is discarded.
	lbfgsTrainer.Train(set)

	// Round-trip the gradient-descent model through a file before predicting.
	gdTrainer.Train(set).Write("test.mlf")
	model := LoadModel("test.mlf")

	predictions := []struct {
		label string
		inst  *data.Instance
	}{
		{"0", instance1},
		{"0", instance2},
		{"1", instance3},
		{"1", instance4},
	}
	for _, p := range predictions {
		util.Expect(t, p.label, model.Predict(p.inst).Label)
	}
}
예제 #7
0
// TestInMemDataset exercises the in-memory dataset: which instances
// AddInstance accepts, the options reported after Finalize, the instance
// count, and iteration (including Skip) over the stored instances.
func TestInMemDataset(t *testing.T) {
	set := NewInmemDataset()

	// Two dense, labelled, three-feature instances: both expected to be
	// accepted.
	instance1 := &Instance{
		Features: util.NewVector(3),
		Output:   &InstanceOutput{Label: 1},
	}
	instance1.Features.SetValues([]float64{1, 2, 3})
	util.Expect(t, "true", set.AddInstance(instance1))

	instance2 := &Instance{
		Features: util.NewVector(3),
		Output:   &InstanceOutput{Label: 2},
	}
	instance2.Features.SetValues([]float64{3, 4, 5})
	util.Expect(t, "true", set.AddInstance(instance2))

	// The remaining instances are expected to be rejected. Each differs from
	// the accepted ones in one respect (presumably the reason for rejection):
	// sparse features, a different dimension, and a missing output.
	instance3 := &Instance{
		Features: util.NewSparseVector(),
		Output:   &InstanceOutput{Label: 0},
	}
	instance3.Features.SetValues([]float64{3, 4, 5})
	util.Expect(t, "false", set.AddInstance(instance3))

	instance4 := &Instance{
		Features: util.NewVector(4),
		Output:   &InstanceOutput{Label: 4},
	}
	instance4.Features.SetValues([]float64{3, 4, 5, 6})
	util.Expect(t, "false", set.AddInstance(instance4))

	instance5 := &Instance{Features: util.NewVector(3)}
	instance5.Features.SetValues([]float64{3, 5, 5})
	util.Expect(t, "false", set.AddInstance(instance5))

	set.Finalize()

	// Check the dataset options.
	opts := set.GetOptions()
	util.Expect(t, "false", opts.FeatureIsSparse)
	util.Expect(t, "3", opts.FeatureDimension)
	util.Expect(t, "true", opts.IsSupervisedLearning)
	util.Expect(t, "3", opts.NumLabels)

	util.Expect(t, "2", set.NumInstances())

	// Walk the dataset: exactly the two accepted instances, in insertion
	// order, followed by End.
	iter := set.CreateIterator()
	expectCurrent := func(f0, f1, f2 string) {
		util.Expect(t, "false", iter.End())
		util.Expect(t, f0, iter.GetInstance().Features.Get(0))
		util.Expect(t, f1, iter.GetInstance().Features.Get(1))
		util.Expect(t, f2, iter.GetInstance().Features.Get(2))
	}

	iter.Start()
	expectCurrent("1", "2", "3")

	iter.Next()
	expectCurrent("3", "4", "5")

	iter.Next()
	util.Expect(t, "true", iter.End())

	// Skipping two instances from the start must also reach End.
	iter = set.CreateIterator()
	iter.Start()
	iter.Skip(2)
	util.Expect(t, "true", iter.End())

	// Iterating must not change the instance count.
	util.Expect(t, "2", set.NumInstances())
}
// TestLibsvmLoader loads the fixture file "test.txt" through
// LoadLibSVMDataset and checks the instance count, feature dimension and
// number of labels reported for it.
func TestLibsvmLoader(t *testing.T) {
	dataset := LoadLibSVMDataset("test.txt", false)
	util.Expect(t, "10", dataset.NumInstances())

	options := dataset.GetOptions()
	util.Expect(t, "45", options.FeatureDimension)
	util.Expect(t, "2", options.NumLabels)
}