Example #1
func TestTrainGaussian(t *testing.T) {

	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}
	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test training"))

	r := rand.New(rand.NewSource(33))
	for i := 0; i < 2000000; i++ {
		rv := model.RandNormalVector(r, mean, std)
		g.UpdateOne(model.F64ToObs(rv, ""), 1.0)
	}
	g.Estimate()
	t.Logf("Mean: \n%+v", g.Mean)
	t.Logf("STD: \n%+v", g.StdDev)

	for i := range mean {
		if !gjoa.Comparef64(mean[i], g.Mean[i], tolerance) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std[i], g.StdDev[i], tolerance) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std[i], g.StdDev[i])
		}
	}
}
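
Both long-running training tests in this set (this one and Example #4 below) generate two million observations, so they honor the -short flag via testing.Short(). To skip them during a quick run:

go test -short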
Example #2
// Train using a FloatObserver (batch update via Update).
func BenchmarkTrain2(b *testing.B) {

	dim := 8
	numSamp := 2000000

	// Use a FloatObserver.
	fs, err := model.NewFloatObserver(make([][]float64, numSamp),
		make([]model.SimpleLabel, numSamp))
	if err != nil {
		b.Fatal(err)
	}
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test training"))

	r := rand.New(rand.NewSource(33))
	for i := 0; i < numSamp; i++ {
		rv := model.RandNormalVector(r, mean, std)
		fs.Values[i] = rv
	}
	b.ResetTimer() // exclude the data-generation setup from the measured time
	for i := 0; i < b.N; i++ {
		g.Update(fs, model.NoWeight)
		g.Estimate()
		g.Clear()
	}
}
Example #3
func TestCloneGaussian(t *testing.T) {

	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test cloning"))

	r := rand.New(rand.NewSource(33))
	for i := 0; i < 2000; i++ {
		rv := model.RandNormalVector(r, mean, std)
		g.UpdateOne(model.F64ToObs(rv, ""), 1.0)
	}
	g.Estimate()

	ng := NewModel(g.ModelDim, Clone(g))

	// compare g vs. ng
	type table struct {
		v1, v2 []float64
		name   string
	}

	tab := []table{
		{g.Sumx, ng.Sumx, "Sumx"},
		{g.Sumxsq, ng.Sumxsq, "Sumxsq"},
		{g.Mean, ng.Mean, "Mean"},
		{g.StdDev, ng.StdDev, "StdDev"},
		{g.variance, ng.variance, "variance"},
		{g.varianceInv, ng.varianceInv, "varianceInv"},
		{[]float64{g.const1}, []float64{ng.const1}, "const1"},
		{[]float64{g.const2}, []float64{ng.const2}, "const2"},
	}

	// compare slices
	for _, v := range tab {
		gjoa.CompareSliceFloat(t, v.v1, v.v2, "no match: "+v.name, 0.00001)
	}

	//	if ng.BaseModel.Model == g.BaseModel.Model {
	//		t.Fatalf("Modeler is the same.")
	//	}
	if ng.NSamples != g.NSamples {
		t.Fatalf("NSamples doesn't match.")
	}
}
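
For orientation, here is how the accumulated statistics compared above (Sumx, Sumxsq, NSamples) relate to the derived Mean and StdDev fields. This is a self-contained sketch of the standard maximum-likelihood formulas, not the library's actual Estimate implementation:

package main

import (
	"fmt"
	"math"
)

// mlEstimate derives the per-dimension ML mean and standard deviation
// from sufficient statistics: mean = sumx/n, variance = sumxsq/n - mean^2.
func mlEstimate(sumx, sumxsq []float64, n float64) (mean, std []float64) {
	mean = make([]float64, len(sumx))
	std = make([]float64, len(sumx))
	for i := range sumx {
		mean[i] = sumx[i] / n
		std[i] = math.Sqrt(sumxsq[i]/n - mean[i]*mean[i])
	}
	return mean, std
}

func main() {
	// Two 2-dim observations: (1,2) and (3,4).
	sumx := []float64{4, 6}     // per-dimension sums
	sumxsq := []float64{10, 20} // per-dimension sums of squares
	mean, std := mlEstimate(sumx, sumxsq, 2)
	fmt.Println(mean, std) // prints [2 3] [1 1]
}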
Example #4
func TestTrainGaussian2(t *testing.T) {

	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}
	dim := 8
	numSamp := 2000000

	// Use a FloatObserver.
	values := make([][]float64, numSamp)
	labels := make([]model.SimpleLabel, numSamp)
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test training"))

	r := rand.New(rand.NewSource(33))
	for i := 0; i < numSamp; i++ {
		rv := model.RandNormalVector(r, mean, std)
		values[i] = rv
	}

	fo, err := model.NewFloatObserver(values, labels)
	if err != nil {
		t.Fatal(err)
	}

	g.Update(fo, model.NoWeight)
	g.Estimate()
	t.Logf("Mean: \n%+v", g.Mean)
	t.Logf("STD: \n%+v", g.StdDev)

	for i := range mean {
		if !gjoa.Comparef64(mean[i], g.Mean[i], tolerance) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std[i], g.StdDev[i], tolerance) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std[i], g.StdDev[i])
		}
	}
}
Example #5
// Train without an observer (per-sample updates via UpdateOne).
func BenchmarkTrain(b *testing.B) {

	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test training"))

	r := rand.New(rand.NewSource(33))
	buf := make([][]float64, 2000000)
	for i := 0; i < 2000000; i++ {
		rv := model.RandNormalVector(r, mean, std)
		buf[i] = rv
	}

	b.ResetTimer() // exclude the data-generation setup from the measured time
	for i := 0; i < b.N; i++ {
		for j := 0; j < 2000000; j++ {
			g.UpdateOne(model.F64ToObs(buf[j], ""), 1.0)
		}
		g.Estimate()
		g.Clear()
	}
}
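
Both benchmarks can be run side by side with the standard Go tooling (run from whatever package directory holds these files; -bench takes a regular expression, so the pattern below matches BenchmarkTrain and BenchmarkTrain2):

go test -bench BenchmarkTrain -benchmem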
Example #6
// Trains a GMM as follows:
// 1 - Estimate Gaussian model parameters for the training set.
// 2 - Use the mean and sd of the training set to generate
//     a random GMM to be used as a seed.
// 3 - Run several iterations of the GMM max-likelihood training algorithm
//     to estimate the GMM weights and the Gaussian component mean and
//     variance vectors.
func TestTrainGMM(t *testing.T) {

	var seed int64 = 33
	numComp := 2
	numIter := 10
	numObs := 1000000

	mean0 := []float64{1, 2}
	std0 := []float64{0.3, 0.3}
	mean1 := []float64{4, 4}
	std1 := []float64{1, 1}
	dim := len(mean0)
	gmm := NewModel(dim, numComp, Name("mygmm"))
	t.Logf("Initial Weights: \n%+v", gmm.Weights)
	{
		// Estimate the mean and variance of the data.
		g := gaussian.NewModel(dim, gaussian.Name("test training"))

		r := rand.New(rand.NewSource(seed))
		for i := 0; i < numObs; i++ {
			rv, err := model.RandNormalVector(mean0, std0, r)
			if err != nil {
				t.Fatal(err)
			}
			g.UpdateOne(model.F64ToObs(rv), 1.0)
			rv, err = model.RandNormalVector(mean1, std1, r)
			if err != nil {
				t.Fatal(err)
			}
			g.UpdateOne(model.F64ToObs(rv), 1.0)
		}
		g.Estimate()
		t.Logf("Gaussian Model for training set:")
		t.Logf("Mean: \n%+v", g.Mean)
		t.Logf("SD: \n%+v", g.StdDev)

		// Use the estimated mean and sd to generate a seed GMM.
		gmm = RandomModel(g.Mean, g.StdDev, numComp,
			"mygmm", 99)
		t.Logf("Random GMM: %+v.", gmm)
		t.Logf("Component 0: %+v.", gmm.Components[0])
		t.Logf("Component 1: %+v.", gmm.Components[1])
	}

	for iter := 0; iter < numIter; iter++ {
		t.Logf("Starting GMM training iteration %d.", iter)

		// Reset the same random number generator to make sure we use the
		// same observations in each iteration.
		r := rand.New(rand.NewSource(seed))

		// Update GMM stats.
		for i := 0; i < numObs; i++ {
			rv, err := model.RandNormalVector(mean0, std0, r)
			if err != nil {
				t.Fatal(err)
			}
			gmm.UpdateOne(model.F64ToObs(rv), 1.0)
			rv, err = model.RandNormalVector(mean1, std1, r)
			if err != nil {
				t.Fatal(err)
			}
			gmm.UpdateOne(model.F64ToObs(rv), 1.0)
		}

		// Estimate the GMM parameters.
		gmm.Estimate()

		t.Logf("Iter: %d", iter)
		t.Logf("GMM: %+v", gmm)
		t.Logf("Weights: \n%+v", gmm.Weights)
		t.Logf("Likelihood: %f", gmm.Likelihood)
		t.Logf("Num Samples: %f", gmm.NSamples)
		for _, c := range gmm.Components {
			t.Logf("%s: Mean: \n%+v", c.Name(), c.Mean)
			t.Logf("%s: STD: \n%+v", c.Name(), c.StdDev)
		}

		// Prepare for next iteration.
		gmm.Clear()
	}

	for i := 0; i < dim; i++ {
		g := gmm.Components[1]
		if !gjoa.Comparef64(mean0[i], g.Mean[i], epsilon) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean0[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std0[i], g.StdDev[i], epsilon) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std0[i], g.StdDev[i])
		}
	}

	for i := 0; i < dim; i++ {
		g := gmm.Components[0]
		if !gjoa.Comparef64(mean1[i], g.Mean[i], epsilon) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean1[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std1[i], g.StdDev[i], epsilon) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std1[i], g.StdDev[i])
		}
	}

	if !gjoa.Comparef64(0.5, gmm.Weights[0], epsilon) {
		t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]",
			0.5, gmm.Weights[0])
	}

	if !gjoa.Comparef64(0.5, gmm.Weights[1], epsilon) {
		t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]",
			0.5, gmm.Weights[1])
	}

}
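
The per-observation accumulation inside gmm.UpdateOne follows the usual EM pattern: compute each component's posterior responsibility for the observation, then accumulate responsibility-weighted statistics. Below is a generic sketch of that E-step computation for one observation, written in log space for numerical stability. It is standard EM for mixtures, not gjoa's internal code, and assumes logProbs[k] holds component k's log-density at the observation (requires import "math"):

// posteriors returns p(component k | x) for one observation, given the
// mixture weights and each component's log-density logProbs[k] at x.
func posteriors(weights, logProbs []float64) []float64 {
	logJoint := make([]float64, len(weights))
	maxLog := math.Inf(-1)
	for k := range weights {
		// log of the joint: log w_k + log N(x; mean_k, var_k)
		logJoint[k] = math.Log(weights[k]) + logProbs[k]
		if logJoint[k] > maxLog {
			maxLog = logJoint[k]
		}
	}
	// Normalize with the log-sum-exp trick to avoid underflow.
	post := make([]float64, len(weights))
	var sum float64
	for k := range logJoint {
		post[k] = math.Exp(logJoint[k] - maxLog)
		sum += post[k]
	}
	for k := range post {
		post[k] /= sum
	}
	return post
}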
Example #7
// Sample returns a random observation drawn from the Gaussian model.
func (g *Model) Sample(r *rand.Rand) model.Obs {
	obs := model.RandNormalVector(r, g.Mean, g.StdDev)
	return model.NewFloatObs(obs, model.SimpleLabel(""))
}
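
A minimal calling sketch for Sample (assumes a trained *Model g as in the tests above; the Printf formatting is only illustrative):

r := rand.New(rand.NewSource(42))
for i := 0; i < 3; i++ {
	o := g.Sample(r) // each call draws one observation from the model
	fmt.Printf("sample %d: %v\n", i, o)
}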