func TestTrainGaussian(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } dim := 8 mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1} std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4} g := NewModel(dim, Name("test training")) r := rand.New(rand.NewSource(33)) for i := 0; i < 2000000; i++ { rv := model.RandNormalVector(r, mean, std) g.UpdateOne(model.F64ToObs(rv, ""), 1.0) } g.Estimate() t.Logf("Mean: \n%+v", g.Mean) t.Logf("STD: \n%+v", g.StdDev) for i, _ := range mean { if !gjoa.Comparef64(mean[i], g.Mean[i], tolerance) { t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]", i, mean[i], g.Mean[i]) } if !gjoa.Comparef64(std[i], g.StdDev[i], tolerance) { t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]", i, std[i], g.StdDev[i]) } } }
// Train using sampler. func BenchmarkTrain2(b *testing.B) { dim := 8 numSamp := 2000000 // Use a FloatObserver. fs, err := model.NewFloatObserver(make([][]float64, numSamp, numSamp), make([]model.SimpleLabel, numSamp, numSamp)) if err != nil { b.Fatal(err) } mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1} std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4} g := NewModel(dim, Name("test training")) r := rand.New(rand.NewSource(33)) for i := 0; i < numSamp; i++ { rv := model.RandNormalVector(r, mean, std) fs.Values[i] = rv } for i := 0; i < b.N; i++ { g.Update(fs, model.NoWeight) g.Estimate() g.Clear() } }
func TestCloneGaussian(t *testing.T) { dim := 8 mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1} std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4} g := NewModel(dim, Name("test cloning")) r := rand.New(rand.NewSource(33)) for i := 0; i < 2000; i++ { rv := model.RandNormalVector(r, mean, std) g.UpdateOne(model.F64ToObs(rv, ""), 1.0) } g.Estimate() ng := NewModel(g.ModelDim, Clone(g)) // compare g vs. ng type table struct { v1, v2 []float64 name string } tab := []table{ table{g.Sumx, ng.Sumx, "Sumx"}, table{g.Sumxsq, ng.Sumxsq, "Sumxsq"}, table{g.Mean, ng.Mean, "Mean"}, table{g.StdDev, ng.StdDev, "StdDev"}, table{g.variance, ng.variance, "variance"}, table{g.varianceInv, ng.varianceInv, "varianceInv"}, table{[]float64{g.const1}, []float64{ng.const1}, "const1"}, table{[]float64{g.const2}, []float64{ng.const2}, "const2"}, } // compare slices for _, v := range tab { gjoa.CompareSliceFloat(t, v.v1, v.v2, "no match: "+v.name, 0.00001) } // if ng.BaseModel.Model == g.BaseModel.Model { // t.Fatalf("Modeler is the same.") // } if ng.NSamples != g.NSamples { t.Fatalf("NSamples doesn't match.") } }
func TestTrainGaussian2(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } dim := 8 numSamp := 2000000 // Use a FloatObserver. values := make([][]float64, numSamp, numSamp) labels := make([]model.SimpleLabel, numSamp, numSamp) mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1} std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4} g := NewModel(dim, Name("test training")) r := rand.New(rand.NewSource(33)) for i := 0; i < numSamp; i++ { rv := model.RandNormalVector(r, mean, std) values[i] = rv } fo, err := model.NewFloatObserver(values, labels) if err != nil { t.Fatal(err) } g.Update(fo, model.NoWeight) g.Estimate() t.Logf("Mean: \n%+v", g.Mean) t.Logf("STD: \n%+v", g.StdDev) for i, _ := range mean { if !gjoa.Comparef64(mean[i], g.Mean[i], tolerance) { t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]", i, mean[i], g.Mean[i]) } if !gjoa.Comparef64(std[i], g.StdDev[i], tolerance) { t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]", i, std[i], g.StdDev[i]) } } }
// Train without using sampler. func BenchmarkTrain(b *testing.B) { dim := 8 mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1} std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4} g := NewModel(dim, Name("test training")) r := rand.New(rand.NewSource(33)) buf := make([][]float64, 2000000, 2000000) for i := 0; i < 2000000; i++ { rv := model.RandNormalVector(r, mean, std) buf[i] = rv } for i := 0; i < b.N; i++ { for i := 0; i < 2000000; i++ { g.UpdateOne(model.F64ToObs(buf[i], ""), 1.0) } g.Estimate() g.Clear() } }
// Trains a GMM as follows: // 1 - Estimate a Gaussian model params for the training set. // 2 - Use the mean and sd of the training set to generate // a random GMM to be used as seed. // 3 - Run several iterations of the GMM max likelihood training algorithm // to estimate the GMM weights and the Gaussian component mean, and // variance vectors. func TestTrainGMM(t *testing.T) { var seed int64 = 33 numComp := 2 numIter := 10 numObs := 1000000 mean0 := []float64{1, 2} std0 := []float64{0.3, 0.3} mean1 := []float64{4, 4} std1 := []float64{1, 1} dim := len(mean0) gmm := NewModel(dim, numComp, Name("mygmm")) t.Logf("Initial Weights: \n%+v", gmm.Weights) { // Estimate mean variance of the data. g := gaussian.NewModel(dim, gaussian.Name("test training")) r := rand.New(rand.NewSource(seed)) for i := 0; i < numObs; i++ { rv, err := model.RandNormalVector(mean0, std0, r) if err != nil { t.Fatal(err) } g.UpdateOne(model.F64ToObs(rv), 1.0) rv, err = model.RandNormalVector(mean1, std1, r) if err != nil { t.Fatal(err) } g.UpdateOne(model.F64ToObs(rv), 1.0) } g.Estimate() t.Logf("Gaussian Model for training set:") t.Logf("Mean: \n%+v", g.Mean) t.Logf("SD: \n%+v", g.StdDev) // Use the estimated mean and sd to generate a seed GMM. gmm = RandomModel(g.Mean, g.StdDev, numComp, "mygmm", 99) t.Logf("Random GMM: %+v.", gmm) t.Logf("Component 0: %+v.", gmm.Components[0]) t.Logf("Component 1: %+v.", gmm.Components[1]) } for iter := 0; iter < numIter; iter++ { t.Logf("Starting GMM training iteration %d.", iter) // Reset the same random number generator to make sure we use the // same observations in each iterations. r := rand.New(rand.NewSource(seed)) // Update GMM stats. for i := 0; i < numObs; i++ { rv, err := model.RandNormalVector(mean0, std0, r) if err != nil { t.Fatal(err) } gmm.UpdateOne(model.F64ToObs(rv), 1.0) rv, err = model.RandNormalVector(mean1, std1, r) if err != nil { t.Fatal(err) } gmm.UpdateOne(model.F64ToObs(rv), 1.0) } // Estimates GMM params. gmm.Estimate() t.Logf("Iter: %d", iter) t.Logf("GMM: %+v", gmm) t.Logf("Weights: \n%+v", gmm.Weights) t.Logf("Likelihood: %f", gmm.Likelihood) t.Logf("Num Samples: %f", gmm.NSamples) for _, c := range gmm.Components { t.Logf("%s: Mean: \n%+v", c.Name(), c.Mean) t.Logf("%s: STD: \n%+v", c.Name(), c.StdDev) } // Prepare for next iteration. gmm.Clear() } for i := 0; i < dim; i++ { g := gmm.Components[1] if !gjoa.Comparef64(mean0[i], g.Mean[i], epsilon) { t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]", i, mean0[i], g.Mean[i]) } if !gjoa.Comparef64(std0[i], g.StdDev[i], epsilon) { t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]", i, std0[i], g.StdDev[i]) } } for i := 0; i < dim; i++ { g := gmm.Components[0] if !gjoa.Comparef64(mean1[i], g.Mean[i], epsilon) { t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]", i, mean1[i], g.Mean[i]) } if !gjoa.Comparef64(std1[i], g.StdDev[i], epsilon) { t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]", i, std1[i], g.StdDev[i]) } } if !gjoa.Comparef64(0.5, gmm.Weights[0], epsilon) { t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]", 0.5, gmm.Weights[0]) } if !gjoa.Comparef64(0.5, gmm.Weights[1], epsilon) { t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]", 0.5, gmm.Weights[1]) } }
// Sample returns a Gaussian sample. func (g *Model) Sample(r *rand.Rand) model.Obs { obs := model.RandNormalVector(r, g.Mean, g.StdDev) return model.NewFloatObs(obs, model.SimpleLabel("")) }