func makeHMM(t *testing.T) *Model { // Gaussian 1. mean1 := []float64{1} sd1 := []float64{1} g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean(mean1), gm.StdDev(sd1)) // Gaussian 2. mean2 := []float64{4} sd2 := []float64{2} g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean(mean2), gm.StdDev(sd2)) var err error h0 := narray.New(4, 4) // h0.Set(.8, 0, 1) h0.Set(1, 0, 1) // h0.Set(.2, 0, 2) h0.Set(.5, 1, 1) h0.Set(.5, 1, 2) h0.Set(.7, 2, 2) h0.Set(.3, 2, 3) h0 = narray.Log(nil, h0.Copy()) ms, _ = NewSet() _, err = ms.NewNet("hmm0", h0, []model.Modeler{nil, g1, g2, nil}) fatalIf(t, err) return NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) }
func MakeGMM(t *testing.T) *Model { mean0 := []float64{1, 2} sd0 := []float64{0.3, 0.3} mean1 := []float64{4, 4} sd1 := []float64{1, 1} weights := []float64{0.6, 0.4} dim := len(mean0) g0 := gaussian.NewModel(2, gaussian.Name("g0"), gaussian.Mean(mean0), gaussian.StdDev(sd0)) g1 := gaussian.NewModel(2, gaussian.Name("g1"), gaussian.Mean(mean1), gaussian.StdDev(sd1)) components := []*gaussian.Model{g0, g1} gmm := NewModel(dim, 2, Name("mygmm"), Components(components), Weights(weights)) return gmm }
// creates a random gaussian by adding a perturbation to an existing gaussian. func initGaussian(r *rand.Rand, m model.Modeler) *gm.Model { g := m.(*gm.Model) var mean, sd []float64 for i := 0; i < g.ModelDim; i++ { a := r.NormFloat64()*0.2 + 1.0 // pert 0.8 to 1.2 mean = append(mean, g.Mean[i]*a) sd = append(sd, g.StdDev[i]*a) } return gm.NewModel(g.ModelDim, gm.Name(g.ModelName), gm.Mean(mean), gm.StdDev(sd)) }
func randomGaussian(r *rand.Rand, id string, dim int) *gm.Model { var mean, sd []float64 startSD := 40.0 for i := 0; i < dim; i++ { mean = append(mean, float64(r.Intn(10)*100.0)) a := r.NormFloat64()*0.2 + 1.0 // pert 0.8 to 1.2 sd = append(sd, startSD*a) } return gm.NewModel(dim, gm.Name(id), gm.Mean(mean), gm.StdDev(sd)) }
// NewModel creates a new Gaussian mixture model. func NewModel(dim, numComponents int, options ...Option) *Model { gmm := &Model{ ModelName: "GMM", // default name ModelDim: dim, NComponents: numComponents, Diag: true, tmpProbs: make([]float64, numComponents), } gmm.Type = reflect.TypeOf(*gmm).String() // Set options. for _, option := range options { option(gmm) } if len(gmm.PosteriorSum) == 0 { gmm.PosteriorSum = make([]float64, gmm.NComponents) } // Create components if not provided. if len(gmm.Components) == 0 { gmm.Components = make([]*gaussian.Model, numComponents, numComponents) for i := range gmm.Components { cname := componentName(gmm.ModelName, i, gmm.NComponents) gmm.Components[i] = gaussian.NewModel(gmm.ModelDim, gaussian.Name(cname)) } } // Initialize weights. // Caller may pass weight, log(weights), or no weights. switch { case len(gmm.LogWeights) > 0 && len(gmm.Weights) > 0: glog.Fatal("options not allowed: provide only one of LogWeights or Weights") case len(gmm.LogWeights) == 0 && len(gmm.Weights) == 0: gmm.LogWeights = make([]float64, numComponents) logw := -math.Log(float64(gmm.NComponents)) floatx.Apply(floatx.SetValueFunc(logw), gmm.LogWeights, nil) gmm.Weights = make([]float64, gmm.NComponents) floatx.Exp(gmm.Weights, gmm.LogWeights) glog.Infof("init weights with equal values: %.6f", gmm.Weights[0]) case len(gmm.LogWeights) > 0: gmm.Weights = make([]float64, gmm.NComponents) floatx.Exp(gmm.Weights, gmm.LogWeights) case len(gmm.Weights) > 0: gmm.LogWeights = make([]float64, numComponents) floatx.Log(gmm.LogWeights, gmm.Weights) } return gmm }
// RandomModel generates a random Gaussian mixture model using mean and variance vectors as seed. // Use this function to initialize the GMM before training. The mean and sd // vector can be estimated from the data set using a Gaussian model. func RandomModel(mean, sd []float64, numComponents int, name string, seed int64) *Model { n := len(mean) if !floats.EqualLengths(mean, sd) { panic(floatx.ErrLength) } cs := make([]*gaussian.Model, n, n) r := rand.New(rand.NewSource(seed)) for i := 0; i < n; i++ { rv := RandomVector(mean, sd, r) cs[i] = gaussian.NewModel(n, gaussian.Mean(rv), gaussian.StdDev(sd)) } gmm := NewModel(n, numComponents, Name(name), Components(cs)) return gmm }
// Trains a GMM as follows: // 1 - Estimate a Gaussian model params for the training set. // 2 - Use the mean and sd of the training set to generate // a random GMM to be used as seed. // 3 - Run several iterations of the GMM max likelihood training algorithm // to estimate the GMM weights and the Gaussian component mean, and // variance vectors. func TestTrainGMM(t *testing.T) { var seed int64 = 33 numComp := 2 numIter := 10 numObs := 1000000 mean0 := []float64{1, 2} std0 := []float64{0.3, 0.3} mean1 := []float64{4, 4} std1 := []float64{1, 1} dim := len(mean0) gmm := NewModel(dim, numComp, Name("mygmm")) t.Logf("Initial Weights: \n%+v", gmm.Weights) { // Estimate mean variance of the data. g := gaussian.NewModel(dim, gaussian.Name("test training")) r := rand.New(rand.NewSource(seed)) for i := 0; i < numObs; i++ { rv, err := model.RandNormalVector(mean0, std0, r) if err != nil { t.Fatal(err) } g.UpdateOne(model.F64ToObs(rv), 1.0) rv, err = model.RandNormalVector(mean1, std1, r) if err != nil { t.Fatal(err) } g.UpdateOne(model.F64ToObs(rv), 1.0) } g.Estimate() t.Logf("Gaussian Model for training set:") t.Logf("Mean: \n%+v", g.Mean) t.Logf("SD: \n%+v", g.StdDev) // Use the estimated mean and sd to generate a seed GMM. gmm = RandomModel(g.Mean, g.StdDev, numComp, "mygmm", 99) t.Logf("Random GMM: %+v.", gmm) t.Logf("Component 0: %+v.", gmm.Components[0]) t.Logf("Component 1: %+v.", gmm.Components[1]) } for iter := 0; iter < numIter; iter++ { t.Logf("Starting GMM training iteration %d.", iter) // Reset the same random number generator to make sure we use the // same observations in each iterations. r := rand.New(rand.NewSource(seed)) // Update GMM stats. for i := 0; i < numObs; i++ { rv, err := model.RandNormalVector(mean0, std0, r) if err != nil { t.Fatal(err) } gmm.UpdateOne(model.F64ToObs(rv), 1.0) rv, err = model.RandNormalVector(mean1, std1, r) if err != nil { t.Fatal(err) } gmm.UpdateOne(model.F64ToObs(rv), 1.0) } // Estimates GMM params. gmm.Estimate() t.Logf("Iter: %d", iter) t.Logf("GMM: %+v", gmm) t.Logf("Weights: \n%+v", gmm.Weights) t.Logf("Likelihood: %f", gmm.Likelihood) t.Logf("Num Samples: %f", gmm.NSamples) for _, c := range gmm.Components { t.Logf("%s: Mean: \n%+v", c.Name(), c.Mean) t.Logf("%s: STD: \n%+v", c.Name(), c.StdDev) } // Prepare for next iteration. gmm.Clear() } for i := 0; i < dim; i++ { g := gmm.Components[1] if !gjoa.Comparef64(mean0[i], g.Mean[i], epsilon) { t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]", i, mean0[i], g.Mean[i]) } if !gjoa.Comparef64(std0[i], g.StdDev[i], epsilon) { t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]", i, std0[i], g.StdDev[i]) } } for i := 0; i < dim; i++ { g := gmm.Components[0] if !gjoa.Comparef64(mean1[i], g.Mean[i], epsilon) { t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]", i, mean1[i], g.Mean[i]) } if !gjoa.Comparef64(std1[i], g.StdDev[i], epsilon) { t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]", i, std1[i], g.StdDev[i]) } } if !gjoa.Comparef64(0.5, gmm.Weights[0], epsilon) { t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]", 0.5, gmm.Weights[0]) } if !gjoa.Comparef64(0.5, gmm.Weights[1], epsilon) { t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]", 0.5, gmm.Weights[1]) } }
// should be equivalent to training a single gaussian, great for debugging. func TestSingleState(t *testing.T) { // HMM to generate data. g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1})) h0 := narray.New(3, 3) h0.Set(1, 0, 1) h0.Set(.8, 1, 1) h0.Set(.2, 1, 2) h0 = narray.Log(nil, h0.Copy()) ms0, _ := NewSet() net0, e0 := ms0.NewNet("hmm", h0, []model.Modeler{nil, g01, nil}) fatalIf(t, e0) hmm0 := NewModel(OSet(ms0)) _ = hmm0 // Create gaussian to estimate without using the HMM code. g := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) // Create initial HMM and estimate params from generated data. g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) h := narray.New(3, 3) h.Set(1, 0, 1) h.Set(.5, 1, 1) h.Set(.5, 1, 2) h = narray.Log(nil, h.Copy()) ms, _ = NewSet() net, e := ms.NewNet("hmm", h, []model.Modeler{nil, g1, nil}) fatalIf(t, e) hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) iter := 5 // number of sequences m := 1000 numFrames := 0 t0 := time.Now() // Start timer. for i := 0; i < iter; i++ { t.Logf("iter [%d]", i) // Make sure we generate the same data in each iteration. r := rand.New(rand.NewSource(33)) gen := newGenerator(r, false, net0) // Reset all counters. hmm.Clear() g.Clear() // fix the seed to get the same sequence for j := 0; j < m; j++ { obs, states := gen.next("oid-" + fi(j)) numFrames += len(states) - 2 hmm.UpdateOne(obs, 1.0) // Update Gaussian for _, o := range obs.ValueAsSlice() { vec := o.([]float64) gobs := model.NewFloatObs(vec, model.SimpleLabel("")) g.UpdateOne(gobs, 1.0) } } hmm.Estimate() g.Estimate() t.Logf("iter:%d, hmm g1: %+v", i, net.B[1]) t.Logf("iter:%d, direct g1:%+v", i, g) } dur := time.Now().Sub(t0) tp0 := narray.Exp(nil, h0.Copy()) tp := narray.Exp(nil, net.A.Copy()) ns := tp.Shape[0] for i := 0; i < ns; i++ { for j := 0; j < ns; j++ { p0 := tp0.At(i, j) logp0 := h0.At(i, j) p := tp.At(i, j) logp := h.At(i, j) if p > smallNumber || p0 > smallNumber { t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp) } } } t.Log("") t.Logf("hmm0 g1:%+v", net0.B[1]) t.Logf("hmm g1: %+v", net.B[1]) t.Log("") t.Logf("direct g1:%+v", g) // Print time stats. t.Log("") t.Logf("Total time: %v", dur) t.Logf("Time per iteration: %v", dur/time.Duration(iter)) t.Logf("Time per frame: %v", dur/time.Duration(iter*numFrames*m)) gjoa.CompareSliceFloat(t, tp0.Data, tp.Data, "error in Trans Probs [0]", .03) CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03) if t.Failed() { t.FailNow() } // Recognize. sg := ms.SearchGraph() dec, e := graph.NewDecoder(sg) if e != nil { t.Fatal(e) } r := rand.New(rand.NewSource(5151)) gen := newGenerator(r, true, net0) // testDecoder(t, gen, dec, 1000) testDecoder(t, gen, dec, 10) }
func TestHMMGauss(t *testing.T) { // Create reference HMM to generate observations. g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1})) g02 := gm.NewModel(1, gm.Name("g02"), gm.Mean([]float64{16}), gm.StdDev([]float64{2})) h0 := narray.New(4, 4) h0.Set(.6, 0, 1) h0.Set(.4, 0, 2) h0.Set(.9, 1, 1) h0.Set(.1, 1, 2) h0.Set(.7, 2, 2) h0.Set(.3, 2, 3) h0 = narray.Log(nil, h0.Copy()) ms0, _ := NewSet() net0, e0 := ms0.NewNet("hmm", h0, []model.Modeler{nil, g01, g02, nil}) fatalIf(t, e0) hmm0 := NewModel(OSet(ms0), UpdateTP(true), UpdateOP(true)) _ = hmm0 // Create random HMM and estimate params from obs. g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean([]float64{18}), gm.StdDev([]float64{4})) h := narray.New(4, 4) h.Set(.5, 0, 1) h.Set(.5, 0, 2) h.Set(.5, 1, 1) h.Set(.5, 1, 2) h.Set(.5, 2, 2) h.Set(.5, 2, 3) h = narray.Log(nil, h.Copy()) ms, _ = NewSet() net, e := ms.NewNet("hmm", h, []model.Modeler{nil, g1, g2, nil}) fatalIf(t, e) hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) iter := 10 // number of sequences m := 500 numFrames := 0 t0 := time.Now() // Start timer. for i := 0; i < iter; i++ { t.Logf("iter [%d]", i) // Make sure we generate the same data in each iteration. r := rand.New(rand.NewSource(33)) gen := newGenerator(r, false, net0) // Reset all counters. hmm.Clear() // fix the seed to get the same sequence for j := 0; j < m; j++ { obs, states := gen.next("oid-" + fi(j)) numFrames += len(states) - 2 hmm.UpdateOne(obs, 1.0) } hmm.Estimate() } dur := time.Now().Sub(t0) tp0 := narray.Exp(nil, h0.Copy()) tp := narray.Exp(nil, net.A.Copy()) ns := tp.Shape[0] for i := 0; i < ns; i++ { for j := 0; j < ns; j++ { p0 := tp0.At(i, j) logp0 := h0.At(i, j) p := tp.At(i, j) logp := h.At(i, j) if p > smallNumber || p0 > smallNumber { t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp) } } } t.Log("") t.Logf("hmm0 g1:%+v, g2:%+v", net0.B[1], net0.B[2]) t.Logf("hmm g1: %+v, g2:%+v", net.B[1], net.B[2]) // Print time stats. t.Log("") t.Logf("Total time: %v", dur) t.Logf("Time per iteration: %v", dur/time.Duration(iter)) t.Logf("Time per frame: %v", dur/time.Duration(iter*numFrames*m)) gjoa.CompareSliceFloat(t, tp0.Data, tp.Data, "error in Trans Probs [0]", .03) CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03) CompareGaussians(t, net0.B[2].(*gm.Model), net.B[2].(*gm.Model), 0.03) if t.Failed() { t.FailNow() } // Recognize. g := ms.SearchGraph() dec, e := graph.NewDecoder(g) if e != nil { t.Fatal(e) } r := rand.New(rand.NewSource(5151)) gen := newGenerator(r, true, net0) testDecoder(t, gen, dec, 1000) }