func TestRandomIntFromDist(t *testing.T) { dist := []float64{0.1, 0.2, 0.3, 0.4} r := rand.New(rand.NewSource(33)) res1 := make([]int, 100, 100) for range res1 { res1 = append(res1, RandIntFromDist(dist, r)) } // Checking that experiments are repeatable. r = rand.New(rand.NewSource(33)) res2 := make([]int, 100, 100) for range res2 { res2 = append(res2, RandIntFromDist(dist, r)) } gjoa.CompareSliceInt(t, res1, res1, "sequence mismatch") r = rand.New(rand.NewSource(33)) res := make(map[int]float64) var n float64 = 100000 for i := 0.0; i < n; i++ { res[RandIntFromDist(dist, r)]++ } actual := make([]float64, len(dist), len(dist)) for k, v := range res { p := v / n t.Log(k, v, p) actual[k] = p } gjoa.CompareSliceFloat(t, dist, actual, "probs don't match, error in RandIntFromDist", 0.02) // same with log probs logDist := make([]float64, len(dist), len(dist)) for k, v := range dist { logDist[k] = math.Log(v) } r = rand.New(rand.NewSource(33)) res = make(map[int]float64) for i := 0.0; i < n; i++ { res[RandIntFromLogDist(logDist, r)]++ } for k, v := range res { p := v / n t.Log(k, v, p) actual[k] = p } gjoa.CompareSliceFloat(t, dist, actual, "probs don't match, error in RandIntFromLogDist", 0.02) }
func TestWriteReadGMM(t *testing.T) { dim := 2 numComp := 2 numIter := 10 numObs := 10000 gmm0 := MakeGMM(t) gmm := NewModel(dim, numComp) t.Logf("Initial Weights: \n%+v", gmm.Weights) mean01 := []float64{2.5, 3} sd01 := []float64{0.70710678118, 0.70710678118} gmm = RandomModel(mean01, sd01, numComp, "mygmm", 99) for iter := 0; iter < numIter; iter++ { t.Logf("Starting GMM training iteration %d.", iter) gmm.Clear() for i := 0; i < numObs; i++ { rv := gmm0.Sample() gmm.UpdateOne(rv, 1.0) } gmm.Estimate() t.Logf("Iter: %d", iter) } // Write model. fn := os.TempDir() + "gmm.json" t.Logf("Wrote to temp file: %s\n", fn) gmm.WriteFile(fn) /* x, e1 := gmm.ReadFile(fn) if e1 != nil { t.Fatal(e1) } gmm1 := x.(*GMM) */ // Create another Gaussian model. gmm1, e1 := ReadFile(fn) if e1 != nil { t.Fatal(e1) } // Compare gmm and gmm1 for k, v := range gmm.Components { CompareGaussians(t, v, gmm1.Components[k], epsilon) } gjoa.CompareSliceFloat(t, gmm.Weights, gmm1.Weights, "Weights don't match.", epsilon) gjoa.CompareSliceFloat(t, gmm.PosteriorSum, gmm1.PosteriorSum, "PosteriorSum doesn't match.", epsilon) }
func TestCloneGaussian(t *testing.T) { dim := 8 mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1} std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4} g := NewModel(dim, Name("test cloning")) r := rand.New(rand.NewSource(33)) for i := 0; i < 2000; i++ { rv := model.RandNormalVector(r, mean, std) g.UpdateOne(model.F64ToObs(rv, ""), 1.0) } g.Estimate() ng := NewModel(g.ModelDim, Clone(g)) // compare g vs. ng type table struct { v1, v2 []float64 name string } tab := []table{ table{g.Sumx, ng.Sumx, "Sumx"}, table{g.Sumxsq, ng.Sumxsq, "Sumxsq"}, table{g.Mean, ng.Mean, "Mean"}, table{g.StdDev, ng.StdDev, "StdDev"}, table{g.variance, ng.variance, "variance"}, table{g.varianceInv, ng.varianceInv, "varianceInv"}, table{[]float64{g.const1}, []float64{ng.const1}, "const1"}, table{[]float64{g.const2}, []float64{ng.const2}, "const2"}, } // compare slices for _, v := range tab { gjoa.CompareSliceFloat(t, v.v1, v.v2, "no match: "+v.name, 0.00001) } // if ng.BaseModel.Model == g.BaseModel.Model { // t.Fatalf("Modeler is the same.") // } if ng.NSamples != g.NSamples { t.Fatalf("NSamples doesn't match.") } }
func CompareGaussians(t *testing.T, g1 *gaussian.Model, g2 *gaussian.Model, epsilon float64) { gjoa.CompareSliceFloat(t, g1.Mean, g2.Mean, "Wrong Mean", epsilon) gjoa.CompareSliceFloat(t, g1.StdDev, g2.StdDev, "Wrong SD", epsilon) }
func CompareGaussians(t *testing.T, g1 *Model, g2 *Model, tolerance float64) { gjoa.CompareSliceFloat(t, g1.Mean, g2.Mean, "Wrong Mean", tolerance) gjoa.CompareSliceFloat(t, g1.StdDev, g2.StdDev, "Wrong SD", tolerance) }
// should be equivalent to training a single gaussian, great for debugging. func TestSingleState(t *testing.T) { // HMM to generate data. g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1})) h0 := narray.New(3, 3) h0.Set(1, 0, 1) h0.Set(.8, 1, 1) h0.Set(.2, 1, 2) h0 = narray.Log(nil, h0.Copy()) ms0, _ := NewSet() net0, e0 := ms0.NewNet("hmm", h0, []model.Modeler{nil, g01, nil}) fatalIf(t, e0) hmm0 := NewModel(OSet(ms0)) _ = hmm0 // Create gaussian to estimate without using the HMM code. g := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) // Create initial HMM and estimate params from generated data. g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) h := narray.New(3, 3) h.Set(1, 0, 1) h.Set(.5, 1, 1) h.Set(.5, 1, 2) h = narray.Log(nil, h.Copy()) ms, _ = NewSet() net, e := ms.NewNet("hmm", h, []model.Modeler{nil, g1, nil}) fatalIf(t, e) hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) iter := 5 // number of sequences m := 1000 numFrames := 0 t0 := time.Now() // Start timer. for i := 0; i < iter; i++ { t.Logf("iter [%d]", i) // Make sure we generate the same data in each iteration. r := rand.New(rand.NewSource(33)) gen := newGenerator(r, false, net0) // Reset all counters. hmm.Clear() g.Clear() // fix the seed to get the same sequence for j := 0; j < m; j++ { obs, states := gen.next("oid-" + fi(j)) numFrames += len(states) - 2 hmm.UpdateOne(obs, 1.0) // Update Gaussian for _, o := range obs.ValueAsSlice() { vec := o.([]float64) gobs := model.NewFloatObs(vec, model.SimpleLabel("")) g.UpdateOne(gobs, 1.0) } } hmm.Estimate() g.Estimate() t.Logf("iter:%d, hmm g1: %+v", i, net.B[1]) t.Logf("iter:%d, direct g1:%+v", i, g) } dur := time.Now().Sub(t0) tp0 := narray.Exp(nil, h0.Copy()) tp := narray.Exp(nil, net.A.Copy()) ns := tp.Shape[0] for i := 0; i < ns; i++ { for j := 0; j < ns; j++ { p0 := tp0.At(i, j) logp0 := h0.At(i, j) p := tp.At(i, j) logp := h.At(i, j) if p > smallNumber || p0 > smallNumber { t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp) } } } t.Log("") t.Logf("hmm0 g1:%+v", net0.B[1]) t.Logf("hmm g1: %+v", net.B[1]) t.Log("") t.Logf("direct g1:%+v", g) // Print time stats. t.Log("") t.Logf("Total time: %v", dur) t.Logf("Time per iteration: %v", dur/time.Duration(iter)) t.Logf("Time per frame: %v", dur/time.Duration(iter*numFrames*m)) gjoa.CompareSliceFloat(t, tp0.Data, tp.Data, "error in Trans Probs [0]", .03) CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03) if t.Failed() { t.FailNow() } // Recognize. sg := ms.SearchGraph() dec, e := graph.NewDecoder(sg) if e != nil { t.Fatal(e) } r := rand.New(rand.NewSource(5151)) gen := newGenerator(r, true, net0) // testDecoder(t, gen, dec, 1000) testDecoder(t, gen, dec, 10) }
func TestHMMGauss(t *testing.T) { // Create reference HMM to generate observations. g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1})) g02 := gm.NewModel(1, gm.Name("g02"), gm.Mean([]float64{16}), gm.StdDev([]float64{2})) h0 := narray.New(4, 4) h0.Set(.6, 0, 1) h0.Set(.4, 0, 2) h0.Set(.9, 1, 1) h0.Set(.1, 1, 2) h0.Set(.7, 2, 2) h0.Set(.3, 2, 3) h0 = narray.Log(nil, h0.Copy()) ms0, _ := NewSet() net0, e0 := ms0.NewNet("hmm", h0, []model.Modeler{nil, g01, g02, nil}) fatalIf(t, e0) hmm0 := NewModel(OSet(ms0), UpdateTP(true), UpdateOP(true)) _ = hmm0 // Create random HMM and estimate params from obs. g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean([]float64{18}), gm.StdDev([]float64{4})) h := narray.New(4, 4) h.Set(.5, 0, 1) h.Set(.5, 0, 2) h.Set(.5, 1, 1) h.Set(.5, 1, 2) h.Set(.5, 2, 2) h.Set(.5, 2, 3) h = narray.Log(nil, h.Copy()) ms, _ = NewSet() net, e := ms.NewNet("hmm", h, []model.Modeler{nil, g1, g2, nil}) fatalIf(t, e) hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) iter := 10 // number of sequences m := 500 numFrames := 0 t0 := time.Now() // Start timer. for i := 0; i < iter; i++ { t.Logf("iter [%d]", i) // Make sure we generate the same data in each iteration. r := rand.New(rand.NewSource(33)) gen := newGenerator(r, false, net0) // Reset all counters. hmm.Clear() // fix the seed to get the same sequence for j := 0; j < m; j++ { obs, states := gen.next("oid-" + fi(j)) numFrames += len(states) - 2 hmm.UpdateOne(obs, 1.0) } hmm.Estimate() } dur := time.Now().Sub(t0) tp0 := narray.Exp(nil, h0.Copy()) tp := narray.Exp(nil, net.A.Copy()) ns := tp.Shape[0] for i := 0; i < ns; i++ { for j := 0; j < ns; j++ { p0 := tp0.At(i, j) logp0 := h0.At(i, j) p := tp.At(i, j) logp := h.At(i, j) if p > smallNumber || p0 > smallNumber { t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp) } } } t.Log("") t.Logf("hmm0 g1:%+v, g2:%+v", net0.B[1], net0.B[2]) t.Logf("hmm g1: %+v, g2:%+v", net.B[1], net.B[2]) // Print time stats. t.Log("") t.Logf("Total time: %v", dur) t.Logf("Time per iteration: %v", dur/time.Duration(iter)) t.Logf("Time per frame: %v", dur/time.Duration(iter*numFrames*m)) gjoa.CompareSliceFloat(t, tp0.Data, tp.Data, "error in Trans Probs [0]", .03) CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03) CompareGaussians(t, net0.B[2].(*gm.Model), net.B[2].(*gm.Model), 0.03) if t.Failed() { t.FailNow() } // Recognize. g := ms.SearchGraph() dec, e := graph.NewDecoder(g) if e != nil { t.Fatal(e) } r := rand.New(rand.NewSource(5151)) gen := newGenerator(r, true, net0) testDecoder(t, gen, dec, 1000) }
func TestSeqObserver(t *testing.T) { dim := 4 maxSeqLen := 10 numObs := 12 bufSize := 1000000 r := rand.New(rand.NewSource(666)) // Test non sequence data. reader := makeObsData(r, numObs, dim, 1) data := reader.(*obsReader).data // use to assert obs, err := NewSeqObserver(reader) if err != nil { t.Fatal(err) } c, e2 := obs.ObsChan() if e2 != nil { t.Fatal(e2) } i := 0 for v := range c { if len(v.Value().([]float64)) != len(data[i].Vectors[0]) { t.Fatalf("length mismatch - got %d, expected %d", len(v.Value().([]float64)), len(data[i].Vectors[0])) } for j, exp := range data[i].Vectors[0] { got := v.Value().([]float64)[j] if exp != got { t.Logf("got: %5.3f, expected: %5.3f", exp, got) } } i++ } // Test sequence data. reader = makeObsData(r, numObs, dim, maxSeqLen) data = reader.(*obsReader).data // use to assert bufReader := bufio.NewReaderSize(reader, bufSize) obs, err = NewSeqObserver(bufReader) if err != nil { t.Fatal(err) } c, e2 = obs.ObsChan() if e2 != nil { t.Fatal(e2) } i = 0 for v := range c { if len(v.Value().([][]float64)) != len(data[i].Vectors) { t.Fatalf("length mismatch - i:%d, got:%d, expected:%d", i, len(v.Value().([][]float64)), len(data[i].Vectors)) } for j, exp := range data[i].Vectors { got := v.Value().([][]float64)[j] gjoa.CompareSliceFloat(t, exp, got, "value mismatch", 0.001) } i++ } // Write to a file reader = makeObsData(r, numObs, dim, maxSeqLen) data = reader.(*obsReader).data // use to assert fn := filepath.Join(os.TempDir(), "streamobs.json") w, err := os.Create(fn) if err != nil { t.Fatal(err) } io.Copy(w, reader) w.Close() // Read file. rr, ee := os.Open(fn) if ee != nil { t.Fatal(ee) } dec := json.NewDecoder(rr) for i := 0; ; i++ { var oe Seq e := dec.Decode(&oe) if e == io.EOF { break } if e != nil { t.Fatal(e) } for j, exp := range data[i].Vectors { got := oe.Vectors[j] gjoa.CompareSliceFloat(t, exp, got, "value mismatch", 0.001) } } rr.Close() // Read file and pass reader to stream observer. rr, ee = os.Open(fn) if ee != nil { t.Fatal(ee) } obs, err = NewSeqObserver(rr) if err != nil { t.Fatal(err) } c, e2 = obs.ObsChan() if e2 != nil { t.Fatal(e2) } i = 0 for v := range c { if len(v.Value().([][]float64)) != len(data[i].Vectors) { t.Fatalf("length mismatch - i:%d, got:%d, expected:%d", i, len(v.Value().([][]float64)), len(data[i].Vectors)) } for j, exp := range data[i].Vectors { got := v.Value().([][]float64)[j] gjoa.CompareSliceFloat(t, exp, got, "value mismatch", 0.001) } i++ } err = obs.Close() if err != nil { t.Fatal(err) } }