Example #1
0
// TestRandomIntFromDist checks RandIntFromDist and RandIntFromLogDist.
//
// It verifies two things:
//   - two generators created with the same seed yield the same sequence of
//     draws, and
//   - over many draws the relative frequency of each sampled index is within
//     0.02 of the corresponding entry of dist.
func TestRandomIntFromDist(t *testing.T) {

	dist := []float64{0.1, 0.2, 0.3, 0.4}

	// Start from length zero: appending to a slice that already has
	// numDraws elements would leave numDraws leading zeros in the result.
	const numDraws = 100
	r := rand.New(rand.NewSource(33))
	res1 := make([]int, 0, numDraws)
	for i := 0; i < numDraws; i++ {
		res1 = append(res1, RandIntFromDist(dist, r))
	}

	// Checking that experiments are repeatable.
	r = rand.New(rand.NewSource(33))
	res2 := make([]int, 0, numDraws)
	for i := 0; i < numDraws; i++ {
		res2 = append(res2, RandIntFromDist(dist, r))
	}

	// Compare the two independent runs (not a run against itself).
	gjoa.CompareSliceInt(t, res1, res2, "sequence mismatch")

	// frequencies draws n samples and returns the relative frequency of each
	// index. A fresh slice is returned on every call so that an index that is
	// never sampled shows up as zero instead of a stale value.
	const n = 100000
	frequencies := func(draw func() int) []float64 {
		counts := make(map[int]float64)
		for i := 0; i < n; i++ {
			counts[draw()]++
		}
		probs := make([]float64, len(dist))
		for k, v := range counts {
			if k < 0 || k >= len(probs) {
				t.Fatalf("sampled index %d is outside [0, %d)", k, len(probs))
			}
			p := v / n
			t.Log(k, v, p)
			probs[k] = p
		}
		return probs
	}

	r = rand.New(rand.NewSource(33))
	actual := frequencies(func() int { return RandIntFromDist(dist, r) })
	gjoa.CompareSliceFloat(t, dist, actual, "probs don't match, error in RandIntFromDist", 0.02)

	// same with log probs
	logDist := make([]float64, len(dist))
	for k, v := range dist {
		logDist[k] = math.Log(v)
	}

	r = rand.New(rand.NewSource(33))
	actual = frequencies(func() int { return RandIntFromLogDist(logDist, r) })
	gjoa.CompareSliceFloat(t, dist, actual, "probs don't match, error in RandIntFromLogDist", 0.02)

}
Example #2
0
// TestWriteReadGMM estimates a GMM from samples drawn from a reference model,
// writes it to a JSON file in the temp directory, reads it back and checks
// that the components, weights and posterior sums of both models agree
// within epsilon.
func TestWriteReadGMM(t *testing.T) {
	dim := 2
	numComp := 2
	numIter := 10
	numObs := 10000
	gmm0 := MakeGMM(t)
	gmm := NewModel(dim, numComp)
	t.Logf("Initial Weights: \n%+v", gmm.Weights)
	mean01 := []float64{2.5, 3}
	sd01 := []float64{0.70710678118, 0.70710678118}
	gmm = RandomModel(mean01, sd01, numComp, "mygmm", 99)
	for iter := 0; iter < numIter; iter++ {
		t.Logf("Starting GMM training iteration %d.", iter)
		gmm.Clear()

		for i := 0; i < numObs; i++ {
			rv := gmm0.Sample()
			gmm.UpdateOne(rv, 1.0)
		}
		gmm.Estimate()

		t.Logf("Iter: %d", iter)
	}

	// Write model.
	// os.TempDir does not guarantee a trailing separator, so plain
	// concatenation can yield a path such as "/tmpgmm.json". Insert the
	// separator explicitly.
	fn := os.TempDir() + string(os.PathSeparator) + "gmm.json"
	// Best-effort cleanup; a failure to remove the temp file is not a test error.
	defer os.Remove(fn)
	// NOTE(review): if WriteFile returns an error it is not checked here;
	// a failed write currently surfaces through ReadFile below — confirm.
	gmm.WriteFile(fn)
	t.Logf("Wrote to temp file: %s\n", fn)
	/*
		x, e1 := gmm.ReadFile(fn)
		if e1 != nil {
			t.Fatal(e1)
		}
		gmm1 := x.(*GMM)
	*/

	// Create another Gaussian model.
	gmm1, e1 := ReadFile(fn)
	if e1 != nil {
		t.Fatal(e1)
	}

	// Compare gmm and gmm1
	for k, v := range gmm.Components {
		CompareGaussians(t, v, gmm1.Components[k], epsilon)
	}
	gjoa.CompareSliceFloat(t, gmm.Weights, gmm1.Weights, "Weights don't match.", epsilon)
	gjoa.CompareSliceFloat(t, gmm.PosteriorSum, gmm1.PosteriorSum, "PosteriorSum doesn't match.", epsilon)
}
Example #3
0
// TestCloneGaussian estimates a Gaussian from random vectors, builds a second
// model from it with the Clone option and checks that the statistics and
// derived parameters of the two models agree.
func TestCloneGaussian(t *testing.T) {

	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test cloning"))

	// Feed the source model with seeded random vectors, then estimate.
	rng := rand.New(rand.NewSource(33))
	for n := 0; n < 2000; n++ {
		vec := model.RandNormalVector(rng, mean, std)
		g.UpdateOne(model.F64ToObs(vec, ""), 1.0)
	}
	g.Estimate()

	ng := NewModel(g.ModelDim, Clone(g))

	// Each entry pairs a field of the source model with the same field of
	// the clone. Scalars are wrapped in one-element slices so every field
	// goes through the same comparison.
	checks := []struct {
		orig, clone []float64
		label       string
	}{
		{g.Sumx, ng.Sumx, "Sumx"},
		{g.Sumxsq, ng.Sumxsq, "Sumxsq"},
		{g.Mean, ng.Mean, "Mean"},
		{g.StdDev, ng.StdDev, "StdDev"},
		{g.variance, ng.variance, "variance"},
		{g.varianceInv, ng.varianceInv, "varianceInv"},
		{[]float64{g.const1}, []float64{ng.const1}, "const1"},
		{[]float64{g.const2}, []float64{ng.const2}, "const2"},
	}
	for i := range checks {
		c := checks[i]
		gjoa.CompareSliceFloat(t, c.orig, c.clone, "no match: "+c.label, 0.00001)
	}

	// Disabled check kept for reference:
	//	if ng.BaseModel.Model == g.BaseModel.Model {
	//		t.Fatalf("Modeler is the same.")
	//	}
	if g.NSamples != ng.NSamples {
		t.Fatalf("NSamples doesn't match.")
	}
}
Example #4
0
// CompareGaussians compares the Mean and StdDev fields of g1 and g2 using
// gjoa.CompareSliceFloat with the given epsilon.
func CompareGaussians(t *testing.T, g1 *gaussian.Model, g2 *gaussian.Model, epsilon float64) {
	mean1, mean2 := g1.Mean, g2.Mean
	gjoa.CompareSliceFloat(t, mean1, mean2, "Wrong Mean", epsilon)

	sd1, sd2 := g1.StdDev, g2.StdDev
	gjoa.CompareSliceFloat(t, sd1, sd2, "Wrong SD", epsilon)
}
Example #5
0
// CompareGaussians compares the Mean and then the StdDev of g1 and g2 using
// gjoa.CompareSliceFloat with the given tolerance.
func CompareGaussians(t *testing.T, g1 *Model, g2 *Model, tolerance float64) {
	// One row per compared field, checked in declaration order.
	rows := []struct {
		left, right []float64
		msg         string
	}{
		{g1.Mean, g2.Mean, "Wrong Mean"},
		{g1.StdDev, g2.StdDev, "Wrong SD"},
	}
	for _, row := range rows {
		gjoa.CompareSliceFloat(t, row.left, row.right, row.msg, tolerance)
	}
}
Example #6
0
// TestSingleState estimates an HMM with a single emitting state from data
// generated by a reference HMM. In parallel, a plain Gaussian is estimated
// directly from the same observations without using the HMM code.
//
// Should be equivalent to training a single gaussian, great for debugging.
// After training, the transition probabilities and the Gaussian of the
// estimated HMM are compared against the reference, and a decoder built from
// the estimated model set is run on freshly generated data.
func TestSingleState(t *testing.T) {

	// HMM to generate data.
	g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1}))

	h0 := narray.New(3, 3)
	h0.Set(1, 0, 1)
	h0.Set(.8, 1, 1)
	h0.Set(.2, 1, 2)
	h0 = narray.Log(nil, h0.Copy())

	ms0, _ := NewSet()
	net0, e0 := ms0.NewNet("hmm", h0,
		[]model.Modeler{nil, g01, nil})
	fatalIf(t, e0)
	hmm0 := NewModel(OSet(ms0))
	_ = hmm0

	// Create gaussian to estimate without using the HMM code.
	g := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2}))

	// Create initial HMM and estimate params from generated data.
	g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2}))

	h := narray.New(3, 3)
	h.Set(1, 0, 1)
	h.Set(.5, 1, 1)
	h.Set(.5, 1, 2)
	h = narray.Log(nil, h.Copy())

	ms, _ = NewSet()
	net, e := ms.NewNet("hmm", h,
		[]model.Modeler{nil, g1, nil})
	fatalIf(t, e)
	hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true))

	iter := 5
	// number of sequences
	m := 1000
	// numFrames accumulates over every sequence of every iteration, so it is
	// already the total number of frames processed during training.
	numFrames := 0
	t0 := time.Now() // Start timer.
	for i := 0; i < iter; i++ {
		t.Logf("iter [%d]", i)

		// Make sure we generate the same data in each iteration:
		// the generator is re-seeded with a fixed seed.
		r := rand.New(rand.NewSource(33))
		gen := newGenerator(r, false, net0)

		// Reset all counters.
		hmm.Clear()
		g.Clear()

		for j := 0; j < m; j++ {
			obs, states := gen.next("oid-" + fi(j))
			numFrames += len(states) - 2
			hmm.UpdateOne(obs, 1.0)

			// Update Gaussian
			for _, o := range obs.ValueAsSlice() {
				vec := o.([]float64)
				gobs := model.NewFloatObs(vec, model.SimpleLabel(""))
				g.UpdateOne(gobs, 1.0)
			}
		}
		hmm.Estimate()
		g.Estimate()
		t.Logf("iter:%d, hmm g1:   %+v", i, net.B[1])
		t.Logf("iter:%d, direct g1:%+v", i, g)
	}
	dur := time.Since(t0)
	tp0 := narray.Exp(nil, h0.Copy())
	tp := narray.Exp(nil, net.A.Copy())
	ns := tp.Shape[0]
	for i := 0; i < ns; i++ {
		for j := 0; j < ns; j++ {
			p0 := tp0.At(i, j)
			logp0 := h0.At(i, j)
			p := tp.At(i, j)
			// Read the log value from net.A, the same matrix p is derived
			// from, so that p and logp always describe the same model.
			logp := net.A.At(i, j)
			if p > smallNumber || p0 > smallNumber {
				t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp)
			}
		}
	}

	t.Log("")
	t.Logf("hmm0 g1:%+v", net0.B[1])
	t.Logf("hmm  g1: %+v", net.B[1])

	t.Log("")
	t.Logf("direct g1:%+v", g)

	// Print time stats.
	t.Log("")
	t.Logf("Total time: %v", dur)
	t.Logf("Time per iteration: %v", dur/time.Duration(iter))
	// Divide by the total frame count only; guard against a zero divisor,
	// which would panic on integer division.
	if numFrames > 0 {
		t.Logf("Time per frame: %v", dur/time.Duration(numFrames))
	}

	gjoa.CompareSliceFloat(t, tp0.Data, tp.Data,
		"error in Trans Probs [0]", .03)

	CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03)

	// Do not run the decoder if the estimated model is already wrong.
	if t.Failed() {
		t.FailNow()
	}

	// Recognize.
	sg := ms.SearchGraph()

	dec, e := graph.NewDecoder(sg)
	if e != nil {
		t.Fatal(e)
	}

	r := rand.New(rand.NewSource(5151))
	gen := newGenerator(r, true, net0)
	//	testDecoder(t, gen, dec, 1000)
	testDecoder(t, gen, dec, 10)
}
Example #7
0
// TestHMMGauss estimates a two-state HMM with Gaussian output models from
// data generated by a reference HMM. After training, the transition
// probabilities and both Gaussians are compared against the reference with a
// tolerance of 0.03, and a decoder built from the estimated model set is run
// on freshly generated data.
func TestHMMGauss(t *testing.T) {

	// Create reference HMM to generate observations.

	g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1}))
	g02 := gm.NewModel(1, gm.Name("g02"), gm.Mean([]float64{16}), gm.StdDev([]float64{2}))

	h0 := narray.New(4, 4)
	h0.Set(.6, 0, 1)
	h0.Set(.4, 0, 2)
	h0.Set(.9, 1, 1)
	h0.Set(.1, 1, 2)
	h0.Set(.7, 2, 2)
	h0.Set(.3, 2, 3)
	h0 = narray.Log(nil, h0.Copy())

	ms0, _ := NewSet()
	net0, e0 := ms0.NewNet("hmm", h0,
		[]model.Modeler{nil, g01, g02, nil})
	fatalIf(t, e0)
	hmm0 := NewModel(OSet(ms0), UpdateTP(true), UpdateOP(true))
	_ = hmm0

	// Create random HMM and estimate params from obs.

	g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2}))
	g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean([]float64{18}), gm.StdDev([]float64{4}))

	h := narray.New(4, 4)
	h.Set(.5, 0, 1)
	h.Set(.5, 0, 2)
	h.Set(.5, 1, 1)
	h.Set(.5, 1, 2)
	h.Set(.5, 2, 2)
	h.Set(.5, 2, 3)
	h = narray.Log(nil, h.Copy())

	ms, _ = NewSet()
	net, e := ms.NewNet("hmm", h,
		[]model.Modeler{nil, g1, g2, nil})
	fatalIf(t, e)
	hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true))

	iter := 10
	// number of sequences
	m := 500
	// numFrames accumulates over every sequence of every iteration, so it is
	// already the total number of frames processed during training.
	numFrames := 0
	t0 := time.Now() // Start timer.
	for i := 0; i < iter; i++ {
		t.Logf("iter [%d]", i)

		// Make sure we generate the same data in each iteration:
		// the generator is re-seeded with a fixed seed.
		r := rand.New(rand.NewSource(33))
		gen := newGenerator(r, false, net0)

		// Reset all counters.
		hmm.Clear()

		for j := 0; j < m; j++ {
			obs, states := gen.next("oid-" + fi(j))
			numFrames += len(states) - 2
			hmm.UpdateOne(obs, 1.0)
		}
		hmm.Estimate()
	}
	dur := time.Since(t0)
	tp0 := narray.Exp(nil, h0.Copy())
	tp := narray.Exp(nil, net.A.Copy())
	ns := tp.Shape[0]
	for i := 0; i < ns; i++ {
		for j := 0; j < ns; j++ {
			p0 := tp0.At(i, j)
			logp0 := h0.At(i, j)
			p := tp.At(i, j)
			// Read the log value from net.A, the same matrix p is derived
			// from, so that p and logp always describe the same model.
			logp := net.A.At(i, j)
			if p > smallNumber || p0 > smallNumber {
				t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp)
			}
		}
	}

	t.Log("")
	t.Logf("hmm0 g1:%+v, g2:%+v", net0.B[1], net0.B[2])
	t.Logf("hmm  g1: %+v, g2:%+v", net.B[1], net.B[2])

	// Print time stats.
	t.Log("")
	t.Logf("Total time: %v", dur)
	t.Logf("Time per iteration: %v", dur/time.Duration(iter))
	// Divide by the total frame count only; guard against a zero divisor,
	// which would panic on integer division.
	if numFrames > 0 {
		t.Logf("Time per frame: %v", dur/time.Duration(numFrames))
	}

	gjoa.CompareSliceFloat(t, tp0.Data, tp.Data,
		"error in Trans Probs [0]", .03)

	CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03)
	CompareGaussians(t, net0.B[2].(*gm.Model), net.B[2].(*gm.Model), 0.03)

	// Do not run the decoder if the estimated model is already wrong.
	if t.Failed() {
		t.FailNow()
	}

	// Recognize.
	g := ms.SearchGraph()

	dec, e := graph.NewDecoder(g)
	if e != nil {
		t.Fatal(e)
	}

	r := rand.New(rand.NewSource(5151))
	gen := newGenerator(r, true, net0)
	testDecoder(t, gen, dec, 1000)
}
Example #8
0
// TestSeqObserver exercises NewSeqObserver with four inputs:
//
//  1. a reader producing single-vector (non sequence) observations,
//  2. a buffered reader producing sequences,
//  3. a file written from such a reader and decoded directly as JSON,
//  4. the same file passed to the observer.
//
// In every case the values received are compared against the data held by
// the reader that produced them.
func TestSeqObserver(t *testing.T) {

	dim := 4
	maxSeqLen := 10
	numObs := 12
	bufSize := 1000000

	r := rand.New(rand.NewSource(666))

	// Test non sequence data.
	reader := makeObsData(r, numObs, dim, 1)
	data := reader.(*obsReader).data // use to assert
	obs, err := NewSeqObserver(reader)
	if err != nil {
		t.Fatal(err)
	}
	c, e2 := obs.ObsChan()
	if e2 != nil {
		t.Fatal(e2)
	}
	i := 0
	for v := range c {
		if i >= len(data) {
			t.Fatalf("received more observations than expected: %d", len(data))
		}
		got := v.Value().([]float64)
		exp := data[i].Vectors[0]
		if len(got) != len(exp) {
			t.Fatalf("length mismatch - got %d, expected %d", len(got), len(exp))
		}
		// Fail on a mismatch (the same check the sequence sections use)
		// instead of only logging it.
		gjoa.CompareSliceFloat(t, exp, got, "value mismatch", 0.001)
		i++
	}

	// Test sequence data.
	reader = makeObsData(r, numObs, dim, maxSeqLen)
	data = reader.(*obsReader).data // use to assert
	bufReader := bufio.NewReaderSize(reader, bufSize)
	obs, err = NewSeqObserver(bufReader)
	if err != nil {
		t.Fatal(err)
	}
	c, e2 = obs.ObsChan()
	if e2 != nil {
		t.Fatal(e2)
	}

	i = 0
	for v := range c {
		if i >= len(data) {
			t.Fatalf("received more observations than expected: %d", len(data))
		}
		seq := v.Value().([][]float64)
		if len(seq) != len(data[i].Vectors) {
			t.Fatalf("length mismatch - i:%d, got:%d, expected:%d", i, len(seq), len(data[i].Vectors))
		}
		for j, exp := range data[i].Vectors {
			gjoa.CompareSliceFloat(t, exp, seq[j], "value mismatch", 0.001)
		}
		i++
	}

	// Write to a file
	reader = makeObsData(r, numObs, dim, maxSeqLen)
	data = reader.(*obsReader).data // use to assert
	fn := filepath.Join(os.TempDir(), "streamobs.json")
	w, err := os.Create(fn)
	if err != nil {
		t.Fatal(err)
	}
	// Best-effort cleanup; a failure to remove the temp file is not a test error.
	defer os.Remove(fn)
	_, err = io.Copy(w, reader)
	// Always close the file, but report the copy error first if there is one.
	if cerr := w.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		t.Fatal(err)
	}

	// Read file.
	rr, ee := os.Open(fn)
	if ee != nil {
		t.Fatal(ee)
	}
	dec := json.NewDecoder(rr)
	for i := 0; ; i++ {
		var oe Seq
		e := dec.Decode(&oe)
		if e == io.EOF {
			break
		}
		if e != nil {
			t.Fatal(e)
		}
		if i >= len(data) {
			t.Fatalf("decoded more sequences than expected: %d", len(data))
		}
		if len(oe.Vectors) != len(data[i].Vectors) {
			t.Fatalf("length mismatch - i:%d, got:%d, expected:%d", i, len(oe.Vectors), len(data[i].Vectors))
		}
		for j, exp := range data[i].Vectors {
			got := oe.Vectors[j]
			gjoa.CompareSliceFloat(t, exp, got, "value mismatch", 0.001)
		}
	}
	rr.Close()

	// Read file and pass reader to stream observer.
	rr, ee = os.Open(fn)
	if ee != nil {
		t.Fatal(ee)
	}
	// NOTE(review): obs.Close below may already close the file; a second
	// Close on an *os.File only returns an error, which is ignored here.
	defer rr.Close()
	obs, err = NewSeqObserver(rr)
	if err != nil {
		t.Fatal(err)
	}
	c, e2 = obs.ObsChan()
	if e2 != nil {
		t.Fatal(e2)
	}
	i = 0
	for v := range c {
		if i >= len(data) {
			t.Fatalf("received more observations than expected: %d", len(data))
		}
		seq := v.Value().([][]float64)
		if len(seq) != len(data[i].Vectors) {
			t.Fatalf("length mismatch - i:%d, got:%d, expected:%d", i, len(seq), len(data[i].Vectors))
		}
		for j, exp := range data[i].Vectors {
			gjoa.CompareSliceFloat(t, exp, seq[j], "value mismatch", 0.001)
		}
		i++
	}
	err = obs.Close()
	if err != nil {
		t.Fatal(err)
	}
}