Ejemplo n.º 1
0
// makeHMM builds a small HMM test fixture and returns it as a *Model.
//
// A network named "hmm0" is created over a 4x4 transition matrix. The output
// model list is {nil, g1, g2, nil}: positions 1 and 2 hold one-dimensional
// Gaussians (g1: mean 1, sd 1; g2: mean 4, sd 2) and positions 0 and 3 hold
// no model. Transition values are entered in the linear domain and converted
// with narray.Log before the network is created.
//
// The set is stored in ms, which is declared outside this function. Errors
// from NewSet and NewNet are both handed to fatalIf.
func makeHMM(t *testing.T) *Model {

	// Gaussian 1.
	mean1 := []float64{1}
	sd1 := []float64{1}
	g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean(mean1), gm.StdDev(sd1))

	// Gaussian 2.
	mean2 := []float64{4}
	sd2 := []float64{2}
	g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean(mean2), gm.StdDev(sd2))

	// Transition values: Set(value, row, col).
	// An alternative kept from the original source splits the first row as
	// h0.Set(.8, 0, 1) and h0.Set(.2, 0, 2) instead of the single entry below.
	h0 := narray.New(4, 4)
	h0.Set(1, 0, 1)
	h0.Set(.5, 1, 1)
	h0.Set(.5, 1, 2)
	h0.Set(.7, 2, 2)
	h0.Set(.3, 2, 3)
	h0 = narray.Log(nil, h0.Copy())

	// Do not discard the error from NewSet: a failure here would otherwise
	// only show up later as a confusing error or panic in NewNet.
	var err error
	ms, err = NewSet()
	fatalIf(t, err)

	_, err = ms.NewNet("hmm0", h0,
		[]model.Modeler{nil, g1, g2, nil})
	fatalIf(t, err)

	return NewModel(OSet(ms), UpdateTP(true), UpdateOP(true))
}
Ejemplo n.º 2
0
// MakeGMM returns a fixed Gaussian mixture named "mygmm" for use in tests.
//
// The mixture has two two-dimensional components:
//
//	g0: mean {1, 2}, sd {0.3, 0.3}, weight 0.6
//	g1: mean {4, 4}, sd {1, 1},     weight 0.4
//
// The t argument is not used by this function.
func MakeGMM(t *testing.T) *Model {

	// Per-component parameters, indexed by component.
	names := []string{"g0", "g1"}
	means := [][]float64{{1, 2}, {4, 4}}
	sds := [][]float64{{0.3, 0.3}, {1, 1}}
	weights := []float64{0.6, 0.4}

	// The dimension is taken from the first mean vector.
	dim := len(means[0])

	components := make([]*gaussian.Model, 0, len(names))
	for i, name := range names {
		c := gaussian.NewModel(dim, gaussian.Name(name),
			gaussian.Mean(means[i]), gaussian.StdDev(sds[i]))
		components = append(components, c)
	}

	return NewModel(dim, len(components),
		Name("mygmm"), Components(components), Weights(weights))
}
Ejemplo n.º 3
0
// initGaussian returns a new Gaussian obtained by randomly perturbing an
// existing one.
//
// m must hold a *gm.Model; the unchecked type assertion panics otherwise.
// For each of the ModelDim dimensions a single factor is drawn from a normal
// distribution with mean 1.0 and standard deviation 0.2, and both the mean
// and the standard deviation of that dimension are multiplied by it. The
// result keeps the dimension and name of the source model.
func initGaussian(r *rand.Rand, m model.Modeler) *gm.Model {

	src := m.(*gm.Model)

	var means, sds []float64
	for d := 0; d < src.ModelDim; d++ {
		// One draw per dimension, shared by mean and sd.
		factor := 1.0 + 0.2*r.NormFloat64()
		means = append(means, src.Mean[d]*factor)
		sds = append(sds, src.StdDev[d]*factor)
	}

	return gm.NewModel(src.ModelDim,
		gm.Name(src.ModelName), gm.Mean(means), gm.StdDev(sds))
}
Ejemplo n.º 4
0
// randomGaussian creates a Gaussian named id with dim dimensions and random
// parameters drawn from r.
//
// For every dimension, in this order:
//   - the mean is 100 times a uniform integer in [0, 10), i.e. one of
//     0, 100, ..., 900;
//   - the standard deviation is 40 scaled by a factor drawn from a normal
//     distribution with mean 1.0 and standard deviation 0.2.
func randomGaussian(r *rand.Rand, id string, dim int) *gm.Model {

	const baseSD = 40.0

	var means, sds []float64
	for d := 0; d < dim; d++ {
		// Keep the draw order (Intn, then NormFloat64) so that a given
		// seed yields the same model.
		center := float64(100 * r.Intn(10))
		means = append(means, center)

		factor := 1.0 + 0.2*r.NormFloat64()
		sds = append(sds, baseSD*factor)
	}

	return gm.NewModel(dim, gm.Name(id), gm.Mean(means), gm.StdDev(sds))
}
Ejemplo n.º 5
0
// NewModel creates a new Gaussian mixture model with the given dimension and
// number of components.
//
// Defaults are: name "GMM", Diag set to true, and a scratch buffer sized to
// numComponents. Options are applied after these defaults and before the
// remaining fields are filled in:
//
//   - PosteriorSum is allocated if an option left it empty.
//   - Components are created with gaussian.NewModel, named through
//     componentName, if no option supplied them.
//   - Weights may be given as Weights, as LogWeights, or not at all. The
//     missing representation is derived from the one provided; with neither,
//     all components get equal weight. Providing both is a fatal error
//     reported through glog.Fatal.
func NewModel(dim, numComponents int, options ...Option) *Model {

	m := &Model{
		ModelName:   "GMM", // default name
		ModelDim:    dim,
		NComponents: numComponents,
		Diag:        true,
		tmpProbs:    make([]float64, numComponents),
	}
	m.Type = reflect.TypeOf(*m).String()

	// Apply caller options on top of the defaults.
	for _, apply := range options {
		apply(m)
	}

	if len(m.PosteriorSum) == 0 {
		m.PosteriorSum = make([]float64, m.NComponents)
	}

	// Build default components when the caller did not pass any.
	if len(m.Components) == 0 {
		m.Components = make([]*gaussian.Model, numComponents)
		for i := range m.Components {
			name := componentName(m.ModelName, i, m.NComponents)
			m.Components[i] = gaussian.NewModel(m.ModelDim, gaussian.Name(name))
		}
	}

	// Reconcile the linear and log weight vectors.
	haveLog := len(m.LogWeights) > 0
	haveLin := len(m.Weights) > 0

	switch {

	case haveLog && haveLin:
		glog.Fatal("options not allowed: provide only one of LogWeights or Weights")

	case haveLog:
		// Only log weights given: derive the linear weights.
		m.Weights = make([]float64, m.NComponents)
		floatx.Exp(m.Weights, m.LogWeights)

	case haveLin:
		// Only linear weights given: derive the log weights.
		m.LogWeights = make([]float64, numComponents)
		floatx.Log(m.LogWeights, m.Weights)

	default:
		// No weights given: every component gets log(1/NComponents).
		m.LogWeights = make([]float64, numComponents)
		uniform := -math.Log(float64(m.NComponents))
		floatx.Apply(floatx.SetValueFunc(uniform), m.LogWeights, nil)
		m.Weights = make([]float64, m.NComponents)
		floatx.Exp(m.Weights, m.LogWeights)
		glog.Infof("init weights with equal values: %.6f", m.Weights[0])
	}

	return m
}
Ejemplo n.º 6
0
// TestTrainGMM trains a two-component GMM on synthetic data and checks the
// estimated parameters.
//
// Steps:
//  1. Estimate a single Gaussian (mean and sd) over the training set.
//  2. Use that mean and sd to generate a random GMM to be used as seed.
//  3. Run several iterations of the GMM maximum likelihood training
//     algorithm to estimate the weights and the mean and sd vectors of the
//     Gaussian components.
//
// The training set alternates draws from two normal distributions
// (mean0/std0 and mean1/std1). The random source is re-seeded with the same
// value before every pass so that each pass sees the same observations.
// After training, component 1 is compared against mean0/std0, component 0
// against mean1/std1, and both weights against 0.5, all within epsilon.
func TestTrainGMM(t *testing.T) {

	var seed int64 = 33
	numComp := 2
	numIter := 10
	numObs := 1000000

	mean0 := []float64{1, 2}
	std0 := []float64{0.3, 0.3}
	mean1 := []float64{4, 4}
	std1 := []float64{1, 1}
	dim := len(mean0)
	gmm := NewModel(dim, numComp, Name("mygmm"))
	t.Logf("Initial Weights: \n%+v", gmm.Weights)
	{
		// Estimate mean variance of the data.
		g := gaussian.NewModel(dim, gaussian.Name("test training"))

		r := rand.New(rand.NewSource(seed))
		for i := 0; i < numObs; i++ {
			rv, err := model.RandNormalVector(mean0, std0, r)
			if err != nil {
				t.Fatal(err)
			}
			g.UpdateOne(model.F64ToObs(rv), 1.0)
			rv, err = model.RandNormalVector(mean1, std1, r)
			if err != nil {
				t.Fatal(err)
			}
			g.UpdateOne(model.F64ToObs(rv), 1.0)
		}
		g.Estimate()
		t.Logf("Gaussian Model for training set:")
		t.Logf("Mean: \n%+v", g.Mean)
		t.Logf("SD: \n%+v", g.StdDev)

		// Use the estimated mean and sd to generate a seed GMM.
		// This replaces the model created above.
		gmm = RandomModel(g.Mean, g.StdDev, numComp,
			"mygmm", 99)
		t.Logf("Random GMM: %+v.", gmm)
		t.Logf("Component 0: %+v.", gmm.Components[0])
		t.Logf("Component 1: %+v.", gmm.Components[1])
	}

	for iter := 0; iter < numIter; iter++ {
		t.Logf("Starting GMM training iteration %d.", iter)

		// Reset the same random number generator to make sure we use the
		// same observations in each iteration.
		r := rand.New(rand.NewSource(seed))

		// Update GMM stats.
		for i := 0; i < numObs; i++ {
			rv, err := model.RandNormalVector(mean0, std0, r)
			if err != nil {
				t.Fatal(err)
			}
			gmm.UpdateOne(model.F64ToObs(rv), 1.0)
			rv, err = model.RandNormalVector(mean1, std1, r)
			if err != nil {
				t.Fatal(err)
			}
			gmm.UpdateOne(model.F64ToObs(rv), 1.0)
		}

		// Estimates GMM params.
		gmm.Estimate()

		t.Logf("Iter: %d", iter)
		t.Logf("GMM: %+v", gmm)
		t.Logf("Weights: \n%+v", gmm.Weights)
		t.Logf("Likelihood: %f", gmm.Likelihood)
		t.Logf("Num Samples: %f", gmm.NSamples)
		for _, c := range gmm.Components {
			t.Logf("%s: Mean: \n%+v", c.Name(), c.Mean)
			t.Logf("%s: STD: \n%+v", c.Name(), c.StdDev)
		}

		// Prepare for next iteration.
		gmm.Clear()
	}

	// checkComponent compares the estimated mean and sd of one component
	// against the expected vectors, dimension by dimension.
	checkComponent := func(g *gaussian.Model, wantMean, wantSD []float64) {
		for i := 0; i < dim; i++ {
			if !gjoa.Comparef64(wantMean[i], g.Mean[i], epsilon) {
				t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
					i, wantMean[i], g.Mean[i])
			}
			if !gjoa.Comparef64(wantSD[i], g.StdDev[i], epsilon) {
				t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
					i, wantSD[i], g.StdDev[i])
			}
		}
	}

	// With this seed the components come out in swapped order: component 1
	// is checked against the first distribution and component 0 against
	// the second.
	checkComponent(gmm.Components[1], mean0, std0)
	checkComponent(gmm.Components[0], mean1, std1)

	// Both distributions contribute the same number of samples, so each
	// weight is expected to be 0.5. The index is part of the message so a
	// failure identifies the right weight.
	for i := 0; i < numComp; i++ {
		if !gjoa.Comparef64(0.5, gmm.Weights[i], epsilon) {
			t.Errorf("Wrong weights[%d]. Expected: [%f], Got: [%f]",
				i, 0.5, gmm.Weights[i])
		}
	}

}
Ejemplo n.º 7
0
// TestSingleState trains an HMM with a single emitting state and, in
// parallel, a plain Gaussian on the same observations. The two should be
// equivalent, which makes this test useful for debugging.
//
// A reference network (net0) generates the data. A second network (net),
// started from different parameters, is re-estimated for a fixed number of
// iterations; the generator is re-seeded on every iteration so each pass
// sees the same sequences. The trained transition probabilities and the
// trained Gaussian are then compared with the reference within 0.03, and
// finally a decoder built from the trained set is exercised by testDecoder.
func TestSingleState(t *testing.T) {

	// HMM to generate data.
	g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1}))

	h0 := narray.New(3, 3)
	h0.Set(1, 0, 1)
	h0.Set(.8, 1, 1)
	h0.Set(.2, 1, 2)
	h0 = narray.Log(nil, h0.Copy())

	ms0, e0 := NewSet()
	fatalIf(t, e0)
	net0, e0 := ms0.NewNet("hmm", h0,
		[]model.Modeler{nil, g01, nil})
	fatalIf(t, e0)
	hmm0 := NewModel(OSet(ms0))
	_ = hmm0

	// Create gaussian to estimate without using the HMM code.
	g := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2}))

	// Create initial HMM and estimate params from generated data.
	g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2}))

	h := narray.New(3, 3)
	h.Set(1, 0, 1)
	h.Set(.5, 1, 1)
	h.Set(.5, 1, 2)
	h = narray.Log(nil, h.Copy())

	// ms is declared outside this function, so it is assigned, not defined.
	var e error
	ms, e = NewSet()
	fatalIf(t, e)
	net, e := ms.NewNet("hmm", h,
		[]model.Modeler{nil, g1, nil})
	fatalIf(t, e)
	hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true))

	iter := 5
	// number of sequences
	m := 1000
	// numFrames accumulates over every sequence of every iteration.
	numFrames := 0
	t0 := time.Now() // Start timer.
	for i := 0; i < iter; i++ {
		t.Logf("iter [%d]", i)

		// Make sure we generate the same data in each iteration.
		r := rand.New(rand.NewSource(33))
		gen := newGenerator(r, false, net0)

		// Reset all counters.
		hmm.Clear()
		g.Clear()

		// fix the seed to get the same sequence
		for j := 0; j < m; j++ {
			obs, states := gen.next("oid-" + fi(j))
			// Two entries of the state sequence are not counted as frames.
			numFrames += len(states) - 2
			hmm.UpdateOne(obs, 1.0)

			// Update Gaussian
			for _, o := range obs.ValueAsSlice() {
				vec := o.([]float64)
				gobs := model.NewFloatObs(vec, model.SimpleLabel(""))
				g.UpdateOne(gobs, 1.0)
			}
		}
		hmm.Estimate()
		g.Estimate()
		t.Logf("iter:%d, hmm g1:   %+v", i, net.B[1])
		t.Logf("iter:%d, direct g1:%+v", i, g)
	}
	dur := time.Since(t0)
	tp0 := narray.Exp(nil, h0.Copy())
	tp := narray.Exp(nil, net.A.Copy())
	ns := tp.Shape[0]
	for i := 0; i < ns; i++ {
		for j := 0; j < ns; j++ {
			p0 := tp0.At(i, j)
			logp0 := h0.At(i, j)
			p := tp.At(i, j)
			// NOTE(review): logp is read from h, the matrix handed to
			// NewNet, while p comes from net.A. Confirm that net.A and h
			// refer to the same data; otherwise logp shows the initial value.
			logp := h.At(i, j)
			if p > smallNumber || p0 > smallNumber {
				t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp)
			}
		}
	}

	t.Log("")
	t.Logf("hmm0 g1:%+v", net0.B[1])
	t.Logf("hmm  g1: %+v", net.B[1])

	t.Log("")
	t.Logf("direct g1:%+v", g)

	// Print time stats.
	t.Log("")
	t.Logf("Total time: %v", dur)
	t.Logf("Time per iteration: %v", dur/time.Duration(iter))
	// numFrames is already the total over all iterations and sequences, so
	// it is the only divisor needed. Guard against a zero count, which
	// would make the integer division panic.
	if numFrames > 0 {
		t.Logf("Time per frame: %v", dur/time.Duration(numFrames))
	}

	gjoa.CompareSliceFloat(t, tp0.Data, tp.Data,
		"error in Trans Probs [0]", .03)

	CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03)

	// Stop here if training did not converge; decoding would be meaningless.
	if t.Failed() {
		t.FailNow()
	}

	// Recognize.
	sg := ms.SearchGraph()

	dec, e := graph.NewDecoder(sg)
	if e != nil {
		t.Fatal(e)
	}

	r := rand.New(rand.NewSource(5151))
	gen := newGenerator(r, true, net0)
	//	testDecoder(t, gen, dec, 1000)
	testDecoder(t, gen, dec, 10)
}
Ejemplo n.º 8
0
// TestHMMGauss trains an HMM with two Gaussian emitting states on data
// generated by a reference HMM and checks that training recovers the
// reference parameters.
//
// The reference network (net0) generates the observation sequences. A second
// network (net), started from different Gaussians and uniform 0.5
// transitions, is re-estimated for a fixed number of iterations; the
// generator is re-seeded on every iteration so each pass sees the same
// sequences. The trained transition probabilities and both Gaussians are
// compared with the reference within 0.03, and finally a decoder built from
// the trained set is exercised by testDecoder.
func TestHMMGauss(t *testing.T) {

	// Create reference HMM to generate observations.

	g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1}))
	g02 := gm.NewModel(1, gm.Name("g02"), gm.Mean([]float64{16}), gm.StdDev([]float64{2}))

	h0 := narray.New(4, 4)
	h0.Set(.6, 0, 1)
	h0.Set(.4, 0, 2)
	h0.Set(.9, 1, 1)
	h0.Set(.1, 1, 2)
	h0.Set(.7, 2, 2)
	h0.Set(.3, 2, 3)
	h0 = narray.Log(nil, h0.Copy())

	ms0, e0 := NewSet()
	fatalIf(t, e0)
	net0, e0 := ms0.NewNet("hmm", h0,
		[]model.Modeler{nil, g01, g02, nil})
	fatalIf(t, e0)
	hmm0 := NewModel(OSet(ms0), UpdateTP(true), UpdateOP(true))
	_ = hmm0

	// Create random HMM and estimate params from obs.

	g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2}))
	g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean([]float64{18}), gm.StdDev([]float64{4}))

	h := narray.New(4, 4)
	h.Set(.5, 0, 1)
	h.Set(.5, 0, 2)
	h.Set(.5, 1, 1)
	h.Set(.5, 1, 2)
	h.Set(.5, 2, 2)
	h.Set(.5, 2, 3)
	h = narray.Log(nil, h.Copy())

	// ms is declared outside this function, so it is assigned, not defined.
	var e error
	ms, e = NewSet()
	fatalIf(t, e)
	net, e := ms.NewNet("hmm", h,
		[]model.Modeler{nil, g1, g2, nil})
	fatalIf(t, e)
	hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true))

	iter := 10
	// number of sequences
	m := 500
	// numFrames accumulates over every sequence of every iteration.
	numFrames := 0
	t0 := time.Now() // Start timer.
	for i := 0; i < iter; i++ {
		t.Logf("iter [%d]", i)

		// Make sure we generate the same data in each iteration.
		r := rand.New(rand.NewSource(33))
		gen := newGenerator(r, false, net0)

		// Reset all counters.
		hmm.Clear()

		// fix the seed to get the same sequence
		for j := 0; j < m; j++ {
			obs, states := gen.next("oid-" + fi(j))
			// Two entries of the state sequence are not counted as frames.
			numFrames += len(states) - 2
			hmm.UpdateOne(obs, 1.0)
		}
		hmm.Estimate()
	}
	dur := time.Since(t0)
	tp0 := narray.Exp(nil, h0.Copy())
	tp := narray.Exp(nil, net.A.Copy())
	ns := tp.Shape[0]
	for i := 0; i < ns; i++ {
		for j := 0; j < ns; j++ {
			p0 := tp0.At(i, j)
			logp0 := h0.At(i, j)
			p := tp.At(i, j)
			// NOTE(review): logp is read from h, the matrix handed to
			// NewNet, while p comes from net.A. Confirm that net.A and h
			// refer to the same data; otherwise logp shows the initial value.
			logp := h.At(i, j)
			if p > smallNumber || p0 > smallNumber {
				t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp)
			}
		}
	}

	t.Log("")
	t.Logf("hmm0 g1:%+v, g2:%+v", net0.B[1], net0.B[2])
	t.Logf("hmm  g1: %+v, g2:%+v", net.B[1], net.B[2])

	// Print time stats.
	t.Log("")
	t.Logf("Total time: %v", dur)
	t.Logf("Time per iteration: %v", dur/time.Duration(iter))
	// numFrames is already the total over all iterations and sequences, so
	// it is the only divisor needed. Guard against a zero count, which
	// would make the integer division panic.
	if numFrames > 0 {
		t.Logf("Time per frame: %v", dur/time.Duration(numFrames))
	}

	gjoa.CompareSliceFloat(t, tp0.Data, tp.Data,
		"error in Trans Probs [0]", .03)

	CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03)
	CompareGaussians(t, net0.B[2].(*gm.Model), net.B[2].(*gm.Model), 0.03)

	// Stop here if training did not converge; decoding would be meaningless.
	if t.Failed() {
		t.FailNow()
	}

	// Recognize.
	g := ms.SearchGraph()

	dec, e := graph.NewDecoder(g)
	if e != nil {
		t.Fatal(e)
	}

	r := rand.New(rand.NewSource(5151))
	gen := newGenerator(r, true, net0)
	testDecoder(t, gen, dec, 1000)
}