Example #1
func TestTrainGaussian(t *testing.T) {

	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}
	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test training"))

	r := rand.New(rand.NewSource(33))
	for i := 0; i < 2000000; i++ {
		rv := model.RandNormalVector(r, mean, std)
		g.UpdateOne(model.F64ToObs(rv, ""), 1.0)
	}
	g.Estimate()
	t.Logf("Mean: \n%+v", g.Mean)
	t.Logf("STD: \n%+v", g.StdDev)

	for i := range mean {
		if !gjoa.Comparef64(mean[i], g.Mean[i], tolerance) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std[i], g.StdDev[i], tolerance) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std[i], g.StdDev[i])
		}
	}
}
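Every example on this page wraps a raw []float64 in an observation value via model.F64ToObs before passing it to UpdateOne. As a rough illustration of what such a helper involves (the Obs interface and floatObs type below are assumptions for the sketch, not the library's actual definitions), it simply pairs the vector with an optional label:

// Illustrative sketch only; the real gjoa model types may differ.
type Obs interface {
	Value() interface{} // the underlying data, e.g. a []float64
	Label() string      // an optional label, "" when unused
}

// floatObs is a hypothetical concrete observation holding a feature vector.
type floatObs struct {
	value []float64
	label string
}

func (o floatObs) Value() interface{} { return o.value }
func (o floatObs) Label() string      { return o.label }

// F64ToObs wraps a []float64 and a label as an Obs.
func F64ToObs(v []float64, label string) Obs {
	return floatObs{value: v, label: label}
}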
Example #2
func (ch *chain) update() error {

	ch.fb() // Compute forward-backward probabilities.
	logProb := ch.beta.At(0, 0, 0)
	if logProb == math.Inf(-1) {
		return fmt.Errorf("oid:%s, log prob is -Inf, skipping training sequence, num vectors:%d, chain len:%d, states per chain:%v", ch.obs.ID(), ch.nobs, ch.nq, ch.ns)
	}
	totalProb := math.Exp(logProb)
	glog.V(2).Infof("oid:%s, log prob per observation:%e", ch.obs.ID(), logProb/float64(ch.nobs))

	// Compute state transition counts.
	glog.V(2).Infof("oid:%s, compute hmm state transition counts.", ch.obs.ID())
	for q, h := range ch.hmms {
		exit := ch.ns[q] - 1
		for t, vec := range ch.vectors {
			for i := 0; i < exit; i++ {
				w := ch.doOccAcc(q, i, t, totalProb) / totalProb
				ch.doTrAcc(q, i, t, totalProb)
				if i > 0 {
					o := model.F64ToObs(vec, "")
					h.B[i].UpdateOne(o, w) // TODO prove!
				}
			}
		}
	}
	return nil
}
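The division by totalProb above is the usual Baum-Welch normalization: the expected occupancy of state i at time t is gamma_t(i) = alpha_t(i) * beta_t(i) / P(O), where P(O) is the total probability of the observation sequence. Since the chain stores log probabilities (beta.At(0, 0, 0) is log P(O)), the same quantity can be computed entirely in the log domain. A minimal sketch, assuming the math package is imported and that the three arguments are the forward, backward, and total log probabilities:

// occupancy computes gamma = alpha*beta/P(O) by summing in the log
// domain first, which avoids the underflow that multiplying raw
// probabilities would cause on long observation sequences.
func occupancy(logAlpha, logBeta, logProb float64) float64 {
	return math.Exp(logAlpha + logBeta - logProb)
}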
Example #3
func TestCloneGaussian(t *testing.T) {

	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test cloning"))

	r := rand.New(rand.NewSource(33))
	for i := 0; i < 2000; i++ {
		rv := model.RandNormalVector(r, mean, std)
		g.UpdateOne(model.F64ToObs(rv, ""), 1.0)
	}
	g.Estimate()

	ng := NewModel(g.ModelDim, Clone(g))

	// compare g vs. ng
	type table struct {
		v1, v2 []float64
		name   string
	}

	tab := []table{
		{g.Sumx, ng.Sumx, "Sumx"},
		{g.Sumxsq, ng.Sumxsq, "Sumxsq"},
		{g.Mean, ng.Mean, "Mean"},
		{g.StdDev, ng.StdDev, "StdDev"},
		{g.variance, ng.variance, "variance"},
		{g.varianceInv, ng.varianceInv, "varianceInv"},
		{[]float64{g.const1}, []float64{ng.const1}, "const1"},
		{[]float64{g.const2}, []float64{ng.const2}, "const2"},
	}

	// compare slices
	for _, v := range tab {
		gjoa.CompareSliceFloat(t, v.v1, v.v2, "no match: "+v.name, 0.00001)
	}

	//	if ng.BaseModel.Model == g.BaseModel.Model {
	//		t.Fatalf("Modeler is the same.")
	//	}
	if ng.NSamples != g.NSamples {
		t.Fatalf("NSamples doesn't match.")
	}
}
Example #4
func (ch *chain) updateFromAlignments() error {

	// Get alignments.
	aligner, ok := ch.obs.(model.Aligner)
	if !ok {
		return fmt.Errorf("oid:%s - obs object does not implement the aligner interface", ch.obs.ID())
	}
	al := aligner.Alignment()
	glog.V(6).Infof("oid:%s, alignments: %v", ch.obs.ID(), al)
	if len(al) == 0 {
		return fmt.Errorf("oid:%s - alignment object has zero length", ch.obs.ID())
	}
	glog.V(2).Infof("oid:%s, estimating output PDF from alignment with %d nodes", ch.obs.ID(), len(al))

	if al[len(al)-1].End != ch.nobs {
		return fmt.Errorf("oid:%s - alignment length is [%d] - does not match num observations in sequence [%d]", ch.obs.ID(), al[len(al)-1].End, ch.nobs)
	}

	// Iterate over alignment nodes. Find the net by name and state number.
	// TODO: hardcoded for state alignments. Need to impl. net-level alignments?
	// Also, this is using a naming convention (xxx-N), can we use a better design?
	// Include state index in alignment node?
	for _, node := range al {
		s := strings.Split(node.Name, "-") // format is xxx-N where xxx is the net name and N is the state index.
		if len(s) != 2 {
			return fmt.Errorf("oid:%s - there must be exactly one \"-\" in the alignment node name [%s]", ch.obs.ID(), node.Name)
		}
		glog.V(6).Infof("oid:%s - align node name: %s, state: %s", ch.obs.ID(), s[0], s[1])
		h, ok := ch.ms.byName[s[0]]
		if !ok {
			return fmt.Errorf("oid:%s - no model found for net name [%s]", ch.obs.ID(), s[0])
		}
		st, err := strconv.ParseInt(s[1], 10, 32)
		if err != nil {
			return err
		}
		for p := node.Start; p < node.End; p++ {
			vec := ch.vectors[p]
			o := model.F64ToObs(vec, "")
			h.B[int(st)].UpdateOne(o, 1)
		}
	}
	return nil
}
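The strings.Split parsing above rejects any node name with more than one "-", which the TODO acknowledges is a fragile convention. One possible improvement, shown here as a hypothetical helper rather than part of the package, is to split on the last "-" so net names may themselves contain dashes:

// splitNodeName is a hypothetical alternative to the strings.Split
// parsing above: it splits "xxx-N" on the last "-", so the net name
// "xxx" may itself contain dashes; N must parse as a non-negative int.
// Assumes the fmt, strconv, and strings packages are imported.
func splitNodeName(name string) (net string, state int, err error) {
	i := strings.LastIndex(name, "-")
	if i < 0 {
		return "", 0, fmt.Errorf("no \"-\" in alignment node name [%s]", name)
	}
	n, err := strconv.Atoi(name[i+1:])
	if err != nil || n < 0 {
		return "", 0, fmt.Errorf("bad state index in alignment node name [%s]", name)
	}
	return name[:i], n, nil
}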
Example #5
// Train without using a sampler.
func BenchmarkTrain(b *testing.B) {

	dim := 8
	mean := []float64{0.1, 0.2, 0.3, 0.4, 1, 1, 1, 1}
	std := []float64{0.5, 0.5, 0.5, 0.5, 0.1, 0.2, 0.3, 0.4}
	g := NewModel(dim, Name("test training"))

	r := rand.New(rand.NewSource(33))
	buf := make([][]float64, 2000000)
	for i := 0; i < 2000000; i++ {
		rv := model.RandNormalVector(r, mean, std)
		buf[i] = rv
	}

	b.ResetTimer() // exclude the data-generation setup above from the timing
	for i := 0; i < b.N; i++ {
		for j := 0; j < 2000000; j++ {
			g.UpdateOne(model.F64ToObs(buf[j], ""), 1.0)
		}
		g.Estimate()
		g.Clear()
	}
}
Example #6
// logProbInternal computes the log probability for the mixture.
// SIDE EFFECT: it also writes each Gaussian component's log prob plus
// its log weight into the slice passed as the probs argument.
func (gmm *Model) logProbInternal(obs, probs []float64) float64 {

	var max = -math.MaxFloat64

	// Compute log probabilities for this observation.
	for i, c := range gmm.Components {
		o := model.F64ToObs(obs, "")
		v1 := c.LogProb(o)
		v2 := gmm.LogWeights[i]
		v := v1 + v2

		if probs != nil {
			probs[i] = v
		}
		if v > max {
			max = v
		}
	}

	// To simplify computation, use the max prob in the denominator instead
	// of the sum.
	return max
}
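Returning only the max term makes this a lower bound on the true mixture log probability, since log(sum_i exp(v_i)) >= max_i v_i. The exact value replaces the max with a log-sum-exp over the per-component scores. A minimal sketch (assuming the math package is imported) that could consume the probs slice filled above:

// logSumExp computes log(sum_i exp(probs[i])) stably by factoring out
// the maximum term; this is the exact mixture log probability that the
// max-only shortcut above approximates from below.
func logSumExp(probs []float64) float64 {
	max := math.Inf(-1)
	for _, v := range probs {
		if v > max {
			max = v
		}
	}
	if math.IsInf(max, -1) {
		return max // all components carry zero probability
	}
	var sum float64
	for _, v := range probs {
		sum += math.Exp(v - max)
	}
	return max + math.Log(sum)
}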
Example #7
// prob returns the probability of obs, i.e. the exp of the model's log probability.
func (gmm *Model) prob(obs []float64) float64 {
	return math.Exp(gmm.LogProb(model.F64ToObs(obs, "")))
}
Example #8
// Trains a GMM as follows:
// 1 - Estimate Gaussian model parameters for the training set.
// 2 - Use the mean and standard deviation of the training set to generate
//     a random GMM to be used as seed.
// 3 - Run several iterations of the GMM max likelihood training algorithm
//     to estimate the GMM weights and the Gaussian component mean and
//     variance vectors.
func TestTrainGMM(t *testing.T) {

	var seed int64 = 33
	numComp := 2
	numIter := 10
	numObs := 1000000

	mean0 := []float64{1, 2}
	std0 := []float64{0.3, 0.3}
	mean1 := []float64{4, 4}
	std1 := []float64{1, 1}
	dim := len(mean0)
	gmm := NewModel(dim, numComp, Name("mygmm"))
	t.Logf("Initial Weights: \n%+v", gmm.Weights)
	{
		// Estimate the mean and variance of the data.
		g := gaussian.NewModel(dim, gaussian.Name("test training"))

		r := rand.New(rand.NewSource(seed))
		for i := 0; i < numObs; i++ {
			rv, err := model.RandNormalVector(mean0, std0, r)
			if err != nil {
				t.Fatal(err)
			}
			g.UpdateOne(model.F64ToObs(rv), 1.0)
			rv, err = model.RandNormalVector(mean1, std1, r)
			if err != nil {
				t.Fatal(err)
			}
			g.UpdateOne(model.F64ToObs(rv), 1.0)
		}
		g.Estimate()
		t.Logf("Gaussian Model for training set:")
		t.Logf("Mean: \n%+v", g.Mean)
		t.Logf("SD: \n%+v", g.StdDev)

		// Use the estimated mean and sd to generate a seed GMM.
		gmm = RandomModel(g.Mean, g.StdDev, numComp,
			"mygmm", 99)
		t.Logf("Random GMM: %+v.", gmm)
		t.Logf("Component 0: %+v.", gmm.Components[0])
		t.Logf("Component 1: %+v.", gmm.Components[1])
	}

	for iter := 0; iter < numIter; iter++ {
		t.Logf("Starting GMM training iteration %d.", iter)

		// Reset the same random number generator to make sure we use the
		// same observations in each iteration.
		r := rand.New(rand.NewSource(seed))

		// Update GMM stats.
		for i := 0; i < numObs; i++ {
			rv, err := model.RandNormalVector(mean0, std0, r)
			if err != nil {
				t.Fatal(err)
			}
			gmm.UpdateOne(model.F64ToObs(rv), 1.0)
			rv, err = model.RandNormalVector(mean1, std1, r)
			if err != nil {
				t.Fatal(err)
			}
			gmm.UpdateOne(model.F64ToObs(rv), 1.0)
		}

		// Estimate GMM params.
		gmm.Estimate()

		t.Logf("Iter: %d", iter)
		t.Logf("GMM: %+v", gmm)
		t.Logf("Weights: \n%+v", gmm.Weights)
		t.Logf("Likelihood: %f", gmm.Likelihood)
		t.Logf("Num Samples: %f", gmm.NSamples)
		for _, c := range gmm.Components {
			t.Logf("%s: Mean: \n%+v", c.Name(), c.Mean)
			t.Logf("%s: STD: \n%+v", c.Name(), c.StdDev)
		}

		// Prepare for next iteration.
		gmm.Clear()
	}

	for i := 0; i < dim; i++ {
		g := gmm.Components[1]
		if !gjoa.Comparef64(mean0[i], g.Mean[i], epsilon) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean0[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std0[i], g.StdDev[i], epsilon) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std0[i], g.StdDev[i])
		}
	}

	for i := 0; i < dim; i++ {
		g := gmm.Components[0]
		if !gjoa.Comparef64(mean1[i], g.Mean[i], epsilon) {
			t.Errorf("Wrong Mean[%d]. Expected: [%f], Got: [%f]",
				i, mean1[i], g.Mean[i])
		}
		if !gjoa.Comparef64(std1[i], g.StdDev[i], epsilon) {
			t.Errorf("Wrong STD[%d]. Expected: [%f], Got: [%f]",
				i, std1[i], g.StdDev[i])
		}
	}

	if !gjoa.Comparef64(0.5, gmm.Weights[0], epsilon) {
		t.Errorf("Wrong weights[0]. Expected: [%f], Got: [%f]",
			0.5, gmm.Weights[0])
	}

	if !gjoa.Comparef64(0.5, gmm.Weights[1], epsilon) {
		t.Errorf("Wrong weights[1]. Expected: [%f], Got: [%f]",
			0.5, gmm.Weights[1])
	}
}
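Note that the assertions above hardcode which component converged to which cluster: Components[1] is checked against mean0 and Components[0] against mean1, an ordering that is an artifact of this particular random seed. A seed-independent variant could first match each reference mean to its nearest component. A hypothetical helper, assuming the math and gaussian packages are imported and that each component exposes a Mean slice as in the loops above:

// nearestComponent returns the component whose mean is closest to ref
// in squared Euclidean distance, so assertions need not depend on the
// seed-dependent ordering of gmm.Components.
func nearestComponent(comps []*gaussian.Model, ref []float64) *gaussian.Model {
	best, bestDist := comps[0], math.Inf(1)
	for _, c := range comps {
		var d float64
		for i := range ref {
			diff := c.Mean[i] - ref[i]
			d += diff * diff
		}
		if d < bestDist {
			best, bestDist = c, d
		}
	}
	return best
}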