func initChainFB(t *testing.T) { var err error h0 := narray.New(nstates[0], nstates[0]) h1 := narray.New(nstates[1], nstates[1]) h0.Set(1, 0, 1) h0.Set(.5, 1, 1) h0.Set(.5, 1, 2) h0.Set(.3, 2, 2) h0.Set(.6, 2, 3) h0.Set(.1, 2, 4) h0.Set(.7, 3, 3) h0.Set(.3, 3, 4) h1.Set(1, 0, 1) h1.Set(.3, 1, 1) h1.Set(.2, 1, 2) h1.Set(.5, 1, 3) h1.Set(.6, 2, 2) h1.Set(.4, 2, 3) h0 = narray.Log(nil, h0.Copy()) h1 = narray.Log(nil, h1.Copy()) ms, _ = NewSet() hmm0, err = ms.NewNet("model 0", h0, []model.Modeler{nil, newScorer(0, 1), newScorer(0, 2), newScorer(0, 3), nil}) fatalIf(t, err) hmm1, err = ms.NewNet("model 1", h1, []model.Modeler{nil, newScorer(1, 1), newScorer(1, 2), nil}) fatalIf(t, err) }
// NewNet creates a new HMM network. func (ms *Set) NewNet(name string, a *narray.NArray, b []model.Modeler) (*Net, error) { if len(a.Shape) != 2 || a.Shape[0] != a.Shape[1] { panic("rank must be 2 and matrix should be square") } numStates := a.Shape[0] if len(b) != numStates { err := fmt.Sprintf("length of b is [%d]. must match shape of a [%d]", len(b), a.Shape[0]) panic(err) } net := &Net{ Name: name, A: a, B: b, TrAcc: narray.New(numStates, numStates), OccAcc: narray.New(numStates), ns: numStates, } err := ms.add(net) if err != nil { return nil, err } return net, nil }
func (ms *Set) chainFromNets(obs model.Obs, m ...*Net) (*chain, error) { fos, ok := obs.(model.FloatObsSequence) if !ok { return nil, fmt.Errorf("obs must be of type model.FloatObsSequence, found type %s which is not supported", reflect.TypeOf(obs.Value())) } if len(fos.ValueAsSlice()) == 0 { return nil, fmt.Errorf("obs sequence has no data") } ch := &chain{ hmms: m, nq: len(m), ns: make([]int, len(m), len(m)), obs: obs, vectors: fos.Value().([][]float64), nobs: len(fos.Value().([][]float64)), ms: ms, } for k, v := range m { if !ms.exist(v) { return nil, fmt.Errorf("model [%s] with id [%d] is not in set", v.Name, v.id) } if v.A.Shape[0] > ch.maxNS { ch.maxNS = v.A.Shape[0] } ch.ns[k] = v.A.Shape[0] } ch.alpha = narray.New(ch.nq, ch.maxNS, ch.nobs) ch.beta = narray.New(ch.nq, ch.maxNS, ch.nobs) if glog.V(6) { glog.Info("lab: ", ch.obs.Label()) glog.Info("obs: ", ch.obs.Value()) } ch.computeLikelihoods() // Validate. // Can't have models with transitions from entry to exit states // at the beginning or end of a chain. if isTeeModel(m[0]) { return nil, fmt.Errorf("first model in chain can't have entry to exit transition - model name is [%s] with id [%d]", m[0].Name, m[0].id) } if isTeeModel(m[len(m)-1]) { return nil, fmt.Errorf("last model in chain can't have entry to exit transition - model name is [%s] with id [%d]", m[0].Name, m[0].id) } return ch, nil }
func TestModelName(t *testing.T) { ms2, _ := NewSet() _, err := ms2.NewNet("XXX", narray.New(3, 3), []model.Modeler{nil, nil, nil}) fatalIf(t, err) _, errr := ms2.NewNet("XXX", narray.New(4, 4), []model.Modeler{nil, nil, nil, nil}) if errr == nil { t.Fatalf("expected error, got nil for duplicate model name") } }
func makeHMM(t *testing.T) *Model { // Gaussian 1. mean1 := []float64{1} sd1 := []float64{1} g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean(mean1), gm.StdDev(sd1)) // Gaussian 2. mean2 := []float64{4} sd2 := []float64{2} g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean(mean2), gm.StdDev(sd2)) var err error h0 := narray.New(4, 4) // h0.Set(.8, 0, 1) h0.Set(1, 0, 1) // h0.Set(.2, 0, 2) h0.Set(.5, 1, 1) h0.Set(.5, 1, 2) h0.Set(.7, 2, 2) h0.Set(.3, 2, 3) h0 = narray.Log(nil, h0.Copy()) ms, _ = NewSet() _, err = ms.NewNet("hmm0", h0, []model.Modeler{nil, g1, g2, nil}) fatalIf(t, err) return NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) }
func (ch *chain) computeLikelihoods() { ch.likelihoods = narray.New(ch.nq, ch.maxNS, ch.nobs) for q, h := range ch.hmms { for i := 1; i < ch.ns[q]-1; i++ { for t, v := range ch.vectors { ll := h.logProb(i, v) ch.likelihoods.Set(ll, q, i, t) glog.V(6).Infof("q:%d, i:%d, t:%d, likelihood:%8.3f", q, i, t, ll) } } } }
// makes left-to-right hmm with self loops. // ns is the total number of states including entry/exit. // selfProb is the prob of the self loop with value between 0 and 1. // skipProb is the prob of skipping next state. Make it zero for no skips. // for ns=3, skipProb is set to zero. func (ms *Set) makeLeftToRight(name string, ns int, selfProb, skipProb float64, dists []model.Modeler) (*Net, error) { if selfProb >= 1 || skipProb >= 1 || selfProb < 0 || skipProb < 0 { panic("probabilities must have value >= 0 and < 1") } if selfProb+skipProb >= 1 { panic("selfProb + skipProb must be less than 1") } if len(dists) != ns { panic("length of dists must match number of states") } if ns < 3 { panic("min number of states is 3") } p := selfProb var q float64 if ns > 3 { q = skipProb } else { q = 0 } r := 1.0 - p - q h := narray.New(ns, ns) // state 0 h.Set(1-q, 0, 1) // entry h.Set(q, 0, 2) // skip first emmiting state // states 1..ns-3 for i := 1; i < ns-2; i++ { h.Set(p, i, i) // self loop h.Set(r, i, i+1) // to right h.Set(q, i, i+2) // skip } // state ns-2 h.Set(p, ns-2, ns-2) // self h.Set(1-p, ns-2, ns-1) // to exit (no skip) // convert to log. h = narray.Log(nil, h.Copy()) hmm, err := ms.NewNet(name, h, dists) if err != nil { return nil, err } return hmm, nil }
// randTrans generates a left-to right random transition prob matrix. // n is the total number of states including entry/exit. func randTrans(r *rand.Rand, n int, skip bool) *narray.NArray { if n < 3 { panic("need at least 3 states") } a := narray.New(n, n) // state 0 if n > 3 && skip { p := getProbs(r, 2) a.Set(p[0], 0, 1) // entry a.Set(p[1], 0, 2) // skip first emmiting state } else { a.Set(1, 0, 1) // entry } // states 1..n-3 for i := 1; i < n-2; i++ { if skip { p := getProbs(r, 3) a.Set(p[0], i, i) // self loop a.Set(p[1], i, i+1) // to right a.Set(p[2], i, i+2) // skip } else { p := getProbs(r, 2) a.Set(p[0], i, i) // self loop a.Set(p[1], i, i+1) // to right } } // state ns-2 p := getProbs(r, 2) a.Set(p[0], n-2, n-2) // self a.Set(p[1], n-2, n-1) // to exit (no skip) if glog.V(8) { st := a.Sprint(func(na *narray.NArray, k int) bool { if na.Data[k] > 0 { return true } return false }) glog.Info(st) } return narray.Log(nil, a) }
// MakeLeftToRight creates a transition probability matrix for a left-to-right HMM. // * ns is the total number of states including entry/exit. Must be 3 or greater. // * selfProb is the prob of the self loop with value between 0 and 1. // * skipProb is the prob of skipping next state. Make it zero for no skips. // * for ns=3, skipProb is set to zero. func MakeLeftToRight(ns int, selfProb, skipProb float64) *narray.NArray { if selfProb >= 1 || skipProb >= 1 || selfProb < 0 || skipProb < 0 { panic("probabilities must have value >= 0 and < 1") } if selfProb+skipProb >= 1 { panic("selfProb + skipProb must be less than 1") } if ns < 3 { panic("min number of states is 3") } p := selfProb var q float64 if ns > 3 { q = skipProb } else { q = 0 } r := 1.0 - p - q h := narray.New(ns, ns) // state 0 h.Set(1-q, 0, 1) // entry h.Set(q, 0, 2) // skip first emmiting state // states 1..ns-3 for i := 1; i < ns-2; i++ { h.Set(p, i, i) // self loop h.Set(r, i, i+1) // to right h.Set(q, i, i+2) // skip } // state ns-2 h.Set(p, ns-2, ns-2) // self h.Set(1-p, ns-2, ns-1) // to exit (no skip) return narray.Log(h, h) }
// Use the label to create a chain of hmms. // If model set only has one hmm, no need to use labels, simply assign the only hmm. func (ms *Set) chainFromAssigner(obs model.Obs, assigner Assigner) (*chain, error) { var hmms []*Net var fos model.FloatObsSequence switch o := obs.(type) { case model.FloatObsSequence: fos = o case *model.FloatObsSequence: fos = *o default: return nil, fmt.Errorf("obs must be of type model.FloatObsSequence, found type %s which is not supported", reflect.TypeOf(obs)) } if len(fos.ValueAsSlice()) == 0 { return nil, fmt.Errorf("obs sequence has no data") } if assigner == nil && ms.size() == 1 { hmms = append(hmms, ms.Nets[0]) if glog.V(3) { glog.Warningf("assigner missing but model set has only one hmm network - assigning model [%s] to chain", ms.Nets[0].Name) } } else if assigner == nil { return nil, fmt.Errorf("need assigner to create hmm chain if model set is greater than one") } else { // Get the labeler and check that it is of type SimpleLabeler // otherwise return error. labeler, ok := obs.Label().(model.SimpleLabel) if !ok { return nil, fmt.Errorf("labeler must be of type model.SimpleLabel, found type %s which is not supported", reflect.TypeOf(obs.Label())) } // Now we need to assign hmms to the chain. // We will use the sequence of labels to lookup the models by name. labels := strings.Split(labeler.String(), ",") if len(labels) == 1 && len(labels[0]) == 0 { return nil, fmt.Errorf("no label found, can't assign models") } modelNames := assigner.Assign(labels) glog.V(5).Infof("obsid:%s, labels:%v", obs.ID(), labels) glog.V(5).Infof("obsid:%s, models:%v", obs.ID(), modelNames) for _, name := range modelNames { h, ok := ms.net(name) if !ok { return nil, fmt.Errorf("can't find model for name [%s] in set - assignment failed", name) } hmms = append(hmms, h) } } nq := len(hmms) if nq == 0 { return nil, fmt.Errorf("the assigner returned no models") } ch := &chain{ hmms: hmms, nq: nq, ns: make([]int, nq, nq), obs: obs, vectors: fos.Value().([][]float64), nobs: len(fos.Value().([][]float64)), ms: ms, } for k, hmm := range hmms { if hmm.A.Shape[0] > ch.maxNS { ch.maxNS = hmm.A.Shape[0] } ch.ns[k] = hmm.A.Shape[0] } ch.alpha = narray.New(ch.nq, ch.maxNS, ch.nobs) ch.beta = narray.New(ch.nq, ch.maxNS, ch.nobs) ch.computeLikelihoods() // Validate. // Can't have models with transitions from entry to exit states // at the beginning or end of a chain. if isTeeModel(hmms[0]) { return nil, fmt.Errorf("the first model in the chain can't have a transition from entry to exit states - model name is [%s] with id [%d]", hmms[0].Name, hmms[0].id) } if isTeeModel(hmms[nq-1]) { return nil, fmt.Errorf("the last model in the chain can't have a transition from entry to exit states - model name is [%s] with id [%d]", hmms[nq-1].Name, hmms[nq-1].id) } return ch, nil }
// should be equivalent to training a single gaussian, great for debugging. func TestSingleState(t *testing.T) { // HMM to generate data. g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1})) h0 := narray.New(3, 3) h0.Set(1, 0, 1) h0.Set(.8, 1, 1) h0.Set(.2, 1, 2) h0 = narray.Log(nil, h0.Copy()) ms0, _ := NewSet() net0, e0 := ms0.NewNet("hmm", h0, []model.Modeler{nil, g01, nil}) fatalIf(t, e0) hmm0 := NewModel(OSet(ms0)) _ = hmm0 // Create gaussian to estimate without using the HMM code. g := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) // Create initial HMM and estimate params from generated data. g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) h := narray.New(3, 3) h.Set(1, 0, 1) h.Set(.5, 1, 1) h.Set(.5, 1, 2) h = narray.Log(nil, h.Copy()) ms, _ = NewSet() net, e := ms.NewNet("hmm", h, []model.Modeler{nil, g1, nil}) fatalIf(t, e) hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) iter := 5 // number of sequences m := 1000 numFrames := 0 t0 := time.Now() // Start timer. for i := 0; i < iter; i++ { t.Logf("iter [%d]", i) // Make sure we generate the same data in each iteration. r := rand.New(rand.NewSource(33)) gen := newGenerator(r, false, net0) // Reset all counters. hmm.Clear() g.Clear() // fix the seed to get the same sequence for j := 0; j < m; j++ { obs, states := gen.next("oid-" + fi(j)) numFrames += len(states) - 2 hmm.UpdateOne(obs, 1.0) // Update Gaussian for _, o := range obs.ValueAsSlice() { vec := o.([]float64) gobs := model.NewFloatObs(vec, model.SimpleLabel("")) g.UpdateOne(gobs, 1.0) } } hmm.Estimate() g.Estimate() t.Logf("iter:%d, hmm g1: %+v", i, net.B[1]) t.Logf("iter:%d, direct g1:%+v", i, g) } dur := time.Now().Sub(t0) tp0 := narray.Exp(nil, h0.Copy()) tp := narray.Exp(nil, net.A.Copy()) ns := tp.Shape[0] for i := 0; i < ns; i++ { for j := 0; j < ns; j++ { p0 := tp0.At(i, j) logp0 := h0.At(i, j) p := tp.At(i, j) logp := h.At(i, j) if p > smallNumber || p0 > smallNumber { t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp) } } } t.Log("") t.Logf("hmm0 g1:%+v", net0.B[1]) t.Logf("hmm g1: %+v", net.B[1]) t.Log("") t.Logf("direct g1:%+v", g) // Print time stats. t.Log("") t.Logf("Total time: %v", dur) t.Logf("Time per iteration: %v", dur/time.Duration(iter)) t.Logf("Time per frame: %v", dur/time.Duration(iter*numFrames*m)) gjoa.CompareSliceFloat(t, tp0.Data, tp.Data, "error in Trans Probs [0]", .03) CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03) if t.Failed() { t.FailNow() } // Recognize. sg := ms.SearchGraph() dec, e := graph.NewDecoder(sg) if e != nil { t.Fatal(e) } r := rand.New(rand.NewSource(5151)) gen := newGenerator(r, true, net0) // testDecoder(t, gen, dec, 1000) testDecoder(t, gen, dec, 10) }
func TestHMMGauss(t *testing.T) { // Create reference HMM to generate observations. g01 := gm.NewModel(1, gm.Name("g01"), gm.Mean([]float64{0}), gm.StdDev([]float64{1})) g02 := gm.NewModel(1, gm.Name("g02"), gm.Mean([]float64{16}), gm.StdDev([]float64{2})) h0 := narray.New(4, 4) h0.Set(.6, 0, 1) h0.Set(.4, 0, 2) h0.Set(.9, 1, 1) h0.Set(.1, 1, 2) h0.Set(.7, 2, 2) h0.Set(.3, 2, 3) h0 = narray.Log(nil, h0.Copy()) ms0, _ := NewSet() net0, e0 := ms0.NewNet("hmm", h0, []model.Modeler{nil, g01, g02, nil}) fatalIf(t, e0) hmm0 := NewModel(OSet(ms0), UpdateTP(true), UpdateOP(true)) _ = hmm0 // Create random HMM and estimate params from obs. g1 := gm.NewModel(1, gm.Name("g1"), gm.Mean([]float64{-1}), gm.StdDev([]float64{2})) g2 := gm.NewModel(1, gm.Name("g2"), gm.Mean([]float64{18}), gm.StdDev([]float64{4})) h := narray.New(4, 4) h.Set(.5, 0, 1) h.Set(.5, 0, 2) h.Set(.5, 1, 1) h.Set(.5, 1, 2) h.Set(.5, 2, 2) h.Set(.5, 2, 3) h = narray.Log(nil, h.Copy()) ms, _ = NewSet() net, e := ms.NewNet("hmm", h, []model.Modeler{nil, g1, g2, nil}) fatalIf(t, e) hmm := NewModel(OSet(ms), UpdateTP(true), UpdateOP(true)) iter := 10 // number of sequences m := 500 numFrames := 0 t0 := time.Now() // Start timer. for i := 0; i < iter; i++ { t.Logf("iter [%d]", i) // Make sure we generate the same data in each iteration. r := rand.New(rand.NewSource(33)) gen := newGenerator(r, false, net0) // Reset all counters. hmm.Clear() // fix the seed to get the same sequence for j := 0; j < m; j++ { obs, states := gen.next("oid-" + fi(j)) numFrames += len(states) - 2 hmm.UpdateOne(obs, 1.0) } hmm.Estimate() } dur := time.Now().Sub(t0) tp0 := narray.Exp(nil, h0.Copy()) tp := narray.Exp(nil, net.A.Copy()) ns := tp.Shape[0] for i := 0; i < ns; i++ { for j := 0; j < ns; j++ { p0 := tp0.At(i, j) logp0 := h0.At(i, j) p := tp.At(i, j) logp := h.At(i, j) if p > smallNumber || p0 > smallNumber { t.Logf("TP: %d=>%d, p0:%5.2f, p:%5.2f, logp0:%8.5f, logp:%8.5f", i, j, p0, p, logp0, logp) } } } t.Log("") t.Logf("hmm0 g1:%+v, g2:%+v", net0.B[1], net0.B[2]) t.Logf("hmm g1: %+v, g2:%+v", net.B[1], net.B[2]) // Print time stats. t.Log("") t.Logf("Total time: %v", dur) t.Logf("Time per iteration: %v", dur/time.Duration(iter)) t.Logf("Time per frame: %v", dur/time.Duration(iter*numFrames*m)) gjoa.CompareSliceFloat(t, tp0.Data, tp.Data, "error in Trans Probs [0]", .03) CompareGaussians(t, net0.B[1].(*gm.Model), net.B[1].(*gm.Model), 0.03) CompareGaussians(t, net0.B[2].(*gm.Model), net.B[2].(*gm.Model), 0.03) if t.Failed() { t.FailNow() } // Recognize. g := ms.SearchGraph() dec, e := graph.NewDecoder(g) if e != nil { t.Fatal(e) } r := rand.New(rand.NewSource(5151)) gen := newGenerator(r, true, net0) testDecoder(t, gen, dec, 1000) }
func TestMain(m *testing.M) { // Configure glog. Example to set debug level 6 for file viterbi.go and 3 for everythign else: // export GLOG_LEVEL=3 // go test -v -run TestTrainHmmGau -vmodule=viterbi=6 > /tmp/zzz flag.Set("alsologtostderr", "true") flag.Set("log_dir", "/tmp/log") level := os.Getenv("GLOG_LEVEL") if len(level) == 0 { level = "0" } flag.Set("v", level) glog.Info("glog debug level is: ", level) ns = 5 // max num states in a model nstates[0] = 5 nstates[1] = 4 nobs = len(obs) a = narray.New(nq, ns, ns) b = narray.New(nq, ns, nobs) alpha = narray.New(nq, ns, nobs) beta = narray.New(nq, ns, nobs) a.Set(1, 0, 0, 1) a.Set(.5, 0, 1, 1) a.Set(.5, 0, 1, 2) a.Set(.3, 0, 2, 2) a.Set(.6, 0, 2, 3) a.Set(.1, 0, 2, 4) a.Set(.7, 0, 3, 3) a.Set(.3, 0, 3, 4) a.Set(1, 1, 0, 1) a.Set(.3, 1, 1, 1) a.Set(.2, 1, 1, 2) a.Set(.5, 1, 1, 3) a.Set(.6, 1, 2, 2) a.Set(.4, 1, 2, 3) dist := [][]float64{{.4, .5, .1}, {.3, .5, .2}} // prob dist for states in model 0 and 1 // output probs as a function of model,state,time for q := 0; q < nq; q++ { for i := 1; i < nstates[q]-1; i++ { for t := 0; t < nobs; t++ { p := dist[q][obs[t]] // k := r.Intn(len(someProbs)) // b.Set(someProbs[k], q, i, t) b.Set(p, q, i, t) } } } // same output probs but as a function of model,state,symbol // we need this to test the network implementation. outputProbs = narray.New(nq, ns, nsymb) for q := 0; q < nq; q++ { for i := 1; i < nstates[q]-1; i++ { for k := 0; k < nsymb; k++ { p := math.Log(dist[q][k]) outputProbs.Set(p, q, i, k) } } } loga = narray.Log(loga, a.Copy()) logb = narray.Log(logb, b.Copy()) data := make([][]float64, len(obs), len(obs)) for k, v := range obs { data[k] = []float64{float64(v)} } xobs = model.NewFloatObsSequence(data, model.SimpleLabel(""), "") os.Exit(m.Run()) }
func TestTeeModel(t *testing.T) { initChainFB(t) ms2, e := NewSet(hmm0, hmm1) fatalIf(t, e) testScorer := func() scorer { return scorer{op: []float64{math.Log(0.4), math.Log(0.2), math.Log(0.4)}} } h2 := narray.New(3, 3) h3 := narray.New(4, 4) h2.Set(1, 0, 1) h2.Set(0.5, 1, 1) h2.Set(0.5, 1, 2) h3.Set(.9, 0, 1) h3.Set(.1, 0, 3) // entry to exit transition. h3.Set(0.5, 1, 1) h3.Set(0.5, 1, 2) h3.Set(0.5, 2, 2) h3.Set(0.5, 2, 3) h2 = narray.Log(nil, h2.Copy()) h3 = narray.Log(nil, h3.Copy()) hmm2, err := ms2.NewNet("model 2", h2, []model.Modeler{nil, testScorer(), nil}) fatalIf(t, err) hmm3, errr := ms2.NewNet("model 3", h3, []model.Modeler{nil, testScorer(), testScorer(), nil}) fatalIf(t, errr) hmms2, err := ms2.chainFromNets(xobs, hmm0, hmm2, hmm3, hmm0, hmm0, hmm0, hmm0, hmm2) if err != nil { t.Fatal(err) } hmms2.update() nq := hmms2.nq alpha2 := hmms2.alpha.At(nq-1, hmms2.ns[nq-1]-1, nobs-1) beta2 := hmms2.beta.At(0, 0, 0) t.Logf("alpha2:%f", alpha2) t.Logf("beta2:%f", beta2) // check log prob per obs calculated with alpha and beta delta := math.Abs(alpha2-beta2) / float64(nobs) if delta > smallNumber { t.Fatalf("alphaLogProb:%f does not match betaLogProb:%f", alpha2, beta2) } _, err = ms2.chainFromNets(xobs, hmm3, hmm0, hmm2, hmm2) if err == nil { t.Fatal("expected error, got nil - first model has trans from entry to exit") } _, err = ms2.chainFromNets(xobs, hmm1, hmm0, hmm2, hmm3) if err == nil { t.Errorf("expected error, got nil - last model has trans from entry to exit") } }