// Estimate computes model parameters using sufficient statistics.
func (g *Model) Estimate() error {

	if g.NSamples > minNumSamples {

		/* Estimate the mean. */
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumx, g.Mean)

		/*
		 * Estimate the variance:
		 *   sigma_sq = (1/n)(sumxsq - (1/n) sumx^2) = sumxsq/n - mean^2
		 */
		tmp := g.variance // borrow as an intermediate array.
		floatx.Sq(g.tmpArray, g.Mean)
		floatx.Apply(floatx.ScaleFunc(1.0/g.NSamples), g.Sumxsq, tmp)
		floats.SubTo(g.variance, tmp, g.tmpArray)
		floatx.Apply(floatx.Floorv(smallVar), g.variance, nil)
	} else {

		/* Not enough training samples: reset to zero mean and floor variance. */
		glog.Warningf("not enough training samples, name [%s], num samples [%e]",
			g.ModelName, g.NSamples)
		floatx.Apply(floatx.SetValueFunc(smallVar), g.variance, nil)
		floatx.Apply(floatx.SetValueFunc(0), g.Mean, nil)
	}
	g.setVariance(g.variance) // update varianceInv and StdDev.

	/* Update the log Gaussian constant. */
	floatx.Log(g.tmpArray, g.variance)
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0

	glog.V(6).Infof("gaussian reest, name:%s, mean:%v, sd:%v", g.ModelName, g.Mean, g.StdDev)
	return nil
}
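// The closed-form update above follows from var(x) = E[x^2] - E[x]^2 applied
// to weighted sufficient statistics. A minimal, self-contained sketch of the
// same math in plain Go (the function name and floor handling here are
// illustrative, not part of this package):
//
//	func estimateDiag(sumx, sumxsq []float64, n, floor float64) (mean, vari []float64) {
//		mean = make([]float64, len(sumx))
//		vari = make([]float64, len(sumx))
//		for i := range sumx {
//			mean[i] = sumx[i] / n                   // E[x]
//			vari[i] = sumxsq[i]/n - mean[i]*mean[i] // E[x^2] - E[x]^2
//			if vari[i] < floor {
//				vari[i] = floor // avoid degenerate variances
//			}
//		}
//		return mean, vari
//	}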
// Clear resets sufficient statistics.
func (gmm *Model) Clear() {

	for _, c := range gmm.Components {
		c.Clear()
	}
	floatx.Apply(floatx.SetValueFunc(0), gmm.PosteriorSum, nil)
	gmm.NSamples = 0
	gmm.Likelihood = 0
}
// NewModel creates a new Gaussian mixture model.
func NewModel(dim, numComponents int, options ...Option) *Model {

	gmm := &Model{
		ModelName:   "GMM", // default name
		ModelDim:    dim,
		NComponents: numComponents,
		Diag:        true,
		tmpProbs:    make([]float64, numComponents),
	}
	gmm.Type = reflect.TypeOf(*gmm).String()

	// Set options.
	for _, option := range options {
		option(gmm)
	}

	if len(gmm.PosteriorSum) == 0 {
		gmm.PosteriorSum = make([]float64, gmm.NComponents)
	}

	// Create components if not provided.
	if len(gmm.Components) == 0 {
		gmm.Components = make([]*gaussian.Model, numComponents)
		for i := range gmm.Components {
			cname := componentName(gmm.ModelName, i, gmm.NComponents)
			gmm.Components[i] = gaussian.NewModel(gmm.ModelDim, gaussian.Name(cname))
		}
	}

	// Initialize weights.
	// The caller may pass weights, log(weights), or no weights.
	switch {
	case len(gmm.LogWeights) > 0 && len(gmm.Weights) > 0:
		glog.Fatal("options not allowed: provide only one of LogWeights or Weights")
	case len(gmm.LogWeights) == 0 && len(gmm.Weights) == 0:
		gmm.LogWeights = make([]float64, numComponents)
		logw := -math.Log(float64(gmm.NComponents))
		floatx.Apply(floatx.SetValueFunc(logw), gmm.LogWeights, nil)
		gmm.Weights = make([]float64, gmm.NComponents)
		floatx.Exp(gmm.Weights, gmm.LogWeights)
		glog.Infof("init weights with equal values: %.6f", gmm.Weights[0])
	case len(gmm.LogWeights) > 0:
		gmm.Weights = make([]float64, gmm.NComponents)
		floatx.Exp(gmm.Weights, gmm.LogWeights)
	case len(gmm.Weights) > 0:
		gmm.LogWeights = make([]float64, numComponents)
		floatx.Log(gmm.LogWeights, gmm.Weights)
	}
	return gmm
}
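// When neither option is given, the weights are initialized uniformly in the
// log domain: logw = -log(K), so every component gets weight 1/K. A tiny
// stand-alone illustration (variable names are hypothetical):
//
//	k := 4
//	logw := -math.Log(float64(k)) // ≈ -1.3863
//	w := math.Exp(logw)           // 0.25 for each of the 4 components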
// UpdateOne updates sufficient statistics using one observation.
func (g *Model) UpdateOne(o model.Obs, w float64) {

	glog.V(6).Infof("gaussian update, name:%s, obs:%v, weight:%e", g.ModelName, o, w)

	/* Update sufficient statistics. */
	obs, _, _ := model.ObsToF64(o)
	floatx.Apply(floatx.ScaleFunc(w), obs, g.tmpArray)
	floats.Add(g.Sumx, g.tmpArray)
	floatx.Sq(g.tmpArray, obs)
	floats.Scale(w, g.tmpArray)
	floats.Add(g.Sumxsq, g.tmpArray)
	g.NSamples += w
}
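// The accumulators track weighted first- and second-order statistics. The
// same update written element-wise, as a hedged sketch with hypothetical
// names (x is one observation vector, w its weight):
//
//	for i, v := range x {
//		sumx[i] += w * v       // first-order statistic
//		sumxsq[i] += w * v * v // second-order statistic
//	}
//	n += w // effective sample count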
// Estimate computes model parameters using sufficient statistics.
func (gmm *Model) Estimate() error {

	// Estimate mixture weights.
	floatx.Apply(floatx.ScaleFunc(1.0/gmm.NSamples), gmm.PosteriorSum, gmm.Weights)
	floatx.Log(gmm.LogWeights, gmm.Weights)

	// Estimate component densities.
	for _, c := range gmm.Components {
		err := c.Estimate()
		if err != nil {
			return err
		}
	}
	gmm.Iteration++
	return nil
}
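// The weight update is the usual M-step: w_k equals the accumulated posterior
// mass for component k divided by the total observation weight. Element-wise,
// the same computation looks like this (hypothetical names):
//
//	for k := range weights {
//		weights[k] = posteriorSum[k] / n
//		logWeights[k] = math.Log(weights[k])
//	}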
// UpdateOne updates sufficient statistics using one observation.
func (gmm *Model) UpdateOne(o model.Obs, w float64) {

	obs, _, _ := model.ObsToF64(o)
	maxProb := gmm.logProbInternal(obs, gmm.tmpProbs)
	gmm.Likelihood += maxProb
	floatx.Apply(floatx.AddScalarFunc(-maxProb+math.Log(w)), gmm.tmpProbs, nil)

	// Compute posterior probabilities.
	floatx.Exp(gmm.tmpProbs, gmm.tmpProbs)

	// Update posterior sum, needed to compute mixture weights.
	floats.Add(gmm.PosteriorSum, gmm.tmpProbs)

	// Update Gaussian components.
	for i, c := range gmm.Components {
		c.UpdateOne(o, gmm.tmpProbs[i])
	}

	// Count number of observations.
	gmm.NSamples += w
}
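// tmpProbs holds per-component log joints, log w_k + log p_k(x); subtracting
// the normalizer returned by logProbInternal and exponentiating yields the
// posteriors p(k|x). An exact normalizer is the log-sum-exp of the joints; a
// self-contained sketch of that computation (the function name is
// hypothetical, not part of this package):
//
//	// posteriors converts log joints logp[k] = log w_k + log p_k(x)
//	// into posterior probabilities that sum to one.
//	func posteriors(logp []float64) []float64 {
//		max := logp[0]
//		for _, v := range logp[1:] {
//			if v > max {
//				max = v
//			}
//		}
//		var sum float64
//		for _, v := range logp {
//			sum += math.Exp(v - max) // shift by max for numerical stability
//		}
//		logTotal := max + math.Log(sum)
//		post := make([]float64, len(logp))
//		for k, v := range logp {
//			post[k] = math.Exp(v - logTotal)
//		}
//		return post
//	}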
// NewModel creates a new Gaussian model.
func NewModel(dim int, options ...Option) *Model {

	g := &Model{
		ModelName:   "Gaussian",
		ModelDim:    dim,
		Diag:        true,
		variance:    make([]float64, dim),
		varianceInv: make([]float64, dim),
		tmpArray:    make([]float64, dim),
	}
	g.Type = reflect.TypeOf(*g).String()

	// Set options.
	for _, option := range options {
		option(g)
	}

	if len(g.Sumx) == 0 {
		g.Sumx = make([]float64, dim)
	}
	if len(g.Sumxsq) == 0 {
		g.Sumxsq = make([]float64, dim)
	}
	if g.Mean == nil {
		g.Mean = make([]float64, dim)
	}
	if g.StdDev == nil {
		g.StdDev = make([]float64, dim)
		floatx.Apply(floatx.SetValueFunc(smallSD), g.StdDev, nil)
	}
	floatx.Sq(g.variance, g.StdDev)

	// Initializes variance, varianceInv, and StdDev.
	g.setVariance(g.variance)

	floatx.Log(g.tmpArray, g.variance)
	g.const1 = -float64(g.ModelDim) * math.Log(2.0*math.Pi) / 2.0
	g.const2 = g.const1 - floats.Sum(g.tmpArray)/2.0
	return g
}
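// const1 and const2 cache the observation-independent terms of the diagonal
// Gaussian log density, log N(x) = -d/2 log(2*pi) - 1/2 sum_i log var_i
// - 1/2 sum_i (x_i - mean_i)^2 / var_i. A hedged, self-contained sketch of
// the full evaluation (the function name is illustrative):
//
//	func logProbDiag(x, mean, vari []float64) float64 {
//		d := float64(len(x))
//		lp := -d * math.Log(2.0*math.Pi) / 2.0 // const1
//		for i := range x {
//			lp -= math.Log(vari[i]) / 2.0 // completes const2
//			diff := x[i] - mean[i]
//			lp -= diff * diff / (2.0 * vari[i])
//		}
//		return lp
//	}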
// setVariance sets the variance and refreshes the derived varianceInv and
// StdDev fields.
func (g *Model) setVariance(variance []float64) {
	copy(g.variance, variance)
	floatx.Apply(floatx.Inv, g.variance, g.varianceInv)
	g.StdDev = g.standardDeviation()
}
// Clear resets sufficient statistics.
func (g *Model) Clear() {
	floatx.Apply(floatx.SetValueFunc(0), g.Sumx, nil)
	floatx.Apply(floatx.SetValueFunc(0), g.Sumxsq, nil)
	g.NSamples = 0
}