// 将instance中的NamedFeatures域转化为Features域 // 如果instance.Features不为nil则不转化 func ConvertNamedFeatures(instance *Instance, dict *dictionary.Dictionary) { if instance.Features != nil { return } instance.Features = util.NewSparseVector() // 第0个feature始终是1 instance.Features.Set(0, 1.0) for k, v := range instance.NamedFeatures { id := dict.GetIdFromName(k) instance.Features.Set(id, v) } }
func (classifier *MaxEntClassifier) Predict(instance *data.Instance) data.InstanceOutput { output := data.InstanceOutput{} // 当使用NamedFeatures时转化为Features if instance.NamedFeatures != nil { if classifier.FeatureDictionary == nil { return output } instance.Features = util.NewSparseVector() // 第0个feature始终是1 instance.Features.Set(0, 1.0) for k, v := range instance.NamedFeatures { id := classifier.FeatureDictionary.TranslateIdFromName(k) instance.Features.Set(id, v) } } output.LabelDistribution = util.NewVector(classifier.NumLabels) output.LabelDistribution.Set(0, 1.0) z := float64(1) mostPossibleLabel := 0 mostPossibleLabelWeight := float64(1) for iLabel := 1; iLabel < classifier.NumLabels; iLabel++ { sum := float64(0) for _, k := range classifier.Weights.GetValues(iLabel - 1).Keys() { sum += classifier.Weights.Get(iLabel-1, k) * instance.Features.Get(k) } exp := math.Exp(sum) if exp > mostPossibleLabelWeight { mostPossibleLabel = iLabel mostPossibleLabelWeight = exp } z += exp output.LabelDistribution.Set(iLabel, exp) } output.LabelDistribution.Scale(1 / z) output.Label = mostPossibleLabel if classifier.LabelDictionary != nil { output.LabelString = classifier.LabelDictionary.GetNameFromId(output.Label) } return output }
func TestInMemDataset(t *testing.T) { set := NewInmemDataset() instance1 := new(Instance) instance1.Features = util.NewVector(3) instance1.Features.SetValues([]float64{1, 2, 3}) instance1.Output = &InstanceOutput{Label: 1} util.Expect(t, "true", set.AddInstance(instance1)) instance2 := new(Instance) instance2.Features = util.NewVector(3) instance2.Features.SetValues([]float64{3, 4, 5}) instance2.Output = &InstanceOutput{Label: 2} util.Expect(t, "true", set.AddInstance(instance2)) instance3 := new(Instance) instance3.Features = util.NewSparseVector() instance3.Features.SetValues([]float64{3, 4, 5}) instance3.Output = &InstanceOutput{Label: 0} util.Expect(t, "false", set.AddInstance(instance3)) instance4 := new(Instance) instance4.Features = util.NewVector(4) instance4.Features.SetValues([]float64{3, 4, 5, 6}) instance4.Output = &InstanceOutput{Label: 4} util.Expect(t, "false", set.AddInstance(instance4)) instance5 := new(Instance) instance5.Features = util.NewVector(3) instance5.Features.SetValues([]float64{3, 5, 5}) util.Expect(t, "false", set.AddInstance(instance5)) set.Finalize() // 检查数据集选项 util.Expect(t, "false", set.GetOptions().FeatureIsSparse) util.Expect(t, "3", set.GetOptions().FeatureDimension) util.Expect(t, "true", set.GetOptions().IsSupervisedLearning) util.Expect(t, "3", set.GetOptions().NumLabels) util.Expect(t, "2", set.NumInstances()) iter := set.CreateIterator() iter.Start() util.Expect(t, "false", iter.End()) util.Expect(t, "1", iter.GetInstance().Features.Get(0)) util.Expect(t, "2", iter.GetInstance().Features.Get(1)) util.Expect(t, "3", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "false", iter.End()) util.Expect(t, "3", iter.GetInstance().Features.Get(0)) util.Expect(t, "4", iter.GetInstance().Features.Get(1)) util.Expect(t, "5", iter.GetInstance().Features.Get(2)) iter.Next() util.Expect(t, "true", iter.End()) iter = set.CreateIterator() iter.Start() iter.Skip(2) util.Expect(t, "true", iter.End()) util.Expect(t, "2", set.NumInstances()) }