Exemple #1
0
// ConvertNamedFeatures fills instance.Features from instance.NamedFeatures.
//
// If instance.Features is already non-nil the instance is left untouched.
// Otherwise a new sparse vector is created, index 0 is set to 1, and every
// named feature is stored at the id that dict.GetIdFromName returns for its
// name.
func ConvertNamedFeatures(instance *Instance, dict *dictionary.Dictionary) {
	// Features already present: nothing to convert.
	if instance.Features != nil {
		return
	}

	instance.Features = util.NewSparseVector()
	features := instance.Features

	// Feature 0 is always 1.
	features.Set(0, 1.0)

	for name, value := range instance.NamedFeatures {
		features.Set(dict.GetIdFromName(name), value)
	}
}
// Predict evaluates the maximum-entropy model on instance and returns the
// normalized label distribution together with the most probable label.
//
// When instance.NamedFeatures is non-nil, instance.Features is rebuilt from it
// (the caller's instance is modified): index 0 is set to 1 and each named
// feature is stored at the id returned by
// classifier.FeatureDictionary.TranslateIdFromName. If NamedFeatures is set
// but the classifier has no FeatureDictionary, an empty InstanceOutput is
// returned.
//
// Label 0 is the reference label with a fixed score of 0; label i (i >= 1)
// is scored with row i-1 of classifier.Weights. The distribution is the
// softmax of those scores. The maximum score is subtracted before
// exponentiation so that large scores cannot overflow math.Exp to +Inf and
// turn the distribution into NaN; the result is mathematically unchanged.
// The winning label is chosen on the raw scores (ties keep the lowest label),
// which stays well defined even where exp() would saturate.
//
// If classifier.LabelDictionary is non-nil, output.LabelString is set to the
// name it reports for the winning label.
func (classifier *MaxEntClassifier) Predict(instance *data.Instance) data.InstanceOutput {
	output := data.InstanceOutput{}

	// When NamedFeatures is used, convert it into Features.
	if instance.NamedFeatures != nil {
		if classifier.FeatureDictionary == nil {
			return output
		}
		instance.Features = util.NewSparseVector()
		// Feature 0 is always 1.
		instance.Features.Set(0, 1.0)

		for k, v := range instance.NamedFeatures {
			id := classifier.FeatureDictionary.TranslateIdFromName(k)
			instance.Features.Set(id, v)
		}
	}

	output.LabelDistribution = util.NewVector(classifier.NumLabels)

	// First pass: raw linear scores. scores[0] stays 0 for the reference
	// label, so maxScore starts at 0 and is never negative.
	scores := make([]float64, classifier.NumLabels)
	mostPossibleLabel := 0
	maxScore := float64(0)
	for iLabel := 1; iLabel < classifier.NumLabels; iLabel++ {
		sum := float64(0)
		for _, k := range classifier.Weights.GetValues(iLabel - 1).Keys() {
			sum += classifier.Weights.Get(iLabel-1, k) * instance.Features.Get(k)
		}
		scores[iLabel] = sum
		if sum > maxScore {
			mostPossibleLabel = iLabel
			maxScore = sum
		}
	}

	// Second pass: exponentiate relative to the maximum. Every term is in
	// [0, 1] and the maximum contributes exactly 1, so z >= 1 and finite.
	z := math.Exp(-maxScore)
	output.LabelDistribution.Set(0, z)
	for iLabel := 1; iLabel < classifier.NumLabels; iLabel++ {
		exp := math.Exp(scores[iLabel] - maxScore)
		z += exp
		output.LabelDistribution.Set(iLabel, exp)
	}
	output.LabelDistribution.Scale(1 / z)
	output.Label = mostPossibleLabel

	if classifier.LabelDictionary != nil {
		output.LabelString = classifier.LabelDictionary.GetNameFromId(output.Label)
	}

	return output
}
// TestInMemDataset exercises the in-memory dataset: it adds five instances,
// checks which ones AddInstance accepts, finalizes the set, and then verifies
// the reported options, the instance count, and iteration (including Skip).
func TestInMemDataset(t *testing.T) {
	dataset := NewInmemDataset()

	// Dense, 3 features, label 1: expected to be accepted.
	first := &Instance{Features: util.NewVector(3), Output: &InstanceOutput{Label: 1}}
	first.Features.SetValues([]float64{1, 2, 3})
	util.Expect(t, "true", dataset.AddInstance(first))

	// Dense, 3 features, label 2: expected to be accepted.
	second := &Instance{Features: util.NewVector(3), Output: &InstanceOutput{Label: 2}}
	second.Features.SetValues([]float64{3, 4, 5})
	util.Expect(t, "true", dataset.AddInstance(second))

	// Sparse vector after dense ones: expected to be rejected.
	sparse := &Instance{Features: util.NewSparseVector(), Output: &InstanceOutput{Label: 0}}
	sparse.Features.SetValues([]float64{3, 4, 5})
	util.Expect(t, "false", dataset.AddInstance(sparse))

	// Dense vector with 4 features instead of 3: expected to be rejected.
	wider := &Instance{Features: util.NewVector(4), Output: &InstanceOutput{Label: 4}}
	wider.Features.SetValues([]float64{3, 4, 5, 6})
	util.Expect(t, "false", dataset.AddInstance(wider))

	// Dense, 3 features, but no Output: expected to be rejected.
	unlabeled := &Instance{Features: util.NewVector(3)}
	unlabeled.Features.SetValues([]float64{3, 5, 5})
	util.Expect(t, "false", dataset.AddInstance(unlabeled))

	dataset.Finalize()

	// Check the dataset options.
	util.Expect(t, "false", dataset.GetOptions().FeatureIsSparse)
	util.Expect(t, "3", dataset.GetOptions().FeatureDimension)
	util.Expect(t, "true", dataset.GetOptions().IsSupervisedLearning)
	util.Expect(t, "3", dataset.GetOptions().NumLabels)

	// Only the two accepted instances are stored.
	util.Expect(t, "2", dataset.NumInstances())

	it := dataset.CreateIterator()

	// expectCurrent checks that the iterator is not exhausted and that the
	// current instance carries the three given feature values.
	expectCurrent := func(f0, f1, f2 string) {
		util.Expect(t, "false", it.End())
		util.Expect(t, f0, it.GetInstance().Features.Get(0))
		util.Expect(t, f1, it.GetInstance().Features.Get(1))
		util.Expect(t, f2, it.GetInstance().Features.Get(2))
	}

	it.Start()
	expectCurrent("1", "2", "3")

	it.Next()
	expectCurrent("3", "4", "5")

	it.Next()
	util.Expect(t, "true", it.End())

	// A fresh iterator that skips both instances must be at the end.
	it = dataset.CreateIterator()
	it.Start()
	it.Skip(2)
	util.Expect(t, "true", it.End())

	// Iterating must not change the number of stored instances.
	util.Expect(t, "2", dataset.NumInstances())
}