예제 #1
0
파일: lbfgs.go 프로젝트: sguzwf/mlf
// initStruct (re)initializes the optimizer's working storage.
//
// It records the label count and replaces every buffer with a freshly
// allocated one, so it can be called again when the problem dimensions
// change. The history slices x, g, s and y and the vectors ro, alpha and
// beta are sized by *lbfgs_history_size. When isSparse is false all
// matrices are dense with labels x features entries; otherwise sparse
// matrices built from the label count alone are used and features is not
// consulted.
func (opt *lbfgsOptimizer) initStruct(labels, features int, isSparse bool) {
	opt.labels = labels

	historySize := *lbfgs_history_size

	// Choose the matrix constructor once so that a single allocation path
	// serves both the dense and the sparse representation.
	newMatrix := func() *util.Matrix { return util.NewSparseMatrix(labels) }
	if !isSparse {
		newMatrix = func() *util.Matrix { return util.NewMatrix(labels, features) }
	}

	opt.x = make([]*util.Matrix, historySize)
	opt.g = make([]*util.Matrix, historySize)
	opt.s = make([]*util.Matrix, historySize)
	opt.y = make([]*util.Matrix, historySize)

	opt.ro = util.NewVector(historySize)
	opt.alpha = util.NewVector(historySize)
	opt.beta = util.NewVector(historySize)

	opt.q = newMatrix()
	opt.z = newMatrix()
	for slot := range opt.x {
		opt.x[slot] = newMatrix()
		opt.g[slot] = newMatrix()
		opt.s[slot] = newMatrix()
		opt.y[slot] = newMatrix()
	}
}
예제 #2
0
파일: rbm.go 프로젝트: sguzwf/mlf
// SampleHidden computes hidden-unit activations for the visible vector v.
//
// Both the visible input and the returned hidden vector carry a bias unit
// at index 0, which is fixed to 1.0; only entries of v from index 1 upward
// are read. After the initial visible-to-hidden pass, n further rounds are
// run, each reconstructing the visible units from the hidden units and then
// recomputing the hidden units. When binary is true each hidden unit stores
// the result of rbm.bernoulli applied to its activation probability,
// otherwise the probability itself. The model's read lock is held for the
// whole call.
func (rbm *RBM) SampleHidden(v *util.Vector, n int, binary bool) *util.Vector {
	rbm.lock.RLock()
	defer rbm.lock.RUnlock()

	numHidden := rbm.options.NumHiddenUnits + 1
	numVisible := rbm.lock.weights.NumValues()

	hidden := util.NewVector(numHidden)
	visible := util.NewVector(numVisible)

	// Bias units.
	hidden.Set(0, 1.0)
	visible.Set(0, 1.0)

	// Copy the caller's input, leaving the bias slot untouched.
	for j := 1; j < numVisible; j++ {
		visible.Set(j, v.Get(j))
	}

	// refreshHidden recomputes every non-bias hidden unit from the current
	// visible units.
	refreshHidden := func() {
		for i := 1; i < numHidden; i++ {
			p := rbm.logistic(visible, i, true)
			if !binary {
				hidden.Set(i, p)
				continue
			}
			hidden.Set(i, rbm.bernoulli(p))
		}
	}

	refreshHidden()

	// n reconstruction rounds: visible from hidden, then hidden from visible.
	for round := 0; round < n; round++ {
		for j := 1; j < numVisible; j++ {
			visible.Set(j, rbm.logistic(hidden, j, false))
		}
		refreshHidden()
	}

	return hidden
}
예제 #3
0
// main loads a libsvm dataset and a trained RBM, then prints one
// libsvm-style line per instance to standard output.
//
// Each line starts with the instance's label string. With *append set, the
// non-zero input features are echoed first (as integers, index i+1) and the
// non-zero hidden units follow at indices starting from visibleDim; without
// it only the hidden units are printed, indexed from 1. Hidden values are
// printed as integers when *useBinary is set and with three decimals
// otherwise.
func main() {
	flag.Parse()
	runtime.GOMAXPROCS(runtime.NumCPU())

	// Load the dataset (dense representation) and the trained model.
	set := contrib.LoadLibSVMDataset(*libsvm_file, false)
	machine := rbm.LoadRBM(*model)

	visibleDim := set.GetOptions().FeatureDimension
	hiddenDim := machine.GetOptions().NumHiddenUnits + 1

	iter := set.CreateIterator()
	for iter.Start(); !iter.End(); iter.Next() {
		instance := iter.GetInstance()
		visible := util.NewVector(visibleDim)

		line := instance.Output.LabelString

		for i := 0; i < visibleDim; i++ {
			value := instance.Features.Get(i)
			visible.Set(i, value)
			if *append && value != 0.0 {
				line += fmt.Sprintf(" %d:%d", i+1, int(value))
			}
		}

		hidden := machine.SampleHidden(visible, *numCD, *useBinary)

		// In append mode the hidden units are numbered after the echoed
		// input features.
		// NOTE(review): echoed features use indices 1..visibleDim while the
		// first hidden unit is written at index visibleDim, so the two can
		// share an index — confirm whether this is intended.
		offset := 0
		if *append {
			offset = visibleDim - 1
		}

		for i := 1; i < hiddenDim; i++ {
			value := hidden.Get(i)
			if value == 0.0 {
				continue
			}
			if *useBinary {
				line += fmt.Sprintf(" %d:%d", offset+i, int(value))
			} else {
				line += fmt.Sprintf(" %d:%.3f", offset+i, value)
			}
		}

		fmt.Println(line)
	}
}
예제 #4
0
// Predict returns the label distribution and the most likely label for
// instance.
//
// When instance.NamedFeatures is set, instance.Features is overwritten with
// a sparse vector built through the classifier's feature dictionary (feature
// 0 is always 1); if no dictionary is available an empty output is returned.
// Label 0 is the reference class with unnormalised weight 1. Every other
// label gets exp(w . x), using row label-1 of the weight matrix, and the
// distribution is normalised by the sum of all weights. LabelString is
// filled in only when a label dictionary is present.
func (classifier *MaxEntClassifier) Predict(instance *data.Instance) data.InstanceOutput {
	output := data.InstanceOutput{}

	// Translate named features into an indexed sparse vector.
	if instance.NamedFeatures != nil {
		if classifier.FeatureDictionary == nil {
			return output
		}
		instance.Features = util.NewSparseVector()
		// Feature 0 is the constant term.
		instance.Features.Set(0, 1.0)

		for name, value := range instance.NamedFeatures {
			instance.Features.Set(classifier.FeatureDictionary.TranslateIdFromName(name), value)
		}
	}

	output.LabelDistribution = util.NewVector(classifier.NumLabels)
	output.LabelDistribution.Set(0, 1.0)

	// The reference label starts out as the best candidate with weight 1;
	// another label wins only with a strictly larger weight.
	best, bestWeight := 0, 1.0
	normalizer := 1.0
	for label := 1; label < classifier.NumLabels; label++ {
		row := label - 1
		score := 0.0
		for _, k := range classifier.Weights.GetValues(row).Keys() {
			score += classifier.Weights.Get(row, k) * instance.Features.Get(k)
		}

		weight := math.Exp(score)
		output.LabelDistribution.Set(label, weight)
		normalizer += weight
		if weight > bestWeight {
			best, bestWeight = label, weight
		}
	}
	output.LabelDistribution.Scale(1 / normalizer)
	output.Label = best

	if classifier.LabelDictionary != nil {
		output.LabelString = classifier.LabelDictionary.GetNameFromId(output.Label)
	}

	return output
}
예제 #5
0
// TestComputeInstanceDerivative checks MaxEntComputeInstanceDerivative
// against reference values for a 2x3 weight matrix and three instances with
// labels 0, 1 and 2. The same instance and derivative matrix are reused
// across all cases.
func TestComputeInstanceDerivative(t *testing.T) {
	weights := util.NewMatrix(2, 3)
	weights.GetValues(0).SetValues([]float64{1, 2, 3})
	weights.GetValues(1).SetValues([]float64{3, 4, 5})
	de := util.NewMatrix(2, 3)

	instance := data.Instance{}
	instance.Features = util.NewVector(3)
	instance.Output = &data.InstanceOutput{Label: 0}

	cases := []struct {
		features []float64
		label    int
		want     [2][3]float64
	}{
		{
			features: []float64{1, 0.3, 0.4},
			label:    0,
			want: [2][3]float64{
				{0.0322, 0.0096, 0.0128},
				{0.9658, 0.2897, 0.3863},
			},
		},
		{
			features: []float64{1, 0.6, 0.7},
			label:    1,
			want: [2][3]float64{
				{-0.9900, -0.5940, -0.6930},
				{0.9899, 0.5939, 0.6929},
			},
		},
		{
			features: []float64{1, 0.4, 0.2},
			label:    2,
			want: [2][3]float64{
				{0.0390, 0.0156, 0.0078},
				{-0.0425, -0.0170, -0.0085},
			},
		},
	}

	for _, c := range cases {
		instance.Features.SetValues(c.features)
		instance.Output.Label = c.label
		MaxEntComputeInstanceDerivative(weights, &instance, de)

		for row, wantRow := range c.want {
			for col, want := range wantRow {
				util.ExpectNear(t, want, de.Get(row, col), 0.0001)
			}
		}
	}
}
예제 #6
0
// TestSkipIterator builds a six-instance in-memory dataset and verifies,
// for several skip patterns, exactly which instances a skip iterator visits
// and that it reports End afterwards.
func TestSkipIterator(t *testing.T) {
	set := NewInmemDataset()

	for _, values := range [][]float64{
		{1, 2, 3},
		{3, 4, 5},
		{7, 8, 9},
		{30, 40, 50},
		{70, 80, 90},
		{31, 41, 51},
	} {
		instance := new(Instance)
		instance.Features = util.NewVector(3)
		instance.Features.SetValues(values)
		util.Expect(t, "true", set.AddInstance(instance))
	}

	set.Finalize()

	scenarios := []struct {
		skips []int
		// Feature values of the instances the iterator must yield, in order.
		visited [][3]string
	}{
		{
			skips:   []int{0, 4},
			visited: [][3]string{{"1", "2", "3"}, {"70", "80", "90"}},
		},
		{
			skips:   []int{3, 1},
			visited: [][3]string{{"30", "40", "50"}},
		},
		{
			skips:   []int{1, 2, 1},
			visited: [][3]string{{"3", "4", "5"}, {"30", "40", "50"}, {"31", "41", "51"}},
		},
	}

	for _, scenario := range scenarios {
		iter := NewSkipIterator(set, scenario.skips)
		iter.Start()
		for _, row := range scenario.visited {
			util.Expect(t, "false", iter.End())
			for k, want := range row {
				util.Expect(t, want, iter.GetInstance().Features.Get(k))
			}
			iter.Next()
		}
		util.Expect(t, "true", iter.End())
	}
}
예제 #7
0
// TestSkipDataset wraps a six-instance in-memory dataset in skip datasets
// built from several bucket lists and verifies the reported instance count
// and the exact instances each one iterates over.
func TestSkipDataset(t *testing.T) {
	set := NewInmemDataset()

	for _, values := range [][]float64{
		{1, 2, 3},
		{3, 4, 5},
		{7, 8, 9},
		{30, 40, 50},
		{70, 80, 90},
		{31, 41, 51},
	} {
		instance := new(Instance)
		instance.Features = util.NewVector(3)
		instance.Features.SetValues(values)
		util.Expect(t, "true", set.AddInstance(instance))
	}

	set.Finalize()

	scenarios := []struct {
		buckets []SkipBucket
		// Expected NumInstances, as rendered by util.Expect.
		count string
		// Feature values of the instances the iterator must yield, in order.
		visited [][3]string
	}{
		{
			buckets: []SkipBucket{{true, 0}, {false, 1}, {true, 3}},
			count:   "2",
			visited: [][3]string{{"1", "2", "3"}, {"70", "80", "90"}},
		},
		{
			buckets: []SkipBucket{{true, 3}, {false, 1}},
			count:   "1",
			visited: [][3]string{{"30", "40", "50"}},
		},
		{
			buckets: []SkipBucket{{true, 1}, {false, 1}, {true, 1}, {false, 1}},
			count:   "3",
			visited: [][3]string{{"3", "4", "5"}, {"30", "40", "50"}, {"31", "41", "51"}},
		},
	}

	for _, scenario := range scenarios {
		ss := NewSkipDataset(set, scenario.buckets)
		util.Expect(t, scenario.count, ss.NumInstances())

		iter := ss.CreateIterator()
		iter.Start()
		for _, row := range scenario.visited {
			util.Expect(t, "false", iter.End())
			for k, want := range row {
				util.Expect(t, want, iter.GetInstance().Features.Get(k))
			}
			iter.Next()
		}
		util.Expect(t, "true", iter.End())
	}
}
예제 #8
0
// LoadLibSVMDataset reads the libsvm-format text file at path into an
// in-memory dataset and returns it finalized.
//
// Every non-blank line has the form "label id:value id:value ...". Fields
// may be separated by any run of whitespace, so tab-separated files and
// files with CRLF line endings load correctly. Labels are numbered in order
// of first appearance. Feature ids must be integers >= 1 because index 0 is
// reserved for the constant term.
//
// With usingSparseRepresentation set, each instance carries its features in
// NamedFeatures keyed by the id exactly as written in the file. Otherwise a
// dense vector of maxFeature+1 entries is used, with entry 0 set to 1.
//
// The function terminates the process through log.Fatal when the file cannot
// be read, when a feature token is malformed, or when the largest feature id
// is below 2.
func LoadLibSVMDataset(path string, usingSparseRepresentation bool) data.Dataset {
	log.Print("载入libsvm格式文件", path)

	content, err := ioutil.ReadFile(path)
	if err != nil {
		log.Fatalf("无法打开文件\"%v\",错误提示:%v\n", path, err)
	}

	type feature struct {
		name  string
		id    int
		value float64
	}
	type row struct {
		label    string
		features []feature
	}

	maxFeature := 0
	labels := make(map[string]int)
	var rows []row

	// Single parsing pass: validate every token, collect labels and the
	// largest feature id, and keep the parsed rows so that nothing has to
	// be split or converted a second time.
	for lineIndex, l := range strings.Split(string(content), "\n") {
		// strings.Fields drops the trailing "\r" of CRLF files and treats
		// whitespace-only lines as empty.
		fields := strings.Fields(l)
		if len(fields) == 0 {
			continue
		}

		r := row{label: fields[0], features: make([]feature, 0, len(fields)-1)}
		if _, ok := labels[r.label]; !ok {
			// len(labels) is evaluated before the insertion, so labels are
			// numbered 0, 1, 2, ... in order of first appearance.
			labels[r.label] = len(labels)
		}

		for _, token := range fields[1:] {
			name, fid, value, ok := parseLibSVMFeature(token)
			if !ok {
				log.Fatalf("文件输入格式不合法:第 %d 行的特征 \"%v\" 无法解析\n", lineIndex+1, token)
			}
			if fid > maxFeature {
				maxFeature = fid
			}
			r.features = append(r.features, feature{name: name, id: fid, value: value})
		}
		rows = append(rows, r)
	}

	if maxFeature < 2 {
		log.Fatal("文件输入格式不合法")
	}
	log.Printf("feature 数目 %d", maxFeature)
	log.Printf("label 数目 %d", len(labels))

	set := data.NewInmemDataset()

	for _, r := range rows {
		instance := new(data.Instance)
		instance.Output = &data.InstanceOutput{
			Label:       labels[r.label],
			LabelString: r.label,
		}

		if usingSparseRepresentation {
			instance.NamedFeatures = make(map[string]float64)
			for _, f := range r.features {
				instance.NamedFeatures[f.name] = f.value
			}
		} else {
			instance.Features = util.NewVector(maxFeature + 1)
			// Constant term.
			instance.Features.Set(0, 1)
			for _, f := range r.features {
				instance.Features.Set(f.id, f.value)
			}
		}
		set.AddInstance(instance)
	}

	set.Finalize()

	log.Print("载入数据样本数目 ", set.NumInstances())

	return set
}

// parseLibSVMFeature parses a single "id:value" token. It returns the id as
// written (name), the id as an integer and the value. ok is false when the
// token does not have exactly two ":"-separated parts, when the id is not
// an integer >= 1, or when the value is not a valid float.
func parseLibSVMFeature(token string) (name string, fid int, value float64, ok bool) {
	parts := strings.Split(token, ":")
	if len(parts) != 2 {
		return "", 0, 0, false
	}

	var err error
	if fid, err = strconv.Atoi(parts[0]); err != nil || fid < 1 {
		return "", 0, 0, false
	}
	if value, err = strconv.ParseFloat(parts[1], 64); err != nil {
		return "", 0, 0, false
	}
	return parts[0], fid, value, true
}
예제 #9
0
파일: rbm.go 프로젝트: sguzwf/mlf
// derivativeWorker receives instances from ch and, for each one, sends a
// hiddenDim x visibleDim derivative matrix on out. The matrix holds the
// positive statistics (from the data) minus the negative statistics (after
// rbm.options.NumCD reconstruction rounds).
//
// The worker runs until ch is closed and then returns, so the caller can
// shut it down by closing ch. Index 0 of the visible and hidden vectors is
// the bias unit and stays at 1.0. The model's read lock is held only while
// the weights are being used; it is released before the negative statistics
// are accumulated and the result is sent.
func (rbm *RBM) derivativeWorker(ch chan *data.Instance, out chan *util.Matrix,
	visibleDim int, hiddenDim int) {
	// Visible units, with the bias unit at index 0.
	visibleUnits := util.NewVector(visibleDim)
	visibleUnits.Set(0, 1.0)

	// Hidden units: activation probabilities and their binary samples.
	hiddenUnitsProb := util.NewVector(hiddenDim)
	hiddenUnitsBinary := util.NewVector(hiddenDim)
	hiddenUnitsProb.Set(0, 1.0)
	hiddenUnitsBinary.Set(0, 1.0)

	// Ranging over the channel ends the loop when ch is closed, instead of
	// receiving a nil instance and dereferencing it.
	for instance := range ch {
		derivative := util.NewMatrix(hiddenDim, visibleDim)

		// Initialise the visible units from the instance.
		for j := 1; j < visibleDim; j++ {
			visibleUnits.Set(j, instance.Features.Get(j))
		}

		rbm.lock.RLock()

		// Update the hidden units from the data.
		for i := 1; i < hiddenDim; i++ {
			prob := rbm.logistic(visibleUnits, i, true)
			hiddenUnitsProb.Set(i, prob)
			if rbm.options.UseBinaryHiddenUnits {
				hiddenUnitsBinary.Set(i, rbm.bernoulli(prob))
			}
		}

		// Positive statistics.
		for i := 0; i < hiddenDim; i++ {
			for j := 0; j < visibleDim; j++ {
				derivative.Set(i, j, visibleUnits.Get(j)*hiddenUnitsProb.Get(i))
			}
		}

		// CD-n: alternately reconstruct the visible units and recompute the
		// hidden units.
		for nCD := 0; nCD < rbm.options.NumCD; nCD++ {
			for j := 1; j < visibleDim; j++ {
				var prob float64
				if rbm.options.UseBinaryHiddenUnits {
					prob = rbm.logistic(hiddenUnitsBinary, j, false)
				} else {
					prob = rbm.logistic(hiddenUnitsProb, j, false)
				}
				visibleUnits.Set(j, prob)
			}
			for i := 1; i < hiddenDim; i++ {
				prob := rbm.logistic(visibleUnits, i, true)
				hiddenUnitsProb.Set(i, prob)
				if rbm.options.UseBinaryHiddenUnits {
					hiddenUnitsBinary.Set(i, rbm.bernoulli(prob))
				}
			}
		}

		rbm.lock.RUnlock()

		// Subtract the negative statistics.
		for i := 0; i < hiddenDim; i++ {
			for j := 0; j < visibleDim; j++ {
				old := derivative.Get(i, j)
				derivative.Set(i, j, old-visibleUnits.Get(j)*hiddenUnitsProb.Get(i))
			}
		}

		out <- derivative
	}
}
예제 #10
0
// TestTrain trains max-entropy classifiers on a four-instance, two-label
// dataset with both the "gd" and the "lbfgs" optimizer. The gd-trained model
// is written to "test.mlf", loaded back, and must predict the training label
// of every instance. The lbfgs trainer is only run; its result is not
// inspected. The model file is left in the working directory.
func TestTrain(t *testing.T) {
	set := data.NewInmemDataset()

	samples := []struct {
		features []float64
		label    int
	}{
		{[]float64{1, 1, 1, 3}, 0},
		{[]float64{1, 3, 1, 5}, 0},
		{[]float64{1, 3, 4, 7}, 1},
		{[]float64{1, 2, 8, 6}, 1},
	}

	instances := make([]*data.Instance, 0, len(samples))
	for _, sample := range samples {
		instance := new(data.Instance)
		instance.Features = util.NewVector(4)
		instance.Features.SetValues(sample.features)
		instance.Output = &data.InstanceOutput{Label: sample.label}
		set.AddInstance(instance)
		instances = append(instances, instance)
	}
	set.Finalize()

	gdOptions := optimizer.OptimizerOptions{
		OptimizerName:         "gd",
		RegularizationScheme:  2,
		RegularizationFactor:  1,
		LearningRate:          0.1,
		ConvergingDeltaWeight: 1e-6,
		ConvergingSteps:       3,
		MaxIterations:         0,
		GDBatchSize:           0, // full-batch
	}
	gdTrainer := NewMaxEntClassifierTrainer(TrainerOptions{Optimizer: gdOptions})

	// The lbfgs configuration differs from gd only in name and learning rate.
	lbfgsOptions := gdOptions
	lbfgsOptions.OptimizerName = "lbfgs"
	lbfgsOptions.LearningRate = 1
	lbfgsTrainer := NewMaxEntClassifierTrainer(TrainerOptions{Optimizer: lbfgsOptions})
	lbfgsTrainer.Train(set)

	gdTrainer.Train(set).Write("test.mlf")
	model := LoadModel("test.mlf")
	for i, want := range []string{"0", "0", "1", "1"} {
		util.Expect(t, want, model.Predict(instances[i]).Label)
	}
}
예제 #11
0
// TestInMemDataset checks which instances an in-memory dataset accepts, the
// options it derives after Finalize, and plain iteration and Skip over the
// accepted instances.
func TestInMemDataset(t *testing.T) {
	set := NewInmemDataset()

	// add fills in the feature values, offers the instance to the dataset
	// and checks whether it was accepted.
	add := func(instance *Instance, values []float64, accepted string) {
		instance.Features.SetValues(values)
		util.Expect(t, accepted, set.AddInstance(instance))
	}

	add(&Instance{Features: util.NewVector(3), Output: &InstanceOutput{Label: 1}},
		[]float64{1, 2, 3}, "true")
	add(&Instance{Features: util.NewVector(3), Output: &InstanceOutput{Label: 2}},
		[]float64{3, 4, 5}, "true")
	// Expected to be rejected: a sparse vector after dense ones.
	add(&Instance{Features: util.NewSparseVector(), Output: &InstanceOutput{Label: 0}},
		[]float64{3, 4, 5}, "false")
	// Expected to be rejected: four features instead of three.
	add(&Instance{Features: util.NewVector(4), Output: &InstanceOutput{Label: 4}},
		[]float64{3, 4, 5, 6}, "false")
	// Expected to be rejected: no output although earlier instances had one.
	add(&Instance{Features: util.NewVector(3)},
		[]float64{3, 5, 5}, "false")

	set.Finalize()

	// Dataset options.
	util.Expect(t, "false", set.GetOptions().FeatureIsSparse)
	util.Expect(t, "3", set.GetOptions().FeatureDimension)
	util.Expect(t, "true", set.GetOptions().IsSupervisedLearning)
	util.Expect(t, "3", set.GetOptions().NumLabels)

	util.Expect(t, "2", set.NumInstances())

	// Only the two accepted instances are visited, in insertion order.
	iter := set.CreateIterator()
	iter.Start()
	for _, row := range [][3]string{{"1", "2", "3"}, {"3", "4", "5"}} {
		util.Expect(t, "false", iter.End())
		for k, want := range row {
			util.Expect(t, want, iter.GetInstance().Features.Get(k))
		}
		iter.Next()
	}
	util.Expect(t, "true", iter.End())

	// Skipping two instances from the start reaches the end.
	iter = set.CreateIterator()
	iter.Start()
	iter.Skip(2)
	util.Expect(t, "true", iter.End())

	util.Expect(t, "2", set.NumInstances())
}