Example #1
func SplitFile(dataset *core.DataSet, total, part int) (*core.DataSet, *core.DataSet) {
	train := core.NewDataSet()
	test := core.NewDataSet()
	for i, sample := range dataset.Samples {
		if i%total == part {
			test.AddSample(sample)
		} else {
			train.AddSample(sample)
		}
	}
	return train, test
}
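SplitFile partitions the samples round-robin: for a given part, every sample whose index i satisfies i % total == part goes to the test set and all others to the training set, so running part = 0 .. total-1 yields disjoint test folds that together cover the whole dataset. A minimal sketch of driving it for k-fold evaluation, assuming only the core.DataSet API shown above (the evaluate callback is a hypothetical placeholder, not part of hector; Example #2 shows the real loop):

// crossValidate averages a per-fold score over k round-robin folds.
// evaluate is a hypothetical stand-in for whatever fold scoring you run.
func crossValidate(dataset *core.DataSet, k int, evaluate func(train, test *core.DataSet) float64) float64 {
	sum := 0.0
	for part := 0; part < k; part++ {
		train, test := SplitFile(dataset, k, part)
		sum += evaluate(train, test)
	}
	return sum / float64(k)
}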
Example #2
func main() {
	train_path, _, _, method, params := hector.PrepareParams()
	global, _ := strconv.ParseInt(params["global"], 10, 64)
	profile := params["profile"]
	dataset := core.NewDataSet()
	dataset.Load(train_path, global)

	cv, _ := strconv.ParseInt(params["cv"], 10, 32)
	total := int(cv)

	if profile != "" {
		fmt.Println(profile)
		f, err := os.Create(profile)
		if err != nil {
			fmt.Println("%v", err)
			log.Fatal(err)
		}
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}

	average_auc := 0.0
	for part := 0; part < total; part++ {
		train, test := SplitFile(dataset, total, part)
		classifier := hector.GetClassifier(method)
		classifier.Init(params)
		auc, _ := hector.AlgorithmRunOnDataSet(classifier, train, test, "", params)
		fmt.Println("AUC:")
		fmt.Println(auc)
		average_auc += auc
		classifier = nil
	}
	fmt.Println(average_auc / float64(total))
}
Example #3
func main() {
	train_path, _, _, method, params := hector.PrepareParams()
	global, _ := strconv.ParseInt(params["global"], 10, 64)
	profile := params["profile"]
	dataset := core.NewDataSet()
	dataset.Load(train_path, global)

	cv, _ := strconv.ParseInt(params["cv"], 10, 32)
	total := int(cv)

	if profile != "" {
		f, err := os.Create(profile)
		if err != nil {
			log.Fatal(err)
		}
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}

	average_accuracy := 0.0
	for part := 0; part < total; part++ {
		train, test := SplitFile(dataset, total, part)
		classifier := hector.GetMutliClassClassifier(method)
		classifier.Init(params)
		accuracy := hector.MultiClassRunOnDataSet(classifier, train, test, "", params)
		fmt.Println("accuracy : ", accuracy)
		average_accuracy += accuracy
		classifier = nil
		train = nil
		test = nil
		runtime.GC()
	}
	fmt.Println(average_accuracy / float64(total))
}
Example #4
func MultiClassRun(classifier algo.MultiClassClassifier, train_path string, test_path string, pred_path string, params map[string]string) (float64, error) {
	global, _ := strconv.ParseInt(params["global"], 10, 64)
	train_dataset := core.NewDataSet()

	err := train_dataset.Load(train_path, global)

	if err != nil {
		return 0.5, err
	}

	test_dataset := core.NewDataSet()
	err = test_dataset.Load(test_path, global)
	if err != nil {
		return 0.5, err
	}
	classifier.Init(params)
	accuracy := MultiClassRunOnDataSet(classifier, train_dataset, test_dataset, pred_path, params)

	return accuracy, nil
}
Example #5
func AlgorithmRun(classifier algo.Classifier, train_path string, test_path string, pred_path string, params map[string]string) (float64, []*eval.LabelPrediction, error) {
	global, _ := strconv.ParseInt(params["global"], 10, 64)
	train_dataset := core.NewDataSet()

	err := train_dataset.Load(train_path, global)

	if err != nil {
		return 0.5, nil, err
	}

	test_dataset := core.NewDataSet()
	err = test_dataset.Load(test_path, global)
	if err != nil {
		return 0.5, nil, err
	}
	classifier.Init(params)
	auc, predictions := AlgorithmRunOnDataSet(classifier, train_dataset, test_dataset, pred_path, params)

	return auc, predictions, nil
}
Example #6
func constructTrainingData(records []creditRecord, borrower2iuser []int, protos []*hector.Sample) *hector.DataSet {
	data := hector.NewDataSet()
	for borrower, record := range records {
		for i := 0; i < record.borrowed; i++ {
			s := protos[borrower2iuser[borrower]].Clone()
			if i < record.returned {
				s.Label = 1
			} else {
				s.Label = 0
			}
			data.AddSample(s)
		}
	}
	return data
}
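constructTrainingData expands each credit record into one cloned prototype sample per borrowed item: the first record.returned clones are labeled 1 (repaid) and the remaining borrowed - returned clones are labeled 0. For example (illustrative values only, not taken from the surrounding code):

// A record with borrowed = 3 and returned = 2 for one borrower produces
// three clones of that borrower's prototype sample, labeled 1, 1, 0.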
Example #7
func main() {
	path := flag.String("input", "", "path of dataset")
	flag.Parse()

	ds := core.NewDataSet()
	ds.Load(*path, -1)
	iv := core.InformationValue(ds)
	fs := make(FeatureValueList, 0, len(iv))
	for f, v := range iv {
		fs = append(fs, FeatureValue{Name: ds.FeatureNameIdMap[f], Value: v})
	}
	sort.Sort(fs)
	for _, f := range fs {
		fmt.Printf("%s\t%v\n", f.Name, f.Value)
	}
}
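The snippet above only sorts and prints whatever per-feature scores core.InformationValue returns; the scoring itself is not shown here. If it follows the usual information-value statistic (an assumption about the library, not confirmed by this example), each feature's score is derived from how differently its values are distributed across the two classes, roughly:

// Assumed standard information-value formula (not copied from hector's source):
//   IV = sum over value bins of (P(bin|positive) - P(bin|negative)) * ln(P(bin|positive) / P(bin|negative))
// Larger IV means the feature separates positives from negatives more strongly.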
Example #8
func (c *L1VM) Train(dataset *core.DataSet) {
	c.sv = []*core.Vector{}
	kernel_dataset := core.NewDataSet()

	positive := []int{}
	negative := []int{}
	for i, si := range dataset.Samples {
		if si.Label > 0.0 {
			positive = append(positive, i)
		} else {
			negative = append(negative, i)
		}
	}

	perm_positive := rand.Perm(len(positive))

	for i, k := range perm_positive {
		if i > c.count {
			break
		}
		c.sv = append(c.sv, dataset.Samples[positive[k]].GetFeatureVector())
	}

	perm_negative := rand.Perm(len(negative))

	for i, k := range perm_negative {
		if i > c.count {
			break
		}
		c.sv = append(c.sv, dataset.Samples[negative[k]].GetFeatureVector())
	}

	for _, si := range dataset.Samples {
		xi := si.GetFeatureVector()
		tsample := core.NewSample()
		tsample.Label = si.Label
		for j, xj := range c.sv {
			tsample.AddFeature(core.Feature{Id: int64(j), Value: RBFKernel(xi, xj, c.radius)})
		}
		kernel_dataset.AddSample(tsample)
	}

	c.ftrl.Train(kernel_dataset)
}
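Train builds an explicit kernel feature map rather than a dual SVM: it samples up to c.count support vectors from the positive class and up to c.count from the negative class, re-represents every training sample as a vector whose j-th feature is its RBF similarity to support vector j, and then trains the wrapped FTRL model on that transformed dataset. RBFKernel itself is not shown in this example; as a hedged reminder of the usual Gaussian form it presumably implements (the exact scaling of c.radius is an assumption):

// Assumed Gaussian/RBF shape, not copied from hector:
//   RBFKernel(x, y, r) ~ exp(-||x - y||^2 / r)
// so each transformed feature measures how close a sample is to one support vector.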
Example #9
func MultiClassTest(classifier algo.MultiClassClassifier, test_path string, pred_path string, params map[string]string) (float64, error) {
	global, _ := strconv.ParseInt(params["global"], 10, 64)

	model_path := params["model"]
	classifier.Init(params)
	if model_path != "" {
		classifier.LoadModel(model_path)
	} else {
		return 0.0, nil
	}

	test_dataset := core.NewDataSet()
	err := test_dataset.Load(test_path, global)
	if err != nil {
		return 0.0, err
	}

	accuracy := MultiClassRunOnDataSet(classifier, nil, test_dataset, pred_path, params)

	return accuracy, nil
}
Example #10
func MultiClassTrain(classifier algo.MultiClassClassifier, train_path string, params map[string]string) error {
	global, _ := strconv.ParseInt(params["global"], 10, 64)
	train_dataset := core.NewDataSet()

	err := train_dataset.Load(train_path, global)

	if err != nil {
		return err
	}

	classifier.Init(params)
	classifier.Train(train_dataset)

	model_path := params["model"]

	if model_path != "" {
		classifier.SaveModel(model_path)
	}

	return nil
}
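MultiClassTrain and MultiClassTest (Example #9) are designed to be paired through the "model" parameter: training saves the model to that path when it is set, and testing returns immediately unless it can load a model from the same path. A minimal sketch of that flow, assuming it runs inside a main where method, train_path, test_path and params come from hector.PrepareParams() as in the earlier examples, log and fmt are imported, and the model path value is only a placeholder:

	params["model"] = "credit.model" // hypothetical output path
	trainClf := hector.GetMutliClassClassifier(method)
	if err := hector.MultiClassTrain(trainClf, train_path, params); err != nil {
		log.Fatal(err)
	}
	// Use a fresh classifier for testing; MultiClassTest calls Init and LoadModel itself.
	testClf := hector.GetMutliClassClassifier(method)
	accuracy, err := hector.MultiClassTest(testClf, test_path, "", params)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("accuracy :", accuracy)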
Example #11
func AlgorithmTest(classifier algo.Classifier, test_path string, pred_path string, params map[string]string) (float64, []*eval.LabelPrediction, error) {
	global, _ := strconv.ParseInt(params["global"], 10, 64)

	model_path := params["model"]
	classifier.Init(params)
	if model_path != "" {
		classifier.LoadModel(model_path)
	} else {
		return 0.0, nil, nil
	}

	test_dataset := core.NewDataSet()
	err := test_dataset.Load(test_path, global)
	if err != nil {
		return 0.0, nil, err
	}

	auc, predictions := AlgorithmRunOnDataSet(classifier, nil, test_dataset, pred_path, params)

	return auc, predictions, nil
}
Example #12
func EncodeLabelAction(e *core.LabelEncoder, data_path string) {

	dataset := core.NewDataSet()
	err := dataset.Load(data_path, -1)

	if err != nil {
		log.Fatal(err)
	}

	encoded_label_dataset := e.TransformDataset(dataset)
	// Write the re-labeled samples next to the input file.
	output_file, err := os.Create(data_path + ".hector")
	if err != nil {
		log.Fatal(err)
	}
	defer output_file.Close()

	for _, sample := range encoded_label_dataset.Samples {
		output_file.WriteString(string(sample.ToString(false)) + "\n")
	}
}
Example #13
func main() {
	train_path, test_path, pred_path, _, params := hector.PrepareParams()
	total := 5
	methods := []string{"ftrl", "fm"}
	all_methods_predictions := [][]*eval.LabelPrediction{}
	all_methods_test_predictions := [][]*eval.LabelPrediction{}
	for _, method := range methods {
		fmt.Println(method)
		average_auc := 0.0
		all_predictions := []*eval.LabelPrediction{}
		for part := 0; part < total; part++ {
			train, test, _ := SplitFile(train_path, total, part)
			classifier := hector.GetClassifier(method)

			auc, predictions, _ := hector.AlgorithmRun(classifier, train, test, "", params)
			fmt.Println("AUC:")
			fmt.Println(auc)
			average_auc += auc
			os.Remove(train)
			os.Remove(test)
			classifier = nil
			all_predictions = append(all_predictions, predictions...)
		}
		all_methods_predictions = append(all_methods_predictions, all_predictions)
		fmt.Println(average_auc / float64(total))

		classifier := hector.GetClassifier(method)
		fmt.Println(test_path)
		_, test_predictions, _ := hector.AlgorithmRun(classifier, train_path, test_path, "", params)
		all_methods_test_predictions = append(all_methods_test_predictions, test_predictions)
	}

	var wait sync.WaitGroup
	wait.Add(2)
	dataset := core.NewDataSet()
	go func() {
		for i := range all_methods_predictions[0] {
			sample := core.NewSample()
			sample.Label = all_methods_predictions[0][i].Label
			for j := range all_methods_predictions {
				feature := core.Feature{Id: int64(j), Value: all_methods_predictions[j][i].Prediction}
				sample.AddFeature(feature)
			}
			dataset.Samples <- sample
		}
		close(dataset.Samples)
		wait.Done()
	}()

	ensembler := lr.LinearRegression{}
	go func() {
		ensembler.Init(params)
		ensembler.Train(dataset)
		wait.Done()
	}()
	wait.Wait()

	fmt.Println(ensembler.Model)

	wait.Add(2)
	test_dataset := core.NewDataSet()
	go func() {
		for i := range all_methods_test_predictions[0] {
			sample := core.NewSample()
			sample.Label = all_methods_test_predictions[0][i].Prediction
			for j := range all_methods_test_predictions {
				feature := core.Feature{Id: int64(j), Value: all_methods_test_predictions[j][i].Prediction}
				sample.AddFeature(feature)
			}
			test_dataset.Samples <- sample
		}
		close(test_dataset.Samples)
		wait.Done()
	}()

	go func() {
		pred_file, _ := os.Create(test_path + ".out")
		defer pred_file.Close()
		for sample := range test_dataset.Samples {
			prediction := sample.Label //ensembler.Predict(sample)
			pred_file.WriteString(strconv.FormatFloat(prediction, 'g', 5, 64) + "\n")
		}
		wait.Done()
	}()
	wait.Wait()
}
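Example #13 is a stacking ensemble: the out-of-fold predictions of each base method ("ftrl" and "fm") become the features of a new dataset whose labels are the original ones, and a lr.LinearRegression model is trained on that dataset as the ensembler. The producer/consumer goroutines and the sync.WaitGroup are there because this DataSet variant streams its Samples through a channel, so the writing goroutine must close(Samples) for Train and the final output loop to see the end of the stream. Note that the output loop writes sample.Label, i.e. the first base method's prediction, because the ensembler.Predict call is commented out.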