func main() { train_path, _, _, method, params := hector.PrepareParams() global, _ := strconv.ParseInt(params["global"], 10, 64) profile, _ := params["profile"] dataset := core.NewDataSet() dataset.Load(train_path, global) cv, _ := strconv.ParseInt(params["cv"], 10, 32) total := int(cv) if profile != "" { fmt.Println(profile) f, err := os.Create(profile) if err != nil { fmt.Println("%v", err) log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } average_auc := 0.0 for part := 0; part < total; part++ { train, test := SplitFile(dataset, total, part) classifier := hector.GetClassifier(method) classifier.Init(params) auc, _ := hector.AlgorithmRunOnDataSet(classifier, train, test, "", params) fmt.Println("AUC:") fmt.Println(auc) average_auc += auc classifier = nil } fmt.Println(average_auc / float64(total)) }
func main() { _, _, _, method, params := hector.PrepareParams() ch := &ClassifierHandler{ classifier: hector.GetClassifier(method), } model, ok := params["model"] if !ok { log.Fatalln("please input model file") } ch.classifier.LoadModel(model) http.Handle("/predict", ch) err := http.ListenAndServe(":"+params["port"], nil) if err != nil { log.Fatal(err) } }
func main() { train, test, pred, method, params := hector.PrepareParams() action, _ := params["action"] classifier := hector.GetClassifier(method) if action == "" { auc, _, _ := hector.AlgorithmRun(classifier, train, test, pred, params) fmt.Println("AUC:") fmt.Println(auc) } else if action == "train" { hector.AlgorithmTrain(classifier, train, params) } else if action == "test" { auc, _, _ := hector.AlgorithmTest(classifier, test, pred, params) fmt.Println("AUC:") fmt.Println(auc) } }
func main() { train_path, test_path, pred_path, _, params := hector.PrepareParams() total := 5 methods := []string{"ftrl", "fm"} all_methods_predictions := [][]*eval.LabelPrediction{} all_methods_test_predictions := [][]*eval.LabelPrediction{} for _, method := range methods { fmt.Println(method) average_auc := 0.0 all_predictions := []*eval.LabelPrediction{} for part := 0; part < total; part++ { train, test, _ := SplitFile(train_path, total, part) classifier := hector.GetClassifier(method) auc, predictions, _ := hector.AlgorithmRun(classifier, train, test, "", params) fmt.Println("AUC:") fmt.Println(auc) average_auc += auc os.Remove(train) os.Remove(test) classifier = nil for _, pred := range predictions { all_predictions = append(all_predictions, pred) } } all_methods_predictions = append(all_methods_predictions, all_predictions) fmt.Println(average_auc / float64(total)) classifier := hector.GetClassifier(method) fmt.Println(test_path) _, test_predictions, _ := hector.AlgorithmRun(classifier, train_path, test_path, "", params) all_methods_test_predictions = append(all_methods_test_predictions, test_predictions) } var wait sync.WaitGroup wait.Add(2) dataset := core.NewDataSet() go func() { for i, _ := range all_methods_predictions[0] { sample := core.NewSample() sample.Label = all_methods_predictions[0][i].Label for j, _ := range all_methods_predictions { feature := core.Feature{Id: int64(j), Value: all_methods_predictions[j][i].Prediction} sample.AddFeature(feature) } dataset.Samples <- sample } close(dataset.Samples) wait.Done() }() ensembler := lr.LinearRegression{} go func() { ensembler.Init(params) ensembler.Train(dataset) wait.Done() }() wait.Wait() fmt.Println(ensembler.Model) wait.Add(2) test_dataset := hector.NewDataSet() go func() { for i, _ := range all_methods_test_predictions[0] { sample := hector.NewSample() sample.Label = all_methods_test_predictions[0][i].Prediction for j, _ := range all_methods_test_predictions { feature := hector.Feature{Id: int64(j), Value: all_methods_test_predictions[j][i].Prediction} sample.AddFeature(feature) } test_dataset.Samples <- sample } close(test_dataset.Samples) wait.Done() }() go func() { pred_file, _ := os.Create(test_path + ".out") for sample := range test_dataset.Samples { prediction := sample.Label //ensembler.Predict(sample) pred_file.WriteString(strconv.FormatFloat(prediction, 'g', 5, 64) + "\n") } defer pred_file.Close() wait.Done() }() wait.Wait() }
func NewModel(driversID int) algo.Classifier { params := make(map[string]string) params["steps"] = "30" params["max-depth"] = "7" params["min-leaf-size"] = "10" params["tree-count"] = "10" params["learning-rate"] = "0.0001" params["learning-rate-discount"] = "1.0" params["regularization"] = "0.0001" params["gini"] = "1.0" params["hidden"] = "15" params["k"] = "10" params["feature-count"] = "9.0" params["dt-sample-ratio"] = "1.0" driversModels[driversID] = hector.GetClassifier("rf") dataSet, _ := NewDataSetSample("716new.csv") //dataSetTest, statusID := NewDataSetSample("716dec-3.csv") driversModels[driversID].Init(params) log.Println("Train") //train, test := SplitFile(dataSet, 2, 0) //auc, _ := hector.AlgorithmRunOnDataSet(driversModels[driversID[i]], train, test, "", params) //log.Print("AUC: ") //log.Println(auc) driversModels[driversID].Train(dataSet) log.Println("save") driversModels[driversID].SaveModel("716") return driversModels[driversID] //driversModels[driversID[i]].LoadModel("6116") /*log.Println("predict... ") //sample := SampleTest() predictAccept := 0.0 predictAcceptTotal := 0.0 predictReject := 0.0 predictRejectTotal := 0.0 countR := 0 countA := 0 for j := 0; j < len(dataSetTest.Samples); j++ { sample := dataSetTest.Samples[j] predict := driversModels[driversID[i]].Predict(sample) if statusID[j] == "4" { predictAcceptTotal++ predictAccept += predict if predict > 0.7 { countA++ log.Println("A:", predict) } } else { predictReject += predict predictRejectTotal++ if predict > 0.7 { countR++ log.Println("R:", predict) } } } log.Print("Accept: ") log.Println(predictAccept / predictAcceptTotal) log.Println(countA) log.Print("Reject: ") log.Println(predictReject / predictRejectTotal) log.Println(countR)*/ }