// ServeHTTP accepts a POST request whose "features" form field holds a JSON
// object of feature-name -> value pairs, builds a sample from it, and returns
// the classifier's prediction as JSON.
func (c *ClassifierHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if req.Method != "POST" {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	features := req.FormValue("features")
	if len(features) == 0 {
		// A missing input is a client error, not a server error.
		http.Error(w, "need input features", http.StatusBadRequest)
		return
	}
	fs := make(map[string]float64)
	if err := json.Unmarshal([]byte(features), &fs); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	sample := core.NewSample()
	for k, v := range fs {
		sample.AddFeature(core.Feature{
			Id:    util.Hash(k),
			Value: v,
		})
	}
	p := c.classifier.Predict(sample)
	output, err := json.Marshal(map[string]interface{}{
		"prediction": p,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// fmt.Fprint(w, output) would render the byte slice as decimal numbers;
	// write the raw JSON bytes instead.
	w.Write(output)
}
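// A minimal sketch of wiring the handler into net/http, assuming this code
// lives in the same package as ClassifierHandler and that
// loadTrainedClassifier is a hypothetical helper returning a trained model
// whose Predict the handler calls:
//
//	func main() {
//		clf := loadTrainedClassifier()
//		http.Handle("/predict", &ClassifierHandler{classifier: clf})
//		log.Fatal(http.ListenAndServe(":8080", nil))
//	}
//
// A request then looks like:
//
//	curl -X POST -d 'features={"hour":13,"day_of_week":2}' localhost:8080/predict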
// PredictVector maps x into the kernel feature space: feature j is the RBF
// kernel value between x and support vector j, and the underlying FTRL model
// scores the resulting sample.
func (c *L1VM) PredictVector(x *core.Vector) float64 {
	s := core.NewSample()
	for k, xs := range c.sv {
		s.AddFeature(core.Feature{Id: int64(k), Value: RBFKernel(xs, x, c.radius)})
	}
	return c.ftrl.Predict(s)
}
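// For reference, an RBF kernel computes exp(-||a-b||^2 / (2*r^2)). The sketch
// below (requires "math") is a hypothetical dense-slice illustration of the
// computation RBFKernel performs; the real implementation operates on
// *core.Vector and may scale the exponent differently.
func rbfSketch(a, b []float64, radius float64) float64 {
	d2 := 0.0
	for i := range a {
		diff := a[i] - b[i]
		d2 += diff * diff
	}
	return math.Exp(-d2 / (2 * radius * radius))
}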
// NewSample builds a sample from a map of feature names to values, hashing
// each name into a stable int64 feature id.
func NewSample(fs map[string]float64) *core.Sample {
	sample := core.NewSample()
	for k, v := range fs {
		sample.AddFeature(core.Feature{
			Id:    util.Hash(k),
			Value: v,
		})
	}
	return sample
}
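// Example usage (feature names are illustrative):
//
//	s := NewSample(map[string]float64{"hour": 13, "day_of_week": 2})
//	score := classifier.Predict(s)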
// Train builds an L1VM model: it randomly samples up to c.count support
// vectors from each class, re-expresses every training sample as a vector of
// RBF kernel values against those support vectors, and trains an FTRL model
// on the transformed dataset.
func (c *L1VM) Train(dataset *core.DataSet) {
	c.sv = []*core.Vector{}
	kernel_dataset := core.NewDataSet()

	positive := []int{}
	negative := []int{}
	for i, si := range dataset.Samples {
		if si.Label > 0.0 {
			positive = append(positive, i)
		} else {
			negative = append(negative, i)
		}
	}

	perm_positive := rand.Perm(len(positive))
	for i, k := range perm_positive {
		if i >= c.count { // the original `i > c.count` selected count+1 vectors
			break
		}
		c.sv = append(c.sv, dataset.Samples[positive[k]].GetFeatureVector())
	}
	perm_negative := rand.Perm(len(negative))
	for i, k := range perm_negative {
		if i >= c.count {
			break
		}
		c.sv = append(c.sv, dataset.Samples[negative[k]].GetFeatureVector())
	}

	for _, si := range dataset.Samples {
		xi := si.GetFeatureVector()
		tsample := core.NewSample()
		tsample.Label = si.Label
		for j, xj := range c.sv {
			tsample.AddFeature(core.Feature{Id: int64(j), Value: RBFKernel(xi, xj, c.radius)})
		}
		kernel_dataset.AddSample(tsample)
	}
	c.ftrl.Train(kernel_dataset)
}
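// A sketch of using L1VM end to end, assuming it follows the usual hector
// classifier flow and that its count/radius fields are set through Init with
// these (assumed) parameter keys:
//
//	m := &L1VM{}
//	m.Init(map[string]string{"count": "100", "radius": "1.0"})
//	m.Train(trainSet)
//	for _, s := range testSet.Samples {
//		fmt.Println(m.PredictVector(s.GetFeatureVector()))
//	}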
func constructFeatureVectors(iusers []internetUser) ([]*hector.Sample, []string) {
	protos := make([]*hector.Sample, len(iusers))
	f2id := make(map[string]int)
	id2f := make([]string, 0)
	for i, u := range iusers {
		protos[i] = hector.NewSample()
		for _, f := range u {
			id, exists := f2id[f]
			if !exists {
				id = len(id2f)
				id2f = append(id2f, f)
				f2id[f] = id
			}
			protos[i].AddFeature(hector.Feature{Id: int64(id), Value: 1.0})
		}
	}
	return protos, id2f
}
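// constructFeatureVectors one-hot encodes each user's string features with
// dense integer ids. The returned id2f slice inverts the encoding, so a
// model's feature ids can be mapped back to the original strings, e.g.:
//
//	name := id2f[int(feature.Id)]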
func main() {
	// pred_path was previously bound to a name but never used, which does not
	// compile in Go; discard it instead.
	train_path, test_path, _, _, params := hector.PrepareParams()
	total := 5
	methods := []string{"ftrl", "fm"}
	all_methods_predictions := [][]*eval.LabelPrediction{}
	all_methods_test_predictions := [][]*eval.LabelPrediction{}

	for _, method := range methods {
		fmt.Println(method)
		average_auc := 0.0
		all_predictions := []*eval.LabelPrediction{}
		// total-fold cross validation over the training file.
		for part := 0; part < total; part++ {
			train, test, _ := SplitFile(train_path, total, part)
			classifier := hector.GetClassifier(method)
			auc, predictions, _ := hector.AlgorithmRun(classifier, train, test, "", params)
			fmt.Println("AUC:")
			fmt.Println(auc)
			average_auc += auc
			os.Remove(train)
			os.Remove(test)
			classifier = nil
			all_predictions = append(all_predictions, predictions...)
		}
		all_methods_predictions = append(all_methods_predictions, all_predictions)
		fmt.Println(average_auc / float64(total))

		// Retrain on the full training file and score the held-out test file.
		classifier := hector.GetClassifier(method)
		fmt.Println(test_path)
		_, test_predictions, _ := hector.AlgorithmRun(classifier, train_path, test_path, "", params)
		all_methods_test_predictions = append(all_methods_test_predictions, test_predictions)
	}

	// Train a linear-regression ensembler over the per-method out-of-fold
	// predictions. DataSet.Samples is a channel here, so one goroutine feeds
	// samples while another trains.
	var wait sync.WaitGroup
	wait.Add(2)
	dataset := core.NewDataSet()
	go func() {
		for i := range all_methods_predictions[0] {
			sample := core.NewSample()
			sample.Label = all_methods_predictions[0][i].Label
			for j := range all_methods_predictions {
				sample.AddFeature(core.Feature{Id: int64(j), Value: all_methods_predictions[j][i].Prediction})
			}
			dataset.Samples <- sample
		}
		close(dataset.Samples)
		wait.Done()
	}()
	ensembler := lr.LinearRegression{}
	go func() {
		ensembler.Init(params)
		ensembler.Train(dataset)
		wait.Done()
	}()
	wait.Wait()
	fmt.Println(ensembler.Model)

	// Stream the test predictions through the same pipeline and write them out.
	wait.Add(2)
	test_dataset := hector.NewDataSet()
	go func() {
		for i := range all_methods_test_predictions[0] {
			sample := hector.NewSample()
			sample.Label = all_methods_test_predictions[0][i].Prediction
			for j := range all_methods_test_predictions {
				sample.AddFeature(hector.Feature{Id: int64(j), Value: all_methods_test_predictions[j][i].Prediction})
			}
			test_dataset.Samples <- sample
		}
		close(test_dataset.Samples)
		wait.Done()
	}()
	go func() {
		pred_file, _ := os.Create(test_path + ".out")
		defer pred_file.Close()
		for sample := range test_dataset.Samples {
			prediction := sample.Label // ensembler.Predict(sample)
			pred_file.WriteString(strconv.FormatFloat(prediction, 'g', 5, 64) + "\n")
		}
		wait.Done()
	}()
	wait.Wait()
}
// NewDataSetSample reads the CSV at path (skipping the header row) and builds
// a dataset of labeled samples, returning the samples along with each row's
// status id.
func NewDataSetSample(path string) (*core.DataSet, []string) {
	samples := []*core.Sample{}
	statusID := []string{}
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	r := csv.NewReader(bufio.NewReader(f))
	r.Read() // skip the header row
	for {
		record, err := r.Read()
		// Stop at EOF.
		if err == io.EOF {
			break
		}
		sample := core.NewSample()
		log.Println(record[2])
		statusID = append(statusID, record[2])
		// Statuses "4" and "2" are positives; a row with a non-empty
		// record[10] also counts as positive unless its status is "3".
		switch record[2] {
		case "4", "2":
			sample.Label = 1
		default:
			sample.Label = 0
		}
		log.Println(record[10])
		if sample.Label == 0 && record[10] != "" && record[2] != "3" {
			sample.Label = 1
		}
		// Parse the numeric feature columns.
		fs := make(map[string]float64)
		columns := []struct {
			name string
			idx  int
		}{
			{"hour", 4},
			{"day_of_week", 5},
			{"distance_from_order_on_creation", 6},
			{"driver_location_key", 7},
			{"driver_latitude", 8},
			{"driver_longitude", 9},
			{"origin_location_key", 13},
			{"origin_latitude", 14},
			{"origin_longitude", 15},
		}
		for _, c := range columns {
			log.Println(c.name + " " + record[c.idx])
			fs[c.name], _ = strconv.ParseFloat(record[c.idx], 64)
		}
		for k, v := range fs {
			sample.AddFeature(core.Feature{
				Id:    util.Hash(k),
				Value: v,
			})
		}
		samples = append(samples, sample)
	}
	d := &core.DataSet{
		Samples: samples,
	}
	return d, statusID
}
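// A hypothetical end-to-end use of the loader, assuming a hector-style
// classifier with Init/Train/Predict (the method name "ftrl" and the empty
// params are illustrative):
//
//	dataset, statusIDs := NewDataSetSample("orders.csv")
//	clf := hector.GetClassifier("ftrl")
//	clf.Init(map[string]string{})
//	clf.Train(dataset)
//	for i, s := range dataset.Samples {
//		fmt.Println(statusIDs[i], clf.Predict(s))
//	}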