Beispiel #1
0
// ServeHTTP classifies a feature vector submitted via POST.
//
// The "features" form field must hold a JSON object mapping feature
// names to float64 values; the response body is a JSON object of the
// form {"prediction": <float>}.
func (c *ClassifierHandler) ServeHTTP(w http.ResponseWriter,
	req *http.Request) {
	if req.Method != "POST" {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	features := req.FormValue("features")
	if len(features) == 0 {
		// Missing input is a client error, not a server failure.
		http.Error(w, "need input features", http.StatusBadRequest)
		return
	}
	fs := make(map[string]float64)
	if err := json.Unmarshal([]byte(features), &fs); err != nil {
		// Malformed JSON is likewise the caller's fault.
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	sample := core.NewSample()
	for k, v := range fs {
		// Feature names are hashed to the int64 ids the model expects.
		sample.AddFeature(core.Feature{
			Id:    util.Hash(k),
			Value: v,
		})
	}
	p := c.classifier.Predict(sample)
	output, err := json.Marshal(map[string]interface{}{
		"prediction": p,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	// BUG FIX: fmt.Fprint(w, output) rendered the []byte as decimal
	// numbers ("[123 34 ...]"); write the raw JSON bytes instead.
	w.Write(output)
}
Beispiel #2
0
// PredictVector maps x into kernel space against the stored support
// vectors and scores the result with the underlying FTRL model.
func (c *L1VM) PredictVector(x *core.Vector) float64 {
	sample := core.NewSample()
	for idx, sv := range c.sv {
		// Feature idx holds the RBF similarity between x and support
		// vector idx.
		feature := core.Feature{
			Id:    int64(idx),
			Value: RBFKernel(sv, x, c.radius),
		}
		sample.AddFeature(feature)
	}
	return c.ftrl.Predict(sample)
}
Beispiel #3
0
// NewSample converts a name->value feature map into a core.Sample,
// hashing each feature name to its numeric id.
func NewSample(fs map[string]float64) *core.Sample {
	s := core.NewSample()
	for name, value := range fs {
		s.AddFeature(core.Feature{
			Id:    util.Hash(name),
			Value: value,
		})
	}
	return s
}
Beispiel #4
0
// Train fits the L1VM: it samples up to c.count positive and c.count
// negative training examples as support vectors, re-expresses every
// sample as its RBF-kernel similarities to those vectors, and trains
// the internal FTRL model on the transformed data set.
func (c *L1VM) Train(dataset *core.DataSet) {
	c.sv = []*core.Vector{}
	kernelDataset := core.NewDataSet()

	// Partition sample indices by label so both classes contribute
	// support vectors.
	positive := []int{}
	negative := []int{}
	for i, si := range dataset.Samples {
		if si.Label > 0.0 {
			positive = append(positive, i)
		} else {
			negative = append(negative, i)
		}
	}

	// Randomly pick at most c.count support vectors from each class.
	// BUG FIX: the original condition `i > c.count` admitted c.count+1
	// vectors per class; `i >= c.count` caps it at exactly c.count.
	for i, k := range rand.Perm(len(positive)) {
		if i >= c.count {
			break
		}
		c.sv = append(c.sv, dataset.Samples[positive[k]].GetFeatureVector())
	}
	for i, k := range rand.Perm(len(negative)) {
		if i >= c.count {
			break
		}
		c.sv = append(c.sv, dataset.Samples[negative[k]].GetFeatureVector())
	}

	// Kernel transform: feature j of each sample is K(x, sv[j]).
	for _, si := range dataset.Samples {
		xi := si.GetFeatureVector()
		tsample := core.NewSample()
		tsample.Label = si.Label
		for j, xj := range c.sv {
			tsample.AddFeature(core.Feature{Id: int64(j), Value: RBFKernel(xi, xj, c.radius)})
		}
		kernelDataset.AddSample(tsample)
	}

	c.ftrl.Train(kernelDataset)
}
Beispiel #5
0
// constructFeatureVectors builds one sparse sample per internet user,
// assigning each distinct feature string a dense integer id on first
// sight.
//
// It returns the samples and the id->feature-name table (the inverse
// of the mapping used while encoding).
func constructFeatureVectors(iusers []internetUser) ([]*hector.Sample, []string) {
	protos := make([]*hector.Sample, len(iusers))
	f2id := make(map[string]int)
	id2f := make([]string, 0)

	for i, u := range iusers {
		protos[i] = hector.NewSample()

		for _, f := range u {
			id, exists := f2id[f]
			if !exists {
				// First sighting: allocate the next dense id.
				id = len(id2f)
				id2f = append(id2f, f)
				f2id[f] = id
			}
			// Binary presence feature. Keyed fields (instead of the
			// positional literal) keep `go vet` clean and survive any
			// reordering of hector.Feature's fields.
			protos[i].AddFeature(hector.Feature{Id: int64(id), Value: 1.0})
		}
	}
	return protos, id2f
}
Beispiel #6
0
// main cross-validates each base method over 5 folds of the training
// file, printing per-fold and average AUC, then stacks the methods:
// a linear-regression ensembler is trained on the out-of-fold
// predictions and test-set predictions are written to <test_path>.out.
func main() {
	// NOTE(review): pred_path is obtained but never used below —
	// confirm it is still needed from PrepareParams.
	train_path, test_path, pred_path, _, params := hector.PrepareParams()
	total := 5
	methods := []string{"ftrl", "fm"}
	// Per-method out-of-fold predictions (training data) and full
	// test-set predictions, in the same order as `methods`.
	all_methods_predictions := [][]*eval.LabelPrediction{}
	all_methods_test_predictions := [][]*eval.LabelPrediction{}
	for _, method := range methods {
		fmt.Println(method)
		average_auc := 0.0
		all_predictions := []*eval.LabelPrediction{}
		for part := 0; part < total; part++ {
			// Fold `part` becomes the held-out set; the rest trains.
			train, test, _ := SplitFile(train_path, total, part)
			classifier := hector.GetClassifier(method)

			auc, predictions, _ := hector.AlgorithmRun(classifier, train, test, "", params)
			fmt.Println("AUC:")
			fmt.Println(auc)
			average_auc += auc
			// The fold files are temporary; remove them right away.
			os.Remove(train)
			os.Remove(test)
			classifier = nil
			for _, pred := range predictions {
				all_predictions = append(all_predictions, pred)
			}
		}
		all_methods_predictions = append(all_methods_predictions, all_predictions)
		fmt.Println(average_auc / float64(total))

		// Retrain on the full training file to score the real test set.
		classifier := hector.GetClassifier(method)
		fmt.Println(test_path)
		_, test_predictions, _ := hector.AlgorithmRun(classifier, train_path, test_path, "", params)
		all_methods_test_predictions = append(all_methods_test_predictions, test_predictions)
	}

	// Producer/consumer pair: one goroutine streams stacked samples
	// into the data set's channel, the other trains the ensembler from
	// it. dataset.Samples is evidently a channel here (see the send and
	// close below).
	var wait sync.WaitGroup
	wait.Add(2)
	dataset := core.NewDataSet()
	go func() {
		for i, _ := range all_methods_predictions[0] {
			// One stacked sample per training example: feature j is
			// method j's out-of-fold prediction for example i.
			sample := core.NewSample()
			sample.Label = all_methods_predictions[0][i].Label
			for j, _ := range all_methods_predictions {
				feature := core.Feature{Id: int64(j), Value: all_methods_predictions[j][i].Prediction}
				sample.AddFeature(feature)
			}
			dataset.Samples <- sample
		}
		// Sender closes the channel so the consumer's range terminates.
		close(dataset.Samples)
		wait.Done()
	}()

	ensembler := lr.LinearRegression{}
	go func() {
		ensembler.Init(params)
		ensembler.Train(dataset)
		wait.Done()
	}()
	wait.Wait()

	fmt.Println(ensembler.Model)

	// Same producer/consumer shape for the test set. Reusing the
	// WaitGroup after Wait has returned is safe.
	wait.Add(2)
	// NOTE(review): this pass uses hector.NewDataSet while the training
	// pass used core.NewDataSet — confirm both refer to the same type.
	test_dataset := hector.NewDataSet()
	go func() {
		for i, _ := range all_methods_test_predictions[0] {
			sample := hector.NewSample()
			// NOTE(review): Label is set from method 0's Prediction, not
			// a ground-truth label, and the writer goroutine below emits
			// sample.Label instead of calling ensembler.Predict (see the
			// commented-out call). Confirm this is intentional.
			sample.Label = all_methods_test_predictions[0][i].Prediction
			for j, _ := range all_methods_test_predictions {
				feature := hector.Feature{Id: int64(j), Value: all_methods_test_predictions[j][i].Prediction}
				sample.AddFeature(feature)
			}
			test_dataset.Samples <- sample
		}
		close(test_dataset.Samples)
		wait.Done()
	}()

	go func() {
		// NOTE(review): the error from os.Create is ignored; a failure
		// would panic on the nil file below.
		pred_file, _ := os.Create(test_path + ".out")
		for sample := range test_dataset.Samples {
			prediction := sample.Label //ensembler.Predict(sample)
			pred_file.WriteString(strconv.FormatFloat(prediction, 'g', 5, 64) + "\n")
		}
		defer pred_file.Close()
		wait.Done()
	}()
	wait.Wait()
}
Beispiel #7
0
// NewDataSetSample reads the CSV file at path and builds a labeled
// data set, returning it together with the raw status value (column 2)
// of every row.
//
// Labeling: status "4" or "2" is positive; additionally a row with a
// non-empty column 10 and a status other than "3" is treated as
// positive. NOTE(review): column meanings are inferred from the log
// tags below — confirm against the producing export.
func NewDataSetSample(path string) (*core.DataSet, []string) {
	samples := []*core.Sample{}
	statusID := []string{}
	f, err := os.Open(path)
	if err != nil {
		// BUG FIX: the open error used to be discarded, after which the
		// read loop spun on a nil file. Report it and return an empty
		// data set instead.
		log.Println(err)
		return &core.DataSet{Samples: samples}, statusID
	}
	// BUG FIX: the file handle was never closed.
	defer f.Close()
	r := csv.NewReader(bufio.NewReader(f))
	// Skip the header row.
	r.Read()

	// Numeric feature columns, keyed by the name they are hashed under.
	numericColumns := []struct {
		name string
		col  int
	}{
		{"hour", 4},
		{"day_of_week", 5},
		{"distance_from_order_on_creation", 6},
		{"driver_location_key", 7},
		{"driver_latitude", 8},
		{"driver_longitude", 9},
		{"origin_location_key", 13},
		{"origin_latitude", 14},
		{"origin_longitude", 15},
	}

	for {
		record, err := r.Read()
		if err != nil {
			// Stop at EOF. BUG FIX: any other read error (malformed row)
			// previously fell through and panicked on a nil record; now
			// it is logged and the loop stops.
			if err != io.EOF {
				log.Println(err)
			}
			break
		}
		sample := core.NewSample()
		fs := make(map[string]float64)
		log.Println(record[2])
		statusID = append(statusID, record[2])
		switch record[2] {
		case "4", "2":
			sample.Label = 1
		default:
			sample.Label = 0
		}
		log.Println(record[10])
		if sample.Label == 0 && record[10] != "" && record[2] != "3" {
			sample.Label = 1
		}
		for _, nc := range numericColumns {
			log.Println(nc.name + " " + record[nc.col])
			// Unparseable values fall back to 0 (ParseFloat error
			// ignored, matching the original behavior).
			fs[nc.name], _ = strconv.ParseFloat(record[nc.col], 64)
		}
		for k, v := range fs {
			sample.AddFeature(core.Feature{
				Id:    util.Hash(k),
				Value: v,
			})
		}
		samples = append(samples, sample)
	}
	d := &core.DataSet{
		Samples: samples,
	}
	return d, statusID
}