Example #1
func (c *ClassifierHandler) ServeHTTP(w http.ResponseWriter,
	req *http.Request) {
	if req.Method != "POST" {
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	// The "features" form field carries a JSON object of feature name -> value.
	features := req.FormValue("features")
	if len(features) == 0 {
		http.Error(w, "need input features", http.StatusBadRequest)
		return
	}
	fs := make(map[string]float64)
	err := json.Unmarshal([]byte(features), &fs)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	// Hash each feature name into the numeric id the classifier expects.
	sample := core.NewSample()
	for k, v := range fs {
		f := core.Feature{
			Id:    util.Hash(k),
			Value: v,
		}
		sample.AddFeature(f)
	}
	p := c.classifier.Predict(sample)
	output, err := json.Marshal(map[string]interface{}{
		"prediction": p,
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	// Write the JSON bytes directly; fmt.Fprint on a []byte would print the
	// slice's numeric representation instead of the JSON string.
	w.Header().Set("Content-Type", "application/json")
	w.Write(output)
}
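A minimal sketch of how the handler above could be exercised from a test, assuming it lives in the same package and has been constructed with a trained classifier; the route, form field, and feature keys are illustrative, not part of the original code.

import (
	"net/http/httptest"
	"net/url"
	"strings"
)

// callPredict posts a JSON feature map in the "features" form field read by
// ServeHTTP and returns the raw response body, e.g. {"prediction":0.73}.
func callPredict(h *ClassifierHandler) string {
	form := url.Values{"features": {`{"hour": 9, "day_of_week": 3}`}}
	req := httptest.NewRequest("POST", "/predict", strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	rec := httptest.NewRecorder()
	h.ServeHTTP(rec, req)
	return rec.Body.String()
}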
Example #2
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet {
	out_data := NewDataSet()
	// Remember every generated feature name and its hashed id so the mapping
	// can be dumped to features.tsv afterwards.
	fm := make(map[string]int64)
	for _, sample := range d.Samples {
		out_sample := NewSample()
		out_sample.Label = sample.Label
		if splits != nil {
			for fkey_str, fvalue_str := range sample.Features {
				fkey := ""
				fvalue := 0.0
				if GetFeatureType(fkey_str) == FeatureTypeEnum.CONTINUOUS_FEATURE {
					split, ok := splits[fkey_str]
					if ok {
						// Discretize the continuous value into its bucket and
						// emit a one-hot feature named "<key>_<bucket>".
						cat := FindCategory(split, util.ParseFloat64(fvalue_str))
						fkey = fkey_str + "_" + strconv.FormatInt(int64(cat), 10)
						fvalue = 1.0
					} else {
						// No split configured: keep the raw value under the
						// original key (otherwise every unsplit feature would
						// collapse onto Hash("")).
						fkey = fkey_str
						fvalue = util.ParseFloat64(fvalue_str)
					}
					fm[fkey] = util.Hash(fkey)
					out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: fvalue})
				}
			}
		}
		// Cross features: concatenate "key:value" pairs and emit a single
		// binary feature for the combination.
		for _, combination := range combinations {
			fkey := ""
			for _, ckey := range combination {
				fkey += ckey
				fkey += ":"
				fkey += sample.GetFeatureValue(ckey)
				fkey += "_"
			}
			fm[fkey] = util.Hash(fkey)
			out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: 1.0})
		}
		out_data.AddSample(out_sample)
	}
	// Dump the feature-name -> id mapping; flush the buffered writer so the
	// file is not left empty.
	f, err := os.Create("features.tsv")
	if err == nil {
		defer f.Close()
		w := bufio.NewWriter(f)
		for k, v := range fm {
			w.WriteString(k + "\t" + strconv.FormatInt(v, 10) + "\n")
		}
		w.Flush()
	}

	return out_data
}
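A hypothetical call site for ToDataSet, assuming a *RawDataSet named raw has already been populated; the split boundaries and the crossed feature names are illustrative, and CombinedFeature is taken to be a slice of feature names, as the loop over `combination` above implies.

func buildDataSet(raw *RawDataSet) *DataSet {
	// Bucket boundaries for one continuous feature (illustrative values);
	// FindCategory maps each raw value into one of these buckets.
	splits := map[string][]float64{
		"distance_from_order_on_creation": {1.0, 3.0, 10.0},
	}
	// Cross "hour" with "day_of_week" into a single hashed binary feature.
	combinations := []CombinedFeature{
		{"hour", "day_of_week"},
	}
	return raw.ToDataSet(splits, combinations)
}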
Example #3
func (d *DataSet) Load(path string, global_bias_feature_id int64) error {
	fm := make(map[string]int64)
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)

	for scanner.Scan() {
		// Accept either spaces or tabs as separators.
		line := strings.Replace(scanner.Text(), " ", "\t", -1)
		tks := strings.Split(line, "\t")
		sample := Sample{Features: []Feature{}, Label: 0}
		for i, tk := range tks {
			if i == 0 {
				// The first token is the integer label.
				label, _ := strconv.Atoi(tk)
				sample.Label = label
				if d.max_label < label {
					d.max_label = label
				}
			} else {
				// Remaining tokens are "id:value" or "name:value" pairs;
				// non-numeric keys are hashed into feature ids.
				kv := strings.Split(tk, ":")
				feature_id, err := strconv.ParseInt(kv[0], 10, 64)
				if err != nil {
					feature_id = util.Hash(kv[0])
					fm[kv[0]] = feature_id
				}
				d.FeatureNameIdMap[feature_id] = kv[0]
				// A missing value defaults to 1.0.
				feature_value := 1.0
				if len(kv) > 1 {
					feature_value, err = strconv.ParseFloat(kv[1], 64)
					if err != nil {
						// Malformed value: skip the rest of this line.
						break
					}
				}
				sample.Features = append(sample.Features, Feature{feature_id, feature_value})
			}
		}
		if global_bias_feature_id >= 0 {
			sample.Features = append(sample.Features, Feature{global_bias_feature_id, 1.0})
		}
		d.AddSample(&sample)
	}
	if err := scanner.Err(); err != nil {
		return err
	}
	// Dump the hashed feature-name -> id mapping; flush the buffered writer
	// so features.tsv actually gets written.
	f, err := os.Create("features.tsv")
	if err == nil {
		defer f.Close()
		w := bufio.NewWriter(f)
		for k, v := range fm {
			w.WriteString(k + "\t" + strconv.FormatInt(v, 10) + "\n")
		}
		w.Flush()
	}

	log.Println("dataset size : ", len(d.Samples))
	return nil
}
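From the parsing above, each input line is an integer label followed by space- or tab-separated id:value pairs; non-numeric ids are hashed and a missing value defaults to 1.0. A hypothetical training file and call might look like the following (the file name is illustrative, and -1 disables the global bias feature; NewDataSet is the constructor used in Example #2):

1 101:0.5 203:1.2 color_red
0 101:0.1 203:0.9

d := NewDataSet()
if err := d.Load("train.tsv", -1); err != nil {
	log.Fatalln(err)
}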
Example #4
// NewSample converts a map of feature name -> value into a core.Sample,
// hashing each name into a numeric feature id.
func NewSample(fs map[string]float64) *core.Sample {
	sample := core.NewSample()
	for k, v := range fs {
		f := core.Feature{
			Id:    util.Hash(k),
			Value: v,
		}
		sample.AddFeature(f)
	}
	return sample
}
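A small usage sketch for the helper above; the predictor interface simply mirrors the Predict call seen in Example #1, and the feature names are illustrative, not part of the original code.

// predictor matches how the classifier is used in Example #1; the concrete
// classifier type is assumed to be supplied by the caller.
type predictor interface {
	Predict(*core.Sample) float64
}

func predictOne(c predictor, hour, dayOfWeek float64) float64 {
	s := NewSample(map[string]float64{
		"hour":        hour,
		"day_of_week": dayOfWeek,
	})
	return c.Predict(s)
}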
Example #5
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet {
	out_data := NewDataSet()
	for _, sample := range d.Samples {
		out_sample := NewSample()
		out_sample.Label = sample.Label
		if splits != nil {
			for fkey_str, fvalue_str := range sample.Features {
				fkey := ""
				fvalue := 0.0
				if GetFeatureType(fkey_str) == FeatureTypeEnum.CONTINUOUS_FEATURE {
					split, ok := splits[fkey_str]
					if ok {
						// Discretize into a bucket and emit a one-hot feature.
						cat := FindCategory(split, util.ParseFloat64(fvalue_str))
						fkey = fkey_str + "_" + strconv.FormatInt(int64(cat), 10)
						fvalue = 1.0
					} else {
						// No split configured: keep the raw value under the
						// original key instead of hashing an empty name.
						fkey = fkey_str
						fvalue = util.ParseFloat64(fvalue_str)
					}
					out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: fvalue})
				}
			}
		}
		// Cross features combined into a single binary feature.
		for _, combination := range combinations {
			fkey := ""
			for _, ckey := range combination {
				fkey += ckey
				fkey += ":"
				fkey += sample.GetFeatureValue(ckey)
				fkey += "_"
			}
			out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: 1.0})
		}
		out_data.AddSample(out_sample)
	}
	return out_data
}
Example #6
func NewDataSetSample(path string) (*core.DataSet, []string) {
	samples := []*core.Sample{}
	statusID := []string{}
	f, err := os.Open(path)
	if err != nil {
		log.Fatalln(err)
	}
	defer f.Close()
	r := csv.NewReader(bufio.NewReader(f))
	// Skip the header row.
	r.Read()
	// Column index of each numeric feature in the CSV.
	columns := []struct {
		name string
		idx  int
	}{
		{"hour", 4},
		{"day_of_week", 5},
		{"distance_from_order_on_creation", 6},
		{"driver_location_key", 7},
		{"driver_latitude", 8},
		{"driver_longitude", 9},
		{"origin_location_key", 13},
		{"origin_latitude", 14},
		{"origin_longitude", 15},
	}
	for {
		record, err := r.Read()
		// Stop at EOF.
		if err == io.EOF {
			break
		}
		if err != nil {
			// Skip malformed rows instead of using a partial record.
			log.Println(err)
			continue
		}
		sample := core.NewSample()
		fs := make(map[string]float64)
		// Column 2 holds the order status; statuses 4 and 2 are positives.
		log.Println(record[2])
		statusID = append(statusID, record[2])
		switch record[2] {
		case "4", "2":
			sample.Label = 1
		default:
			sample.Label = 0
		}
		// A non-empty column 10 also marks a positive, unless the status is 3.
		log.Println(record[10])
		if sample.Label == 0 && record[10] != "" && record[2] != "3" {
			sample.Label = 1
		}
		for _, c := range columns {
			log.Println(c.name + " " + record[c.idx])
			fs[c.name], _ = strconv.ParseFloat(record[c.idx], 64)
		}
		// Hash feature names into the numeric ids the classifier expects.
		for k, v := range fs {
			sample.AddFeature(core.Feature{
				Id:    util.Hash(k),
				Value: v,
			})
		}
		samples = append(samples, sample)
	}
	d := &core.DataSet{
		Samples: samples,
	}
	return d, statusID
}
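A hypothetical call for the loader above; the CSV path is illustrative, and statusID is just the raw status column collected per row.

func main() {
	// Requires only the "log" package already used by NewDataSetSample.
	d, statusIDs := NewDataSetSample("orders.csv")
	log.Println("loaded samples:", len(d.Samples), "status ids:", len(statusIDs))
}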