func (c *ClassifierHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { sample := core.NewSample() if req.Method != "POST" { http.Error(w, "method not allowed", http.StatusMethodNotAllowed) return } features := req.FormValue("features") if len(features) == 0 { http.Error(w, "need input features", http.StatusInternalServerError) return } fs := make(map[string]float64) err := json.Unmarshal([]byte(features), &fs) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } for k, v := range fs { f := core.Feature{ Id: util.Hash(k), Value: v, } sample.AddFeature(f) } p := c.classifier.Predict(sample) output, err := json.Marshal(map[string]interface{}{ "prediction": p, }) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } fmt.Fprint(w, output) }
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet { out_data := NewDataSet() fm := make(map[string]int64) for _, sample := range d.Samples { out_sample := NewSample() out_sample.Label = sample.Label if splits != nil { for fkey_str, fvalue_str := range sample.Features { fkey := "" fvalue := 0.0 if GetFeatureType(fkey_str) == FeatureTypeEnum.CONTINUOUS_FEATURE { split, ok := splits[fkey_str] if ok { cat := FindCategory(split, util.ParseFloat64(fvalue_str)) fkey = fkey_str + "_" + strconv.FormatInt(int64(cat), 10) fvalue = 1.0 } else { fvalue = util.ParseFloat64(fvalue_str) } fm[fkey] = util.Hash(fkey) out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: fvalue}) } } } for _, combination := range combinations { fkey := "" for _, ckey := range combination { fkey += ckey fkey += ":" fkey += sample.GetFeatureValue(ckey) fkey += "_" } fm[fkey] = util.Hash(fkey) out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: 1.0}) } out_data.AddSample(out_sample) } f, _ := os.Create("features.tsv") defer f.Close() w := bufio.NewWriter(f) for k, v := range fm { w.WriteString(k + "\t" + strconv.FormatInt(v, 10) + "\n") } return out_data }
func (d *DataSet) Load(path string, global_bias_feature_id int64) error { fm := make(map[string]int64) file, err := os.Open(path) if err != nil { return err } defer file.Close() scanner := bufio.NewScanner(file) for scanner.Scan() { line := strings.Replace(scanner.Text(), " ", "\t", -1) tks := strings.Split(line, "\t") sample := Sample{Features: []Feature{}, Label: 0} for i, tk := range tks { if i == 0 { label, _ := strconv.Atoi(tk) sample.Label = label if d.max_label < label { d.max_label = label } } else { kv := strings.Split(tk, ":") feature_id, err := strconv.ParseInt(kv[0], 10, 64) if err != nil { feature_id = util.Hash(kv[0]) fm[kv[0]] = feature_id } d.FeatureNameIdMap[feature_id] = kv[0] feature_value := 1.0 if len(kv) > 1 { feature_value, err = strconv.ParseFloat(kv[1], 64) if err != nil { break } } feature := Feature{feature_id, feature_value} sample.Features = append(sample.Features, feature) } } if global_bias_feature_id >= 0 { sample.Features = append(sample.Features, Feature{global_bias_feature_id, 1.0}) } d.AddSample(&sample) } if scanner.Err() != nil { return scanner.Err() } f, _ := os.Create("features.tsv") defer f.Close() w := bufio.NewWriter(f) for k, v := range fm { w.WriteString(k + "\t" + strconv.FormatInt(v, 10) + "\n") } log.Println("dataset size : ", len(d.Samples)) return nil }
func NewSample(fs map[string]float64) *core.Sample { sample := core.NewSample() for k, v := range fs { f := core.Feature{ Id: util.Hash(k), Value: v, } sample.AddFeature(f) } return sample }
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet { out_data := NewDataSet() for _, sample := range d.Samples { out_sample := NewSample() out_sample.Label = sample.Label if splits != nil { for fkey_str, fvalue_str := range sample.Features { fkey := "" fvalue := 0.0 if GetFeatureType(fkey_str) == FeatureTypeEnum.CONTINUOUS_FEATURE { split, ok := splits[fkey_str] if ok { cat := FindCategory(split, util.ParseFloat64(fvalue_str)) fkey = fkey_str + "_" + strconv.FormatInt(int64(cat), 10) fvalue = 1.0 } else { fvalue = util.ParseFloat64(fvalue_str) } out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: fvalue}) } } } for _, combination := range combinations { fkey := "" for _, ckey := range combination { fkey += ckey fkey += ":" fkey += sample.GetFeatureValue(ckey) fkey += "_" } out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: 1.0}) } out_data.AddSample(out_sample) } return out_data }
func NewDataSetSample(path string) (*core.DataSet, []string) { samples := []*core.Sample{} statusID := []string{} f, _ := os.Open(path) r := csv.NewReader(bufio.NewReader(f)) r.Read() for { record, err := r.Read() // Stop at EOF. if err == io.EOF { break } sample := core.NewSample() fs := make(map[string]float64) log.Println(record[2]) statusID = append(statusID, record[2]) switch record[2] { case "4", "2": sample.Label = 1 default: sample.Label = 0 } log.Println(record[10]) if sample.Label == 0 && record[10] != "" && record[2] != "3" { sample.Label = 1 } log.Println("hour " + record[4]) fs["hour"], _ = strconv.ParseFloat(record[4], 64) log.Println("day_of_week " + record[5]) fs["day_of_week"], _ = strconv.ParseFloat(record[5], 64) log.Println("distance_from_order_on_creation " + record[6]) fs["distance_from_order_on_creation"], _ = strconv.ParseFloat(record[6], 64) log.Println("driver_location_key " + record[7]) fs["driver_location_key"], _ = strconv.ParseFloat(record[7], 64) log.Println("driver_latitude " + record[8]) fs["driver_latitude"], _ = strconv.ParseFloat(record[8], 64) log.Println("driver_longitude " + record[9]) fs["driver_longitude"], _ = strconv.ParseFloat(record[9], 64) log.Println("origin_location_key " + record[13]) fs["origin_location_key"], _ = strconv.ParseFloat(record[13], 64) log.Println("origin_latitude " + record[14]) fs["origin_latitude"], _ = strconv.ParseFloat(record[14], 64) log.Println("origin_longitude " + record[15]) fs["origin_longitude"], _ = strconv.ParseFloat(record[15], 64) for k, v := range fs { f := core.Feature{ Id: util.Hash(k), Value: v, } sample.AddFeature(f) } samples = append(samples, sample) } d := &core.DataSet{ Samples: samples, } return d, statusID }