func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet { out_data := NewDataSet() fm := make(map[string]int64) for _, sample := range d.Samples { out_sample := NewSample() out_sample.Label = sample.Label if splits != nil { for fkey_str, fvalue_str := range sample.Features { fkey := "" fvalue := 0.0 if GetFeatureType(fkey_str) == FeatureTypeEnum.CONTINUOUS_FEATURE { split, ok := splits[fkey_str] if ok { cat := FindCategory(split, util.ParseFloat64(fvalue_str)) fkey = fkey_str + "_" + strconv.FormatInt(int64(cat), 10) fvalue = 1.0 } else { fvalue = util.ParseFloat64(fvalue_str) } fm[fkey] = util.Hash(fkey) out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: fvalue}) } } } for _, combination := range combinations { fkey := "" for _, ckey := range combination { fkey += ckey fkey += ":" fkey += sample.GetFeatureValue(ckey) fkey += "_" } fm[fkey] = util.Hash(fkey) out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: 1.0}) } out_data.AddSample(out_sample) } f, _ := os.Create("features.tsv") defer f.Close() w := bufio.NewWriter(f) for k, v := range fm { w.WriteString(k + "\t" + strconv.FormatInt(v, 10) + "\n") } return out_data }
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet { out_data := NewDataSet() for _, sample := range d.Samples { out_sample := NewSample() out_sample.Label = sample.Label if splits != nil { for fkey_str, fvalue_str := range sample.Features { fkey := "" fvalue := 0.0 if GetFeatureType(fkey_str) == FeatureTypeEnum.CONTINUOUS_FEATURE { split, ok := splits[fkey_str] if ok { cat := FindCategory(split, util.ParseFloat64(fvalue_str)) fkey = fkey_str + "_" + strconv.FormatInt(int64(cat), 10) fvalue = 1.0 } else { fvalue = util.ParseFloat64(fvalue_str) } out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: fvalue}) } } } for _, combination := range combinations { fkey := "" for _, ckey := range combination { fkey += ckey fkey += ":" fkey += sample.GetFeatureValue(ckey) fkey += "_" } out_sample.AddFeature(Feature{Id: util.Hash(fkey), Value: 1.0}) } out_data.AddSample(out_sample) } return out_data }
func (d *RealDataSet) Load(path string, global_bias_feature_id int64) error { file, err := os.Open(path) if err != nil { return err } defer file.Close() scanner := bufio.NewScanner(file) for scanner.Scan() { line := strings.Replace(scanner.Text(), " ", "\t", -1) tks := strings.Split(line, "\t") sample := RealSample{Features: []Feature{}, Value: 0.0} for i, tk := range tks { if i == 0 { value := util.ParseFloat64(tk) sample.Value = value } else { kv := strings.Split(tk, ":") feature_id, err := strconv.ParseInt(kv[0], 10, 64) if err != nil { break } feature_value := 1.0 if len(kv) > 1 { feature_value, err = strconv.ParseFloat(kv[1], 64) if err != nil { break } } feature := Feature{feature_id, feature_value} sample.Features = append(sample.Features, feature) } } if global_bias_feature_id >= 0 { sample.Features = append(sample.Features, Feature{global_bias_feature_id, 1.0}) } d.AddSample(&sample) } if scanner.Err() != nil { return scanner.Err() } return nil }