示例#1
0
// ToDataSet converts the raw, string-keyed samples of d into a DataSet whose
// features are identified by util.Hash of their name.
//
// For every sample the label is copied. When splits is non-nil, each feature
// that GetFeatureType reports as continuous is emitted:
//   - if splits has an entry for the feature, the value is bucketed with
//     FindCategory and emitted as an indicator named "<key>_<category>" with
//     value 1.0;
//   - otherwise the parsed numeric value is emitted under the feature's own
//     key.
//
// Features of any other type are not emitted by that loop, and when splits is
// nil the loop is skipped entirely. Each entry of combinations then yields one
// indicator feature named "<k1>:<v1>_<k2>:<v2>_..." with value 1.0.
//
// As a side effect the name -> id mapping of every emitted feature is written
// to "features.tsv" in the current working directory. That dump is best
// effort: a failure is reported on stderr and does not affect the result.
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet {
	outData := NewDataSet()
	featureIDs := make(map[string]int64)
	for _, sample := range d.Samples {
		outSample := NewSample()
		outSample.Label = sample.Label
		if splits != nil {
			for rawKey, rawValue := range sample.Features {
				if GetFeatureType(rawKey) != FeatureTypeEnum.CONTINUOUS_FEATURE {
					continue
				}
				// Default: keep the numeric value under the feature's own key.
				// The key used to be left empty here, which made every
				// unbinned continuous feature collide on the hash of "".
				key := rawKey
				value := util.ParseFloat64(rawValue)
				if split, ok := splits[rawKey]; ok {
					cat := FindCategory(split, value)
					key = rawKey + "_" + strconv.FormatInt(int64(cat), 10)
					value = 1.0
				}
				id := util.Hash(key)
				featureIDs[key] = id
				outSample.AddFeature(Feature{Id: id, Value: value})
			}
		}
		for _, combination := range combinations {
			key := ""
			for _, ckey := range combination {
				key += ckey + ":" + sample.GetFeatureValue(ckey) + "_"
			}
			id := util.Hash(key)
			featureIDs[key] = id
			outSample.AddFeature(Feature{Id: id, Value: 1.0})
		}
		outData.AddSample(outSample)
	}

	// The signature has no error return, so a failed dump is only reported.
	if err := writeFeatureMap("features.tsv", featureIDs); err != nil {
		os.Stderr.WriteString("ToDataSet: writing features.tsv: " + err.Error() + "\n")
	}

	return outData
}

// writeFeatureMap writes one "<name>\t<id>\n" line per entry of featureIDs to
// the file at path, creating or truncating it. Lines follow map iteration
// order, so their order is unspecified. The first create, write, flush or
// close error is returned.
func writeFeatureMap(path string, featureIDs map[string]int64) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a close error unless an earlier error is already pending.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(f)
	for name, id := range featureIDs {
		if _, err = w.WriteString(name + "\t" + strconv.FormatInt(id, 10) + "\n"); err != nil {
			return err
		}
	}
	// Without this flush the buffered tail never reaches the file.
	return w.Flush()
}
示例#2
0
// ToDataSet converts the raw, string-keyed samples of d into a DataSet whose
// features are identified by util.Hash of their name.
//
// For every sample the label is copied. When splits is non-nil, each feature
// that GetFeatureType reports as continuous is emitted:
//   - if splits has an entry for the feature, the value is bucketed with
//     FindCategory and emitted as an indicator named "<key>_<category>" with
//     value 1.0;
//   - otherwise the parsed numeric value is emitted under the feature's own
//     key.
//
// Features of any other type are not emitted by that loop, and when splits is
// nil the loop is skipped entirely. Each entry of combinations then yields one
// indicator feature named "<k1>:<v1>_<k2>:<v2>_..." with value 1.0.
func (d *RawDataSet) ToDataSet(splits map[string][]float64, combinations []CombinedFeature) *DataSet {
	outData := NewDataSet()
	for _, sample := range d.Samples {
		outSample := NewSample()
		outSample.Label = sample.Label
		if splits != nil {
			for rawKey, rawValue := range sample.Features {
				if GetFeatureType(rawKey) != FeatureTypeEnum.CONTINUOUS_FEATURE {
					continue
				}
				// Default: keep the numeric value under the feature's own key.
				// The key used to be left empty here, which made every
				// unbinned continuous feature collide on the hash of "".
				key := rawKey
				value := util.ParseFloat64(rawValue)
				if split, ok := splits[rawKey]; ok {
					cat := FindCategory(split, value)
					key = rawKey + "_" + strconv.FormatInt(int64(cat), 10)
					value = 1.0
				}
				outSample.AddFeature(Feature{Id: util.Hash(key), Value: value})
			}
		}
		for _, combination := range combinations {
			key := ""
			for _, ckey := range combination {
				key += ckey + ":" + sample.GetFeatureValue(ckey) + "_"
			}
			outSample.AddFeature(Feature{Id: util.Hash(key), Value: 1.0})
		}
		outData.AddSample(outSample)
	}
	return outData
}
示例#3
0
// Load reads samples from the text file at path and appends them to d.
//
// Each non-blank line has the form
//
//	<value> <id>[:<weight>] <id>[:<weight>] ...
//
// Fields may be separated by any run of whitespace. The first field is parsed
// with util.ParseFloat64 and stored as the sample's Value. Every following
// field becomes a Feature; a field without ":<weight>" gets weight 1.0.
//
// Parsing of a line stops at the first field whose id or weight does not
// parse. The features read so far are kept and the sample is still added.
// Blank lines are skipped.
//
// When global_bias_feature_id is non-negative, a feature with that id and
// value 1.0 is appended to every sample.
//
// Load returns the error from opening the file or from the line scanner, if
// any.
func (d *RealDataSet) Load(path string, global_bias_feature_id int64) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// Fields collapses runs of separators. Splitting on every single
		// space or tab produced empty tokens, and the first empty token
		// silently cut off the rest of the line.
		tks := strings.Fields(scanner.Text())
		if len(tks) == 0 {
			continue
		}

		sample := RealSample{Features: []Feature{}, Value: util.ParseFloat64(tks[0])}
		for _, tk := range tks[1:] {
			kv := strings.Split(tk, ":")
			id, err := strconv.ParseInt(kv[0], 10, 64)
			if err != nil {
				// Best effort: ignore the malformed remainder of the line.
				break
			}
			weight := 1.0
			if len(kv) > 1 {
				weight, err = strconv.ParseFloat(kv[1], 64)
				if err != nil {
					break
				}
			}
			sample.Features = append(sample.Features, Feature{id, weight})
		}
		if global_bias_feature_id >= 0 {
			sample.Features = append(sample.Features, Feature{global_bias_feature_id, 1.0})
		}
		d.AddSample(&sample)
	}
	return scanner.Err()
}