Ejemplo n.º 1
0
// main loads the feature matrix named by -data, prefixes feature names with
// type information, and writes the result to the file named by -out.
//
// When -libsvmtarget is empty the matrix is written with data.WriteCases;
// otherwise it is written with CloudForest.WriteLibSvm using the named
// feature as the target. Any failure terminates the program via log.Fatal.
func main() {
	fm := flag.String("data",
		"", "Data file to read.")

	outfn := flag.String("out",
		"", "The name of a file to write feature matrix too.")

	libsvmtarget := flag.String("libsvmtarget",
		"", "Output lib svm with the named feature in the first position.")

	flag.Parse()

	// Parse data.
	data, err := CloudForest.LoadAFM(*fm)
	if err != nil {
		log.Fatal(err)
	}

	// The case count is read from the first feature further down, so an
	// input without any features must be rejected instead of panicking.
	if len(data.Data) == 0 {
		log.Fatalf("No features found in data file '%v'.", *fm)
	}

	// Annotate feature names with type information. Names that already
	// carry a type prefix are left alone.
	// NOTE(review): only the Name field is changed here; data.Map is not
	// re-keyed. Confirm that WriteLibSvm resolves the target by its
	// original (unprefixed) name.
	for _, f := range data.Data {
		switch nf := f.(type) {
		case *CloudForest.DenseNumFeature:
			if !strings.HasPrefix(nf.Name, "N:") {
				nf.Name = "N:" + nf.Name
			}
		case *CloudForest.DenseCatFeature:
			if !(strings.HasPrefix(nf.Name, "C:") || strings.HasPrefix(nf.Name, "B:")) {
				nf.Name = "C:" + nf.Name
			}
		}
	}

	outfile, err := os.Create(*outfn)
	if err != nil {
		log.Fatal(err)
	}

	if *libsvmtarget == "" {
		// Write every case, in order: cases is simply 0..ncases-1.
		ncases := data.Data[0].Length()
		cases := make([]int, ncases)
		for i := range cases {
			cases[i] = i
		}

		if err = data.WriteCases(outfile, cases); err != nil {
			log.Fatal(err)
		}
	} else if err = CloudForest.WriteLibSvm(data, *libsvmtarget, outfile); err != nil {
		log.Fatalf("Error writing libsvm:\n%v", err)
	}

	// Close explicitly rather than with defer: log.Fatal exits without
	// running deferred calls, and a failed Close on a freshly written file
	// can mean the output is incomplete.
	if err = outfile.Close(); err != nil {
		log.Fatal(err)
	}
}
Ejemplo n.º 2
0
// main loads the feature matrix named by -data, optionally removes
// blacklisted features and strips strings, prefixes feature names with type
// information, and writes the result to the file named by -out.
//
// The -blacklist file is read as tab-separated values; only the first column
// of each record is used, as a feature id to drop. Ids equal to -anontarget
// or -libsvmtarget are never dropped, and ids missing from the data are
// reported and ignored.
//
// When -libsvmtarget is empty the matrix is written with data.WriteCases;
// otherwise it is written with CloudForest.WriteLibSvm using the named
// feature as the target. Any failure terminates the program via log.Fatal.
func main() {
	fm := flag.String("data",
		"", "Data file to read.")

	outfn := flag.String("out",
		"", "The name of a file to write feature matrix too.")

	libsvmtarget := flag.String("libsvmtarget",
		"", "Output lib svm with the named feature in the first position.")

	anontarget := flag.String("anontarget",
		"", "Strip strings with named feature in the first position.")

	blacklist := flag.String("blacklist",
		"", "A list of feature id's to exclude from the set of predictors.")

	flag.Parse()

	// Parse data.
	data, err := CloudForest.LoadAFM(*fm)
	if err != nil {
		log.Fatal(err)
	}

	// blacklistis[i] is true when data.Data[i] must be dropped;
	// blacklisted counts the distinct features marked that way.
	blacklisted := 0
	blacklistis := make([]bool, len(data.Data))
	if *blacklist != "" {
		fmt.Printf("Loading blacklist from: %v\n", *blacklist)
		blackfile, err := os.Open(*blacklist)
		if err != nil {
			log.Fatal(err)
		}
		tsv := csv.NewReader(blackfile)
		tsv.Comma = '\t'
		for {
			id, err := tsv.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				log.Fatal(err)
			}

			name := id[0]
			// Target features are kept even when the blacklist names them.
			if name == *anontarget || name == *libsvmtarget {
				continue
			}
			i, ok := data.Map[name]
			if !ok {
				fmt.Printf("Ignoring blacklist feature not found in data: %v\n", name)
				continue
			}
			// Count each feature once even if it is listed repeatedly.
			if !blacklistis[i] {
				blacklisted++
				blacklistis[i] = true
			}
		}
		// The file was only read, so a Close error cannot lose data.
		blackfile.Close()
	}

	// Rebuild the feature slice and the name-to-index map without the
	// blacklisted features.
	newdata := make([]CloudForest.Feature, 0, len(data.Data)-blacklisted)
	newmap := make(map[string]int, len(data.Data)-blacklisted)

	for i, f := range data.Data {
		if !blacklistis[i] {
			newmap[f.GetName()] = len(newdata)
			newdata = append(newdata, f)
		}
	}

	// The case count is read from the first feature further down, so an
	// empty input (or a blacklist that removes everything) must be rejected
	// instead of panicking.
	if len(newdata) == 0 {
		log.Fatal("No features left in data after applying blacklist.")
	}

	data.Data = newdata
	data.Map = newmap

	if *anontarget != "" {
		data.StripStrings(*anontarget)
	}

	// Annotate feature names with type information. Names that already
	// carry a type prefix are left alone.
	// NOTE(review): only the Name field is changed here; data.Map keeps the
	// keys recorded above. Confirm that WriteLibSvm resolves the target by
	// its original (unprefixed) name.
	for _, f := range data.Data {
		switch nf := f.(type) {
		case *CloudForest.DenseNumFeature:
			if !strings.HasPrefix(nf.Name, "N:") {
				nf.Name = "N:" + nf.Name
			}
		case *CloudForest.DenseCatFeature:
			if !(strings.HasPrefix(nf.Name, "C:") || strings.HasPrefix(nf.Name, "B:")) {
				nf.Name = "C:" + nf.Name
			}
		}
	}

	outfile, err := os.Create(*outfn)
	if err != nil {
		log.Fatal(err)
	}

	if *libsvmtarget == "" {
		// Write every case, in order: cases is simply 0..ncases-1.
		ncases := data.Data[0].Length()
		cases := make([]int, ncases)
		for i := range cases {
			cases[i] = i
		}

		if err = data.WriteCases(outfile, cases); err != nil {
			log.Fatal(err)
		}
	} else if err = CloudForest.WriteLibSvm(data, *libsvmtarget, outfile); err != nil {
		log.Fatalf("Error writing libsvm:\n%v", err)
	}

	// Close explicitly rather than with defer: log.Fatal exits without
	// running deferred calls, and a failed Close on a freshly written file
	// can mean the output is incomplete.
	if err = outfile.Close(); err != nil {
		log.Fatal(err)
	}
}