Esempio n. 1
0
// main is a manual end-to-end run of the preprocessing pipeline: it parses an
// ARFF file, passes it through the StringToWordVector filter, runs attribute
// selection with an InfoGain evaluator and a Ranker search, prints the
// instances after each of the two stages, and finally hands the result to
// data.ExportToArffFileSparse.
func main() {
	// Raw data set, created with class index 0 and filled from a fixed local file.
	dataset := data.NewInstancesWithClassIndex(0)
	dataset.ParseFile("C:\\Users\\Yuri\\workspace\\SMO\\src\\main\\_AppsLemmas.arff")

	// Stage 1: string-to-word-vector filter.
	vectorizer := functions.NewStringToWordVectorInst(dataset)
	vectorizer.SetIDF_Transformation(true)
	vectorizer.SetTF_Transformation(true)
	vectorizer.SetWordsToKeep(15)
	vectorizer.SetPerClass(true)
	vectorizer.SetNormalize(false)
	filtered := vectorizer.Exec()
	// The returned value is not used; the call is retained as in the original sequence.
	filtered.ClassIndex()

	// dump prints Values, RealValues and Indices of every instance in
	// filtered. It reads the variable when called, so the same closure serves
	// both the pre- and post-selection dumps.
	dump := func() {
		for _, row := range filtered.Instances() {
			fmt.Println(row.Values())
			fmt.Println(row.RealValues())
			fmt.Println(row.Indices())
		}
	}
	dump()

	// Stage 2: attribute selection (InfoGain evaluator + Ranker search).
	infoGain := functions.NewInfoGain()
	infoGain.SetBinarize(true)
	// infoGain.BuildEvaluator(filtered) // direct evaluator call, currently disabled

	ranker := functions.NewRanker()
	ranker.SetThreshold(0.0)
	ranker.SetNumToSelect(-1)
	// ranker.SetRange("1,3") // currently disabled

	selection := functions.NewAttributeSelection()
	selection.SetEvaluator(infoGain)
	selection.SetSearchMethod(ranker)
	selection.StartSelection(filtered)
	filtered = selection.Output()
	dump()

	// Ad-hoc check of utils.SortFloat on a fixed sample; only its result is printed.
	sample := []float64{0, 0, 0, 0, 0, 0, 0.8904916402194916, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.7793498372920848, 0, 0, 0, 0, 0.7793498372920848}
	sortResult := utils.SortFloat(sample)
	fmt.Println(sortResult)

	// NOTE(review): both string arguments are empty here — confirm what the
	// exporter does with them.
	data.ExportToArffFileSparse(filtered, "", "")
}
// SetInputFormat builds the filter's output header from instInfo and stores it
// in r.outputFormat.
//
// It first calls r.getSelectedAttributes with the number of input attributes
// (presumably this populates r.selectedAttributes — confirm against that
// method), then copies every attribute whose index appears in
// r.selectedAttributes, in that order. The output class index is the position
// the input's class attribute occupies among the copied attributes, or -1 if
// the class attribute is not among them.
//
// Each copied attribute name and a final summary line are printed to standard
// output.
func (r *Remove) SetInputFormat(instInfo data.Instances) {
	r.getSelectedAttributes(len(instInfo.Attributes()))

	classPos := -1
	kept := make([]data.Attribute, 0, len(r.selectedAttributes))
	for _, attrIdx := range r.selectedAttributes {
		if attrIdx == instInfo.ClassIndex() {
			// The class attribute is about to be appended, so its new index
			// is the current length of kept.
			classPos = len(kept)
		}
		selected := *instInfo.Attribute(attrIdx)
		fmt.Println(selected.Name())
		kept = append(kept, selected)
	}
	fmt.Println(len(kept), "attributes", "\n", classPos, "outputClass")

	r.outputFormat = data.NewInstancesWithClassIndex(classPos)
	r.outputFormat.SetAttributes(kept)
}
// NewStringToWordVectorInst returns a StringToWordVector bound to inputData and
// initialised with the default settings below: an empty dictionary and
// document-count slice, counts output off, 1000 words to keep, minimum term
// frequency 1, per-class and normalize on, TF and IDF transformations on, and
// -1 for the average document length, instance count and periodic pruning
// rate. The output format is a new data.Instances created with inputData's
// class index.
func NewStringToWordVectorInst(inputData data.Instances) StringToWordVector {
	return StringToWordVector{
		dictionary:           omap.NewStringKeyed(),
		outputsCounts:        false,
		docsCounts:           make([]int, 0),
		avgDocLength:         -1,
		wordsToKeep:          1000,
		numInstances:         -1,
		perdiodicPruningRate: -1,
		minTermFreq:          1,
		perClass:             true,
		normalize:            true,
		inputFormat:          inputData,
		outputFormat:         data.NewInstancesWithClassIndex(inputData.ClassIndex()),
		firstTime:            true,
		tf_transformation:    true,
		idf_transformation:   true,
	}
}