func main() { instances := data.NewInstancesWithClassIndex(0) instances.ParseFile("C:\\Users\\Yuri\\workspace\\SMO\\src\\main\\_AppsLemmas.arff") // for _, attr := range instances.Attributes() { // fmt.Println(attr.Name(), attr.Type()) // for idx, val := range attr.Values() { // fmt.Printf("idx: %d val: %s", idx, val) // fmt.Println() // } // } // // for _, inst := range instances.Instances() { // fmt.Println(inst.Values()) // fmt.Println(inst.RealValues()) // } stwv := functions.NewStringToWordVectorInst(instances) stwv.SetIDF_Transformation(true) stwv.SetTF_Transformation(true) stwv.SetWordsToKeep(15) stwv.SetPerClass(true) stwv.SetNormalize(false) processed := stwv.Exec() processed.ClassIndex() // for _, attr := range processed.Attributes() { // fmt.Println(attr.Name(), attr.Type()) // for idx, val := range attr.Values() { // fmt.Printf("idx: %d val: %s", idx, val) // fmt.Println() // } // } //fmt.Println(processed.Attributes()[0].Values()) for _, inst := range processed.Instances() { fmt.Println(inst.Values()) fmt.Println(inst.RealValues()) fmt.Println(inst.Indices()) } ig := functions.NewInfoGain() ranker := functions.NewRanker() ranker.SetThreshold(0.0) ranker.SetNumToSelect(-1) //ranker.SetRange("1,3") ig.SetBinarize(true) as := functions.NewAttributeSelection() as.SetEvaluator(ig) as.SetSearchMethod(ranker) as.StartSelection(processed) processed = as.Output() for _, inst := range processed.Instances() { fmt.Println(inst.Values()) fmt.Println(inst.RealValues()) fmt.Println(inst.Indices()) } //ig.BuildEvaluator(processed) i := utils.SortFloat([]float64{0, 0, 0, 0, 0, 0, 0.8904916402194916, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.7793498372920848, 0, 0, 0, 0, 0.7793498372920848}) fmt.Println(i) data.ExportToArffFileSparse(processed, "", "") }
func (r *Remove) SetInputFormat(instInfo data.Instances) { r.getSelectedAttributes(len(instInfo.Attributes())) attributes := make([]data.Attribute, 0) outputClass := -1 for _, current := range r.selectedAttributes { if instInfo.ClassIndex() == current { outputClass = len(attributes) } keep := *instInfo.Attribute(current) fmt.Println(keep.Name()) attributes = append(attributes, keep) } fmt.Println(len(attributes), "attributes", "\n", outputClass, "outputClass") r.outputFormat = data.NewInstancesWithClassIndex(outputClass) r.outputFormat.SetAttributes(attributes) }
//New StringToWordVector function with default values func NewStringToWordVectorInst(inputData data.Instances) StringToWordVector { var stwv StringToWordVector stwv.dictionary = omap.NewStringKeyed() stwv.outputsCounts = false stwv.docsCounts = make([]int, 0) stwv.avgDocLength = -1 stwv.wordsToKeep = 1000 stwv.numInstances = -1 stwv.perdiodicPruningRate = -1 stwv.minTermFreq = 1 stwv.perClass = true stwv.normalize = true stwv.inputFormat = inputData stwv.outputFormat = data.NewInstancesWithClassIndex(inputData.ClassIndex()) stwv.firstTime = true stwv.tf_transformation, stwv.idf_transformation = true, true return stwv }