//Start the selection attributes process func (as *AttributeSelection) StartSelection(instances data.Instances) { as.input = data.NewInstancesWithInst(instances, len(instances.Attributes())) as.input = instances as.hasClass = as.input.ClassIndex() >= 0 as.selectedAttributes = as.SelectAttributes(as.input) if len(as.selectedAttributes) == 0 { panic("No selected attributes") } //Set output fmt.Println(as.selectedAttributes, "as.selectedAttributes") as.output = data.NewInstances() attributes := make([]data.Attribute, 0) for i := range as.selectedAttributes { attributes = append(attributes, *as.input.Attribute(as.selectedAttributes[i])) } fmt.Println(attributes, "attributes") as.output.SetDatasetName(as.input.DatasetName()) as.output.SetAttributes(attributes) if as.hasClass { as.output.SetClassIndex(len(as.selectedAttributes) - 1) } // Convert pending input instances tmpInst := make([]data.Instance, 0) for _, in := range as.input.Instances() { tmpInst = append(tmpInst, as.convertInstance(in)) } as.output.SetInstances(tmpInst) }
func (as *AttributeSelection) SelectAttributes(data_ data.Instances) []int { //***********attributeSet := make([]int, 0) as.trainInstances = data_ as.doRank = as.search.GenerateRanking() // check that a class index has been set if as.trainInstances.ClassIndex() < 0 { as.trainInstances.SetClassIndex(len(as.trainInstances.Attributes()) - 1) } // Initialize the attribute evaluator as.evaluator.BuildEvaluator(as.trainInstances) //fieldWith := int(math.Log(float64(len(as.trainInstances.Attributes()) + 1))) // Do the search //***********attributeSet = as.search.Search(as.evaluator, as.trainInstances) // InfoGain do not implements postprocessing in weka //I won't use this check because in this implementation it will always be true //due that search method always is going to be Ranker if as.doRank { } as.attributeRanking = as.search.rankedAttributes() // retrieve the number of attributes to retain as.numToSelect = as.search.GetCalculatedNumToSelect() fmt.Println(as.numToSelect, "as.numToSelect") // determine fieldwidth for merit f_p, w_p := 0, 0 for i := 0; i < as.numToSelect; i++ { precision := math.Abs(as.attributeRanking[i][1]) - math.Abs(as.attributeRanking[i][1]) intPart := int(math.Abs(as.attributeRanking[i][1])) if precision > 0 { precision = math.Abs((math.Log(math.Abs(precision)) / math.Log(10))) + 3 } if precision > float64(f_p) { f_p = int(precision) } if intPart == 0 { if w_p < 2 { w_p = 2 } } else if (math.Abs((math.Log(math.Abs(as.attributeRanking[i][1])) / math.Log(10))) + 1) > float64(w_p) { if as.attributeRanking[i][1] > 0 { w_p = int(math.Abs((math.Log(math.Abs(as.attributeRanking[i][1])) / math.Log(10))) + 1) } } } // set up the selected attributes array - usable by a filter or // whatever if as.trainInstances.ClassIndex() >= 0 { as.selectedAttributeSet = make([]int, as.numToSelect+1) as.selectedAttributeSet[as.numToSelect] = as.trainInstances.ClassIndex() } else { as.selectedAttributeSet = make([]int, as.numToSelect) } for i := 0; i < as.numToSelect; i++ { as.selectedAttributeSet[i] = int(as.attributeRanking[i][0]) } fmt.Println(as.selectedAttributeSet, "as.selectedAttributeSet") if as.doXval { as.CrossValidateAttribute() } if as.selectedAttributeSet != nil && !as.doXval { as.attributeFilter = NewRemove() as.attributeFilter.SetSelectedColumns(as.selectedAttributeSet) as.attributeFilter.SetInvertSelection(true) as.attributeFilter.SetInputFormat(as.trainInstances) } as.trainInstances = data.NewInstancesWithInst(as.trainInstances, 0) return as.selectedAttributeSet }