Ejemplo n.º 1
0
// StartSelection runs attribute selection on instances and builds the reduced
// output dataset.
//
// It stores instances as the input, records whether the input has a class
// index (ClassIndex() >= 0), and calls SelectAttributes to obtain the indices
// of the attributes to keep. The output dataset receives the input's dataset
// name, the selected attributes in the order returned, and every input
// instance passed through convertInstance. When the input had a class index,
// the output's class index is set to the last selected attribute.
//
// It panics with "No selected attributes" if SelectAttributes returns an
// empty slice.
func (as *AttributeSelection) StartSelection(instances data.Instances) {
	// A copy built with data.NewInstancesWithInst used to be assigned here and
	// was overwritten on the very next line; only the effective assignment is
	// kept.
	as.input = instances
	as.hasClass = as.input.ClassIndex() >= 0

	as.selectedAttributes = as.SelectAttributes(as.input)
	if len(as.selectedAttributes) == 0 {
		panic("No selected attributes")
	}
	fmt.Println(as.selectedAttributes, "as.selectedAttributes")

	// Describe the output dataset: same name, only the selected attributes.
	as.output = data.NewInstances()
	attributes := make([]data.Attribute, 0, len(as.selectedAttributes))
	for _, idx := range as.selectedAttributes {
		attributes = append(attributes, *as.input.Attribute(idx))
	}
	fmt.Println(attributes, "attributes")
	as.output.SetDatasetName(as.input.DatasetName())
	as.output.SetAttributes(attributes)
	if as.hasClass {
		// SelectAttributes places the class index last in its result.
		as.output.SetClassIndex(len(as.selectedAttributes) - 1)
	}

	// Convert every input instance to the reduced attribute layout.
	inputInstances := as.input.Instances()
	converted := make([]data.Instance, 0, len(inputInstances))
	for _, in := range inputInstances {
		converted = append(converted, as.convertInstance(in))
	}
	as.output.SetInstances(converted)
}
Ejemplo n.º 2
0
// SelectAttributes evaluates and ranks the attributes of data_ and returns the
// indices of the attributes to keep.
//
// In order, it:
//   - stores data_ as the training set and, if no class index is set, uses the
//     last attribute as the class;
//   - builds the evaluator on the training set and runs the search;
//   - reads the ranking and the number of attributes to retain back from the
//     search;
//   - fills as.selectedAttributeSet with the first numToSelect ranked attribute
//     indices, followed by the class index when one is set;
//   - runs CrossValidateAttribute when cross-validation is enabled, otherwise
//     configures a Remove filter with the selected columns and inverted
//     selection;
//   - replaces the training set with data.NewInstancesWithInst(trainInstances, 0)
//     (presumably an empty, header-only copy — confirm against the data package).
//
// The returned slice is as.selectedAttributeSet itself, not a copy.
func (as *AttributeSelection) SelectAttributes(data_ data.Instances) []int {
	as.trainInstances = data_
	as.doRank = as.search.GenerateRanking()

	// Default to the last attribute as the class when none has been set.
	if as.trainInstances.ClassIndex() < 0 {
		as.trainInstances.SetClassIndex(len(as.trainInstances.Attributes()) - 1)
	}

	// Initialize the attribute evaluator, then run the search. The results are
	// read back from the search object below.
	as.evaluator.BuildEvaluator(as.trainInstances)
	as.search.Search(as.evaluator, as.trainInstances)

	as.attributeRanking = as.search.rankedAttributes()
	as.numToSelect = as.search.GetCalculatedNumToSelect()
	fmt.Println(as.numToSelect, "as.numToSelect")

	// Determine the field widths needed to print the merit values: fracWidth
	// for the fractional digits, intWidth for the integer digits.
	// NOTE(review): neither width is read after this loop in the visible code.
	fracWidth, intWidth := 0, 0
	for i := 0; i < as.numToSelect; i++ {
		merit := as.attributeRanking[i][1]
		absMerit := math.Abs(merit)

		// Fractional part of the merit. This used to subtract absMerit from
		// itself, which is always zero, so the branch below never ran.
		precision := absMerit - math.Trunc(absMerit)
		if precision > 0 {
			precision = math.Abs(math.Log(precision)/math.Log(10)) + 3
		}
		if precision > float64(fracWidth) {
			fracWidth = int(precision)
		}

		intDigits := math.Abs(math.Log(absMerit)/math.Log(10)) + 1
		switch {
		case int(absMerit) == 0:
			// Values below one still need room for the leading "0.".
			if intWidth < 2 {
				intWidth = 2
			}
		case intDigits > float64(intWidth) && merit > 0:
			intWidth = int(intDigits)
		}
	}

	// Set up the selected attributes array (usable by a filter or any other
	// consumer); the class index, when set, goes in the last slot.
	classIdx := as.trainInstances.ClassIndex()
	if classIdx >= 0 {
		as.selectedAttributeSet = make([]int, as.numToSelect+1)
		as.selectedAttributeSet[as.numToSelect] = classIdx
	} else {
		as.selectedAttributeSet = make([]int, as.numToSelect)
	}
	for i := 0; i < as.numToSelect; i++ {
		as.selectedAttributeSet[i] = int(as.attributeRanking[i][0])
	}
	fmt.Println(as.selectedAttributeSet, "as.selectedAttributeSet")

	if as.doXval {
		as.CrossValidateAttribute()
	}
	if as.selectedAttributeSet != nil && !as.doXval {
		// Inverted selection: the listed columns are the ones to keep.
		as.attributeFilter = NewRemove()
		as.attributeFilter.SetSelectedColumns(as.selectedAttributeSet)
		as.attributeFilter.SetInvertSelection(true)
		as.attributeFilter.SetInputFormat(as.trainInstances)
	}

	as.trainInstances = data.NewInstancesWithInst(as.trainInstances, 0)
	return as.selectedAttributeSet
}