Example #1
File: gini.go Project: CTLife/golearn
// GetSplitRuleFromSelection returns the DecisionTreeRule which minimises
// the average Gini index amongst consideredAttributes
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (g *GiniCoefficientRuleGenerator) GetSplitRuleFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) *DecisionTreeRule {

	var selectedAttribute base.Attribute
	var selectedVal float64

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Minimise the average Gini index
	minGini := math.Inf(1)
	for _, s := range consideredAttributes {
		var proposedDist map[string]map[string]int
		var splitVal float64
		if fAttr, ok := s.(*base.FloatAttribute); ok {
			_, splitVal = getNumericAttributeEntropy(f, fAttr)
			proposedDist = base.GetClassDistributionAfterThreshold(f, fAttr, splitVal)
		} else {
			proposedDist = base.GetClassDistributionAfterSplit(f, s)
		}
		avgGini := computeAverageGiniIndex(proposedDist)
		if avgGini < minGini {
			minGini = avgGini
			selectedAttribute = s
			selectedVal = splitVal
		}
	}

	return &DecisionTreeRule{selectedAttribute, selectedVal}
}
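
The selection above hinges on computeAverageGiniIndex, so here is a minimal sketch of the weighted average Gini index it is assumed to compute (a hypothetical stand-in, not golearn's implementation). Each branch of the proposed split contributes its Gini impurity 1 - Σp² weighted by the branch's share of the rows:

// computeAverageGiniIndexSketch is a hypothetical stand-in for the
// computeAverageGiniIndex helper called above. The outer map key is the
// split branch; the inner map counts rows per class within that branch.
func computeAverageGiniIndexSketch(dist map[string]map[string]int) float64 {
	// Count all rows across every branch of the proposed split
	total := 0
	for _, branch := range dist {
		for _, count := range branch {
			total += count
		}
	}
	if total == 0 {
		return 0
	}
	avg := 0.0
	for _, branch := range dist {
		branchTotal := 0
		for _, count := range branch {
			branchTotal += count
		}
		if branchTotal == 0 {
			continue
		}
		// Gini impurity of this branch: 1 - sum of squared class proportions
		gini := 1.0
		for _, count := range branch {
			p := float64(count) / float64(branchTotal)
			gini -= p * p
		}
		// Weight by the branch's share of all rows
		avg += float64(branchTotal) / float64(total) * gini
	}
	return avg
}

A branch containing a single class yields an impurity of 0, so lower is better, which matches the minGini comparison in the loop above.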
Example #2
// GetSplitAttributeFromSelection returns the Attribute which maximises
// the information gain amongst consideredAttributes
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitAttributeFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) base.Attribute {

	var selectedAttribute base.Attribute

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Next step is to compute the information gain at this node
	// for each considered Attribute, and pick the one
	// which maximises it
	maxGain := math.Inf(-1)

	// Compute the base entropy
	classDist := base.GetClassDistribution(f)
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each attribute
	for _, s := range consideredAttributes {
		proposedClassDist := base.GetClassDistributionAfterSplit(f, s)
		localEntropy := getSplitEntropy(proposedClassDist)
		informationGain := baseEntropy - localEntropy
		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
		}
	}

	// Pick the one which maximises IG
	return selectedAttribute
}
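
For reference, a minimal sketch of the two entropy helpers used above, under the assumption that getBaseEntropy receives a class → count map and getSplitEntropy a branch → (class → count) map (hypothetical stand-ins, not golearn's implementations; both need the math import):

// baseEntropySketch is assumed to compute the Shannon entropy
// H = -sum(p * log2(p)) over the class proportions.
func baseEntropySketch(classDist map[string]int) float64 {
	total := 0
	for _, count := range classDist {
		total += count
	}
	if total == 0 {
		return 0
	}
	h := 0.0
	for _, count := range classDist {
		if count == 0 {
			continue
		}
		p := float64(count) / float64(total)
		h -= p * math.Log2(p)
	}
	return h
}

// splitEntropySketch is assumed to compute the weighted average of each
// branch's entropy, weighted by the branch's share of all rows.
func splitEntropySketch(dist map[string]map[string]int) float64 {
	total := 0
	for _, branch := range dist {
		for _, count := range branch {
			total += count
		}
	}
	if total == 0 {
		return 0
	}
	h := 0.0
	for _, branch := range dist {
		branchTotal := 0
		for _, count := range branch {
			branchTotal += count
		}
		h += float64(branchTotal) / float64(total) * baseEntropySketch(branch)
	}
	return h
}

With these, baseEntropySketch(classDist) - splitEntropySketch(proposedClassDist) mirrors the informationGain subtraction in the loop above.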
Example #3
File: entropy.go Project: CTLife/golearn
// GetSplitRuleFromSelection returns a DecisionTreeRule which maximises
// the information gain amongst the considered Attributes.
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitRuleFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) *DecisionTreeRule {

	var selectedAttribute base.Attribute

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Next step is to compute the information gain at this node
	// for each considered Attribute, and pick the one
	// which maximises it
	maxGain := math.Inf(-1)
	selectedVal := math.Inf(1)

	// Compute the base entropy
	classDist := base.GetClassDistribution(f)
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each attribute
	for _, s := range consideredAttributes {
		var informationGain float64
		var splitVal float64
		if fAttr, ok := s.(*base.FloatAttribute); ok {
			var attributeEntropy float64
			attributeEntropy, splitVal = getNumericAttributeEntropy(f, fAttr)
			informationGain = baseEntropy - attributeEntropy
		} else {
			proposedClassDist := base.GetClassDistributionAfterSplit(f, s)
			localEntropy := getSplitEntropy(proposedClassDist)
			informationGain = baseEntropy - localEntropy
		}

		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
			selectedVal = splitVal
		}
	}

	// Pick the one which maximises IG
	return &DecisionTreeRule{selectedAttribute, selectedVal}
}
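
The FloatAttribute branch above relies on getNumericAttributeEntropy to produce both an entropy value and a threshold. A plausible sketch of that search, reusing splitEntropySketch from the sketch after Example #2 (hypothetical, not golearn's implementation; the row type is invented for illustration and the code assumes the sort and math imports): sort rows by the numeric value, try the midpoint between each pair of adjacent distinct values as a candidate threshold, and keep the threshold whose two-way split has the lowest weighted entropy.

// row pairs one numeric attribute value with its class label (hypothetical).
type row struct {
	val   float64
	class string
}

func numericEntropySketch(rows []row) (bestEntropy, bestThreshold float64) {
	// Sort by the numeric value so adjacent pairs give candidate thresholds
	sort.Slice(rows, func(i, j int) bool { return rows[i].val < rows[j].val })
	bestEntropy = math.Inf(1)
	for i := 1; i < len(rows); i++ {
		if rows[i].val == rows[i-1].val {
			continue
		}
		// Candidate threshold: midpoint between adjacent distinct values
		threshold := (rows[i-1].val + rows[i].val) / 2
		// Build the two-way class distribution induced by the threshold
		dist := map[string]map[string]int{"<=": {}, ">": {}}
		for _, r := range rows {
			if r.val <= threshold {
				dist["<="][r.class]++
			} else {
				dist[">"][r.class]++
			}
		}
		if e := splitEntropySketch(dist); e < bestEntropy {
			bestEntropy, bestThreshold = e, threshold
		}
	}
	return bestEntropy, bestThreshold
}

Returning the minimal split entropy is what makes the subtraction baseEntropy - attributeEntropy in Example #3 yield the maximal information gain for that attribute.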