// GetSplitRuleFromSelection returns the DecisionTreeRule which minimises
// the average Gini index amongst consideredAttributes.
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (g *GiniCoefficientRuleGenerator) GetSplitRuleFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) *DecisionTreeRule {

	var selectedAttribute base.Attribute
	var selectedVal float64

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Minimise the average Gini index
	minGini := math.Inf(1)
	for _, s := range consideredAttributes {
		var proposedDist map[string]map[string]int
		var splitVal float64
		if fAttr, ok := s.(*base.FloatAttribute); ok {
			// Numeric Attributes split on a threshold, chosen to
			// minimise the resulting entropy
			_, splitVal = getNumericAttributeEntropy(f, fAttr)
			proposedDist = base.GetClassDistributionAfterThreshold(f, fAttr, splitVal)
		} else {
			// Categorical Attributes split on each distinct value
			proposedDist = base.GetClassDistributionAfterSplit(f, s)
		}
		avgGini := computeAverageGiniIndex(proposedDist)
		if avgGini < minGini {
			minGini = avgGini
			selectedAttribute = s
			selectedVal = splitVal
		}
	}

	return &DecisionTreeRule{selectedAttribute, selectedVal}
}
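// The average Gini index used above weights each proposed subset's Gini
// impurity by its size. computeAverageGiniIndex is defined elsewhere in
// this package; the helper below is a hypothetical sketch of that
// calculation, assuming the distribution map is keyed by split value and
// then by class value, with instance counts as entries.
func computeAverageGiniIndexSketch(splitDist map[string]map[string]int) float64 {
	total := 0
	weighted := 0.0
	for _, classDist := range splitDist {
		// Count the instances that fall into this subset
		subsetSize := 0
		for _, count := range classDist {
			subsetSize += count
		}
		if subsetSize == 0 {
			continue
		}
		// Gini impurity of the subset: 1 - sum over classes of p^2
		gini := 1.0
		for _, count := range classDist {
			p := float64(count) / float64(subsetSize)
			gini -= p * p
		}
		weighted += float64(subsetSize) * gini
		total += subsetSize
	}
	if total == 0 {
		return 0
	}
	// Size-weighted average across all subsets
	return weighted / float64(total)
}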
// GetSplitAttributeFromSelection returns the Attribute which maximises
// the information gain amongst consideredAttributes.
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitAttributeFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) base.Attribute {

	var selectedAttribute base.Attribute

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Next step is to compute the information gain at this node
	// for each considered attribute, and pick the one
	// which maximises it
	maxGain := math.Inf(-1)

	// Compute the base entropy
	classDist := base.GetClassDistribution(f)
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each attribute
	for _, s := range consideredAttributes {
		proposedClassDist := base.GetClassDistributionAfterSplit(f, s)
		localEntropy := getSplitEntropy(proposedClassDist)
		informationGain := baseEntropy - localEntropy
		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
		}
	}

	// Return the one which maximises IG
	return selectedAttribute
}
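// getBaseEntropy and getSplitEntropy are defined elsewhere in this
// package. The helper below is a hypothetical sketch of the base-entropy
// calculation, assuming the class distribution is a map from class value
// to instance count (math is already imported for the code above).
func getBaseEntropySketch(classDist map[string]int) float64 {
	total := 0
	for _, count := range classDist {
		total += count
	}
	entropy := 0.0
	for _, count := range classDist {
		if count == 0 {
			continue
		}
		// Shannon entropy: H = -sum over classes of p * log2(p)
		p := float64(count) / float64(total)
		entropy -= p * math.Log2(p)
	}
	return entropy
}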
// GetSplitRuleFromSelection returns a DecisionTreeRule which maximises
// the information gain amongst the considered Attributes.
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitRuleFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) *DecisionTreeRule {

	var selectedAttribute base.Attribute

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Next step is to compute the information gain at this node
	// for each considered attribute, and pick the one
	// which maximises it
	maxGain := math.Inf(-1)
	selectedVal := math.Inf(1)

	// Compute the base entropy
	classDist := base.GetClassDistribution(f)
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each attribute
	for _, s := range consideredAttributes {
		var informationGain float64
		var splitVal float64
		if fAttr, ok := s.(*base.FloatAttribute); ok {
			// Numeric Attributes split on the threshold which
			// minimises the resulting entropy
			var attributeEntropy float64
			attributeEntropy, splitVal = getNumericAttributeEntropy(f, fAttr)
			informationGain = baseEntropy - attributeEntropy
		} else {
			// Categorical Attributes split on each distinct value
			proposedClassDist := base.GetClassDistributionAfterSplit(f, s)
			localEntropy := getSplitEntropy(proposedClassDist)
			informationGain = baseEntropy - localEntropy
		}

		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
			selectedVal = splitVal
		}
	}

	// Return the rule which maximises IG
	return &DecisionTreeRule{selectedAttribute, selectedVal}
}
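// Illustrative usage sketch (hypothetical caller, not part of this
// package): choose the best split rule over every candidate Attribute in
// a grid. It assumes a helper such as base.NonClassAttributes is
// available to enumerate the grid's non-class Attributes.
func bestRuleExample(instances base.FixedDataGrid) *DecisionTreeRule {
	candidates := base.NonClassAttributes(instances)
	gen := new(InformationGainRuleGenerator)
	// Panics if candidates is empty, per the contract documented above
	return gen.GetSplitRuleFromSelection(candidates, instances)
}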