// GetSplitAttributeFromSelection returns the Attribute among
// consideredAttributes which maximises the information gain.
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitAttributeFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) base.Attribute {

	var selectedAttribute base.Attribute

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Compute the information gain at this node for each
	// considered Attribute, and pick the one which maximises it
	maxGain := math.Inf(-1)

	// Compute the base entropy (class entropy before any split)
	classDist := base.GetClassDistribution(f)
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each Attribute
	for _, s := range consideredAttributes {
		proposedClassDist := base.GetClassDistributionAfterSplit(f, s)
		localEntropy := getSplitEntropy(proposedClassDist)
		informationGain := baseEntropy - localEntropy
		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
		}
	}

	// Return the Attribute which maximises the information gain
	return selectedAttribute
}
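// exampleSelectSplitAttribute is a hypothetical usage sketch, not part of
// the original file: it loads a dataset and asks the generator for the best
// split Attribute among all non-class Attributes. The CSV path is
// illustrative; base.ParseCSVToInstances and base.NonClassAttributes are
// assumed from the golearn base package, and "fmt" is assumed imported.
func exampleSelectSplitAttribute() {
	inst, err := base.ParseCSVToInstances("iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	r := new(InformationGainRuleGenerator)
	// Consider every non-class Attribute as a split candidate
	attr := r.GetSplitAttributeFromSelection(base.NonClassAttributes(inst), inst)
	fmt.Println(attr.GetName())
}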
// GetSplitRuleFromSelection returns a DecisionTreeRule which maximises
// the information gain amongst the considered Attributes.
//
// IMPORTANT: passing a zero-length consideredAttributes parameter will panic()
func (r *InformationGainRuleGenerator) GetSplitRuleFromSelection(consideredAttributes []base.Attribute, f base.FixedDataGrid) *DecisionTreeRule {

	var selectedAttribute base.Attribute

	// Parameter check
	if len(consideredAttributes) == 0 {
		panic("More Attributes should be considered")
	}

	// Compute the information gain at this node for each
	// considered Attribute, and pick the one which maximises it
	maxGain := math.Inf(-1)
	selectedVal := math.Inf(1)

	// Compute the base entropy (class entropy before any split)
	classDist := base.GetClassDistribution(f)
	baseEntropy := getBaseEntropy(classDist)

	// Compute the information gain for each Attribute
	for _, s := range consideredAttributes {
		var informationGain float64
		var splitVal float64
		if fAttr, ok := s.(*base.FloatAttribute); ok {
			// Numeric Attributes also need the threshold which
			// minimises the post-split entropy
			var attributeEntropy float64
			attributeEntropy, splitVal = getNumericAttributeEntropy(f, fAttr)
			informationGain = baseEntropy - attributeEntropy
		} else {
			proposedClassDist := base.GetClassDistributionAfterSplit(f, s)
			localEntropy := getSplitEntropy(proposedClassDist)
			informationGain = baseEntropy - localEntropy
		}

		if informationGain > maxGain {
			maxGain = informationGain
			selectedAttribute = s
			selectedVal = splitVal
		}
	}

	// Return the rule which maximises the information gain
	return &DecisionTreeRule{selectedAttribute, selectedVal}
}
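// exampleInformationGain is a hypothetical worked example, not part of the
// original file, showing the arithmetic behind the loop above on the classic
// 14-row "play tennis" counts. It assumes getBaseEntropy and getSplitEntropy
// take the map shapes implied by their call sites in this file
// (map[string]int and map[string]map[string]int respectively).
func exampleInformationGain() float64 {
	// 9 positive and 5 negative rows: base entropy ≈ 0.940 bits
	baseEntropy := getBaseEntropy(map[string]int{"yes": 9, "no": 5})
	// Post-split entropy, with each branch weighted by its share of
	// the rows: (8/14)·H(6/8, 2/8) + (6/14)·H(3/6, 3/6) ≈ 0.892 bits
	splitEntropy := getSplitEntropy(map[string]map[string]int{
		"branch1": {"yes": 6, "no": 2},
		"branch2": {"yes": 3, "no": 3},
	})
	return baseEntropy - splitEntropy // information gain ≈ 0.048 bits
}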
// InferID3Tree builds a decision tree using a RuleGenerator
// from a set of Instances (implements the ID3 algorithm)
func InferID3Tree(from base.FixedDataGrid, with RuleGenerator) *DecisionTreeNode {
	// Count the occurrences of each class at this node
	classes := base.GetClassDistribution(from)

	// If there's only one class, return a leaf node with
	// the only class available
	if len(classes) == 1 {
		maxClass := ""
		for i := range classes {
			maxClass = i
		}
		ret := &DecisionTreeNode{
			LeafNode,
			nil,
			classes,
			maxClass,
			getClassAttr(from),
			&DecisionTreeRule{nil, 0.0},
		}
		return ret
	}

	// Compute the majority class at this node
	maxVal := 0
	maxClass := ""
	for i := range classes {
		if classes[i] > maxVal {
			maxClass = i
			maxVal = classes[i]
		}
	}

	// If there are no more Attributes left to split on,
	// return a leaf node with the majority class
	cols, _ := from.Size()
	if cols == 2 {
		ret := &DecisionTreeNode{
			LeafNode,
			nil,
			classes,
			maxClass,
			getClassAttr(from),
			&DecisionTreeRule{nil, 0.0},
		}
		return ret
	}

	// Generate a return structure
	ret := &DecisionTreeNode{
		RuleNode,
		nil,
		classes,
		maxClass,
		getClassAttr(from),
		nil,
	}

	// Generate the splitting rule
	splitRule := with.GenerateSplitRule(from)
	if splitRule == nil {
		// Can't determine a split, so return the node
		// carrying the majority class
		return ret
	}

	// Split the instances on the chosen Attribute's values (or, for
	// numeric Attributes, on the chosen threshold)
	var splitInstances map[string]base.FixedDataGrid
	if _, ok := splitRule.SplitAttr.(*base.FloatAttribute); ok {
		splitInstances = base.DecomposeOnNumericAttributeThreshold(from,
			splitRule.SplitAttr, splitRule.SplitVal)
	} else {
		splitInstances = base.DecomposeOnAttributeValues(from, splitRule.SplitAttr)
	}

	// Create a child node from each partition by recursing
	ret.Children = make(map[string]*DecisionTreeNode)
	for k := range splitInstances {
		newInstances := splitInstances[k]
		ret.Children[k] = InferID3Tree(newInstances, with)
	}
	ret.SplitRule = splitRule
	return ret
}
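// exampleInferID3Tree is a hypothetical end-to-end sketch, not part of the
// original file: train an ID3 tree with the information-gain rule generator
// and classify a held-out set. The CSV path is illustrative;
// base.ParseCSVToInstances and base.InstancesTrainTestSplit are assumed from
// the golearn base package, as is the Predict method on DecisionTreeNode.
func exampleInferID3Tree() {
	inst, err := base.ParseCSVToInstances("iris_headers.csv", true)
	if err != nil {
		panic(err)
	}
	train, test := base.InstancesTrainTestSplit(inst, 0.50)
	root := InferID3Tree(train, new(InformationGainRuleGenerator))
	predictions, err := root.Predict(test)
	if err != nil {
		panic(err)
	}
	// Compare predictions against test's class column to score accuracy
	_ = predictions
}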