func weakestLinkCostFunction(t *pb.TreeNode, e Examples) (float64, int) { left, right := splitExamples(t, e) if !isLeaf(t) { leftSquaredDivergence, leftNodes := weakestLinkCostFunction(t.GetLeft(), left) rightSquaredDivergence, rightNodes := weakestLinkCostFunction(t.GetRight(), right) return leftSquaredDivergence + rightSquaredDivergence, leftNodes + rightNodes } return constructLoss(e).sumSquaredDivergence, 1 }
func mapTree(t *pb.TreeNode, e Examples, m TreeMapperFunc) *pb.TreeNode { left, right := splitExamples(t, e) result, continueTraversal := m(t, e) if continueTraversal == false { return result } if result.GetLeft() != nil { result.Left, _ = m(t.GetLeft(), left) } if result.GetRight() != nil { result.Right, _ = m(t.GetRight(), right) } return result }
func splitExamples(t *pb.TreeNode, e Examples) (left Examples, right Examples) { by(func(e1, e2 *pb.Example) bool { return e1.Features[t.GetFeature()] < e2.Features[t.GetFeature()] }).Sort(e) splitIndex := 0 for i, ex := range e { splitIndex = i if ex.Features[t.GetFeature()] > t.GetSplitValue() { break } } left, right = e[:splitIndex], e[splitIndex:] return }
func getAnnotation(node *pb.TreeNode) string { if node.GetAnnotation() == nil || node.GetAnnotation().LeftFraction == nil { return "" } if node.GetAnnotation().GetLeftFraction() > 0.5+*likelyThreshold { return "LIKELY" } if node.GetAnnotation().GetLeftFraction() < 0.5-*likelyThreshold { return "UNLIKELY" } return "" }
func flattenTree(f *fastTreeEvaluator, current *pb.TreeNode, currentIndex int) { glog.Infof("Flattening tree at index %v", currentIndex) if isLeaf(current) { f.nodes[currentIndex] = flatNode{ value: current.GetLeafValue(), feature: leafFeatureID, } return } // append child nodes // since we push on N + 2 elements, we want index N + 1, hence len(f.nodes) leftChild := len(f.nodes) f.nodes = append(f.nodes, flatNode{}, flatNode{}) f.nodes[currentIndex] = flatNode{ value: current.GetSplitValue(), feature: current.GetFeature(), leftChild: leftChild, } flattenTree(f, current.GetLeft(), leftChild) flattenTree(f, current.GetRight(), leftChild+1) }
func validateTree(t *pb.TreeNode) error { if isLeaf(t) { if t.GetLeft() != nil || t.GetRight() != nil { return fmt.Errorf("leaf has non-zero children: %v", t) } return nil } // not a leaf - must have both children if t.GetLeft() == nil || t.GetRight() == nil { return fmt.Errorf("branch has nil children: %v", t.String()) } err := validateTree(t.GetLeft()) if err != nil { return err } err = validateTree(t.GetRight()) if err != nil { return err } return nil }
func printNode(node *pb.TreeNode, c *codeWriter) { if node.GetLeft() == nil && node.GetRight() == nil { c.WriteString(fmt.Sprintf("return %v;\n", node.GetLeafValue())) return } c.WriteString(fmt.Sprintf("if (%v(f[%v] < %v)) {\n", getAnnotation(node), node.GetFeature(), node.GetSplitValue())) { c.indentLevel++ printNode(node.GetLeft(), c) c.indentLevel-- } c.WriteString("} else {\n") { c.indentLevel++ printNode(node.GetRight(), c) c.indentLevel-- } c.WriteString("}\n") }