func (w *Walker) weight(_, v lattice.Node) (float64, error) { vmax, err := v.Maximal() if err != nil { return 0, err } if vmax { indeg, err := v.ParentCount() if err != nil { return 0, err } level := float64(v.Pattern().Level()) return (level) / float64(indeg), nil // maxLevel := float64(w.Dt.LargestLevel()) // return (level) / (float64(indeg) * maxLevel), nil } else { // level approximates indeg level := float64(v.Pattern().Level()) // indeg, err := v.ParentCount() // if err != nil { // return 0, err // } odeg, err := v.ChildCount() if err != nil { return 0, err } // return float64(odeg) / float64(indeg), nil return float64(odeg) / (level), nil } }
func (r *Max) Report(n lattice.Node) error { if ismax, err := n.Maximal(); err != nil { return err } else if ismax { return r.Reporter.Report(n) } return nil }
func (r *CanonMax) Report(n lattice.Node) error { if kids, err := n.CanonKids(); err != nil { return err } else if len(kids) == 0 { return r.Reporter.Report(n) } return nil }
func (r *PrFormatter) SelectionProbability(n lattice.Node, m interface{}) (float64, error) { var nPr float64 err := r.w.Prs.DoFind(n.Pattern().Label(), func(_ []byte, npr float64) error { nPr = npr return nil }) if err != nil { return 0, err } return nPr, nil }
func (w *Walker) Next(cur lattice.Node) (lattice.Node, error) { kids, err := cur.Children() if err != nil { return nil, err } if false { errors.Logf("DEBUG", "cur %v kids %v", cur, len(kids)) } _, next, err := walker.Transition(cur, kids, w.weight, false) return next, err }
func weight(u, v lattice.Node) (float64, error) { udeg, err := u.AdjacentCount() if err != nil { return 0, err } vdeg, err := v.AdjacentCount() if err != nil { return 0, err } return 2 * max(float64(udeg), float64(vdeg)), nil }
func newClusterNode(fmtr lattice.Formatter, n lattice.Node, attr string) (*clusterNode, error) { items, err := itemset(n, attr) if err != nil { return nil, err } labels, err := labelset(n) if err != nil { return nil, err } cn := &clusterNode{n.Pattern(), fmtr.PatternName(n), items, labels} return cn, nil }
func (m *Miner) step(wg *sync.WaitGroup, n lattice.Node, reports chan lattice.Node, stack *Stack) (err error) { if m.Dt.Acceptable(n) { wg.Add(1) reports <- n } kids, err := n.CanonKids() if err != nil { return err } for _, k := range kids { stack.Push(k) } return nil }
func (w *Walker) transitionProbability(cur, next lattice.Node, pr float64) error { if next == nil { return nil } if has, err := w.Prs.Has(next.Pattern().Label()); err != nil { return err } else if has { return nil } var curPr float64 if bytes.Equal(w.Dt.Root().Pattern().Label(), cur.Pattern().Label()) { curPr = 1.0 } else { err := w.Prs.DoFind(cur.Pattern().Label(), func(_ []byte, cpr float64) error { // errors.Logf("PR", "cur %v curPr %v", cur, cpr) curPr = cpr return nil }) if err != nil { return err } } nextPr := curPr * pr // errors.Logf("PR", "adding next %v %v %v", next, next.Pattern().Label(), nextPr) err := w.Prs.Add(next.Pattern().Label(), nextPr) if err != nil { return err } // errors.Logf("PR", "curPr %v -> %v -> nextPr %v", curPr, pr, nextPr) return nil }
func Next(ctx interface{}, cur lattice.Node) (lattice.Node, error) { kids, err := cur.Children() if err != nil { return nil, err } parents, err := cur.Parents() if err != nil { return nil, err } adjs := append(kids, parents...) errors.Logf("DEBUG", "cur %v parents %v kids %v adjs %v", cur, len(parents), len(kids), len(adjs)) _, next, err := walker.Transition(cur, adjs, weight, false) return next, err }
func weight(u, v lattice.Node) (float64, error) { umax, err := u.Maximal() if err != nil { return 0, err } vmax, err := v.Maximal() if err != nil { return 0, err } udeg, err := u.AdjacentCount() if err != nil { return 0, err } vdeg, err := v.AdjacentCount() if err != nil { return 0, err } if umax && vmax { return 0, nil } else if !umax && vmax { return 1.0 / float64(vdeg), nil } else if umax && !vmax { return 1.0 / float64(udeg), nil } else { return 1.0, nil } }
func Next(ctx interface{}, cur lattice.Node) (lattice.Node, error) { w := ctx.(*Walker) if ismax, err := cur.Maximal(); err != nil { return nil, err } else if ismax && w.Dt.Acceptable(cur) { w.teleportAllowed = true errors.Logf("INFO", "ALLOWING TELEPORTS") } if w.teleportAllowed && rand.Float64() < w.TeleportProbability { w.teleportAllowed = false next := w.Teleports[rand.Intn(len(w.Teleports))] errors.Logf("INFO", "TELEPORT\n from %v\n to %v", cur, next) return next, nil } return musk.Next(ctx, cur) }
func (m *Miner) mine() (err error) { seen, err := m.Config.BytesIntMultiMap("stack-seen") if err != nil { return err } add := func(stack []lattice.Node, n lattice.Node) ([]lattice.Node, error) { err := seen.Add(n.Pattern().Label(), 1) if err != nil { return nil, err } return append(stack, n), nil } pop := func(stack []lattice.Node) ([]lattice.Node, lattice.Node) { return stack[:len(stack)-1], stack[len(stack)-1] } stack := make([]lattice.Node, 0, 10) stack, err = add(stack, m.Dt.Root()) if err != nil { return err } for len(stack) > 0 { var n lattice.Node stack, n = pop(stack) if m.Dt.Acceptable(n) { err = m.Rptr.Report(n) if err != nil { return err } } kids, err := n.Children() if err != nil { return err } for _, k := range kids { if has, err := seen.Has(k.Pattern().Label()); err != nil { return err } else if !has { stack, err = add(stack, k) if err != nil { return err } } } } return nil }
func MakeUniformWalk(restartPr float64, selfTransition bool) walker.Walk { return func(w *walker.Walker) (chan lattice.Node, chan bool, chan error) { samples := make(chan lattice.Node) terminate := make(chan bool) errs := make(chan error) go func() { cur := w.Dt.Root() loop: for { samples <- cur if <-terminate { break loop } if rand.Float64() < restartPr { errors.Logf("INFO", "a random restart occured with probability %v", restartPr) cur = w.Dt.Root() } else { curLabel := cur.Pattern().Label() nextLabel := curLabel var next lattice.Node = nil for bytes.Equal(curLabel, nextLabel) { var err error next, err = Next(w, cur) if err != nil { errs <- err break loop } if next == nil { errs <- errors.Errorf("next was nil!!") break loop } nextLabel = next.Pattern().Label() if selfTransition { break } } cur = next } } close(samples) close(errs) }() return samples, terminate, errs } }
func (w *Walker) Next(cur lattice.Node) (lattice.Node, error) { kids, err := cur.CanonKids() if err != nil { return nil, err } errors.Logf("DEBUG", "cur %v kids %v", cur, len(kids)) pr, next, err := walker.Transition(cur, kids, w.weight, true) if err != nil { return nil, err } if next == nil && w.Max { if ismax, err := cur.Maximal(); err != nil { return nil, err } else if !ismax { return w.Dt.Root(), nil } } return next, w.transitionProbability(cur, next, pr) }
func (m *Miner) mine() (err error) { add := func(stack []lattice.Node, n lattice.Node) ([]lattice.Node, error) { stack = append(stack, n) if len(stack) > m.MaxQueueSize { stack = m.dropOne(stack) } return stack, nil } root := m.Dt.Root() rootKids, err := root.CanonKids() if err != nil { return err } for _, rk := range rootKids { stack := make([]lattice.Node, 0, 10) stack, err = add(stack, rk) if err != nil { return err } for len(stack) > 0 { var n lattice.Node stack, n = m.takeOne(stack) if m.Dt.Acceptable(n) { err = m.Rptr.Report(n) if err != nil { return err } } kids, err := n.CanonKids() if err != nil { return err } for _, k := range kids { stack, err = add(stack, k) if err != nil { return err } } } } return nil }
func Next(w *walker.Walker, cur lattice.Node) (lattice.Node, error) { kids, err := cur.Children() if err != nil { return nil, err } parents, err := cur.Parents() if err != nil { return nil, err } adjs := append(kids, parents...) errors.Logf("DEBUG", "cur %v parents %v kids %v adjs %v", cur, len(parents), len(kids), len(adjs)) prs, err := transPrs(w, cur, adjs) if err != nil { return nil, err } adjs = append(adjs, cur) prs = append(prs, selfPr(prs)) i := stats.WeightedSample(prs) return adjs[i], nil }
func (w *Walker) estimateDepthDiameter(v lattice.Node, walks int) (depth, diameter float64, err error) { if kids, err := v.CanonKids(); err != nil { return 0, 0, err } else if len(kids) <= 0 { return 1, 1, nil } var maxDepth int = 0 var maxTail lattice.Pattern = nil tails := set.NewSortedSet(10) for i := 0; i < walks; i++ { errors.Logf("EST-WALK-DEBUG", "walk %v %v", i, v) var path []lattice.Node = nil var err error = nil path, err = w.walkFrom(v) if err != nil { return 0, 0, err } tail := path[len(path)-1].Pattern() tails.Add(tail) if len(path) > maxDepth { maxDepth = len(path) maxTail = tail } } level := maxDepth + v.Pattern().Level() if level < w.Dt.MinimumLevel() { return 0, 0, nil } patterns := make([]lattice.Pattern, 0, tails.Size()) for t, next := tails.Items()(); next != nil; t, next = next() { patterns = append(patterns, t.(lattice.Pattern)) } anc, err := CommonAncestor(patterns) if err != nil { return 0, 0, err } diameter = float64(maxTail.Level()-anc.Level()) + 1 depth = float64(maxDepth) + 1 return depth, diameter, nil }
func (w *Walker) weight(_, v lattice.Node) (float64, error) { label := v.Pattern().Label() if has, err := w.Ests.Has(label); err != nil { return 0, err } else if has { var est float64 err := w.Ests.DoFind(label, func(_ []byte, f float64) error { est = f return nil }) if err != nil { return 0, err } return est, nil } var est float64 if kids, err := v.CanonKids(); err != nil { return 0, err } else if len(kids) > 0 { depth, diameter, err := w.estimateDepthDiameter(v, w.EstimatingWalks) if err != nil { return 0, err } est = depth * diameter if est >= 1 { errors.Logf("DEBUG", "weight %v depth %v diameter %v est %v", v, depth, diameter, est) } } else if v.Pattern().Level() >= w.Dt.MinimumLevel() { est = 1.0 // errors.Logf("INFO", "node %v is max %v est %v", v, ismax, est) } else { est = 0.0 // errors.Logf("INFO", "node %v is max %v but too small v est %v", v, ismax, est) } err := w.Ests.Add(label, est) if err != nil { return 0, err } return est, nil }
func (w *Walker) Next(cur lattice.Node) (lattice.Node, error) { errors.Logf("DEBUG", "cur %v", cur) return uniform(cur.Children()) }