func (m *RandomWalkMiner) nonOverlapping(sgs partition) partition { group := make(sortableIsoGroup, 0, len(sgs)) for _, sg := range sgs { group = append(group, &isoGroupWithSet{ sg: sg, vertices: VertexSet(sg), }) } sort.Sort(group) vids := set.NewSortedSet(10) non_overlapping := make(partition, 0, len(sgs)) for _, sg := range group { s := sg.vertices if !vids.Overlap(s) { non_overlapping = append(non_overlapping, sg.sg) for v, next := s.Items()(); next != nil; v, next = next() { item := v.(types.Int) if err := vids.Add(item); err != nil { panic(err) } } } } return non_overlapping }
func (ids *IdNode) idSet(length int) *set.SortedSet { s := set.NewSortedSet(length) for c := ids; c != nil; c = c.Prev { s.Add(types.Int(c.Id)) } return s }
func int32sToSet(list []int32) *set.SortedSet { items := set.NewSortedSet(len(list)) for _, item := range list { items.Add(types.Int32(item)) } return items }
func (w *Walker) RejectingWalk(samples chan lattice.Node, terminate chan bool) chan lattice.Node { accepted := make(chan lattice.Node) go func() { i := 0 seen := set.NewSortedSet(w.Config.Samples) for sampled := range samples { accept := false if !w.Reject || w.Dt.Acceptable(sampled) { label := types.ByteSlice(sampled.Pattern().Label()) if !w.Config.Unique || !seen.Has(label) { if w.Config.Unique { seen.Add(label) } accept = true i++ } else { errors.Logf("DEBUG", "duplicate %v", sampled) } } else { errors.Logf("DEBUG", "rejected %v", sampled) } if i >= w.Config.Samples { terminate <- true } else { terminate <- false } if accept { accepted <- sampled } } close(accepted) close(terminate) }() return accepted }
func digraphItemset(n *digraph.EmbListNode, attr string) (types.Set, error) { dt := n.Dt embs, err := n.Embeddings() if err != nil { return nil, err } s := set.NewSortedSet(len(embs)) for _, emb := range embs { for _, vid := range emb.Ids { err := dt.NodeAttrs.DoFind( int32(vid), func(_ int32, attrs map[string]interface{}) error { if val, has := attrs[attr]; has { switch v := val.(type) { case string: s.Add(types.String(v)) case int: s.Add(types.Int(v)) default: return errors.Errorf("DBSCAN does not yet support attr type %T", val) } } return nil }) if err != nil { return nil, err } } } return s, nil }
func (n *EmbListNode) SaveUnsupportedExts(orgLen int, vord []int, eps *set.SortedSet) error { if n.Dt.Config.Mode&ExtensionPruning == 0 { return nil } n.unsupExts = set.NewSortedSet(eps.Size()) for x, next := eps.Items()(); next != nil; x, next = next() { ep := x.(*subgraph.Extension) ept := ep.Translate(orgLen, vord) n.unsupExts.Add(ept) } if n.Dt.UnsupExts == nil || n.Dt.Config.Mode&Caching == 0 { return nil } n.Dt.lock.Lock() defer n.Dt.lock.Unlock() if len(n.Pat.E) < 4 { return nil } label := n.Label() for x, next := n.unsupExts.Items()(); next != nil; x, next = next() { ept := x.(*subgraph.Extension) err := n.Dt.UnsupExts.Add(label, ept) if err != nil { return err } } return nil }
func parents(n Node, parents bytes_bytes.MultiMap, parentCount bytes_int.MultiMap) (nodes []lattice.Node, err error) { // errors.Logf("DEBUG", "compute Parents\n of %v", n) if n.isRoot() { return []lattice.Node{}, nil } dt := n.dt() sg := n.SubGraph() if len(sg.V) == 1 && len(sg.E) == 0 { return []lattice.Node{dt.Root()}, nil } if nodes, has, err := cachedAdj(n, dt, dt.ParentCount, dt.Parents); err != nil { return nil, err } else if has { return nodes, nil } parentBuilders, err := AllParents(n.SubGraph().Builder()) if err != nil { return nil, err } seen := set.NewSortedSet(10) nodes = make([]lattice.Node, 0, 10) for _, pBuilder := range parentBuilders { parent := pBuilder.Build() if seen.Has(parent) { continue } seen.Add(parent) support, pexts, pembs, poverlap, punsupEmbs, err := ExtsAndEmbs(dt, parent, nil, set.NewSortedSet(0), nil, dt.Mode, false) if err != nil { return nil, err } if support < dt.Support() { // this means this parent support comes from automorphism // it isn't truly supported, and its children may be spurious as well // log and skip? ExtsAndEmbs(dt, parent, nil, set.NewSortedSet(0), nil, dt.Mode, true) errors.Logf("WARN", "for node %v parent %v had support %v less than required %v due to automorphism", n, parent.Pretty(dt.Labels), support, dt.Support()) } else { nodes = append(nodes, n.New(parent, pexts, pembs, poverlap, punsupEmbs)) } } if len(nodes) == 0 { return nil, errors.Errorf("Found no parents!!\n node %v", n) } return nodes, cacheAdj(dt, dt.ParentCount, dt.Parents, n.Label(), nodes) }
func (m *RandomWalkMiner) extensions(sgs []*goiso.SubGraph) *set.SortedSet { if len(sgs) == 0 { return set.NewSortedSet(10) } label := types.ByteSlice(sgs[0].ShortLabel()) if m.extended.Has(label) { keys := m.extended.Get(label) return keys } keys := set.NewSortedSet(10) m.extend(sgs, func(sg *goiso.SubGraph) { m.AllEmbeddings.send(sg) keys.Add(types.ByteSlice(sg.ShortLabel())) }) m.extended.Put(label, keys) return keys }
func VertexSet(sg *goiso.SubGraph) *set.SortedSet { s := set.NewSortedSet(len(sg.V)) for _, v := range sg.V { if err := s.Add(types.Int(v.Id)); err != nil { panic(err) } } return s }
func digraphLabelset(n *digraph.EmbListNode) (types.Set, error) { p := n.Pat s := set.NewSortedSet(len(p.V) + len(p.E)) for i := range p.V { s.Add(types.Int(p.V[i].Color)) } for i := range p.E { s.Add(types.Int(p.E[i].Color)) } return s, nil }
func (n *EmbListNode) UnsupportedExts() (*set.SortedSet, error) { if n.unsupExts != nil && n.Dt.Config.Mode&ExtensionPruning == ExtensionPruning { return n.unsupExts, nil } if n.Dt.UnsupExts == nil || n.Dt.Config.Mode&Caching == 0 { return set.NewSortedSet(0), nil } n.Dt.lock.RLock() defer n.Dt.lock.RUnlock() label := n.Label() u := set.NewSortedSet(10) err := n.Dt.UnsupExts.DoFind(label, func(_ []byte, ext *subgraph.Extension) error { return u.Add(ext) }) if err != nil { return nil, err } n.unsupExts = u return u, nil }
func (m *RandomWalkMiner) initial() (Collectors, *set.SortedSet) { groups := m.makeCollectors(m.PLevel) for i := range m.Graph.V { v := &m.Graph.V[i] if m.Graph.ColorFrequency(v.Color) >= m.Support { sg, _ := m.Graph.VertexSubGraph(v.Idx) groups.send(sg) } } startingPoints := set.NewSortedSet(10) for key, next := groups.keys()(); next != nil; key, next = next() { startingPoints.Add(types.ByteSlice(key)) } return groups, startingPoints }
func VertexSets(sgs partition) []*set.MapSet { if len(sgs) == 0 { return make([]*set.MapSet, 0) } sets := make([]*set.MapSet, 0, len(sgs[0].V)) for i := range sgs[0].V { set := set.NewMapSet(set.NewSortedSet(len(sgs))) for j, sg := range sgs { id := types.Int(sg.V[i].Id) if !set.Has(id) { set.Put(id, j) } } sets = append(sets, set) } return sets }
func (st *SurveyLogStore) load() (int, *models.Survey, error) { if st.cache != nil { return st.cache.answerCount, st.cache.survey, nil } answers := make([]*models.SurveyAnswer, 0, len(st.clones)*2) answered := set.NewSortedSet(len(st.clones)) err := createOrOpen(st.answersPath, func(path string) (err error) { // create file f, err := os.Create(path) if err != nil { return err } return f.Close() }, func(path string) (err error) { f, err := os.Open(path) if err != nil { return err } defer f.Close() return st.loadFile(f, &answers, answered) }, ) if err != nil { return 0, nil, err } unanswered := st.cloneIdxs.Subtract(answered) s := &models.Survey{ Questions: st.questions, Clones: st.clones, Unanswered: unanswered, Answers: answers, } st.cache = &surveyCache{ survey: s, answerCount: len(answers), } return len(answers), s, nil }
func (w *Walker) estimateDepthDiameter(v lattice.Node, walks int) (depth, diameter float64, err error) { if kids, err := v.CanonKids(); err != nil { return 0, 0, err } else if len(kids) <= 0 { return 1, 1, nil } var maxDepth int = 0 var maxTail lattice.Pattern = nil tails := set.NewSortedSet(10) for i := 0; i < walks; i++ { errors.Logf("EST-WALK-DEBUG", "walk %v %v", i, v) var path []lattice.Node = nil var err error = nil path, err = w.walkFrom(v) if err != nil { return 0, 0, err } tail := path[len(path)-1].Pattern() tails.Add(tail) if len(path) > maxDepth { maxDepth = len(path) maxTail = tail } } level := maxDepth + v.Pattern().Level() if level < w.Dt.MinimumLevel() { return 0, 0, nil } patterns := make([]lattice.Pattern, 0, tails.Size()) for t, next := tails.Items()(); next != nil; t, next = next() { patterns = append(patterns, t.(lattice.Pattern)) } anc, err := CommonAncestor(patterns) if err != nil { return 0, 0, err } diameter = float64(maxTail.Level()-anc.Level()) + 1 depth = float64(maxDepth) + 1 return depth, diameter, nil }
func (m *RandomWalkMiner) supportedKeys(from []byte, keys *set.SortedSet) *set.SortedSet { key := types.ByteSlice(from) if m.supportedExtensions.Has(key) { supKeys := m.supportedExtensions.Get(key) return supKeys } keysCh := make(chan []byte) partKeys := make(chan []byte) done := make(chan bool) for i := 0; i < m.PLevel; i++ { go func() { for key := range keysCh { if len(m.partition(key)) >= m.Support { partKeys <- key } } done <- true }() } go func() { for k, next := keys.Items()(); next != nil; k, next = next() { keysCh <- []byte(k.(types.ByteSlice)) } close(keysCh) }() go func() { for i := 0; i < m.PLevel; i++ { <-done } close(partKeys) close(done) }() supKeys := set.NewSortedSet(10) for partKey := range partKeys { supKeys.Add(types.ByteSlice(partKey)) } m.supportedExtensions.Put(key, supKeys) return supKeys }
func NewSurveyStore(dir string, questions []models.Renderable, clones []*clones.Clone) (*SurveyLogStore, error) { fi, err := os.Stat(dir) if err != nil && os.IsNotExist(err) { err := os.Mkdir(dir, 0775) if err != nil { return nil, err } } else if err != nil { return nil, err } else if !fi.IsDir() { return nil, fmt.Errorf("%v is not a directory", dir) } cloneIdxs := set.NewSortedSet(len(clones)) for i := 0; i < len(clones); i++ { cloneIdxs.Add(types.Int(i)) } qPath := filepath.Join(dir, "questions") if q, err := os.Create(qPath); err != nil { return nil, err } else { defer q.Close() bytes, err := json.Marshal(questions) if err != nil { return nil, err } _, err = q.Write(bytes) if err != nil { return nil, err } } st := &SurveyLogStore{ questions: questions, clones: clones, cloneIdxs: cloneIdxs, answersPath: filepath.Join(dir, "answers"), } return st, nil }
func add(clusters []cluster, cn *clusterNode, epsilon float64, sim func(a, b *clusterNode) float64) []cluster { near := set.NewSortedSet(len(clusters)) min_near := -1 min_sim := -1.0 var min_item *clusterNode = nil for i := len(clusters) - 1; i >= 0; i-- { for _, b := range clusters[i] { s := sim(cn, b) if s <= epsilon { near.Add(types.Int(i)) if min_near == -1 || s < min_sim { min_near = i min_sim = s min_item = b } } } } if near.Size() <= 0 { return append(clusters, cluster{cn}) } if false { errors.Logf("DBSCAN", "%v %v %v", min_sim, cn.pattern, min_item.pattern) } clusters[min_near] = append(clusters[min_near], cn) prev := -1 for x, next := near.ItemsInReverse()(); next != nil; x, next = next() { cur := int(x.(types.Int)) if prev >= 0 { clusters[cur] = append(clusters[cur], clusters[prev]...) clusters = remove(clusters, prev) } prev = cur } return clusters }
// dfs calls visit on root with a fresh, empty sorted set (initial size 250)
// as the set argument.
func dfs(t testing.TB, x *assert.Assertions, root Node) {
	seen := set.NewSortedSet(250)
	visit(t, x, seen, root)
}
func TestSetCast(t *testing.T) { s := set.NewSortedSet(17) _ = types.Set(s) _ = types.Hashable(s) }
func (n *Node) Parents() ([]lattice.Node, error) { if n.pat.Items.Size() == 0 { return []lattice.Node{}, nil } else if n.pat.Items.Size() == 1 { return []lattice.Node{n.dt.empty}, nil } i := setToInt32s(n.pat.Items) if has, err := n.dt.ParentCount.Has(i); err != nil { return nil, err } else if has { return n.cached(n.dt.Parents, i) } parents := make([]*set.SortedSet, 0, n.pat.Items.Size()) for item, next := n.pat.Items.Items()(); next != nil; item, next = next() { parent := n.pat.Items.Copy() parent.Delete(item) parents = append(parents, parent) } nodes := make([]lattice.Node, 0, 10) for _, items := range parents { if node, err := TryLoadNode(setToInt32s(items), n.dt); err != nil { return nil, err } else if node != nil { nodes = append(nodes, node) continue } ctxs := int32sToSet(n.txs) var txs types.Set for item, next := items.Items()(); next != nil; item, next = next() { mytxs := set.NewSortedSet(len(n.txs) + 10) for _, tx := range n.dt.InvertedIndex[item.(types.Int32)] { if !ctxs.Has(types.Int32(tx)) { mytxs.Add(types.Int32(tx)) } } var err error if txs == nil { txs = mytxs } else { txs, err = txs.Intersect(mytxs) if err != nil { return nil, err } } } txs, err := txs.Union(ctxs) if err != nil { return nil, err } stxs := make([]int32, 0, txs.Size()) for item, next := txs.Items()(); next != nil; item, next = next() { stxs = append(stxs, int32(item.(types.Int32))) } node := &Node{Pattern{items}, n.dt, stxs} err = node.Save() if err != nil { return nil, err } nodes = append(nodes, node) } err := n.cache(n.dt.ParentCount, n.dt.Parents, i, nodes) if err != nil { return nil, err } return nodes, nil }