func doGood() {
	// goodUIDs are UIDs with type.object.name containing "good".
	results := expand(goodUIDs)
	directorEdges := results["film.director.film"]
	x.AssertTrue(len(directorEdges) > 100)

	// Directors are UIDs which go to goodUIDs via "film.director.film".
	directorUIDs, _ := uniqueUIDs(directorEdges)
	results = expand(directorUIDs)
	filmEdges := results["film.film.directed_by"]
	x.AssertTrue(len(filmEdges) > 100)

	// Films are UIDs which go to directorUIDs via "film.film.directed_by".
	filmUIDs, _ := uniqueUIDs(filmEdges)
	results = expand(filmUIDs)
	directorEdges = results["film.director.film"]
	x.AssertTrue(len(directorEdges) > 100)

	// Directors are UIDs which go to filmUIDs via "film.director.film".
	directorUIDs, counts := uniqueUIDs(directorEdges)
	var maxCount int
	var maxDirector uint64
	for i, c := range counts {
		if c > maxCount {
			maxCount = c
			maxDirector = directorUIDs[i]
		}
	}
	fmt.Printf("maxDirector %d with count %d\n", maxDirector, maxCount)
}
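// uniqueUIDs is called by doGood but its implementation is not included in this
// excerpt. The following is a minimal hypothetical sketch, not the original
// implementation, inferred from the call sites above: it collects the UIDs found
// in the edge map's value slices, deduplicates them, and returns them alongside
// how many times each UID occurred.
func uniqueUIDs(edges map[uint64][]uint64) ([]uint64, []int) {
	seen := make(map[uint64]int)
	for _, list := range edges {
		for _, uid := range list {
			seen[uid]++
		}
	}
	uids := make([]uint64, 0, len(seen))
	counts := make([]int, 0, len(seen))
	for uid, c := range seen {
		uids = append(uids, uid)
		counts = append(counts, c)
	}
	return uids, counts
}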
// getPostingList tries to get posting list from l.pbuffer. If it is nil, then
// we query RocksDB. There is no need for lock acquisition here.
func (l *List) getPostingList(loop int) *types.PostingList {
	if loop >= 10 {
		x.Fatalf("This is over the 10th loop: %v", loop)
	}
	l.AssertRLock()
	// Wait for any previous commits to happen before retrieving posting list again.
	l.Wait()

	pb := atomic.LoadPointer(&l.pbuffer)
	plist := (*types.PostingList)(pb)

	if plist == nil {
		x.AssertTrue(l.pstore != nil)
		plist = new(types.PostingList)

		if slice, err := l.pstore.Get(l.key); err == nil && slice != nil {
			x.Checkf(plist.Unmarshal(slice.Data()), "Unable to Unmarshal PostingList from store")
			slice.Free()
		}
		if atomic.CompareAndSwapPointer(&l.pbuffer, pb, unsafe.Pointer(plist)) {
			return plist
		}
		// Someone else replaced the pointer in the meantime. Retry recursively.
		return l.getPostingList(loop + 1)
	}
	return plist
}
func parseDefaultConfig(l string) (uint32, error) {
	// If we have already seen a default config line, and n has a value, then
	// return an error.
	if groupConfig.n != 0 {
		return 0, fmt.Errorf("Default config can only be defined once: %v", l)
	}
	l = strings.TrimSpace(l)
	conf := strings.Split(l, " ")
	// + in (fp % n + k) is optional.
	if !(len(conf) == 5 || len(conf) == 3) || conf[0] != "fp" || conf[1] != "%" {
		return 0, fmt.Errorf("Default config format should be like: %v", "default: fp % n + k")
	}

	var err error
	var n uint64
	n, err = strconv.ParseUint(conf[2], 10, 32)
	x.Check(err)
	groupConfig.n = uint32(n)
	x.AssertTrue(groupConfig.n != 0)

	if len(conf) == 5 {
		if conf[3] != "+" {
			return 0, fmt.Errorf("Default config format should be like: %v", "default: fp % n + k")
		}
		n, err = strconv.ParseUint(conf[4], 10, 32)
		x.Check(err)
		groupConfig.k = uint32(n)
	}
	return groupConfig.k, nil
}
func (p *poolsi) connect(addr string) {
	if addr == *myAddr {
		return
	}
	p.RLock()
	_, has := p.all[addr]
	p.RUnlock()
	if has {
		return
	}

	pool := newPool(addr, 5)
	query := new(Payload)
	query.Data = make([]byte, 10)
	x.Check2(rand.Read(query.Data))

	conn, err := pool.Get()
	x.Checkf(err, "Unable to connect")

	c := NewWorkerClient(conn)
	resp, err := c.Echo(context.Background(), query)
	x.Checkf(err, "Unable to Echo")
	x.AssertTrue(bytes.Equal(resp.Data, query.Data))
	x.Check(pool.Put(conn))
	fmt.Printf("Connection with %q successful.\n", addr)

	p.Lock()
	defer p.Unlock()
	_, has = p.all[addr]
	if has {
		return
	}
	p.all[addr] = pool
}
// newQuery creates a Query task and returns it.
func newQuery(attr string, uids []uint64, srcFunc []string) *task.Query {
	x.AssertTrue(uids == nil || srcFunc == nil)
	return &task.Query{
		Uids:    uids,
		SrcFunc: srcFunc,
		Attr:    attr,
	}
}
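// Not part of the original source: a hypothetical usage sketch. The assertion in
// newQuery encodes that a task.Query is seeded either by explicit UIDs or by a
// source function, never both. The attribute names and the "geq" argument layout
// below are illustrative only (processTask further down expects SrcFunc[0] to be
// the function name and SrcFunc[1] its value).
func exampleNewQuery() {
	byUIDs := newQuery("film.film.directed_by", []uint64{0xab, 0xcd}, nil)
	byFunc := newQuery("film.film.initial_release_date", nil, []string{"geq", "1990-01-01"})
	_, _ = byUIDs, byFunc
}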
func (n *node) processMembership(e raftpb.Entry, mm *task.Membership) error {
	x.AssertTrue(n.gid == 0)

	x.Printf("group: %v Addr: %q leader: %v dead: %v\n",
		mm.GroupId, mm.Addr, mm.Leader, mm.AmDead)
	groups().applyMembershipUpdate(e.Index, mm)
	return nil
}
func parsePeer(peer string) (uint64, string) {
	x.AssertTrue(len(peer) > 0)

	kv := strings.SplitN(peer, ":", 2)
	x.AssertTruef(len(kv) == 2, "Invalid peer format: %v", peer)

	pid, err := strconv.ParseUint(kv[0], 10, 64)
	x.Checkf(err, "Invalid peer id: %v", kv[0])
	// TODO: Validate the url kv[1]
	return pid, kv[1]
}
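// Not part of the original source: a hypothetical usage sketch. Because SplitN is
// called with a limit of 2, only the first ':' separates the peer id from the
// address, so a host:port address survives intact.
func exampleParsePeer() {
	pid, addr := parsePeer("1:localhost:12345")
	fmt.Printf("peer id: %d, addr: %s\n", pid, addr) // peer id: 1, addr: localhost:12345
}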
// initIndex initializes the index with the given data store.
func initIndex() {
	x.AssertTrue(pstore != nil)

	// Initialize TokensTables.
	indexedFields := schema.IndexedFields()
	type resultStruct struct {
		attr  string
		table *TokensTable
	}
	results := make(chan resultStruct, len(indexedFields))

	for _, attr := range indexedFields {
		go func(attr string) {
			table := &TokensTable{
				key: make([]string, 0, 50),
			}
			pk := x.ParsedKey{
				Attr: attr,
			}
			prefix := pk.IndexPrefix()

			it := pstore.NewIterator()
			defer it.Close()
			for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
				pki := x.Parse(it.Key().Data())
				x.AssertTrue(pki.IsIndex())
				x.AssertTrue(len(pki.Term) > 0)
				table.push(pki.Term)
			}
			results <- resultStruct{attr, table}
		}(attr)
	}

	tables = make(map[string]*TokensTable)
	for i := 0; i < len(indexedFields); i++ {
		r := <-results
		tables[r.attr] = r.table
	}
}
// newSort creates a task.Sort for sorting.
func newSort(uids [][]uint64, offset, count int) *task.Sort {
	x.AssertTrue(uids != nil)
	uidMatrix := make([]*task.List, len(uids))
	for i, l := range uids {
		uidMatrix[i] = &task.List{Uids: l}
	}
	return &task.Sort{
		Attr:      "dob",
		Offset:    int32(offset),
		Count:     int32(count),
		UidMatrix: uidMatrix,
	}
}
// applyPagination applies the pagination parameters (offset and count) to sg.uidMatrix.
func (sg *SubGraph) applyPagination(ctx context.Context) error {
	params := sg.Params
	if params.Count == 0 && params.Offset == 0 {
		// No pagination.
		return nil
	}
	x.AssertTrue(len(sg.SrcUIDs.Uids) == len(sg.uidMatrix))
	for _, l := range sg.uidMatrix {
		algo.IntersectWith(l, sg.DestUIDs)
		start, end := pageRange(&sg.Params, len(l.Uids))
		l.Uids = l.Uids[start:end]
	}
	// Re-merge the UID matrix.
	sg.DestUIDs = algo.MergeSorted(sg.uidMatrix)
	return nil
}
// stringHelper does simple DFS to convert FilterTree to string.
func (t *FilterTree) stringHelper(buf *bytes.Buffer) {
	x.AssertTrue(t != nil)
	if t.Func != nil && len(t.Func.Name) > 0 {
		// Leaf node.
		_, err := buf.WriteRune('(')
		x.Check(err)
		_, err = buf.WriteString(t.Func.Name)
		x.Check(err)

		if len(t.Func.Attr) > 0 {
			args := make([]string, len(t.Func.Args)+1)
			args[0] = t.Func.Attr
			copy(args[1:], t.Func.Args)

			for _, arg := range args {
				_, err = buf.WriteString(" \"")
				x.Check(err)
				_, err = buf.WriteString(arg)
				x.Check(err)
				_, err = buf.WriteRune('"')
				x.Check(err)
			}
		}
		_, err = buf.WriteRune(')')
		x.Check(err)
		return
	}

	// Non-leaf node.
	_, err := buf.WriteRune('(')
	x.Check(err)
	switch t.Op {
	case "&":
		_, err = buf.WriteString("AND")
	case "|":
		_, err = buf.WriteString("OR")
	default:
		err = x.Errorf("Unknown operator: %q", t.Op)
	}
	x.Check(err)

	for _, c := range t.Child {
		_, err = buf.WriteRune(' ')
		x.Check(err)
		c.stringHelper(buf)
	}
	_, err = buf.WriteRune(')')
	x.Check(err)
}
func main() {
	x.Init()

	s = &status{
		start: time.Now(),
	}

	filesList := strings.Split(*files, ",")
	x.AssertTrue(len(filesList) > 0)
	for _, file := range filesList {
		processFile(file)
	}
	fmt.Printf("Number of mutations run : %d\n", s.mutations)
	fmt.Printf("Number of RDFs processed : %d\n", s.rdfs)
	fmt.Printf("Time spent : %v\n", time.Since(s.start))
	fmt.Printf("RDFs processed per second : %d\n", s.rdfs/uint64(time.Since(s.start).Seconds()))
}
func expand(uids []uint64, pred string) []uint64 {
	x.AssertTrue(graph != nil)
	var out []uint64
	for p, m := range graph {
		if pred != p {
			continue
		}
		for _, u := range uids {
			dst := m[u]
			if dst == nil {
				continue
			}
			out = append(out, dst...)
		}
	}
	return out
}
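// Not part of the original source: a hypothetical usage sketch of this expand
// variant, which follows forward edges for a single predicate. The entity id
// being fingerprinted is illustrative only.
func exampleExpand() {
	filmUID := farm.Fingerprint64([]byte("illustrative-film-id"))
	directors := expand([]uint64{filmUID}, "film.film.directed_by")
	fmt.Printf("num directors: %d\n", len(directors))
}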
func expand(uids []uint64) map[string]map[uint64][]uint64 {
	x.AssertTrue(invGraph != nil)
	out := make(map[string]map[uint64][]uint64)
	for pred, m := range invGraph {
		outM := make(map[uint64][]uint64)
		for _, u := range uids { // srcUID.
			z := m[u]
			if z == nil {
				continue
			}
			outM[u] = z
		}
		if len(outM) > 0 {
			out[pred] = outM
		}
	}
	return out
}
// applyOrderAndPagination orders each posting list by a given attribute
// before applying pagination.
func (sg *SubGraph) applyOrderAndPagination(ctx context.Context) error {
	if len(sg.Params.Order) == 0 {
		return nil
	}
	if sg.Params.Count == 0 {
		// Only retrieve up to 1000 results by default.
		sg.Params.Count = 1000
	}

	sort := &task.Sort{
		Attr:      sg.Params.Order,
		UidMatrix: sg.uidMatrix,
		Offset:    int32(sg.Params.Offset),
		Count:     int32(sg.Params.Count),
		Desc:      sg.Params.OrderDesc,
	}
	result, err := worker.SortOverNetwork(ctx, sort)
	if err != nil {
		return err
	}

	x.AssertTrue(len(result.UidMatrix) == len(sg.uidMatrix))
	sg.uidMatrix = result.GetUidMatrix()

	// Update sg.destUID. Iterate over the UID matrix (which is not sorted by
	// UID). For each element in UID matrix, we do a binary search in the
	// current destUID and mark it. Then we scan over this bool array and
	// rebuild destUIDs.
	included := make([]bool, len(sg.DestUIDs.Uids))
	for _, ul := range sg.uidMatrix {
		for _, uid := range ul.Uids {
			idx := algo.IndexOf(sg.DestUIDs, uid) // Binary search.
			if idx >= 0 {
				included[idx] = true
			}
		}
	}
	algo.ApplyFilter(sg.DestUIDs,
		func(uid uint64, idx int) bool { return included[idx] })
	return nil
}
// addIndexMutations adds mutation(s) for a single term, to maintain index.
func addIndexMutations(ctx context.Context, attr string, uid uint64, p types.Value, del bool) {
	x.AssertTrue(uid != 0)
	tokens, err := IndexTokens(attr, p)
	if err != nil {
		// This data is not indexable.
		return
	}
	edge := &task.DirectedEdge{
		ValueId: uid,
		Attr:    attr,
		Label:   "idx",
	}

	tokensTable := GetTokensTable(attr)
	x.AssertTruef(tokensTable != nil, "TokensTable missing for attr %s", attr)

	for _, token := range tokens {
		addIndexMutation(ctx, attr, token, tokensTable, edge, del)
	}
}
func batchCommit() {
	var sz int
	var waits []*x.SafeWait
	var loop uint64

	b := pstore.NewWriteBatch()
	defer b.Destroy()

	for {
		select {
		case e := <-commitCh:
			b.Put(e.key, e.val)
			sz++
			waits = append(waits, e.sw)

		default:
			// default is executed if no other case is ready.
			start := time.Now()
			if sz > 0 {
				x.AssertTrue(b != nil)
				loop++
				fmt.Printf("[%4d] Writing batch of size: %v\n", loop, sz)
				x.Checkf(pstore.WriteBatch(b), "Error while writing to RocksDB.")
				for _, w := range waits {
					w.Done()
				}
				b.Clear()
				sz = 0
				waits = waits[:0]
			}
			// Add a sleep clause to avoid a busy wait loop if there's no input to commitCh.
			sleepFor := 10*time.Millisecond - time.Since(start)
			if sleepFor > time.Millisecond {
				time.Sleep(sleepFor)
			}
		}
	}
}
// NewTokenizer creates a new Tokenizer object from a given input string of bytes.
func NewTokenizer(s []byte) (*Tokenizer, error) {
	x.AssertTrue(s != nil)

	if disableICU {
		// ICU is disabled. Return a dummy tokenizer.
		return &Tokenizer{}, nil
	}

	sNorm, terr := normalize(s)
	if terr != nil {
		return nil, terr
	}
	sNorm = append(sNorm, 0) // Null-terminate this for ICU's C functions.

	var err C.UErrorCode
	c := C.NewTokenizer(byteToChar(sNorm), C.int(len(s)), maxTokenSize, &err)
	if int(err) > 0 {
		return nil, x.Errorf("ICU new tokenizer error %d", int(err))
	}
	if c == nil {
		return nil, x.Errorf("NewTokenizer returns nil")
	}
	return &Tokenizer{c, C.TokenizerToken(c)}, nil
}
func (l *List) updateMutationLayer(mpost *types.Posting) bool {
	l.AssertLock()
	x.AssertTrue(mpost.Op == Set || mpost.Op == Del)

	// First check the mutable layer.
	midx := sort.Search(len(l.mlayer), func(idx int) bool {
		mp := l.mlayer[idx]
		return mpost.Uid <= mp.Uid
	})

	// This block handles the case where mpost.UID is found in mutation layer.
	if midx < len(l.mlayer) && l.mlayer[midx].Uid == mpost.Uid {
		// mp is the posting found in mlayer.
		oldPost := l.mlayer[midx]

		// Note that mpost.Op is either Set or Del, whereas oldPost.Op can be
		// either Set or Del or Add.
		msame := samePosting(oldPost, mpost)
		if msame && ((mpost.Op == Del) == (oldPost.Op == Del)) {
			// This posting has similar content as what is found in mlayer. If the
			// ops are similar, then we do nothing. Note that Add and Set are
			// considered similar, and the second clause is true also when
			// mpost.Op==Add and oldPost.Op==Set.
			return false
		}

		if !msame && mpost.Op == Del {
			// Invalid Del as contents do not match.
			return false
		}

		// Here are the remaining cases.
		// Del, Set: Replace with new post.
		// Del, Del: Replace with new post.
		// Set, Del: Replace with new post.
		// Set, Set: Replace with new post.
		// Add, Del: Undo by removing oldPost.
		// Add, Set: Replace with new post. Need to set mpost.Op to Add.
		if oldPost.Op == Add {
			if mpost.Op == Del {
				// Undo old post.
				copy(l.mlayer[midx:], l.mlayer[midx+1:])
				l.mlayer[len(l.mlayer)-1] = nil
				l.mlayer = l.mlayer[:len(l.mlayer)-1]
				return true
			}
			// Add followed by Set is considered an Add. Hence, mutate mpost.Op.
			mpost.Op = Add
		}
		l.mlayer[midx] = mpost
		return true
	}

	// Didn't find it in mutable layer. Now check the immutable layer.
	pl := l.getPostingList(0)
	pidx := sort.Search(len(pl.Postings), func(idx int) bool {
		p := pl.Postings[idx]
		return mpost.Uid <= p.Uid
	})

	var uidFound, psame bool
	if pidx < len(pl.Postings) {
		p := pl.Postings[pidx]
		uidFound = mpost.Uid == p.Uid
		if uidFound {
			psame = samePosting(p, mpost)
		}
	}

	if mpost.Op == Set {
		if psame {
			return false
		}
		if !uidFound {
			// Posting not found in PL. This is considered an Add operation.
			mpost.Op = Add
		}
	} else if !psame { // mpost.Op==Del
		// Either we fail to find UID in immutable PL or contents don't match.
		return false
	}

	// Doesn't match what we already have in immutable layer. So, add to mutable layer.
	if midx >= len(l.mlayer) {
		// Add it at the end.
		l.mlayer = append(l.mlayer, mpost)
		return true
	}

	// Otherwise, add it where midx is pointing to.
	l.mlayer = append(l.mlayer, nil)
	copy(l.mlayer[midx+1:], l.mlayer[midx:])
	l.mlayer[midx] = mpost
	return true
}
func main() {
	x.Init()

	fin, err := os.Open(filename)
	x.Check(err)
	defer fin.Close()

	scanner := bufio.NewScanner(fin)
	var numLines, numValues, numNames, numReleaseDates int
	invGraph = make(map[string]map[uint64][]uint64)
	for scanner.Scan() {
		numLines++
		tokens := strings.Split(scanner.Text(), "\t")
		x.AssertTruef(len(tokens) == 4, scanner.Text())

		src := tokens[0]
		x.AssertTrue(bracketed(src))
		src = removeFirstLast(src)
		srcUID := farm.Fingerprint64([]byte(src))

		pred := tokens[1]
		x.AssertTrue(bracketed(pred))
		pred = removeFirstLast(pred)

		value := tokens[2]
		if bracketed(value) {
			// Normal edge.
			value = removeFirstLast(value)
			destUID := farm.Fingerprint64([]byte(value))
			m, found := invGraph[pred]
			if !found {
				m = make(map[uint64][]uint64)
				invGraph[pred] = m
			}
			// We are building an inverse map!
			m[destUID] = append(m[destUID], srcUID)
		} else {
			// A value.
			numValues++
			value = removeFirstLast(value)
			if pred == "type.object.name" {
				numNames++
				// Do some custom processing here.
				value = strings.ToLower(value)
				vTokens := strings.Split(value, " ")
				var found bool
				for _, t := range vTokens {
					if t == "the" {
						found = true
						break
					}
				}
				if found {
					goodUIDs = append(goodUIDs, srcUID)
				}
			} else if pred == "film.film.initial_release_date" {
				numReleaseDates++
			}
		}
	}

	fmt.Printf("Num lines read: %d\n", numLines)
	fmt.Printf("Num predicates: %d\n", len(invGraph))
	fmt.Printf("Num values read: %d\n", numValues)
	fmt.Printf("Num names read: %d\n", numNames)
	fmt.Printf("Num release dates read: %d\n", numReleaseDates)
	fmt.Printf("Num good UIDs: %d\n", len(goodUIDs))

	x.AssertTrue(numLines > 0)
	x.AssertTrue(len(invGraph) > 0)
	x.AssertTrue(numValues > 0)
	x.AssertTrue(numNames > 0)
	x.AssertTrue(numReleaseDates > 0)
	x.AssertTrue(len(goodUIDs) > 0)

	doGood()
}
// Backup creates a backup of data by exporting it as an RDF gzip.
func backup(gid uint32, bdir string) error {
	// Use a goroutine to write to file.
	err := os.MkdirAll(bdir, 0700)
	if err != nil {
		return err
	}
	fpath := path.Join(bdir, fmt.Sprintf("dgraph-%d-%s.rdf.gz", gid,
		time.Now().Format("2006-01-02-15-04")))
	fmt.Printf("Backing up at: %v\n", fpath)
	chb := make(chan []byte, 1000)
	errChan := make(chan error, 1)
	go func() {
		errChan <- writeToFile(fpath, chb)
	}()

	// Use a bunch of goroutines to convert to RDF format.
	chkv := make(chan kv, 1000)
	var wg sync.WaitGroup
	wg.Add(numBackupRoutines)
	for i := 0; i < numBackupRoutines; i++ {
		go func() {
			buf := new(bytes.Buffer)
			buf.Grow(50000)
			for item := range chkv {
				toRDF(buf, item)
				if buf.Len() >= 40000 {
					tmp := make([]byte, buf.Len())
					copy(tmp, buf.Bytes())
					chb <- tmp
					buf.Reset()
				}
			}
			if buf.Len() > 0 {
				tmp := make([]byte, buf.Len())
				copy(tmp, buf.Bytes())
				chb <- tmp
			}
			wg.Done()
		}()
	}

	// Iterate over rocksdb.
	it := pstore.NewIterator()
	defer it.Close()
	var lastPred string
	for it.SeekToFirst(); it.Valid(); {
		key := it.Key().Data()
		pk := x.Parse(key)

		if pk.IsIndex() {
			// Seek to the end of index keys.
			it.Seek(pk.SkipRangeOfSameType())
			continue
		}
		if pk.Attr == "_uid_" {
			// Skip the UID mappings.
			it.Seek(pk.SkipPredicate())
			continue
		}
		x.AssertTrue(pk.IsData())
		pred, uid := pk.Attr, pk.Uid
		if pred != lastPred && group.BelongsTo(pred) != gid {
			it.Seek(pk.SkipPredicate())
			continue
		}

		prefix := fmt.Sprintf("<_uid_:%#x> <%s> ", uid, pred)
		pl := &types.PostingList{}
		x.Check(pl.Unmarshal(it.Value().Data()))
		chkv <- kv{
			prefix: prefix,
			list:   pl,
		}
		lastPred = pred
		it.Next()
	}

	close(chkv) // We have stopped output to chkv.
	wg.Wait()   // Wait for numBackupRoutines to finish.
	close(chb)  // We have stopped output to chb.

	err = <-errChan
	return err
}
func intersectBucket(ts *task.Sort, attr, token string, out []intersectedList) error {
	count := int(ts.Count)
	sType := schema.TypeOf(attr)
	if !sType.IsScalar() {
		return x.Errorf("Cannot sort attribute %s of type object.", attr)
	}
	scalar := sType.(types.Scalar)

	key := x.IndexKey(attr, token)
	pl, decr := posting.GetOrCreate(key)
	defer decr()

	for i, ul := range ts.UidMatrix {
		il := &out[i]
		if count > 0 && len(il.ulist.Uids) >= count {
			continue
		}

		// Intersect index with i-th input UID list.
		listOpt := posting.ListOptions{Intersect: ul}
		result := pl.Uids(listOpt)
		n := len(result.Uids)

		// Check offsets[i].
		if il.offset >= n {
			// We are going to skip the whole intersection. No need to do actual
			// sorting. Just update offsets[i].
			il.offset -= n
			continue
		}

		// Sort results by value before applying offset.
		sortByValue(attr, result, scalar, ts.Desc)

		if il.offset > 0 {
			result.Uids = result.Uids[il.offset:n]
			il.offset = 0
			n = len(result.Uids)
		}

		// n is number of elements to copy from result to out.
		if count > 0 {
			slack := count - len(il.ulist.Uids)
			if slack < n {
				n = slack
			}
		}

		// Copy from result to out.
		for j := 0; j < n; j++ {
			il.ulist.Uids = append(il.ulist.Uids, result.Uids[j])
		}
	} // end for loop

	// Check out[i] sizes for all i.
	for i := 0; i < len(ts.UidMatrix); i++ { // Iterate over UID lists.
		if len(out[i].ulist.Uids) < count {
			return errContinue
		}
		x.AssertTrue(len(out[i].ulist.Uids) == count)
	}
	return errDone
}
// processTask processes the query, accumulates and returns the result.
func processTask(q *task.Query) (*task.Result, error) {
	attr := q.Attr

	useFunc := len(q.SrcFunc) != 0
	var n int
	var tokens []string
	var geoQuery *geo.QueryData
	var err error
	var intersectDest bool
	var ineqValue types.Value
	var ineqValueToken string
	var isGeq, isLeq bool

	if useFunc {
		f := q.SrcFunc[0]
		isGeq = f == "geq"
		isLeq = f == "leq"
		switch {
		case isGeq:
			fallthrough
		case isLeq:
			if len(q.SrcFunc) != 2 {
				return nil, x.Errorf("Function requires 2 arguments, but got %d %v",
					len(q.SrcFunc), q.SrcFunc)
			}
			ineqValue, err = getValue(attr, q.SrcFunc[1])
			if err != nil {
				return nil, err
			}
			// Tokenizing RHS value of inequality.
			ineqTokens, err := posting.IndexTokens(attr, ineqValue)
			if err != nil {
				return nil, err
			}
			if len(ineqTokens) != 1 {
				return nil, x.Errorf("Expected only 1 token but got: %v", ineqTokens)
			}
			ineqValueToken = ineqTokens[0]
			// Get tokens geq / leq ineqValueToken.
			tokens, err = getInequalityTokens(attr, ineqValueToken, isGeq)
			if err != nil {
				return nil, err
			}

		case geo.IsGeoFunc(q.SrcFunc[0]):
			// For geo functions, we get extra information used for filtering.
			tokens, geoQuery, err = geo.GetTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}

		default:
			tokens, err = getTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}
			intersectDest = (strings.ToLower(q.SrcFunc[0]) == "allof")
		}
		n = len(tokens)
	} else {
		n = len(q.Uids)
	}

	var out task.Result
	for i := 0; i < n; i++ {
		var key []byte
		if useFunc {
			key = x.IndexKey(attr, tokens[i])
		} else {
			key = x.DataKey(attr, q.Uids[i])
		}
		// Get or create the posting list for an entity, attribute combination.
		pl, decr := posting.GetOrCreate(key)
		defer decr()

		// If a posting list contains a value, we store that or else we store a nil
		// byte so that processing is consistent later.
		vbytes, vtype, err := pl.Value()
		newValue := &task.Value{ValType: uint32(vtype)}
		if err == nil {
			newValue.Val = vbytes
		} else {
			newValue.Val = x.Nilbyte
		}
		out.Values = append(out.Values, newValue)

		if q.DoCount {
			out.Counts = append(out.Counts, uint32(pl.Length(0)))
			// Add an empty UID list to make later processing consistent.
			out.UidMatrix = append(out.UidMatrix, &emptyUIDList)
			continue
		}

		// The more usual case: Getting the UIDs.
		opts := posting.ListOptions{
			AfterUID: uint64(q.AfterUid),
		}
		// If we have srcFunc and Uids, it means it's a filter. So we intersect.
		if useFunc && len(q.Uids) > 0 {
			opts.Intersect = &task.List{Uids: q.Uids}
		}
		out.UidMatrix = append(out.UidMatrix, pl.Uids(opts))
	}

	if (isGeq || isLeq) && len(tokens) > 0 && ineqValueToken == tokens[0] {
		// Need to evaluate inequality for entries in the first bucket.
		typ := schema.TypeOf(attr)
		if typ == nil || !typ.IsScalar() {
			return nil, x.Errorf("Attribute not scalar: %s %v", attr, typ)
		}
		scalarType := typ.(types.Scalar)

		x.AssertTrue(len(out.UidMatrix) > 0)
		// Filter the first row of UidMatrix. Since ineqValue != nil, we may
		// assume that ineqValue is equal to the first token found in TokensTable.
		algo.ApplyFilter(out.UidMatrix[0], func(uid uint64, i int) bool {
			key := x.DataKey(attr, uid)
			sv := getPostingValue(key, scalarType)
			if sv == nil {
				return false
			}
			if isGeq {
				return !scalarType.Less(*sv, ineqValue)
			}
			return !scalarType.Less(ineqValue, *sv)
		})
	}

	// If geo filter, do value check for correctness.
	var values []*task.Value
	if geoQuery != nil {
		uids := algo.MergeSorted(out.UidMatrix)
		for _, uid := range uids.Uids {
			key := x.DataKey(attr, uid)
			pl, decr := posting.GetOrCreate(key)

			vbytes, vtype, err := pl.Value()
			newValue := &task.Value{ValType: uint32(vtype)}
			if err == nil {
				newValue.Val = vbytes
			} else {
				newValue.Val = x.Nilbyte
			}
			values = append(values, newValue)
			decr() // Decrement the reference count of the pl.
		}

		filtered := geo.FilterUids(uids, values, geoQuery)
		for i := 0; i < len(out.UidMatrix); i++ {
			out.UidMatrix[i] = algo.IntersectSorted([]*task.List{out.UidMatrix[i], filtered})
		}
	}
	out.IntersectDest = intersectDest
	return &out, nil
}
func main() {
	x.Init()

	fin, err := os.Open(filename)
	x.Check(err)
	defer fin.Close()

	scanner := bufio.NewScanner(fin)
	var numLines, numValues, numNames, numReleaseDates int
	graph = make(map[string]map[uint64][]uint64)
	gNames = make(map[uint64]string)
	gReleaseDates = make(map[uint64]string)
	for scanner.Scan() {
		numLines++
		tokens := strings.Split(scanner.Text(), "\t")
		x.AssertTruef(len(tokens) == 4, scanner.Text())

		src := tokens[0]
		x.AssertTrue(bracketed(src))
		src = removeFirstLast(src)
		srcUID := farm.Fingerprint64([]byte(src))

		pred := tokens[1]
		x.AssertTrue(bracketed(pred))
		pred = removeFirstLast(pred)

		value := tokens[2]
		if bracketed(value) {
			// Normal edge.
			value = removeFirstLast(value)
			destUID := farm.Fingerprint64([]byte(value))
			m, found := graph[pred]
			if !found {
				m = make(map[uint64][]uint64)
				graph[pred] = m
			}
			m[srcUID] = append(m[srcUID], destUID)
		} else {
			numValues++
			// Check for "@".
			pos := strings.LastIndex(value, "@")
			if pos >= 0 {
				pred = pred + "." + value[pos+1:]
				value = removeFirstLast(value[:pos])
			}
			if pred == "type.object.name.en" {
				numNames++
				gNames[srcUID] = value
			} else if pred == "film.film.initial_release_date" {
				numReleaseDates++
				gReleaseDates[srcUID] = value
			}
		}
	}

	fmt.Printf("Num lines read: %d\n", numLines)
	fmt.Printf("Num predicates: %d\n", len(graph))
	fmt.Printf("Num values read: %d\n", numValues)
	fmt.Printf("Num names read: %d\n", numNames)
	fmt.Printf("Num release dates read: %d\n", numReleaseDates)

	x.AssertTrue(numLines > 0)
	x.AssertTrue(len(graph) > 0)
	x.AssertTrue(numValues > 0)
	x.AssertTrue(numNames > 0)
	x.AssertTrue(numReleaseDates > 0)

	// doFilterString()
	// doSortRelease()
	doGen()
}
// ProcessGraph processes the SubGraph instance accumulating result for the query
// from different instances. Note: taskQuery is nil for root node.
func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) {
	var err error
	if len(sg.Attr) == 0 {
		// If we have a filter SubGraph which only contains an operator,
		// it won't have any attribute to work on.
		// This is to allow providing SrcUIDs to the filter children.
		sg.DestUIDs = sg.SrcUIDs
	} else if parent == nil && len(sg.SrcFunc) == 0 {
		// I am root. I don't have any function to execute, and my
		// result has been prepared for me already.
		sg.DestUIDs = algo.MergeSorted(sg.uidMatrix) // Could also be = sg.SrcUIDs
	} else {
		taskQuery := createTaskQuery(sg)
		result, err := worker.ProcessTaskOverNetwork(ctx, taskQuery)
		if err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while processing task"))
			rch <- err
			return
		}

		sg.uidMatrix = result.UidMatrix
		sg.values = result.Values
		if len(sg.values) > 0 {
			v := sg.values[0]
			x.Trace(ctx, "Sample value for attr: %v Val: %v", sg.Attr, string(v.Val))
		}
		sg.counts = result.Counts

		if sg.Params.DoCount && len(sg.Filters) == 0 {
			// If there is a filter, we need to do more work to get the actual count.
			x.Trace(ctx, "Zero uids. Only count requested")
			rch <- nil
			return
		}

		if result.IntersectDest {
			sg.DestUIDs = algo.IntersectSorted(result.UidMatrix)
		} else {
			sg.DestUIDs = algo.MergeSorted(result.UidMatrix)
		}
	}

	if len(sg.DestUIDs.Uids) == 0 {
		// Looks like we're done here. Be careful with nil srcUIDs!
		x.Trace(ctx, "Zero uids for %q. Num attr children: %v", sg.Attr, len(sg.Children))
		rch <- nil
		return
	}

	// Apply filters if any.
	if len(sg.Filters) > 0 {
		// Run all filters in parallel.
		filterChan := make(chan error, len(sg.Filters))
		for _, filter := range sg.Filters {
			filter.SrcUIDs = sg.DestUIDs
			go ProcessGraph(ctx, filter, sg, filterChan)
		}

		for _ = range sg.Filters {
			select {
			case err = <-filterChan:
				if err != nil {
					x.TraceError(ctx, x.Wrapf(err, "Error while processing filter task"))
					rch <- err
					return
				}

			case <-ctx.Done():
				x.TraceError(ctx, x.Wrapf(ctx.Err(), "Context done before full execution"))
				rch <- ctx.Err()
				return
			}
		}

		// Now apply the results from filter.
		var lists []*task.List
		for _, filter := range sg.Filters {
			lists = append(lists, filter.DestUIDs)
		}
		if sg.FilterOp == "|" {
			sg.DestUIDs = algo.MergeSorted(lists)
		} else {
			sg.DestUIDs = algo.IntersectSorted(lists)
		}
	}

	if len(sg.Params.Order) == 0 {
		// There is no ordering. Just apply pagination and return.
		if err = sg.applyPagination(ctx); err != nil {
			rch <- err
			return
		}
	} else {
		// We need to sort first before pagination.
		if err = sg.applyOrderAndPagination(ctx); err != nil {
			rch <- err
			return
		}
	}

	// Here we consider handling _count_ with filtering. We do this after
	// pagination because otherwise, we need to do the count with pagination
	// taken into account. For example, a PL might have only 50 entries but the
	// user wants to skip 100 entries and return 10 entries. In this case, you
	// should return a count of 0, not 10.
	if sg.Params.DoCount {
		x.AssertTrue(len(sg.Filters) > 0)
		sg.counts = make([]uint32, len(sg.uidMatrix))
		for i, ul := range sg.uidMatrix {
			// A possible optimization is to return the size of the intersection
			// without forming the intersection.
			algo.IntersectWith(ul, sg.DestUIDs)
			sg.counts[i] = uint32(len(ul.Uids))
		}
		rch <- nil
		return
	}

	childChan := make(chan error, len(sg.Children))
	for i := 0; i < len(sg.Children); i++ {
		child := sg.Children[i]
		child.SrcUIDs = sg.DestUIDs // Make the connection.
		go ProcessGraph(ctx, child, sg, childChan)
	}

	// Now get all the results back.
	for _ = range sg.Children {
		select {
		case err = <-childChan:
			if err != nil {
				x.TraceError(ctx, x.Wrapf(err, "Error while processing child task"))
				rch <- err
				return
			}
		case <-ctx.Done():
			x.TraceError(ctx, x.Wrapf(ctx.Err(), "Context done before full execution"))
			rch <- ctx.Err()
			return
		}
	}
	rch <- nil
}