Example #1
func doGood() {
	// goodUIDs are UIDs selected by their type.object.name value; see main in Example #20.
	results := expand(goodUIDs)
	directorEdges := results["film.director.film"]
	x.AssertTrue(len(directorEdges) > 100)

	// Directors are UIDs which go to goodUIDs via "film.director.film".
	directorUIDs, _ := uniqueUIDs(directorEdges)

	results = expand(directorUIDs)
	filmEdges := results["film.film.directed_by"]
	x.AssertTrue(len(filmEdges) > 100)

	// Films are UIDs which go to directorUIDs via "film.film.directed_by".
	filmUIDs, _ := uniqueUIDs(filmEdges)

	results = expand(filmUIDs)
	directorEdges = results["film.director.film"]
	x.AssertTrue(len(directorEdges) > 100)

	// Directors are UIDs which go to filmUIDs via "film.director.film".
	directorUIDs, counts := uniqueUIDs(directorEdges)

	var maxCount int
	var maxDirector uint64
	for i, c := range counts {
		if c > maxCount {
			maxCount = c
			maxDirector = directorUIDs[i]
		}
	}
	fmt.Printf("maxDirector %d with count %d\n", maxDirector, maxCount)
}
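
doGood leans on a uniqueUIDs helper that this page does not show. Here is a minimal sketch of what it could look like, assuming the edge maps returned by expand in Example #14 (source UID to destination UIDs); it flattens such a map into the distinct destination UIDs plus a parallel slice of occurrence counts:

// uniqueUIDs is a hypothetical sketch: it flattens an edge map into the
// distinct destination UIDs and counts how often each destination occurs.
func uniqueUIDs(edges map[uint64][]uint64) ([]uint64, []int) {
	seen := make(map[uint64]int)
	for _, dsts := range edges {
		for _, d := range dsts {
			seen[d]++
		}
	}
	uids := make([]uint64, 0, len(seen))
	counts := make([]int, 0, len(seen))
	for uid, c := range seen {
		uids = append(uids, uid)
		counts = append(counts, c)
	}
	return uids, counts
}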
Example #2
// getPostingList tries to get posting list from l.pbuffer. If it is nil, then
// we query RocksDB. There is no need for lock acquisition here.
func (l *List) getPostingList(loop int) *types.PostingList {
	if loop >= 10 {
		x.Fatalf("This is over the 10th loop: %v", loop)
	}
	l.AssertRLock()
	// Wait for any previous commits to happen before retrieving posting list again.
	l.Wait()

	pb := atomic.LoadPointer(&l.pbuffer)
	plist := (*types.PostingList)(pb)

	if plist == nil {
		x.AssertTrue(l.pstore != nil)
		plist = new(types.PostingList)

		if slice, err := l.pstore.Get(l.key); err == nil && slice != nil {
			x.Checkf(plist.Unmarshal(slice.Data()), "Unable to Unmarshal PostingList from store")
			slice.Free()
		}
		if atomic.CompareAndSwapPointer(&l.pbuffer, pb, unsafe.Pointer(plist)) {
			return plist
		}
		// Someone else replaced the pointer in the meantime. Retry recursively.
		return l.getPostingList(loop + 1)
	}
	return plist
}
Example #3
func parseDefaultConfig(l string) (uint32, error) {
	// If we have already seen a default config line and n already has a
	// value, we return an error.
	if groupConfig.n != 0 {
		return 0, fmt.Errorf("Default config can only be defined once: %v", l)
	}
	l = strings.TrimSpace(l)
	conf := strings.Split(l, " ")
	// + in (fp % n + k) is optional.
	if !(len(conf) == 5 || len(conf) == 3) || conf[0] != "fp" || conf[1] != "%" {
		return 0, fmt.Errorf("Default config format should be like: %v", "default: fp % n + k")
	}

	var err error
	var n uint64
	n, err = strconv.ParseUint(conf[2], 10, 32)
	x.Check(err)
	groupConfig.n = uint32(n)
	x.AssertTrue(groupConfig.n != 0)
	if len(conf) == 5 {
		if conf[3] != "+" {
			return 0, fmt.Errorf("Default config format should be like: %v", "default: fp % n + k")
		}
		n, err = strconv.ParseUint(conf[4], 10, 32)
		x.Check(err)
		groupConfig.k = uint32(n)
	}
	return groupConfig.k, nil
}
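
A hedged usage sketch, assuming groupConfig starts out zeroed and the "default:" prefix has already been stripped by the caller. Note the tokens must be separated by single spaces for the Split above to yield the expected five fields:

k, err := parseDefaultConfig("fp % 3 + 2")
x.Check(err)
fmt.Println(groupConfig.n, k) // Prints: 3 2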
Example #4
func (p *poolsi) connect(addr string) {
	if addr == *myAddr {
		return
	}
	p.RLock()
	_, has := p.all[addr]
	p.RUnlock()
	if has {
		return
	}

	pool := newPool(addr, 5)
	query := new(Payload)
	query.Data = make([]byte, 10)
	x.Check2(rand.Read(query.Data))

	conn, err := pool.Get()
	x.Checkf(err, "Unable to connect")

	c := NewWorkerClient(conn)
	resp, err := c.Echo(context.Background(), query)
	x.Checkf(err, "Unable to Echo")
	x.AssertTrue(bytes.Equal(resp.Data, query.Data))
	x.Check(pool.Put(conn))
	fmt.Printf("Connection with %q successful.\n", addr)

	p.Lock()
	defer p.Unlock()
	_, has = p.all[addr]
	if has {
		return
	}
	p.all[addr] = pool
}
Example #5
// newQuery creates a Query task and returns it.
func newQuery(attr string, uids []uint64, srcFunc []string) *task.Query {
	x.AssertTrue(uids == nil || srcFunc == nil)
	return &task.Query{
		Uids:    uids,
		SrcFunc: srcFunc,
		Attr:    attr,
	}
}
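
The assertion enforces that a query is either UID-driven or function-driven, never both. A small illustrative sketch (the attribute names and function arguments here are made up):

// Query by explicit UIDs; SrcFunc must be nil.
q1 := newQuery("film.director.film", []uint64{0x01, 0x02}, nil)

// Query by source function; Uids must be nil.
q2 := newQuery("type.object.name", nil, []string{"anyof", "good bad"})
fmt.Println(q1.Attr, q2.Attr)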
Example #6
func (n *node) processMembership(e raftpb.Entry, mm *task.Membership) error {
	x.AssertTrue(n.gid == 0)

	x.Printf("group: %v Addr: %q leader: %v dead: %v\n",
		mm.GroupId, mm.Addr, mm.Leader, mm.AmDead)
	groups().applyMembershipUpdate(e.Index, mm)
	return nil
}
Example #7
func parsePeer(peer string) (uint64, string) {
	x.AssertTrue(len(peer) > 0)

	kv := strings.SplitN(peer, ":", 2)
	x.AssertTruef(len(kv) == 2, "Invalid peer format: %v", peer)
	pid, err := strconv.ParseUint(kv[0], 10, 64)
	x.Checkf(err, "Invalid peer id: %v", kv[0])
	// TODO: Validate the url kv[1]
	return pid, kv[1]
}
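
A hedged usage sketch. Because SplitN limits the split to two pieces, everything after the first colon is returned verbatim, so the address part may itself contain colons:

pid, addr := parsePeer("1:localhost:12345")
fmt.Println(pid, addr) // Prints: 1 localhost:12345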
Example #8
// initIndex initializes the index with the given data store.
func initIndex() {
	x.AssertTrue(pstore != nil)

	// Initialize TokensTables.
	indexedFields := schema.IndexedFields()
	type resultStruct struct {
		attr  string
		table *TokensTable
	}
	results := make(chan resultStruct, len(indexedFields))

	for _, attr := range indexedFields {
		go func(attr string) {
			table := &TokensTable{
				key: make([]string, 0, 50),
			}
			pk := x.ParsedKey{
				Attr: attr,
			}
			prefix := pk.IndexPrefix()

			it := pstore.NewIterator()
			defer it.Close()
			for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
				pki := x.Parse(it.Key().Data())
				x.AssertTrue(pki.IsIndex())
				x.AssertTrue(len(pki.Term) > 0)
				table.push(pki.Term)
			}
			results <- resultStruct{attr, table}
		}(attr)
	}

	tables = make(map[string]*TokensTable)
	for i := 0; i < len(indexedFields); i++ {
		r := <-results
		tables[r.attr] = r.table
	}
}
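
initIndex relies on TokensTable.push, which is not shown on this page. Since RocksDB iterates keys in sorted order, a plain append would keep the table sorted; a minimal sketch under that assumption:

// push is a hypothetical sketch: terms arrive in sorted key order from the
// iterator above, so appending preserves sortedness of t.key.
func (t *TokensTable) push(term string) {
	t.key = append(t.key, term)
}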
Example #9
// newSort creates a task.Sort for sorting.
func newSort(uids [][]uint64, offset, count int) *task.Sort {
	x.AssertTrue(uids != nil)
	uidMatrix := make([]*task.List, len(uids))
	for i, l := range uids {
		uidMatrix[i] = &task.List{Uids: l}
	}
	return &task.Sort{
		Attr:      "dob",
		Offset:    int32(offset),
		Count:     int32(count),
		UidMatrix: uidMatrix,
	}
}
Example #10
// applyPagination applies offset and count (windowing) to sg.uidMatrix.
func (sg *SubGraph) applyPagination(ctx context.Context) error {
	params := sg.Params
	if params.Count == 0 && params.Offset == 0 { // No pagination.
		return nil
	}
	x.AssertTrue(len(sg.SrcUIDs.Uids) == len(sg.uidMatrix))
	for _, l := range sg.uidMatrix {
		algo.IntersectWith(l, sg.DestUIDs)
		start, end := pageRange(&sg.Params, len(l.Uids))
		l.Uids = l.Uids[start:end]
	}
	// Re-merge the UID matrix.
	sg.DestUIDs = algo.MergeSorted(sg.uidMatrix)
	return nil
}
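
applyPagination delegates the index arithmetic to pageRange, which is not shown on this page. A minimal sketch of the clamping it needs, assuming a params struct with int Offset and Count fields where Count == 0 means "no limit" (negative counts, if supported, would need extra handling and are ignored here):

// pageRange is a hypothetical sketch: it maps (Offset, Count) onto
// [start, end) indices for a list of length n, clamped to valid bounds.
func pageRange(p *params, n int) (int, int) {
	start := p.Offset
	if start < 0 {
		start = 0
	}
	if start > n {
		start = n
	}
	end := n
	if p.Count > 0 && start+p.Count < n {
		end = start + p.Count
	}
	return start, end
}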
Example #11
// stringHelper does simple DFS to convert FilterTree to string.
func (t *FilterTree) stringHelper(buf *bytes.Buffer) {
	x.AssertTrue(t != nil)
	if t.Func != nil && len(t.Func.Name) > 0 {
		// Leaf node.
		_, err := buf.WriteRune('(')
		x.Check(err)
		_, err = buf.WriteString(t.Func.Name)
		x.Check(err)

		if len(t.Func.Attr) > 0 {
			args := make([]string, len(t.Func.Args)+1)
			args[0] = t.Func.Attr
			copy(args[1:], t.Func.Args)

			for _, arg := range args {
				_, err = buf.WriteString(" \"")
				x.Check(err)
				_, err = buf.WriteString(arg)
				x.Check(err)
				_, err = buf.WriteRune('"')
				x.Check(err)
			}
		}
		_, err = buf.WriteRune(')')
		x.Check(err)
		return
	}
	// Non-leaf node.
	_, err := buf.WriteRune('(')
	x.Check(err)
	switch t.Op {
	case "&":
		_, err = buf.WriteString("AND")
	case "|":
		_, err = buf.WriteString("OR")
	default:
		err = x.Errorf("Unknown operator: %q", t.Op)
	}
	x.Check(err)

	for _, c := range t.Child {
		_, err = buf.WriteRune(' ')
		x.Check(err)
		c.stringHelper(buf)
	}
	_, err = buf.WriteRune(')')
	x.Check(err)
}
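
A natural entry point for stringHelper is a String method on FilterTree; a minimal sketch:

func (t *FilterTree) String() string {
	buf := new(bytes.Buffer)
	t.stringHelper(buf)
	return buf.String()
}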
Example #12
func main() {
	x.Init()

	s = &status{
		start: time.Now(),
	}
	filesList := strings.Split(*files, ",")
	x.AssertTrue(len(filesList) > 0)
	for _, file := range filesList {
		processFile(file)
	}
	fmt.Printf("Number of mutations run   : %d\n", s.mutations)
	fmt.Printf("Number of RDFs processed  : %d\n", s.rdfs)
	elapsed := time.Since(s.start)
	fmt.Printf("Time spent                : %v\n", elapsed)
	if secs := uint64(elapsed.Seconds()); secs > 0 { // Guard against division by zero for sub-second runs.
		fmt.Printf("RDFs processed per second : %d\n", s.rdfs/secs)
	}
}
Example #13
func expand(uids []uint64, pred string) []uint64 {
	x.AssertTrue(graph != nil)
	var out []uint64
	m, found := graph[pred]
	if !found {
		return out
	}
	for _, u := range uids {
		dst := m[u]
		if dst == nil {
			continue
		}
		out = append(out, dst...)
	}
	return out
}
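
A hedged usage sketch chaining two hops through the graph, with an illustrative starting UID and the predicate names used elsewhere on this page:

var directorUID uint64 = 0x1234 // Illustrative UID.
films := expand([]uint64{directorUID}, "film.director.film")
coDirectors := expand(films, "film.film.directed_by")
fmt.Printf("%d films, %d director entries\n", len(films), len(coDirectors))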
Example #14
func expand(uids []uint64) map[string]map[uint64][]uint64 {
	x.AssertTrue(invGraph != nil)
	out := make(map[string]map[uint64][]uint64)
	for pred, m := range invGraph {
		outM := make(map[uint64][]uint64)
		for _, u := range uids { // srcUID.
			z := m[u]
			if z == nil {
				continue
			}
			outM[u] = z
		}
		if len(outM) > 0 {
			out[pred] = outM
		}
	}
	return out
}
Example #15
// applyOrderAndPagination orders each posting list by a given attribute
// before applying pagination.
func (sg *SubGraph) applyOrderAndPagination(ctx context.Context) error {
	if len(sg.Params.Order) == 0 {
		return nil
	}
	if sg.Params.Count == 0 {
		// Only retrieve up to 1000 results by default.
		sg.Params.Count = 1000
	}

	sort := &task.Sort{
		Attr:      sg.Params.Order,
		UidMatrix: sg.uidMatrix,
		Offset:    int32(sg.Params.Offset),
		Count:     int32(sg.Params.Count),
		Desc:      sg.Params.OrderDesc,
	}
	result, err := worker.SortOverNetwork(ctx, sort)
	if err != nil {
		return err
	}

	x.AssertTrue(len(result.UidMatrix) == len(sg.uidMatrix))
	sg.uidMatrix = result.GetUidMatrix()

	// Update sg.destUID. Iterate over the UID matrix (which is not sorted by
	// UID). For each element in UID matrix, we do a binary search in the
	// current destUID and mark it. Then we scan over this bool array and
	// rebuild destUIDs.
	included := make([]bool, len(sg.DestUIDs.Uids))
	for _, ul := range sg.uidMatrix {
		for _, uid := range ul.Uids {
			idx := algo.IndexOf(sg.DestUIDs, uid) // Binary search.
			if idx >= 0 {
				included[idx] = true
			}
		}
	}
	algo.ApplyFilter(sg.DestUIDs,
		func(uid uint64, idx int) bool { return included[idx] })
	return nil
}
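
algo.ApplyFilter is used here and again in Example #23. A minimal sketch of its likely contract, assuming it compacts l.Uids in place, keeping only UIDs for which the predicate returns true:

// ApplyFilter is a hypothetical sketch of the algo helper: it filters
// l.Uids in place using f, which receives each UID and its index.
func ApplyFilter(l *task.List, f func(uid uint64, idx int) bool) {
	out := l.Uids[:0]
	for i, uid := range l.Uids {
		if f(uid, i) {
			out = append(out, uid)
		}
	}
	l.Uids = out
}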
Example #16
// addIndexMutations adds mutation(s) for a single term, to maintain index.
func addIndexMutations(ctx context.Context, attr string, uid uint64,
	p types.Value, del bool) {
	x.AssertTrue(uid != 0)
	tokens, err := IndexTokens(attr, p)
	if err != nil {
		// This data is not indexable
		return
	}
	edge := &task.DirectedEdge{
		ValueId: uid,
		Attr:    attr,
		Label:   "idx",
	}

	tokensTable := GetTokensTable(attr)
	x.AssertTruef(tokensTable != nil, "TokensTable missing for attr %s", attr)

	for _, token := range tokens {
		addIndexMutation(ctx, attr, token, tokensTable, edge, del)
	}
}
Example #17
func batchCommit() {
	var sz int
	var waits []*x.SafeWait
	var loop uint64

	b := pstore.NewWriteBatch()
	defer b.Destroy()

	for {
		select {
		case e := <-commitCh:
			b.Put(e.key, e.val)
			sz++
			waits = append(waits, e.sw)

		default:
			// default is executed if no other case is ready.
			start := time.Now()
			if sz > 0 {
				x.AssertTrue(b != nil)
				loop++
				fmt.Printf("[%4d] Writing batch of size: %v\n", loop, sz)
				x.Checkf(pstore.WriteBatch(b), "Error while writing to RocksDB.")
				for _, w := range waits {
					w.Done()
				}
				b.Clear()
				sz = 0
				waits = waits[:0]
			}
			// Add a sleep clause to avoid a busy wait loop if there's no input to commitCh.
			sleepFor := 10*time.Millisecond - time.Since(start)
			if sleepFor > time.Millisecond {
				time.Sleep(sleepFor)
			}
		}
	}
}
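
batchCommit drains a package-level commitCh whose element type is not shown. A minimal sketch of the producer side, assuming each element carries a key/value pair plus the x.SafeWait that batchCommit signals via Done once the batch is flushed:

// entry is a hypothetical element type matching the fields used above.
type entry struct {
	key []byte
	val []byte
	sw  *x.SafeWait
}

// commit is a hypothetical producer: it enqueues a write for the next batch.
func commit(key, val []byte, sw *x.SafeWait) {
	commitCh <- entry{key: key, val: val, sw: sw}
}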
Example #18
// NewTokenizer creates a new Tokenizer for the given byte slice.
func NewTokenizer(s []byte) (*Tokenizer, error) {
	x.AssertTrue(s != nil)

	if disableICU {
		// ICU is disabled. Return a dummy tokenizer.
		return &Tokenizer{}, nil
	}

	sNorm, terr := normalize(s)
	if terr != nil {
		return nil, terr
	}
	sNorm = append(sNorm, 0) // Null-terminate this for ICU's C functions.

	var err C.UErrorCode
	// Pass the length of the normalized buffer (minus the null terminator),
	// since sNorm, not s, is what we hand to ICU.
	c := C.NewTokenizer(byteToChar(sNorm), C.int(len(sNorm)-1), maxTokenSize, &err)
	if int(err) > 0 {
		return nil, x.Errorf("ICU new tokenizer error %d", int(err))
	}
	if c == nil {
		return nil, x.Errorf("NewTokenizer returns nil")
	}
	return &Tokenizer{c, C.TokenizerToken(c)}, nil
}
Example #19
func (l *List) updateMutationLayer(mpost *types.Posting) bool {
	l.AssertLock()
	x.AssertTrue(mpost.Op == Set || mpost.Op == Del)

	// First check the mutable layer.
	midx := sort.Search(len(l.mlayer), func(idx int) bool {
		mp := l.mlayer[idx]
		return mpost.Uid <= mp.Uid
	})

	// This block handles the case where mpost.Uid is found in the mutation layer.
	if midx < len(l.mlayer) && l.mlayer[midx].Uid == mpost.Uid {
		// oldPost is the posting found in mlayer.
		oldPost := l.mlayer[midx]

		// Note that mpost.Op is either Set or Del, whereas oldPost.Op can be
		// either Set or Del or Add.
		msame := samePosting(oldPost, mpost)
		if msame && ((mpost.Op == Del) == (oldPost.Op == Del)) {
			// This posting has similar content as what is found in mlayer. If the
			// ops are similar, then we do nothing. Note that Add and Set are
			// considered similar, and the second clause is true also when
			// mpost.Op==Add and oldPost.Op==Set.
			return false
		}

		if !msame && mpost.Op == Del {
			// Invalid Del as contents do not match.
			return false
		}

		// Here are the remaining cases.
		// Del, Set: Replace with new post.
		// Del, Del: Replace with new post.
		// Set, Del: Replace with new post.
		// Set, Set: Replace with new post.
		// Add, Del: Undo by removing oldPost.
		// Add, Set: Replace with new post. Need to set mpost.Op to Add.
		if oldPost.Op == Add {
			if mpost.Op == Del {
				// Undo old post.
				copy(l.mlayer[midx:], l.mlayer[midx+1:])
				l.mlayer[len(l.mlayer)-1] = nil
				l.mlayer = l.mlayer[:len(l.mlayer)-1]
				return true
			}
			// Add followed by Set is considered an Add. Hence, mutate mpost.Op.
			mpost.Op = Add
		}
		l.mlayer[midx] = mpost
		return true
	}

	// Didn't find it in mutable layer. Now check the immutable layer.
	pl := l.getPostingList(0)
	pidx := sort.Search(len(pl.Postings), func(idx int) bool {
		p := pl.Postings[idx]
		return mpost.Uid <= p.Uid
	})

	var uidFound, psame bool
	if pidx < len(pl.Postings) {
		p := pl.Postings[pidx]
		uidFound = mpost.Uid == p.Uid
		if uidFound {
			psame = samePosting(p, mpost)
		}
	}

	if mpost.Op == Set {
		if psame {
			return false
		}
		if !uidFound {
			// Posting not found in PL. This is considered an Add operation.
			mpost.Op = Add
		}
	} else if !psame { // mpost.Op==Del
		// Either we fail to find UID in immutable PL or contents don't match.
		return false
	}

	// Doesn't match what we already have in immutable layer. So, add to mutable layer.
	if midx >= len(l.mlayer) {
		// Add it at the end.
		l.mlayer = append(l.mlayer, mpost)
		return true
	}

	// Otherwise, add it where midx is pointing to.
	l.mlayer = append(l.mlayer, nil)
	copy(l.mlayer[midx+1:], l.mlayer[midx:])
	l.mlayer[midx] = mpost
	return true
}
Example #20
func main() {
	x.Init()

	fin, err := os.Open(filename)
	x.Check(err)
	defer fin.Close()

	scanner := bufio.NewScanner(fin)
	var numLines, numValues, numNames, numReleaseDates int
	invGraph = make(map[string]map[uint64][]uint64)

	for scanner.Scan() {
		numLines++
		tokens := strings.Split(scanner.Text(), "\t")
		x.AssertTruef(len(tokens) == 4, "%s", scanner.Text())

		src := tokens[0]
		x.AssertTrue(bracketed(src))
		src = removeFirstLast(src)
		srcUID := farm.Fingerprint64([]byte(src))

		pred := tokens[1]
		x.AssertTrue(bracketed(pred))
		pred = removeFirstLast(pred)

		value := tokens[2]

		if bracketed(value) {
			// Normal edge.
			value = removeFirstLast(value)
			destUID := farm.Fingerprint64([]byte(value))
			m, found := invGraph[pred]
			if !found {
				m = make(map[uint64][]uint64)
				invGraph[pred] = m
			}
			// We are building an inverse map!
			m[destUID] = append(m[destUID], srcUID)
		} else {
			// A value.
			numValues++
			value = removeFirstLast(value)
			if pred == "type.object.name" {
				numNames++

				// Do some custom processing here.
				value = strings.ToLower(value)
				vTokens := strings.Split(value, " ")
				var found bool
				for _, t := range vTokens {
					if t == "the" {
						found = true
						break
					}
				}
				if found {
					goodUIDs = append(goodUIDs, srcUID)
				}

			} else if pred == "film.film.initial_release_date" {
				numReleaseDates++
			}
		}
	}

	fmt.Printf("Num lines read: %d\n", numLines)
	fmt.Printf("Num predicates: %d\n", len(invGraph))
	fmt.Printf("Num values read: %d\n", numValues)
	fmt.Printf("Num names read: %d\n", numNames)
	fmt.Printf("Num release dates read: %d\n", numReleaseDates)
	fmt.Printf("Num good UIDs: %d\n", len(goodUIDs))

	x.AssertTrue(numLines > 0)
	x.AssertTrue(len(invGraph) > 0)
	x.AssertTrue(numValues > 0)
	x.AssertTrue(numNames > 0)
	x.AssertTrue(numReleaseDates > 0)
	x.AssertTrue(len(goodUIDs) > 0)

	doGood()
}
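
This main (and the one in Example #24) uses two small string helpers that the page does not show. Minimal sketches, assuming tokens are either angle-bracketed IRIs like <m.0abc> or quoted literals:

// bracketed reports whether s is wrapped in angle brackets, e.g. "<m.0abc>".
func bracketed(s string) bool {
	return len(s) >= 2 && s[0] == '<' && s[len(s)-1] == '>'
}

// removeFirstLast strips the surrounding pair of characters
// (angle brackets or quotes), e.g. "<m.0abc>" -> "m.0abc".
func removeFirstLast(s string) string {
	if len(s) < 2 {
		return s
	}
	return s[1 : len(s)-1]
}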
Example #21
// backup exports the data of the given group as a gzipped RDF file.
func backup(gid uint32, bdir string) error {
	// Use a goroutine to write to file.
	err := os.MkdirAll(bdir, 0700)
	if err != nil {
		return err
	}
	fpath := path.Join(bdir, fmt.Sprintf("dgraph-%d-%s.rdf.gz", gid,
		time.Now().Format("2006-01-02-15-04")))
	fmt.Printf("Backing up at: %v\n", fpath)
	chb := make(chan []byte, 1000)
	errChan := make(chan error, 1)
	go func() {
		errChan <- writeToFile(fpath, chb)
	}()

	// Use a bunch of goroutines to convert to RDF format.
	chkv := make(chan kv, 1000)
	var wg sync.WaitGroup
	wg.Add(numBackupRoutines)
	for i := 0; i < numBackupRoutines; i++ {
		go func() {
			buf := new(bytes.Buffer)
			buf.Grow(50000)
			for item := range chkv {
				toRDF(buf, item)
				if buf.Len() >= 40000 {
					tmp := make([]byte, buf.Len())
					copy(tmp, buf.Bytes())
					chb <- tmp
					buf.Reset()
				}
			}
			if buf.Len() > 0 {
				tmp := make([]byte, buf.Len())
				copy(tmp, buf.Bytes())
				chb <- tmp
			}
			wg.Done()
		}()
	}

	// Iterate over rocksdb.
	it := pstore.NewIterator()
	defer it.Close()
	var lastPred string
	for it.SeekToFirst(); it.Valid(); {
		key := it.Key().Data()
		pk := x.Parse(key)

		if pk.IsIndex() {
			// Seek to the end of index keys.
			it.Seek(pk.SkipRangeOfSameType())
			continue
		}
		if pk.Attr == "_uid_" {
			// Skip the UID mappings.
			it.Seek(pk.SkipPredicate())
			continue
		}

		x.AssertTrue(pk.IsData())
		pred, uid := pk.Attr, pk.Uid
		if pred != lastPred && group.BelongsTo(pred) != gid {
			it.Seek(pk.SkipPredicate())
			continue
		}

		prefix := fmt.Sprintf("<_uid_:%#x> <%s> ", uid, pred)
		pl := &types.PostingList{}
		x.Check(pl.Unmarshal(it.Value().Data()))
		chkv <- kv{
			prefix: prefix,
			list:   pl,
		}
		lastPred = pred
		it.Next()
	}

	close(chkv) // We have stopped output to chkv.
	wg.Wait()   // Wait for numBackupRoutines to finish.
	close(chb)  // We have stopped output to chb.

	err = <-errChan
	return err
}
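
writeToFile is the consumer end of chb and is not shown here. A minimal sketch (imports: os, compress/gzip), assuming it owns the output file and returns once chb is closed; error handling is simplified, and a production version would also need to close the gzip writer on early return paths:

// writeToFile is a hypothetical sketch: it drains ch into a gzip-compressed
// file at fpath and returns the first write error, or nil once ch is closed.
func writeToFile(fpath string, ch chan []byte) error {
	f, err := os.Create(fpath)
	if err != nil {
		return err
	}
	defer f.Close()

	gw := gzip.NewWriter(f)
	for buf := range ch {
		if _, err := gw.Write(buf); err != nil {
			return err
		}
	}
	return gw.Close()
}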
Example #22
func intersectBucket(ts *task.Sort, attr, token string, out []intersectedList) error {
	count := int(ts.Count)
	sType := schema.TypeOf(attr)
	if !sType.IsScalar() {
		return x.Errorf("Cannot sort attribute %s of type object.", attr)
	}
	scalar := sType.(types.Scalar)

	key := x.IndexKey(attr, token)
	pl, decr := posting.GetOrCreate(key)
	defer decr()

	for i, ul := range ts.UidMatrix {
		il := &out[i]
		if count > 0 && len(il.ulist.Uids) >= count {
			continue
		}

		// Intersect index with i-th input UID list.
		listOpt := posting.ListOptions{Intersect: ul}
		result := pl.Uids(listOpt)
		n := len(result.Uids)

		// Check offsets[i].
		if il.offset >= n {
			// We are going to skip the whole intersection. No need to do actual
			// sorting. Just update offsets[i].
			il.offset -= n
			continue
		}

		// Sort results by value before applying offset.
		sortByValue(attr, result, scalar, ts.Desc)

		if il.offset > 0 {
			result.Uids = result.Uids[il.offset:n]
			il.offset = 0
			n = len(result.Uids)
		}

		// n is number of elements to copy from result to out.
		if count > 0 {
			slack := count - len(il.ulist.Uids)
			if slack < n {
				n = slack
			}
		}

		// Copy from result to out.
		for j := 0; j < n; j++ {
			il.ulist.Uids = append(il.ulist.Uids, result.Uids[j])
		}
	} // end for loop

	// Check out[i] sizes for all i.
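	// This check assumes count > 0; the sort path guarantees that by setting
	// a default Count upstream (see applyOrderAndPagination in Example #15).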
	for i := 0; i < len(ts.UidMatrix); i++ { // Iterate over UID lists.
		if len(out[i].ulist.Uids) < count {
			return errContinue
		}
		x.AssertTrue(len(out[i].ulist.Uids) == count)
	}
	return errDone
}
Example #23
// processTask processes the query, accumulates and returns the result.
func processTask(q *task.Query) (*task.Result, error) {
	attr := q.Attr

	useFunc := len(q.SrcFunc) != 0
	var n int
	var tokens []string
	var geoQuery *geo.QueryData
	var err error
	var intersectDest bool
	var ineqValue types.Value
	var ineqValueToken string
	var isGeq, isLeq bool

	if useFunc {
		f := q.SrcFunc[0]
		isGeq = f == "geq"
		isLeq = f == "leq"
		switch {
		case isGeq:
			fallthrough
		case isLeq:
			if len(q.SrcFunc) != 2 {
				return nil, x.Errorf("Function requires 2 arguments, but got %d %v",
					len(q.SrcFunc), q.SrcFunc)
			}
			ineqValue, err = getValue(attr, q.SrcFunc[1])
			if err != nil {
				return nil, err
			}
			// Tokenizing RHS value of inequality.
			ineqTokens, err := posting.IndexTokens(attr, ineqValue)
			if err != nil {
				return nil, err
			}
			if len(ineqTokens) != 1 {
				return nil, x.Errorf("Expected only 1 token but got: %v", ineqTokens)
			}
			ineqValueToken = ineqTokens[0]
			// Get tokens geq / leq ineqValueToken.
			tokens, err = getInequalityTokens(attr, ineqValueToken, isGeq)
			if err != nil {
				return nil, err
			}

		case geo.IsGeoFunc(q.SrcFunc[0]):
			// For geo functions, we get extra information used for filtering.
			tokens, geoQuery, err = geo.GetTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}

		default:
			tokens, err = getTokens(q.SrcFunc)
			if err != nil {
				return nil, err
			}
			intersectDest = (strings.ToLower(q.SrcFunc[0]) == "allof")
		}
		n = len(tokens)
	} else {
		n = len(q.Uids)
	}

	var out task.Result
	for i := 0; i < n; i++ {
		var key []byte
		if useFunc {
			key = x.IndexKey(attr, tokens[i])
		} else {
			key = x.DataKey(attr, q.Uids[i])
		}
		// Get or create the posting list for an entity, attribute combination.
		pl, decr := posting.GetOrCreate(key)
		defer decr()
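		// Note that decr is deferred, so every posting list fetched in this
		// loop stays pinned until processTask returns.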

		// If a posting list contains a value, we store that or else we store a nil
		// byte so that processing is consistent later.
		vbytes, vtype, err := pl.Value()

		newValue := &task.Value{ValType: uint32(vtype)}
		if err == nil {
			newValue.Val = vbytes
		} else {
			newValue.Val = x.Nilbyte
		}
		out.Values = append(out.Values, newValue)

		if q.DoCount {
			out.Counts = append(out.Counts, uint32(pl.Length(0)))
			// Add an empty UID list to make later processing consistent
			out.UidMatrix = append(out.UidMatrix, &emptyUIDList)
			continue
		}

		// The more usual case: Getting the UIDs.
		opts := posting.ListOptions{
			AfterUID: uint64(q.AfterUid),
		}
		// If we have both srcFunc and Uids, it means it's a filter, so we intersect.
		if useFunc && len(q.Uids) > 0 {
			opts.Intersect = &task.List{Uids: q.Uids}
		}
		out.UidMatrix = append(out.UidMatrix, pl.Uids(opts))
	}

	if (isGeq || isLeq) && len(tokens) > 0 && ineqValueToken == tokens[0] {
		// Need to evaluate inequality for entries in the first bucket.
		typ := schema.TypeOf(attr)
		if typ == nil || !typ.IsScalar() {
			return nil, x.Errorf("Attribute not scalar: %s %v", attr, typ)
		}
		scalarType := typ.(types.Scalar)

		x.AssertTrue(len(out.UidMatrix) > 0)
		// Filter the first row of UidMatrix. Since ineqValue != nil, we may
		// assume that ineqValue is equal to the first token found in TokensTable.
		algo.ApplyFilter(out.UidMatrix[0], func(uid uint64, i int) bool {
			key := x.DataKey(attr, uid)
			sv := getPostingValue(key, scalarType)
			if sv == nil {
				return false
			}
			if isGeq {
				return !scalarType.Less(*sv, ineqValue)
			}
			return !scalarType.Less(ineqValue, *sv)
		})
	}

	// If geo filter, do value check for correctness.
	var values []*task.Value
	if geoQuery != nil {
		uids := algo.MergeSorted(out.UidMatrix)
		for _, uid := range uids.Uids {
			key := x.DataKey(attr, uid)
			pl, decr := posting.GetOrCreate(key)

			vbytes, vtype, err := pl.Value()
			newValue := &task.Value{ValType: uint32(vtype)}
			if err == nil {
				newValue.Val = vbytes
			} else {
				newValue.Val = x.Nilbyte
			}
			values = append(values, newValue)
			decr() // Decrement the reference count of the pl.
		}

		filtered := geo.FilterUids(uids, values, geoQuery)
		for i := 0; i < len(out.UidMatrix); i++ {
			out.UidMatrix[i] = algo.IntersectSorted([]*task.List{out.UidMatrix[i], filtered})
		}
	}
	out.IntersectDest = intersectDest
	return &out, nil
}
Example #24
func main() {
	x.Init()

	fin, err := os.Open(filename)
	x.Check(err)
	defer fin.Close()

	scanner := bufio.NewScanner(fin)
	var numLines, numValues, numNames, numReleaseDates int
	graph = make(map[string]map[uint64][]uint64)
	gNames = make(map[uint64]string)
	gReleaseDates = make(map[uint64]string)

	for scanner.Scan() {
		numLines++
		tokens := strings.Split(scanner.Text(), "\t")
		x.AssertTruef(len(tokens) == 4, "%s", scanner.Text())

		src := tokens[0]
		x.AssertTrue(bracketed(src))
		src = removeFirstLast(src)
		srcUID := farm.Fingerprint64([]byte(src))

		pred := tokens[1]
		x.AssertTrue(bracketed(pred))
		pred = removeFirstLast(pred)

		value := tokens[2]

		if bracketed(value) {
			// Normal edge.
			value = removeFirstLast(value)
			destUID := farm.Fingerprint64([]byte(value))
			m, found := graph[pred]
			if !found {
				m = make(map[uint64][]uint64)
				graph[pred] = m
			}
			m[srcUID] = append(m[srcUID], destUID)
		} else {
			numValues++

			// Check for "@".
			pos := strings.LastIndex(value, "@")
			if pos >= 0 {
				pred = pred + "." + value[pos+1:]
				value = removeFirstLast(value[:pos])
			}

			if pred == "type.object.name.en" {
				numNames++
				gNames[srcUID] = value
			} else if pred == "film.film.initial_release_date" {
				numReleaseDates++
				gReleaseDates[srcUID] = value
			}
		}
	}

	fmt.Printf("Num lines read: %d\n", numLines)
	fmt.Printf("Num predicates: %d\n", len(graph))
	fmt.Printf("Num values read: %d\n", numValues)
	fmt.Printf("Num names read: %d\n", numNames)
	fmt.Printf("Num release dates read: %d\n", numReleaseDates)

	x.AssertTrue(numLines > 0)
	x.AssertTrue(len(graph) > 0)
	x.AssertTrue(numValues > 0)
	x.AssertTrue(numNames > 0)
	x.AssertTrue(numReleaseDates > 0)

	//	doFilterString()
	//	doSortRelease()
	doGen()
}
Example #25
// ProcessGraph processes the SubGraph instance accumulating result for the query
// from different instances. Note: taskQuery is nil for root node.
func ProcessGraph(ctx context.Context, sg, parent *SubGraph, rch chan error) {
	var err error

	if len(sg.Attr) == 0 {
		// If we have a filter SubGraph which only contains an operator,
		// it won't have any attribute to work on.
		// This is to allow providing SrcUIDs to the filter children.
		sg.DestUIDs = sg.SrcUIDs

	} else if parent == nil && len(sg.SrcFunc) == 0 {
		// I am root. I don't have any function to execute, and my
		// result has been prepared for me already.
		sg.DestUIDs = algo.MergeSorted(sg.uidMatrix) // Could also be = sg.SrcUIDs

	} else {
		taskQuery := createTaskQuery(sg)
		result, err := worker.ProcessTaskOverNetwork(ctx, taskQuery)
		if err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while processing task"))
			rch <- err
			return
		}

		sg.uidMatrix = result.UidMatrix
		sg.values = result.Values
		if len(sg.values) > 0 {
			v := sg.values[0]
			x.Trace(ctx, "Sample value for attr: %v Val: %v", sg.Attr, string(v.Val))
		}
		sg.counts = result.Counts

		if sg.Params.DoCount && len(sg.Filters) == 0 {
			// If there is a filter, we need to do more work to get the actual count.
			x.Trace(ctx, "Zero uids. Only count requested")
			rch <- nil
			return
		}

		if result.IntersectDest {
			sg.DestUIDs = algo.IntersectSorted(result.UidMatrix)
		} else {
			sg.DestUIDs = algo.MergeSorted(result.UidMatrix)
		}
	}

	if len(sg.DestUIDs.Uids) == 0 {
		// Looks like we're done here. Be careful with nil srcUIDs!
		x.Trace(ctx, "Zero uids for %q. Num attr children: %v", sg.Attr, len(sg.Children))
		rch <- nil
		return
	}

	// Apply filters if any.
	if len(sg.Filters) > 0 {
		// Run all filters in parallel.
		filterChan := make(chan error, len(sg.Filters))
		for _, filter := range sg.Filters {
			filter.SrcUIDs = sg.DestUIDs
			go ProcessGraph(ctx, filter, sg, filterChan)
		}

		for range sg.Filters {
			select {
			case err = <-filterChan:
				if err != nil {
					x.TraceError(ctx, x.Wrapf(err, "Error while processing filter task"))
					rch <- err
					return
				}

			case <-ctx.Done():
				x.TraceError(ctx, x.Wrapf(ctx.Err(), "Context done before full execution"))
				rch <- ctx.Err()
				return
			}
		}

		// Now apply the results from filter.
		var lists []*task.List
		for _, filter := range sg.Filters {
			lists = append(lists, filter.DestUIDs)
		}
		if sg.FilterOp == "|" {
			sg.DestUIDs = algo.MergeSorted(lists)
		} else {
			sg.DestUIDs = algo.IntersectSorted(lists)
		}
	}

	if len(sg.Params.Order) == 0 {
		// There is no ordering. Just apply pagination and return.
		if err = sg.applyPagination(ctx); err != nil {
			rch <- err
			return
		}
	} else {
		// We need to sort first before pagination.
		if err = sg.applyOrderAndPagination(ctx); err != nil {
			rch <- err
			return
		}
	}

	// Here we consider handling _count_ with filtering. We do this after
	// pagination because otherwise, we need to do the count with pagination
	// taken into account. For example, a PL might have only 50 entries but the
	// user wants to skip 100 entries and return 10 entries. In this case, you
	// should return a count of 0, not 10.
	if sg.Params.DoCount {
		x.AssertTrue(len(sg.Filters) > 0)
		sg.counts = make([]uint32, len(sg.uidMatrix))
		for i, ul := range sg.uidMatrix {
			// A possible optimization is to return the size of the intersection
			// without forming the intersection.
			algo.IntersectWith(ul, sg.DestUIDs)
			sg.counts[i] = uint32(len(ul.Uids))
		}
		rch <- nil
		return
	}

	childChan := make(chan error, len(sg.Children))
	for i := 0; i < len(sg.Children); i++ {
		child := sg.Children[i]
		child.SrcUIDs = sg.DestUIDs // Make the connection.
		go ProcessGraph(ctx, child, sg, childChan)
	}

	// Now get all the results back.
	for range sg.Children {
		select {
		case err = <-childChan:
			if err != nil {
				x.TraceError(ctx, x.Wrapf(err, "Error while processing child task"))
				rch <- err
				return
			}
		case <-ctx.Done():
			x.TraceError(ctx, x.Wrapf(ctx.Err(), "Context done before full execution"))
			rch <- ctx.Err()
			return
		}
	}
	rch <- nil
}
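
A hedged sketch of how a caller might drive ProcessGraph for the root SubGraph, based only on the signature above; ctx and root are assumed to be prepared by the query layer, parent is nil for the root, and the fragment lives inside a function that returns an error:

rch := make(chan error)
go ProcessGraph(ctx, root, nil, rch)
if err := <-rch; err != nil {
	x.TraceError(ctx, x.Wrapf(err, "Error while processing query graph"))
	return err
}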