Example #1
func TestGenerateGroup(t *testing.T) {
	dir, err := ioutil.TempDir("", "store3")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(dir)

	r := bytes.NewReader([]byte("default: fp % 3"))
	require.NoError(t, group.ParseConfig(r), "Unable to parse config.")

	ps, err := store.NewStore(dir)
	if err != nil {
		t.Fatal(err)
	}
	defer ps.Close()
	posting.Init(ps)
	Init(ps)

	require.Equal(t, uint32(0), group.BelongsTo("pred0"))
	writePLs(t, "pred0", 33, 1, ps)

	require.Equal(t, uint32(1), group.BelongsTo("p1"))
	writePLs(t, "p1", 34, 1, ps)

	require.Equal(t, uint32(2), group.BelongsTo("pr2"))
	writePLs(t, "pr2", 35, 1, ps)
	time.Sleep(time.Second)

	g, err := generateGroup(0)
	if err != nil {
		t.Error(err)
	}
	require.Equal(t, 33, len(g.Keys))
	for i, k := range g.Keys {
		require.Equal(t, x.DataKey("pred0", uint64(i)), k.Key)
	}

	g, err = generateGroup(1)
	if err != nil {
		t.Error(err)
	}
	require.Equal(t, 34, len(g.Keys))
	for i, k := range g.Keys {
		require.Equal(t, x.DataKey("p1", uint64(i)), k.Key)
	}

	g, err = generateGroup(2)
	if err != nil {
		t.Error(err)
	}
	require.Equal(t, 35, len(g.Keys))
	for i, k := range g.Keys {
		require.Equal(t, x.DataKey("pr2", uint64(i)), k.Key)
	}
}
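
The test above relies on the modulo rule "default: fp % 3", which maps every predicate to fingerprint(predicate) % 3. Below is a minimal, standalone sketch of that routing, assuming the group package lives at the usual github.com/dgraph-io/dgraph/group import path; only ParseConfig and BelongsTo, as used in the test, are exercised.

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/dgraph-io/dgraph/group"
)

func main() {
	// "default: fp % 3" routes each predicate to fingerprint(predicate) % 3,
	// so every predicate lands in group 0, 1 or 2.
	conf := bytes.NewReader([]byte("default: fp % 3"))
	if err := group.ParseConfig(conf); err != nil {
		log.Fatal(err)
	}
	for _, pred := range []string{"pred0", "p1", "pr2"} {
		fmt.Printf("%s -> group %d\n", pred, group.BelongsTo(pred))
	}
}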
Example #2
// handleNQuads converts the nQuads that satisfy the modulo
// rule into posting lists.
func (s *state) handleNQuads(wg *sync.WaitGroup) {
	defer wg.Done()
	// Check if we need to mark used UIDs.
	markUids := s.groupsMap[group.BelongsTo("_uid_")]
	ctx := context.Background()
	for nq := range s.cnq {
		if s.Error() != nil {
			return
		}
		// Only handle this edge if the attribute satisfies the modulo rule
		if !s.groupsMap[group.BelongsTo(nq.Predicate)] {
			atomic.AddUint64(&s.ctr.ignored, 1)
			continue
		}

		edge, err := nq.ToEdge()
		for err != nil {
			// Just put in a retry loop to tackle temporary errors.
			if err == posting.ErrRetry {
				time.Sleep(time.Microsecond)

			} else {
				s.SetError(err)
				glog.WithError(err).WithField("nq", nq).
					Error("While converting to edge")
				return
			}
			edge, err = nq.ToEdge()
		}

		key := x.DataKey(edge.Attr, edge.Entity)

		plist, decr := posting.GetOrCreate(key)
		plist.AddMutationWithIndex(ctx, edge, posting.Set)
		decr() // Don't defer, just call because we're in a channel loop.

		// Mark UIDs and XIDs as taken
		if markUids {
			// Mark entity UID.
			markTaken(ctx, edge.Entity)
			// Mark the Value UID.
			if edge.ValueId != 0 {
				markTaken(ctx, edge.ValueId)
			}
		}
		atomic.AddUint64(&s.ctr.processed, 1)
	}
}
Example #3
// Sort is used to sort the given UID matrix.
func (w *grpcWorker) Sort(ctx context.Context, s *task.Sort) (*task.SortResult, error) {
	if ctx.Err() != nil {
		return &emptySortResult, ctx.Err()
	}

	gid := group.BelongsTo(s.Attr)
	//x.Trace(ctx, "Attribute: %q NumUids: %v groupId: %v Sort", q.Attr(), q.UidsLength(), gid)

	var reply *task.SortResult
	x.AssertTruef(groups().ServesGroup(gid),
		"attr: %q groupId: %v Request sent to wrong server.", s.Attr, gid)

	c := make(chan error, 1)
	go func() {
		var err error
		reply, err = processSort(s)
		c <- err
	}()

	select {
	case <-ctx.Done():
		return &emptySortResult, ctx.Err()
	case err := <-c:
		return reply, err
	}
}
Example #4
// ProcessTaskOverNetwork is used to process the query and get the result from
// the instance which stores posting list corresponding to the predicate in the
// query.
func ProcessTaskOverNetwork(ctx context.Context, q *task.Query) (*task.Result, error) {
	attr := q.Attr
	gid := group.BelongsTo(attr)
	x.Trace(ctx, "attr: %v groupId: %v", attr, gid)

	if groups().ServesGroup(gid) {
		// No need for a network call, as this should be run from within this instance.
		return processTask(q)
	}

	// Send this over the network.
	// TODO: Send the request to multiple servers as described in Jeff Dean's talk.
	addr := groups().AnyServer(gid)
	pl := pools().get(addr)

	conn, err := pl.Get()
	if err != nil {
		return &emptyResult, x.Wrapf(err, "ProcessTaskOverNetwork: while retrieving connection.")
	}
	defer pl.Put(conn)
	x.Trace(ctx, "Sending request to %v", addr)

	c := NewWorkerClient(conn)
	reply, err := c.ServeTask(ctx, q)
	if err != nil {
		x.TraceError(ctx, x.Wrapf(err, "Error while calling Worker.ServeTask"))
		return &emptyResult, err
	}

	x.Trace(ctx, "Reply from server. length: %v Addr: %v Attr: %v",
		len(reply.UidMatrix), addr, attr)
	return reply, nil
}
Example #5
// ServeTask is used to respond to a query.
func (w *grpcWorker) ServeTask(ctx context.Context, q *task.Query) (*task.Result, error) {
	if ctx.Err() != nil {
		return &emptyResult, ctx.Err()
	}

	gid := group.BelongsTo(q.Attr)
	x.Trace(ctx, "Attribute: %q NumUids: %v groupId: %v ServeTask", q.Attr, len(q.Uids), gid)

	var reply *task.Result
	x.AssertTruef(groups().ServesGroup(gid),
		"attr: %q groupId: %v Request sent to wrong server.", q.Attr, gid)

	c := make(chan error, 1)
	go func() {
		var err error
		reply, err = processTask(q)
		c <- err
	}()

	select {
	case <-ctx.Done():
		return reply, ctx.Err()
	case err := <-c:
		return reply, err
	}
}
Example #6
func generateGroup(groupId uint32) (*task.GroupKeys, error) {
	it := pstore.NewIterator()
	defer it.Close()

	g := &task.GroupKeys{
		GroupId: groupId,
	}

	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			continue
		}
		if group.BelongsTo(pk.Attr) != g.GroupId {
			it.Seek(pk.SkipPredicate())
		it.Prev() // Step back to compensate for the it.Next() in the loop header.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		kdup := make([]byte, len(k.Data()))
		copy(kdup, k.Data())
		key := &task.KC{
			Key:      kdup,
			Checksum: pl.Checksum,
		}
		g.Keys = append(g.Keys, key)
	}
	return g, it.Err()
}
Example #7
// addToMutationMap adds each edge to the mutationMap entry for the group its
// attribute belongs to, taking into account the op (operation).
func addToMutationMap(mutationMap map[uint32]*task.Mutations,
	edges []*task.DirectedEdge, op string) {
	for _, edge := range edges {
		gid := group.BelongsTo(edge.Attr)
		mu := mutationMap[gid]
		if mu == nil {
			mu = &task.Mutations{GroupId: gid}
			mutationMap[gid] = mu
		}
		if op == set {
			mu.Set = append(mu.Set, edge)
		} else if op == del {
			mu.Del = append(mu.Del, edge)
		}
	}
}
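
addToMutationMap only buckets edges by the group their attribute belongs to; the caller still has to send each bucket to the group that serves it. Below is a minimal sketch of such a caller, assuming the same set/del constants used above; sendToGroup is a hypothetical stand-in for whatever proposes or forwards the per-group mutations.

func dispatchMutations(ctx context.Context, setEdges, delEdges []*task.DirectedEdge) error {
	mutationMap := make(map[uint32]*task.Mutations)
	addToMutationMap(mutationMap, setEdges, set)
	addToMutationMap(mutationMap, delEdges, del)

	for gid, mu := range mutationMap {
		// Each group only receives the edges whose attributes it serves.
		if err := sendToGroup(ctx, gid, mu); err != nil {
			return err
		}
	}
	return nil
}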
Example #8
// runMutations goes through all the edges and applies them. It returns an
// error as soon as a mutation cannot be applied, aborting the rest.
func runMutations(ctx context.Context, edges []*task.DirectedEdge, op uint32) error {
	for _, edge := range edges {
		if !groups().ServesGroup(group.BelongsTo(edge.Attr)) {
			return x.Errorf("Predicate fingerprint doesn't match this instance")
		}

		key := x.DataKey(edge.Attr, edge.Entity)
		plist, decr := posting.GetOrCreate(key)
		defer decr()

		if err := plist.AddMutationWithIndex(ctx, edge, op); err != nil {
			x.Printf("Error while adding mutation: %v %v", edge, err)
			return err // abort applying the rest of them.
		}
	}
	return nil
}
Example #9
// AssignUidsOverNetwork assigns new uids and writes them to the umap.
func AssignUidsOverNetwork(ctx context.Context, umap map[string]uint64) error {
	gid := group.BelongsTo("_uid_")
	num := createNumQuery(gid, umap)

	var ul *task.List
	var err error
	if groups().ServesGroup(gid) {
		ul, err = assignUids(ctx, num)
		if err != nil {
			return err
		}

	} else {
		_, addr := groups().Leader(gid)
		p := pools().get(addr)
		conn, err := p.Get()
		if err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while retrieving connection"))
			return err
		}
		defer p.Put(conn)

		c := NewWorkerClient(conn)
		ul, err = c.AssignUids(ctx, num)
		if err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while getting uids"))
			return err
		}
	}

	x.AssertTruef(len(ul.Uids) == int(num.Val),
		"Requested: %d != Retrieved Uids: %d", num.Val, len(ul.Uids))

	i := 0
	for k, v := range umap {
		if v == 0 {
			uid := ul.Uids[i]
			umap[k] = uid // Write uids to map.
			i++
		}
	}
	return nil
}
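
As the loop at the end shows, the umap contract is that only entries whose value is 0 receive a freshly assigned uid; non-zero entries are left untouched. A short, hypothetical caller sketch (the keys and values are made up for illustration):

umap := map[string]uint64{
	"alice": 0,   // needs a fresh uid
	"bob":   0,   // needs a fresh uid
	"carol": 0x5, // already resolved, left as-is
}
if err := AssignUidsOverNetwork(ctx, umap); err != nil {
	return err
}
// umap["alice"] and umap["bob"] now hold newly assigned uids.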
Example #10
// SortOverNetwork sends sort query over the network.
func SortOverNetwork(ctx context.Context, q *task.Sort) (*task.SortResult, error) {
	gid := group.BelongsTo(q.Attr)
	x.Trace(ctx, "worker.Sort attr: %v groupId: %v", q.Attr, gid)

	if groups().ServesGroup(gid) {
		// No need for a network call, as this should be run from within this instance.
		return processSort(q)
	}

	// Send this over the network.
	// TODO: Send the request to multiple servers as described in Jeff Dean's talk.
	addr := groups().AnyServer(gid)
	pl := pools().get(addr)

	conn, err := pl.Get()
	if err != nil {
		return &emptySortResult, x.Wrapf(err, "SortOverNetwork: while retrieving connection.")
	}
	defer pl.Put(conn)
	x.Trace(ctx, "Sending request to %v", addr)

	c := NewWorkerClient(conn)
	var reply *task.SortResult
	cerr := make(chan error, 1)
	go func() {
		var err error
		reply, err = c.Sort(ctx, q)
		cerr <- err
	}()

	select {
	case <-ctx.Done():
		return &emptySortResult, ctx.Err()
	case err := <-cerr:
		if err != nil {
			x.TraceError(ctx, x.Wrapf(err, "Error while calling Worker.Sort"))
		}
		return reply, err
	}
}
Example #11
func TestBackup(t *testing.T) {
	// Index the name predicate. We ensure it doesn't show up on backup.
	dir, ps := initTestBackup(t, "scalar name:string @index")
	defer os.RemoveAll(dir)
	defer ps.Close()
	// Remove already existing backup folders, if any.
	bdir, err := ioutil.TempDir("", "backup")
	require.NoError(t, err)
	defer os.RemoveAll(bdir)

	posting.CommitLists(10)
	time.Sleep(time.Second)

	// We have 4 friend type edges. FP("friends")%10 = 2.
	err = backup(group.BelongsTo("friend"), bdir)
	require.NoError(t, err)

	// We have 2 name type edges (with index). FP("name")%10 = 7.
	err = backup(group.BelongsTo("name"), bdir)
	require.NoError(t, err)

	searchDir := bdir
	fileList := []string{}
	err = filepath.Walk(searchDir, func(path string, f os.FileInfo, err error) error {
		if path != bdir {
			fileList = append(fileList, path)
		}
		return nil
	})
	require.NoError(t, err)

	var counts []int
	for _, file := range fileList {
		f, err := os.Open(file)
		require.NoError(t, err)

		r, err := gzip.NewReader(f)
		require.NoError(t, err)

		scanner := bufio.NewScanner(r)
		count := 0
		for scanner.Scan() {
			nq, err := rdf.Parse(scanner.Text())
			require.NoError(t, err)
			// Subject should have uid 1/2/3/4.
			require.Contains(t, []string{"_uid_:0x1", "_uid_:0x2", "_uid_:0x3", "_uid_:0x4"}, nq.Subject)
			// The only value we set was "photon".
			if !bytes.Equal(nq.ObjectValue, nil) {
				require.Equal(t, []byte("pho\\ton"), nq.ObjectValue)
			}
			// The only objectId we set was uid 5.
			if nq.ObjectId != "" {
				require.Equal(t, "_uid_:0x5", nq.ObjectId)
			}
			count++
		}
		counts = append(counts, count)
		require.NoError(t, scanner.Err())
	}
	// This order will be preserved due to file naming.
	require.Equal(t, []int{4, 2}, counts)
}
Example #12
// backup creates a backup of the data for the given group by exporting it as gzipped RDF.
func backup(gid uint32, bdir string) error {
	// Use a goroutine to write to file.
	err := os.MkdirAll(bdir, 0700)
	if err != nil {
		return err
	}
	fpath := path.Join(bdir, fmt.Sprintf("dgraph-%d-%s.rdf.gz", gid,
		time.Now().Format("2006-01-02-15-04")))
	fmt.Printf("Backing up at: %v\n", fpath)
	chb := make(chan []byte, 1000)
	errChan := make(chan error, 1)
	go func() {
		errChan <- writeToFile(fpath, chb)
	}()

	// Use a bunch of goroutines to convert to RDF format.
	chkv := make(chan kv, 1000)
	var wg sync.WaitGroup
	wg.Add(numBackupRoutines)
	for i := 0; i < numBackupRoutines; i++ {
		go func() {
			buf := new(bytes.Buffer)
			buf.Grow(50000)
			for item := range chkv {
				toRDF(buf, item)
				if buf.Len() >= 40000 {
					tmp := make([]byte, buf.Len())
					copy(tmp, buf.Bytes())
					chb <- tmp
					buf.Reset()
				}
			}
			if buf.Len() > 0 {
				tmp := make([]byte, buf.Len())
				copy(tmp, buf.Bytes())
				chb <- tmp
			}
			wg.Done()
		}()
	}

	// Iterate over rocksdb.
	it := pstore.NewIterator()
	defer it.Close()
	var lastPred string
	for it.SeekToFirst(); it.Valid(); {
		key := it.Key().Data()
		pk := x.Parse(key)

		if pk.IsIndex() {
			// Seek to the end of index keys.
			it.Seek(pk.SkipRangeOfSameType())
			continue
		}
		if pk.Attr == "_uid_" {
			// Skip the UID mappings.
			it.Seek(pk.SkipPredicate())
			continue
		}

		x.AssertTrue(pk.IsData())
		pred, uid := pk.Attr, pk.Uid
		if pred != lastPred && group.BelongsTo(pred) != gid {
			it.Seek(pk.SkipPredicate())
			continue
		}

		prefix := fmt.Sprintf("<_uid_:%#x> <%s> ", uid, pred)
		pl := &types.PostingList{}
		x.Check(pl.Unmarshal(it.Value().Data()))
		chkv <- kv{
			prefix: prefix,
			list:   pl,
		}
		lastPred = pred
		it.Next()
	}

	close(chkv) // We have stopped output to chkv.
	wg.Wait()   // Wait for numBackupRoutines to finish.
	close(chb)  // We have stopped output to chb.

	err = <-errChan
	return err
}
Example #13
// PredicateData can be used to return data corresponding to a predicate over
// a stream.
func (w *grpcWorker) PredicateData(gkeys *task.GroupKeys, stream Worker_PredicateDataServer) error {
	if !groups().ServesGroup(gkeys.GroupId) {
		return x.Errorf("Group %d not served.", gkeys.GroupId)
	}
	n := groups().Node(gkeys.GroupId)
	if !n.AmLeader() {
		return x.Errorf("Not leader of group: %d", gkeys.GroupId)
	}

	// TODO(pawan) - Shift to CheckPoints once we figure out how to add them to the
	// RocksDB library we are using.
	// http://rocksdb.org/blog/2609/use-checkpoints-for-efficient-snapshots/
	it := pstore.NewIterator()
	defer it.Close()

	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			continue
		}
		if group.BelongsTo(pk.Attr) != gkeys.GroupId {
			it.Seek(pk.SkipPredicate())
		it.Prev() // Step back to compensate for the it.Next() in the loop header.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		idx := sort.Search(len(gkeys.Keys), func(i int) bool {
			t := gkeys.Keys[i]
			return bytes.Compare(k.Data(), t.Key) <= 0
		})

		if idx < len(gkeys.Keys) {
			// Found a match.
			t := gkeys.Keys[idx]
			// Different keys would have the same prefix. So, check Checksum first,
			// it would be cheaper when there's no match.
			if bytes.Equal(pl.Checksum, t.Checksum) && bytes.Equal(k.Data(), t.Key) {
				// No need to send this.
				continue
			}
		}

		// We just need to stream this kv. So, we can directly use the key
		// and val without any copying.
		kv := &task.KV{
			Key: k.Data(),
			Val: v.Data(),
		}

		if err := stream.Send(kv); err != nil {
			return err
		}
	} // end of iterator

	if err := it.Err(); err != nil {
		return err
	}
	return nil
}