Esempio n. 1
0
// generateGroup scans the whole store and returns, for every key whose
// predicate belongs to groupId, a copy of that key together with the checksum
// of its posting list.
//
// Keys for which x.Parse returns nil are skipped. The error from unmarshalling
// a posting list is handed to x.Check. The returned error is whatever the
// iterator reports once the scan has finished.
func generateGroup(groupId uint32) (*task.GroupKeys, error) {
	it := pstore.NewIterator()
	defer it.Close()

	g := &task.GroupKeys{
		GroupId: groupId,
	}

	// The loop deliberately has no post statement: every branch positions the
	// iterator itself. This avoids the Seek + Prev + implicit Next dance, which
	// would call Prev on an invalid iterator whenever the Seek runs past the
	// last key in the store.
	for it.SeekToFirst(); it.Valid(); {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			it.Next()
			continue
		}
		if group.BelongsTo(pk.Attr) != g.GroupId {
			// Jump over all keys of a predicate served by another group. The
			// iterator is already where the next iteration should start.
			it.Seek(pk.SkipPredicate())
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		// The key outlives this iteration, so keep a private copy of it.
		kdup := make([]byte, len(k.Data()))
		copy(kdup, k.Data())
		g.Keys = append(g.Keys, &task.KC{
			Key:      kdup,
			Checksum: pl.Checksum,
		})
		it.Next()
	}
	return g, it.Err()
}
Esempio n. 2
0
// initIndex populates the package-level tables map with one TokensTable per
// indexed field, filled from the index keys currently in the data store.
// The data store (pstore) must be set before this is called.
func initIndex() {
	x.AssertTrue(pstore != nil)

	// loadedTable pairs an attribute with the table built for it.
	type loadedTable struct {
		attr  string
		table *TokensTable
	}

	fields := schema.IndexedFields()
	// One slot per field, so no loader ever blocks on its single send.
	loaded := make(chan loadedTable, len(fields))

	// loadTokens walks every index key of attr and pushes each term into a
	// fresh table, then reports the table on the loaded channel.
	loadTokens := func(attr string) {
		tt := &TokensTable{
			key: make([]string, 0, 50),
		}
		indexKey := x.ParsedKey{
			Attr: attr,
		}
		prefix := indexKey.IndexPrefix()

		iter := pstore.NewIterator()
		defer iter.Close()

		iter.Seek(prefix)
		for iter.ValidForPrefix(prefix) {
			parsed := x.Parse(iter.Key().Data())
			x.AssertTrue(parsed.IsIndex())
			x.AssertTrue(len(parsed.Term) > 0)
			tt.push(parsed.Term)
			iter.Next()
		}
		loaded <- loadedTable{attr: attr, table: tt}
	}

	// Build the tables concurrently, one goroutine per indexed field.
	for _, attr := range fields {
		go loadTokens(attr)
	}

	// Collect exactly one result per indexed field.
	tables = make(map[string]*TokensTable)
	for range fields {
		res := <-loaded
		tables[res.attr] = res.table
	}
}
Esempio n. 3
0
// backup creates a backup of the data of group gid by exporting it as an RDF
// gzip file inside bdir, creating bdir if necessary.
//
// The work is a three-stage pipeline: this goroutine iterates over the store
// and feeds posting lists into chkv, numBackupRoutines goroutines convert them
// to RDF and feed byte chunks into chb, and one goroutine writes those chunks
// to the file.
//
// It returns the error from creating the directory, from iterating the store,
// or from writing the file, in that order of precedence.
func backup(gid uint32, bdir string) error {
	err := os.MkdirAll(bdir, 0700)
	if err != nil {
		return err
	}
	fpath := path.Join(bdir, fmt.Sprintf("dgraph-%d-%s.rdf.gz", gid,
		time.Now().Format("2006-01-02-15-04")))
	fmt.Printf("Backing up at: %v\n", fpath)

	// Use a goroutine to write to file.
	chb := make(chan []byte, 1000)
	errChan := make(chan error, 1)
	go func() {
		werr := writeToFile(fpath, chb)
		// If the writer gave up early, keep draining chb until it is closed.
		// Otherwise the converters would block on a full chb, the iteration
		// below would block on a full chkv, and backup would never return.
		for range chb {
		}
		errChan <- werr
	}()

	// Use a bunch of goroutines to convert to RDF format.
	chkv := make(chan kv, 1000)
	var wg sync.WaitGroup
	wg.Add(numBackupRoutines)
	for i := 0; i < numBackupRoutines; i++ {
		go func() {
			defer wg.Done()

			buf := new(bytes.Buffer)
			buf.Grow(50000)
			// flush hands a private copy of the buffered RDF to the writer and
			// empties the buffer so that it can be reused.
			flush := func() {
				tmp := make([]byte, buf.Len())
				copy(tmp, buf.Bytes())
				chb <- tmp
				buf.Reset()
			}

			for item := range chkv {
				toRDF(buf, item)
				if buf.Len() >= 40000 {
					flush()
				}
			}
			// Send whatever is left once the input is exhausted.
			if buf.Len() > 0 {
				flush()
			}
		}()
	}

	// Iterate over rocksdb. The loop has no post statement; every branch
	// either seeks past a range of keys or advances by one.
	it := pstore.NewIterator()
	defer it.Close()
	var lastPred string
	for it.SeekToFirst(); it.Valid(); {
		key := it.Key().Data()
		pk := x.Parse(key)

		if pk == nil {
			// x.Parse can return nil (presumably for keys it cannot decode);
			// skip such keys instead of dereferencing a nil key below.
			it.Next()
			continue
		}
		if pk.IsIndex() {
			// Seek to the end of index keys.
			it.Seek(pk.SkipRangeOfSameType())
			continue
		}
		if pk.Attr == "_uid_" {
			// Skip the UID mappings.
			it.Seek(pk.SkipPredicate())
			continue
		}

		x.AssertTrue(pk.IsData())
		pred, uid := pk.Attr, pk.Uid
		// lastPred is only ever set to a predicate that passed this check, so
		// the group lookup is repeated only when the predicate changes.
		if pred != lastPred && group.BelongsTo(pred) != gid {
			it.Seek(pk.SkipPredicate())
			continue
		}

		prefix := fmt.Sprintf("<_uid_:%#x> <%s> ", uid, pred)
		pl := &types.PostingList{}
		x.Check(pl.Unmarshal(it.Value().Data()))
		chkv <- kv{
			prefix: prefix,
			list:   pl,
		}
		lastPred = pred
		it.Next()
	}
	// Remember why the iteration stopped; the pipeline must still be shut down
	// in order before reporting it.
	iterErr := it.Err()

	close(chkv) // We have stopped output to chkv.
	wg.Wait()   // Wait for numBackupRoutines to finish.
	close(chb)  // We have stopped output to chb.

	writeErr := <-errChan
	if iterErr != nil {
		// The file is incomplete if the iteration failed, whatever the writer
		// reported.
		return iterErr
	}
	return writeErr
}
Esempio n. 4
0
// PredicateData can be used to return data corresponding to a predicate over
// a stream.
//
// It walks the whole store and sends every key-value pair whose predicate
// belongs to gkeys.GroupId, except pairs that gkeys.Keys already lists with
// the same key and the same posting-list checksum. The lookup in gkeys.Keys is
// a binary search, so the keys are assumed to be sorted in ascending byte
// order — TODO confirm against the caller.
//
// An error is returned when this node does not serve the group or is not its
// leader, when stream.Send fails, or when the iterator reports an error.
func (w *grpcWorker) PredicateData(gkeys *task.GroupKeys, stream Worker_PredicateDataServer) error {
	if !groups().ServesGroup(gkeys.GroupId) {
		return x.Errorf("Group %d not served.", gkeys.GroupId)
	}
	n := groups().Node(gkeys.GroupId)
	if !n.AmLeader() {
		return x.Errorf("Not leader of group: %d", gkeys.GroupId)
	}

	// TODO(pawan) - Shift to CheckPoints once we figure out how to add them to the
	// RocksDB library we are using.
	// http://rocksdb.org/blog/2609/use-checkpoints-for-efficient-snapshots/
	it := pstore.NewIterator()
	defer it.Close()

	// The loop deliberately has no post statement: every branch positions the
	// iterator itself. This avoids the Seek + Prev + implicit Next dance, which
	// would call Prev on an invalid iterator whenever the Seek runs past the
	// last key in the store.
	for it.SeekToFirst(); it.Valid(); {
		k, v := it.Key(), it.Value()
		key := k.Data()
		pk := x.Parse(key)

		if pk == nil {
			it.Next()
			continue
		}
		if group.BelongsTo(pk.Attr) != gkeys.GroupId {
			// Jump over all keys of a predicate served by another group. The
			// iterator is already where the next iteration should start.
			it.Seek(pk.SkipPredicate())
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		// Find the first entry in gkeys.Keys that is not smaller than key.
		idx := sort.Search(len(gkeys.Keys), func(i int) bool {
			return bytes.Compare(key, gkeys.Keys[i].Key) <= 0
		})

		if idx < len(gkeys.Keys) {
			t := gkeys.Keys[idx]
			// Different keys would have the same prefix. So, check Checksum first,
			// it would be cheaper when there's no match.
			if bytes.Equal(pl.Checksum, t.Checksum) && bytes.Equal(key, t.Key) {
				// No need to send this.
				it.Next()
				continue
			}
		}

		// We just need to stream this kv. So, we can directly use the key
		// and val without any copying.
		kv := &task.KV{
			Key: key,
			Val: v.Data(),
		}

		if err := stream.Send(kv); err != nil {
			return err
		}
		it.Next()
	} // end of iterator

	return it.Err()
}