func generateGroup(groupId uint32) (*task.GroupKeys, error) { it := pstore.NewIterator() defer it.Close() g := &task.GroupKeys{ GroupId: groupId, } for it.SeekToFirst(); it.Valid(); it.Next() { k, v := it.Key(), it.Value() pk := x.Parse(k.Data()) if pk == nil { continue } if group.BelongsTo(pk.Attr) != g.GroupId { it.Seek(pk.SkipPredicate()) it.Prev() // To tackle it.Next() called by default. continue } var pl types.PostingList x.Check(pl.Unmarshal(v.Data())) kdup := make([]byte, len(k.Data())) copy(kdup, k.Data()) key := &task.KC{ Key: kdup, Checksum: pl.Checksum, } g.Keys = append(g.Keys, key) } return g, it.Err() }
// initIndex initializes the index with the given data store. func initIndex() { x.AssertTrue(pstore != nil) // Initialize TokensTables. indexedFields := schema.IndexedFields() type resultStruct struct { attr string table *TokensTable } results := make(chan resultStruct, len(indexedFields)) for _, attr := range indexedFields { go func(attr string) { table := &TokensTable{ key: make([]string, 0, 50), } pk := x.ParsedKey{ Attr: attr, } prefix := pk.IndexPrefix() it := pstore.NewIterator() defer it.Close() for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() { pki := x.Parse(it.Key().Data()) x.AssertTrue(pki.IsIndex()) x.AssertTrue(len(pki.Term) > 0) table.push(pki.Term) } results <- resultStruct{attr, table} }(attr) } tables = make(map[string]*TokensTable) for i := 0; i < len(indexedFields); i++ { r := <-results tables[r.attr] = r.table } }
// Backup creates a backup of data by exporting it as an RDF gzip. func backup(gid uint32, bdir string) error { // Use a goroutine to write to file. err := os.MkdirAll(bdir, 0700) if err != nil { return err } fpath := path.Join(bdir, fmt.Sprintf("dgraph-%d-%s.rdf.gz", gid, time.Now().Format("2006-01-02-15-04"))) fmt.Printf("Backing up at: %v\n", fpath) chb := make(chan []byte, 1000) errChan := make(chan error, 1) go func() { errChan <- writeToFile(fpath, chb) }() // Use a bunch of goroutines to convert to RDF format. chkv := make(chan kv, 1000) var wg sync.WaitGroup wg.Add(numBackupRoutines) for i := 0; i < numBackupRoutines; i++ { go func() { buf := new(bytes.Buffer) buf.Grow(50000) for item := range chkv { toRDF(buf, item) if buf.Len() >= 40000 { tmp := make([]byte, buf.Len()) copy(tmp, buf.Bytes()) chb <- tmp buf.Reset() } } if buf.Len() > 0 { tmp := make([]byte, buf.Len()) copy(tmp, buf.Bytes()) chb <- tmp } wg.Done() }() } // Iterate over rocksdb. it := pstore.NewIterator() defer it.Close() var lastPred string for it.SeekToFirst(); it.Valid(); { key := it.Key().Data() pk := x.Parse(key) if pk.IsIndex() { // Seek to the end of index keys. it.Seek(pk.SkipRangeOfSameType()) continue } if pk.Attr == "_uid_" { // Skip the UID mappings. it.Seek(pk.SkipPredicate()) continue } x.AssertTrue(pk.IsData()) pred, uid := pk.Attr, pk.Uid if pred != lastPred && group.BelongsTo(pred) != gid { it.Seek(pk.SkipPredicate()) continue } prefix := fmt.Sprintf("<_uid_:%#x> <%s> ", uid, pred) pl := &types.PostingList{} x.Check(pl.Unmarshal(it.Value().Data())) chkv <- kv{ prefix: prefix, list: pl, } lastPred = pred it.Next() } close(chkv) // We have stopped output to chkv. wg.Wait() // Wait for numBackupRoutines to finish. close(chb) // We have stopped output to chb. err = <-errChan return err }
// PredicateData can be used to return data corresponding to a predicate over
// a stream.
//
// gkeys carries the (key, checksum) pairs the caller already holds; any key
// in this group whose checksum matches is skipped, everything else is
// streamed back as task.KV. Only the leader of the group serves this RPC.
func (w *grpcWorker) PredicateData(gkeys *task.GroupKeys, stream Worker_PredicateDataServer) error {
	if !groups().ServesGroup(gkeys.GroupId) {
		return x.Errorf("Group %d not served.", gkeys.GroupId)
	}
	n := groups().Node(gkeys.GroupId)
	if !n.AmLeader() {
		return x.Errorf("Not leader of group: %d", gkeys.GroupId)
	}

	// TODO(pawan) - Shift to CheckPoints once we figure out how to add them to the
	// RocksDB library we are using.
	// http://rocksdb.org/blog/2609/use-checkpoints-for-efficient-snapshots/
	it := pstore.NewIterator()
	defer it.Close()
	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())
		if pk == nil {
			// Not a key we understand; skip it.
			continue
		}
		if group.BelongsTo(pk.Attr) != gkeys.GroupId {
			// Predicate owned by another group: jump past all of its keys,
			// then step back one so the loop's it.Next() lands correctly.
			it.Seek(pk.SkipPredicate())
			it.Prev() // To tackle it.Next() called by default.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		// Find the first entry in gkeys.Keys whose key is >= k. NOTE(review):
		// this binary search assumes gkeys.Keys is sorted by key in the same
		// byte order the iterator yields — confirm against the caller
		// (generateGroup builds it from an in-order scan).
		idx := sort.Search(len(gkeys.Keys), func(i int) bool {
			t := gkeys.Keys[i]
			return bytes.Compare(k.Data(), t.Key) <= 0
		})
		if idx < len(gkeys.Keys) {
			// Found a match.
			t := gkeys.Keys[idx]
			// Different keys would have the same prefix. So, check Checksum first,
			// it would be cheaper when there's no match.
			if bytes.Equal(pl.Checksum, t.Checksum) && bytes.Equal(k.Data(), t.Key) {
				// No need to send this.
				continue
			}
		}

		// We just need to stream this kv. So, we can directly use the key
		// and val without any copying.
		kv := &task.KV{
			Key: k.Data(),
			Val: v.Data(),
		}
		if err := stream.Send(kv); err != nil {
			return err
		}
	} // end of iterator
	if err := it.Err(); err != nil {
		return err
	}
	return nil
}