// generateGroup walks the predicate store and collects, for every key that
// belongs to the given group, the key itself and the checksum of its posting
// list. The result is what a caller sends to the group leader so the leader
// can stream back only the entries that differ.
func generateGroup(groupId uint32) (*task.GroupKeys, error) {
	it := pstore.NewIterator()
	defer it.Close()

	g := &task.GroupKeys{
		GroupId: groupId,
	}

	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			continue
		}
		if group.BelongsTo(pk.Attr) != g.GroupId {
			it.Seek(pk.SkipPredicate())
			it.Prev() // Counteract the it.Next() in the loop header.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		kdup := make([]byte, len(k.Data()))
		copy(kdup, k.Data())
		key := &task.KC{
			Key:      kdup,
			Checksum: pl.Checksum,
		}
		g.Keys = append(g.Keys, key)
	}
	return g, it.Err()
}
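
// Hypothetical helper, not part of the original code: PredicateData on the
// leader runs sort.Search over gkeys.Keys, which assumes the keys arrive in
// ascending byte order. generateGroup satisfies this because the RocksDB
// iterator yields keys in sorted order; a sketch like this could assert that
// invariant in tests.
func keysAreSorted(g *task.GroupKeys) bool {
	for i := 1; i < len(g.Keys); i++ {
		if bytes.Compare(g.Keys[i-1].Key, g.Keys[i].Key) > 0 {
			return false
		}
	}
	return true
}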
// commit merges the mutation layer into a fresh types.PostingList, computes a
// checksum over its postings, and hands the marshalled result to the commit
// channel for writing. It returns false when there is nothing to commit.
func (l *List) commit() (committed bool, rerr error) {
	l.Lock()
	defer l.Unlock()

	if len(l.mlayer) == 0 {
		atomic.StoreInt64(&l.dirtyTs, 0)
		return false, nil
	}

	var final types.PostingList
	ubuf := make([]byte, 16)
	h := md5.New()
	count := 0
	l.iterate(0, func(p *types.Posting) bool {
		// Checksum code.
		n := binary.PutVarint(ubuf, int64(count))
		h.Write(ubuf[0:n])
		n = binary.PutUvarint(ubuf, p.Uid)
		h.Write(ubuf[0:n])
		h.Write(p.Value)
		h.Write([]byte(p.Label))
		count++

		// It's okay to take the pointer from the iterator, because we hold the lock
		// over List and it won't be released until final has been marshalled. Thus,
		// the underlying data won't change.
		final.Postings = append(final.Postings, p)
		return true
	})
	final.Checksum = h.Sum(nil)
	data, err := final.Marshal()
	x.Checkf(err, "Unable to marshal posting list")

	sw := l.StartWait()
	ce := commitEntry{
		key: l.key,
		val: data,
		sw:  sw,
	}
	commitCh <- ce

	// Now reset the mutation variables.
	atomic.StorePointer(&l.pbuffer, nil) // Make prev buffer eligible for GC.
	atomic.StoreInt64(&l.dirtyTs, 0)     // Set as clean.
	l.mlayer = l.mlayer[:0]
	l.lastCompact = time.Now()
	return true, nil
}
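
// Hypothetical helper, not part of the original code: it re-derives the
// checksum that commit() stores in the posting list, using the same scheme
// (running count as varint, UID as uvarint, then value and label bytes, all
// fed through md5). Shown only to make the checksum format explicit; it
// assumes the types.PostingList/Posting fields used above.
func recomputeChecksum(pl *types.PostingList) []byte {
	h := md5.New()
	ubuf := make([]byte, 16)
	for count, p := range pl.Postings {
		n := binary.PutVarint(ubuf, int64(count))
		h.Write(ubuf[:n])
		n = binary.PutUvarint(ubuf, p.Uid)
		h.Write(ubuf[:n])
		h.Write(p.Value)
		h.Write([]byte(p.Label))
	}
	return h.Sum(nil)
}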
// PredicateData streams the key-value pairs of a predicate group to the
// caller, skipping entries whose checksums already match the ones the caller
// sent. Only the leader of the group serves this request.
func (w *grpcWorker) PredicateData(gkeys *task.GroupKeys, stream Worker_PredicateDataServer) error {
	if !groups().ServesGroup(gkeys.GroupId) {
		return x.Errorf("Group %d not served.", gkeys.GroupId)
	}
	n := groups().Node(gkeys.GroupId)
	if !n.AmLeader() {
		return x.Errorf("Not leader of group: %d", gkeys.GroupId)
	}

	// TODO(pawan) - Shift to CheckPoints once we figure out how to add them to the
	// RocksDB library we are using.
	// http://rocksdb.org/blog/2609/use-checkpoints-for-efficient-snapshots/
	it := pstore.NewIterator()
	defer it.Close()

	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			continue
		}
		if group.BelongsTo(pk.Attr) != gkeys.GroupId {
			it.Seek(pk.SkipPredicate())
			it.Prev() // Counteract the it.Next() in the loop header.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		idx := sort.Search(len(gkeys.Keys), func(i int) bool {
			t := gkeys.Keys[i]
			return bytes.Compare(k.Data(), t.Key) <= 0
		})
		if idx < len(gkeys.Keys) {
			// Found a candidate key; verify below.
			t := gkeys.Keys[idx]
			// Different keys can share a long prefix, so compare Checksum first;
			// it's cheaper when there's no match.
			if bytes.Equal(pl.Checksum, t.Checksum) && bytes.Equal(k.Data(), t.Key) {
				// No need to send this.
				continue
			}
		}

		// We just need to stream this kv, so we can use the key and val
		// directly without copying.
		kv := &task.KV{
			Key: k.Data(),
			Val: v.Data(),
		}
		if err := stream.Send(kv); err != nil {
			return err
		}
	} // end of iterator

	if err := it.Err(); err != nil {
		return err
	}
	return nil
}
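
// Hypothetical client side, not part of the original code: a sketch of how a
// replica might combine generateGroup with the PredicateData stream to pull
// only the keys whose checksums differ from the leader's. WorkerClient and the
// shape of its streaming method are assumed from the standard gRPC codegen for
// this service; applyKV stands in for whatever writes a received posting list
// back into the local store.
func syncGroup(ctx context.Context, c WorkerClient, groupId uint32) error {
	gkeys, err := generateGroup(groupId)
	if err != nil {
		return err
	}
	stream, err := c.PredicateData(ctx, gkeys)
	if err != nil {
		return err
	}
	for {
		kv, err := stream.Recv()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if err := applyKV(kv); err != nil { // applyKV is hypothetical.
			return err
		}
	}
}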