Example #1
func generateGroup(groupId uint32) (*task.GroupKeys, error) {
	it := pstore.NewIterator()
	defer it.Close()

	g := &task.GroupKeys{
		GroupId: groupId,
	}

	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			continue
		}
		if group.BelongsTo(pk.Attr) != g.GroupId {
			it.Seek(pk.SkipPredicate())
			it.Prev() // Counteract the it.Next() run by the loop, so we land on the next predicate's first key.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		// Copy the key; the iterator owns the underlying buffer.
		kdup := make([]byte, len(k.Data()))
		copy(kdup, k.Data())
		key := &task.KC{
			Key:      kdup,
			Checksum: pl.Checksum,
		}
		g.Keys = append(g.Keys, key)
	}
	return g, it.Err()
}
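
A subtle bit above is the skip: when a key belongs to another group, the code seeks past the whole predicate and then calls it.Prev(), so that the it.Next() run by the loop lands exactly on the first key of the next predicate. Below is a minimal, self-contained sketch of that seek-then-step-back pattern over a sorted in-memory slice; the sliceIterator type and the "attr|uid" key layout are illustrative stand-ins for the RocksDB iterator and the real key format, not the pstore API.

package main

import (
	"fmt"
	"sort"
	"strings"
)

// sliceIterator is a toy stand-in for the store iterator: keys are kept
// sorted, and Seek positions the cursor at the first key >= the target.
type sliceIterator struct {
	keys []string
	pos  int
}

func (it *sliceIterator) SeekToFirst()  { it.pos = 0 }
func (it *sliceIterator) Valid() bool   { return it.pos >= 0 && it.pos < len(it.keys) }
func (it *sliceIterator) Next()         { it.pos++ }
func (it *sliceIterator) Prev()         { it.pos-- }
func (it *sliceIterator) Key() string   { return it.keys[it.pos] }
func (it *sliceIterator) Seek(k string) { it.pos = sort.SearchStrings(it.keys, k) }

func main() {
	// Keys are "<attr>|<uid>" and sorted, so each predicate's keys are contiguous.
	it := &sliceIterator{keys: []string{
		"age|1", "age|2", "name|1", "name|2", "name|3", "salary|1",
	}}
	wanted := func(attr string) bool { return attr == "age" || attr == "salary" }

	for it.SeekToFirst(); it.Valid(); it.Next() {
		attr := strings.SplitN(it.Key(), "|", 2)[0]
		if !wanted(attr) {
			// Jump past every "<attr>|..." key, then step back one so the
			// loop's it.Next() lands on the next predicate's first key.
			it.Seek(attr + "~") // '~' sorts after '|'
			it.Prev()
			continue
		}
		fmt.Println("keep:", it.Key())
	}
}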
Example #2
func (l *List) commit() (committed bool, rerr error) {
	l.Lock()
	defer l.Unlock()

	if len(l.mlayer) == 0 {
		atomic.StoreInt64(&l.dirtyTs, 0)
		return false, nil
	}

	var final types.PostingList
	ubuf := make([]byte, 16)
	h := md5.New()
	count := 0
	l.iterate(0, func(p *types.Posting) bool {
		// Checksum code.
		n := binary.PutVarint(ubuf, int64(count))
		h.Write(ubuf[0:n])
		n = binary.PutUvarint(ubuf, p.Uid)
		h.Write(ubuf[0:n])
		h.Write(p.Value)
		h.Write([]byte(p.Label))
		count++

		// It should be okay to take the pointer from the iterator, because we hold the lock
		// over List, which won't be released until final has been marshalled. Thus, the
		// underlying data won't change.
		final.Postings = append(final.Postings, p)
		return true
	})
	final.Checksum = h.Sum(nil)
	data, err := final.Marshal()
	x.Checkf(err, "Unable to marshal posting list")

	sw := l.StartWait()
	ce := commitEntry{
		key: l.key,
		val: data,
		sw:  sw,
	}
	commitCh <- ce

	// Now reset the mutation variables.
	atomic.StorePointer(&l.pbuffer, nil) // Make prev buffer eligible for GC.
	atomic.StoreInt64(&l.dirtyTs, 0)     // Set as clean.
	l.mlayer = l.mlayer[:0]
	l.lastCompact = time.Now()
	return true, nil
}
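
The checksum written here is what PredicateData (Example #3) later compares against the checksums carried in task.GroupKeys: if the stored list hashes to the same value, the key/value pair is not re-streamed. Below is a minimal, self-contained sketch of the same hashing scheme as a standalone helper; the posting struct and the checksumOf name are simplified stand-ins for the real types package, not part of it.

package main

import (
	"crypto/md5"
	"encoding/binary"
	"fmt"
)

// posting is a stripped-down stand-in for types.Posting.
type posting struct {
	Uid   uint64
	Value []byte
	Label string
}

// checksumOf hashes postings the same way List.commit above does: for each
// posting, feed the running count (varint), the uid (uvarint), the value
// bytes and the label bytes into an md5 sum.
func checksumOf(ps []posting) []byte {
	ubuf := make([]byte, 16)
	h := md5.New()
	for count, p := range ps {
		n := binary.PutVarint(ubuf, int64(count))
		h.Write(ubuf[:n])
		n = binary.PutUvarint(ubuf, p.Uid)
		h.Write(ubuf[:n])
		h.Write(p.Value)
		h.Write([]byte(p.Label))
	}
	return h.Sum(nil)
}

func main() {
	a := []posting{{Uid: 7, Value: []byte("alice")}, {Uid: 9, Value: []byte("bob")}}
	b := []posting{{Uid: 7, Value: []byte("alice")}, {Uid: 9, Value: []byte("bob"), Label: "x"}}
	// Identical lists hash identically; any difference (here, a label) changes the sum.
	fmt.Printf("a: %x\nb: %x\n", checksumOf(a), checksumOf(b))
}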
Example #3
// PredicateData can be used to return data corresponding to a predicate over
// a stream.
func (w *grpcWorker) PredicateData(gkeys *task.GroupKeys, stream Worker_PredicateDataServer) error {
	if !groups().ServesGroup(gkeys.GroupId) {
		return x.Errorf("Group %d not served.", gkeys.GroupId)
	}
	n := groups().Node(gkeys.GroupId)
	if !n.AmLeader() {
		return x.Errorf("Not leader of group: %d", gkeys.GroupId)
	}

	// TODO(pawan) - Shift to CheckPoints once we figure out how to add them to the
	// RocksDB library we are using.
	// http://rocksdb.org/blog/2609/use-checkpoints-for-efficient-snapshots/
	it := pstore.NewIterator()
	defer it.Close()

	for it.SeekToFirst(); it.Valid(); it.Next() {
		k, v := it.Key(), it.Value()
		pk := x.Parse(k.Data())

		if pk == nil {
			continue
		}
		if group.BelongsTo(pk.Attr) != gkeys.GroupId {
			it.Seek(pk.SkipPredicate())
			it.Prev() // Counteract the it.Next() run by the loop, so we land on the next predicate's first key.
			continue
		}

		var pl types.PostingList
		x.Check(pl.Unmarshal(v.Data()))

		idx := sort.Search(len(gkeys.Keys), func(i int) bool {
			t := gkeys.Keys[i]
			return bytes.Compare(k.Data(), t.Key) <= 0
		})

		if idx < len(gkeys.Keys) {
			// Found a match.
			t := gkeys.Keys[idx]
			// Different keys can share the same prefix, so check the Checksum first;
			// it's cheaper when there's no match.
			if bytes.Equal(pl.Checksum, t.Checksum) && bytes.Equal(k.Data(), t.Key) {
				// No need to send this.
				continue
			}
		}

		// We just need to stream this kv, so we can use the key and val directly
		// without copying.
		kv := &task.KV{
			Key: k.Data(),
			Val: v.Data(),
		}

		if err := stream.Send(kv); err != nil {
			return err
		}
	} // end of iterator

	if err := it.Err(); err != nil {
		return err
	}
	return nil
}
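
Taken together, the three functions form a checksum-based delta sync: a follower walks its store and builds GroupKeys with a checksum per key (Example #1), and the leader walks its own store and streams back only the key/value pairs the follower is missing or holds with a different checksum (Example #3). Below is a self-contained toy model of that exchange; it uses a plain map where the real code keeps keys sorted and binary-searches with sort.Search, and the kv and diff names are illustrative, not part of the task package.

package main

import "fmt"

type kv struct{ key, val, checksum string }

// diff mimics PredicateData: walk the leader's data and return only the
// entries the follower is missing or holds with a different checksum.
func diff(leader []kv, followerSums map[string]string) []kv {
	var out []kv
	for _, e := range leader {
		if sum, ok := followerSums[e.key]; ok && sum == e.checksum {
			continue // the follower already has an identical posting list
		}
		out = append(out, e)
	}
	return out
}

func main() {
	leader := []kv{
		{"name|1", "alice", "c1"},
		{"name|2", "bob", "c2"},
		{"name|3", "carol", "c3"},
	}
	// The follower's view, as built by generateGroup: key -> checksum.
	follower := map[string]string{
		"name|1": "c1", // up to date, skipped
		"name|2": "c9", // stale, re-streamed
		// "name|3" is missing entirely, re-streamed
	}
	for _, e := range diff(leader, follower) {
		fmt.Println("stream:", e.key, "=", e.val)
	}
}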