// Entries implements the raft.Storage interface. Note that maxBytes is advisory
// and this method will always return at least one entry even if it exceeds
// maxBytes. Passing maxBytes equal to zero disables size checking.
// TODO(bdarnell): consider caching for recent entries, if rocksdb's builtin caching
// is insufficient.
func (r *Range) Entries(lo, hi, maxBytes uint64) ([]raftpb.Entry, error) {
	// Scan over the log to find the requested entries in the range [lo, hi),
	// stopping once we have enough.
	var ents []raftpb.Entry
	size := uint64(0)
	var ent raftpb.Entry
	scanFunc := func(kv proto.KeyValue) (bool, error) {
		err := gogoproto.Unmarshal(kv.Value.GetBytes(), &ent)
		if err != nil {
			return false, err
		}
		size += uint64(ent.Size())
		ents = append(ents, ent)
		return maxBytes > 0 && size > maxBytes, nil
	}

	_, err := engine.MVCCIterate(r.rm.Engine(),
		keys.RaftLogKey(r.Desc().RaftID, lo),
		keys.RaftLogKey(r.Desc().RaftID, hi),
		proto.ZeroTimestamp, true /* consistent */, nil /* txn */, scanFunc)

	if err != nil {
		return nil, err
	}

	// If neither the number of entries nor the size limitations had an
	// effect, we weren't able to supply everything the client wanted.
	if len(ents) != int(hi-lo) && (maxBytes == 0 || size < maxBytes) {
		return nil, raft.ErrUnavailable
	}

	return ents, nil
}
Example #2
0
func (r *raft) appendEntry(e pb.Entry) {
	e.Term = r.Term
	e.Index = r.raftLog.lastIndex() + 1
	r.LastIndex = r.raftLog.append(r.raftLog.lastIndex(), e)
	r.prs[r.id].update(r.raftLog.lastIndex())
	r.maybeCommit()
}
Example #3
0
func tryRaftLogEntry(kv engine.MVCCKeyValue) (string, error) {
	var ent raftpb.Entry
	if err := maybeUnmarshalInline(kv.Value, &ent); err != nil {
		return "", err
	}
	if ent.Type == raftpb.EntryNormal {
		if len(ent.Data) > 0 {
			_, cmdData := storage.DecodeRaftCommand(ent.Data)
			var cmd storagebase.RaftCommand
			if err := cmd.Unmarshal(cmdData); err != nil {
				return "", err
			}
			ent.Data = nil
			return fmt.Sprintf("%s by %v\n%s\n%s\n", &ent, cmd.OriginReplica, cmd.BatchRequest, &cmd), nil
		}
		return fmt.Sprintf("%s: EMPTY\n", &ent), nil
	} else if ent.Type == raftpb.EntryConfChange {
		var cc raftpb.ConfChange
		if err := cc.Unmarshal(ent.Data); err != nil {
			return "", err
		}
		var ctx storage.ConfChangeContext
		if err := ctx.Unmarshal(cc.Context); err != nil {
			return "", err
		}
		var cmd storagebase.ReplicatedEvalResult
		if err := cmd.Unmarshal(ctx.Payload); err != nil {
			return "", err
		}
		ent.Data = nil
		return fmt.Sprintf("%s\n%s\n", &ent, &cmd), nil
	}
	return "", fmt.Errorf("unknown log entry type: %s", &ent)
}
Example #4
0
func printRaftLogEntry(kv engine.MVCCKeyValue) (bool, error) {
	var meta engine.MVCCMetadata
	if err := meta.Unmarshal(kv.Value); err != nil {
		return false, err
	}
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	}
	var ent raftpb.Entry
	if err := value.GetProto(&ent); err != nil {
		return false, err
	}
	if len(ent.Data) > 0 {
		_, cmdData := storage.DecodeRaftCommand(ent.Data)
		var cmd roachpb.RaftCommand
		if err := cmd.Unmarshal(cmdData); err != nil {
			return false, err
		}
		ent.Data = nil
		fmt.Printf("%s\n", &ent)
		fmt.Printf("%s\n", &cmd)
	} else {
		fmt.Printf("%s: EMPTY\n", &ent)
	}
	return false, nil
}
Example #5
0
func mustUnmarshalEntry(d []byte) raftpb.Entry {
	var e raftpb.Entry
	if err := e.Unmarshal(d); err != nil {
		panic(err)
	}
	return e
}
Example #6
0
// getEntries returns entries between [lo, hi) for specified range.
// If any entries are returned for the specified indexes, they will
// start with index lo and proceed sequentially without gaps until
// 1) all entries exclusive of hi are fetched, 2) > maxBytes of
// entries data is fetched, or 3) a cache miss occurs.
func (rec *raftEntryCache) getEntries(rangeID roachpb.RangeID, lo, hi, maxBytes uint64) (
	[]raftpb.Entry, uint64 /* size in bytes */, uint64 /* next log index */) {
	rec.Lock()
	defer rec.Unlock()
	var ents []raftpb.Entry
	var ent raftpb.Entry
	var bytes uint64
	nextIndex := lo

	fromKey := entryCacheKey{RangeID: rangeID, Index: lo}
	toKey := entryCacheKey{RangeID: rangeID, Index: hi}
	rec.cache.DoRange(func(k, v interface{}) bool {
		ecKey := k.(entryCacheKey)
		if ecKey.Index != nextIndex {
			return true
		}
		ent = v.(raftpb.Entry)
		ents = append(ents, ent)
		bytes += uint64(ent.Size())
		nextIndex++
		if maxBytes > 0 && bytes > maxBytes {
			return true
		}
		return false
	}, fromKey, toKey)

	return ents, bytes, nextIndex
}
Example #7
0
func (w *WAL) SaveEntry(e *raftpb.Entry) error {
	b, err := e.Marshal()
	if err != nil {
		panic(err)
	}
	rec := &walpb.Record{Type: entryType, Data: b}
	return w.encoder.encode(rec)
}
Example #8
0
File: util.go Project: lrita/etcd
func readEntryFrom(r io.Reader, ent *raftpb.Entry) error {
	var l uint64
	if err := binary.Read(r, binary.BigEndian, &l); err != nil {
		return err
	}
	buf := make([]byte, int(l))
	if _, err := io.ReadFull(r, buf); err != nil {
		return err
	}
	return ent.Unmarshal(buf)
}
Example #9
0
File: util.go Project: lrita/etcd
func writeEntryTo(w io.Writer, ent *raftpb.Entry) error {
	size := ent.Size()
	if err := binary.Write(w, binary.BigEndian, uint64(size)); err != nil {
		return err
	}
	b, err := ent.Marshal()
	if err != nil {
		return err
	}
	_, err = w.Write(b)
	return err
}
Example #10
0
func (er *entryReader) readEntry(ent *raftpb.Entry) error {
	var l uint64
	if err := binary.Read(er.r, binary.BigEndian, &l); err != nil {
		return err
	}
	buf := make([]byte, int(l))
	if _, err := io.ReadFull(er.r, buf); err != nil {
		return err
	}
	er.bytes.AddBy(8 + int64(l))
	return ent.Unmarshal(buf)
}
Example #11
0
func (ew *entryWriter) writeEntry(ent *raftpb.Entry) error {
	size := ent.Size()
	if err := binary.Write(ew.w, binary.BigEndian, uint64(size)); err != nil {
		return err
	}
	b, err := ent.Marshal()
	if err != nil {
		return err
	}
	_, err = ew.w.Write(b)
	return err
}
Example #12
0
func (w *Wal) Entries(gid uint32, fromTerm, fromIndex uint64) (es []raftpb.Entry, rerr error) {
	start := w.entryKey(gid, fromTerm, fromIndex)
	prefix := w.prefix(gid)
	itr := w.wals.NewIterator()
	defer itr.Close()

	for itr.Seek(start); itr.ValidForPrefix(prefix); itr.Next() {
		data := itr.Value().Data()
		var e raftpb.Entry
		if err := e.Unmarshal(data); err != nil {
			return es, x.Wrapf(err, "While unmarshal raftpb.Entry")
		}
		es = append(es, e)
	}
	return
}
Example #13
0
func TestSlice(t *testing.T) {
	var i uint64
	offset := uint64(100)
	num := uint64(100)
	last := offset + num
	half := offset + num/2
	halfe := pb.Entry{Index: half, Term: half}

	storage := NewMemoryStorage()
	storage.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: offset}})
	for i = 1; i < num/2; i++ {
		storage.Append([]pb.Entry{{Index: offset + i, Term: offset + i}})
	}
	l := newLog(storage)
	for i = num / 2; i < num; i++ {
		l.append(pb.Entry{Index: offset + i, Term: offset + i})
	}

	tests := []struct {
		from  uint64
		to    uint64
		limit uint64

		w      []pb.Entry
		wpanic bool
	}{
		// test no limit
		{offset - 1, offset + 1, noLimit, nil, true},
		{offset, offset + 1, noLimit, nil, true},
		{half - 1, half + 1, noLimit, []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}}, false},
		{half, half + 1, noLimit, []pb.Entry{{Index: half, Term: half}}, false},
		{last - 1, last, noLimit, []pb.Entry{{Index: last - 1, Term: last - 1}}, false},
		{last, last + 1, noLimit, nil, true},

		// test limit
		{half - 1, half + 1, 0, []pb.Entry{{Index: half - 1, Term: half - 1}}, false},
		{half - 1, half + 1, uint64(halfe.Size() + 1), []pb.Entry{{Index: half - 1, Term: half - 1}}, false},
		{half - 1, half + 1, uint64(halfe.Size() * 2), []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}}, false},
		{half - 1, half + 2, uint64(halfe.Size() * 3), []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}, {Index: half + 1, Term: half + 1}}, false},
		{half, half + 2, uint64(halfe.Size()), []pb.Entry{{Index: half, Term: half}}, false},
		{half, half + 2, uint64(halfe.Size() * 2), []pb.Entry{{Index: half, Term: half}, {Index: half + 1, Term: half + 1}}, false},
	}

	for j, tt := range tests {
		func() {
			defer func() {
				if r := recover(); r != nil {
					if !tt.wpanic {
						t.Errorf("%d: panic = %v, want %v: %v", j, true, false, r)
					}
				}
			}()
			g := l.slice(tt.from, tt.to, tt.limit)
			if !reflect.DeepEqual(g, tt.w) {
				t.Errorf("#%d: from %d to %d = %v, want %v", j, tt.from, tt.to, g, tt.w)
			}
		}()
	}
}
Example #14
0
// Entries implements the raft.Storage interface
func (s *raftStorage) Entries(lo, hi, maxSize uint64) (entries []raftpb.Entry, err error) {
	it := s.db.NewIterator(&kv.Range{Start: s.getEntryKey(lo), Limit: s.getEntryKey(hi)})
	defer it.Release()
	entries = make([]raftpb.Entry, 0)
	sizeSoFar := uint64(0)
	for ok := it.First(); ok; ok = it.Next() {
		var entry raftpb.Entry
		err = entry.Unmarshal(it.Value())
		if err != nil {
			return
		}
		sizeSoFar += uint64(entry.Size())
		// Only stop if we already have at least one entry
		if sizeSoFar > maxSize && len(entries) > 0 {
			break
		}
		entries = append(entries, entry)
		if sizeSoFar >= maxSize {
			break
		}
	}
	err = it.Error()
	return
}
Example #15
0
func (r *Replica) entries(e engine.Engine, lo, hi, maxBytes uint64) ([]raftpb.Entry, error) {
	if lo > hi {
		return nil, util.Errorf("lo:%d is greater than hi:%d", lo, hi)
	}
	// Scan over the log to find the requested entries in the range [lo, hi),
	// stopping once we have enough.
	var ents []raftpb.Entry
	size := uint64(0)
	var ent raftpb.Entry
	expectedIndex := lo
	exceededMaxBytes := false
	scanFunc := func(kv roachpb.KeyValue) (bool, error) {
		if err := kv.Value.GetProto(&ent); err != nil {
			return false, err
		}
		// Exit early if we have any gaps or it has been compacted.
		if ent.Index != expectedIndex {
			return true, nil
		}
		expectedIndex++
		size += uint64(ent.Size())
		ents = append(ents, ent)
		exceededMaxBytes = maxBytes > 0 && size > maxBytes
		return exceededMaxBytes, nil
	}

	rangeID := r.RangeID
	_, err := engine.MVCCIterate(e,
		keys.RaftLogKey(rangeID, lo),
		keys.RaftLogKey(rangeID, hi),
		roachpb.ZeroTimestamp,
		true /* consistent */, nil /* txn */, false /* !reverse */, scanFunc)

	if err != nil {
		return nil, err
	}

	// Did the correct number of results come back? If so, we're all good.
	if len(ents) == int(hi)-int(lo) {
		return ents, nil
	}

	// Did we hit the size limit? If so, return what we have.
	if exceededMaxBytes {
		return ents, nil
	}

	// Did we get any results at all? Because something went wrong.
	if len(ents) > 0 {
		// Was the lo already truncated?
		if ents[0].Index > lo {
			return nil, raft.ErrCompacted
		}

		// Was the missing index after the last index?
		lastIndex, err := r.LastIndex()
		if err != nil {
			return nil, err
		}
		if lastIndex <= expectedIndex {
			return nil, raft.ErrUnavailable
		}

		// We have a gap in the record, if so, return a nasty error.
		return nil, util.Errorf("there is a gap in the index record between lo:%d and hi:%d at index:%d", lo, hi, expectedIndex)
	}

	// No results, was it due to unavailability or truncation?
	ts, err := r.raftTruncatedStateLocked()
	if err != nil {
		return nil, err
	}
	if ts.Index >= lo {
		// The requested lo index has already been truncated.
		return nil, raft.ErrCompacted
	}
	// The requested lo index does not yet exist.
	return nil, raft.ErrUnavailable
}
func entries(
	ctx context.Context,
	e engine.Reader,
	rangeID roachpb.RangeID,
	eCache *raftEntryCache,
	lo, hi, maxBytes uint64,
) ([]raftpb.Entry, error) {
	if lo > hi {
		return nil, errors.Errorf("lo:%d is greater than hi:%d", lo, hi)
	}
	// Scan over the log to find the requested entries in the range [lo, hi),
	// stopping once we have enough.
	ents := make([]raftpb.Entry, 0, hi-lo)
	size := uint64(0)

	hitEnts, hitSize, hitIndex := eCache.getEntries(rangeID, lo, hi, maxBytes)
	// Return results if the correct number of results came back or if
	// we ran into the max bytes limit.
	if uint64(len(hitEnts)) == hi-lo || (maxBytes > 0 && hitSize > maxBytes) {
		return hitEnts, nil
	}

	ents = append(ents, hitEnts...)
	size += hitSize
	expectedIndex := hitIndex

	var ent raftpb.Entry
	exceededMaxBytes := false
	scanFunc := func(kv roachpb.KeyValue) (bool, error) {
		if err := kv.Value.GetProto(&ent); err != nil {
			return false, err
		}
		// Exit early if we have any gaps or it has been compacted.
		if ent.Index != expectedIndex {
			return true, nil
		}
		expectedIndex++
		size += uint64(ent.Size())
		ents = append(ents, ent)
		exceededMaxBytes = maxBytes > 0 && size > maxBytes
		return exceededMaxBytes, nil
	}

	if err := iterateEntries(ctx, e, rangeID, expectedIndex, hi, scanFunc); err != nil {
		return nil, err
	}
	// Cache the fetched entries.
	eCache.addEntries(rangeID, ents)

	// Did the correct number of results come back? If so, we're all good.
	if uint64(len(ents)) == hi-lo {
		return ents, nil
	}

	// Did we hit the size limit? If so, return what we have.
	if exceededMaxBytes {
		return ents, nil
	}

	// Did we get any results at all? Because something went wrong.
	if len(ents) > 0 {
		// Was the lo already truncated?
		if ents[0].Index > lo {
			return nil, raft.ErrCompacted
		}

		// Was the missing index after the last index?
		lastIndex, err := loadLastIndex(ctx, e, rangeID)
		if err != nil {
			return nil, err
		}
		if lastIndex <= expectedIndex {
			return nil, raft.ErrUnavailable
		}

		// We have a gap in the record, if so, return a nasty error.
		return nil, errors.Errorf("there is a gap in the index record between lo:%d and hi:%d at index:%d", lo, hi, expectedIndex)
	}

	// No results, was it due to unavailability or truncation?
	ts, err := loadTruncatedState(ctx, e, rangeID)
	if err != nil {
		return nil, err
	}
	if ts.Index >= lo {
		// The requested lo index has already been truncated.
		return nil, raft.ErrCompacted
	}
	// The requested lo index does not yet exist.
	return nil, raft.ErrUnavailable
}