// Entries implements the raft.Storage interface. Note that maxBytes is advisory
// and this method will always return at least one entry even if it exceeds
// maxBytes. Passing maxBytes equal to zero disables size checking.
// TODO(bdarnell): consider caching for recent entries, if rocksdb's builtin caching
// is insufficient.
func (r *Range) Entries(lo, hi, maxBytes uint64) ([]raftpb.Entry, error) {
	// Scan over the log to find the requested entries in the range [lo, hi),
	// stopping once we have enough.
	var ents []raftpb.Entry
	size := uint64(0)
	var ent raftpb.Entry
	scanFunc := func(kv proto.KeyValue) (bool, error) {
		err := gogoproto.Unmarshal(kv.Value.GetBytes(), &ent)
		if err != nil {
			return false, err
		}
		size += uint64(ent.Size())
		ents = append(ents, ent)
		return maxBytes > 0 && size > maxBytes, nil
	}
	_, err := engine.MVCCIterate(r.rm.Engine(), keys.RaftLogKey(r.Desc().RaftID, lo),
		keys.RaftLogKey(r.Desc().RaftID, hi), proto.ZeroTimestamp,
		true /* consistent */, nil /* txn */, scanFunc)
	if err != nil {
		return nil, err
	}
	// If neither the number of entries nor the size limitations had an
	// effect, we weren't able to supply everything the client wanted.
	if len(ents) != int(hi-lo) && (maxBytes == 0 || size < maxBytes) {
		return nil, raft.ErrUnavailable
	}
	return ents, nil
}
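The maxBytes rule described in the doc comment above (the size limit is advisory: the entry that crosses the limit is still returned, so the result is never empty for that reason, and zero disables the check) can be illustrated in isolation. A minimal sketch, assuming raftpb from the etcd raft package; limitSize is a hypothetical helper, not part of the code above.

// limitSize applies the advisory size limit: maxBytes == 0 disables the
// check, and the entry that first pushes the running total past maxBytes is
// still included, so at least one entry is always returned.
func limitSize(ents []raftpb.Entry, maxBytes uint64) []raftpb.Entry {
	if maxBytes == 0 {
		return ents
	}
	var size uint64
	for i, e := range ents {
		size += uint64(e.Size())
		if size > maxBytes {
			return ents[:i+1]
		}
	}
	return ents
}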
// appendEntry stamps e with the current term and the next log index, appends
// it to the raft log, records the new last index in the leader's own progress,
// and then attempts to advance the commit index.
func (r *raft) appendEntry(e pb.Entry) {
	e.Term = r.Term
	e.Index = r.raftLog.lastIndex() + 1
	r.LastIndex = r.raftLog.append(r.raftLog.lastIndex(), e)
	r.prs[r.id].update(r.raftLog.lastIndex())
	r.maybeCommit()
}
// tryRaftLogEntry attempts to decode kv as a raft log entry and returns a
// human-readable representation of the entry and its embedded command.
func tryRaftLogEntry(kv engine.MVCCKeyValue) (string, error) {
	var ent raftpb.Entry
	if err := maybeUnmarshalInline(kv.Value, &ent); err != nil {
		return "", err
	}
	if ent.Type == raftpb.EntryNormal {
		if len(ent.Data) > 0 {
			_, cmdData := storage.DecodeRaftCommand(ent.Data)
			var cmd storagebase.RaftCommand
			if err := cmd.Unmarshal(cmdData); err != nil {
				return "", err
			}
			ent.Data = nil
			return fmt.Sprintf("%s by %v\n%s\n%s\n", &ent, cmd.OriginReplica, cmd.BatchRequest, &cmd), nil
		}
		return fmt.Sprintf("%s: EMPTY\n", &ent), nil
	} else if ent.Type == raftpb.EntryConfChange {
		var cc raftpb.ConfChange
		if err := cc.Unmarshal(ent.Data); err != nil {
			return "", err
		}
		var ctx storage.ConfChangeContext
		if err := ctx.Unmarshal(cc.Context); err != nil {
			return "", err
		}
		var cmd storagebase.ReplicatedEvalResult
		if err := cmd.Unmarshal(ctx.Payload); err != nil {
			return "", err
		}
		ent.Data = nil
		return fmt.Sprintf("%s\n%s\n", &ent, &cmd), nil
	}
	return "", fmt.Errorf("unknown log entry type: %s", &ent)
}
// printRaftLogEntry decodes kv as an MVCC-wrapped raft log entry and prints
// the entry and, if present, the embedded raft command to stdout.
func printRaftLogEntry(kv engine.MVCCKeyValue) (bool, error) {
	var meta engine.MVCCMetadata
	if err := meta.Unmarshal(kv.Value); err != nil {
		return false, err
	}
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	}
	var ent raftpb.Entry
	if err := value.GetProto(&ent); err != nil {
		return false, err
	}
	if len(ent.Data) > 0 {
		_, cmdData := storage.DecodeRaftCommand(ent.Data)
		var cmd roachpb.RaftCommand
		if err := cmd.Unmarshal(cmdData); err != nil {
			return false, err
		}
		ent.Data = nil
		fmt.Printf("%s\n", &ent)
		fmt.Printf("%s\n", &cmd)
	} else {
		fmt.Printf("%s: EMPTY\n", &ent)
	}
	return false, nil
}
// mustUnmarshalEntry decodes d into a raftpb.Entry, panicking on failure.
func mustUnmarshalEntry(d []byte) raftpb.Entry {
	var e raftpb.Entry
	if err := e.Unmarshal(d); err != nil {
		panic(err)
	}
	return e
}
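A minimal round-trip sketch for mustUnmarshalEntry, assuming raftpb from the etcd raft package; exampleRoundTrip is an illustrative name, not part of the snippet above.

func exampleRoundTrip() raftpb.Entry {
	e := raftpb.Entry{Term: 5, Index: 42, Type: raftpb.EntryNormal, Data: []byte("cmd")}
	b, err := e.Marshal()
	if err != nil {
		panic(err)
	}
	// The decoded entry matches e field for field.
	return mustUnmarshalEntry(b)
}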
// getEntries returns entries between [lo, hi) for specified range.
// If any entries are returned for the specified indexes, they will
// start with index lo and proceed sequentially without gaps until
// 1) all entries exclusive of hi are fetched, 2) > maxBytes of
// entries data is fetched, or 3) a cache miss occurs.
func (rec *raftEntryCache) getEntries(rangeID roachpb.RangeID, lo, hi, maxBytes uint64) (
	[]raftpb.Entry, uint64 /* size in bytes */, uint64 /* next log index */) {
	rec.Lock()
	defer rec.Unlock()
	var ents []raftpb.Entry
	var ent raftpb.Entry
	var bytes uint64
	nextIndex := lo

	fromKey := entryCacheKey{RangeID: rangeID, Index: lo}
	toKey := entryCacheKey{RangeID: rangeID, Index: hi}
	rec.cache.DoRange(func(k, v interface{}) bool {
		ecKey := k.(entryCacheKey)
		if ecKey.Index != nextIndex {
			return true
		}
		ent = v.(raftpb.Entry)
		ents = append(ents, ent)
		bytes += uint64(ent.Size())
		nextIndex++
		if maxBytes > 0 && bytes > maxBytes {
			return true
		}
		return false
	}, fromKey, toKey)
	return ents, bytes, nextIndex
}
// SaveEntry marshals e and appends it to the WAL as a record of entryType.
func (w *WAL) SaveEntry(e *raftpb.Entry) error {
	b, err := e.Marshal()
	if err != nil {
		panic(err)
	}
	rec := &walpb.Record{Type: entryType, Data: b}
	return w.encoder.encode(rec)
}
// readEntryFrom reads a single length-prefixed entry from r: an 8-byte
// big-endian length followed by the marshaled raftpb.Entry.
func readEntryFrom(r io.Reader, ent *raftpb.Entry) error {
	var l uint64
	if err := binary.Read(r, binary.BigEndian, &l); err != nil {
		return err
	}
	buf := make([]byte, int(l))
	if _, err := io.ReadFull(r, buf); err != nil {
		return err
	}
	return ent.Unmarshal(buf)
}
// writeEntryTo writes ent to w in the framing readEntryFrom expects: an
// 8-byte big-endian length prefix followed by the marshaled entry.
func writeEntryTo(w io.Writer, ent *raftpb.Entry) error {
	size := ent.Size()
	if err := binary.Write(w, binary.BigEndian, uint64(size)); err != nil {
		return err
	}
	b, err := ent.Marshal()
	if err != nil {
		return err
	}
	_, err = w.Write(b)
	return err
}
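A round-trip sketch for the two framing helpers above (writeEntryTo and readEntryFrom), assuming raftpb and the standard library bytes package; exampleEntryFraming is an illustrative name, not part of the snippets above.

func exampleEntryFraming() (raftpb.Entry, error) {
	var buf bytes.Buffer
	in := raftpb.Entry{Term: 1, Index: 7, Data: []byte("payload")}
	// Write the 8-byte length prefix plus the marshaled entry, then read it back.
	if err := writeEntryTo(&buf, &in); err != nil {
		return raftpb.Entry{}, err
	}
	var out raftpb.Entry
	if err := readEntryFrom(&buf, &out); err != nil {
		return raftpb.Entry{}, err
	}
	return out, nil // out matches in
}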
// readEntry reads one length-prefixed entry from the underlying reader and
// adds the bytes consumed (8-byte prefix plus payload) to the running counter.
func (er *entryReader) readEntry(ent *raftpb.Entry) error {
	var l uint64
	if err := binary.Read(er.r, binary.BigEndian, &l); err != nil {
		return err
	}
	buf := make([]byte, int(l))
	if _, err := io.ReadFull(er.r, buf); err != nil {
		return err
	}
	er.bytes.AddBy(8 + int64(l))
	return ent.Unmarshal(buf)
}
// writeEntry writes ent to the underlying writer with an 8-byte big-endian
// length prefix, matching the framing readEntry consumes.
func (ew *entryWriter) writeEntry(ent *raftpb.Entry) error {
	size := ent.Size()
	if err := binary.Write(ew.w, binary.BigEndian, uint64(size)); err != nil {
		return err
	}
	b, err := ent.Marshal()
	if err != nil {
		return err
	}
	_, err = ew.w.Write(b)
	return err
}
// Entries returns the log entries for group gid, starting at the key for
// (fromTerm, fromIndex) and iterating while the group's key prefix matches.
func (w *Wal) Entries(gid uint32, fromTerm, fromIndex uint64) (es []raftpb.Entry, rerr error) {
	start := w.entryKey(gid, fromTerm, fromIndex)
	prefix := w.prefix(gid)
	itr := w.wals.NewIterator()
	defer itr.Close()

	for itr.Seek(start); itr.ValidForPrefix(prefix); itr.Next() {
		data := itr.Value().Data()
		var e raftpb.Entry
		if err := e.Unmarshal(data); err != nil {
			return es, x.Wrapf(err, "While unmarshal raftpb.Entry")
		}
		es = append(es, e)
	}
	return
}
func TestSlice(t *testing.T) {
	var i uint64
	offset := uint64(100)
	num := uint64(100)
	last := offset + num
	half := offset + num/2
	halfe := pb.Entry{Index: half, Term: half}

	storage := NewMemoryStorage()
	storage.ApplySnapshot(pb.Snapshot{Metadata: pb.SnapshotMetadata{Index: offset}})
	for i = 1; i < num/2; i++ {
		storage.Append([]pb.Entry{{Index: offset + i, Term: offset + i}})
	}
	l := newLog(storage)
	for i = num / 2; i < num; i++ {
		l.append(pb.Entry{Index: offset + i, Term: offset + i})
	}

	tests := []struct {
		from   uint64
		to     uint64
		limit  uint64
		w      []pb.Entry
		wpanic bool
	}{
		// test no limit
		{offset - 1, offset + 1, noLimit, nil, true},
		{offset, offset + 1, noLimit, nil, true},
		{half - 1, half + 1, noLimit, []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}}, false},
		{half, half + 1, noLimit, []pb.Entry{{Index: half, Term: half}}, false},
		{last - 1, last, noLimit, []pb.Entry{{Index: last - 1, Term: last - 1}}, false},
		{last, last + 1, noLimit, nil, true},
		// test limit
		{half - 1, half + 1, 0, []pb.Entry{{Index: half - 1, Term: half - 1}}, false},
		{half - 1, half + 1, uint64(halfe.Size() + 1), []pb.Entry{{Index: half - 1, Term: half - 1}}, false},
		{half - 1, half + 1, uint64(halfe.Size() * 2), []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}}, false},
		{half - 1, half + 2, uint64(halfe.Size() * 3), []pb.Entry{{Index: half - 1, Term: half - 1}, {Index: half, Term: half}, {Index: half + 1, Term: half + 1}}, false},
		{half, half + 2, uint64(halfe.Size()), []pb.Entry{{Index: half, Term: half}}, false},
		{half, half + 2, uint64(halfe.Size() * 2), []pb.Entry{{Index: half, Term: half}, {Index: half + 1, Term: half + 1}}, false},
	}

	for j, tt := range tests {
		func() {
			defer func() {
				if r := recover(); r != nil {
					if !tt.wpanic {
						t.Errorf("%d: panic = %v, want %v: %v", j, true, false, r)
					}
				}
			}()
			g := l.slice(tt.from, tt.to, tt.limit)
			if !reflect.DeepEqual(g, tt.w) {
				t.Errorf("#%d: from %d to %d = %v, want %v", j, tt.from, tt.to, g, tt.w)
			}
		}()
	}
}
// Entries implements the raft.Storage interface.
func (s *raftStorage) Entries(lo, hi, maxSize uint64) (entries []raftpb.Entry, err error) {
	it := s.db.NewIterator(&kv.Range{Start: s.getEntryKey(lo), Limit: s.getEntryKey(hi)})
	defer it.Release()
	entries = make([]raftpb.Entry, 0)
	sizeSoFar := uint64(0)
	for ok := it.First(); ok; ok = it.Next() {
		var entry raftpb.Entry
		err = entry.Unmarshal(it.Value())
		if err != nil {
			return
		}
		sizeSoFar += uint64(entry.Size())
		// Only stop if we already have at least one entry.
		if sizeSoFar > maxSize && len(entries) > 0 {
			break
		}
		entries = append(entries, entry)
		if sizeSoFar >= maxSize {
			break
		}
	}
	err = it.Error()
	return
}
// entries retrieves raft log entries in the range [lo, hi) from e, stopping
// early once more than maxBytes of entry data has been accumulated (maxBytes
// of zero disables the size check).
func (r *Replica) entries(e engine.Engine, lo, hi, maxBytes uint64) ([]raftpb.Entry, error) {
	if lo > hi {
		return nil, util.Errorf("lo:%d is greater than hi:%d", lo, hi)
	}
	// Scan over the log to find the requested entries in the range [lo, hi),
	// stopping once we have enough.
	var ents []raftpb.Entry
	size := uint64(0)
	var ent raftpb.Entry
	expectedIndex := lo
	exceededMaxBytes := false
	scanFunc := func(kv roachpb.KeyValue) (bool, error) {
		if err := kv.Value.GetProto(&ent); err != nil {
			return false, err
		}
		// Exit early if we have any gaps or it has been compacted.
		if ent.Index != expectedIndex {
			return true, nil
		}
		expectedIndex++
		size += uint64(ent.Size())
		ents = append(ents, ent)
		exceededMaxBytes = maxBytes > 0 && size > maxBytes
		return exceededMaxBytes, nil
	}

	rangeID := r.RangeID
	_, err := engine.MVCCIterate(e, keys.RaftLogKey(rangeID, lo), keys.RaftLogKey(rangeID, hi),
		roachpb.ZeroTimestamp, true /* consistent */, nil /* txn */, false /* !reverse */, scanFunc)
	if err != nil {
		return nil, err
	}

	// Did the correct number of results come back? If so, we're all good.
	if len(ents) == int(hi)-int(lo) {
		return ents, nil
	}

	// Did we hit the size limit? If so, return what we have.
	if exceededMaxBytes {
		return ents, nil
	}

	// Did we get any results at all? Because something went wrong.
	if len(ents) > 0 {
		// Was the lo already truncated?
		if ents[0].Index > lo {
			return nil, raft.ErrCompacted
		}

		// Was the missing index after the last index?
		lastIndex, err := r.LastIndex()
		if err != nil {
			return nil, err
		}
		if lastIndex <= expectedIndex {
			return nil, raft.ErrUnavailable
		}

		// We have a gap in the record, if so, return a nasty error.
		return nil, util.Errorf("there is a gap in the index record between lo:%d and hi:%d at index:%d", lo, hi, expectedIndex)
	}

	// No results, was it due to unavailability or truncation?
	ts, err := r.raftTruncatedStateLocked()
	if err != nil {
		return nil, err
	}
	if ts.Index >= lo {
		// The requested lo index has already been truncated.
		return nil, raft.ErrCompacted
	}
	// The requested lo index does not yet exist.
	return nil, raft.ErrUnavailable
}
// entries retrieves raft log entries in [lo, hi) for rangeID, first consulting
// the entry cache and falling back to scanning the engine; entries fetched
// from the engine are added back to the cache.
func entries(
	ctx context.Context,
	e engine.Reader,
	rangeID roachpb.RangeID,
	eCache *raftEntryCache,
	lo, hi, maxBytes uint64,
) ([]raftpb.Entry, error) {
	if lo > hi {
		return nil, errors.Errorf("lo:%d is greater than hi:%d", lo, hi)
	}
	// Scan over the log to find the requested entries in the range [lo, hi),
	// stopping once we have enough.
	ents := make([]raftpb.Entry, 0, hi-lo)
	size := uint64(0)

	hitEnts, hitSize, hitIndex := eCache.getEntries(rangeID, lo, hi, maxBytes)
	// Return results if the correct number of results came back or if
	// we ran into the max bytes limit.
	if uint64(len(hitEnts)) == hi-lo || (maxBytes > 0 && hitSize > maxBytes) {
		return hitEnts, nil
	}

	ents = append(ents, hitEnts...)
	size += hitSize
	expectedIndex := hitIndex

	var ent raftpb.Entry
	exceededMaxBytes := false
	scanFunc := func(kv roachpb.KeyValue) (bool, error) {
		if err := kv.Value.GetProto(&ent); err != nil {
			return false, err
		}
		// Exit early if we have any gaps or it has been compacted.
		if ent.Index != expectedIndex {
			return true, nil
		}
		expectedIndex++
		size += uint64(ent.Size())
		ents = append(ents, ent)
		exceededMaxBytes = maxBytes > 0 && size > maxBytes
		return exceededMaxBytes, nil
	}

	if err := iterateEntries(ctx, e, rangeID, expectedIndex, hi, scanFunc); err != nil {
		return nil, err
	}
	// Cache the fetched entries.
	eCache.addEntries(rangeID, ents)

	// Did the correct number of results come back? If so, we're all good.
	if uint64(len(ents)) == hi-lo {
		return ents, nil
	}

	// Did we hit the size limit? If so, return what we have.
	if exceededMaxBytes {
		return ents, nil
	}

	// Did we get any results at all? Because something went wrong.
	if len(ents) > 0 {
		// Was the lo already truncated?
		if ents[0].Index > lo {
			return nil, raft.ErrCompacted
		}

		// Was the missing index after the last index?
		lastIndex, err := loadLastIndex(ctx, e, rangeID)
		if err != nil {
			return nil, err
		}
		if lastIndex <= expectedIndex {
			return nil, raft.ErrUnavailable
		}

		// We have a gap in the record, if so, return a nasty error.
		return nil, errors.Errorf("there is a gap in the index record between lo:%d and hi:%d at index:%d", lo, hi, expectedIndex)
	}

	// No results, was it due to unavailability or truncation?
	ts, err := loadTruncatedState(ctx, e, rangeID)
	if err != nil {
		return nil, err
	}
	if ts.Index >= lo {
		// The requested lo index has already been truncated.
		return nil, raft.ErrCompacted
	}
	// The requested lo index does not yet exist.
	return nil, raft.ErrUnavailable
}
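Callers of a raft.Storage-style Entries method (such as the CockroachDB variants above) typically distinguish the two sentinel errors from real failures. A caller-side sketch, assuming the etcd raft package; readLogSpan and the storage value s are illustrative names, not from the snippets above.

func readLogSpan(s raft.Storage, lo, hi, maxBytes uint64) ([]raftpb.Entry, error) {
	ents, err := s.Entries(lo, hi, maxBytes)
	switch err {
	case nil:
		return ents, nil
	case raft.ErrCompacted:
		// lo falls at or below the truncated prefix of the log; the caller
		// must fall back to sending a snapshot instead of entries.
		return nil, err
	case raft.ErrUnavailable:
		// The requested span reaches past the last available index.
		return nil, err
	default:
		// Unexpected gap or storage failure.
		return nil, err
	}
}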