Esempio n. 1
0
func (s *state) logRaftReady(readyGroups map[uint64]raft.Ready) {
	for groupID, ready := range readyGroups {
		if log.V(5) {
			log.Infof("node %v: group %v raft ready", s.nodeID, groupID)
			if ready.SoftState != nil {
				log.Infof("SoftState updated: %+v", *ready.SoftState)
			}
			if !raft.IsEmptyHardState(ready.HardState) {
				log.Infof("HardState updated: %+v", ready.HardState)
			}
			for i, e := range ready.Entries {
				log.Infof("New Entry[%d]: %.200s", i, raft.DescribeEntry(e, s.EntryFormatter))
			}
			for i, e := range ready.CommittedEntries {
				log.Infof("Committed Entry[%d]: %.200s", i, raft.DescribeEntry(e, s.EntryFormatter))
			}
			if !raft.IsEmptySnap(ready.Snapshot) {
				log.Infof("Snapshot updated: %.200s", ready.Snapshot.String())
			}
			for i, m := range ready.Messages {
				log.Infof("Outgoing Message[%d]: %.200s", i, raft.DescribeMessage(m, s.EntryFormatter))
			}
		}
	}
}
Esempio n. 2
0
// handleWriteReady converts a set of raft.Ready structs into a writeRequest
// to be persisted, marks the group as writing and sends it to the writeTask.
func (s *state) handleWriteReady(readyGroups map[uint64]raft.Ready) {
	if log.V(6) {
		log.Infof("node %v write ready, preparing request", s.nodeID)
	}
	writeRequest := newWriteRequest()
	for groupID, ready := range readyGroups {
		raftGroupID := proto.RaftID(groupID)
		g, ok := s.groups[raftGroupID]
		if !ok {
			if log.V(6) {
				log.Infof("dropping write request to group %d", groupID)
			}
			continue
		}
		g.writing = true

		gwr := &groupWriteRequest{}
		if !raft.IsEmptyHardState(ready.HardState) {
			gwr.state = ready.HardState
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			gwr.snapshot = ready.Snapshot
		}
		if len(ready.Entries) > 0 {
			gwr.entries = ready.Entries
		}
		writeRequest.groups[raftGroupID] = gwr
	}
	s.writeTask.in <- writeRequest
}
Esempio n. 3
0
func (n *Node) start() {
	tk := time.Tick(5 * time.Millisecond)
	for {
		select {
		case <-tk:
			n.Tick()
		case rd := <-n.Ready():
			if !raft.IsEmptyHardState(rd.HardState) {
				n.state = rd.HardState
				n.storage.SetHardState(n.state)
			}
			n.storage.Append(rd.Entries)
			n.send(rd.Messages)
			if !raft.IsEmptySnap(rd.Snapshot) {
				n.storage.ApplySnapshot(rd.Snapshot)
			}
			time.Sleep(time.Millisecond)
			for _, entry := range rd.CommittedEntries {
				n.process(entry)
				// if entry.Type == raftpb.EntryConfChange {
				// }
				// 	var cc raftpb.ConfChange
				// 	cc.Unmarshal(entry.Data)
				// 	n.ApplyConfChange(cc)
			}
			n.Advance()
		case m := <-n.receive():
			n.Step(context.TODO(), m)
		}
	}
}
Esempio n. 4
0
func logRaftReady(storeID roachpb.StoreID, groupID roachpb.RangeID, ready raft.Ready) {
	if log.V(5) {
		// Globally synchronize to avoid interleaving different sets of logs in tests.
		logRaftReadyMu.Lock()
		defer logRaftReadyMu.Unlock()
		log.Infof("store %s: group %s raft ready", storeID, groupID)
		if ready.SoftState != nil {
			log.Infof("SoftState updated: %+v", *ready.SoftState)
		}
		if !raft.IsEmptyHardState(ready.HardState) {
			log.Infof("HardState updated: %+v", ready.HardState)
		}
		for i, e := range ready.Entries {
			log.Infof("New Entry[%d]: %.200s", i, raft.DescribeEntry(e, raftEntryFormatter))
		}
		for i, e := range ready.CommittedEntries {
			log.Infof("Committed Entry[%d]: %.200s", i, raft.DescribeEntry(e, raftEntryFormatter))
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			log.Infof("Snapshot updated: %.200s", ready.Snapshot.String())
		}
		for i, m := range ready.Messages {
			log.Infof("Outgoing Message[%d]: %.200s", i, raft.DescribeMessage(m, raftEntryFormatter))
		}
	}
}
Esempio n. 5
0
func (n *node) start() {
	n.stopc = make(chan struct{})
	ticker := time.Tick(5 * time.Millisecond)

	go func() {
		for {
			select {
			case <-ticker:
				n.Tick()
			case rd := <-n.Ready():
				if !raft.IsEmptyHardState(rd.HardState) {
					n.state = rd.HardState
					n.storage.SetHardState(n.state)
				}
				n.storage.Append(rd.Entries)
				go func() {
					for _, m := range rd.Messages {
						n.iface.send(m)
					}
				}()
				n.Advance()
			case m := <-n.iface.recv():
				n.Step(context.TODO(), m)
			case <-n.stopc:
				n.Stop()
				log.Printf("raft.%d: stop", n.id)
				n.Node = nil
				close(n.stopc)
				return
			}
		}
	}()
}
Esempio n. 6
0
func updateHardState(eng engine.ReadWriter, s storagebase.ReplicaState) error {
	// Load a potentially existing HardState as we may need to preserve
	// information about cast votes. For example, during a Split for which
	// another node's new right-hand side has contacted us before our left-hand
	// side called in here to create the group.
	rangeID := s.Desc.RangeID
	oldHS, err := loadHardState(eng, rangeID)
	if err != nil {
		return err
	}

	newHS := raftpb.HardState{
		Term:   s.TruncatedState.Term,
		Commit: s.RaftAppliedIndex,
	}

	if !raft.IsEmptyHardState(oldHS) {
		if oldHS.Commit > newHS.Commit {
			newHS.Commit = oldHS.Commit
		}
		if oldHS.Term > newHS.Term {
			newHS.Term = oldHS.Term
		}
		newHS.Vote = oldHS.Vote
	}

	return setHardState(eng, rangeID, newHS)
}
Esempio n. 7
0
// Don't call this multiple times concurrently
func (s *raftStorage) save(state raftpb.HardState, entries []raftpb.Entry) error {
	wb := s.db.NewBatch()
	if !raft.IsEmptyHardState(state) {
		stateBytes, err := state.Marshal()
		if err != nil {
			return err
		}
		wb.Put(s.hardStateKey, stateBytes)
	}
	if len(entries) > 0 {
		lastIndex, err := s.LastIndex()
		if err != nil {
			return err
		}
		if entries[0].Index > lastIndex+1 {
			panic(fmt.Errorf("missing log entries [last: %d, append at: %d]", lastIndex, entries[0].Index))
		}
		// clear all old entries past the new index, if any
		for ix := entries[0].Index; ix <= lastIndex; ix++ {
			wb.Delete(s.getEntryKey(ix))
		}
		// append the new entries
		for _, entry := range entries {
			entryBytes, err := entry.Marshal()
			if err != nil {
				return err
			}
			wb.Put(s.getEntryKey(entry.Index), entryBytes)
		}
	}
	err := s.db.Write(wb)
	return err
}
Esempio n. 8
0
// HardState contains term, vote and commit.
// Snapshot contains data and snapshot metadata.
func (n *node) saveToStorage(hardState raftpb.HardState,
	entries []raftpb.Entry, snapshot raftpb.Snapshot) {

	if !raft.IsEmptySnap(snapshot) {
		fmt.Printf("saveToStorage snapshot: %v\n", snapshot.String())
		le, err := n.store.LastIndex()
		if err != nil {
			log.Fatalf("While retrieving last index: %v\n", err)
		}
		te, err := n.store.Term(le)
		if err != nil {
			log.Fatalf("While retrieving term: %v\n", err)
		}
		fmt.Printf("%d node Term for le: %v is %v\n", n.id, le, te)
		if snapshot.Metadata.Index <= le {
			fmt.Printf("%d node ignoring snapshot. Last index: %v\n", n.id, le)
			return
		}

		if err := n.store.ApplySnapshot(snapshot); err != nil {
			log.Fatalf("Applying snapshot: %v", err)
		}
	}

	if !raft.IsEmptyHardState(hardState) {
		n.store.SetHardState(hardState)
	}
	n.store.Append(entries)
}
Esempio n. 9
0
func logRaftReady(ctx context.Context, prefix fmt.Stringer, ready raft.Ready) {
	if log.V(5) {
		var buf bytes.Buffer
		if ready.SoftState != nil {
			fmt.Fprintf(&buf, "  SoftState updated: %+v\n", *ready.SoftState)
		}
		if !raft.IsEmptyHardState(ready.HardState) {
			fmt.Fprintf(&buf, "  HardState updated: %+v\n", ready.HardState)
		}
		for i, e := range ready.Entries {
			fmt.Fprintf(&buf, "  New Entry[%d]: %.200s\n",
				i, raft.DescribeEntry(e, raftEntryFormatter))
		}
		for i, e := range ready.CommittedEntries {
			fmt.Fprintf(&buf, "  Committed Entry[%d]: %.200s\n",
				i, raft.DescribeEntry(e, raftEntryFormatter))
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			fmt.Fprintf(&buf, "  Snapshot updated: %.200s\n", ready.Snapshot.String())
		}
		for i, m := range ready.Messages {
			fmt.Fprintf(&buf, "  Outgoing Message[%d]: %.200s\n",
				i, raft.DescribeMessage(m, raftEntryFormatter))
		}
		log.Infof(ctx, "%s raft ready\n%s", prefix, buf.String())
	}
}
Esempio n. 10
0
File: wal.go Progetto: rnd-ua/scope
func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) error {
	w.mu.Lock()
	defer w.mu.Unlock()

	// short cut, do not call sync
	if raft.IsEmptyHardState(st) && len(ents) == 0 {
		return nil
	}

	// TODO(xiangli): no more reference operator
	for i := range ents {
		if err := w.saveEntry(&ents[i]); err != nil {
			return err
		}
	}
	if err := w.saveState(&st); err != nil {
		return err
	}

	fstat, err := w.f.Stat()
	if err != nil {
		return err
	}
	if fstat.Size() < segmentSizeBytes {
		return w.sync()
	}
	// TODO: add a test for this code path when refactoring the tests
	return w.cut()
}
Esempio n. 11
0
func (w *WAL) Save(st raftpb.HardState, ents []raftpb.Entry) error {
	w.mu.Lock()
	defer w.mu.Unlock()

	// short cut, do not call sync
	if raft.IsEmptyHardState(st) && len(ents) == 0 {
		return nil
	}

	mustSync := mustSync(st, w.state, len(ents))

	// TODO(xiangli): no more reference operator
	for i := range ents {
		if err := w.saveEntry(&ents[i]); err != nil {
			return err
		}
	}
	if err := w.saveState(&st); err != nil {
		return err
	}

	curOff, err := w.tail().Seek(0, os.SEEK_CUR)
	if err != nil {
		return err
	}
	if curOff < SegmentSizeBytes {
		if mustSync {
			return w.sync()
		}
		return nil
	}

	// TODO: add a test for this code path when refactoring the tests
	return w.cut()
}
Esempio n. 12
0
File: wal.go Progetto: dterei/etcd
func (w *WAL) SaveState(s *raftpb.HardState) error {
	if raft.IsEmptyHardState(*s) {
		return nil
	}
	b := pbutil.MustMarshal(s)
	rec := &walpb.Record{Type: stateType, Data: b}
	return w.encoder.encode(rec)
}
Esempio n. 13
0
func (s *state) handleRaftReady(readyGroups map[uint64]raft.Ready) {
	// Soft state is updated immediately; everything else waits for handleWriteReady.
	for groupID, ready := range readyGroups {
		if log.V(5) {
			log.Infof("node %v: group %v raft ready", s.nodeID, groupID)
			if ready.SoftState != nil {
				log.Infof("SoftState updated: %+v", *ready.SoftState)
			}
			if !raft.IsEmptyHardState(ready.HardState) {
				log.Infof("HardState updated: %+v", ready.HardState)
			}
			for i, e := range ready.Entries {
				log.Infof("New Entry[%d]: %.200s", i, raft.DescribeEntry(e, s.EntryFormatter))
			}
			for i, e := range ready.CommittedEntries {
				log.Infof("Committed Entry[%d]: %.200s", i, raft.DescribeEntry(e, s.EntryFormatter))
			}
			if !raft.IsEmptySnap(ready.Snapshot) {
				log.Infof("Snapshot updated: %.200s", ready.Snapshot.String())
			}
			for i, m := range ready.Messages {
				log.Infof("Outgoing Message[%d]: %.200s", i, raft.DescribeMessage(m, s.EntryFormatter))
			}
		}

		g, ok := s.groups[groupID]
		if !ok {
			// This is a stale message for a removed group
			log.V(4).Infof("node %v: dropping stale ready message for group %v", s.nodeID, groupID)
			continue
		}
		term := g.committedTerm
		if ready.SoftState != nil {
			// Always save the leader whenever we get a SoftState.
			g.leader = NodeID(ready.SoftState.Lead)
		}
		if len(ready.CommittedEntries) > 0 {
			term = ready.CommittedEntries[len(ready.CommittedEntries)-1].Term
		}
		if term != g.committedTerm && g.leader != 0 {
			// Whenever the committed term has advanced and we know our leader,
			// emit an event.
			g.committedTerm = term
			s.sendEvent(&EventLeaderElection{
				GroupID: groupID,
				NodeID:  NodeID(g.leader),
				Term:    g.committedTerm,
			})

			// Re-submit all pending proposals
			for _, prop := range g.pending {
				s.proposalChan <- prop
			}
		}
	}
}
Esempio n. 14
0
// writeInitialState bootstraps a new Raft group (i.e. it is called when we
// bootstrap a Range, or when setting up the right hand side of a split).
// Its main task is to persist a consistent Raft (and associated Replica) state
// which does not start from zero but presupposes a few entries already having
// applied.
// The supplied MVCCStats are used for the Stats field after adjusting for
// persisting the state itself, and the updated stats are returned.
func writeInitialState(
	eng engine.ReadWriter, ms enginepb.MVCCStats, desc roachpb.RangeDescriptor,
) (enginepb.MVCCStats, error) {
	rangeID := desc.RangeID
	var s storagebase.ReplicaState

	s.TruncatedState = &roachpb.RaftTruncatedState{
		Term:  raftInitialLogTerm,
		Index: raftInitialLogIndex,
	}
	s.RaftAppliedIndex = s.TruncatedState.Index
	s.Desc = &roachpb.RangeDescriptor{
		RangeID: rangeID,
	}
	s.Stats = ms

	newMS, err := saveState(eng, s)
	if err != nil {
		return enginepb.MVCCStats{}, err
	}

	// Load a potentially existing HardState as we may need to preserve
	// information about cast votes. For example, during a Split for which
	// another node's new right-hand side has contacted us before our left-hand
	// side called in here to create the group.
	oldHS, err := loadHardState(eng, rangeID)
	if err != nil {
		return enginepb.MVCCStats{}, err
	}

	newHS := raftpb.HardState{
		Term:   s.TruncatedState.Term,
		Commit: s.TruncatedState.Index,
	}

	if !raft.IsEmptyHardState(oldHS) {
		if oldHS.Commit > newHS.Commit {
			newHS.Commit = oldHS.Commit
		}
		if oldHS.Term > newHS.Term {
			newHS.Term = oldHS.Term
		}
		newHS.Vote = oldHS.Vote
	}

	if err := setHardState(eng, rangeID, newHS); err != nil {
		return enginepb.MVCCStats{}, err
	}

	if err := setLastIndex(eng, rangeID, s.TruncatedState.Index); err != nil {
		return enginepb.MVCCStats{}, err
	}

	return newMS, nil
}
Esempio n. 15
0
// Saves a log entry to our Store
func (n *Node) saveToStorage(hardState raftpb.HardState, entries []raftpb.Entry, snapshot raftpb.Snapshot) {
	n.Store.Append(entries)

	if !raft.IsEmptyHardState(hardState) {
		n.Store.SetHardState(hardState)
	}

	if !raft.IsEmptySnap(snapshot) {
		n.Store.ApplySnapshot(snapshot)
	}
}
Esempio n. 16
0
func (w *WAL) SaveState(s *raftpb.HardState) error {
	if raft.IsEmptyHardState(*s) {
		return nil
	}
	b, err := s.Marshal()
	if err != nil {
		panic(err)
	}
	rec := &walpb.Record{Type: stateType, Data: b}
	return w.encoder.encode(rec)
}
Esempio n. 17
0
func (w *WAL) SaveState(s *raftpb.HardState) error {
	if raft.IsEmptyHardState(*s) {
		return nil
	}
	log.Printf("path=%s wal.saveState state=\"%+v\"", w.f.Name(), s)
	b, err := s.Marshal()
	if err != nil {
		panic(err)
	}
	rec := &walpb.Record{Type: stateType, Data: b}
	return w.encoder.encode(rec)
}
Esempio n. 18
0
// start runs the storage loop in a goroutine.
func (w *writeTask) start(stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		for {
			var request *writeRequest
			select {
			case <-w.ready:
				continue
			case <-stopper.ShouldStop():
				return
			case request = <-w.in:
			}
			if log.V(6) {
				log.Infof("writeTask got request %#v", *request)
			}
			response := &writeResponse{make(map[roachpb.RangeID]*groupWriteResponse)}

			for groupID, groupReq := range request.groups {
				group, err := w.storage.GroupStorage(groupID, groupReq.replicaID)
				if err == ErrGroupDeleted {
					if log.V(4) {
						log.Infof("dropping write to deleted group %v", groupID)
					}
					continue
				} else if err != nil {
					log.Fatalf("GroupStorage(group %s, replica %s) failed: %s", groupID,
						groupReq.replicaID, err)
				}
				groupResp := &groupWriteResponse{raftpb.HardState{}, -1, -1, groupReq.entries}
				response.groups[groupID] = groupResp
				if !raft.IsEmptyHardState(groupReq.state) {
					err := group.SetHardState(groupReq.state)
					if err != nil {
						panic(err) // TODO(bdarnell): mark this node dead on storage errors
					}
					groupResp.state = groupReq.state
				}
				if !raft.IsEmptySnap(groupReq.snapshot) {
					err := group.ApplySnapshot(groupReq.snapshot)
					if err != nil {
						panic(err) // TODO(bdarnell)
					}
				}
				if len(groupReq.entries) > 0 {
					err := group.Append(groupReq.entries)
					if err != nil {
						panic(err) // TODO(bdarnell)
					}
				}
			}
			w.out <- response
		}
	})
}
Esempio n. 19
0
// InitialState implements the raft.Storage interface.
// InitialState requires that the replica lock be held.
func (r *Replica) InitialState() (raftpb.HardState, raftpb.ConfState, error) {
	hs, err := loadHardState(context.Background(), r.store.Engine(), r.RangeID)
	// For uninitialized ranges, membership is unknown at this point.
	if raft.IsEmptyHardState(hs) || err != nil {
		return raftpb.HardState{}, raftpb.ConfState{}, err
	}
	var cs raftpb.ConfState
	for _, rep := range r.mu.state.Desc.Replicas {
		cs.Nodes = append(cs.Nodes, uint64(rep.ReplicaID))
	}

	return hs, cs, nil
}
Esempio n. 20
0
// handleWriteReady converts a set of raft.Ready structs into a writeRequest
// to be persisted, marks the group as writing and sends it to the writeTask.
// It will only do this for groups which are tagged via the map.
func (s *state) handleWriteReady(checkReadyGroupIDs map[roachpb.RangeID]struct{}) map[roachpb.RangeID]raft.Ready {
	if log.V(6) {
		log.Infof("node %v write ready, preparing request", s.nodeID)
	}
	s.lockStorage()
	defer s.unlockStorage()
	writeRequest := newWriteRequest()
	readys := make(map[roachpb.RangeID]raft.Ready)
	for groupID := range checkReadyGroupIDs {
		g, ok := s.groups[groupID]
		if !ok {
			if log.V(6) {
				log.Infof("dropping write request to group %d", groupID)
			}
			continue
		}
		if !g.raftGroup.HasReady() {
			continue
		}
		ready := g.raftGroup.Ready()
		readys[groupID] = ready
		g.writing = true

		gwr := &groupWriteRequest{}
		var err error
		gwr.replicaID, err = s.Storage().ReplicaIDForStore(groupID, s.storeID)
		if err != nil {
			if log.V(1) {
				log.Warningf("failed to look up replica ID for range %v (disabling replica ID check): %s",
					groupID, err)
			}
			gwr.replicaID = 0
		}
		if !raft.IsEmptyHardState(ready.HardState) {
			gwr.state = ready.HardState
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			gwr.snapshot = ready.Snapshot
		}
		if len(ready.Entries) > 0 {
			gwr.entries = ready.Entries
		}
		writeRequest.groups[groupID] = gwr
	}
	// If no ready, don't write to writeTask as caller will
	// not wait on s.writeTask.out when len(readys) == 0.
	if len(readys) > 0 {
		s.writeTask.in <- writeRequest
	}
	return readys
}
Esempio n. 21
0
func (n *node) start() {
	n.stopc = make(chan struct{})
	ticker := time.Tick(5 * time.Millisecond)

	go func() {
		for {
			select {
			case <-ticker:
				n.Tick()
			case rd := <-n.Ready():
				if !raft.IsEmptyHardState(rd.HardState) {
					n.mu.Lock()
					n.state = rd.HardState
					n.mu.Unlock()
					n.storage.SetHardState(n.state)
				}
				n.storage.Append(rd.Entries)
				time.Sleep(time.Millisecond)
				// TODO: make send async, more like real world...
				for _, m := range rd.Messages {
					n.iface.send(m)
				}
				n.Advance()
			case m := <-n.iface.recv():
				go n.Step(context.TODO(), m)
			case <-n.stopc:
				n.Stop()
				log.Printf("raft.%d: stop", n.id)
				n.Node = nil
				close(n.stopc)
				return
			case p := <-n.pausec:
				recvms := make([]raftpb.Message, 0)
				for p {
					select {
					case m := <-n.iface.recv():
						recvms = append(recvms, m)
					case p = <-n.pausec:
					}
				}
				// step all pending messages
				for _, m := range recvms {
					n.Step(context.TODO(), m)
				}
			}
		}
	}()
}
Esempio n. 22
0
// Store stores the snapshot, hardstate and entries for a given RAFT group.
func (w *Wal) Store(gid uint32, s raftpb.Snapshot, h raftpb.HardState, es []raftpb.Entry) error {
	b := w.wals.NewWriteBatch()
	defer b.Destroy()

	if !raft.IsEmptySnap(s) {
		data, err := s.Marshal()
		if err != nil {
			return x.Wrapf(err, "wal.Store: While marshal snapshot")
		}
		b.Put(w.snapshotKey(gid), data)
	}

	if !raft.IsEmptyHardState(h) {
		data, err := h.Marshal()
		if err != nil {
			return x.Wrapf(err, "wal.Store: While marshal hardstate")
		}
		b.Put(w.hardStateKey(gid), data)
	}

	var t, i uint64
	for _, e := range es {
		t, i = e.Term, e.Index
		data, err := e.Marshal()
		if err != nil {
			return x.Wrapf(err, "wal.Store: While marshal entry")
		}
		k := w.entryKey(gid, e.Term, e.Index)
		b.Put(k, data)
	}

	// If we get no entries, then the default value of t and i would be zero. That would
	// end up deleting all the previous valid raft entry logs. This check avoids that.
	if t > 0 || i > 0 {
		// Delete all keys above this index.
		start := w.entryKey(gid, t, i+1)
		prefix := w.prefix(gid)
		itr := w.wals.NewIterator()
		defer itr.Close()

		for itr.Seek(start); itr.ValidForPrefix(prefix); itr.Next() {
			b.Delete(itr.Key().Data())
		}
	}

	err := w.wals.WriteBatch(b)
	return x.Wrapf(err, "wal.Store: While WriteBatch")
}
Esempio n. 23
0
// start runs the storage loop. Blocks until stopped, so should be run in a goroutine.
func (w *writeTask) start() {
	for {
		var request *writeRequest
		select {
		case <-w.ready:
			continue
		case <-w.stopper.ShouldStop():
			w.stopper.SetStopped()
			return
		case request = <-w.in:
		}
		log.V(6).Infof("writeTask got request %#v", *request)
		response := &writeResponse{make(map[uint64]*groupWriteResponse)}

		for groupID, groupReq := range request.groups {
			group := w.storage.GroupStorage(groupID)
			if group == nil {
				log.V(4).Infof("dropping write to group %v", groupID)
				continue
			}
			groupResp := &groupWriteResponse{raftpb.HardState{}, -1, -1, groupReq.entries}
			response.groups[groupID] = groupResp
			if !raft.IsEmptyHardState(groupReq.state) {
				err := group.SetHardState(groupReq.state)
				if err != nil {
					panic(err) // TODO(bdarnell): mark this node dead on storage errors
				}
				groupResp.state = groupReq.state
			}
			if !raft.IsEmptySnap(groupReq.snapshot) {
				err := group.ApplySnapshot(groupReq.snapshot)
				if err != nil {
					panic(err) // TODO(bdarnell)
				}
			}
			if len(groupReq.entries) > 0 {
				err := group.Append(groupReq.entries)
				if err != nil {
					panic(err) // TODO(bdarnell)
				}
			}
		}
		w.out <- response
	}
}
Esempio n. 24
0
func (s *state) handleWriteReady(readyGroups map[uint64]raft.Ready) {
	log.V(6).Infof("node %v write ready, preparing request", s.nodeID)
	writeRequest := newWriteRequest()
	for groupID, ready := range readyGroups {
		gwr := &groupWriteRequest{}
		if !raft.IsEmptyHardState(ready.HardState) {
			gwr.state = ready.HardState
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			gwr.snapshot = ready.Snapshot
		}
		if len(ready.Entries) > 0 {
			gwr.entries = ready.Entries
		}
		writeRequest.groups[groupID] = gwr
	}
	s.writeTask.in <- writeRequest
}
Esempio n. 25
0
func (n *node) initFromWal(wal *raftwal.Wal) (restart bool, rerr error) {
	n.wal = wal

	var sp raftpb.Snapshot
	sp, rerr = wal.Snapshot(n.gid)
	if rerr != nil {
		return
	}
	var term, idx uint64
	if !raft.IsEmptySnap(sp) {
		fmt.Printf("Found Snapshot: %+v\n", sp)
		restart = true
		if rerr = n.store.ApplySnapshot(sp); rerr != nil {
			return
		}
		term = sp.Metadata.Term
		idx = sp.Metadata.Index
	}

	var hd raftpb.HardState
	hd, rerr = wal.HardState(n.gid)
	if rerr != nil {
		return
	}
	if !raft.IsEmptyHardState(hd) {
		fmt.Printf("Found hardstate: %+v\n", sp)
		restart = true
		if rerr = n.store.SetHardState(hd); rerr != nil {
			return
		}
	}

	var es []raftpb.Entry
	es, rerr = wal.Entries(n.gid, term, idx)
	if rerr != nil {
		return
	}
	fmt.Printf("Found %d entries\n", len(es))
	if len(es) > 0 {
		restart = true
	}
	rerr = n.store.Append(es)
	return
}
Esempio n. 26
0
func (c *ctrl) readySave(snapshot raftpb.Snapshot, hardState raftpb.HardState, entries []raftpb.Entry) error {
	// For the moment, none of these steps persist to disk. That violates some Raft
	// invariants. But we are ephemeral, and will always boot empty, willingly
	// paying the snapshot cost. I trust that that the etcd Raft implementation
	// permits this.
	if !raft.IsEmptySnap(snapshot) {
		if err := c.storage.ApplySnapshot(snapshot); err != nil {
			return fmt.Errorf("apply snapshot: %v", err)
		}
	}
	if !raft.IsEmptyHardState(hardState) {
		if err := c.storage.SetHardState(hardState); err != nil {
			return fmt.Errorf("set hard state: %v", err)
		}
	}
	if err := c.storage.Append(entries); err != nil {
		return fmt.Errorf("append: %v", err)
	}
	return nil
}
Esempio n. 27
0
func save(rd raft.Ready, st *raft.MemoryStorage) error {
	if !raft.IsEmptyHardState(rd.HardState) {
		if err := st.SetHardState(rd.HardState); err != nil {
			return err
		}
	}

	if len(rd.Entries) > 0 {
		if err := st.Append(rd.Entries); err != nil {
			return err
		}
	}

	if !raft.IsEmptySnap(rd.Snapshot) {
		if err := st.ApplySnapshot(rd.Snapshot); err != nil {
			return err
		}
	}
	return nil
}
Esempio n. 28
0
// handleWriteReady converts a set of raft.Ready structs into a writeRequest
// to be persisted, marks the group as writing and sends it to the writeTask.
func (s *state) handleWriteReady() {
	if log.V(6) {
		log.Infof("node %v write ready, preparing request", s.nodeID)
	}
	s.lockStorage()
	defer s.unlockStorage()
	writeRequest := newWriteRequest()
	for groupID, ready := range s.readyGroups {
		raftGroupID := roachpb.RangeID(groupID)
		g, ok := s.groups[raftGroupID]
		if !ok {
			if log.V(6) {
				log.Infof("dropping write request to group %d", groupID)
			}
			continue
		}
		g.writing = true

		gwr := &groupWriteRequest{}
		var err error
		gwr.replicaID, err = s.Storage().ReplicaIDForStore(roachpb.RangeID(groupID), s.storeID)
		if err != nil {
			if log.V(1) {
				log.Warningf("failed to look up replica ID for range %v (disabling replica ID check): %s",
					groupID, err)
			}
			gwr.replicaID = 0
		}
		if !raft.IsEmptyHardState(ready.HardState) {
			gwr.state = ready.HardState
		}
		if !raft.IsEmptySnap(ready.Snapshot) {
			gwr.snapshot = ready.Snapshot
		}
		if len(ready.Entries) > 0 {
			gwr.entries = ready.Entries
		}
		writeRequest.groups[raftGroupID] = gwr
	}
	s.writeTask.in <- writeRequest
}
Esempio n. 29
0
func (n *node) saveToStorage(s raftpb.Snapshot, h raftpb.HardState,
	es []raftpb.Entry) {
	if !raft.IsEmptySnap(s) {
		le, err := n.store.LastIndex()
		if err != nil {
			log.Fatalf("While retrieving last index: %v\n", err)
		}
		if s.Metadata.Index <= le {
			return
		}

		if err := n.store.ApplySnapshot(s); err != nil {
			log.Fatalf("Applying snapshot: %v", err)
		}
	}

	if !raft.IsEmptyHardState(h) {
		n.store.SetHardState(h)
	}
	n.store.Append(es)
}
Esempio n. 30
0
// start prepares and starts raftNode in a new goroutine. It is no longer safe
// to modify the fields after it has been started.
// TODO: Ideally raftNode should get rid of the passed in server structure.
func (r *raftNode) start(s *EtcdServer) {
	r.s = s
	r.applyc = make(chan apply)
	r.stopped = make(chan struct{})
	r.done = make(chan struct{})

	heartbeat := 200 * time.Millisecond
	if s.Cfg != nil {
		heartbeat = time.Duration(s.Cfg.TickMs) * time.Millisecond
	}
	// set up contention detectors for raft heartbeat message.
	// expect to send a heartbeat within 2 heartbeat intervals.
	r.td = contention.NewTimeoutDetector(2 * heartbeat)

	go func() {
		var syncC <-chan time.Time

		defer r.onStop()
		islead := false

		for {
			select {
			case <-r.ticker:
				r.Tick()
			case rd := <-r.Ready():
				if rd.SoftState != nil {
					if lead := atomic.LoadUint64(&r.lead); rd.SoftState.Lead != raft.None && lead != rd.SoftState.Lead {
						r.mu.Lock()
						r.lt = time.Now()
						r.mu.Unlock()
						leaderChanges.Inc()
					}

					if rd.SoftState.Lead == raft.None {
						hasLeader.Set(0)
					} else {
						hasLeader.Set(1)
					}

					atomic.StoreUint64(&r.lead, rd.SoftState.Lead)
					if rd.RaftState == raft.StateLeader {
						islead = true
						// TODO: raft should send server a notification through chan when
						// it promotes or demotes instead of modifying server directly.
						syncC = r.s.SyncTicker
						if r.s.lessor != nil {
							r.s.lessor.Promote(r.s.Cfg.electionTimeout())
						}
						// TODO: remove the nil checking
						// current test utility does not provide the stats
						if r.s.stats != nil {
							r.s.stats.BecomeLeader()
						}
						if r.s.compactor != nil {
							r.s.compactor.Resume()
						}
						r.td.Reset()
					} else {
						islead = false
						if r.s.lessor != nil {
							r.s.lessor.Demote()
						}
						if r.s.compactor != nil {
							r.s.compactor.Pause()
						}
						syncC = nil
					}
				}

				raftDone := make(chan struct{}, 1)
				ap := apply{
					entries:  rd.CommittedEntries,
					snapshot: rd.Snapshot,
					raftDone: raftDone,
				}

				select {
				case r.applyc <- ap:
				case <-r.stopped:
					return
				}

				// the leader can write to its disk in parallel with replicating to the followers and them
				// writing to their disks.
				// For more details, check raft thesis 10.2.1
				if islead {
					r.s.send(rd.Messages)
				}

				if !raft.IsEmptySnap(rd.Snapshot) {
					if err := r.storage.SaveSnap(rd.Snapshot); err != nil {
						plog.Fatalf("raft save snapshot error: %v", err)
					}
					r.raftStorage.ApplySnapshot(rd.Snapshot)
					plog.Infof("raft applied incoming snapshot at index %d", rd.Snapshot.Metadata.Index)
				}
				if err := r.storage.Save(rd.HardState, rd.Entries); err != nil {
					plog.Fatalf("raft save state and entries error: %v", err)
				}
				if !raft.IsEmptyHardState(rd.HardState) {
					proposalsCommitted.Set(float64(rd.HardState.Commit))
				}

				r.raftStorage.Append(rd.Entries)

				if !islead {
					r.s.send(rd.Messages)
				}
				raftDone <- struct{}{}
				r.Advance()
			case <-syncC:
				r.s.sync(r.s.Cfg.ReqTimeout())
			case <-r.stopped:
				return
			}
		}
	}()
}