Ejemplo n.º 1
0
// startRaft starts a raft node from the given wal dir.
// If the wal dir does not exist, startRaft will start a new raft node.
// If the wal dir exists, startRaft will restart the previous raft node.
// startRaft returns the started raft node and the opened wal.
func startRaft(id int64, peerIDs []int64, waldir string) (raft.Node, *wal.WAL) {
	if !wal.Exist(waldir) {
		w, err := wal.Create(waldir)
		if err != nil {
			log.Fatal(err)
		}
		n := raft.Start(id, peerIDs, 10, 1)
		return n, w
	}

	// restart a node from previous wal
	// TODO(xiangli): check snapshot; not open from one
	w, err := wal.OpenAtIndex(waldir, 1)
	if err != nil {
		log.Fatal(err)
	}
	wid, st, ents, err := w.ReadAll()
	// TODO(xiangli): save/recovery nodeID?
	if wid != 0 {
		log.Fatalf("unexpected nodeid %d: nodeid should always be zero until we save nodeid into wal", wid)
	}
	if err != nil {
		log.Fatal(err)
	}
	n := raft.Restart(id, peerIDs, 10, 1, st, ents)
	return n, w
}
Ejemplo n.º 2
0
func readWAL(waldir string, index uint64) (w *wal.WAL, id, cid types.ID, st raftpb.HardState, ents []raftpb.Entry) {
	var err error
	if w, err = wal.OpenAtIndex(waldir, index); err != nil {
		log.Fatalf("etcdserver: open wal error: %v", err)
	}
	var wmetadata []byte
	if wmetadata, st, ents, err = w.ReadAll(); err != nil {
		log.Fatalf("etcdserver: read wal error: %v", err)
	}
	var metadata pb.Metadata
	pbutil.MustUnmarshal(&metadata, wmetadata)
	id = types.ID(metadata.NodeID)
	cid = types.ID(metadata.ClusterID)
	return
}
Ejemplo n.º 3
0
func dump5(dataDir string) ([]raftpb.Entry, error) {
	wd5 := walDir5(dataDir)
	if !wal.Exist(wd5) {
		return nil, fmt.Errorf("No wal exists at %s", wd5)
	}

	w, err := wal.OpenAtIndex(wd5, 0)
	if err != nil {
		return nil, err
	}
	defer w.Close()

	_, _, ents, err := w.ReadAll()
	return ents, err
}
Ejemplo n.º 4
0
// handleBackup handles a request that intends to do a backup.
func handleBackup(c *cli.Context) {
	srcSnap := path.Join(c.String("data-dir"), "snap")
	destSnap := path.Join(c.String("backup-dir"), "snap")
	srcWAL := path.Join(c.String("data-dir"), "wal")
	destWAL := path.Join(c.String("backup-dir"), "wal")

	if err := os.MkdirAll(destSnap, 0700); err != nil {
		log.Fatalf("failed creating backup snapshot dir %v: %v", destSnap, err)
	}
	ss := snap.New(srcSnap)
	snapshot, err := ss.Load()
	if err != nil && err != snap.ErrNoSnapshot {
		log.Fatal(err)
	}
	var index uint64
	if snapshot != nil {
		index = snapshot.Metadata.Index
		newss := snap.New(destSnap)
		if err := newss.SaveSnap(*snapshot); err != nil {
			log.Fatal(err)
		}
	}

	w, err := wal.OpenAtIndex(srcWAL, index)
	if err != nil {
		log.Fatal(err)
	}
	defer w.Close()
	wmetadata, state, ents, err := w.ReadAll()
	if err != nil {
		log.Fatal(err)
	}
	var metadata etcdserverpb.Metadata
	pbutil.MustUnmarshal(&metadata, wmetadata)
	rand.Seed(time.Now().UnixNano())
	metadata.NodeID = etcdserver.GenID()
	metadata.ClusterID = etcdserver.GenID()

	neww, err := wal.Create(destWAL, pbutil.MustMarshal(&metadata))
	if err != nil {
		log.Fatal(err)
	}
	defer neww.Close()
	if err := neww.Save(state, ents); err != nil {
		log.Fatal(err)
	}
}
Ejemplo n.º 5
0
func restartNode(cfg *ServerConfig, index uint64, snapshot *raftpb.Snapshot) (id uint64, n raft.Node, w *wal.WAL) {
	var err error
	// restart a node from previous wal
	if w, err = wal.OpenAtIndex(cfg.WALDir(), index); err != nil {
		log.Fatal(err)
	}
	wmetadata, st, ents, err := w.ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	var metadata pb.Metadata
	pbutil.MustUnmarshal(&metadata, wmetadata)
	id = metadata.NodeID
	cfg.Cluster.SetID(metadata.ClusterID)
	log.Printf("etcdserver: restart member %x in cluster %x at commit index %d", id, cfg.Cluster.ID(), st.Commit)
	n = raft.RestartNode(id, 10, 1, snapshot, st, ents)
	return
}
Ejemplo n.º 6
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) *EtcdServer {
	m := cfg.Cluster.FindName(cfg.Name)
	if m == nil {
		// Should never happen
		log.Fatalf("could not find name %v in cluster!", cfg.Name)
	}
	snapdir := path.Join(cfg.DataDir, "snap")
	if err := os.MkdirAll(snapdir, privateDirMode); err != nil {
		log.Fatalf("etcdserver: cannot create snapshot directory: %v", err)
	}
	ss := snap.New(snapdir)
	st := store.New()
	var w *wal.WAL
	var n raft.Node
	var err error
	waldir := path.Join(cfg.DataDir, "wal")
	if !wal.Exist(waldir) {
		if w, err = wal.Create(waldir); err != nil {
			log.Fatal(err)
		}
		n = raft.StartNode(m.ID, cfg.Cluster.IDs(), 10, 1)
	} else {
		var index int64
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			log.Fatal(err)
		}
		if snapshot != nil {
			log.Printf("etcdserver: restart from snapshot at index %d", snapshot.Index)
			st.Recovery(snapshot.Data)
			index = snapshot.Index
		}

		// restart a node from previous wal
		if w, err = wal.OpenAtIndex(waldir, index); err != nil {
			log.Fatal(err)
		}
		wid, st, ents, err := w.ReadAll()
		if err != nil {
			log.Fatal(err)
		}
		// TODO(xiangli): save/recovery nodeID?
		if wid != 0 {
			log.Fatalf("unexpected nodeid %d: nodeid should always be zero until we save nodeid into wal", wid)
		}
		n = raft.RestartNode(m.ID, cfg.Cluster.IDs(), 10, 1, snapshot, st, ents)
	}

	cls := NewClusterStore(st, *cfg.Cluster)

	s := &EtcdServer{
		store: st,
		node:  n,
		name:  cfg.Name,
		storage: struct {
			*wal.WAL
			*snap.Snapshotter
		}{w, ss},
		send:         Sender(cfg.Transport, cls),
		clientURLs:   cfg.ClientURLs,
		ticker:       time.Tick(100 * time.Millisecond),
		syncTicker:   time.Tick(500 * time.Millisecond),
		snapCount:    cfg.SnapCount,
		ClusterStore: cls,
	}
	return s
}