Ejemplo n.º 1
0
func TestSendMessage(t *testing.T) {
	// member 1
	tr := &Transport{
		RoundTripper: &http.Transport{},
		ID:           types.ID(1),
		ClusterID:    types.ID(1),
		Raft:         &fakeRaft{},
		ServerStats:  newServerStats(),
		LeaderStats:  stats.NewLeaderStats("1"),
	}
	tr.Start()
	srv := httptest.NewServer(tr.Handler())
	defer srv.Close()

	// member 2
	recvc := make(chan raftpb.Message, 1)
	p := &fakeRaft{recvc: recvc}
	tr2 := &Transport{
		RoundTripper: &http.Transport{},
		ID:           types.ID(2),
		ClusterID:    types.ID(1),
		Raft:         p,
		ServerStats:  newServerStats(),
		LeaderStats:  stats.NewLeaderStats("2"),
	}
	tr2.Start()
	srv2 := httptest.NewServer(tr2.Handler())
	defer srv2.Close()

	tr.AddPeer(types.ID(2), []string{srv2.URL})
	defer tr.Stop()
	tr2.AddPeer(types.ID(1), []string{srv.URL})
	defer tr2.Stop()
	if !waitStreamWorking(tr.Get(types.ID(2)).(*peer)) {
		t.Fatalf("stream from 1 to 2 is not in work as expected")
	}

	data := []byte("some data")
	tests := []raftpb.Message{
		// these messages are set to send to itself, which facilitates testing.
		{Type: raftpb.MsgProp, From: 1, To: 2, Entries: []raftpb.Entry{{Data: data}}},
		// TODO: send out MsgApp which fits msgapp stream but the term doesn't match
		{Type: raftpb.MsgApp, From: 1, To: 2, Term: 1, Index: 3, LogTerm: 0, Entries: []raftpb.Entry{{Index: 4, Term: 1, Data: data}}, Commit: 3},
		{Type: raftpb.MsgAppResp, From: 1, To: 2, Term: 1, Index: 3},
		{Type: raftpb.MsgVote, From: 1, To: 2, Term: 1, Index: 3, LogTerm: 0},
		{Type: raftpb.MsgVoteResp, From: 1, To: 2, Term: 1},
		{Type: raftpb.MsgSnap, From: 1, To: 2, Term: 1, Snapshot: raftpb.Snapshot{Metadata: raftpb.SnapshotMetadata{Index: 1000, Term: 1}, Data: data}},
		{Type: raftpb.MsgHeartbeat, From: 1, To: 2, Term: 1, Commit: 3},
		{Type: raftpb.MsgHeartbeatResp, From: 1, To: 2, Term: 1},
	}
	for i, tt := range tests {
		tr.Send([]raftpb.Message{tt})
		msg := <-recvc
		if !reflect.DeepEqual(msg, tt) {
			t.Errorf("#%d: msg = %+v, want %+v", i, msg, tt)
		}
	}
}
Ejemplo n.º 2
0
// TestSendMessageWhenStreamIsBroken tests that message can be sent to the
// remote in a limited time when all underlying connections are broken.
func TestSendMessageWhenStreamIsBroken(t *testing.T) {
	// member 1
	tr := &Transport{
		RoundTripper: &http.Transport{},
		ID:           types.ID(1),
		ClusterID:    types.ID(1),
		Raft:         &fakeRaft{},
		ServerStats:  newServerStats(),
		LeaderStats:  stats.NewLeaderStats("1"),
	}
	tr.Start()
	srv := httptest.NewServer(tr.Handler())
	defer srv.Close()

	// member 2
	recvc := make(chan raftpb.Message, 1)
	p := &fakeRaft{recvc: recvc}
	tr2 := &Transport{
		RoundTripper: &http.Transport{},
		ID:           types.ID(2),
		ClusterID:    types.ID(1),
		Raft:         p,
		ServerStats:  newServerStats(),
		LeaderStats:  stats.NewLeaderStats("2"),
	}
	tr2.Start()
	srv2 := httptest.NewServer(tr2.Handler())
	defer srv2.Close()

	tr.AddPeer(types.ID(2), []string{srv2.URL})
	defer tr.Stop()
	tr2.AddPeer(types.ID(1), []string{srv.URL})
	defer tr2.Stop()
	if !waitStreamWorking(tr.Get(types.ID(2)).(*peer)) {
		t.Fatalf("stream from 1 to 2 is not in work as expected")
	}

	// break the stream
	srv.CloseClientConnections()
	srv2.CloseClientConnections()
	var n int
	for {
		select {
		// TODO: remove this resend logic when we add retry logic into the code
		case <-time.After(time.Millisecond):
			n++
			tr.Send([]raftpb.Message{{Type: raftpb.MsgHeartbeat, From: 1, To: 2, Term: 1, Commit: 3}})
		case <-recvc:
			if n > 10 {
				t.Errorf("disconnection time = %dms, want < 10ms", n)
			}
			return
		}
	}
}
Ejemplo n.º 3
0
func TestTransportAdd(t *testing.T) {
	ls := stats.NewLeaderStats("")
	term := uint64(10)
	tr := &transport{
		roundTripper: &roundTripperRecorder{},
		leaderStats:  ls,
		term:         term,
		peers:        make(map[types.ID]Peer),
		prober:       probing.NewProber(nil),
	}
	tr.AddPeer(1, []string{"http://localhost:2380"})

	if _, ok := ls.Followers["1"]; !ok {
		t.Errorf("FollowerStats[1] is nil, want exists")
	}
	s, ok := tr.peers[types.ID(1)]
	if !ok {
		tr.Stop()
		t.Fatalf("senders[1] is nil, want exists")
	}

	// duplicate AddPeer is ignored
	tr.AddPeer(1, []string{"http://localhost:2380"})
	ns := tr.peers[types.ID(1)]
	if s != ns {
		t.Errorf("sender = %v, want %v", ns, s)
	}

	tr.Stop()

	if g := s.(*peer).msgAppReader.msgAppTerm; g != term {
		t.Errorf("peer.term = %d, want %d", g, term)
	}
}
Ejemplo n.º 4
0
func TestTransportAdd(t *testing.T) {
	ls := stats.NewLeaderStats("")
	tr := &Transport{
		LeaderStats: ls,
		streamRt:    &roundTripperRecorder{},
		peers:       make(map[types.ID]Peer),
		prober:      probing.NewProber(nil),
	}
	tr.AddPeer(1, []string{"http://localhost:2380"})

	if _, ok := ls.Followers["1"]; !ok {
		t.Errorf("FollowerStats[1] is nil, want exists")
	}
	s, ok := tr.peers[types.ID(1)]
	if !ok {
		tr.Stop()
		t.Fatalf("senders[1] is nil, want exists")
	}

	// duplicate AddPeer is ignored
	tr.AddPeer(1, []string{"http://localhost:2380"})
	ns := tr.peers[types.ID(1)]
	if s != ns {
		t.Errorf("sender = %v, want %v", ns, s)
	}

	tr.Stop()
}
Ejemplo n.º 5
0
func TestTransportErrorc(t *testing.T) {
	errorc := make(chan error, 1)
	tr := &Transport{
		Raft:        &fakeRaft{},
		LeaderStats: stats.NewLeaderStats(""),
		ErrorC:      errorc,
		streamRt:    newRespRoundTripper(http.StatusForbidden, nil),
		pipelineRt:  newRespRoundTripper(http.StatusForbidden, nil),
		peers:       make(map[types.ID]Peer),
		prober:      probing.NewProber(nil),
	}
	tr.AddPeer(1, []string{"http://localhost:2380"})
	defer tr.Stop()

	select {
	case <-errorc:
		t.Fatalf("received unexpected from errorc")
	case <-time.After(10 * time.Millisecond):
	}
	tr.peers[1].send(raftpb.Message{})

	testutil.WaitSchedule()
	select {
	case <-errorc:
	default:
		t.Fatalf("cannot receive error from errorc")
	}
}
Ejemplo n.º 6
0
Archivo: raft.go Proyecto: mqliang/etcd
func (rc *raftNode) startRaft() {
	if !fileutil.Exist(rc.snapdir) {
		if err := os.Mkdir(rc.snapdir, 0750); err != nil {
			log.Fatalf("raftexample: cannot create dir for snapshot (%v)", err)
		}
	}
	rc.snapshotter = snap.New(rc.snapdir)
	rc.snapshotterReady <- rc.snapshotter

	oldwal := wal.Exist(rc.waldir)
	rc.wal = rc.replayWAL()

	rpeers := make([]raft.Peer, len(rc.peers))
	for i := range rpeers {
		rpeers[i] = raft.Peer{ID: uint64(i + 1)}
	}
	c := &raft.Config{
		ID:              uint64(rc.id),
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rc.raftStorage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}

	if oldwal {
		rc.node = raft.RestartNode(c)
	} else {
		startPeers := rpeers
		if rc.join {
			startPeers = nil
		}
		rc.node = raft.StartNode(c, startPeers)
	}

	ss := &stats.ServerStats{}
	ss.Initialize()

	rc.transport = &rafthttp.Transport{
		ID:          types.ID(rc.id),
		ClusterID:   0x1000,
		Raft:        rc,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(rc.id)),
		ErrorC:      make(chan error),
	}

	rc.transport.Start()
	for i := range rc.peers {
		if i+1 != rc.id {
			rc.transport.AddPeer(types.ID(i+1), []string{rc.peers[i]})
		}
	}

	go rc.serveRaft()
	go rc.serveChannels()
}
Ejemplo n.º 7
0
func TestTransportRemove(t *testing.T) {
	tr := &transport{
		leaderStats: stats.NewLeaderStats(""),
		peers:       make(map[types.ID]*peer),
	}
	tr.AddPeer(1, []string{"http://a"})
	tr.RemovePeer(types.ID(1))

	if _, ok := tr.peers[types.ID(1)]; ok {
		t.Fatalf("senders[1] exists, want removed")
	}
}
Ejemplo n.º 8
0
func TestSendHubRemove(t *testing.T) {
	cl := newTestCluster(nil)
	ls := stats.NewLeaderStats("")
	h := newSendHub(nil, cl, nil, nil, ls)
	m := newTestMember(1, []string{"http://a"}, "", nil)
	h.Add(m)
	h.Remove(types.ID(1))

	if _, ok := h.senders[types.ID(1)]; ok {
		t.Fatalf("senders[1] exists, want removed")
	}
}
Ejemplo n.º 9
0
func TestTransportRemove(t *testing.T) {
	tr := &transport{
		roundTripper: &roundTripperRecorder{},
		leaderStats:  stats.NewLeaderStats(""),
		peers:        make(map[types.ID]Peer),
	}
	tr.AddPeer(1, []string{"http://localhost:2380"})
	tr.RemovePeer(types.ID(1))
	defer tr.Stop()

	if _, ok := tr.peers[types.ID(1)]; ok {
		t.Fatalf("senders[1] exists, want removed")
	}
}
Ejemplo n.º 10
0
Archivo: raft.go Proyecto: ikatson/etcd
func (rc *raftNode) startRaft() {
	oldwal := wal.Exist(rc.waldir)
	rc.wal = rc.replayWAL()

	rpeers := make([]raft.Peer, len(rc.peers))
	for i := range rpeers {
		rpeers[i] = raft.Peer{ID: uint64(i + 1)}
	}
	c := &raft.Config{
		ID:              uint64(rc.id),
		ElectionTick:    10,
		HeartbeatTick:   1,
		Storage:         rc.raftStorage,
		MaxSizePerMsg:   1024 * 1024,
		MaxInflightMsgs: 256,
	}

	if oldwal {
		rc.node = raft.RestartNode(c)
	} else {
		rc.node = raft.StartNode(c, rpeers)
	}

	ss := &stats.ServerStats{}
	ss.Initialize()

	rc.transport = &rafthttp.Transport{
		ID:          types.ID(rc.id),
		ClusterID:   0x1000,
		Raft:        rc,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(rc.id)),
		ErrorC:      make(chan error),
	}

	rc.transport.Start()
	for i := range rc.peers {
		if i+1 != rc.id {
			rc.transport.AddPeer(types.ID(i+1), []string{rc.peers[i]})
		}
	}

	go rc.serveRaft()
	go rc.serveChannels()
}
Ejemplo n.º 11
0
func TestSendHubInitSenders(t *testing.T) {
	membs := []*Member{
		newTestMember(1, []string{"http://a"}, "", nil),
		newTestMember(2, []string{"http://b"}, "", nil),
		newTestMember(3, []string{"http://c"}, "", nil),
	}
	cl := newTestCluster(membs)
	ls := stats.NewLeaderStats("")
	h := newSendHub(nil, cl, nil, nil, ls)

	ids := cl.MemberIDs()
	if len(h.senders) != len(ids) {
		t.Errorf("len(ids) = %d, want %d", len(h.senders), len(ids))
	}
	for _, id := range ids {
		if _, ok := h.senders[id]; !ok {
			t.Errorf("senders[%s] is nil, want exists", id)
		}
	}
}
Ejemplo n.º 12
0
func TestSendHubAdd(t *testing.T) {
	cl := newTestCluster(nil)
	ls := stats.NewLeaderStats("")
	h := newSendHub(nil, cl, nil, nil, ls)
	m := newTestMember(1, []string{"http://a"}, "", nil)
	h.Add(m)

	if _, ok := ls.Followers["1"]; !ok {
		t.Errorf("FollowerStats[1] is nil, want exists")
	}
	s, ok := h.senders[types.ID(1)]
	if !ok {
		t.Fatalf("senders[1] is nil, want exists")
	}

	h.Add(m)
	ns := h.senders[types.ID(1)]
	if s != ns {
		t.Errorf("sender = %p, want %p", ns, s)
	}
}
Ejemplo n.º 13
0
func TestSendHubShouldStop(t *testing.T) {
	tr := newRespRoundTripper(http.StatusForbidden, nil)
	cl := newTestCluster(nil)
	ls := stats.NewLeaderStats("")
	h := newSendHub(tr, cl, nil, nil, ls)
	m := newTestMember(1, []string{"http://a"}, "", nil)
	h.Add(m)

	shouldstop := h.ShouldStopNotify()
	select {
	case <-shouldstop:
		t.Fatalf("received unexpected shouldstop notification")
	case <-time.After(10 * time.Millisecond):
	}
	h.senders[1].Send(raftpb.Message{})

	testutil.ForceGosched()
	select {
	case <-shouldstop:
	default:
		t.Fatalf("cannot receive stop notification")
	}
}
Ejemplo n.º 14
0
func TestTransportAdd(t *testing.T) {
	ls := stats.NewLeaderStats("")
	tr := &transport{
		leaderStats: ls,
		peers:       make(map[types.ID]*peer),
	}
	tr.AddPeer(1, []string{"http://a"})

	if _, ok := ls.Followers["1"]; !ok {
		t.Errorf("FollowerStats[1] is nil, want exists")
	}
	s, ok := tr.peers[types.ID(1)]
	if !ok {
		t.Fatalf("senders[1] is nil, want exists")
	}

	// duplicate AddPeer is ignored
	tr.AddPeer(1, []string{"http://a"})
	ns := tr.peers[types.ID(1)]
	if s != ns {
		t.Errorf("sender = %v, want %v", ns, s)
	}
}
Ejemplo n.º 15
0
func TestTransportErrorc(t *testing.T) {
	errorc := make(chan error, 1)
	tr := &transport{
		roundTripper: newRespRoundTripper(http.StatusForbidden, nil),
		leaderStats:  stats.NewLeaderStats(""),
		peers:        make(map[types.ID]*peer),
		errorc:       errorc,
	}
	tr.AddPeer(1, []string{"http://a"})

	select {
	case <-errorc:
		t.Fatalf("received unexpected from errorc")
	case <-time.After(10 * time.Millisecond):
	}
	tr.peers[1].Send(raftpb.Message{})

	testutil.ForceGosched()
	select {
	case <-errorc:
	default:
		t.Fatalf("cannot receive error from errorc")
	}
}
Ejemplo n.º 16
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) *EtcdServer {
	if err := os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil {
		log.Fatalf("etcdserver: cannot create snapshot directory: %v", err)
	}
	ss := snap.New(cfg.SnapDir())
	st := store.New()
	var w *wal.WAL
	var n raft.Node
	var id uint64
	haveWAL := wal.Exist(cfg.WALDir())
	switch {
	case !haveWAL && cfg.ClusterState == ClusterStateValueExisting:
		cl, err := GetClusterFromPeers(cfg.Cluster.PeerURLs())
		if err != nil {
			log.Fatal(err)
		}
		if err := cfg.Cluster.ValidateAndAssignIDs(cl.Members()); err != nil {
			log.Fatalf("etcdserver: %v", err)
		}
		cfg.Cluster.SetID(cl.id)
		cfg.Cluster.SetStore(st)
		id, n, w = startNode(cfg, nil)
	case !haveWAL && cfg.ClusterState == ClusterStateValueNew:
		if err := cfg.VerifyBootstrapConfig(); err != nil {
			log.Fatalf("etcdserver: %v", err)
		}
		m := cfg.Cluster.MemberByName(cfg.Name)
		if cfg.ShouldDiscover() {
			d, err := discovery.New(cfg.DiscoveryURL, m.ID, cfg.Cluster.String())
			if err != nil {
				log.Fatalf("etcdserver: cannot init discovery %v", err)
			}
			s, err := d.Discover()
			if err != nil {
				log.Fatalf("etcdserver: %v", err)
			}
			if cfg.Cluster, err = NewClusterFromString(cfg.Cluster.name, s); err != nil {
				log.Fatalf("etcdserver: %v", err)
			}
		}
		cfg.Cluster.SetStore(st)
		id, n, w = startNode(cfg, cfg.Cluster.MemberIDs())
	case haveWAL:
		if cfg.ShouldDiscover() {
			log.Printf("etcdserver: warn: ignoring discovery: etcd has already been initialized and has a valid log in %q", cfg.WALDir())
		}
		var index uint64
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			log.Fatal(err)
		}
		if snapshot != nil {
			log.Printf("etcdserver: recovering from snapshot at index %d", snapshot.Index)
			st.Recovery(snapshot.Data)
			index = snapshot.Index
		}
		cfg.Cluster = NewClusterFromStore(cfg.Cluster.name, st)
		id, n, w = restartNode(cfg, index, snapshot)
	default:
		log.Fatalf("etcdserver: unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   strutil.IDAsHex(id),
	}
	lstats := stats.NewLeaderStats(strutil.IDAsHex(id))

	s := &EtcdServer{
		store:      st,
		node:       n,
		id:         id,
		attributes: Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		Cluster:    cfg.Cluster,
		storage: struct {
			*wal.WAL
			*snap.Snapshotter
		}{w, ss},
		stats:      sstats,
		lstats:     lstats,
		send:       Sender(cfg.Transport, cfg.Cluster, sstats, lstats),
		Ticker:     time.Tick(100 * time.Millisecond),
		SyncTicker: time.Tick(500 * time.Millisecond),
		snapCount:  cfg.SnapCount,
	}
	return s
}
Ejemplo n.º 17
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)
	var w *wal.WAL
	var n raft.Node
	var s *raft.MemoryStorage
	var id types.ID
	var cl *cluster

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err := upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())
	ss := snap.New(cfg.SnapDir())

	var remotes []*Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		if err := cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, err := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), cfg.Transport)
		if err != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
		}
		if err := ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, cfg.Transport) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.id)
		cl.SetStore(st)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err := cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, cfg.Transport) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			str, err := discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, err
			}
			urlsmap, err := types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err := fileutil.IsDirWriteable(cfg.DataDir); err != nil {
			return nil, fmt.Errorf("cannot write to data directory: %v", err)
		}

		if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err := st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if snapshot != nil {
			plog.Infof("loaded cluster information from store: %s", cl)
		}
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv := &EtcdServer{
		cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		reqIDGen:      idutil.NewGenerator(uint8(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	// TODO: move transport initialization near the definition of remote
	tr := rafthttp.NewTransporter(cfg.Transport, id, cl.ID(), srv, srv.errorc, sstats, lstats)
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr
	return srv, nil
}
Ejemplo n.º 18
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = fileutil.TouchDirAll(cfg.SnapDir()); err != nil {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())

	bepath := path.Join(cfg.SnapDir(), databaseFilename)
	beExist := fileutil.Exist(bepath)

	var be backend.Backend
	beOpened := make(chan struct{})
	go func() {
		be = backend.NewDefaultBackend(bepath)
		beOpened <- struct{}{}
	}()

	select {
	case <-beOpened:
	case <-time.After(time.Second):
		plog.Warningf("another etcd process is running with the same data dir and holding the file lock.")
		plog.Warningf("waiting for it to exit before starting...")
		<-beOpened
	}

	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var (
		remotes  []*membership.Member
		snapshot *raftpb.Snapshot
	)

	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incompatible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover(api.UpdateCapability)
		if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist {
			os.RemoveAll(bepath)
			return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	heartbeat := time.Duration(cfg.TickMs) * time.Millisecond
	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
			Node:        n,
			ticker:      time.Tick(heartbeat),
			// set up contention detectors for raft heartbeat message.
			// expect to send a heartbeat within 2 heartbeat intervals.
			td:          contention.NewTimeoutDetector(2 * heartbeat),
			heartbeat:   heartbeat,
			raftStorage: s,
			storage:     NewStorage(w, ss),
			msgSnapC:    make(chan raftpb.Message, maxInFlightMsgSnap),
			readStateC:  make(chan raft.ReadState, 1),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	minTTL := time.Duration((3*cfg.ElectionTicks)/2) * heartbeat

	// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
	// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
	srv.lessor = lease.NewLessor(srv.be, int64(math.Ceil(minTTL.Seconds())))
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	if beExist {
		kvindex := srv.kv.ConsistentIndex()
		// TODO: remove kvindex != 0 checking when we do not expect users to upgrade
		// etcd from pre-3.0 release.
		if snapshot != nil && kvindex < snapshot.Metadata.Index {
			if kvindex != 0 {
				return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
			}
			plog.Warningf("consistent index never saved (snapshot index=%d)", snapshot.Metadata.Index)
		}
	}
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())

	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	srv.applyV3Base = &applierV3backend{srv}
	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
Ejemplo n.º 19
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
	st := store.New()
	var w *wal.WAL
	var n raft.Node
	var s *raft.MemoryStorage
	var id types.ID
	walVersion, err := wal.DetectVersion(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if walVersion == wal.WALUnknown {
		return nil, fmt.Errorf("unknown wal version in data dir %s", cfg.DataDir)
	}
	haveWAL := walVersion != wal.WALNotExist

	if haveWAL && walVersion != wal.WALv0_5 {
		err := UpgradeWAL(cfg, walVersion)
		if err != nil {
			return nil, err
		}
	}

	ss := snap.New(cfg.SnapDir())
	switch {
	case !haveWAL && !cfg.NewCluster:
		us := getOtherPeerURLs(cfg.Cluster, cfg.Name)
		existingCluster, err := GetClusterFromPeers(us)
		if err != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
		}
		if err := ValidateClusterAndAssignIDs(cfg.Cluster, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		cfg.Cluster.SetID(existingCluster.id)
		cfg.Cluster.SetStore(st)
		cfg.Print()
		id, n, s, w = startNode(cfg, nil)
	case !haveWAL && cfg.NewCluster:
		if err := cfg.VerifyBootstrapConfig(); err != nil {
			return nil, err
		}
		if err := checkClientURLsEmptyFromPeers(cfg.Cluster, cfg.Name); err != nil {
			return nil, err
		}
		m := cfg.Cluster.MemberByName(cfg.Name)
		if cfg.ShouldDiscover() {
			s, err := discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.Cluster.String())
			if err != nil {
				return nil, err
			}
			if cfg.Cluster, err = NewClusterFromString(cfg.Cluster.token, s); err != nil {
				return nil, err
			}
		}
		cfg.Cluster.SetStore(st)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cfg.Cluster.MemberIDs())
	case haveWAL:
		if cfg.ShouldDiscover() {
			log.Printf("etcdserver: discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		var index uint64
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err := st.Recovery(snapshot.Data); err != nil {
				log.Panicf("etcdserver: recovered store from snapshot error: %v", err)
			}
			log.Printf("etcdserver: recovered store from snapshot at index %d", snapshot.Metadata.Index)
			index = snapshot.Metadata.Index
		}
		cfg.Cluster = NewClusterFromStore(cfg.Cluster.token, st)
		cfg.Print()
		if snapshot != nil {
			log.Printf("etcdserver: loaded cluster information from store: %s", cfg.Cluster)
		}
		if !cfg.ForceNewCluster {
			id, n, s, w = restartNode(cfg, index+1, snapshot)
		} else {
			id, n, s, w = restartAsStandaloneNode(cfg, index+1, snapshot)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	lstats := stats.NewLeaderStats(id.String())

	srv := &EtcdServer{
		cfg:         cfg,
		store:       st,
		node:        n,
		raftStorage: s,
		id:          id,
		attributes:  Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		Cluster:     cfg.Cluster,
		storage:     NewStorage(w, ss),
		stats:       sstats,
		lstats:      lstats,
		Ticker:      time.Tick(100 * time.Millisecond),
		SyncTicker:  time.Tick(500 * time.Millisecond),
		snapCount:   cfg.SnapCount,
	}
	srv.sendhub = newSendHub(cfg.Transport, cfg.Cluster, srv, sstats, lstats)
	for _, m := range getOtherMembers(cfg.Cluster, cfg.Name) {
		srv.sendhub.Add(m)
	}
	return srv, nil
}
Ejemplo n.º 20
0
func BenchmarkSendingMsgApp(b *testing.B) {
	// member 1
	tr := &Transport{
		RoundTripper: &http.Transport{},
		ID:           types.ID(1),
		ClusterID:    types.ID(1),
		Raft:         &fakeRaft{},
		ServerStats:  newServerStats(),
		LeaderStats:  stats.NewLeaderStats("1"),
	}
	tr.Start()
	srv := httptest.NewServer(tr.Handler())
	defer srv.Close()

	// member 2
	r := &countRaft{}
	tr2 := &Transport{
		RoundTripper: &http.Transport{},
		ID:           types.ID(2),
		ClusterID:    types.ID(1),
		Raft:         r,
		ServerStats:  newServerStats(),
		LeaderStats:  stats.NewLeaderStats("2"),
	}
	tr2.Start()
	srv2 := httptest.NewServer(tr2.Handler())
	defer srv2.Close()

	tr.AddPeer(types.ID(2), []string{srv2.URL})
	defer tr.Stop()
	tr2.AddPeer(types.ID(1), []string{srv.URL})
	defer tr2.Stop()
	if !waitStreamWorking(tr.Get(types.ID(2)).(*peer)) {
		b.Fatalf("stream from 1 to 2 is not in work as expected")
	}

	b.ReportAllocs()
	b.SetBytes(64)

	b.ResetTimer()
	data := make([]byte, 64)
	for i := 0; i < b.N; i++ {
		tr.Send([]raftpb.Message{
			{
				Type:  raftpb.MsgApp,
				From:  1,
				To:    2,
				Index: uint64(i),
				Entries: []raftpb.Entry{
					{
						Index: uint64(i + 1),
						Data:  data,
					},
				},
			},
		})
	}
	// wait until all messages are received by the target raft
	for r.count() != b.N {
		time.Sleep(time.Millisecond)
	}
	b.StopTimer()
}
Ejemplo n.º 21
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)

	var (
		w  *wal.WAL
		n  raft.Node
		s  *raft.MemoryStorage
		id types.ID
		cl *membership.RaftCluster
	)

	if terr := fileutil.TouchDirAll(cfg.DataDir); terr != nil {
		return nil, fmt.Errorf("cannot access data directory: %v", terr)
	}

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err = upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())

	if err = os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil && !os.IsExist(err) {
		plog.Fatalf("create snapshot directory error: %v", err)
	}
	ss := snap.New(cfg.SnapDir())
	be := backend.NewDefaultBackend(path.Join(cfg.SnapDir(), databaseFilename))
	defer func() {
		if err != nil {
			be.Close()
		}
	}()

	prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
	if err != nil {
		return nil, err
	}
	var remotes []*membership.Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		if err = cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
		if gerr != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
		}
		if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.ID())
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		if err = cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, prt, cfg.bootstrapTimeout()) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			var str string
			str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, &DiscoveryError{Op: "join", Err: err}
			}
			var urlsmap types.URLsMap
			urlsmap, err = types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
			return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		var snapshot *raftpb.Snapshot
		snapshot, err = ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err = st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.SetBackend(be)
		cl.Recover()
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	if terr := fileutil.TouchDirAll(cfg.MemberDir()); terr != nil {
		return nil, fmt.Errorf("cannot access member directory: %v", terr)
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv = &EtcdServer{
		readych:   make(chan struct{}),
		Cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		peerRt:        prt,
		reqIDGen:      idutil.NewGenerator(uint16(id), time.Now()),
		forceVersionC: make(chan struct{}),
		msgSnapC:      make(chan raftpb.Message, maxInFlightMsgSnap),
	}

	srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}

	srv.be = be
	srv.lessor = lease.NewLessor(srv.be)
	srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
	srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
	srv.authStore = auth.NewAuthStore(srv.be)
	if h := cfg.AutoCompactionRetention; h != 0 {
		srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
		srv.compactor.Run()
	}

	if err = srv.restoreAlarms(); err != nil {
		return nil, err
	}

	// TODO: move transport initialization near the definition of remote
	tr := &rafthttp.Transport{
		TLSInfo:     cfg.PeerTLSInfo,
		DialTimeout: cfg.peerDialTimeout(),
		ID:          id,
		URLs:        cfg.PeerURLs,
		ClusterID:   cl.ID(),
		Raft:        srv,
		Snapshotter: ss,
		ServerStats: sstats,
		LeaderStats: lstats,
		ErrorC:      srv.errorc,
	}
	if err = tr.Start(); err != nil {
		return nil, err
	}
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr

	return srv, nil
}
Ejemplo n.º 22
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
	st := store.New(StoreAdminPrefix, StoreKeysPrefix)
	var w *wal.WAL
	var n raft.Node
	var s *raft.MemoryStorage
	var id types.ID
	walVersion, err := wal.DetectVersion(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if walVersion == wal.WALUnknown {
		return nil, fmt.Errorf("unknown wal version in data dir %s", cfg.DataDir)
	}
	haveWAL := walVersion != wal.WALNotExist

	ss := snap.New(cfg.SnapDir())
	switch {
	case !haveWAL && !cfg.NewCluster:
		us := getOtherPeerURLs(cfg.Cluster, cfg.Name)
		existingCluster, err := GetClusterFromPeers(us, cfg.Transport)
		if err != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
		}
		if err := ValidateClusterAndAssignIDs(cfg.Cluster, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		cfg.Cluster.SetID(existingCluster.id)
		cfg.Cluster.SetStore(st)
		cfg.Print()
		id, n, s, w = startNode(cfg, nil)
	case !haveWAL && cfg.NewCluster:
		if err := cfg.VerifyBootstrapConfig(); err != nil {
			return nil, err
		}
		m := cfg.Cluster.MemberByName(cfg.Name)
		if isBootstrapped(cfg) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			s, err := discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.Cluster.String())
			if err != nil {
				return nil, err
			}
			if cfg.Cluster, err = NewClusterFromString(cfg.Cluster.token, s); err != nil {
				return nil, err
			}
		}
		cfg.Cluster.SetStore(st)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cfg.Cluster.MemberIDs())
	case haveWAL:
		if walVersion != wal.WALv0_5 {
			if err := upgradeWAL(cfg, walVersion); err != nil {
				return nil, err
			}
		}

		if cfg.ShouldDiscover() {
			log.Printf("etcdserver: discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err := st.Recovery(snapshot.Data); err != nil {
				log.Panicf("etcdserver: recovered store from snapshot error: %v", err)
			}
			log.Printf("etcdserver: recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Cluster = NewClusterFromStore(cfg.Cluster.token, st)
		cfg.Print()
		if snapshot != nil {
			log.Printf("etcdserver: loaded cluster information from store: %s", cfg.Cluster)
		}
		if !cfg.ForceNewCluster {
			id, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	lstats := stats.NewLeaderStats(id.String())

	srv := &EtcdServer{
		cfg:    cfg,
		errorc: make(chan error, 1),
		store:  st,
		r: raftNode{
			Node:        n,
			snapCount:   cfg.SnapCount,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:         id,
		attributes: Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		Cluster:    cfg.Cluster,
		stats:      sstats,
		lstats:     lstats,
		SyncTicker: time.Tick(500 * time.Millisecond),
		reqIDGen:   idutil.NewGenerator(uint8(id), time.Now()),
	}

	tr := rafthttp.NewTransporter(cfg.Transport, id, cfg.Cluster.ID(), srv, srv.errorc, sstats, lstats)
	// add all the remote members into sendhub
	for _, m := range cfg.Cluster.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr
	return srv, nil
}