Beispiel #1
0
// Sends RequestVote RPCs to a peer when the server is the candidate.
func (t *transporter) SendVoteRequest(server raft.Server, peer *raft.Peer, req *raft.RequestVoteRequest) *raft.RequestVoteResponse {
	var b bytes.Buffer

	if _, err := req.Encode(&b); err != nil {
		log.Warn("transporter.vr.encoding.error:", err)
		return nil
	}

	u, _ := t.registry.PeerURL(peer.Name)
	log.Debugf("Send Vote from %s to %s", server.Name(), u)

	resp, _, err := t.Post(fmt.Sprintf("%s/vote", u), &b)

	if err != nil {
		log.Debugf("Cannot send VoteRequest to %s : %s", u, err)
	}

	if resp != nil {
		defer resp.Body.Close()

		rvrsp := &raft.RequestVoteResponse{}
		if _, err = rvrsp.Decode(resp.Body); err != nil && err != io.EOF {
			log.Warn("transporter.vr.decoding.error:", err)
			return nil
		}
		return rvrsp
	}
	return nil
}
Beispiel #2
0
func (s *StandbyServer) syncCluster(peerURLs []string) error {
	peerURLs = append(s.ClusterURLs(), peerURLs...)

	for _, peerURL := range peerURLs {
		// Fetch current peer list
		machines, err := s.client.GetMachines(peerURL)
		if err != nil {
			log.Debugf("fail getting machine messages from %v", peerURL)
			continue
		}

		config, err := s.client.GetClusterConfig(peerURL)
		if err != nil {
			log.Debugf("fail getting cluster config from %v", peerURL)
			continue
		}

		s.setCluster(machines)
		s.SetSyncInterval(config.SyncInterval)
		if err := s.saveInfo(); err != nil {
			log.Warnf("fail saving cluster info into disk: %v", err)
		}
		return nil
	}
	return fmt.Errorf("unreachable cluster")
}
Beispiel #3
0
// Response to append entries request
func (ps *PeerServer) AppendEntriesHttpHandler(w http.ResponseWriter, req *http.Request) {
	start := time.Now()
	aereq := &raft.AppendEntriesRequest{}

	if _, err := aereq.Decode(req.Body); err != nil {
		http.Error(w, "", http.StatusBadRequest)
		log.Warnf("[recv] BADREQUEST %s/log/append [%v]", ps.Config.URL, err)
		return
	}

	log.Debugf("[recv] POST %s/log/append [%d]", ps.Config.URL, len(aereq.Entries))

	ps.serverStats.RecvAppendReq(aereq.LeaderName, int(req.ContentLength))

	resp := ps.raftServer.AppendEntries(aereq)

	if resp == nil {
		log.Warn("[ae] Error: nil response")
		http.Error(w, "", http.StatusInternalServerError)
		return
	}

	if !resp.Success {
		log.Debugf("[Append Entry] Step back")
	}

	if _, err := resp.Encode(w); err != nil {
		log.Warn("[ae] Error: %v", err)
		http.Error(w, "", http.StatusInternalServerError)
		return
	}

	(*ps.metrics).Timer("timer.appendentries.handle").UpdateSince(start)
}
Beispiel #4
0
// Sends SnapshotRecoveryRequest RPCs to a peer when the server is the candidate.
func (t *transporter) SendSnapshotRecoveryRequest(server raft.Server, peer *raft.Peer, req *raft.SnapshotRecoveryRequest) *raft.SnapshotRecoveryResponse {
	var b bytes.Buffer

	if _, err := req.Encode(&b); err != nil {
		log.Warn("transporter.ss.encoding.error:", err)
		return nil
	}

	u, _ := t.peerServer.registry.PeerURL(peer.Name)
	log.Debugf("Send Snapshot Recovery from %s to %s", server.Name(), u)

	resp, httpRequest, err := t.Post(fmt.Sprintf("%s/snapshotRecovery", u), &b)

	if err != nil {
		log.Debugf("Cannot send Snapshot Recovery to %s : %s", u, err)
	}

	if resp != nil {
		defer resp.Body.Close()

		t.CancelWhenTimeout(httpRequest)

		ssrrsp := &raft.SnapshotRecoveryResponse{}
		if _, err = ssrrsp.Decode(resp.Body); err != nil && err != io.EOF {
			log.Warn("transporter.ssr.decoding.error:", err)
			return nil
		}
		return ssrrsp
	}
	return nil

}
Beispiel #5
0
// Sends AppendEntries RPCs to a peer when the server is the leader.
func (t *transporter) SendAppendEntriesRequest(server raft.Server, peer *raft.Peer, req *raft.AppendEntriesRequest) *raft.AppendEntriesResponse {
	var b bytes.Buffer

	if _, err := req.Encode(&b); err != nil {
		log.Warn("transporter.ae.encoding.error:", err)
		return nil
	}

	size := b.Len()

	t.peerServer.serverStats.SendAppendReq(size)

	u, _ := t.peerServer.registry.PeerURL(peer.Name)

	log.Debugf("Send LogEntries to %s ", u)

	thisFollowerStats, ok := t.peerServer.followersStats.Followers[peer.Name]

	if !ok { //this is the first time this follower has been seen
		thisFollowerStats = &raftFollowerStats{}
		thisFollowerStats.Latency.Minimum = 1 << 63
		t.peerServer.followersStats.Followers[peer.Name] = thisFollowerStats
	}

	start := time.Now()

	resp, httpRequest, err := t.Post(fmt.Sprintf("%s/log/append", u), &b)

	end := time.Now()

	if err != nil {
		log.Debugf("Cannot send AppendEntriesRequest to %s: %s", u, err)
		if ok {
			thisFollowerStats.Fail()
		}
		return nil
	} else {
		if ok {
			thisFollowerStats.Succ(end.Sub(start))
		}
	}

	if resp != nil {
		defer resp.Body.Close()

		t.CancelWhenTimeout(httpRequest)

		aeresp := &raft.AppendEntriesResponse{}
		if _, err = aeresp.Decode(resp.Body); err != nil && err != io.EOF {
			log.Warn("transporter.ae.decoding.error:", err)
			return nil
		}
		return aeresp
	}

	return nil
}
Beispiel #6
0
// Send join requests to peer.
func (s *PeerServer) joinByPeer(server raft.Server, peer string, scheme string) error {
	var b bytes.Buffer

	// t must be ok
	t, _ := server.Transporter().(*transporter)

	// Our version must match the leaders version
	versionURL := url.URL{Host: peer, Scheme: scheme, Path: "/version"}
	version, err := getVersion(t, versionURL)
	if err != nil {
		return fmt.Errorf("Error during join version check: %v", err)
	}
	if version < store.MinVersion() || version > store.MaxVersion() {
		return fmt.Errorf("Unable to join: cluster version is %d; version compatibility is %d - %d", version, store.MinVersion(), store.MaxVersion())
	}

	json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL()))

	joinURL := url.URL{Host: peer, Scheme: scheme, Path: "/join"}

	log.Debugf("Send Join Request to %s", joinURL.String())

	resp, req, err := t.Post(joinURL.String(), &b)

	for {
		if err != nil {
			return fmt.Errorf("Unable to join: %v", err)
		}
		if resp != nil {
			defer resp.Body.Close()

			t.CancelWhenTimeout(req)

			if resp.StatusCode == http.StatusOK {
				b, _ := ioutil.ReadAll(resp.Body)
				s.joinIndex, _ = binary.Uvarint(b)
				return nil
			}
			if resp.StatusCode == http.StatusTemporaryRedirect {
				address := resp.Header.Get("Location")
				log.Debugf("Send Join Request to %s", address)
				json.NewEncoder(&b).Encode(NewJoinCommand(store.MinVersion(), store.MaxVersion(), server.Name(), s.Config.URL, s.server.URL()))
				resp, req, err = t.Post(address, &b)

			} else if resp.StatusCode == http.StatusBadRequest {
				log.Debug("Reach max number peers in the cluster")
				decoder := json.NewDecoder(resp.Body)
				err := &etcdErr.Error{}
				decoder.Decode(err)
				return *err
			} else {
				return fmt.Errorf("Unable to join")
			}
		}

	}
}
Beispiel #7
0
// Start the raft server
func (s *PeerServer) ListenAndServe(snapshot bool, cluster []string) error {
	// LoadSnapshot
	if snapshot {
		err := s.raftServer.LoadSnapshot()

		if err == nil {
			log.Debugf("%s finished load snapshot", s.name)
		} else {
			log.Debug(err)
		}
	}

	s.raftServer.SetElectionTimeout(s.ElectionTimeout)
	s.raftServer.SetHeartbeatTimeout(s.HeartbeatTimeout)

	s.raftServer.Start()

	if s.raftServer.IsLogEmpty() {
		// start as a leader in a new cluster
		if len(cluster) == 0 {
			s.startAsLeader()
		} else {
			s.startAsFollower(cluster)
		}

	} else {
		// Rejoin the previous cluster
		cluster = s.registry.PeerURLs(s.raftServer.Leader(), s.name)
		for i := 0; i < len(cluster); i++ {
			u, err := url.Parse(cluster[i])
			if err != nil {
				log.Debug("rejoin cannot parse url: ", err)
			}
			cluster[i] = u.Host
		}
		ok := s.joinCluster(cluster)
		if !ok {
			log.Warn("the entire cluster is down! this peer will restart the cluster.")
		}

		log.Debugf("%s restart as a follower", s.name)
	}

	s.closeChan = make(chan bool)

	go s.monitorSync()
	go s.monitorTimeoutThreshold(s.closeChan)

	// open the snapshot
	if snapshot {
		go s.monitorSnapshot()
	}

	// start to response to raft requests
	return s.startTransport(s.tlsConf.Scheme, s.tlsConf.Server)
}
Beispiel #8
0
func updatePeerURL(c *JoinCommand, ps *PeerServer) error {
	log.Debugf("Update peer URL of %v to %v", c.Name, c.RaftURL)
	if err := ps.registry.UpdatePeerURL(c.Name, c.RaftURL); err != nil {
		log.Debugf("Error while updating in registry: %s (%v)", c.Name, err)
		return err
	}
	// Flush commit index, so raft will replay to here when restart
	ps.raftServer.FlushCommitIndex()
	return nil
}
Beispiel #9
0
func (s *PeerServer) InitNewCluster(clusterConfig *ClusterConfig) {
	// leader need to join self as a peer
	s.doCommand(&JoinCommand{
		MinVersion: store.MinVersion(),
		MaxVersion: store.MaxVersion(),
		Name:       s.raftServer.Name(),
		RaftURL:    s.Config.URL,
		EtcdURL:    s.server.URL(),
	})
	log.Debugf("%s start as a leader", s.Config.Name)
	s.joinIndex = 1

	s.doCommand(&SetClusterConfigCommand{Config: clusterConfig})
	log.Debugf("%s sets cluster config as %v", s.Config.Name, clusterConfig)
}
Beispiel #10
0
// ClusterConfig retrieves the current cluster configuration.
func (s *PeerServer) ClusterConfig() *ClusterConfig {
	e, err := s.store.Get(ClusterConfigKey, false, false)
	// This is useful for backward compatibility because it doesn't
	// set cluster config in older version.
	if err != nil {
		log.Debugf("failed getting cluster config key: %v", err)
		return NewClusterConfig()
	}

	var c ClusterConfig
	if err = json.Unmarshal([]byte(*e.Node.Value), &c); err != nil {
		log.Debugf("failed unmarshaling cluster config: %v", err)
		return NewClusterConfig()
	}
	return &c
}
Beispiel #11
0
// Adds a server handler to the router.
func (s *Server) handleFunc(r *mux.Router, path string, f func(http.ResponseWriter, *http.Request) error) *mux.Route {

	// Wrap the standard HandleFunc interface to pass in the server reference.
	return r.HandleFunc(path, func(w http.ResponseWriter, req *http.Request) {
		if req.Method == "HEAD" {
			w = &HEADResponseWriter{w}
		}

		// Log request.
		log.Debugf("[recv] %s %s %s [%s]", req.Method, s.URL(), req.URL.Path, req.RemoteAddr)

		// Forward request along if the server is a standby.
		if s.peerServer.Mode() == StandbyMode {
			if s.peerServer.standbyClientURL == "" {
				w.Header().Set("Content-Type", "application/json")
				etcdErr.NewError(402, "", 0).Write(w)
				return
			}
			uhttp.Redirect(s.peerServer.standbyClientURL, w, req)
			return
		}

		// Execute handler function and return error if necessary.
		if err := f(w, req); err != nil {
			if etcdErr, ok := err.(*etcdErr.Error); ok {
				log.Debug("Return error: ", (*etcdErr).Error())
				w.Header().Set("Content-Type", "application/json")
				etcdErr.Write(w)
			} else {
				http.Error(w, err.Error(), http.StatusInternalServerError)
			}
		}
	})
}
Beispiel #12
0
// monitorCluster assumes that the machine has tried to join the cluster and
// failed, so it waits for the interval at the beginning.
func (s *StandbyServer) monitorCluster() {
	for {
		timer := time.NewTimer(time.Duration(int64(s.SyncInterval * float64(time.Second))))
		defer timer.Stop()
		select {
		case <-s.closeChan:
			return
		case <-timer.C:
		}

		if err := s.syncCluster(nil); err != nil {
			log.Warnf("fail syncing cluster(%v): %v", s.ClusterURLs(), err)
			continue
		}

		leader := s.ClusterLeader()
		if leader == nil {
			log.Warnf("fail getting leader from cluster(%v)", s.ClusterURLs())
			continue
		}

		if err := s.join(leader.PeerURL); err != nil {
			log.Debugf("fail joining through leader %v: %v", leader, err)
			continue
		}

		log.Infof("join through leader %v", leader.PeerURL)
		go func() {
			s.Stop()
			close(s.removeNotify)
		}()
		return
	}
}
Beispiel #13
0
func GetHandler(w http.ResponseWriter, req *http.Request, s Server) error {
	vars := mux.Vars(req)
	key := "/" + vars["key"]

	// Help client to redirect the request to the current leader
	if req.FormValue("consistent") == "true" && s.State() != raft.Leader {
		leader := s.Leader()
		hostname, _ := s.ClientURL(leader)

		url, err := url.Parse(hostname)
		if err != nil {
			log.Warn("Redirect cannot parse hostName ", hostname)
			return err
		}
		url.RawQuery = req.URL.RawQuery
		url.Path = req.URL.Path

		log.Debugf("Redirect consistent get to %s", url.String())
		http.Redirect(w, req, url.String(), http.StatusTemporaryRedirect)
		return nil
	}

	recursive := (req.FormValue("recursive") == "true")
	sort := (req.FormValue("sorted") == "true")
	waitIndex := req.FormValue("waitIndex")
	stream := (req.FormValue("stream") == "true")

	if req.FormValue("wait") == "true" {
		return handleWatch(key, recursive, stream, waitIndex, w, s)
	}

	return handleGet(key, recursive, sort, w, s)
}
Beispiel #14
0
// Response to recover from snapshot request
func (ps *PeerServer) SnapshotHttpHandler(w http.ResponseWriter, req *http.Request) {
	ssreq := &raft.SnapshotRequest{}

	if _, err := ssreq.Decode(req.Body); err != nil {
		http.Error(w, "", http.StatusBadRequest)
		log.Warnf("[recv] BADREQUEST %s/snapshot [%v]", ps.Config.URL, err)
		return
	}

	log.Debugf("[recv] POST %s/snapshot", ps.Config.URL)

	resp := ps.raftServer.RequestSnapshot(ssreq)

	if resp == nil {
		log.Warn("[ss] Error: nil response")
		http.Error(w, "", http.StatusInternalServerError)
		return
	}

	if _, err := resp.Encode(w); err != nil {
		log.Warn("[ss] Error: %v", err)
		http.Error(w, "", http.StatusInternalServerError)
		return
	}
}
// Updates the cluster configuration.
func (ps *PeerServer) setClusterConfigHttpHandler(w http.ResponseWriter, req *http.Request) {
	// Decode map.
	m := make(map[string]interface{})
	if err := json.NewDecoder(req.Body).Decode(&m); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Copy config and update fields passed in.
	config := ps.ClusterConfig()
	if activeSize, ok := m["activeSize"].(float64); ok {
		config.ActiveSize = int(activeSize)
	}
	if removeDelay, ok := m["removeDelay"].(float64); ok {
		config.RemoveDelay = removeDelay
	}
	if syncInterval, ok := m["syncInterval"].(float64); ok {
		config.SyncInterval = syncInterval
	}

	// Issue command to update.
	c := &SetClusterConfigCommand{Config: config}
	log.Debugf("[recv] Update Cluster Config Request")
	ps.server.Dispatch(c, w, req)

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(ps.ClusterConfig())
}
Beispiel #16
0
// Response to vote request
func (ps *PeerServer) VoteHttpHandler(w http.ResponseWriter, req *http.Request) {
	rvreq := &raft.RequestVoteRequest{}

	if _, err := rvreq.Decode(req.Body); err != nil {
		http.Error(w, "", http.StatusBadRequest)
		log.Warnf("[recv] BADREQUEST %s/vote [%v]", ps.Config.URL, err)
		return
	}

	log.Debugf("[recv] POST %s/vote [%s]", ps.Config.URL, rvreq.CandidateName)

	resp := ps.raftServer.RequestVote(rvreq)

	if resp == nil {
		log.Warn("[vote] Error: nil response")
		http.Error(w, "", http.StatusInternalServerError)
		return
	}

	if _, err := resp.Encode(w); err != nil {
		log.Warn("[vote] Error: %v", err)
		http.Error(w, "", http.StatusInternalServerError)
		return
	}
}
Beispiel #17
0
// Updates the cluster configuration.
func (ps *PeerServer) setClusterConfigHttpHandler(w http.ResponseWriter, req *http.Request) {
	// Decode map.
	m := make(map[string]interface{})
	if err := json.NewDecoder(req.Body).Decode(&m); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Copy config and update fields passed in.
	config := &ClusterConfig{
		ActiveSize:   ps.clusterConfig.ActiveSize,
		PromoteDelay: ps.clusterConfig.PromoteDelay,
	}
	if activeSize, ok := m["activeSize"].(float64); ok {
		config.ActiveSize = int(activeSize)
	}
	if promoteDelay, ok := m["promoteDelay"].(float64); ok {
		config.PromoteDelay = int(promoteDelay)
	}

	// Issue command to update.
	c := &SetClusterConfigCommand{Config: config}
	log.Debugf("[recv] Update Cluster Config Request")
	ps.server.Dispatch(c, w, req)

	json.NewEncoder(w).Encode(&ps.clusterConfig)
}
Beispiel #18
0
// Start the raft server
func (s *PeerServer) Start(snapshot bool, discoverURL string, peers []string) error {
	// LoadSnapshot
	if snapshot {
		err := s.raftServer.LoadSnapshot()

		if err == nil {
			log.Debugf("%s finished load snapshot", s.Config.Name)
		} else {
			log.Debug(err)
		}
	}

	s.raftServer.Start()

	s.findCluster(discoverURL, peers)

	s.closeChan = make(chan bool)

	go s.monitorSync()
	go s.monitorTimeoutThreshold(s.closeChan)

	// open the snapshot
	if snapshot {
		go s.monitorSnapshot()
	}

	return nil
}
Beispiel #19
0
// Adds a server handler to the router.
func (s *Server) handleFunc(path string, f func(http.ResponseWriter, *http.Request) error) *mux.Route {
	r := s.Handler.(*mux.Router)

	// Wrap the standard HandleFunc interface to pass in the server reference.
	return r.HandleFunc(path, func(w http.ResponseWriter, req *http.Request) {
		// Log request.
		log.Debugf("[recv] %s %s %s [%s]", req.Method, s.url, req.URL.Path, req.RemoteAddr)

		// Write CORS header.
		if s.OriginAllowed("*") {
			w.Header().Add("Access-Control-Allow-Origin", "*")
		} else if origin := req.Header.Get("Origin"); s.OriginAllowed(origin) {
			w.Header().Add("Access-Control-Allow-Origin", origin)
		}

		// Execute handler function and return error if necessary.
		if err := f(w, req); err != nil {
			if etcdErr, ok := err.(*etcdErr.Error); ok {
				log.Debug("Return error: ", (*etcdErr).Error())
				w.Header().Set("Content-Type", "application/json")
				etcdErr.Write(w)
			} else {
				http.Error(w, err.Error(), http.StatusInternalServerError)
			}
		}
	})
}
Beispiel #20
0
// Response to the join request
func (ps *PeerServer) JoinHttpHandler(w http.ResponseWriter, req *http.Request) {
	command := &JoinCommand{}

	// Write CORS header.
	if ps.server.OriginAllowed("*") {
		w.Header().Add("Access-Control-Allow-Origin", "*")
	} else if ps.server.OriginAllowed(req.Header.Get("Origin")) {
		w.Header().Add("Access-Control-Allow-Origin", req.Header.Get("Origin"))
	}

	err := decodeJsonRequest(req, command)
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		return
	}

	log.Debugf("Receive Join Request from %s", command.Name)
	err = ps.server.Dispatch(command, w, req)

	// Return status.
	if err != nil {
		if etcdErr, ok := err.(*etcdErr.Error); ok {
			log.Debug("Return error: ", (*etcdErr).Error())
			etcdErr.Write(w)
		} else {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}
}
Beispiel #21
0
// Remove a server from the cluster
func (c *RemoveCommandV1) Apply(context raft.Context) (interface{}, error) {
	ps, _ := context.Server().Context().(*PeerServer)

	// If this is a standby then remove it and exit.
	if ps.registry.StandbyExists(c.Name) {
		return []byte{0}, ps.registry.UnregisterStandby(c.Name)
	}

	// Remove node from the shared registry.
	err := ps.registry.UnregisterPeer(c.Name)

	// Delete from stats
	delete(ps.followersStats.Followers, c.Name)

	if err != nil {
		log.Debugf("Error while unregistering: %s (%v)", c.Name, err)
		return []byte{0}, err
	}

	// Remove peer in raft
	err = context.Server().RemovePeer(c.Name)
	if err != nil {
		log.Debugf("Unable to remove peer: %s (%v)", c.Name, err)
		return []byte{0}, err
	}

	if c.Name == context.Server().Name() {
		// the removed node is this node

		// if the node is not replaying the previous logs
		// and the node has sent out a join request in this
		// start. It is sure that this node received a new remove
		// command and need to be removed
		if context.CommitIndex() > ps.joinIndex && ps.joinIndex != 0 {
			log.Debugf("server [%s] is removed", context.Server().Name())
			os.Exit(0)
		} else {
			// else ignore remove
			log.Debugf("ignore previous remove command.")
		}
	}

	b := make([]byte, 8)
	binary.PutUvarint(b, context.CommitIndex())

	return b, err
}
Beispiel #22
0
// Apply removes the given machine from the cluster.
func (c *RemoveCommandV2) Apply(context raft.Context) (interface{}, error) {
	ps, _ := context.Server().Context().(*PeerServer)
	ret, _ := json.Marshal(removeMessageV2{CommitIndex: context.CommitIndex()})

	// If this is a proxy then remove it and exit.
	if ps.registry.ProxyExists(c.Name) {
		if err := ps.registry.UnregisterProxy(c.Name); err != nil {
			return nil, err
		}
		return ret, nil
	}

	// Remove node from the shared registry.
	err := ps.registry.UnregisterPeer(c.Name)

	// Delete from stats
	delete(ps.followersStats.Followers, c.Name)

	if err != nil {
		log.Debugf("Error while unregistering: %s (%v)", c.Name, err)
		return nil, err
	}

	// Remove peer in raft
	if err := context.Server().RemovePeer(c.Name); err != nil {
		log.Debugf("Unable to remove peer: %s (%v)", c.Name, err)
		return nil, err
	}

	if c.Name == context.Server().Name() {
		// the removed node is this node

		// if the node is not replaying the previous logs
		// and the node has sent out a join request in this
		// start. It is sure that this node received a new remove
		// command and need to be removed
		if context.CommitIndex() > ps.joinIndex && ps.joinIndex != 0 {
			log.Debugf("server [%s] is removed", context.Server().Name())
			os.Exit(0)
		} else {
			// else ignore remove
			log.Debugf("ignore previous remove command.")
		}
	}
	return ret, nil
}
Beispiel #23
0
func (s *StandbyServer) join(peer string) error {
	for _, url := range s.ClusterURLs() {
		if s.Config.PeerURL == url {
			s.joinIndex = 0
			return nil
		}
	}

	// Our version must match the leaders version
	version, err := s.client.GetVersion(peer)
	if err != nil {
		log.Debugf("error getting peer version")
		return err
	}
	if version < store.MinVersion() || version > store.MaxVersion() {
		log.Debugf("fail passing version compatibility(%d-%d) using %d", store.MinVersion(), store.MaxVersion(), version)
		return fmt.Errorf("incompatible version")
	}

	// Fetch cluster config to see whether exists some place.
	clusterConfig, err := s.client.GetClusterConfig(peer)
	if err != nil {
		log.Debugf("error getting cluster config")
		return err
	}
	if clusterConfig.ActiveSize <= len(s.Cluster) {
		log.Debugf("stop joining because the cluster is full with %d nodes", len(s.Cluster))
		return fmt.Errorf("out of quota")
	}

	commitIndex, err := s.client.AddMachine(peer,
		&JoinCommand{
			MinVersion: store.MinVersion(),
			MaxVersion: store.MaxVersion(),
			Name:       s.Config.Name,
			RaftURL:    s.Config.PeerURL,
			EtcdURL:    s.Config.ClientURL,
		})
	if err != nil {
		log.Debugf("error on join request")
		return err
	}
	s.joinIndex = commitIndex

	return nil
}
Beispiel #24
0
func (s *PeerServer) doCommand(cmd raft.Command) {
	for {
		if _, err := s.raftServer.Do(cmd); err == nil {
			break
		}
	}
	log.Debugf("%s start as a leader", s.Config.Name)
}
Beispiel #25
0
// Try all possible ways to find clusters to join
// Include -discovery, -peers and log data in -data-dir
//
// Peer discovery follows this order:
// 1. -discovery
// 2. -peers
// 3. previous peers in -data-dir
func (s *PeerServer) findCluster(discoverURL string, peers []string) {
	// Attempt cluster discovery
	toDiscover := discoverURL != ""
	if toDiscover {
		discoverPeers, discoverErr := s.handleDiscovery(discoverURL)
		// It is registered in discover url
		if discoverErr == nil {
			// start as a leader in a new cluster
			if len(discoverPeers) == 0 {
				log.Debug("This peer is starting a brand new cluster based on discover URL.")
				s.startAsLeader()
			} else {
				s.startAsFollower(discoverPeers)
			}
			return
		}
	}

	hasPeerList := len(peers) > 0
	// if there is log in data dir, append previous peers to peers in config
	// to find cluster
	prevPeers := s.registry.PeerURLs(s.raftServer.Leader(), s.Config.Name)
	for i := 0; i < len(prevPeers); i++ {
		u, err := url.Parse(prevPeers[i])
		if err != nil {
			log.Debug("rejoin cannot parse url: ", err)
		}
		prevPeers[i] = u.Host
	}
	peers = append(peers, prevPeers...)

	// if there is backup peer lists, use it to find cluster
	if len(peers) > 0 {
		ok := s.joinCluster(peers)
		if !ok {
			log.Warn("No living peers are found!")
		} else {
			log.Debugf("%s restart as a follower based on peers[%v]", s.Config.Name)
			return
		}
	}

	if !s.raftServer.IsLogEmpty() {
		log.Debug("Entire cluster is down! %v will restart the cluster.", s.Config.Name)
		return
	}

	if toDiscover {
		log.Fatalf("Discovery failed, no available peers in backup list, and no log data")
	}

	if hasPeerList {
		log.Fatalf("No available peers in backup list, and no log data")
	}

	log.Infof("This peer is starting a brand new cluster now.")
	s.startAsLeader()
}
Beispiel #26
0
func (r *Registry) unregister(key, name string) error {
	r.Lock()
	defer r.Unlock()

	// Remove the key from the store.
	_, err := r.store.Delete(path.Join(key, name), false, false)
	log.Debugf("Unregister: %s", name)
	return err
}
Beispiel #27
0
func (r *Registry) register(key, name string, peerURL string, machURL string) error {
	// Write data to store.
	v := url.Values{}
	v.Set("raft", peerURL)
	v.Set("etcd", machURL)
	_, err := r.store.Create(path.Join(key, name), false, v.Encode(), false, store.Permanent)
	log.Debugf("Register: %s", name)
	return err
}
Beispiel #28
0
// Checks whether a given version is supported.
func (ps *PeerServer) VersionCheckHttpHandler(w http.ResponseWriter, req *http.Request) {
	log.Debugf("[recv] Get %s%s ", ps.Config.URL, req.URL.Path)
	vars := mux.Vars(req)
	version, _ := strconv.Atoi(vars["version"])
	if version >= store.MinVersion() && version <= store.MaxVersion() {
		w.WriteHeader(http.StatusOK)
	} else {
		w.WriteHeader(http.StatusForbidden)
	}
}
Beispiel #29
0
func (s *PeerServer) startAsLeader() {
	// leader need to join self as a peer
	for {
		_, err := s.raftServer.Do(NewJoinCommand(store.MinVersion(), store.MaxVersion(), s.raftServer.Name(), s.Config.URL, s.server.URL()))
		if err == nil {
			break
		}
	}
	log.Debugf("%s start as a leader", s.Config.Name)
}
Beispiel #30
0
// Adds a node to the registry.
func (r *Registry) Register(name string, peerURL string, url string) error {
	r.Lock()
	defer r.Unlock()

	// Write data to store.
	key := path.Join(RegistryKey, name)
	value := fmt.Sprintf("raft=%s&etcd=%s", peerURL, url)
	_, err := r.store.Create(key, false, value, false, store.Permanent)
	log.Debugf("Register: %s", name)
	return err
}