// RemoveMemberByName removes a member whose name matches the given name.
//
// Params:
// 	- client (client.Client): An etcd client
// 	- name (string): The name to remove
// Returns:
// 	true if the member was found, false otherwise.
func RemoveMemberByName(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) {
	cli := p.Get("client", nil).(client.Client)
	name := p.Get("name", "____").(string)
	mem := client.NewMembersAPI(cli)

	members, err := mem.List(dctx())
	if err != nil {
		log.Errf(c, "Could not get a list of members: %s", err)
		return false, err
	}

	remIDs := []string{}
	for _, member := range members {
		if member.Name == name {
			log.Infof(c, "Removing member %s (ID: %s)", name, member.ID)
			// If this is synchronizable, we should do it in parallel.
			if err := mem.Remove(dctx(), member.ID); err != nil {
				log.Errf(c, "Failed to remove member: %s", err)
				return len(remIDs) > 0, err
			}
			remIDs = append(remIDs, member.ID)
		}
	}

	return len(remIDs) > 0, nil
}
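// Note: dctx() above is a context-producing helper defined elsewhere in this
// package and not shown here. As a minimal sketch, an equivalent List call can
// be made with an explicit per-request timeout; the helper name and the
// 10-second value below are assumptions, not the package's actual code.
func listMembersWithTimeout(cli client.Client) ([]client.Member, error) {
	mem := client.NewMembersAPI(cli)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) // assumed timeout
	defer cancel()
	return mem.List(ctx)
}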
func mustNewMembersAPI(c *cli.Context) client.MembersAPI {
	eps, err := getEndpoints(c)
	if err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}

	tr, err := getTransport(c)
	if err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}

	hc, err := client.NewHTTPClient(tr, eps)
	if err != nil {
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}

	if !c.GlobalBool("no-sync") {
		ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
		err := hc.Sync(ctx)
		cancel()
		if err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
	}

	if c.GlobalBool("debug") {
		fmt.Fprintf(os.Stderr, "Cluster-Endpoints: %s\n", strings.Join(hc.Endpoints(), ", "))
	}

	return client.NewMembersAPI(hc)
}
// Ensure etcd will not panic when removing a just-started member.
func TestIssue2904(t *testing.T) {
	defer testutil.AfterTest(t)
	// start 1-member cluster to ensure member 0 is the leader of the cluster.
	c := NewCluster(t, 1)
	c.Launch(t)
	defer c.Terminate(t)

	c.AddMember(t)
	c.Members[1].Stop(t)

	// send remove member-1 request to the cluster.
	cc := MustNewHTTPClient(t, c.URLs(), nil)
	ma := client.NewMembersAPI(cc)
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	// the proposal is not committed because member 1 is stopped, but the
	// proposal is appended to the leader's raft log.
	ma.Remove(ctx, c.Members[1].s.ID().String())
	cancel()

	// restart member, and expect it to send an UpdateAttributes request.
	// the log in the leader is like this:
	// [..., remove 1, ..., update attr 1, ...]
	c.Members[1].Restart(t)
	// when the member comes back, it acks the proposal to remove itself,
	// and applies it.
	<-c.Members[1].s.StopNotify()

	// terminate removed member
	c.Members[1].Terminate(t)
	c.Members = c.Members[:1]
	// wait for the member to be removed.
	c.waitMembersMatch(t, c.HTTPMembers())
}
func (c *cluster) RemoveMember(t *testing.T, id uint64) {
	// send remove request to the cluster
	cc := mustNewHTTPClient(t, c.URLs(), c.cfg.ClientTLS)
	ma := client.NewMembersAPI(cc)
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	if err := ma.Remove(ctx, types.ID(id).String()); err != nil {
		t.Fatalf("unexpected remove error %v", err)
	}
	cancel()
	newMembers := make([]*member, 0)
	for _, m := range c.Members {
		if uint64(m.s.ID()) != id {
			newMembers = append(newMembers, m)
		} else {
			select {
			case <-m.s.StopNotify():
				m.Terminate(t)
			// 1s stop delay + election timeout + 1s disk and network delay + connection write timeout
			// TODO: remove connection write timeout by selecting on http response closeNotifier
			// blocking on https://github.com/golang/go/issues/9524
			case <-time.After(time.Second + time.Duration(electionTicks)*tickDuration + time.Second + rafthttp.ConnWriteTimeout):
				t.Fatalf("failed to remove member %s in time", m.s.ID())
			}
		}
	}
	c.Members = newMembers
	c.waitMembersMatch(t, c.HTTPMembers())
}
func (c *cluster) RemoveMember(t *testing.T, id uint64) {
	// send remove request to the cluster
	cc := mustNewHTTPClient(t, []string{c.URL(0)})
	ma := client.NewMembersAPI(cc)
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	if err := ma.Remove(ctx, types.ID(id).String()); err != nil {
		t.Fatalf("unexpected remove error %v", err)
	}
	cancel()
	newMembers := make([]*member, 0)
	for _, m := range c.Members {
		if uint64(m.s.ID()) != id {
			newMembers = append(newMembers, m)
		} else {
			select {
			case <-m.s.StopNotify():
				m.Terminate(t)
			// stop delay / election timeout + 1s disk and network delay
			case <-time.After(time.Duration(electionTicks)*tickDuration + time.Second):
				t.Fatalf("failed to remove member %s in time", m.s.ID())
			}
		}
	}
	c.Members = newMembers
	c.waitMembersMatch(t, c.HTTPMembers())
}
func (c *cluster) addMember(t *testing.T) {
	m := c.mustNewMember(t)

	scheme := "http"
	if c.cfg.PeerTLS != nil {
		scheme = "https"
	}

	// send add request to the cluster
	cc := mustNewHTTPClient(t, []string{c.URL(0)}, c.cfg.ClientTLS)
	ma := client.NewMembersAPI(cc)
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	peerURL := scheme + "://" + m.PeerListeners[0].Addr().String()
	if _, err := ma.Add(ctx, peerURL); err != nil {
		t.Fatalf("add member on %s error: %v", c.URL(0), err)
	}
	cancel()

	// wait for the add node entry applied in the cluster
	members := append(c.HTTPMembers(), client.Member{PeerURLs: []string{peerURL}, ClientURLs: []string{}})
	c.waitMembersMatch(t, members)

	m.InitialPeerURLsMap = types.URLsMap{}
	for _, mm := range c.Members {
		m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs
	}
	m.InitialPeerURLsMap[m.Name] = m.PeerURLs
	m.NewCluster = false
	if err := m.Launch(); err != nil {
		t.Fatal(err)
	}
	c.Members = append(c.Members, m)
	// wait cluster to be stable to receive future client requests
	c.waitMembersMatch(t, c.HTTPMembers())
}
func (c *cluster) AddMember(t *testing.T) {
	clusterStr := c.Members[0].Cluster.String()
	idx := len(c.Members)
	m := mustNewMember(t, c.name(idx))

	// send add request to the cluster
	cc := mustNewHTTPClient(t, []string{c.URL(0)})
	ma := client.NewMembersAPI(cc)
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	peerURL := "http://" + m.PeerListeners[0].Addr().String()
	if _, err := ma.Add(ctx, peerURL); err != nil {
		t.Fatalf("add member on %s error: %v", c.URL(0), err)
	}
	cancel()

	// wait for the add node entry applied in the cluster
	members := append(c.HTTPMembers(), httptypes.Member{PeerURLs: []string{peerURL}, ClientURLs: []string{}})
	c.waitMembersMatch(t, members)

	for _, ln := range m.PeerListeners {
		clusterStr += fmt.Sprintf(",%s=http://%s", m.Name, ln.Addr().String())
	}
	var err error
	m.Cluster, err = etcdserver.NewClusterFromString(clusterName, clusterStr)
	if err != nil {
		t.Fatal(err)
	}
	m.NewCluster = false
	if err := m.Launch(); err != nil {
		t.Fatal(err)
	}
	c.Members = append(c.Members, m)
	// wait cluster to be stable to receive future client requests
	c.waitMembersMatch(t, c.HTTPMembers())
}
// Creates a new storage interface from the client.
// TODO: deprecate in favor of storage.Config abstraction over time
func NewEtcdStorage(client etcd.Client, codec runtime.Codec, prefix string, quorum bool) storage.Interface {
	return &etcdHelper{
		etcdMembersAPI: etcd.NewMembersAPI(client),
		etcdKeysAPI:    etcd.NewKeysAPI(client),
		codec:          codec,
		versioner:      APIObjectVersioner{},
		copier:         api.Scheme,
		pathPrefix:     path.Join("/", prefix),
		quorum:         quorum,
		cache:          util.NewCache(maxEtcdCacheEntries),
	}
}
func newClient(url string, transport *http.Transport) (*Client, error) {
	cfg := client.Config{
		Transport: transport,
		Endpoints: []string{url},
	}
	c, err := client.New(cfg)
	if err != nil {
		return nil, err
	}
	return &Client{c, client.NewMembersAPI(c)}, nil
}
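// The Client type returned above is defined elsewhere in this package. Based
// on the positional literal &Client{c, client.NewMembersAPI(c)}, a minimal
// sketch of what it could look like follows; the field name is an assumption.
type Client struct {
	client.Client                    // embedded raw etcd client
	membersAPI    client.MembersAPI  // members API bound to the same client (assumed field name)
}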
// newEtcdClient creates a new etcd client wrapper
func newEtcdClient(endpoints []string) (*etcdClient, error) {
	glog.V(3).Infof("creating a new etcd client, endpoints: %s", strings.Join(endpoints, ","))
	// step: create a client for etcd
	c, err := etcd.New(etcd.Config{Endpoints: endpoints})
	if err != nil {
		return nil, err
	}

	return &etcdClient{
		c:      c,
		client: etcd.NewMembersAPI(c),
	}, nil
}
func (c *cluster) addMemberByURL(t *testing.T, clientURL, peerURL string) error {
	cc := mustNewHTTPClient(t, []string{clientURL}, c.cfg.ClientTLS)
	ma := client.NewMembersAPI(cc)
	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
	if _, err := ma.Add(ctx, peerURL); err != nil {
		return err
	}
	cancel()

	// wait for the add node entry applied in the cluster
	members := append(c.HTTPMembers(), client.Member{PeerURLs: []string{peerURL}, ClientURLs: []string{}})
	c.waitMembersMatch(t, members)
	return nil
}
// waitUntilUp waits until etcd is up and its membership has propagated correctly
func (m *EtcdTestServer) waitUntilUp() error {
	membersAPI := etcd.NewMembersAPI(m.Client)
	for start := time.Now(); time.Since(start) < wait.ForeverTestTimeout; time.Sleep(10 * time.Millisecond) {
		members, err := membersAPI.List(context.TODO())
		if err != nil {
			glog.Errorf("Error when getting etcd cluster members")
			continue
		}
		if len(members) == 1 && len(members[0].ClientURLs) > 0 {
			return nil
		}
	}
	return fmt.Errorf("timeout on waiting for etcd cluster")
}
// Implements storage.Interface.
func (h *etcdHelper) Backends(ctx context.Context) []string {
	if ctx == nil {
		glog.Errorf("Context is nil")
	}
	membersAPI := etcd.NewMembersAPI(h.etcdclient)
	members, err := membersAPI.List(ctx)
	if err != nil {
		glog.Errorf("Error obtaining etcd members list: %q", err)
		return nil
	}
	mlist := []string{}
	for _, member := range members {
		mlist = append(mlist, member.ClientURLs...)
	}
	return mlist
}
func (c *cluster) waitMembersMatch(t *testing.T, membs []client.Member) {
	for _, u := range c.URLs() {
		cc := mustNewHTTPClient(t, []string{u}, c.cfg.ClientTLS)
		ma := client.NewMembersAPI(cc)
		for {
			ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
			ms, err := ma.List(ctx)
			cancel()
			if err == nil && isMembersEqual(ms, membs) {
				break
			}
			time.Sleep(tickDuration)
		}
	}
	return
}
func (c *EtcdCluster) waitMembersMatch(membs []client.Member) {
	time.Sleep(200 * time.Millisecond)
	for _, u := range c.URLs() {
		cc := mustNewHTTPClient([]string{u})
		ma := client.NewMembersAPI(cc)
		for {
			ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
			ms, err := ma.List(ctx)
			cancel()
			if err == nil && isMembersEqual(ms, membs) {
				break
			}
			time.Sleep(tickDuration)
		}
	}
	return
}
// AddMember adds a new member to the cluster.
//
// Conceptually, this is equivalent to `etcdctl member add NAME IP`.
//
// Params:
// 	- client (client.Client): An etcd client
// 	- name (string): The name of the member to add.
// 	- url (string): The peer ip:port or domain:port to use.
//
// Returns:
// 	An etcd *client.Member.
func AddMember(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) {
	cli := p.Get("client", nil).(client.Client)
	name := p.Get("name", "default").(string)
	addr := p.Get("url", "127.0.0.1:2380").(string)

	mem := client.NewMembersAPI(cli)

	member, err := mem.Add(dctx(), addr)
	if err != nil {
		log.Errf(c, "Failed to add %s to cluster: %s", addr, err)
		return nil, err
	}

	log.Infof(c, "Added %s (%s) to cluster", addr, member.ID)
	member.Name = name

	return member, nil
}
// EtcdMembers returns a string suitable for `-initial-cluster`.
// This is the etcd cluster the Blacksmith instance is using as its datastore.
func (ds *EtcdDataSource) EtcdMembers() (string, error) {
	membersAPI := etcd.NewMembersAPI(ds.client)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	members, err := membersAPI.List(ctx)
	if err != nil {
		return "", fmt.Errorf("Error while checking etcd members: %s", err)
	}

	var peers []string
	for _, member := range members {
		for _, peer := range member.PeerURLs {
			peers = append(peers, fmt.Sprintf("%s=%s", member.Name, peer))
		}
	}

	return strings.Join(peers, ","), err
}
func main() {
	pt := &http.Transport{
		// timeouts taken from http.DefaultTransport
		Dial: (&net.Dialer{
			Timeout:   30 * time.Second,
			KeepAlive: 30 * time.Second,
		}).Dial,
		TLSHandshakeTimeout: 10 * time.Second,
	}

	ma, _ := client.NewMembersAPI(pt, "http://localhost:7001", 15*time.Second)

	// TODO(philips): persist to disk
	memURLs := []string{"http://localhost:4001"}
	uf := func() []string {
		mems, err := ma.List()
		if err != nil {
			log.Print("unable to list members.")
			return memURLs
		}

		for _, k := range mems {
			memURLs = append(memURLs, k.ClientURLs...)
		}

		return memURLs
	}
	ph := proxy.NewHandler(pt, uf)
	ph = http.HandlerFunc(wo(ph))
	ph = http.HandlerFunc(ro(ph))

	l, err := net.Listen("tcp", *addr)
	if err != nil {
		log.Fatal(err)
	}

	log.Print("etcd: proxy listening for client requests on ", *addr)
	log.Fatal(http.Serve(l, ph))
}
// GetInitialCluster gets the initial cluster members.
//
// When adding a new node to a cluster, etcd requires that you pass it
// a list of initial members, in the form "MEMBERNAME=URL". This command
// generates that list and puts it into the environment variable
// ETCD_INITIAL_CLUSTER.
//
// Params:
// 	client (client.Client): An etcd client.
// Returns:
// 	string representation of the list, also put into the environment.
func GetInitialCluster(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) {
	cli := p.Get("client", nil).(client.Client)
	mem := client.NewMembersAPI(cli)
	members, err := mem.List(dctx())
	if err != nil {
		return "", err
	}

	b := []string{}
	for _, member := range members {
		for _, purl := range member.PeerURLs {
			if member.Name == "" {
				member.Name = os.Getenv("HOSTNAME")
			}
			b = append(b, member.Name+"="+purl)
		}
	}

	ic := strings.Join(b, ",")
	log.Infof(c, "ETCD_INITIAL_CLUSTER=%s", ic)
	os.Setenv("ETCD_INITIAL_CLUSTER", ic)
	return ic, nil
}
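// For illustration, the generated value follows etcd's "NAME=PEER_URL"
// comma-separated form; the member names and addresses below are hypothetical,
// not taken from this package:
//
//	ETCD_INITIAL_CLUSTER=etcd-0=http://10.0.1.10:2380,etcd-1=http://10.0.1.11:2380,etcd-2=http://10.0.1.12:2380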
func mustNewMembersAPI(c *cli.Context) client.MembersAPI {
	return client.NewMembersAPI(mustNewClient(c))
}
// RemoveStaleMembers deletes cluster members whose pods are no longer running.
//
// This queries Kubernetes to determine what etcd pods are running, and then
// compares that to the member list in the etcd cluster. It removes any
// cluster members who are no longer in the pod list.
//
// The purpose of this is to keep the cluster membership from deadlocking
// when inactive members prevent consensus building.
//
// Params:
// 	- client (etcd/client.Client): The etcd client
// 	- label (string): The pod label indicating an etcd node
// 	- namespace (string): The namespace we're operating in
func RemoveStaleMembers(c cookoo.Context, p *cookoo.Params) (interface{}, cookoo.Interrupt) {
	eclient := p.Get("client", nil).(client.Client)
	label := p.Get("label", "name=deis-etcd-1").(string)
	ns := p.Get("namespace", "default").(string)

	// Should probably pass in the client from the context.
	klient, err := k8s.PodClient()
	if err != nil {
		log.Errf(c, "Could not create a Kubernetes client: %s", err)
		return nil, err
	}

	mapi := client.NewMembersAPI(eclient)

	members := map[string]bool{}
	idmap := map[string]string{}

	// Get members from etcd
	mm, err := mapi.List(dctx())
	if err != nil {
		log.Warnf(c, "Could not get a list of etcd members: %s", err)
		return nil, err
	}
	for _, member := range mm {
		members[member.Name] = false
		idmap[member.Name] = member.ID
	}

	// Get the pods running with the given label
	labelSelector, err := labels.Parse(label)
	if err != nil {
		log.Errf(c, "Selector failed to parse: %s", err)
		return nil, err
	}
	pods, err := klient.Pods(ns).List(api.ListOptions{LabelSelector: labelSelector})
	if err != nil {
		return nil, err
	}

	for _, item := range pods.Items {
		if _, ok := members[item.Name]; !ok {
			log.Infof(c, "Etcd pod %s is not in cluster yet.", item.Name)
		} else {
			members[item.Name] = true
		}
	}

	// Anything marked false in members should be removed from etcd.
	deleted := 0
	for k, v := range members {
		if !v {
			log.Infof(c, "Deleting %s (%s) from etcd cluster members", k, idmap[k])
			if err := mapi.Remove(dctx(), idmap[k]); err != nil {
				log.Errf(c, "Failed to remove %s from cluster. Skipping. %s", k, err)
			} else {
				deleted++
			}
		}
	}

	return deleted, nil
}
func handleClusterHealth(c *cli.Context) {
	forever := c.Bool("forever")
	if forever {
		sigch := make(chan os.Signal, 1)
		signal.Notify(sigch, os.Interrupt)

		go func() {
			<-sigch
			os.Exit(0)
		}()
	}

	tr, err := getTransport(c)
	if err != nil {
		handleError(ExitServerError, err)
	}

	hc := http.Client{
		Transport: tr,
	}

	cln := mustNewClientNoSync(c)
	mi := client.NewMembersAPI(cln)
	ms, err := mi.List(context.TODO())
	if err != nil {
		fmt.Println("cluster may be unhealthy: failed to list members")
		handleError(ExitServerError, err)
	}

	for {
		health := false
		for _, m := range ms {
			if len(m.ClientURLs) == 0 {
				fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
				continue
			}

			checked := false
			for _, url := range m.ClientURLs {
				resp, err := hc.Get(url + "/health")
				if err != nil {
					fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
					continue
				}

				result := struct{ Health string }{}
				nresult := struct{ Health bool }{}
				bytes, err := ioutil.ReadAll(resp.Body)
				if err != nil {
					fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
					continue
				}
				resp.Body.Close()

				err = json.Unmarshal(bytes, &result)
				if err != nil {
					err = json.Unmarshal(bytes, &nresult)
				}
				if err != nil {
					fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
					continue
				}

				checked = true
				if result.Health == "true" || nresult.Health == true {
					health = true
					fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
				} else {
					fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
				}
				break
			}
			if !checked {
				fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
			}
		}
		if health {
			fmt.Println("cluster is healthy")
		} else {
			fmt.Println("cluster is unhealthy")
		}

		if !forever {
			if health {
				os.Exit(ExitSuccess)
			} else {
				os.Exit(ExitClusterNotHealthy)
			}
		}

		fmt.Printf("\nnext check after 10 second...\n\n")
		time.Sleep(10 * time.Second)
	}
}
// Check if we can talk to etcd
func (es *etcdStore) Ping() error {
	_, err := etcd.NewMembersAPI(es.EtcdClient()).List(es.ctx)
	return err
}
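// Ping above relies on two members of etcdStore that are defined elsewhere in
// the package: an accessor for the underlying etcd client and a stored
// context. A minimal sketch of those pieces; the field and method layout here
// is an assumption, not the package's actual definition.
type etcdStore struct {
	client etcd.Client     // backing client returned by EtcdClient() (assumed field)
	ctx    context.Context // context used for member-list requests (assumed field)
}

// EtcdClient returns the underlying etcd client (assumed accessor).
func (es *etcdStore) EtcdClient() etcd.Client { return es.client }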