func NewClusterChecker(uid string, cfg config) (*ClusterChecker, error) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Config{
        Backend:       store.Backend(cfg.storeBackend),
        Endpoints:     cfg.storeEndpoints,
        CertFile:      cfg.storeCertFile,
        KeyFile:       cfg.storeKeyFile,
        CAFile:        cfg.storeCAFile,
        SkipTLSVerify: cfg.storeSkipTlsVerify,
    })
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    return &ClusterChecker{
        uid:              uid,
        listenAddress:    cfg.listenAddress,
        port:             cfg.port,
        stopListening:    cfg.stopListening,
        e:                e,
        endPollonProxyCh: make(chan error),
    }, nil
}
func spec(cmd *cobra.Command, args []string) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        die("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    cd, _, err := getClusterData(e)
    if err != nil {
        die("%v", err)
    }
    if cd.Cluster == nil {
        die("no cluster spec available")
    }
    if cd.Cluster.Spec == nil {
        die("no cluster spec available")
    }
    specj, err := json.MarshalIndent(cd.Cluster.Spec, "", "\t")
    if err != nil {
        die("failed to marshal spec: %v", err)
    }
    stdout("%s", specj)
}
func configReplace(cmd *cobra.Command, args []string) {
    if crOpts.file == "" {
        die("no config file provided (--file/-f option)")
    }

    config := []byte{}
    var err error
    if crOpts.file == "-" {
        config, err = ioutil.ReadAll(os.Stdin)
        if err != nil {
            die("cannot read config file from stdin: %v", err)
        }
    } else {
        config, err = ioutil.ReadFile(crOpts.file)
        if err != nil {
            die("cannot read provided config file: %v", err)
        }
    }

    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)
    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        die("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    if err = replaceConfig(e, config); err != nil {
        die("error: %v", err)
    }
}
func configGet(cmd *cobra.Command, args []string) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        die("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    cfg, err := getConfig(e)
    if err != nil {
        die("error: %v", err)
    }
    if cfg == nil {
        stdout("config is not defined")
        os.Exit(0)
    }
    cfgj, err := json.MarshalIndent(cfg, "", "\t")
    if err != nil {
        die("failed to marshal config: %v", err)
    }
    stdout(string(cfgj))
}
func NewTestEtcd(t *testing.T, dir string, a ...string) (*TestStore, error) {
    u := uuid.NewV4()
    id := fmt.Sprintf("%x", u[:4])

    dataDir := filepath.Join(dir, "etcd")

    // Hack to find a free tcp port
    ln, err := net.Listen("tcp", "localhost:0")
    if err != nil {
        return nil, err
    }
    defer ln.Close()
    ln2, err := net.Listen("tcp", "localhost:0")
    if err != nil {
        return nil, err
    }
    defer ln2.Close()

    listenAddress := ln.Addr().(*net.TCPAddr).IP.String()
    port := strconv.Itoa(ln.Addr().(*net.TCPAddr).Port)
    listenAddress2 := ln2.Addr().(*net.TCPAddr).IP.String()
    port2 := strconv.Itoa(ln2.Addr().(*net.TCPAddr).Port)

    args := []string{}
    args = append(args, fmt.Sprintf("--name=%s", id))
    args = append(args, fmt.Sprintf("--data-dir=%s", dataDir))
    args = append(args, fmt.Sprintf("--listen-client-urls=http://%s:%s", listenAddress, port))
    args = append(args, fmt.Sprintf("--advertise-client-urls=http://%s:%s", listenAddress, port))
    args = append(args, fmt.Sprintf("--listen-peer-urls=http://%s:%s", listenAddress2, port2))
    args = append(args, fmt.Sprintf("--initial-advertise-peer-urls=http://%s:%s", listenAddress2, port2))
    args = append(args, fmt.Sprintf("--initial-cluster=%s=http://%s:%s", id, listenAddress2, port2))
    args = append(args, a...)

    storeEndpoints := fmt.Sprintf("%s:%s", listenAddress, port)
    kvstore, err := store.NewStore(store.ETCD, storeEndpoints)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }

    bin := os.Getenv("ETCD_BIN")
    if bin == "" {
        return nil, fmt.Errorf("missing ETCD_BIN env")
    }
    te := &TestStore{
        t: t,
        Process: Process{
            t:    t,
            id:   id,
            name: "etcd",
            bin:  bin,
            args: args,
        },
        listenAddress: listenAddress,
        port:          port,
        store:         kvstore,
        storeBackend:  store.ETCD,
    }
    return te, nil
}
func TestInitialClusterConfig(t *testing.T) {
    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore, err := NewTestStore(dir)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.WaitUp(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store up: %v", err)
    }
    defer tstore.Stop()

    clusterName := uuid.NewV4().String()

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    tmpFile, err := ioutil.TempFile(dir, "initial-cluster-config.json")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tmpFile.Close()
    tmpFile.WriteString(`{ "synchronous_replication": true }`)

    ts, err := NewTestSentinel(dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-config=%s", tmpFile.Name()))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer ts.Stop()

    if err := WaitClusterInitialized(e, 30*time.Second); err != nil {
        t.Fatal("expected cluster initialized")
    }

    cv, _, err := e.GetClusterView()
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if !*cv.Config.SynchronousReplication {
        t.Fatal("expected cluster config with SynchronousReplication enabled")
    }
}
func TestInitialClusterSpec(t *testing.T) {
    t.Parallel()

    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore := setupStore(t, dir)
    defer tstore.Stop()

    clusterName := uuid.NewV4().String()

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    initialClusterSpec := &cluster.ClusterSpec{
        InitMode:               cluster.ClusterInitModeNew,
        SleepInterval:          cluster.Duration{Duration: 2 * time.Second},
        FailInterval:           cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout:     cluster.Duration{Duration: 30 * time.Second},
        SynchronousReplication: true,
    }
    initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer ts.Stop()

    if err := WaitClusterPhase(e, cluster.ClusterPhaseInitializing, 60*time.Second); err != nil {
        t.Fatal("expected cluster in initializing phase")
    }

    cd, _, err := e.GetClusterData()
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if !cd.Cluster.Spec.SynchronousReplication {
        t.Fatal("expected cluster spec with SynchronousReplication enabled")
    }
}
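// The tests in this section rely on a writeClusterSpec helper that is not
// shown here. The sketch below is an assumption about its shape, not the
// actual implementation: it only marshals the spec to JSON and writes it to
// a temporary file in dir, returning the file path (the temp-file prefix is
// also assumed).
func writeClusterSpec(dir string, cs *cluster.ClusterSpec) (string, error) {
    csj, err := json.Marshal(cs)
    if err != nil {
        return "", err
    }
    tmpFile, err := ioutil.TempFile(dir, "initial-cluster-spec.json")
    if err != nil {
        return "", err
    }
    defer tmpFile.Close()
    if _, err := tmpFile.Write(csj); err != nil {
        return "", err
    }
    return tmpFile.Name(), nil
}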
func NewTestEtcd(t *testing.T, dir string, a ...string) (*TestStore, error) {
    u := uuid.NewV4()
    uid := fmt.Sprintf("%x", u[:4])

    dataDir := filepath.Join(dir, "etcd")

    listenAddress, port, err := getFreePort(true, false)
    if err != nil {
        return nil, err
    }
    listenAddress2, port2, err := getFreePort(true, false)
    if err != nil {
        return nil, err
    }

    args := []string{}
    args = append(args, fmt.Sprintf("--name=%s", uid))
    args = append(args, fmt.Sprintf("--data-dir=%s", dataDir))
    args = append(args, fmt.Sprintf("--listen-client-urls=http://%s:%s", listenAddress, port))
    args = append(args, fmt.Sprintf("--advertise-client-urls=http://%s:%s", listenAddress, port))
    args = append(args, fmt.Sprintf("--listen-peer-urls=http://%s:%s", listenAddress2, port2))
    args = append(args, fmt.Sprintf("--initial-advertise-peer-urls=http://%s:%s", listenAddress2, port2))
    args = append(args, fmt.Sprintf("--initial-cluster=%s=http://%s:%s", uid, listenAddress2, port2))
    args = append(args, a...)

    storeEndpoints := fmt.Sprintf("%s:%s", listenAddress, port)

    storeConfig := store.Config{
        Backend:   store.ETCD,
        Endpoints: storeEndpoints,
    }
    kvstore, err := store.NewStore(storeConfig)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }

    bin := os.Getenv("ETCD_BIN")
    if bin == "" {
        return nil, fmt.Errorf("missing ETCD_BIN env")
    }
    tstore := &TestStore{
        t: t,
        Process: Process{
            t:    t,
            uid:  uid,
            name: "etcd",
            bin:  bin,
            args: args,
        },
        listenAddress: listenAddress,
        port:          port,
        store:         kvstore,
        storeBackend:  store.ETCD,
    }
    return tstore, nil
}
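// This version replaces the inline free-port hack from the earlier
// NewTestEtcd with a getFreePort helper that is not shown in this section.
// The following is a minimal sketch under the assumption that the two
// booleans select whether the port must be usable for TCP and/or UDP; it is
// not the real implementation. It only needs the standard library net and
// strconv packages.
func getFreePort(tcp bool, udp bool) (string, string, error) {
    // Ask the kernel for a free TCP port. This is inherently racy (the
    // port is released before the caller binds it), which is acceptable
    // for tests. The tcp parameter is kept only to match the call sites
    // above; a TCP listen is always used to pick the port.
    ln, err := net.Listen("tcp", "localhost:0")
    if err != nil {
        return "", "", err
    }
    defer ln.Close()

    addr := ln.Addr().(*net.TCPAddr)
    host := addr.IP.String()
    port := strconv.Itoa(addr.Port)

    if udp {
        // Also verify that the same port is currently free for UDP.
        uln, err := net.ListenPacket("udp", net.JoinHostPort(host, port))
        if err != nil {
            return "", "", err
        }
        uln.Close()
    }
    return host, port, nil
}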
// Tests standby elected as new master but fails to become master. Then old
// master comes back and is re-elected as master.
func testFailoverFailed(t *testing.T, syncRepl bool) {
    dir, err := ioutil.TempDir("", "stolon")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    clusterName := uuid.NewV4().String()

    tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, syncRepl, false)
    defer shutdown(tks, tss, tstore)

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    master, standbys, err := getRoles(t, tks)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    standby := standbys[0]

    if err := populate(t, master); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := write(t, master, 1, 1); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // Stop the keeper process on master, should also stop the database
    t.Logf("Stopping current master keeper: %s", master.id)
    master.Stop()

    // Wait for cluster data containing standby as master
    if err := WaitClusterDataMaster(standby.id, e, 30*time.Second); err != nil {
        t.Fatalf("expected master %q in cluster view", standby.id)
    }
    // Stop the standby before it reads the new cluster data and promotes
    t.Logf("Stopping current standby keeper: %s", standby.id)
    standby.Stop()

    t.Logf("Starting previous master keeper: %s", master.id)
    master.Start()
    // Wait for cluster data containing previous master as master
    if err := WaitClusterDataMaster(master.id, e, 30*time.Second); err != nil {
        t.Fatalf("expected master %q in cluster view", master.id)
    }
}
func NewSentinel(uid string, cfg *config, stop chan bool, end chan bool) (*Sentinel, error) {
    var initialClusterSpec *cluster.ClusterSpec
    if cfg.initialClusterSpecFile != "" {
        configData, err := ioutil.ReadFile(cfg.initialClusterSpecFile)
        if err != nil {
            return nil, fmt.Errorf("cannot read provided initial cluster config file: %v", err)
        }
        if err := json.Unmarshal(configData, &initialClusterSpec); err != nil {
            return nil, fmt.Errorf("cannot parse provided initial cluster config: %v", err)
        }
        log.Debug("initialClusterSpec dump", zap.String("initialClusterSpec", spew.Sdump(initialClusterSpec)))
        if err := initialClusterSpec.Validate(); err != nil {
            return nil, fmt.Errorf("invalid initial cluster: %v", err)
        }
    }

    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Config{
        Backend:       store.Backend(cfg.storeBackend),
        Endpoints:     cfg.storeEndpoints,
        CertFile:      cfg.storeCertFile,
        KeyFile:       cfg.storeKeyFile,
        CAFile:        cfg.storeCAFile,
        SkipTLSVerify: cfg.storeSkipTlsVerify,
    })
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    candidate := leadership.NewCandidate(kvstore, filepath.Join(storePath, common.SentinelLeaderKey), uid, store.MinTTL)

    return &Sentinel{
        uid:                uid,
        cfg:                cfg,
        e:                  e,
        candidate:          candidate,
        leader:             false,
        initialClusterSpec: initialClusterSpec,
        stop:               stop,
        end:                end,
        UIDFn:              common.UID,
        // This is just to choose a pseudo random keeper so
        // use math.rand (no need for crypto.rand) without an
        // initial seed.
        RandFn: rand.Intn,

        sleepInterval:  cluster.DefaultSleepInterval,
        requestTimeout: cluster.DefaultRequestTimeout,
    }, nil
}
func NewStore() (*store.StoreManager, error) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Config{
        Backend:       store.Backend(cfg.storeBackend),
        Endpoints:     cfg.storeEndpoints,
        CertFile:      cfg.storeCertFile,
        KeyFile:       cfg.storeKeyFile,
        CAFile:        cfg.storeCAFile,
        SkipTLSVerify: cfg.storeSkipTlsVerify,
    })
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }
    return store.NewStoreManager(kvstore, storePath), nil
}
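// This helper centralizes the store setup that is otherwise repeated in the
// command handlers above. The handler below is an illustrative sketch only
// (the clusterSpec name is hypothetical, not from the codebase) showing how a
// call site could use it instead of building the store configuration inline.
func clusterSpec(cmd *cobra.Command, args []string) {
    e, err := NewStore()
    if err != nil {
        die("cannot create store: %v", err)
    }
    cd, _, err := getClusterData(e)
    if err != nil {
        die("%v", err)
    }
    if cd.Cluster == nil || cd.Cluster.Spec == nil {
        die("no cluster spec available")
    }
    specj, err := json.MarshalIndent(cd.Cluster.Spec, "", "\t")
    if err != nil {
        die("failed to marshal spec: %v", err)
    }
    stdout("%s", specj)
}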
func NewClusterChecker(id string, cfg config) (*ClusterChecker, error) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    return &ClusterChecker{
        id:               id,
        listenAddress:    cfg.listenAddress,
        port:             cfg.port,
        stopListening:    cfg.stopListening,
        e:                e,
        endPollonProxyCh: make(chan error),
    }, nil
}
func getClusters(storeBasePath string) ([]string, error) {
    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }

    clusters := []string{}
    pairs, err := kvstore.List(storeBasePath)
    if err != nil {
        if err != libkvstore.ErrKeyNotFound {
            return nil, err
        }
        return clusters, nil
    }
    for _, pair := range pairs {
        clusters = append(clusters, filepath.Base(pair.Key))
    }
    sort.Strings(clusters)
    return clusters, nil
}
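// Usage sketch only: a hypothetical command handler (listClusters is not a
// name from the codebase) that prints the cluster names found under the
// common store base path, one per line.
func listClusters(cmd *cobra.Command, args []string) {
    clusters, err := getClusters(common.StoreBasePath)
    if err != nil {
        die("cannot list clusters: %v", err)
    }
    for _, c := range clusters {
        stdout("%s", c)
    }
}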
func configPatch(cmd *cobra.Command, args []string) {
    if len(args) > 1 {
        die("too many arguments")
    }
    if cpOpts.file == "" && len(args) < 1 {
        die("no patch provided as argument and no patch file provided (--file/-f option)")
    }
    if cpOpts.file != "" && len(args) == 1 {
        die("only one of patch provided as argument or patch file must be provided (--file/-f option)")
    }

    config := []byte{}
    if len(args) == 1 {
        config = []byte(args[0])
    } else {
        var err error
        if cpOpts.file == "-" {
            config, err = ioutil.ReadAll(os.Stdin)
            if err != nil {
                die("cannot read config file from stdin: %v", err)
            }
        } else {
            config, err = ioutil.ReadFile(cpOpts.file)
            if err != nil {
                die("cannot read provided config file: %v", err)
            }
        }
    }

    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)
    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        die("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    if err = patchConfig(e, config); err != nil {
        die("failed to patch config: %v", err)
    }
}
func NewPostgresKeeper(id string, cfg config, stop chan bool, end chan error) (*PostgresKeeper, error) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    p := &PostgresKeeper{
        id:              id,
        dataDir:         cfg.dataDir,
        e:               e,
        listenAddress:   cfg.listenAddress,
        port:            cfg.port,
        pgListenAddress: cfg.pgListenAddress,
        pgPort:          cfg.pgPort,
        stop:            stop,
        end:             end,
    }
    return p, nil
}
func TestProxyListening(t *testing.T) {
    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    clusterName := uuid.NewV4().String()

    tstore, err := NewTestStore(dir)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    tp, err := NewTestProxy(dir, clusterName, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tp.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tp.Stop()

    log.Printf("test proxy start with store down. Should not listen")
    // tp should not listen because it cannot talk with store
    if err := tp.WaitNotListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.")
    }

    tp.Stop()

    if err := tstore.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.WaitUp(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store up: %v", err)
    }
    defer func() {
        if tstore.cmd != nil {
            tstore.Stop()
        }
    }()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    pair, err := e.SetClusterData(cluster.KeepersState{},
        &cluster.ClusterView{
            Version: 1,
            Config: &cluster.NilConfig{
                SleepInterval:      &cluster.Duration{5 * time.Second},
                KeeperFailInterval: &cluster.Duration{10 * time.Second},
            },
            ProxyConf: &cluster.ProxyConf{
                // fake pg address, not relevant
                Host: "localhost",
                Port: "5432",
            },
        }, nil)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // test proxy start with the store up
    log.Printf("test proxy start with the store up. Should listen")
    if err := tp.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    log.Printf("test proxy error communicating with store. Should stop listening")
    // Stop store
    tstore.Stop()
    if err := tstore.WaitDown(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store down: %v", err)
    }
    // tp should not listen because it cannot talk with the store
    if err := tp.WaitNotListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.")
    }

    log.Printf("test proxy communication with store restored. Should start listening")
    // Start store
    if err := tstore.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.WaitUp(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store up: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    log.Printf("test proxyConf removed. Should continue listening")
    // remove proxyConf
    pair, err = e.SetClusterData(cluster.KeepersState{},
        &cluster.ClusterView{
            Version: 1,
            Config: &cluster.NilConfig{
                SleepInterval:      &cluster.Duration{5 * time.Second},
                KeeperFailInterval: &cluster.Duration{10 * time.Second},
            },
            ProxyConf: nil,
        }, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    log.Printf("test proxyConf restored. Should continue listening")
    // Set proxyConf again
    pair, err = e.SetClusterData(cluster.KeepersState{},
        &cluster.ClusterView{
            Version: 1,
            Config: &cluster.NilConfig{
                SleepInterval:      &cluster.Duration{5 * time.Second},
                KeeperFailInterval: &cluster.Duration{10 * time.Second},
            },
            ProxyConf: &cluster.ProxyConf{
                // fake pg address, not relevant
                Host: "localhost",
                Port: "5432",
            },
        }, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    log.Printf("test clusterView removed. Should continue listening")
    // remove whole clusterview
    _, err = e.SetClusterData(cluster.KeepersState{}, nil, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }
}
func testInitExisting(t *testing.T, merge bool) {
    clusterName := uuid.NewV4().String()

    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore := setupStore(t, dir)
    defer tstore.Stop()

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    initialClusterSpec := &cluster.ClusterSpec{
        InitMode:           cluster.ClusterInitModeNew,
        SleepInterval:      cluster.Duration{Duration: 2 * time.Second},
        FailInterval:       cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second},
        PGParameters: cluster.PGParameters{
            "archive_mode": "on",
        },
    }
    initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := WaitClusterPhase(e, cluster.ClusterPhaseNormal, 60*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := populate(t, tk); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := write(t, tk, 1, 1); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // Now initialize a new cluster with the existing keeper
    initialClusterSpec = &cluster.ClusterSpec{
        InitMode:           cluster.ClusterInitModeExisting,
        SleepInterval:      cluster.Duration{Duration: 2 * time.Second},
        FailInterval:       cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second},
        MergePgParameters:  &merge,
        ExistingConfig: &cluster.ExistingConfig{
            KeeperUID: tk.id,
        },
    }
    initialClusterSpecFile, err = writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // time.Sleep(1 * time.Hour)
    t.Logf("reinitializing cluster")
    // Initialize cluster with new spec
    err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "init", "-y", "-f", initialClusterSpecFile)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := WaitClusterPhase(e, cluster.ClusterPhaseInitializing, 60*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := WaitClusterPhase(e, cluster.ClusterPhaseNormal, 60*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    c, err := getLines(t, tk)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if c != 1 {
        t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c)
    }

    pgParameters, err := tk.GetPGParameters()
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    v, ok := pgParameters["archive_mode"]
    if merge && v != "on" {
        t.Fatalf("expected archive_mode == on got %q", v)
    }
    if !merge && ok {
        t.Fatalf("expected archive_mode empty")
    }

    tk.Stop()
}
func NewTestConsul(dir string, a ...string) (*TestStore, error) {
    u := uuid.NewV4()
    id := fmt.Sprintf("%x", u[:4])

    dataDir := filepath.Join(dir, "consul")

    listenAddress, portHTTP, err := getFreeTCPPort()
    if err != nil {
        return nil, err
    }
    _, portRPC, err := getFreeTCPPort()
    if err != nil {
        return nil, err
    }
    _, portSerfLan, err := getFreeTCPUDPPort()
    if err != nil {
        return nil, err
    }
    _, portSerfWan, err := getFreeTCPUDPPort()
    if err != nil {
        return nil, err
    }
    _, portServer, err := getFreeTCPPort()
    if err != nil {
        return nil, err
    }

    f, err := ioutil.TempFile(dir, "consul.json")
    if err != nil {
        return nil, err
    }
    defer f.Close()

    f.WriteString(fmt.Sprintf(`{
        "ports": {
            "dns": -1,
            "http": %s,
            "rpc": %s,
            "serf_lan": %s,
            "serf_wan": %s,
            "server": %s
        }
    }`, portHTTP, portRPC, portSerfLan, portSerfWan, portServer))

    args := []string{}
    args = append(args, "agent")
    args = append(args, "-server")
    args = append(args, fmt.Sprintf("-config-file=%s", f.Name()))
    args = append(args, fmt.Sprintf("-data-dir=%s", dataDir))
    args = append(args, fmt.Sprintf("-bind=%s", listenAddress))
    args = append(args, fmt.Sprintf("-advertise=%s", listenAddress))
    args = append(args, "-bootstrap-expect=1")
    args = append(args, a...)

    storeEndpoints := fmt.Sprintf("%s:%s", listenAddress, portHTTP)

    kvstore, err := store.NewStore(store.CONSUL, storeEndpoints)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }

    bin := os.Getenv("CONSUL_BIN")
    if bin == "" {
        return nil, fmt.Errorf("missing CONSUL_BIN env")
    }
    te := &TestStore{
        Process: Process{
            id:   id,
            name: "consul",
            bin:  bin,
            args: args,
        },
        listenAddress: listenAddress,
        port:          portHTTP,
        store:         kvstore,
        storeBackend:  store.CONSUL,
    }
    return te, nil
}
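// getFreeTCPPort and getFreeTCPUDPPort are used above but not shown in this
// section. A plausible sketch, assuming they are thin wrappers over the same
// port-picking logic sketched after the etcd helper (names and signatures are
// assumptions, not the real code):
func getFreeTCPPort() (string, string, error)    { return getFreePort(true, false) }
func getFreeTCPUDPPort() (string, string, error) { return getFreePort(true, true) }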
func TestInitUsers(t *testing.T) {
    t.Parallel()

    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore := setupStore(t, dir)
    defer tstore.Stop()

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    // Test pg-repl-username == pg-su-username but password different
    clusterName := uuid.NewV4().String()
    tk, err := NewTestKeeper(t, dir, clusterName, "user01", "password01", "user01", "password02", tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.StartExpect(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tk.Stop()
    if err := tk.cmd.Expect("provided superuser name and replication user name are the same but provided passwords are different"); err != nil {
        t.Fatalf("expecting keeper reporting provided superuser name and replication user name are the same but provided passwords are different")
    }

    // Test pg-repl-username == pg-su-username
    clusterName = uuid.NewV4().String()
    storePath := filepath.Join(common.StoreBasePath, clusterName)
    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    initialClusterSpec := &cluster.ClusterSpec{
        InitMode:           cluster.ClusterInitModeNew,
        SleepInterval:      cluster.Duration{Duration: 2 * time.Second},
        FailInterval:       cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second},
    }
    initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer ts.Stop()
    if err := WaitClusterPhase(e, cluster.ClusterPhaseInitializing, 30*time.Second); err != nil {
        t.Fatal("expected cluster in initializing phase")
    }

    tk2, err := NewTestKeeper(t, dir, clusterName, "user01", "password", "user01", "password", tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk2.StartExpect(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tk2.Stop()
    if err := tk2.cmd.ExpectTimeout("replication role added to superuser", 60*time.Second); err != nil {
        t.Fatalf("expecting keeper reporting replication role added to superuser")
    }

    // Test pg-repl-username != pg-su-username and pg-su-password defined
    clusterName = uuid.NewV4().String()
    storePath = filepath.Join(common.StoreBasePath, clusterName)
    kvstore, err = store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e = store.NewStoreManager(kvstore, storePath)

    ts2, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts2.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer ts2.Stop()
    if err := WaitClusterPhase(e, cluster.ClusterPhaseInitializing, 60*time.Second); err != nil {
        t.Fatal("expected cluster in initializing phase")
    }

    tk3, err := NewTestKeeper(t, dir, clusterName, "user01", "password", "user02", "password", tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk3.StartExpect(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tk3.Stop()
    if err := tk3.cmd.ExpectTimeout("superuser password set", 60*time.Second); err != nil {
        t.Fatalf("expecting keeper reporting superuser password set")
    }
    if err := tk3.cmd.ExpectTimeout("replication role created role=user02", 60*time.Second); err != nil {
        t.Fatalf("expecting keeper reporting replication role user02 created")
    }
}
func TestServerParameters(t *testing.T) {
    t.Parallel()

    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore, err := NewTestStore(t, dir)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.WaitUp(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store up: %v", err)
    }
    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
    defer tstore.Stop()

    clusterName := uuid.NewV4().String()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    cd := &cluster.ClusterData{
        FormatVersion: cluster.CurrentCDFormatVersion,
        Cluster: &cluster.Cluster{
            UID:        "01",
            Generation: 1,
            Spec: &cluster.ClusterSpec{
                FailInterval: cluster.Duration{Duration: 10 * time.Second},
            },
            Status: cluster.ClusterStatus{
                CurrentGeneration: 1,
                Phase:             cluster.ClusterPhaseNormal,
                Master:            tk.id,
            },
        },
        Keepers: cluster.Keepers{
            tk.id: &cluster.Keeper{
                UID:  tk.id,
                Spec: &cluster.KeeperSpec{},
                Status: cluster.KeeperStatus{
                    Healthy: true,
                },
            },
        },
        DBs: cluster.DBs{
            "01": &cluster.DB{
                UID:        "01",
                Generation: 1,
                ChangeTime: time.Time{},
                Spec: &cluster.DBSpec{
                    KeeperUID: tk.id,
                    InitMode:  cluster.DBInitModeNew,
                    Role:      common.RoleMaster,
                },
                Status: cluster.DBStatus{
                    Healthy:           false,
                    CurrentGeneration: 1,
                },
            },
        },
    }
    cd.Cluster.Spec.SetDefaults()

    pair, err := e.AtomicPutClusterData(cd, nil)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := tk.StartExpect(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tk.Stop()

    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    cd.DBs["01"].Spec.PGParameters = map[string]string{
        "unexistent_parameter": "value",
    }
    pair, err = e.AtomicPutClusterData(cd, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := tk.cmd.ExpectTimeout("postgres parameters changed, reloading postgres instance", 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // On the next keeper check they shouldn't be changed
    if err := tk.cmd.ExpectTimeout("postgres parameters not changed", 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    tk.Stop()

    // Start tk again, postgres should fail to start due to bad parameter
    if err := tk.StartExpect(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := tk.cmd.ExpectTimeout("failed to start postgres", 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // Fix wrong parameters
    cd.DBs["01"].Spec.PGParameters = map[string]string{}
    pair, err = e.AtomicPutClusterData(cd, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := tk.WaitDBUp(30 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
}
func TestInitWithMultipleKeepers(t *testing.T) {
    t.Parallel()

    dir, err := ioutil.TempDir("", "stolon")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore := setupStore(t, dir)

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    clusterName := uuid.NewV4().String()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    initialClusterSpec := &cluster.ClusterSpec{
        InitMode:           cluster.ClusterInitModeNew,
        FailInterval:       cluster.Duration{Duration: 10 * time.Second},
        ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second},
    }
    initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    tks := testKeepers{}
    tss := testSentinels{}

    // Start 3 keepers
    for i := uint8(0); i < 3; i++ {
        tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := tk.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tks[tk.id] = tk
    }

    // Start 2 sentinels
    for i := uint8(0); i < 2; i++ {
        ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := ts.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tss[ts.id] = ts
    }
    defer shutdown(tks, tss, tstore)

    // Wait for clusterView containing a master
    masterUID, err := WaitClusterDataWithMaster(e, 30*time.Second)
    if err != nil {
        t.Fatal("expected a master in cluster view")
    }
    if err := tks[masterUID].WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
}
func TestPITR(t *testing.T) {
    t.Parallel()

    dir, err := ioutil.TempDir("", "stolon")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    baseBackupDir, err := ioutil.TempDir(dir, "basebackup")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    archiveBackupDir, err := ioutil.TempDir(dir, "archivebackup")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    tstore := setupStore(t, dir)
    defer tstore.Stop()

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    clusterName := uuid.NewV4().String()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    initialClusterSpec := &cluster.ClusterSpec{
        InitMode:           cluster.ClusterInitModeNew,
        SleepInterval:      cluster.Duration{Duration: 2 * time.Second},
        FailInterval:       cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second},
        PGParameters: cluster.PGParameters{
            "archive_mode":    "on",
            "archive_command": fmt.Sprintf("cp %%p %s/%%f", archiveBackupDir),
        },
    }
    initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tk.Stop()

    ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // Wait for clusterView containing a master
    _, err = WaitClusterDataWithMaster(e, 30*time.Second)
    if err != nil {
        t.Fatal("expected a master in cluster view")
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitRole(common.RoleMaster, 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    if err := populate(t, tk); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := write(t, tk, 2, 2); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // ioutil.TempFile already creates files with 0600 permissions
    pgpass, err := ioutil.TempFile("", "pgpass")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    pgpass.WriteString(fmt.Sprintf("%s:%s:*:%s:%s\n", tk.pgListenAddress, tk.pgPort, tk.pgReplUsername, tk.pgReplPassword))

    // Don't save the wal during the basebackup (no -x option). This is to
    // test that archive_command and restore_command work correctly.
    cmd := exec.Command("pg_basebackup", "-F", "tar", "-D", baseBackupDir, "-h", tk.pgListenAddress, "-p", tk.pgPort, "-U", tk.pgReplUsername)
    cmd.Env = append(cmd.Env, fmt.Sprintf("PGPASSFILE=%s", pgpass.Name()))
    t.Logf("execing cmd: %s", cmd)
    if out, err := cmd.CombinedOutput(); err != nil {
        t.Fatalf("error: %v, output: %s", err, string(out))
    }

    // Switch wal so they will be archived
    if _, err := tk.db.Exec("select pg_switch_xlog()"); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    ts.Stop()

    // Delete the current cluster data
    if err := tstore.store.Delete(filepath.Join(storePath, "clusterdata")); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // Delete sentinel leader key to just speedup new election
    if err := tstore.store.Delete(filepath.Join(storePath, common.SentinelLeaderKey)); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // Now initialize a new cluster with the existing keeper
    initialClusterSpec = &cluster.ClusterSpec{
        InitMode:           cluster.ClusterInitModePITR,
        SleepInterval:      cluster.Duration{Duration: 2 * time.Second},
        FailInterval:       cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second},
        PITRConfig: &cluster.PITRConfig{
            DataRestoreCommand: fmt.Sprintf("tar xvf %s/base.tar -C %%d", baseBackupDir),
            ArchiveRecoverySettings: &cluster.ArchiveRecoverySettings{
                RestoreCommand: fmt.Sprintf("cp %s/%%f %%p", archiveBackupDir),
            },
        },
    }
    initialClusterSpecFile, err = writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    ts, err = NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := ts.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer ts.Stop()

    if err := WaitClusterPhase(e, cluster.ClusterPhaseNormal, 60*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    _, err = WaitClusterDataWithMaster(e, 30*time.Second)
    if err != nil {
        t.Fatal("expected a master in cluster view")
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitRole(common.RoleMaster, 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    c, err := getLines(t, tk)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if c != 1 {
        t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c)
    }
}
func setupServers(t *testing.T, clusterName, dir string, numKeepers, numSentinels uint8, syncRepl bool, usePgrewind bool) (testKeepers, testSentinels, *TestStore) {
    tstore := setupStore(t, dir)

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    initialClusterSpec := &cluster.ClusterSpec{
        InitMode:               cluster.ClusterInitModeNew,
        SleepInterval:          cluster.Duration{Duration: 2 * time.Second},
        FailInterval:           cluster.Duration{Duration: 5 * time.Second},
        ConvergenceTimeout:     cluster.Duration{Duration: 30 * time.Second},
        SynchronousReplication: syncRepl,
        UsePgrewind:            usePgrewind,
        PGParameters:           make(cluster.PGParameters),
    }
    initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    tks := map[string]*TestKeeper{}
    tss := map[string]*TestSentinel{}

    tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    tks[tk.id] = tk

    t.Logf("tk: %v", tk)

    // Start sentinels
    for i := uint8(0); i < numSentinels; i++ {
        ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := ts.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tss[ts.id] = ts
    }

    // Start first keeper
    if err := tk.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitRole(common.RoleMaster, 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // Wait for clusterView containing tk as master
    if err := WaitClusterDataMaster(tk.id, e, 30*time.Second); err != nil {
        t.Fatalf("expected master %q in cluster view", tk.id)
    }

    // Start other keepers
    for i := uint8(1); i < numKeepers; i++ {
        tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := tk.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := tk.WaitDBUp(60 * time.Second); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        // Wait for clusterView containing tk as standby
        if err := tk.WaitRole(common.RoleStandby, 30*time.Second); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tks[tk.id] = tk
    }
    return tks, tss, tstore
}
func update(cmd *cobra.Command, args []string) {
    if len(args) > 1 {
        die("too many arguments")
    }
    if updateOpts.file == "" && len(args) < 1 {
        die("no cluster spec provided as argument and no file provided (--file/-f option)")
    }
    if updateOpts.file != "" && len(args) == 1 {
        die("only one of cluster spec provided as argument or file must be provided (--file/-f option)")
    }

    data := []byte{}
    if len(args) == 1 {
        data = []byte(args[0])
    } else {
        var err error
        if updateOpts.file == "-" {
            data, err = ioutil.ReadAll(os.Stdin)
            if err != nil {
                die("cannot read from stdin: %v", err)
            }
        } else {
            data, err = ioutil.ReadFile(updateOpts.file)
            if err != nil {
                die("cannot read file: %v", err)
            }
        }
    }

    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)
    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        die("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    retry := 0
    for retry < maxRetries {
        cd, pair, err := getClusterData(e)
        if err != nil {
            die("%v", err)
        }
        if cd.Cluster == nil {
            die("no cluster spec available")
        }
        if cd.Cluster.Spec == nil {
            die("no cluster spec available")
        }

        var newcs *cluster.ClusterSpec
        if updateOpts.patch {
            newcs, err = patchClusterSpec(cd.Cluster.Spec, data)
            if err != nil {
                die("failed to patch cluster spec: %v", err)
            }
        } else {
            if err := json.Unmarshal(data, &newcs); err != nil {
                die("failed to unmarshal cluster spec: %v", err)
            }
        }
        newcs.SetDefaults()

        if err := cd.Cluster.UpdateSpec(newcs); err != nil {
            die("cannot update cluster spec: %v", err)
        }

        // retry if cd has been modified between reading and writing
        _, err = e.AtomicPutClusterData(cd, pair)
        if err != nil {
            if err == libkvstore.ErrKeyModified {
                retry++
                continue
            }
            die("cannot update cluster data: %v", err)
        }
        break
    }
    if retry == maxRetries {
        die("failed to update cluster data after %d retries", maxRetries)
    }
}
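// The read-modify-write loop above is the usual optimistic-concurrency
// pattern against the store: re-read the cluster data and retry when
// AtomicPutClusterData reports a concurrent modification. The helper below is
// only a sketch of how that pattern could be factored out for reuse; the
// atomicUpdateClusterData name and signature are assumptions, not part of the
// codebase.
func atomicUpdateClusterData(e *store.StoreManager, mutate func(cd *cluster.ClusterData) error) error {
    for retry := 0; retry < maxRetries; retry++ {
        cd, pair, err := e.GetClusterData()
        if err != nil {
            return err
        }
        if cd == nil {
            return fmt.Errorf("no cluster data available")
        }
        if err := mutate(cd); err != nil {
            return err
        }
        // Retry if cd has been modified between reading and writing.
        if _, err := e.AtomicPutClusterData(cd, pair); err != nil {
            if err == libkvstore.ErrKeyModified {
                continue
            }
            return err
        }
        return nil
    }
    return fmt.Errorf("failed to update cluster data after %d retries", maxRetries)
}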
func NewPostgresKeeper(cfg *config, stop chan bool, end chan error) (*PostgresKeeper, error) {
    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)

    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        return nil, fmt.Errorf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    p := &PostgresKeeper{
        cfg: cfg,

        dataDir: cfg.dataDir,

        storeBackend:   cfg.storeBackend,
        storeEndpoints: cfg.storeEndpoints,
        debug:          cfg.debug,

        pgListenAddress:     cfg.pgListenAddress,
        pgPort:              cfg.pgPort,
        pgBinPath:           cfg.pgBinPath,
        pgReplUsername:      cfg.pgReplUsername,
        pgReplPassword:      cfg.pgReplPassword,
        pgSUUsername:        cfg.pgSUUsername,
        pgSUPassword:        cfg.pgSUPassword,
        pgInitialSUUsername: cfg.pgInitialSUUsername,

        sleepInterval:  cluster.DefaultSleepInterval,
        requestTimeout: cluster.DefaultRequestTimeout,

        keeperLocalState: &KeeperLocalState{},
        dbLocalState:     &DBLocalState{},

        e:    e,
        stop: stop,
        end:  end,
    }

    err = p.loadKeeperLocalState()
    if err != nil && !os.IsNotExist(err) {
        return nil, fmt.Errorf("failed to load keeper local state file: %v", err)
    }
    if p.keeperLocalState.UID != "" && p.cfg.id != "" && p.keeperLocalState.UID != p.cfg.id {
        fmt.Printf("saved id %q differs from configuration id: %q\n", p.keeperLocalState.UID, cfg.id)
        os.Exit(1)
    }
    if p.keeperLocalState.UID == "" {
        p.keeperLocalState.UID = cfg.id
        if cfg.id == "" {
            p.keeperLocalState.UID = common.UID()
            log.Info("uid generated", zap.String("id", p.keeperLocalState.UID))
        }
        if err = p.saveKeeperLocalState(); err != nil {
            fmt.Printf("error: %v\n", err)
            os.Exit(1)
        }
    }

    log.Info("keeper uid", zap.String("uid", p.keeperLocalState.UID))

    err = p.loadDBLocalState()
    if err != nil && !os.IsNotExist(err) {
        return nil, fmt.Errorf("failed to load db local state file: %v", err)
    }
    return p, nil
}
func TestProxyListening(t *testing.T) {
    t.Parallel()

    dir, err := ioutil.TempDir("", "")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    clusterName := uuid.NewV4().String()

    tstore, err := NewTestStore(t, dir)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    tp, err := NewTestProxy(t, dir, clusterName, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tp.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer tp.Stop()

    t.Logf("test proxy start with store down. Should not listen")
    // tp should not listen because it cannot talk with store
    if err := tp.WaitNotListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.")
    }

    tp.Stop()

    if err := tstore.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.WaitUp(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store up: %v", err)
    }
    defer func() {
        if tstore.cmd != nil {
            tstore.Stop()
        }
    }()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    cd := &cluster.ClusterData{
        FormatVersion: cluster.CurrentCDFormatVersion,
        Cluster: &cluster.Cluster{
            UID:        "01",
            Generation: 1,
            Spec: &cluster.ClusterSpec{
                InitMode:     cluster.ClusterInitModeNew,
                FailInterval: cluster.Duration{Duration: 10 * time.Second},
            },
            Status: cluster.ClusterStatus{
                CurrentGeneration: 1,
                Phase:             cluster.ClusterPhaseNormal,
                Master:            "01",
            },
        },
        Keepers: cluster.Keepers{
            "01": &cluster.Keeper{
                UID:  "01",
                Spec: &cluster.KeeperSpec{},
                Status: cluster.KeeperStatus{
                    Healthy: true,
                },
            },
        },
        DBs: cluster.DBs{
            "01": &cluster.DB{
                UID:        "01",
                Generation: 1,
                ChangeTime: time.Time{},
                Spec: &cluster.DBSpec{
                    KeeperUID: "01",
                    Role:      common.RoleMaster,
                    Followers: []string{"02"},
                },
                Status: cluster.DBStatus{
                    Healthy:           false,
                    CurrentGeneration: 1,
                },
            },
        },
        Proxy: &cluster.Proxy{
            Spec: cluster.ProxySpec{
                MasterDBUID: "01",
            },
        },
    }
    pair, err := e.AtomicPutClusterData(cd, nil)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }

    // test proxy start with the store up
    t.Logf("test proxy start with the store up. Should listen")
    if err := tp.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    t.Logf("test proxy error communicating with store. Should stop listening")
    // Stop store
    tstore.Stop()
    if err := tstore.WaitDown(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store down: %v", err)
    }
    // tp should not listen because it cannot talk with the store
    if err := tp.WaitNotListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.")
    }

    t.Logf("test proxy communication with store restored. Should start listening")
    // Start store
    if err := tstore.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tstore.WaitUp(10 * time.Second); err != nil {
        t.Fatalf("error waiting on store up: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    t.Logf("test proxyConf removed. Should continue listening")
    // remove proxyConf
    cd.Proxy.Spec.MasterDBUID = ""
    pair, err = e.AtomicPutClusterData(cd, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    t.Logf("test proxyConf restored. Should continue listening")
    // Set proxyConf again
    cd.Proxy.Spec.MasterDBUID = "01"
    pair, err = e.AtomicPutClusterData(cd, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }

    t.Logf("test clusterView removed. Should continue listening")
    // remove whole clusterview
    _, err = e.AtomicPutClusterData(nil, pair)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // tp should listen
    if err := tp.WaitListening(10 * time.Second); err != nil {
        t.Fatalf("expecting tp listening, but it's not listening.")
    }
}
func TestInitWithMultipleKeepers(t *testing.T) {
    dir, err := ioutil.TempDir("", "stolon")
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    defer os.RemoveAll(dir)

    tstore := setupStore(t, dir)

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    clusterName := uuid.NewV4().String()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    // TODO(sgotti) change this to a call to the sentinel to change the
    // cluster config (when the sentinel's code is done)
    e.SetClusterData(cluster.KeepersState{},
        &cluster.ClusterView{
            Version: 1,
            Config: &cluster.NilConfig{
                SleepInterval:           &cluster.Duration{5 * time.Second},
                KeeperFailInterval:      &cluster.Duration{10 * time.Second},
                InitWithMultipleKeepers: cluster.BoolP(true),
            },
        }, nil)

    tks := []*TestKeeper{}
    tss := []*TestSentinel{}

    // Start 3 keepers
    for i := uint8(0); i < 3; i++ {
        tk, err := NewTestKeeper(dir, clusterName, tstore.storeBackend, storeEndpoints)
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := tk.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tks = append(tks, tk)
        if err := tk.WaitDBUp(60 * time.Second); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
    }

    // Start 2 sentinels
    for i := uint8(0); i < 2; i++ {
        ts, err := NewTestSentinel(dir, clusterName, tstore.storeBackend, storeEndpoints)
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := ts.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tss = append(tss, ts)
    }
    defer shutdown(tks, tss, tstore)

    // Wait for clusterView containing a master
    if err := WaitClusterViewWithMaster(e, 30*time.Second); err != nil {
        t.Fatal("expected a master in cluster view")
    }
}
func initCluster(cmd *cobra.Command, args []string) {
    if len(args) > 1 {
        die("too many arguments")
    }

    data := []byte{}
    switch len(args) {
    case 1:
        data = []byte(args[0])
    case 0:
        if initOpts.file != "" {
            var err error
            if initOpts.file == "-" {
                data, err = ioutil.ReadAll(os.Stdin)
                if err != nil {
                    die("cannot read from stdin: %v", err)
                }
            } else {
                data, err = ioutil.ReadFile(initOpts.file)
                if err != nil {
                    die("cannot read file: %v", err)
                }
            }
        }
    }

    storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)
    kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
    if err != nil {
        die("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    cd, _, err := e.GetClusterData()
    if err != nil {
        die("cannot get cluster data: %v", err)
    }
    if cd != nil {
        stdout("WARNING: The current cluster data will be removed")
    }
    stdout("WARNING: The databases managed by the keepers will be overwritten depending on the provided cluster spec.")

    accepted := true
    if !initOpts.forceYes {
        accepted, err = askConfirmation("Are you sure you want to continue? [yes/no] ")
        if err != nil {
            die("%v", err)
        }
    }
    if !accepted {
        stdout("exiting")
        os.Exit(0)
    }

    cd, _, err = e.GetClusterData()
    if err != nil {
        die("cannot get cluster data: %v", err)
    }

    var cs *cluster.ClusterSpec
    if len(data) == 0 {
        // Define a new cluster spec with initMode "new"
        cs = &cluster.ClusterSpec{}
        cs.InitMode = cluster.ClusterInitModeNew
    } else {
        if err := json.Unmarshal(data, &cs); err != nil {
            die("failed to unmarshal cluster spec: %v", err)
        }
    }
    cs.SetDefaults()
    if err := cs.Validate(); err != nil {
        die("invalid cluster spec: %v", err)
    }

    c := cluster.NewCluster(common.UID(), cs)
    cd = cluster.NewClusterData(c)

    // We ignore if cd has been modified between reading and writing
    if err := e.PutClusterData(cd); err != nil {
        die("cannot update cluster data: %v", err)
    }
}
func setupServers(t *testing.T, dir string, numKeepers, numSentinels uint8, syncRepl bool) ([]*TestKeeper, []*TestSentinel, *TestStore) {
    tstore := setupStore(t, dir)

    storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

    clusterName := uuid.NewV4().String()

    storePath := filepath.Join(common.StoreBasePath, clusterName)

    kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("cannot create store: %v", err)
    }
    e := store.NewStoreManager(kvstore, storePath)

    // TODO(sgotti) change this to a call to the sentinel to change the
    // cluster config (when the sentinel's code is done)
    e.SetClusterData(cluster.KeepersState{},
        &cluster.ClusterView{
            Version: 1,
            Config: &cluster.NilConfig{
                SleepInterval:          &cluster.Duration{5 * time.Second},
                KeeperFailInterval:     &cluster.Duration{10 * time.Second},
                SynchronousReplication: cluster.BoolP(syncRepl),
            },
        }, nil)

    tks := []*TestKeeper{}
    tss := []*TestSentinel{}

    tk, err := NewTestKeeper(dir, clusterName, tstore.storeBackend, storeEndpoints)
    if err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    tks = append(tks, tk)

    fmt.Printf("tk: %v\n", tk)

    // Start sentinels
    for i := uint8(0); i < numSentinels; i++ {
        ts, err := NewTestSentinel(dir, clusterName, tstore.storeBackend, storeEndpoints)
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := ts.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tss = append(tss, ts)
    }

    // Start first keeper
    if err := tk.Start(); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitDBUp(60 * time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    if err := tk.WaitRole(common.MasterRole, 30*time.Second); err != nil {
        t.Fatalf("unexpected err: %v", err)
    }
    // Wait for clusterView containing tk as master
    if err := WaitClusterViewMaster(tk.id, e, 30*time.Second); err != nil {
        t.Fatalf("expected master %q in cluster view", tk.id)
    }

    // Start other keepers
    for i := uint8(1); i < numKeepers; i++ {
        tk, err := NewTestKeeper(dir, clusterName, tstore.storeBackend, storeEndpoints)
        if err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := tk.Start(); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        if err := tk.WaitDBUp(60 * time.Second); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        // Wait for clusterView containing tk as standby
        if err := tk.WaitRole(common.StandbyRole, 30*time.Second); err != nil {
            t.Fatalf("unexpected err: %v", err)
        }
        tks = append(tks, tk)
    }
    return tks, tss, tstore
}