func configReplace(cmd *cobra.Command, args []string) { if crOpts.file == "" { die("no config file provided (--file/-f option)") } config := []byte{} var err error if crOpts.file == "-" { config, err = ioutil.ReadAll(os.Stdin) if err != nil { die("cannot read config file from stdin: %v", err) } } else { config, err = ioutil.ReadFile(crOpts.file) if err != nil { die("cannot read provided config file: %v", err) } } storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { die("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) if err = replaceConfig(e, config); err != nil { die("error: %v", err) } }
func spec(cmd *cobra.Command, args []string) { storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { die("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) cd, _, err := getClusterData(e) if err != nil { die("%v", err) } if cd.Cluster == nil { die("no cluster spec available") } if cd.Cluster.Spec == nil { die("no cluster spec available") } specj, err := json.MarshalIndent(cd.Cluster.Spec, "", "\t") if err != nil { die("failed to marshall spec: %v", err) } stdout("%s", specj) }
func configGet(cmd *cobra.Command, args []string) { storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { die("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) cfg, err := getConfig(e) if err != nil { die("error: %v", err) } if cfg == nil { stdout("config is not defined") os.Exit(0) } cfgj, err := json.MarshalIndent(cfg, "", "\t") if err != nil { die("failed to marshall config: %v", err) } stdout(string(cfgj)) }
func NewClusterChecker(uid string, cfg config) (*ClusterChecker, error) { storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Config{ Backend: store.Backend(cfg.storeBackend), Endpoints: cfg.storeEndpoints, CertFile: cfg.storeCertFile, KeyFile: cfg.storeKeyFile, CAFile: cfg.storeCAFile, SkipTLSVerify: cfg.storeSkipTlsVerify, }) if err != nil { return nil, fmt.Errorf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) return &ClusterChecker{ uid: uid, listenAddress: cfg.listenAddress, port: cfg.port, stopListening: cfg.stopListening, e: e, endPollonProxyCh: make(chan error), }, nil }
// Tests standby elected as new master but fails to become master. Then old // master comes back and is re-elected as master. func testFailoverFailed(t *testing.T, syncRepl bool) { dir, err := ioutil.TempDir("", "stolon") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) clusterName := uuid.NewV4().String() tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, syncRepl, false) defer shutdown(tks, tss, tstore) storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) master, standbys := waitMasterStandbysReady(t, sm, tks) standby := standbys[0] if syncRepl { if err := WaitClusterDataSynchronousStandbys([]string{standby.uid}, sm, 30*time.Second); err != nil { t.Fatalf("expected synchronous standby on keeper %q in cluster data", standby.uid) } } if err := populate(t, master); err != nil { t.Fatalf("unexpected err: %v", err) } if err := write(t, master, 1, 1); err != nil { t.Fatalf("unexpected err: %v", err) } // Stop the keeper process on master, should also stop the database t.Logf("Stopping current master keeper: %s", master.uid) master.Stop() // Wait for cluster data containing standby as master if err := WaitClusterDataMaster(standby.uid, sm, 30*time.Second); err != nil { t.Fatalf("expected master %q in cluster view", standby.uid) } // Stopping standby before reading the new cluster data and promoting // TODO(sgotti) this is flacky and the standby can read the data and // publish new state before it's stopped t.Logf("Stopping current standby keeper: %s", standby.uid) standby.Stop() t.Logf("Starting previous master keeper: %s", master.uid) master.Start() // Wait for cluster data containing previous master as master err = WaitClusterDataMaster(master.uid, sm, 30*time.Second) if !syncRepl && err != nil { t.Fatalf("expected master %q in cluster view", master.uid) } if syncRepl { if err == nil { t.Fatalf("expected timeout since with synchronous replication the old master 
shouldn't be elected as master") } } }
func TestInitialClusterConfig(t *testing.T) { dir, err := ioutil.TempDir("", "") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) tstore, err := NewTestStore(dir) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tstore.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } if err := tstore.WaitUp(10 * time.Second); err != nil { t.Fatalf("error waiting on store up: %v", err) } defer tstore.Stop() clusterName := uuid.NewV4().String() storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) storePath := filepath.Join(common.StoreBasePath, clusterName) kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) tmpFile, err := ioutil.TempFile(dir, "initial-cluster-config.json") if err != nil { t.Fatalf("unexpected err: %v", err) } defer tmpFile.Close() tmpFile.WriteString(`{ "synchronous_replication": true }`) ts, err := NewTestSentinel(dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-config=%s", tmpFile.Name())) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := ts.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } defer ts.Stop() if err := WaitClusterInitialized(e, 30*time.Second); err != nil { t.Fatal("expected cluster initialized") } cv, _, err := e.GetClusterView() if err != nil { t.Fatalf("unexpected err: %v", err) } if !*cv.Config.SynchronousReplication { t.Fatal("expected cluster config with InitWithMultipleKeepers enabled") } }
func TestInitialClusterSpec(t *testing.T) { t.Parallel() dir, err := ioutil.TempDir("", "") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) tstore := setupStore(t, dir) defer tstore.Stop() clusterName := uuid.NewV4().String() storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) storePath := filepath.Join(common.StoreBasePath, clusterName) kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) initialClusterSpec := &cluster.ClusterSpec{ InitMode: cluster.ClusterInitModeNew, SleepInterval: cluster.Duration{Duration: 2 * time.Second}, FailInterval: cluster.Duration{Duration: 5 * time.Second}, ConvergenceTimeout: cluster.Duration{Duration: 30 * time.Second}, SynchronousReplication: true, } initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec) if err != nil { t.Fatalf("unexpected err: %v", err) } ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile)) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := ts.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } defer ts.Stop() if err := WaitClusterPhase(e, cluster.ClusterPhaseInitializing, 60*time.Second); err != nil { t.Fatal("expected cluster in initializing phase") } cd, _, err := e.GetClusterData() if err != nil { t.Fatalf("unexpected err: %v", err) } if !cd.Cluster.Spec.SynchronousReplication { t.Fatal("expected cluster spec with SynchronousReplication enabled") } }
// tests that a master restart with changed address for both keeper and // postgres (without triggering failover since it restart before being marked // ad failed) make the slave continue to sync using the new address func TestMasterChangedAddress(t *testing.T) { t.Parallel() dir, err := ioutil.TempDir("", "stolon") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) clusterName := uuid.NewV4().String() tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, false, false) defer shutdown(tks, tss, tstore) storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) master, standbys := waitMasterStandbysReady(t, sm, tks) if err := populate(t, master); err != nil { t.Fatalf("unexpected err: %v", err) } if err := write(t, master, 1, 1); err != nil { t.Fatalf("unexpected err: %v", err) } // Wait standby synced with master if err := waitLines(t, master, 1, 60*time.Second); err != nil { t.Fatalf("unexpected err: %v", err) } // Restart the keeper process on master with new keeper and postgres // addresses (in this case only the port is changed) t.Logf("Restarting current master keeper %q with different addresses", master.uid) master.Stop() storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) master, err = NewTestKeeperWithID(t, dir, master.uid, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints) tks[master.uid] = master if err := master.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } if err := master.WaitRole(common.RoleMaster, 30*time.Second); err != nil { t.Fatalf("unexpected err: %v", err) } if err := write(t, master, 2, 2); err != nil { t.Fatalf("unexpected err: %v", err) } // Wait standby synced to master with changed address if err := waitLines(t, standbys[0], 2, 60*time.Second); err != nil { t.Fatalf("unexpected err: %v", err) } }
// Tests standby elected as new master but fails to become master. Then old // master comes back and is re-elected as master. func testFailoverFailed(t *testing.T, syncRepl bool) { dir, err := ioutil.TempDir("", "stolon") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) clusterName := uuid.NewV4().String() tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, syncRepl, false) defer shutdown(tks, tss, tstore) storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) storePath := filepath.Join(common.StoreBasePath, clusterName) kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) master, standbys, err := getRoles(t, tks) if err != nil { t.Fatalf("unexpected err: %v", err) } standby := standbys[0] if err := populate(t, master); err != nil { t.Fatalf("unexpected err: %v", err) } if err := write(t, master, 1, 1); err != nil { t.Fatalf("unexpected err: %v", err) } // Stop the keeper process on master, should also stop the database t.Logf("Stopping current master keeper: %s", master.id) master.Stop() // Wait for cluster data containing standy as master if err := WaitClusterDataMaster(standby.id, e, 30*time.Second); err != nil { t.Fatalf("expected master %q in cluster view", standby.id) } // Stopping standby before reading the new cluster data and promoting t.Logf("Stopping current stanby keeper: %s", master.id) standby.Stop() t.Logf("Starting previous master keeper: %s", master.id) master.Start() // Wait for cluster data containing previous master as master if err := WaitClusterDataMaster(master.id, e, 30*time.Second); err != nil { t.Fatalf("expected master %q in cluster view", master.id) } }
// NewSentinel creates a Sentinel bound to the configured store. When an
// initial cluster spec file is given it is read, parsed and validated here so
// configuration errors surface before the sentinel starts running.
func NewSentinel(uid string, cfg *config, stop chan bool, end chan bool) (*Sentinel, error) {
	var initialClusterSpec *cluster.ClusterSpec
	if cfg.initialClusterSpecFile != "" {
		configData, err := ioutil.ReadFile(cfg.initialClusterSpecFile)
		if err != nil {
			return nil, fmt.Errorf("cannot read provided initial cluster config file: %v", err)
		}
		if err := json.Unmarshal(configData, &initialClusterSpec); err != nil {
			return nil, fmt.Errorf("cannot parse provided initial cluster config: %v", err)
		}
		log.Debug("initialClusterSpec dump", zap.String("initialClusterSpec", spew.Sdump(initialClusterSpec)))
		// Reject invalid specs early, before any store interaction.
		if err := initialClusterSpec.Validate(); err != nil {
			return nil, fmt.Errorf("invalid initial cluster: %v", err)
		}
	}

	storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)
	kvstore, err := store.NewStore(store.Config{
		Backend:       store.Backend(cfg.storeBackend),
		Endpoints:     cfg.storeEndpoints,
		CertFile:      cfg.storeCertFile,
		KeyFile:       cfg.storeKeyFile,
		CAFile:        cfg.storeCAFile,
		SkipTLSVerify: cfg.storeSkipTlsVerify,
	})
	if err != nil {
		return nil, fmt.Errorf("cannot create store: %v", err)
	}
	e := store.NewStoreManager(kvstore, storePath)

	// Candidate used to campaign for sentinel leadership under the cluster's
	// store path.
	candidate := leadership.NewCandidate(kvstore, filepath.Join(storePath, common.SentinelLeaderKey), uid, store.MinTTL)

	return &Sentinel{
		uid:                uid,
		cfg:                cfg,
		e:                  e,
		candidate:          candidate,
		leader:             false,
		initialClusterSpec: initialClusterSpec,
		stop:               stop,
		end:                end,
		UIDFn:              common.UID,
		// This is just to choose a pseudo random keeper so
		// use math.rand (no need for crypto.rand) without an
		// initial seed.
		RandFn:         rand.Intn,
		sleepInterval:  cluster.DefaultSleepInterval,
		requestTimeout: cluster.DefaultRequestTimeout,
	}, nil
}
func NewStore() (*store.StoreManager, error) { storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Config{ Backend: store.Backend(cfg.storeBackend), Endpoints: cfg.storeEndpoints, CertFile: cfg.storeCertFile, KeyFile: cfg.storeKeyFile, CAFile: cfg.storeCAFile, SkipTLSVerify: cfg.storeSkipTlsVerify, }) if err != nil { return nil, fmt.Errorf("cannot create store: %v", err) } return store.NewStoreManager(kvstore, storePath), nil }
func testFailover(t *testing.T, syncRepl bool) { dir, err := ioutil.TempDir("", "stolon") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) clusterName := uuid.NewV4().String() tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, syncRepl, false) defer shutdown(tks, tss, tstore) storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) master, standbys := waitMasterStandbysReady(t, sm, tks) standby := standbys[0] if syncRepl { if err := WaitClusterDataSynchronousStandbys([]string{standby.uid}, sm, 30*time.Second); err != nil { t.Fatalf("expected synchronous standby on keeper %q in cluster data", standby.uid) } } if err := populate(t, master); err != nil { t.Fatalf("unexpected err: %v", err) } if err := write(t, master, 1, 1); err != nil { t.Fatalf("unexpected err: %v", err) } // Stop the keeper process on master, should also stop the database t.Logf("Stopping current master keeper: %s", master.uid) master.Stop() if err := standby.WaitRole(common.RoleMaster, 30*time.Second); err != nil { t.Fatalf("unexpected err: %v", err) } c, err := getLines(t, standby) if err != nil { t.Fatalf("unexpected err: %v", err) } if c != 1 { t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c) } }
func NewClusterChecker(id string, cfg config) (*ClusterChecker, error) { storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { return nil, fmt.Errorf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) return &ClusterChecker{ id: id, listenAddress: cfg.listenAddress, port: cfg.port, stopListening: cfg.stopListening, e: e, endPollonProxyCh: make(chan error), }, nil }
func configPatch(cmd *cobra.Command, args []string) { if len(args) > 1 { die("too many arguments") } if cpOpts.file == "" && len(args) < 1 { die("no patch provided as argument and no patch file provided (--file/-f option)") } if cpOpts.file != "" && len(args) == 1 { die("only one of patch provided as argument or patch file must provided (--file/-f option)") } config := []byte{} if len(args) == 1 { config = []byte(args[0]) } else { var err error if cpOpts.file == "-" { config, err = ioutil.ReadAll(os.Stdin) if err != nil { die("cannot read config file from stdin: %v", err) } } else { config, err = ioutil.ReadFile(cpOpts.file) if err != nil { die("cannot read provided config file: %v", err) } } } storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { die("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) if err = patchConfig(e, config); err != nil { die("failed to patch config: %v", err) } }
func NewPostgresKeeper(id string, cfg config, stop chan bool, end chan error) (*PostgresKeeper, error) { storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { return nil, fmt.Errorf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) p := &PostgresKeeper{id: id, dataDir: cfg.dataDir, e: e, listenAddress: cfg.listenAddress, port: cfg.port, pgListenAddress: cfg.pgListenAddress, pgPort: cfg.pgPort, stop: stop, end: end, } return p, nil }
func TestInitUsers(t *testing.T) { t.Parallel() dir, err := ioutil.TempDir("", "") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) tstore := setupStore(t, dir) defer tstore.Stop() storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) // Test pg-repl-username == pg-su-username but password different clusterName := uuid.NewV4().String() tk, err := NewTestKeeper(t, dir, clusterName, "user01", "password01", "user01", "password02", tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tk.StartExpect(); err != nil { t.Fatalf("unexpected err: %v", err) } defer tk.Stop() if err := tk.cmd.Expect("provided superuser name and replication user name are the same but provided passwords are different"); err != nil { t.Fatalf("expecting keeper reporting provided superuser name and replication user name are the same but provided passwords are different") } // Test pg-repl-username == pg-su-username clusterName = uuid.NewV4().String() storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) initialClusterSpec := &cluster.ClusterSpec{ InitMode: cluster.ClusterInitModeP(cluster.ClusterInitModeNew), SleepInterval: &cluster.Duration{Duration: 2 * time.Second}, FailInterval: &cluster.Duration{Duration: 5 * time.Second}, ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second}, } initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec) if err != nil { t.Fatalf("unexpected err: %v", err) } ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile)) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := ts.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } defer ts.Stop() if err := WaitClusterPhase(sm, cluster.ClusterPhaseInitializing, 30*time.Second); err != nil { t.Fatal("expected cluster in initializing 
phase") } tk2, err := NewTestKeeper(t, dir, clusterName, "user01", "password", "user01", "password", tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tk2.StartExpect(); err != nil { t.Fatalf("unexpected err: %v", err) } defer tk2.Stop() if err := tk2.cmd.ExpectTimeout("replication role added to superuser", 60*time.Second); err != nil { t.Fatalf("expecting keeper reporting replication role added to superuser") } // Test pg-repl-username != pg-su-username and pg-su-password defined clusterName = uuid.NewV4().String() storePath = filepath.Join(common.StoreBasePath, clusterName) sm = store.NewStoreManager(tstore.store, storePath) ts2, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile)) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := ts2.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } defer ts2.Stop() if err := WaitClusterPhase(sm, cluster.ClusterPhaseInitializing, 60*time.Second); err != nil { t.Fatal("expected cluster in initializing phase") } tk3, err := NewTestKeeper(t, dir, clusterName, "user01", "password", "user02", "password", tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tk3.StartExpect(); err != nil { t.Fatalf("unexpected err: %v", err) } defer tk3.Stop() if err := tk3.cmd.ExpectTimeout("superuser password set", 60*time.Second); err != nil { t.Fatalf("expecting keeper reporting superuser password set") } if err := tk3.cmd.ExpectTimeout("replication role created role=user02", 60*time.Second); err != nil { t.Fatalf("expecting keeper reporting replication role user02 created") } }
func TestProxyListening(t *testing.T) { dir, err := ioutil.TempDir("", "") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) clusterName := uuid.NewV4().String() tstore, err := NewTestStore(dir) if err != nil { t.Fatalf("unexpected err: %v", err) } storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) tp, err := NewTestProxy(dir, clusterName, tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tp.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } defer tp.Stop() log.Printf("test proxy start with store down. Should not listen") // tp should not listen because it cannot talk with store if err := tp.WaitNotListening(10 * time.Second); err != nil { t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.") } tp.Stop() if err := tstore.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } if err := tstore.WaitUp(10 * time.Second); err != nil { t.Fatalf("error waiting on store up: %v", err) } defer func() { if tstore.cmd != nil { tstore.Stop() } }() storePath := filepath.Join(common.StoreBasePath, clusterName) kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) pair, err := e.SetClusterData(cluster.KeepersState{}, &cluster.ClusterView{ Version: 1, Config: &cluster.NilConfig{ SleepInterval: &cluster.Duration{5 * time.Second}, KeeperFailInterval: &cluster.Duration{10 * time.Second}, }, ProxyConf: &cluster.ProxyConf{ // fake pg address, not relevant Host: "localhost", Port: "5432", }, }, nil) if err != nil { t.Fatalf("unexpected err: %v", err) } // test proxy start with the store up log.Printf("test proxy start with the store up. 
Should listen") if err := tp.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } // tp should listen if err := tp.WaitListening(10 * time.Second); err != nil { t.Fatalf("expecting tp listening, but it's not listening.") } log.Printf("test proxy error communicating with store. Should stop listening") // Stop store tstore.Stop() if err := tstore.WaitDown(10 * time.Second); err != nil { t.Fatalf("error waiting on store down: %v", err) } // tp should not listen because it cannot talk with the store if err := tp.WaitNotListening(10 * time.Second); err != nil { t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.") } log.Printf("test proxy communication with store restored. Should start listening") // Start store if err := tstore.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } if err := tstore.WaitUp(10 * time.Second); err != nil { t.Fatalf("error waiting on store up: %v", err) } // tp should listen if err := tp.WaitListening(10 * time.Second); err != nil { t.Fatalf("expecting tp listening, but it's not listening.") } log.Printf("test proxyConf removed. Should continue listening") // remove proxyConf pair, err = e.SetClusterData(cluster.KeepersState{}, &cluster.ClusterView{ Version: 1, Config: &cluster.NilConfig{ SleepInterval: &cluster.Duration{5 * time.Second}, KeeperFailInterval: &cluster.Duration{10 * time.Second}, }, ProxyConf: nil, }, pair) if err != nil { t.Fatalf("unexpected err: %v", err) } // tp should listen if err := tp.WaitListening(10 * time.Second); err != nil { t.Fatalf("expecting tp listening, but it's not listening.") } log.Printf("test proxyConf restored. 
Should continue listening") // Set proxyConf again pair, err = e.SetClusterData(cluster.KeepersState{}, &cluster.ClusterView{ Version: 1, Config: &cluster.NilConfig{ SleepInterval: &cluster.Duration{5 * time.Second}, KeeperFailInterval: &cluster.Duration{10 * time.Second}, }, ProxyConf: &cluster.ProxyConf{ // fake pg address, not relevant Host: "localhost", Port: "5432", }, }, pair) if err != nil { t.Fatalf("unexpected err: %v", err) } // tp should listen if err := tp.WaitListening(10 * time.Second); err != nil { t.Fatalf("expecting tp listening, but it's not listening.") } log.Printf("test clusterView removed. Should continue listening") // remove whole clusterview _, err = e.SetClusterData(cluster.KeepersState{}, nil, pair) if err != nil { t.Fatalf("unexpected err: %v", err) } // tp should listen if err := tp.WaitListening(10 * time.Second); err != nil { t.Fatalf("expecting tp listening, but it's not listening.") } }
func TestLoweredMaxStandbysPerSender(t *testing.T) { t.Parallel() dir, err := ioutil.TempDir("", "stolon") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) clusterName := uuid.NewV4().String() initialClusterSpec := &cluster.ClusterSpec{ InitMode: cluster.ClusterInitModeP(cluster.ClusterInitModeNew), SleepInterval: &cluster.Duration{Duration: 2 * time.Second}, FailInterval: &cluster.Duration{Duration: 5 * time.Second}, ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second}, MaxStandbysPerSender: cluster.Uint16P(2), PGParameters: make(cluster.PGParameters), } // Create 3 keepers tks, tss, tstore := setupServersCustom(t, clusterName, dir, 3, 1, initialClusterSpec) defer shutdown(tks, tss, tstore) storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) // Wait for clusterView containing a master masterUID, err := WaitClusterDataWithMaster(sm, 30*time.Second) if err != nil { t.Fatal("expected a master in cluster view") } master := tks[masterUID] waitKeeperReady(t, sm, master) if err := populate(t, master); err != nil { t.Fatalf("unexpected err: %v", err) } if err := write(t, master, 1, 1); err != nil { t.Fatalf("unexpected err: %v", err) } c, err := getLines(t, master) if err != nil { t.Fatalf("unexpected err: %v", err) } if c != 1 { t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c) } if err := WaitNumDBs(sm, 3, 30*time.Second); err != nil { t.Fatalf("expected 3 DBs in cluster data: %v", err) } // Set MaxStandbysPerSender to 1 err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "update", "--patch", `{ "maxStandbysPerSender" : 1 }`) if err != nil { t.Fatalf("unexpected err: %v", err) } // Wait for only 1 standby if err := WaitNumDBs(sm, 2, 30*time.Second); err != nil { t.Fatalf("expected 2 DBs in cluster data: %v", err) } }
// TestFailedStandby checks that when the single allowed standby
// (MaxStandbysPerSender=1) dies, the sentinel reassigns a standby db to the
// remaining keeper and drops the failed one.
func TestFailedStandby(t *testing.T) {
	t.Parallel()

	dir, err := ioutil.TempDir("", "stolon")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer os.RemoveAll(dir)

	clusterName := uuid.NewV4().String()

	initialClusterSpec := &cluster.ClusterSpec{
		InitMode:             cluster.ClusterInitModeP(cluster.ClusterInitModeNew),
		SleepInterval:        &cluster.Duration{Duration: 2 * time.Second},
		FailInterval:         &cluster.Duration{Duration: 5 * time.Second},
		ConvergenceTimeout:   &cluster.Duration{Duration: 30 * time.Second},
		MaxStandbysPerSender: cluster.Uint16P(1),
		PGParameters:         make(cluster.PGParameters),
	}

	// Create 3 keepers
	tks, tss, tstore := setupServersCustom(t, clusterName, dir, 3, 1, initialClusterSpec)
	defer shutdown(tks, tss, tstore)

	storePath := filepath.Join(common.StoreBasePath, clusterName)
	sm := store.NewStoreManager(tstore.store, storePath)

	// Wait for clusterView containing a master
	masterUID, err := WaitClusterDataWithMaster(sm, 30*time.Second)
	if err != nil {
		t.Fatal("expected a master in cluster view")
	}
	master := tks[masterUID]
	waitKeeperReady(t, sm, master)
	if err := populate(t, master); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := write(t, master, 1, 1); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	c, err := getLines(t, master)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if c != 1 {
		t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c)
	}
	// Only master + one standby are expected with MaxStandbysPerSender=1.
	if err := WaitNumDBs(sm, 2, 30*time.Second); err != nil {
		t.Fatalf("expected 2 DBs in cluster data: %v", err)
	}

	cd, _, err := sm.GetClusterData()
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	// Get current standby
	var standby *TestKeeper
	for _, db := range cd.DBs {
		if db.UID == cd.Cluster.Status.Master {
			continue
		}
		standby = tks[db.Spec.KeeperUID]
	}
	if err := waitLines(t, standby, 1, 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Stop current standby. The other keeper should be chosen as new standby
	t.Logf("Stopping current standby keeper: %s", standby.uid)
	standby.Stop()

	// Wait for other keeper to have a standby db assigned
	var newStandby *TestKeeper
	for _, tk := range tks {
		if tk.uid != master.uid && tk.uid != standby.uid {
			newStandby = tk
		}
	}
	if err := WaitStandbyKeeper(sm, newStandby.uid, 20*time.Second); err != nil {
		t.Fatalf("expected keeper %s to have a standby db assigned: %v", newStandby.uid, err)
	}
	// Wait for new standby declared as good and remove of old standby
	if err := WaitNumDBs(sm, 2, 30*time.Second); err != nil {
		t.Fatalf("expected 2 DBs in cluster data: %v", err)
	}
}
// NewPostgresKeeper creates a PostgresKeeper from the given configuration,
// connects the store manager and loads (or generates and persists) the
// keeper's local state (its UID) and the db local state.
func NewPostgresKeeper(cfg *config, stop chan bool, end chan error) (*PostgresKeeper, error) {
	storePath := filepath.Join(common.StoreBasePath, cfg.clusterName)
	kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints)
	if err != nil {
		return nil, fmt.Errorf("cannot create store: %v", err)
	}
	e := store.NewStoreManager(kvstore, storePath)

	p := &PostgresKeeper{
		cfg:                 cfg,
		dataDir:             cfg.dataDir,
		storeBackend:        cfg.storeBackend,
		storeEndpoints:      cfg.storeEndpoints,
		debug:               cfg.debug,
		pgListenAddress:     cfg.pgListenAddress,
		pgPort:              cfg.pgPort,
		pgBinPath:           cfg.pgBinPath,
		pgReplUsername:      cfg.pgReplUsername,
		pgReplPassword:      cfg.pgReplPassword,
		pgSUUsername:        cfg.pgSUUsername,
		pgSUPassword:        cfg.pgSUPassword,
		pgInitialSUUsername: cfg.pgInitialSUUsername,
		sleepInterval:       cluster.DefaultSleepInterval,
		requestTimeout:      cluster.DefaultRequestTimeout,
		keeperLocalState:    &KeeperLocalState{},
		dbLocalState:        &DBLocalState{},
		e:                   e,
		stop:                stop,
		end:                 end,
	}

	// A missing local state file is fine (first run); any other read error
	// is fatal.
	err = p.loadKeeperLocalState()
	if err != nil && !os.IsNotExist(err) {
		return nil, fmt.Errorf("failed to load keeper local state file: %v", err)
	}
	// Refuse to start when a saved UID conflicts with the configured id.
	if p.keeperLocalState.UID != "" && p.cfg.id != "" && p.keeperLocalState.UID != p.cfg.id {
		fmt.Printf("saved id %q differs from configuration id: %q\n", p.keeperLocalState.UID, cfg.id)
		os.Exit(1)
	}
	if p.keeperLocalState.UID == "" {
		p.keeperLocalState.UID = cfg.id
		if cfg.id == "" {
			// No configured id: generate a fresh UID.
			p.keeperLocalState.UID = common.UID()
			log.Info("uid generated", zap.String("id", p.keeperLocalState.UID))
		}
		// Persist the (possibly generated) UID for subsequent runs.
		if err = p.saveKeeperLocalState(); err != nil {
			fmt.Printf("error: %v\n", err)
			os.Exit(1)
		}
	}
	log.Info("keeper uid", zap.String("uid", p.keeperLocalState.UID))

	err = p.loadDBLocalState()
	if err != nil && !os.IsNotExist(err) {
		return nil, fmt.Errorf("failed to load db local state file: %v", err)
	}
	return p, nil
}
// testTimelineFork verifies recovery from a timeline fork: a standby that is
// behind (on an old timeline) is started after the more advanced nodes have
// been stopped. Without synchronous replication it must be elected master and
// the formerly-ahead standby must full-resync to it; with synchronous
// replication the stale standby must NOT be elected.
func testTimelineFork(t *testing.T, syncRepl, usePgrewind bool) {
	dir, err := ioutil.TempDir("", "stolon")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer os.RemoveAll(dir)

	clusterName := uuid.NewV4().String()

	// 2 keepers, 1 sentinel.
	tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, syncRepl, usePgrewind)
	defer shutdown(tks, tss, tstore)

	storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
	storePath := filepath.Join(common.StoreBasePath, clusterName)
	sm := store.NewStoreManager(tstore.store, storePath)

	master, standbys := waitMasterStandbysReady(t, sm, tks)

	if syncRepl {
		if err := WaitClusterDataSynchronousStandbys([]string{standbys[0].uid}, sm, 30*time.Second); err != nil {
			t.Fatalf("expected synchronous standby on keeper %q in cluster data", standbys[0].uid)
		}
	}

	if err := populate(t, master); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := write(t, master, 1, 1); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Wait replicated data to standby
	if err := waitLines(t, standbys[0], 1, 10*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Add another standby
	tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	tks[tk.uid] = tk

	if err := tk.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	standbys = append(standbys, tk)

	// Wait replicated data to standby
	if err := waitLines(t, standbys[1], 1, 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Stop one standby
	t.Logf("Stopping standby[0]: %s", standbys[0].uid)
	standbys[0].Stop()
	if err := standbys[0].WaitDBDown(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Write to master (and replicated to remaining standby)
	if err := write(t, master, 2, 2); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Wait replicated data to standby[1]
	if err := waitLines(t, standbys[1], 2, 10*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Stop the master and remaining standby[1]
	t.Logf("Stopping master: %s", master.uid)
	master.Stop()
	t.Logf("Stopping standby[1]: %s", standbys[1].uid)
	standbys[1].Stop()
	if err := master.WaitDBDown(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := standbys[1].WaitDBDown(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Start standby[0].
	// If synchronous replication is disabled it will be elected as master but it'll be behind (having only one line).
	// If synchronous replication is enabled it won't be elected as master
	t.Logf("Starting standby[0]: %s", standbys[0].uid)
	standbys[0].Start()
	waitKeeperReady(t, sm, standbys[0])
	err = standbys[0].WaitRole(common.RoleMaster, 60*time.Second)
	if !syncRepl && err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if syncRepl {
		if err == nil {
			t.Fatalf("expected timeout since with synchronous replication the standby shouldn't be elected as master")
		}
		// end here
		return
	}

	// The new master only has the first line: the second write never reached it.
	c, err := getLines(t, standbys[0])
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if c != 1 {
		t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c)
	}

	// Start the other standby, it should be ahead of current on previous timeline and should full resync himself
	t.Logf("Starting standby[1]: %s", standbys[1].uid)
	standbys[1].Start()
	// Standby[1] will start, then it'll detect it's in another timelinehistory,
	// will stop, full resync and start. We have to avoid detecting it up
	// at the first start. Do this waiting for the number of expected lines.
	if err := waitLines(t, standbys[1], 1, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := standbys[1].WaitRole(common.RoleStandby, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
}
func TestInitWithMultipleKeepers(t *testing.T) { t.Parallel() dir, err := ioutil.TempDir("", "stolon") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) tstore := setupStore(t, dir) storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) clusterName := uuid.NewV4().String() storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) initialClusterSpec := &cluster.ClusterSpec{ InitMode: cluster.ClusterInitModeP(cluster.ClusterInitModeNew), FailInterval: &cluster.Duration{Duration: 10 * time.Second}, ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second}, } initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec) if err != nil { t.Fatalf("unexpected err: %v", err) } tks := testKeepers{} tss := testSentinels{} // Start 3 keepers for i := uint8(0); i < 3; i++ { tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tk.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } tks[tk.uid] = tk } // Start 2 sentinels for i := uint8(0); i < 2; i++ { ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile)) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := ts.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } tss[ts.uid] = ts } defer shutdown(tks, tss, tstore) // Wait for clusterView containing a master masterUID, err := WaitClusterDataWithMaster(sm, 30*time.Second) if err != nil { t.Fatal("expected a master in cluster view") } waitKeeperReady(t, sm, tks[masterUID]) }
// testPartition1 simulates a partitioned master by SIGSTOPping both the
// master keeper and its postgres process, verifies failover to the standby,
// adds a second standby (needed so synchronous replication has an active
// sync standby), then SIGCONTs the old master and checks it catches up and
// rejoins as a standby.
func testPartition1(t *testing.T, syncRepl, usePgrewind bool) {
	dir, err := ioutil.TempDir("", "stolon")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer os.RemoveAll(dir)

	clusterName := uuid.NewV4().String()

	// 2 keepers, 1 sentinel.
	tks, tss, tstore := setupServers(t, clusterName, dir, 2, 1, syncRepl, usePgrewind)
	defer shutdown(tks, tss, tstore)

	storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
	storePath := filepath.Join(common.StoreBasePath, clusterName)
	sm := store.NewStoreManager(tstore.store, storePath)

	master, standbys := waitMasterStandbysReady(t, sm, tks)

	if syncRepl {
		if err := WaitClusterDataSynchronousStandbys([]string{standbys[0].uid}, sm, 30*time.Second); err != nil {
			t.Fatalf("expected synchronous standby on keeper %q in cluster data", standbys[0].uid)
		}
	}

	if err := populate(t, master); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := write(t, master, 1, 1); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Freeze the keeper and postgres processes on the master
	t.Logf("SIGSTOPping current master keeper: %s", master.uid)
	if err := master.Signal(syscall.SIGSTOP); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	t.Logf("SIGSTOPping current master postgres: %s", master.uid)
	if err := master.SignalPG(syscall.SIGSTOP); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// The standby must be promoted to master.
	if err := standbys[0].WaitRole(common.RoleMaster, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	c, err := getLines(t, standbys[0])
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if c != 1 {
		t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c)
	}

	// Add another standby so we'll have 2 standbys. With only 1 standby,
	// when using synchronous replication, the test will block forever when
	// writing to the new master since there's not active synchronous
	// standby.
	tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	tks[tk.uid] = tk

	if err := tk.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	standbys = append(standbys, tk)

	// Wait replicated data to standby
	if err := waitLines(t, standbys[1], 1, 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Write to the new master (standbys[0] after the failover).
	if err := write(t, standbys[0], 2, 2); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Make the master come back
	t.Logf("Resuming old master keeper: %s", master.uid)
	if err := master.Signal(syscall.SIGCONT); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	t.Logf("Resuming old master postgres: %s", master.uid)
	if err := master.SignalPG(syscall.SIGCONT); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Wait replicated data to old master
	if err := waitLines(t, master, 2, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// The old master must have been demoted to standby.
	if err := master.WaitRole(common.RoleStandby, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
}
func update(cmd *cobra.Command, args []string) { if len(args) > 1 { die("too many arguments") } if updateOpts.file == "" && len(args) < 1 { die("no cluster spec provided as argument and no file provided (--file/-f option)") } if updateOpts.file != "" && len(args) == 1 { die("only one of cluster spec provided as argument or file must provided (--file/-f option)") } data := []byte{} if len(args) == 1 { data = []byte(args[0]) } else { var err error if updateOpts.file == "-" { data, err = ioutil.ReadAll(os.Stdin) if err != nil { die("cannot read from stdin: %v", err) } } else { data, err = ioutil.ReadFile(updateOpts.file) if err != nil { die("cannot read file: %v", err) } } } storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { die("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) retry := 0 for retry < maxRetries { cd, pair, err := getClusterData(e) if err != nil { die("%v", err) } if cd.Cluster == nil { die("no cluster spec available") } if cd.Cluster.Spec == nil { die("no cluster spec available") } var newcs *cluster.ClusterSpec if updateOpts.patch { newcs, err = patchClusterSpec(cd.Cluster.Spec, data) if err != nil { die("failed to patch cluster spec: %v", err) } } else { if err := json.Unmarshal(data, &newcs); err != nil { die("failed to unmarshal cluster spec: %v", err) } } newcs.SetDefaults() if err := cd.Cluster.UpdateSpec(newcs); err != nil { die("Cannot update cluster spec: %v", err) } // retry if cd has been modified between reading and writing _, err = e.AtomicPutClusterData(cd, pair) if err != nil { if err == libkvstore.ErrKeyModified { retry++ continue } die("cannot update cluster data: %v", err) } break } if retry == maxRetries { die("failed to update cluster data after %d retries", maxRetries) } }
// setupServers brings up a store, numSentinels sentinels, a first keeper
// (which must become master) and numKeepers-1 additional keepers (which must
// become standbys), returning the running keepers, sentinels and store.
// NOTE(review): this block keys maps on tk.id/ts.id while other code in the
// file uses .uid — verify which accessor this file revision declares.
func setupServers(t *testing.T, clusterName, dir string, numKeepers, numSentinels uint8, syncRepl bool, usePgrewind bool) (testKeepers, testSentinels, *TestStore) {
	tstore := setupStore(t, dir)

	storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
	storePath := filepath.Join(common.StoreBasePath, clusterName)

	kvstore, err := store.NewStore(tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("cannot create store: %v", err)
	}
	e := store.NewStoreManager(kvstore, storePath)

	initialClusterSpec := &cluster.ClusterSpec{
		InitMode:               cluster.ClusterInitModeNew,
		SleepInterval:          cluster.Duration{Duration: 2 * time.Second},
		FailInterval:           cluster.Duration{Duration: 5 * time.Second},
		ConvergenceTimeout:     cluster.Duration{Duration: 30 * time.Second},
		SynchronousReplication: syncRepl,
		UsePgrewind:            usePgrewind,
		PGParameters:           make(cluster.PGParameters),
	}
	initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	tks := map[string]*TestKeeper{}
	tss := map[string]*TestSentinel{}

	// First keeper is created up-front but started only after the sentinels.
	tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	tks[tk.id] = tk

	t.Logf("tk: %v", tk)

	// Start sentinels
	for i := uint8(0); i < numSentinels; i++ {
		ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
		if err != nil {
			t.Fatalf("unexpected err: %v", err)
		}
		if err := ts.Start(); err != nil {
			t.Fatalf("unexpected err: %v", err)
		}
		tss[ts.id] = ts
	}

	// Start first keeper
	if err := tk.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tk.WaitDBUp(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tk.WaitRole(common.RoleMaster, 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Wait for clusterView containing tk as master
	if err := WaitClusterDataMaster(tk.id, e, 30*time.Second); err != nil {
		t.Fatalf("expected master %q in cluster view", tk.id)
	}

	// Start other keepers
	for i := uint8(1); i < numKeepers; i++ {
		tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
		if err != nil {
			t.Fatalf("unexpected err: %v", err)
		}
		if err := tk.Start(); err != nil {
			t.Fatalf("unexpected err: %v", err)
		}
		if err := tk.WaitDBUp(60 * time.Second); err != nil {
			t.Fatalf("unexpected err: %v", err)
		}
		// Wait for clusterView containing tk as standby
		if err := tk.WaitRole(common.RoleStandby, 30*time.Second); err != nil {
			t.Fatalf("unexpected err: %v", err)
		}
		tks[tk.id] = tk
	}
	return tks, tss, tstore
}
// testInitExisting bootstraps a normal cluster with archive_mode=on, writes
// data, then re-initializes the cluster with InitMode "existing" pointing at
// the running keeper. It verifies the data survives the re-init and that
// archive_mode is kept in the new spec only when MergePgParameters is true.
func testInitExisting(t *testing.T, merge bool) {
	clusterName := uuid.NewV4().String()

	dir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer os.RemoveAll(dir)

	tstore := setupStore(t, dir)
	defer tstore.Stop()

	storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
	storePath := filepath.Join(common.StoreBasePath, clusterName)
	sm := store.NewStoreManager(tstore.store, storePath)

	initialClusterSpec := &cluster.ClusterSpec{
		InitMode:           cluster.ClusterInitModeP(cluster.ClusterInitModeNew),
		SleepInterval:      &cluster.Duration{Duration: 2 * time.Second},
		FailInterval:       &cluster.Duration{Duration: 5 * time.Second},
		ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second},
		PGParameters: cluster.PGParameters{
			"archive_mode": "on",
		},
	}
	initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := ts.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tk.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := WaitClusterPhase(sm, cluster.ClusterPhaseNormal, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := tk.WaitDBUp(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := populate(t, tk); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := write(t, tk, 1, 1); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Now initialize a new cluster with the existing keeper
	initialClusterSpec = &cluster.ClusterSpec{
		InitMode:           cluster.ClusterInitModeP(cluster.ClusterInitModeExisting),
		SleepInterval:      &cluster.Duration{Duration: 2 * time.Second},
		FailInterval:       &cluster.Duration{Duration: 5 * time.Second},
		ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second},
		MergePgParameters:  &merge,
		ExistingConfig: &cluster.ExistingConfig{
			KeeperUID: tk.uid,
		},
	}
	initialClusterSpecFile, err = writeClusterSpec(dir, initialClusterSpec)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	t.Logf("reinitializing cluster")
	// Initialize cluster with new spec
	err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "init", "-y", "-f", initialClusterSpecFile)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := WaitClusterPhase(sm, cluster.ClusterPhaseInitializing, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := WaitClusterPhase(sm, cluster.ClusterPhaseNormal, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := tk.WaitDBUp(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// The previously written line must have survived the re-init.
	c, err := getLines(t, tk)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if c != 1 {
		t.Fatalf("wrong number of lines, want: %d, got: %d", 1, c)
	}

	pgParameters, err := tk.GetPGParameters()
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	v, ok := pgParameters["archive_mode"]
	if merge && v != "on" {
		t.Fatalf("expected archive_mode == on got %q", v)
	}
	if !merge && ok {
		t.Fatalf("expected archive_mode empty")
	}

	// NOTE(review): err from GetClusterData is not checked here; a store
	// failure would surface as a nil-pointer panic on cd below — consider
	// adding a t.Fatalf on err.
	cd, _, err := sm.GetClusterData()
	// max_connection should be set by initdb
	v, ok = cd.Cluster.Spec.PGParameters["archive_mode"]
	if merge && v != "on" {
		t.Fatalf("expected archive_mode == on got %q", v)
	}
	if !merge && ok {
		t.Fatalf("expected archive_mode empty")
	}

	tk.Stop()
}
// TestServerParameters verifies handling of bad pgParameters pushed via
// "stolonctl update --patch": the running instance reloads (and keeps
// running), a restarted postgres fails to start with the bad parameter,
// and clearing the parameters lets the database come back up.
func TestServerParameters(t *testing.T) {
	t.Parallel()

	dir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer os.RemoveAll(dir)

	tstore, err := NewTestStore(t, dir)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tstore.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tstore.WaitUp(10 * time.Second); err != nil {
		t.Fatalf("error waiting on store up: %v", err)
	}
	storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)
	defer tstore.Stop()

	clusterName := uuid.NewV4().String()

	storePath := filepath.Join(common.StoreBasePath, clusterName)
	sm := store.NewStoreManager(tstore.store, storePath)

	initialClusterSpec := &cluster.ClusterSpec{
		InitMode:           cluster.ClusterInitModeP(cluster.ClusterInitModeNew),
		SleepInterval:      &cluster.Duration{Duration: 2 * time.Second},
		FailInterval:       &cluster.Duration{Duration: 5 * time.Second},
		ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second},
	}
	initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile))
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := ts.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tk.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := WaitClusterPhase(sm, cluster.ClusterPhaseNormal, 60*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := tk.WaitDBUp(60 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Push a parameter postgres doesn't know; reload accepts it silently.
	err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "update", "--patch", `{ "pgParameters" : { "unexistent_parameter": "value" } }`)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tk.cmd.ExpectTimeout("postgres parameters changed, reloading postgres instance", 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// On the next keeper check they shouldn't be changed
	if err := tk.cmd.ExpectTimeout("postgres parameters not changed", 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	tk.Stop()

	// Start tk again, postgres should fail to start due to bad parameter
	if err := tk.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer tk.Stop()

	if err := tk.cmd.ExpectTimeout("failed to start postgres", 30*time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// Fix wrong parameters
	err = StolonCtl(clusterName, tstore.storeBackend, storeEndpoints, "update", "--patch", `{ "pgParameters" : null }`)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	if err := tk.WaitDBUp(30 * time.Second); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
}
func testInitNew(t *testing.T, merge bool) { clusterName := uuid.NewV4().String() dir, err := ioutil.TempDir("", "") if err != nil { t.Fatalf("unexpected err: %v", err) } defer os.RemoveAll(dir) tstore := setupStore(t, dir) defer tstore.Stop() storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port) storePath := filepath.Join(common.StoreBasePath, clusterName) sm := store.NewStoreManager(tstore.store, storePath) initialClusterSpec := &cluster.ClusterSpec{ InitMode: cluster.ClusterInitModeP(cluster.ClusterInitModeNew), FailInterval: &cluster.Duration{Duration: 10 * time.Second}, ConvergenceTimeout: &cluster.Duration{Duration: 30 * time.Second}, MergePgParameters: &merge, } initialClusterSpecFile, err := writeClusterSpec(dir, initialClusterSpec) if err != nil { t.Fatalf("unexpected err: %v", err) } ts, err := NewTestSentinel(t, dir, clusterName, tstore.storeBackend, storeEndpoints, fmt.Sprintf("--initial-cluster-spec=%s", initialClusterSpecFile)) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := ts.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } tk, err := NewTestKeeper(t, dir, clusterName, pgSUUsername, pgSUPassword, pgReplUsername, pgReplPassword, tstore.storeBackend, storeEndpoints) if err != nil { t.Fatalf("unexpected err: %v", err) } if err := tk.Start(); err != nil { t.Fatalf("unexpected err: %v", err) } if err := WaitClusterPhase(sm, cluster.ClusterPhaseNormal, 60*time.Second); err != nil { t.Fatalf("unexpected err: %v", err) } if err := tk.WaitDBUp(60 * time.Second); err != nil { t.Fatalf("unexpected err: %v", err) } cd, _, err := sm.GetClusterData() // max_connection should be set by initdb _, ok := cd.Cluster.Spec.PGParameters["max_connections"] if merge && !ok { t.Fatalf("expected max_connection set in cluster data pgParameters") } if !merge && ok { t.Fatalf("expected no max_connection set in cluster data pgParameters") } tk.Stop() }
func initCluster(cmd *cobra.Command, args []string) { if len(args) > 1 { die("too many arguments") } data := []byte{} switch len(args) { case 1: data = []byte(args[0]) case 0: if initOpts.file != "" { var err error if initOpts.file == "-" { data, err = ioutil.ReadAll(os.Stdin) if err != nil { die("cannot read from stdin: %v", err) } } else { data, err = ioutil.ReadFile(initOpts.file) if err != nil { die("cannot read file: %v", err) } } } } storePath := filepath.Join(common.StoreBasePath, cfg.clusterName) kvstore, err := store.NewStore(store.Backend(cfg.storeBackend), cfg.storeEndpoints) if err != nil { die("cannot create store: %v", err) } e := store.NewStoreManager(kvstore, storePath) cd, _, err := e.GetClusterData() if err != nil { die("cannot get cluster data: %v", err) } if cd != nil { stdout("WARNING: The current cluster data will be removed") } stdout("WARNING: The databases managed by the keepers will be overwrited depending on the provided cluster spec.") accepted := true if !initOpts.forceYes { accepted, err = askConfirmation("Are you sure you want to continue? [yes/no] ") if err != nil { die("%v", err) } } if !accepted { stdout("exiting") os.Exit(0) } cd, _, err = e.GetClusterData() if err != nil { die("cannot get cluster data: %v", err) } var cs *cluster.ClusterSpec if len(data) == 0 { // Define a new cluster spec with initMode "new" cs = &cluster.ClusterSpec{} cs.InitMode = cluster.ClusterInitModeNew } else { if err := json.Unmarshal(data, &cs); err != nil { die("failed to unmarshal cluster spec: %v", err) } } cs.SetDefaults() if err := cs.Validate(); err != nil { die("invalid cluster spec: %v", err) } c := cluster.NewCluster(common.UID(), cs) cd = cluster.NewClusterData(c) // We ignore if cd has been modified between reading and writing if err := e.PutClusterData(cd); err != nil { die("cannot update cluster data: %v", err) } }
// TestProxyListening verifies the proxy's listening behavior across store
// availability transitions: it must not listen while the store is
// unreachable, listen while the store is up, and keep listening when the
// proxy spec or even the whole cluster data is removed.
func TestProxyListening(t *testing.T) {
	t.Parallel()

	dir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer os.RemoveAll(dir)

	clusterName := uuid.NewV4().String()

	// Store is created but deliberately NOT started yet.
	tstore, err := NewTestStore(t, dir)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	storeEndpoints := fmt.Sprintf("%s:%s", tstore.listenAddress, tstore.port)

	tp, err := NewTestProxy(t, dir, clusterName, tstore.storeBackend, storeEndpoints)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tp.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	defer tp.Stop()

	t.Logf("test proxy start with store down. Should not listen")
	// tp should not listen because it cannot talk with store
	if err := tp.WaitNotListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.")
	}

	tp.Stop()

	if err := tstore.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tstore.WaitUp(10 * time.Second); err != nil {
		t.Fatalf("error waiting on store up: %v", err)
	}
	// Guard against double-stop: the store is stopped and restarted below.
	defer func() {
		if tstore.cmd != nil {
			tstore.Stop()
		}
	}()

	storePath := filepath.Join(common.StoreBasePath, clusterName)
	sm := store.NewStoreManager(tstore.store, storePath)

	// Hand-crafted cluster data: one healthy keeper "01" whose db is the
	// proxied master.
	cd := &cluster.ClusterData{
		FormatVersion: cluster.CurrentCDFormatVersion,
		Cluster: &cluster.Cluster{
			UID:        "01",
			Generation: 1,
			Spec: &cluster.ClusterSpec{
				InitMode:     cluster.ClusterInitModeP(cluster.ClusterInitModeNew),
				FailInterval: &cluster.Duration{Duration: 10 * time.Second},
			},
			Status: cluster.ClusterStatus{
				CurrentGeneration: 1,
				Phase:             cluster.ClusterPhaseNormal,
				Master:            "01",
			},
		},
		Keepers: cluster.Keepers{
			"01": &cluster.Keeper{
				UID:  "01",
				Spec: &cluster.KeeperSpec{},
				Status: cluster.KeeperStatus{
					Healthy: true,
				},
			},
		},
		DBs: cluster.DBs{
			"01": &cluster.DB{
				UID:        "01",
				Generation: 1,
				ChangeTime: time.Time{},
				Spec: &cluster.DBSpec{
					KeeperUID: "01",
					Role:      common.RoleMaster,
					Followers: []string{"02"},
				},
				Status: cluster.DBStatus{
					Healthy:           false,
					CurrentGeneration: 1,
				},
			},
		},
		Proxy: &cluster.Proxy{
			Spec: cluster.ProxySpec{
				MasterDBUID: "01",
			},
		},
	}
	pair, err := sm.AtomicPutClusterData(cd, nil)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}

	// test proxy start with the store up
	t.Logf("test proxy start with the store up. Should listen")
	if err := tp.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	// tp should listen
	if err := tp.WaitListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp listening, but it's not listening.")
	}

	t.Logf("test proxy error communicating with store. Should stop listening")
	// Stop store
	tstore.Stop()
	if err := tstore.WaitDown(10 * time.Second); err != nil {
		t.Fatalf("error waiting on store down: %v", err)
	}
	// tp should not listen because it cannot talk with the store
	if err := tp.WaitNotListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp not listening due to failed store communication, but it's listening.")
	}

	t.Logf("test proxy communication with store restored. Should start listening")
	// Start store
	if err := tstore.Start(); err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	if err := tstore.WaitUp(10 * time.Second); err != nil {
		t.Fatalf("error waiting on store up: %v", err)
	}
	// tp should listen
	if err := tp.WaitListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp listening, but it's not listening.")
	}

	t.Logf("test proxyConf removed. Should continue listening")
	// remove proxyConf
	cd.Proxy.Spec.MasterDBUID = ""
	pair, err = sm.AtomicPutClusterData(cd, pair)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	// tp should listen
	if err := tp.WaitListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp listening, but it's not listening.")
	}

	t.Logf("test proxyConf restored. Should continue listening")
	// Set proxyConf again
	cd.Proxy.Spec.MasterDBUID = "01"
	pair, err = sm.AtomicPutClusterData(cd, pair)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	// tp should listen
	if err := tp.WaitListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp listening, but it's not listening.")
	}

	t.Logf("test clusterView removed. Should continue listening")
	// remove whole clusterview
	_, err = sm.AtomicPutClusterData(nil, pair)
	if err != nil {
		t.Fatalf("unexpected err: %v", err)
	}
	// tp should listen
	if err := tp.WaitListening(10 * time.Second); err != nil {
		t.Fatalf("expecting tp listening, but it's not listening.")
	}
}