func (suite *SchedulerTestSuite) TestSchedulerDriverNew_WithZkUrl() { masterAddr := "zk://127.0.0.1:5050/mesos" driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, masterAddr, nil) md, err := zoo.NewMockMasterDetector(masterAddr) suite.NoError(err) suite.NotNil(md) driver.masterDetector = md // override internal master detector md.ScheduleConnEvent(zk.StateConnected) done := make(chan struct{}) driver.masterDetector.Detect(detector.OnMasterChanged(func(m *mesos.MasterInfo) { suite.NotNil(m) suite.NotEqual(m.GetPid, suite.masterUpid) close(done) })) //TODO(vlad) revisit, detector not responding. //NOTE(jdef) this works for me, I wonder if the timeouts are too short, or if //GOMAXPROCS settings are affecting the result? // md.ScheduleSessEvent(zk.EventNodeChildrenChanged) // select { // case <-done: // case <-time.After(time.Millisecond * 1000): // suite.T().Errorf("Timed out waiting for children event.") // } }
func createMesosClient( md detector.Master, httpClient *http.Client, tr *http.Transport, stateCacheTTL time.Duration) (*mesosClient, error) { initialMaster := make(chan struct{}) client := &mesosClient{ httpClient: httpClient, tr: tr, initialMaster: initialMaster, state: &stateCache{ ttl: stateCacheTTL, }, } client.state.refill = client.pollMasterForState first := true if err := md.Detect(detector.OnMasterChanged(func(info *mesos.MasterInfo) { host, port := extractMasterAddress(info) if len(host) > 0 { client.masterLock.Lock() defer client.masterLock.Unlock() client.master = fmt.Sprintf("%s:%d", host, port) if first { first = false close(initialMaster) } } log.Infof("cloud master changed to '%v'", client.master) })); err != nil { log.V(1).Infof("detector initialization failed: %v", err) return nil, err } return client, nil }
// start expected to be guarded by eventLock func (driver *MesosSchedulerDriver) start() (mesos.Status, error) { select { case <-driver.started: return driver.status, errors.New("Unable to Start: driver has already been started once.") default: // proceed } log.Infoln("Starting the scheduler driver...") if driver.status != mesos.Status_DRIVER_NOT_STARTED { return driver.status, fmt.Errorf("Unable to Start, expecting driver status %s, but is %s:", mesos.Status_DRIVER_NOT_STARTED, driver.status) } // Start the messenger. if err := driver.messenger.Start(); err != nil { log.Errorf("Scheduler failed to start the messenger: %v\n", err) return driver.status, err } driver.self = driver.messenger.UPID() driver.status = mesos.Status_DRIVER_RUNNING close(driver.started) // TODO(jdef) hacky but we don't want to miss it if the scheduler shuts down go func() { t := time.NewTicker(2 * time.Second) defer t.Stop() for { <-t.C driver.eventCond.Broadcast() select { case <-driver.stopCh: return default: } } }() log.Infof("Mesos scheduler driver started with PID=%v", driver.self) listener := detector.OnMasterChanged(func(m *mesos.MasterInfo) { driver.messenger.Route(context.TODO(), driver.self, &mesos.InternalMasterChangeDetected{ Master: m, }) }) // register with Detect() AFTER we have a self pid from the messenger, otherwise things get ugly // because our internal messaging depends on it. detector callbacks are routed over the messenger // bus, maintaining serial (concurrency-safe) callback execution. log.V(1).Infof("starting master detector %T: %+v", driver.masterDetector, driver.masterDetector) driver.masterDetector.Detect(listener) log.V(2).Infoln("master detector started") return driver.status, nil }
func (d *ZkLeaderDetector) Detect(zkUrl *string) (*string, error) { ld, err := zoo.NewMasterDetector(*zkUrl) if err != nil { return nil, fmt.Errorf("Failed to create master detector: %v", err) } if err := ld.Detect(detector.OnMasterChanged(d.onLeaderChange)); err != nil { return nil, fmt.Errorf("Failed to initialize master detector: %v", err) } // wait for callback to write new leader into this private channel return <-d.newLeader, nil }
func TestMasterDetectorChildrenChanged(t *testing.T) { wCh := make(chan struct{}, 1) c, err := makeClient() assert.NoError(t, err) assert.False(t, c.isConnected()) md, err := NewMasterDetector(zkurl) defer md.Cancel() assert.NoError(t, err) // override zk.Conn with our own. c.errorHandler = ErrorHandler(func(c *Client, e error) { err = e }) md.client = c md.client.connect() assert.NoError(t, err) assert.True(t, c.isConnected()) called := 0 md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) { //expect 2 calls in sequence: the first setting a master //and the second clearing it switch called++; called { case 1: assert.NotNil(t, master) assert.Equal(t, master.GetId(), "master@localhost:5050") wCh <- struct{}{} case 2: assert.Nil(t, master) wCh <- struct{}{} default: t.Fatalf("unexpected notification call attempt %d", called) } })) startWait := time.Now() select { case <-wCh: case <-time.After(time.Second * 3): panic("Waited too long...") } // wait for the disconnect event, should be triggered // 1s after the connected event waited := time.Now().Sub(startWait) time.Sleep((2 * time.Second) - waited) assert.False(t, c.isConnected()) }
// createMesosClient builds a mesosClient whose cached master address is kept
// current by the supplied master detector: on each change the callback derives
// a "host:port" (or "" when leadership is lost) and stores it in client.master.
// initialMaster is closed exactly once, after the first usable master address
// is observed, so callers can block on it to await an initial master.
func createMesosClient(
	md detector.Master,
	httpClient *http.Client,
	tr *http.Transport,
	stateCacheTTL time.Duration) (*mesosClient, error) {
	initialMaster := make(chan struct{})
	client := &mesosClient{
		httpClient:    httpClient,
		tr:            tr,
		initialMaster: initialMaster,
		state: &stateCache{
			ttl: stateCacheTTL,
		},
	}
	client.state.refill = client.pollMasterForState
	first := true
	if err := md.Detect(detector.OnMasterChanged(func(info *mesos.MasterInfo) {
		// every read/write of client.master below (including the log line)
		// happens under masterLock; the deferred unlock runs at callback exit
		client.masterLock.Lock()
		defer client.masterLock.Unlock()
		if info == nil {
			// leadership lost: clear the cached master address
			client.master = ""
		} else if host := info.GetHostname(); host != "" {
			client.master = host
		} else {
			// no hostname reported; fall back to the packed IPv4 address
			client.master = unpackIPv4(info.GetIp())
		}
		if len(client.master) > 0 {
			client.master = fmt.Sprintf("%s:%d", client.master, info.GetPort())
			if first {
				// signal (once) that an initial master is known
				first = false
				close(initialMaster)
			}
		}
		log.Infof("cloud master changed to '%v'", client.master)
	})); err != nil {
		log.V(1).Infof("detector initialization failed: %v", err)
		return nil, err
	}
	return client, nil
}
// Start a Zookeeper listener to track leading master, invokes callback function when // master changes are reported. func startDefaultZKdetector(zkurl string, leaderChanged func(string)) error { // start listener logging.Verbose.Println("Starting master detector for ZK ", zkurl) md, err := detector.New(zkurl) if err != nil { return fmt.Errorf("failed to create master detector: %v", err) } // and listen for master changes if err := md.Detect(detector.OnMasterChanged(func(info *mesos.MasterInfo) { leader := "" if leaderChanged != nil { defer func() { leaderChanged(leader) }() } logging.VeryVerbose.Println("Updated Zookeeper info: ", info) if info == nil { logging.Error.Println("No leader available in Zookeeper.") } else { if host := info.GetHostname(); host != "" { leader = host } else { // unpack IPv4 octets := make([]byte, 4, 4) binary.BigEndian.PutUint32(octets, info.GetIp()) ipv4 := net.IP(octets) leader = ipv4.String() } leader = fmt.Sprintf("%s:%d", leader, info.GetPort()) logging.Verbose.Println("new master in Zookeeper ", leader) } })); err != nil { return fmt.Errorf("failed to initialize master detector: %v", err) } return nil }
// Starts the scheduler driver.
// Returns immediately if an error occurs within start sequence.
//
// Sequence: verify the driver is NOT_STARTED, start the messenger, record the
// messenger's UPID as our own, flip status to RUNNING, then register the
// master-change listener with the detector.
func (driver *MesosSchedulerDriver) Start() (mesos.Status, error) {
	log.Infoln("Starting the scheduler driver...")

	// NOTE(review): the Status() check and the setStatus/setStopped mutations
	// below are not one atomic step; concurrent Start() calls could interleave
	// between them — confirm callers serialize Start().
	if stat := driver.Status(); stat != mesos.Status_DRIVER_NOT_STARTED {
		return stat, fmt.Errorf("Unable to Start, expecting driver status %s, but is %s:", mesos.Status_DRIVER_NOT_STARTED, stat)
	}
	driver.setStopped(true)
	driver.setStatus(mesos.Status_DRIVER_NOT_STARTED)

	// Start the messenger.
	if err := driver.messenger.Start(); err != nil {
		log.Errorf("Scheduler failed to start the messenger: %v\n", err)
		return driver.Status(), err
	}
	// the messenger-assigned UPID becomes this driver's identity
	driver.self = driver.messenger.UPID()
	driver.setStatus(mesos.Status_DRIVER_RUNNING)
	driver.setStopped(false)

	log.Infof("Mesos scheduler driver started with PID=%v", driver.self)

	listener := detector.OnMasterChanged(func(m *mesos.MasterInfo) {
		driver.messenger.Route(context.TODO(), driver.self, &mesos.InternalMasterChangeDetected{
			Master: m,
		})
	})

	// register with Detect() AFTER we have a self pid from the messenger, otherwise things get ugly
	// because our internal messaging depends on it. detector callbacks are routed over the messenger
	// bus, maintaining serial (concurrency-safe) callback execution.
	log.V(1).Infof("starting master detector %T: %+v", driver.masterDetector, driver.masterDetector)
	driver.masterDetector.Detect(listener)
	log.V(2).Infoln("master detector started")
	return driver.Status(), nil
}
// TestMasterDetectMultiple feeds four consecutive children-changed events
// through a mocked zk connector and asserts the detector reports each
// leadership change, including the empty child list (which clears the master)
// and the final disconnect.
func TestMasterDetectMultiple(t *testing.T) {
	ch0 := make(chan zk.Event, 5) // session events
	ch1 := make(chan zk.Event, 5) // watch events
	ch0 <- zk.Event{
		Type:  zk.EventSession,
		State: zk.StateConnected,
	}
	c, err := newClient(test_zk_hosts, test_zk_path)
	assert.NoError(t, err)

	initialChildren := []string{"info_005", "info_010", "info_022"}
	connector := NewMockConnector()
	connector.On("Close").Return(nil)
	connector.On("Children", test_zk_path).Return(initialChildren, &zk.Stat{}, nil).Once()
	connector.On("ChildrenW", test_zk_path).Return([]string{test_zk_path}, &zk.Stat{}, (<-chan zk.Event)(ch1), nil)

	// the factory must be invoked exactly once; a reconnect attempt errors out
	first := true
	c.setFactory(asFactory(func() (Connector, <-chan zk.Event, error) {
		log.V(2).Infof("**** Using zk.Conn adapter ****")
		if !first {
			return nil, nil, errors.New("only 1 connector allowed")
		} else {
			first = false
		}
		return connector, ch0, nil
	}))

	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		err = e
	})
	md.client = c

	// **** Test 4 consecutive ChildrenChangedEvents ******
	// setup event changes
	sequences := [][]string{
		[]string{"info_014", "info_010", "info_005"},
		[]string{"info_005", "info_004", "info_022"},
		[]string{}, // indicates no master
		[]string{"info_017", "info_099", "info_200"},
	}

	var wg sync.WaitGroup
	startTime := time.Now()
	detected := 0
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		// notification index 2 corresponds to the empty child list above,
		// so the master is expected to be nil there and non-nil otherwise
		if detected == 2 {
			assert.Nil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		} else {
			assert.NotNil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		}
		t.Logf("Leader change detected at %v: '%+v'", time.Now().Sub(startTime), master)
		detected++
		wg.Done()
	}))

	// 3 leadership changes + disconnect (leader change to '')
	wg.Add(4)

	go func() {
		for i := range sequences {
			// the detector reads the lexically-smallest child as the leader,
			// so sort a copy to predict which node it will Get()
			sorted := make([]string, len(sequences[i]))
			copy(sorted, sequences[i])
			sort.Strings(sorted)
			t.Logf("testing master change sequence %d, path '%v'", i, test_zk_path)
			connector.On("Children", test_zk_path).Return(sequences[i], &zk.Stat{}, nil).Once()
			if len(sequences[i]) > 0 {
				connector.On("Get", fmt.Sprintf("%s/%s", test_zk_path, sorted[0])).Return(newTestMasterInfo(i), &zk.Stat{}, nil).Once()
			}
			ch1 <- zk.Event{
				Type: zk.EventNodeChildrenChanged,
				Path: test_zk_path,
			}
			time.Sleep(100 * time.Millisecond) // give async routines time to catch up
		}
		time.Sleep(1 * time.Second) // give async routines time to catch up
		t.Logf("disconnecting...")
		ch0 <- zk.Event{
			State: zk.StateDisconnected,
		}
		//TODO(jdef) does order of close matter here? probably, meaking client code is weak
		close(ch0)
		time.Sleep(500 * time.Millisecond) // give async routines time to catch up
		close(ch1)
	}()
	completed := make(chan struct{})
	go func() {
		defer close(completed)
		wg.Wait()
	}()

	// convert the timeout panic below into a regular test failure
	defer func() {
		if r := recover(); r != nil {
			t.Fatal(r)
		}
	}()
	select {
	case <-time.After(2 * time.Second):
		panic("timed out waiting for master changes to propagate")
	case <-completed:
	}
}
// TestMasterDetectFlappingConnector simulates a flapping zk connection in
// which every session expires ~400ms after connecting; the factory is invoked
// once per reconnect. The detector is expected to report alternating
// master-elected / master-lost notifications — three pairs in total.
func TestMasterDetectFlappingConnector(t *testing.T) {
	c, err := newClient(test_zk_hosts, test_zk_path)
	assert.NoError(t, err)

	initialChildren := []string{"info_005", "info_010", "info_022"}
	connector := NewMockConnector()
	connector.On("Close").Return(nil)
	connector.On("Children", test_zk_path).Return(initialChildren, &zk.Stat{}, nil)

	// timing
	// t=0     t=400ms     t=800ms     t=1200ms    t=1600ms    t=2000ms    t=2400ms
	// |--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--
	// c1          d1          c3          d3          c5          d5          d6 ...
	//    c2          d2          c4          d4          c6          c7 ...
	//    M           M'          M           M'          M           M'
	attempt := 0
	c.setFactory(asFactory(func() (Connector, <-chan zk.Event, error) {
		attempt++
		sessionEvents := make(chan zk.Event, 5)
		watchEvents := make(chan zk.Event, 5)

		// connect immediately, then schedule a disconnect 400ms later
		sessionEvents <- zk.Event{
			Type:  zk.EventSession,
			State: zk.StateConnected,
		}
		connector.On("Get", fmt.Sprintf("%s/info_005", test_zk_path)).Return(newTestMasterInfo(attempt), &zk.Stat{}, nil).Once()
		connector.On("ChildrenW", test_zk_path).Return([]string{test_zk_path}, &zk.Stat{}, (<-chan zk.Event)(watchEvents), nil)
		go func(attempt int) {
			defer close(sessionEvents)
			defer close(watchEvents)
			time.Sleep(400 * time.Millisecond)

			// this is the order in which the embedded zk implementation does it
			sessionEvents <- zk.Event{
				Type:  zk.EventSession,
				State: zk.StateDisconnected,
			}
			connector.On("ChildrenW", test_zk_path).Return(nil, nil, nil, zk.ErrSessionExpired).Once()
			watchEvents <- zk.Event{
				Type:  zk.EventNotWatching,
				State: zk.StateDisconnected,
				Path:  test_zk_path,
				Err:   zk.ErrSessionExpired,
			}
		}(attempt)
		return connector, sessionEvents, nil
	}))
	c.reconnDelay = 100 * time.Millisecond
	c.rewatchDelay = c.reconnDelay / 2

	md, err := NewMasterDetector(zkurl)
	// throttle detection cycles so each flap yields exactly one pair of events
	md.minDetectorCyclePeriod = 600 * time.Millisecond

	defer md.Cancel()
	assert.NoError(t, err)

	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		t.Logf("zk client error: %v", e)
	})
	md.client = c

	var wg sync.WaitGroup
	wg.Add(6) // 3 x (connected, disconnected)
	detected := 0
	startTime := time.Now()
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		if detected > 5 {
			// ignore
			return
		}
		// even-numbered notifications elect a master; odd-numbered clear it
		if (detected & 1) == 0 {
			assert.NotNil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		} else {
			assert.Nil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		}
		t.Logf("Leader change detected at %v: '%+v'", time.Now().Sub(startTime), master)
		detected++
		wg.Done()
	}))

	completed := make(chan struct{})
	go func() {
		defer close(completed)
		wg.Wait()
	}()

	select {
	case <-completed: // expected
	case <-time.After(3 * time.Second):
		t.Fatalf("failed to detect flapping master changes")
	}
}
// single connector instance, session does not expire, but it's internal connection to zk is flappy.
// Because the child snapshot never changes while the session flaps, exactly one
// master-change notification is expected for the whole test.
func TestMasterDetectFlappingConnectionState(t *testing.T) {
	c, err := newClient(test_zk_hosts, test_zk_path)
	assert.NoError(t, err)

	initialChildren := []string{"info_005", "info_010", "info_022"}
	connector := NewMockConnector()
	connector.On("Close").Return(nil)
	connector.On("Children", test_zk_path).Return(initialChildren, &zk.Stat{}, nil)

	var wg sync.WaitGroup
	wg.Add(2) // async flapping, master change detection

	first := true
	c.setFactory(asFactory(func() (Connector, <-chan zk.Event, error) {
		if !first {
			// NOTE(review): t.Fatalf is only reliable from the test goroutine;
			// presumably the factory runs synchronously on it — confirm, else
			// this should be t.Errorf + the error return below.
			t.Fatalf("only one connector instance expected")
			return nil, nil, errors.New("ran out of connectors")
		} else {
			first = false
		}
		sessionEvents := make(chan zk.Event, 10)
		watchEvents := make(chan zk.Event, 10)

		connector.On("Get", fmt.Sprintf("%s/info_005", test_zk_path)).Return(newTestMasterInfo(1), &zk.Stat{}, nil).Once()
		connector.On("ChildrenW", test_zk_path).Return([]string{test_zk_path}, &zk.Stat{}, (<-chan zk.Event)(watchEvents), nil)
		go func() {
			defer wg.Done()
			time.Sleep(100 * time.Millisecond)
			// flap the connection state repeatedly; the snapshot never changes
			for attempt := 0; attempt < 5; attempt++ {
				sessionEvents <- zk.Event{
					Type:  zk.EventSession,
					State: zk.StateConnected,
				}
				time.Sleep(500 * time.Millisecond)
				sessionEvents <- zk.Event{
					Type:  zk.EventSession,
					State: zk.StateDisconnected,
				}
			}
			// end in the connected state
			sessionEvents <- zk.Event{
				Type:  zk.EventSession,
				State: zk.StateConnected,
			}
		}()
		return connector, sessionEvents, nil
	}))
	c.reconnDelay = 0 // there should be no reconnect, but just in case don't drag the test out

	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		t.Logf("zk client error: %v", e)
	})
	md.client = c

	startTime := time.Now()
	detected := false
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		// only a single (non-nil) master notification is acceptable
		if detected {
			t.Fatalf("already detected master, was not expecting another change: %v", master)
		} else {
			detected = true
			assert.NotNil(t, master, fmt.Sprintf("on-master-changed %v", detected))
			t.Logf("Leader change detected at %v: '%+v'", time.Now().Sub(startTime), master)
			wg.Done()
		}
	}))

	completed := make(chan struct{})
	go func() {
		defer close(completed)
		wg.Wait()
	}()

	select {
	case <-completed: // expected
	case <-time.After(3 * time.Second):
		t.Fatalf("failed to detect master change")
	}
}
cancel func()      // stops the detector's background work
client ZKInterface // zk client; may be injected for testing via bootstrapFunc
done   chan struct{}
// latch: only install, at most, one ignoreChanged listener; see MasterDetector.Detect
ignoreInstalled        int32
leaderNode             string
minDetectorCyclePeriod time.Duration
// guard against concurrent invocations of bootstrapFunc
bootstrapLock sync.RWMutex
bootstrapFunc func(ZKInterface, <-chan struct{}) (ZKInterface, error) // for one-time zk client initiation
}
)

// reasonable default for a noop change listener
var ignoreChanged = detector.OnMasterChanged(func(*mesos.MasterInfo) {})

// MinCyclePeriod is a functional option that determines the highest frequency of master change notifications
func MinCyclePeriod(d time.Duration) detector.Option {
	return func(di interface{}) detector.Option {
		md := di.(*MasterDetector)
		old := md.minDetectorCyclePeriod
		md.minDetectorCyclePeriod = d
		// return an option that restores the previous value (undo)
		return MinCyclePeriod(old)
	}
}

// Bootstrap is a functional option that installs a one-time zk client bootstrap function.
func Bootstrap(f func(ZKInterface, <-chan struct{}) (ZKInterface, error)) detector.Option {
	return func(di interface{}) detector.Option {
		md := di.(*MasterDetector)
		old := md.bootstrapFunc
// TestMasterDetector_multipleLeadershipChanges pushes four child-list
// snapshots through a mocked zk client and verifies the detector reports each
// expected leader in order, followed by a final lost-leader notification once
// the error channel is closed.
func TestMasterDetector_multipleLeadershipChanges(t *testing.T) {
	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	leadershipChanges := [][]string{
		{"info_014", "info_010", "info_005"},
		{"info_005", "info_004", "info_022"},
		{}, // indicates no master
		{"info_017", "info_099", "info_200"},
	}

	ITERATIONS := len(leadershipChanges)

	// +1 for initial snapshot, +1 for final lost-leader (close(errs))
	EXPECTED_CALLS := (ITERATIONS + 2)

	var wg sync.WaitGroup
	wg.Add(ITERATIONS) // +1 for the initial snapshot that's sent for the first watch, -1 because set 3 is empty

	path := test_zk_path

	md.bootstrapFunc = func() error {
		// idempotent: skip if a client was already injected
		if md.client != nil {
			return nil
		}
		log.V(1).Infoln("bootstrapping detector")
		defer log.V(1).Infoln("bootstrapping detector ..finished")

		children := []string{"info_0", "info_5", "info_10"}
		mocked, snaps, errs := newMockZkClient(children...)
		md.client = mocked
		md.minDetectorCyclePeriod = 10 * time.Millisecond // we don't have all day!

		// one "data" expectation per leader node the detector will resolve
		mocked.On("data", fmt.Sprintf("%s/info_0", path)).Return(newTestMasterInfo(0), nil)
		mocked.On("data", fmt.Sprintf("%s/info_005", path)).Return(newTestMasterInfo(5), nil)
		mocked.On("data", fmt.Sprintf("%s/info_004", path)).Return(newTestMasterInfo(4), nil)
		mocked.On("data", fmt.Sprintf("%s/info_017", path)).Return(newTestMasterInfo(17), nil)

		// the first snapshot will be sent immediately and the detector will be awaiting en event.
		// cycle through some connected/disconnected events but maintain the same snapshot
		go func() {
			defer close(errs)
			for attempt := 0; attempt < ITERATIONS; attempt++ {
				snaps <- leadershipChanges[attempt]
			}
		}()
		return nil
	}

	called := 0
	lostMaster := make(chan struct{})
	expectedLeaders := []int{0, 5, 4, 17}
	leaderIdx := 0
	err = md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		called++
		log.V(3).Infof("detector invoked: called %d", called)
		switch {
		case called < EXPECTED_CALLS:
			// nil masters (empty snapshot) are skipped; non-nil masters must
			// match the expected leader sequence in order
			if master != nil {
				expectedLeader := fmt.Sprintf("master(%d)@localhost:5050", expectedLeaders[leaderIdx])
				assert.Equal(t, expectedLeader, master.GetId())
				leaderIdx++
				wg.Done()
			}
		case called == EXPECTED_CALLS:
			// final notification: shut down and expect the leader cleared
			md.Cancel()
			defer close(lostMaster)
			assert.Nil(t, master)
		default:
			t.Errorf("unexpected notification call attempt %d", called)
		}
	}))
	assert.NoError(t, err)

	fatalAfter(t, 10*time.Second, wg.Wait, "Waited too long for new-master alerts")
	fatalOn(t, 3*time.Second, lostMaster, "Waited too long for lost master")

	select {
	case <-md.Done():
		assert.Equal(t, EXPECTED_CALLS, called, "expected %d detection callbacks instead of %d", EXPECTED_CALLS, called)
	case <-time.After(time.Second * 10):
		panic("Waited too long for detector shutdown...")
	}
}
// single connector instance, it's internal connection to zk is flappy.
// Each injected zk.ErrSessionExpired forces the detector to re-issue a watch;
// every re-watch receives the same snapshot, so each flap cycle produces a
// lost-leader plus a re-elected-leader notification.
func TestMasterDetectorFlappyConnectionState(t *testing.T) {
	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	const ITERATIONS = 3
	var wg sync.WaitGroup
	wg.Add(1 + ITERATIONS) // +1 for the initial snapshot that's sent for the first watch

	path := test_zk_path

	md.bootstrapFunc = func() error {
		// idempotent: skip if a client was already injected
		if md.client != nil {
			return nil
		}
		log.V(1).Infoln("bootstrapping detector")
		defer log.V(1).Infoln("bootstrapping detector ..finished")

		children := []string{"info_0", "info_5", "info_10"}
		mocked, snaps, errs := newMockZkClient(children...)
		md.client = mocked
		md.minDetectorCyclePeriod = 10 * time.Millisecond // we don't have all day!

		mocked.On("data", fmt.Sprintf("%s/info_0", path)).Return(newTestMasterInfo(0), nil)

		// the first snapshot will be sent immediately and the detector will be awaiting en event.
		// cycle through some connected/disconnected events but maintain the same snapshot
		go func() {
			defer close(errs)
			for attempt := 0; attempt < ITERATIONS; attempt++ {
				// send an error, should cause the detector to re-issue a watch
				errs <- zk.ErrSessionExpired
				// the detection loop issues another watch, so send it a snapshot..
				// send another snapshot
				snaps <- children
			}
		}()
		return nil
	}

	called := 0
	lostMaster := make(chan struct{})
	const EXPECTED_CALLS = (ITERATIONS * 2) + 2 // +1 for initial snapshot, +1 for final lost-leader (close(errs))
	err = md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		called++
		log.V(3).Infof("detector invoked: called %d", called)
		switch {
		case called < EXPECTED_CALLS:
			// nil masters (session-expiry cycles) are skipped; every non-nil
			// master must be the same leader from the unchanging snapshot
			if master != nil {
				wg.Done()
				assert.Equal(t, master.GetId(), "master(0)@localhost:5050")
			}
		case called == EXPECTED_CALLS:
			// final notification: shut down and expect the leader cleared
			md.Cancel()
			defer close(lostMaster)
			assert.Nil(t, master)
		default:
			t.Errorf("unexpected notification call attempt %d", called)
		}
	}))
	assert.NoError(t, err)

	fatalAfter(t, 10*time.Second, wg.Wait, "Waited too long for new-master alerts")
	fatalOn(t, 3*time.Second, lostMaster, "Waited too long for lost master")

	select {
	case <-md.Done():
		assert.Equal(t, EXPECTED_CALLS, called, "expected %d detection callbacks instead of %d", EXPECTED_CALLS, called)
	case <-time.After(time.Second * 10):
		panic("Waited too long for detector shutdown...")
	}
}
// TestMasterDetectorChildrenChanged (mock-client variant) expects exactly two
// notifications: first the leader elected from the initial child snapshot,
// then a nil master once the watch ends (errs closed).
func TestMasterDetectorChildrenChanged(t *testing.T) {
	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	path := test_zk_path

	snapDetected := make(chan struct{})
	md.bootstrapFunc = func() error {
		// idempotent: skip if a client was already injected
		if md.client != nil {
			return nil
		}
		log.V(1).Infoln("bootstrapping detector")
		defer log.V(1).Infoln("bootstrapping detector ..finished")

		mocked, _, errs := newMockZkClient("info_0", "info_5", "info_10")
		md.client = mocked
		md.minDetectorCyclePeriod = 10 * time.Millisecond // we don't have all day!

		mocked.On("data", fmt.Sprintf("%s/info_0", path)).Return(newTestMasterInfo(0), nil)

		// wait for the first child snapshot to be processed before signaling end-of-watch
		// (which is signalled by closing errs).
		go func() {
			defer close(errs)
			select {
			case <-snapDetected:
			case <-md.Done():
				t.Errorf("detector died before child snapshot")
			}
		}()
		return nil
	}

	called := 0
	lostMaster := make(chan struct{})
	const expectedLeader = "master(0)@localhost:5050"
	err = md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		//expect 2 calls in sequence: the first setting a master
		//and the second clearing it
		switch called++; called {
		case 1:
			// first notification: leader elected; unblock the bootstrap
			// goroutine so it may close errs and end the watch
			defer close(snapDetected)
			assert.NotNil(t, master)
			assert.Equal(t, expectedLeader, master.GetId())
		case 2:
			// second notification: leader lost after end-of-watch
			md.Cancel()
			defer close(lostMaster)
			assert.Nil(t, master)
		default:
			t.Errorf("unexpected notification call attempt %d", called)
		}
	}))
	assert.NoError(t, err)

	fatalOn(t, 10*time.Second, lostMaster, "Waited too long for lost master")
	select {
	case <-md.Done():
		assert.Equal(t, 2, called, "expected 2 detection callbacks instead of %d", called)
	case <-time.After(time.Second * 10):
		panic("Waited too long for detector shutdown...")
	}
}