func (suite *SchedulerTestSuite) TestSchedulerDriverNew_WithZkUrl() {
	masterAddr := "zk://127.0.0.1:5050/mesos"
	driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, masterAddr, nil)
	md, err := zoo.NewMockMasterDetector(masterAddr)
	suite.NoError(err)
	suite.NotNil(md)
	driver.masterDetector = md // override internal master detector

	md.ScheduleConnEvent(zk.StateConnected)

	done := make(chan struct{})
	driver.masterDetector.Detect(detector.OnMasterChanged(func(m *mesos.MasterInfo) {
		suite.NotNil(m)
		suite.NotEqual(m.GetPid(), suite.masterUpid)
		close(done)
	}))

	//TODO(vlad) revisit, detector not responding.

	//NOTE(jdef) this works for me, I wonder if the timeouts are too short, or if
	//GOMAXPROCS settings are affecting the result?

	// md.ScheduleSessEvent(zk.EventNodeChildrenChanged)
	// select {
	// case <-done:
	// case <-time.After(time.Millisecond * 1000):
	// 	suite.T().Errorf("Timed out waiting for children event.")
	// }
}
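A hedged aside on the disabled block above: if the 1s window was simply too tight, the wait can be restored with a more generous timeout. This sketch assumes the mock detector's ScheduleSessEvent behaves as the comments describe.

	// Sketch only: re-enable the commented-out wait with a longer timeout.
	md.ScheduleSessEvent(zk.EventNodeChildrenChanged)
	select {
	case <-done:
	case <-time.After(5 * time.Second): // more generous than the original 1s
		suite.T().Errorf("timed out waiting for children event")
	}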
Example #2
func createMesosClient(
	md detector.Master,
	httpClient *http.Client,
	tr *http.Transport,
	stateCacheTTL time.Duration) (*mesosClient, error) {

	initialMaster := make(chan struct{})
	client := &mesosClient{
		httpClient:    httpClient,
		tr:            tr,
		initialMaster: initialMaster,
		state: &stateCache{
			ttl: stateCacheTTL,
		},
	}
	client.state.refill = client.pollMasterForState
	first := true
	if err := md.Detect(detector.OnMasterChanged(func(info *mesos.MasterInfo) {
		host, port := extractMasterAddress(info)
		if len(host) > 0 {
			client.masterLock.Lock()
			defer client.masterLock.Unlock()
			client.master = fmt.Sprintf("%s:%d", host, port)
			if first {
				first = false
				close(initialMaster)
			}
		}
		log.Infof("cloud master changed to '%v'", client.master)
	})); err != nil {
		log.V(1).Infof("detector initialization failed: %v", err)
		return nil, err
	}
	return client, nil
}
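A hypothetical caller-side sketch: block on initialMaster (closed by the callback above on first detection) before issuing requests. The wrapper name, the 30s timeout, and the direct field access are assumptions for illustration, not part of the snippet.

// Hypothetical wrapper: return the client only once a master is known.
func newReadyMesosClient(md detector.Master, httpClient *http.Client, tr *http.Transport) (*mesosClient, error) {
	client, err := createMesosClient(md, httpClient, tr, 5*time.Second)
	if err != nil {
		return nil, err
	}
	select {
	case <-client.initialMaster: // closed exactly once, on the first detection
	case <-time.After(30 * time.Second): // assumed timeout
		return nil, errors.New("timed out waiting for initial master detection")
	}
	return client, nil
}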
Example #3
// start expected to be guarded by eventLock
func (driver *MesosSchedulerDriver) start() (mesos.Status, error) {
	select {
	case <-driver.started:
		return driver.status, errors.New("Unable to Start: driver has already been started once.")
	default: // proceed
	}

	log.Infoln("Starting the scheduler driver...")

	if driver.status != mesos.Status_DRIVER_NOT_STARTED {
		return driver.status, fmt.Errorf("Unable to Start, expecting driver status %s, but is %s", mesos.Status_DRIVER_NOT_STARTED, driver.status)
	}

	// Start the messenger.
	if err := driver.messenger.Start(); err != nil {
		log.Errorf("Scheduler failed to start the messenger: %v\n", err)
		return driver.status, err
	}

	driver.self = driver.messenger.UPID()
	driver.status = mesos.Status_DRIVER_RUNNING
	close(driver.started)

	// TODO(jdef) hacky but we don't want to miss it if the scheduler shuts down
	go func() {
		t := time.NewTicker(2 * time.Second)
		defer t.Stop()
		for {
			<-t.C
			driver.eventCond.Broadcast()
			select {
			case <-driver.stopCh:
				return
			default:
			}
		}
	}()

	log.Infof("Mesos scheduler driver started with PID=%v", driver.self)

	listener := detector.OnMasterChanged(func(m *mesos.MasterInfo) {
		driver.messenger.Route(context.TODO(), driver.self, &mesos.InternalMasterChangeDetected{
			Master: m,
		})
	})

	// register with Detect() AFTER we have a self pid from the messenger, otherwise things get ugly
	// because our internal messaging depends on it. detector callbacks are routed over the messenger
	// bus, maintaining serial (concurrency-safe) callback execution.
	log.V(1).Infof("starting master detector %T: %+v", driver.masterDetector, driver.masterDetector)
	driver.masterDetector.Detect(listener)

	log.V(2).Infoln("master detector started")
	return driver.status, nil
}
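The select on driver.started at the top of start() is the closed-channel run-once idiom; here is a minimal self-contained sketch of it. The runner type is invented for illustration, and like start() it relies on an external lock (eventLock above) for concurrency safety.

type runner struct {
	started chan struct{} // closed exactly once, on the first successful start
}

func newRunner() *runner { return &runner{started: make(chan struct{})} }

// start rejects every call after the first: a receive from a closed channel
// never blocks, so the first select case fires once started has been closed.
// Callers must hold an external lock, mirroring the eventLock contract.
func (r *runner) start() error {
	select {
	case <-r.started:
		return errors.New("already started")
	default:
	}
	close(r.started)
	return nil
}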
Example #4
func (d *ZkLeaderDetector) Detect(zkUrl *string) (*string, error) {
	ld, err := zoo.NewMasterDetector(*zkUrl)
	if err != nil {
		return nil, fmt.Errorf("Failed to create master detector: %v", err)
	}
	if err := ld.Detect(detector.OnMasterChanged(d.onLeaderChange)); err != nil {
		return nil, fmt.Errorf("Failed to initialize master detector: %v", err)
	}

	// wait for callback to write new leader into this private channel
	return <-d.newLeader, nil
}
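Neither d.onLeaderChange nor d.newLeader is shown in this snippet; here is a hypothetical sketch of the callback that unblocks Detect, assuming newLeader is declared as chan *string to match the return type above.

// Hypothetical sketch: format the new leader's address and hand it to the
// channel that Detect blocks on. A nil MasterInfo yields a nil leader.
func (d *ZkLeaderDetector) onLeaderChange(info *mesos.MasterInfo) {
	var leader *string
	if info != nil {
		addr := fmt.Sprintf("%s:%d", info.GetHostname(), info.GetPort())
		leader = &addr
	}
	d.newLeader <- leader
}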
Example #5
func TestMasterDetectorChildrenChanged(t *testing.T) {
	wCh := make(chan struct{}, 1)

	c, err := makeClient()
	assert.NoError(t, err)
	assert.False(t, c.isConnected())

	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)
	// override zk.Conn with our own.
	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		err = e
	})
	md.client = c
	md.client.connect()
	assert.NoError(t, err)
	assert.True(t, c.isConnected())

	called := 0
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		//expect 2 calls in sequence: the first setting a master
		//and the second clearing it
		switch called++; called {
		case 1:
			assert.NotNil(t, master)
			assert.Equal(t, "master@localhost:5050", master.GetId())
			wCh <- struct{}{}
		case 2:
			assert.Nil(t, master)
			wCh <- struct{}{}
		default:
			t.Fatalf("unexpected notification call attempt %d", called)
		}
	}))

	startWait := time.Now()
	select {
	case <-wCh:
	case <-time.After(time.Second * 3):
		panic("Waited too long...")
	}

	// wait for the disconnect event, should be triggered
	// 1s after the connected event
	waited := time.Since(startWait)
	time.Sleep((2 * time.Second) - waited)
	assert.False(t, c.isConnected())
}
Example #6
func createMesosClient(
	md detector.Master,
	httpClient *http.Client,
	tr *http.Transport,
	stateCacheTTL time.Duration) (*mesosClient, error) {

	initialMaster := make(chan struct{})
	client := &mesosClient{
		httpClient:    httpClient,
		tr:            tr,
		initialMaster: initialMaster,
		state: &stateCache{
			ttl: stateCacheTTL,
		},
	}
	client.state.refill = client.pollMasterForState
	first := true
	if err := md.Detect(detector.OnMasterChanged(func(info *mesos.MasterInfo) {
		client.masterLock.Lock()
		defer client.masterLock.Unlock()
		if info == nil {
			client.master = ""
		} else if host := info.GetHostname(); host != "" {
			client.master = host
		} else {
			client.master = unpackIPv4(info.GetIp())
		}
		if len(client.master) > 0 {
			client.master = fmt.Sprintf("%s:%d", client.master, info.GetPort())
			if first {
				first = false
				close(initialMaster)
			}
		}
		log.Infof("cloud master changed to '%v'", client.master)
	})); err != nil {
		log.V(1).Infof("detector initialization failed: %v", err)
		return nil, err
	}
	return client, nil
}
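unpackIPv4 is called above but not included in the snippet; this sketch mirrors the inline big-endian unpacking shown in Example #7 (imports of encoding/binary and net assumed).

// Sketch of the unpackIPv4 helper referenced above. Mesos publishes the
// IPv4 address as a uint32, so unpack it big-endian into dotted-quad form.
func unpackIPv4(ip uint32) string {
	octets := make([]byte, 4)
	binary.BigEndian.PutUint32(octets, ip)
	return net.IP(octets).String()
}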
Example #7
// Starts a ZooKeeper listener that tracks the leading master and invokes the
// callback function when master changes are reported.
func startDefaultZKdetector(zkurl string, leaderChanged func(string)) error {

	// start listener
	logging.Verbose.Println("Starting master detector for ZK ", zkurl)
	md, err := detector.New(zkurl)
	if err != nil {
		return fmt.Errorf("failed to create master detector: %v", err)
	}

	// and listen for master changes
	if err := md.Detect(detector.OnMasterChanged(func(info *mesos.MasterInfo) {
		leader := ""
		if leaderChanged != nil {
			defer func() {
				leaderChanged(leader)
			}()
		}
		logging.VeryVerbose.Println("Updated Zookeeper info: ", info)
		if info == nil {
			logging.Error.Println("No leader available in Zookeeper.")
		} else {
			if host := info.GetHostname(); host != "" {
				leader = host
			} else {
				// unpack IPv4
				octets := make([]byte, 4)
				binary.BigEndian.PutUint32(octets, info.GetIp())
				ipv4 := net.IP(octets)
				leader = ipv4.String()
			}
			leader = fmt.Sprintf("%s:%d", leader, info.GetPort())
			logging.Verbose.Println("new master in Zookeeper ", leader)
		}
	})); err != nil {
		return fmt.Errorf("failed to initialize master detector: %v", err)
	}
	return nil
}
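A usage sketch for the function above; the zk URL is illustrative. An empty leader string signals that no leader is currently available, matching the callback contract in the code.

err := startDefaultZKdetector("zk://127.0.0.1:2181/mesos", func(leader string) {
	if leader == "" {
		logging.Error.Println("lost Mesos leader")
		return
	}
	logging.Verbose.Println("new Mesos leader: ", leader)
})
if err != nil {
	logging.Error.Println("failed to start detector: ", err)
}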
Example #8
// Starts the scheduler driver.
// Returns immediately if an error occurs during the start sequence.
func (driver *MesosSchedulerDriver) Start() (mesos.Status, error) {
	log.Infoln("Starting the scheduler driver...")

	if stat := driver.Status(); stat != mesos.Status_DRIVER_NOT_STARTED {
		return stat, fmt.Errorf("Unable to Start, expecting driver status %s, but is %s", mesos.Status_DRIVER_NOT_STARTED, stat)
	}

	driver.setStopped(true)
	driver.setStatus(mesos.Status_DRIVER_NOT_STARTED)

	// Start the messenger.
	if err := driver.messenger.Start(); err != nil {
		log.Errorf("Scheduler failed to start the messenger: %v\n", err)
		return driver.Status(), err
	}

	driver.self = driver.messenger.UPID()
	driver.setStatus(mesos.Status_DRIVER_RUNNING)
	driver.setStopped(false)

	log.Infof("Mesos scheduler driver started with PID=%v", driver.self)

	listener := detector.OnMasterChanged(func(m *mesos.MasterInfo) {
		driver.messenger.Route(context.TODO(), driver.self, &mesos.InternalMasterChangeDetected{
			Master: m,
		})
	})

	// register with Detect() AFTER we have a self pid from the messenger, otherwise things get ugly
	// because our internal messaging depends on it. detector callbacks are routed over the messenger
	// bus, maintaining serial (concurrency-safe) callback execution.
	log.V(1).Infof("starting master detector %T: %+v", driver.masterDetector, driver.masterDetector)
	driver.masterDetector.Detect(listener)

	log.V(2).Infoln("master detector started")
	return driver.Status(), nil
}
Example #9
func TestMasterDetectMultiple(t *testing.T) {
	ch0 := make(chan zk.Event, 5)
	ch1 := make(chan zk.Event, 5)

	ch0 <- zk.Event{
		Type:  zk.EventSession,
		State: zk.StateConnected,
	}

	c, err := newClient(test_zk_hosts, test_zk_path)
	assert.NoError(t, err)

	initialChildren := []string{"info_005", "info_010", "info_022"}
	connector := NewMockConnector()
	connector.On("Close").Return(nil)
	connector.On("Children", test_zk_path).Return(initialChildren, &zk.Stat{}, nil).Once()
	connector.On("ChildrenW", test_zk_path).Return([]string{test_zk_path}, &zk.Stat{}, (<-chan zk.Event)(ch1), nil)

	first := true
	c.setFactory(asFactory(func() (Connector, <-chan zk.Event, error) {
		log.V(2).Infof("**** Using zk.Conn adapter ****")
		if !first {
			return nil, nil, errors.New("only 1 connector allowed")
		} else {
			first = false
		}
		return connector, ch0, nil
	}))

	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		err = e
	})
	md.client = c

	// **** Test 4 consecutive ChildrenChangedEvents ******
	// setup event changes
	sequences := [][]string{
		{"info_014", "info_010", "info_005"},
		{"info_005", "info_004", "info_022"},
		{}, // indicates no master
		{"info_017", "info_099", "info_200"},
	}

	var wg sync.WaitGroup
	startTime := time.Now()
	detected := 0
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		if detected == 2 {
			assert.Nil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		} else {
			assert.NotNil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		}
		t.Logf("Leader change detected at %v: '%+v'", time.Now().Sub(startTime), master)
		detected++
		wg.Done()
	}))

	// 3 leadership changes + disconnect (leader change to '')
	wg.Add(4)

	go func() {
		for i := range sequences {
			sorted := make([]string, len(sequences[i]))
			copy(sorted, sequences[i])
			sort.Strings(sorted)
			t.Logf("testing master change sequence %d, path '%v'", i, test_zk_path)
			connector.On("Children", test_zk_path).Return(sequences[i], &zk.Stat{}, nil).Once()
			if len(sequences[i]) > 0 {
				connector.On("Get", fmt.Sprintf("%s/%s", test_zk_path, sorted[0])).Return(newTestMasterInfo(i), &zk.Stat{}, nil).Once()
			}
			ch1 <- zk.Event{
				Type: zk.EventNodeChildrenChanged,
				Path: test_zk_path,
			}
			time.Sleep(100 * time.Millisecond) // give async routines time to catch up
		}
		time.Sleep(1 * time.Second) // give async routines time to catch up
		t.Logf("disconnecting...")
		ch0 <- zk.Event{
			State: zk.StateDisconnected,
		}
		//TODO(jdef) does order of close matter here? probably, meaning client code is weak
		close(ch0)
		time.Sleep(500 * time.Millisecond) // give async routines time to catch up
		close(ch1)
	}()
	completed := make(chan struct{})
	go func() {
		defer close(completed)
		wg.Wait()
	}()

	defer func() {
		if r := recover(); r != nil {
			t.Fatal(r)
		}
	}()

	select {
	case <-time.After(2 * time.Second):
		panic("timed out waiting for master changes to propagate")
	case <-completed:
	}
}
Example #10
func TestMasterDetectFlappingConnector(t *testing.T) {
	c, err := newClient(test_zk_hosts, test_zk_path)
	assert.NoError(t, err)

	initialChildren := []string{"info_005", "info_010", "info_022"}
	connector := NewMockConnector()
	connector.On("Close").Return(nil)
	connector.On("Children", test_zk_path).Return(initialChildren, &zk.Stat{}, nil)

	// timing
	// t=0         t=400ms     t=800ms     t=1200ms    t=1600ms    t=2000ms    t=2400ms
	// |--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--|--=--=--=--
	//  c1          d1               c3          d3                c5          d5             d6    ...
	//                c2          d2                c4          d4                c6             c7 ...
	//  M           M'   M        M'       M     M'

	attempt := 0
	c.setFactory(asFactory(func() (Connector, <-chan zk.Event, error) {
		attempt++
		sessionEvents := make(chan zk.Event, 5)
		watchEvents := make(chan zk.Event, 5)

		sessionEvents <- zk.Event{
			Type:  zk.EventSession,
			State: zk.StateConnected,
		}
		connector.On("Get", fmt.Sprintf("%s/info_005", test_zk_path)).Return(newTestMasterInfo(attempt), &zk.Stat{}, nil).Once()
		connector.On("ChildrenW", test_zk_path).Return([]string{test_zk_path}, &zk.Stat{}, (<-chan zk.Event)(watchEvents), nil)
		go func(attempt int) {
			defer close(sessionEvents)
			defer close(watchEvents)
			time.Sleep(400 * time.Millisecond)
			// this is the order in which the embedded zk implementation does it
			sessionEvents <- zk.Event{
				Type:  zk.EventSession,
				State: zk.StateDisconnected,
			}
			connector.On("ChildrenW", test_zk_path).Return(nil, nil, nil, zk.ErrSessionExpired).Once()
			watchEvents <- zk.Event{
				Type:  zk.EventNotWatching,
				State: zk.StateDisconnected,
				Path:  test_zk_path,
				Err:   zk.ErrSessionExpired,
			}
		}(attempt)
		return connector, sessionEvents, nil
	}))
	c.reconnDelay = 100 * time.Millisecond
	c.rewatchDelay = c.reconnDelay / 2

	md, err := NewMasterDetector(zkurl)
	md.minDetectorCyclePeriod = 600 * time.Millisecond

	defer md.Cancel()
	assert.NoError(t, err)

	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		t.Logf("zk client error: %v", e)
	})
	md.client = c

	var wg sync.WaitGroup
	wg.Add(6) // 3 x (connected, disconnected)
	detected := 0
	startTime := time.Now()
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		if detected > 5 {
			// ignore
			return
		}
		if (detected & 1) == 0 {
			assert.NotNil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		} else {
			assert.Nil(t, master, fmt.Sprintf("on-master-changed-%d", detected))
		}
		t.Logf("Leader change detected at %v: '%+v'", time.Now().Sub(startTime), master)
		detected++
		wg.Done()
	}))

	completed := make(chan struct{})
	go func() {
		defer close(completed)
		wg.Wait()
	}()

	select {
	case <-completed: // expected
	case <-time.After(3 * time.Second):
		t.Fatalf("failed to detect flapping master changes")
	}
}
Example #11
// single connector instance, session does not expire, but its internal connection to zk is flappy
func TestMasterDetectFlappingConnectionState(t *testing.T) {
	c, err := newClient(test_zk_hosts, test_zk_path)
	assert.NoError(t, err)

	initialChildren := []string{"info_005", "info_010", "info_022"}
	connector := NewMockConnector()
	connector.On("Close").Return(nil)
	connector.On("Children", test_zk_path).Return(initialChildren, &zk.Stat{}, nil)

	var wg sync.WaitGroup
	wg.Add(2) // async flapping, master change detection

	first := true
	c.setFactory(asFactory(func() (Connector, <-chan zk.Event, error) {
		if !first {
			t.Fatalf("only one connector instance expected")
			return nil, nil, errors.New("ran out of connectors")
		} else {
			first = false
		}
		sessionEvents := make(chan zk.Event, 10)
		watchEvents := make(chan zk.Event, 10)

		connector.On("Get", fmt.Sprintf("%s/info_005", test_zk_path)).Return(newTestMasterInfo(1), &zk.Stat{}, nil).Once()
		connector.On("ChildrenW", test_zk_path).Return([]string{test_zk_path}, &zk.Stat{}, (<-chan zk.Event)(watchEvents), nil)
		go func() {
			defer wg.Done()
			time.Sleep(100 * time.Millisecond)
			for attempt := 0; attempt < 5; attempt++ {
				sessionEvents <- zk.Event{
					Type:  zk.EventSession,
					State: zk.StateConnected,
				}
				time.Sleep(500 * time.Millisecond)
				sessionEvents <- zk.Event{
					Type:  zk.EventSession,
					State: zk.StateDisconnected,
				}
			}
			sessionEvents <- zk.Event{
				Type:  zk.EventSession,
				State: zk.StateConnected,
			}
		}()
		return connector, sessionEvents, nil
	}))
	c.reconnDelay = 0 // there should be no reconnect, but just in case don't drag the test out

	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	c.errorHandler = ErrorHandler(func(c *Client, e error) {
		t.Logf("zk client error: %v", e)
	})
	md.client = c

	startTime := time.Now()
	detected := false
	md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		if detected {
			t.Fatalf("already detected master, was not expecting another change: %v", master)
		} else {
			detected = true
			assert.NotNil(t, master, fmt.Sprintf("on-master-changed %v", detected))
			t.Logf("Leader change detected at %v: '%+v'", time.Now().Sub(startTime), master)
			wg.Done()
		}
	}))

	completed := make(chan struct{})
	go func() {
		defer close(completed)
		wg.Wait()
	}()

	select {
	case <-completed: // expected
	case <-time.After(3 * time.Second):
		t.Fatalf("failed to detect master change")
	}
}
Example #12
		cancel func()
		client ZKInterface
		done   chan struct{}
		// latch: only install, at most, one ignoreChanged listener; see MasterDetector.Detect
		ignoreInstalled        int32
		leaderNode             string
		minDetectorCyclePeriod time.Duration

		// guard against concurrent invocations of bootstrapFunc
		bootstrapLock sync.RWMutex
		bootstrapFunc func(ZKInterface, <-chan struct{}) (ZKInterface, error) // for one-time zk client initiation
	}
)

// reasonable default for a noop change listener
var ignoreChanged = detector.OnMasterChanged(func(*mesos.MasterInfo) {})

// MinCyclePeriod is a functional option that determines the highest frequency of master change notifications
func MinCyclePeriod(d time.Duration) detector.Option {
	return func(di interface{}) detector.Option {
		md := di.(*MasterDetector)
		old := md.minDetectorCyclePeriod
		md.minDetectorCyclePeriod = d
		return MinCyclePeriod(old)
	}
}
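Because each option returns its predecessor, a caller can apply an option and later revert it. A short sketch follows; applying the option directly to a *MasterDetector follows from the Option signature, but the surrounding code is illustrative.

// Sketch: install a faster detection cycle and keep the returned option
// as an undo handle that restores the previous period.
md, err := NewMasterDetector(zkurl)
if err != nil {
	log.Fatalf("failed to create master detector: %v", err)
}
undo := MinCyclePeriod(50 * time.Millisecond)(md)
defer undo(md)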

func Bootstrap(f func(ZKInterface, <-chan struct{}) (ZKInterface, error)) detector.Option {
	return func(di interface{}) detector.Option {
		md := di.(*MasterDetector)
		old := md.bootstrapFunc
		md.bootstrapFunc = f
		return Bootstrap(old)
	}
}
Example #13
func TestMasterDetector_multipleLeadershipChanges(t *testing.T) {
	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	leadershipChanges := [][]string{
		{"info_014", "info_010", "info_005"},
		{"info_005", "info_004", "info_022"},
		{}, // indicates no master
		{"info_017", "info_099", "info_200"},
	}

	ITERATIONS := len(leadershipChanges)

	// +1 for initial snapshot, +1 for final lost-leader (close(errs))
	EXPECTED_CALLS := (ITERATIONS + 2)

	var wg sync.WaitGroup
	wg.Add(ITERATIONS) // +1 for the initial snapshot that's sent for the first watch, -1 because set 3 is empty
	path := test_zk_path

	md.bootstrapFunc = func() error {
		if md.client != nil {
			return nil
		}
		log.V(1).Infoln("bootstrapping detector")
		defer log.V(1).Infoln("bootstrapping detector ..finished")

		children := []string{"info_0", "info_5", "info_10"}
		mocked, snaps, errs := newMockZkClient(children...)
		md.client = mocked
		md.minDetectorCyclePeriod = 10 * time.Millisecond // we don't have all day!

		mocked.On("data", fmt.Sprintf("%s/info_0", path)).Return(newTestMasterInfo(0), nil)
		mocked.On("data", fmt.Sprintf("%s/info_005", path)).Return(newTestMasterInfo(5), nil)
		mocked.On("data", fmt.Sprintf("%s/info_004", path)).Return(newTestMasterInfo(4), nil)
		mocked.On("data", fmt.Sprintf("%s/info_017", path)).Return(newTestMasterInfo(17), nil)

		// the first snapshot will be sent immediately and the detector will be awaiting an event.
		// feed it the successive leadership-change snapshots defined above
		go func() {
			defer close(errs)
			for attempt := 0; attempt < ITERATIONS; attempt++ {
				snaps <- leadershipChanges[attempt]
			}
		}()
		return nil
	}

	called := 0
	lostMaster := make(chan struct{})
	expectedLeaders := []int{0, 5, 4, 17}
	leaderIdx := 0
	err = md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		called++
		log.V(3).Infof("detector invoked: called %d", called)
		switch {
		case called < EXPECTED_CALLS:
			if master != nil {
				expectedLeader := fmt.Sprintf("master(%d)@localhost:5050", expectedLeaders[leaderIdx])
				assert.Equal(t, expectedLeader, master.GetId())
				leaderIdx++
				wg.Done()
			}
		case called == EXPECTED_CALLS:
			md.Cancel()
			defer close(lostMaster)
			assert.Nil(t, master)
		default:
			t.Errorf("unexpected notification call attempt %d", called)
		}
	}))
	assert.NoError(t, err)

	fatalAfter(t, 10*time.Second, wg.Wait, "Waited too long for new-master alerts")
	fatalOn(t, 3*time.Second, lostMaster, "Waited too long for lost master")

	select {
	case <-md.Done():
		assert.Equal(t, EXPECTED_CALLS, called, "expected %d detection callbacks instead of %d", EXPECTED_CALLS, called)
	case <-time.After(time.Second * 10):
		panic("Waited too long for detector shutdown...")
	}
}
Example #14
// single connector instance, its internal connection to zk is flappy
func TestMasterDetectorFlappyConnectionState(t *testing.T) {
	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	const ITERATIONS = 3
	var wg sync.WaitGroup
	wg.Add(1 + ITERATIONS) // +1 for the initial snapshot that's sent for the first watch
	path := test_zk_path

	md.bootstrapFunc = func() error {
		if md.client != nil {
			return nil
		}
		log.V(1).Infoln("bootstrapping detector")
		defer log.V(1).Infoln("bootstrapping detector ..finished")

		children := []string{"info_0", "info_5", "info_10"}
		mocked, snaps, errs := newMockZkClient(children...)
		md.client = mocked
		md.minDetectorCyclePeriod = 10 * time.Millisecond // we don't have all day!

		mocked.On("data", fmt.Sprintf("%s/info_0", path)).Return(newTestMasterInfo(0), nil)

		// the first snapshot will be sent immediately and the detector will be awaiting an event.
		// then cycle through some watch errors while maintaining the same children snapshot
		go func() {
			defer close(errs)
			for attempt := 0; attempt < ITERATIONS; attempt++ {
				// send an error, which should cause the detector to re-issue a watch
				errs <- zk.ErrSessionExpired
				// the detection loop issues another watch, so send it the same snapshot
				snaps <- children
			}
		}()
		return nil
	}

	called := 0
	lostMaster := make(chan struct{})
	const EXPECTED_CALLS = (ITERATIONS * 2) + 2 // +1 for initial snapshot, +1 for final lost-leader (close(errs))
	err = md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		called++
		log.V(3).Infof("detector invoked: called %d", called)
		switch {
		case called < EXPECTED_CALLS:
			if master != nil {
				wg.Done()
				assert.Equal(t, "master(0)@localhost:5050", master.GetId())
			}
		case called == EXPECTED_CALLS:
			md.Cancel()
			defer close(lostMaster)
			assert.Nil(t, master)
		default:
			t.Errorf("unexpected notification call attempt %d", called)
		}
	}))
	assert.NoError(t, err)

	fatalAfter(t, 10*time.Second, wg.Wait, "Waited too long for new-master alerts")
	fatalOn(t, 3*time.Second, lostMaster, "Waited too long for lost master")

	select {
	case <-md.Done():
		assert.Equal(t, EXPECTED_CALLS, called, "expected %d detection callbacks instead of %d", EXPECTED_CALLS, called)
	case <-time.After(time.Second * 10):
		panic("Waited too long for detector shutdown...")
	}
}
Example #15
func TestMasterDetectorChildrenChanged(t *testing.T) {
	md, err := NewMasterDetector(zkurl)
	defer md.Cancel()
	assert.NoError(t, err)

	path := test_zk_path
	snapDetected := make(chan struct{})
	md.bootstrapFunc = func() error {
		if md.client != nil {
			return nil
		}
		log.V(1).Infoln("bootstrapping detector")
		defer log.V(1).Infoln("bootstrapping detector ..finished")

		mocked, _, errs := newMockZkClient("info_0", "info_5", "info_10")
		md.client = mocked
		md.minDetectorCyclePeriod = 10 * time.Millisecond // we don't have all day!

		mocked.On("data", fmt.Sprintf("%s/info_0", path)).Return(newTestMasterInfo(0), nil)

		// wait for the first child snapshot to be processed before signaling end-of-watch
		// (which is signalled by closing errs).
		go func() {
			defer close(errs)
			select {
			case <-snapDetected:
			case <-md.Done():
				t.Errorf("detector died before child snapshot")
			}
		}()
		return nil
	}

	called := 0
	lostMaster := make(chan struct{})
	const expectedLeader = "master(0)@localhost:5050"
	err = md.Detect(detector.OnMasterChanged(func(master *mesos.MasterInfo) {
		//expect 2 calls in sequence: the first setting a master
		//and the second clearing it
		switch called++; called {
		case 1:
			defer close(snapDetected)
			assert.NotNil(t, master)
			assert.Equal(t, expectedLeader, master.GetId())
		case 2:
			md.Cancel()
			defer close(lostMaster)
			assert.Nil(t, master)
		default:
			t.Errorf("unexpected notification call attempt %d", called)
		}
	}))
	assert.NoError(t, err)

	fatalOn(t, 10*time.Second, lostMaster, "Waited too long for lost master")

	select {
	case <-md.Done():
		assert.Equal(t, 2, called, "expected 2 detection callbacks instead of %d", called)
	case <-time.After(time.Second * 10):
		panic("Waited too long for detector shutdown...")
	}
}