Example #1
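Registers with service discovery, watches the service leader stream (reconnecting on disconnect), and reports whether this instance is the current leader, timing out after 30 seconds.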
func (d *discoverdWrapper) Register() (bool, error) {
	log := logger.New("fn", "discoverd.Register")

	log.Info("registering with service discovery")
	hb, err := discoverd.AddServiceAndRegister(serviceName, d.addr)
	if err != nil {
		log.Error("error registering with service discovery", "err", err)
		return false, err
	}
	shutdown.BeforeExit(func() { hb.Close() })

	selfAddr := hb.Addr()
	log = log.New("self.addr", selfAddr)

	service := discoverd.NewService(serviceName)
	var leaders chan *discoverd.Instance
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting service leader stream")
		leaders = make(chan *discoverd.Instance)
		stream, err = service.Leaders(leaders)
		if err != nil {
			log.Error("error connecting service leader stream", "err", err)
		}
		return
	}
	if err := connect(); err != nil {
		return false, err
	}

	go func() {
	outer:
		for {
			for leader := range leaders {
				if leader == nil {
					// a nil leader indicates there are no instances for
					// the service, ignore and wait for an actual leader
					log.Warn("received nil leader event")
					continue
				}
				log.Info("received leader event", "leader.addr", leader.Addr)
				d.leader <- leader.Addr == selfAddr
			}
			log.Warn("service leader stream disconnected", "err", stream.Err())
			for {
				if err := connect(); err == nil {
					continue outer
				}
				time.Sleep(100 * time.Millisecond)
			}
		}
	}()

	select {
	case isLeader := <-d.leader:
		return isLeader, nil
	case <-time.After(30 * time.Second):
		return false, errors.New("timed out waiting for current service leader")
	}
}
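This connect-then-reconnect shape around stream.Stream recurs throughout the examples below: make a fresh channel, open the stream, drain the channel until the producer closes it, then retry the connection in a loop. The following is a minimal, generic sketch of just that skeleton, assuming the Flynn stream package at github.com/flynn/flynn/pkg/stream; the Event type and dial function are illustrative stand-ins, not part of the real API.

package example

import (
	"time"

	"github.com/flynn/flynn/pkg/stream"
)

// Event and dial are hypothetical stand-ins for a concrete event type and
// stream constructor (e.g. service.Leaders or client.StreamEvents).
type Event struct{ Addr string }

func consume(dial func(chan *Event) (stream.Stream, error), out chan<- *Event) error {
	var events chan *Event
	var s stream.Stream
	connect := func() (err error) {
		// a fresh channel per connection: the producer closes it on
		// disconnect, and a closed channel cannot be reused
		events = make(chan *Event)
		s, err = dial(events)
		return
	}
	if err := connect(); err != nil {
		return err
	}
	go func() {
	outer:
		for {
			// range exits once the producer closes events on disconnect
			for event := range events {
				out <- event
			}
			// the stream dropped; real code would log s.Err() here
			_ = s.Err()
			// retry until a new stream is up, then resume draining
			for {
				if err := connect(); err == nil {
					continue outer
				}
				time.Sleep(100 * time.Millisecond)
			}
		}
	}()
	return nil
}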
Example #2
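Connects a host event stream and handles up/down events synchronously until the current host list arrives, then forwards subsequent events to the scheduler's main loop.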
func (s *Scheduler) streamHostEvents() error {
	log := logger.New("fn", "streamHostEvents")

	var events chan *discoverd.Event
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting host event stream")
		events = make(chan *discoverd.Event, eventBufferSize)
		stream, err = s.StreamHostEvents(events)
		if err != nil {
			log.Error("error connecting host event stream", "err", err)
		}
		return
	}
	if err := connect(); err != nil {
		return err
	}

	current := make(chan struct{})
	go func() {
		var isCurrent bool
	outer:
		for {
			for event := range events {
				switch event.Kind {
				case discoverd.EventKindCurrent:
					if !isCurrent {
						isCurrent = true
						close(current)
					}
				case discoverd.EventKindUp, discoverd.EventKindDown:
					// if we are not current, explicitly handle the event
					// so that the scheduler is streaming job events from
					// all current hosts before starting the main loop.
					if !isCurrent {
						s.HandleHostEvent(event)
						continue
					}
					s.hostEvents <- event
				}
			}
			log.Warn("host event stream disconnected", "err", stream.Err())
			for {
				if err := connect(); err == nil {
					continue outer
				}
				time.Sleep(100 * time.Millisecond)
			}
		}
	}()

	select {
	case <-current:
		return nil
	case <-time.After(30 * time.Second):
		return errors.New("timed out waiting for current host list")
	}
}
Example #3
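A test helper that tallies job events until they match the expected counts, failing if the stream closes or 60 seconds pass.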
func waitForJobEvents(t *c.C, stream stream.Stream, events chan *ct.JobEvent, expected jobEvents) (lastID int64, jobID string) {
	debugf(t, "waiting for job events: %v", expected)
	actual := make(jobEvents)
	for {
	inner:
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed: %s", stream.Err())
			}
			debugf(t, "got job event: %s %s %s", event.Type, event.JobID, event.State)
			lastID = event.ID
			jobID = event.JobID
			if _, ok := actual[event.Type]; !ok {
				actual[event.Type] = make(map[string]int)
			}
			switch event.State {
			case "starting", "up", "down":
				actual[event.Type][event.State]++
			case "crashed":
				actual[event.Type]["down"]++
			default:
				break inner
			}
			if jobEventsEqual(expected, actual) {
				return
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for job events: ", expected)
		}
	}
}
Example #4
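A test helper that waits for an "up" event of the given job type and returns the restarted job's ID.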
func waitForJobRestart(t *c.C, stream stream.Stream, events chan *ct.JobEvent, typ string, timeout time.Duration) string {
	debug(t, "waiting for job restart")
	for {
		select {
		case event, ok := <-events:
			if !ok {
				t.Fatalf("job event stream closed: %s", stream.Err())
			}
			debug(t, "got job event: ", event.Type, event.JobID, event.State)
			if event.Type == typ && event.State == "up" {
				return event.JobID
			}
		case <-time.After(timeout):
			t.Fatal("timed out waiting for job restart")
		}
	}
}
Example #5
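A blocking variant of Register: it retries registration until it succeeds, reconnects inside the watch goroutine, and logs every 30 seconds while waiting for the first leader event instead of timing out.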
func (d *discoverdWrapper) Register() bool {
	log := d.logger.New("fn", "discoverd.Register")

	var hb discoverd.Heartbeater
	for {
		var err error
		log.Info("registering with service discovery")
		hb, err = discoverd.AddServiceAndRegister(serviceName, ":"+os.Getenv("PORT"))
		if err == nil {
			break
		}
		log.Error("error registering with service discovery", "err", err)
		time.Sleep(time.Second)
	}
	shutdown.BeforeExit(func() { hb.Close() })

	selfAddr := hb.Addr()
	log = log.New("self.addr", selfAddr)

	service := discoverd.NewService(serviceName)
	var leaders chan *discoverd.Instance
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting service leader stream")
		leaders = make(chan *discoverd.Instance)
		stream, err = service.Leaders(leaders)
		if err != nil {
			log.Error("error connecting service leader stream", "err", err)
		}
		return
	}

	go func() {
		for {
			for {
				if err := connect(); err == nil {
					break
				}
				time.Sleep(100 * time.Millisecond)
			}
			for leader := range leaders {
				if leader == nil {
					// a nil leader indicates there are no instances for
					// the service, ignore and wait for an actual leader
					log.Warn("received nil leader event")
					continue
				}
				log.Info("received leader event", "leader.addr", leader.Addr)
				d.leader <- leader.Addr == selfAddr
			}
			log.Warn("service leader stream disconnected", "err", stream.Err())
		}
	}()

	start := time.Now()
	// use a stoppable ticker rather than time.Tick, which would leak
	// once this function returns
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case isLeader := <-d.leader:
			return isLeader
		case <-ticker.C:
			log.Warn("still waiting for current service leader", "duration", time.Since(start))
		}
	}
}
Example #6
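Streams formation events using an attempt.Strategy for the initial connection, tracking the last update time in since so reconnects resume from where the stream dropped.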
func (s *Scheduler) streamFormationEvents() error {
	log := logger.New("fn", "streamFormationEvents")

	var events chan *ct.ExpandedFormation
	var stream stream.Stream
	var since *time.Time
	connect := func() (err error) {
		log.Info("connecting formation event stream")
		events = make(chan *ct.ExpandedFormation, eventBufferSize)
		stream, err = s.StreamFormations(since, events)
		if err != nil {
			log.Error("error connecting formation event stream", "err", err)
		}
		return
	}
	strategy := attempt.Strategy{Delay: 100 * time.Millisecond, Total: time.Minute}
	if err := strategy.Run(connect); err != nil {
		return err
	}

	current := make(chan struct{})
	go func() {
		var isCurrent bool
	outer:
		for {
			for formation := range events {
				// an empty formation indicates we now have the
				// current list of formations.
				if formation.App == nil {
					if !isCurrent {
						isCurrent = true
						close(current)
					}
					continue
				}
				since = &formation.UpdatedAt
				// if we are not current, explicitly handle the event
				// so that the scheduler has the current list of
				// formations before starting the main loop.
				if !isCurrent {
					s.HandleFormationChange(formation)
					continue
				}
				s.formationEvents <- formation
			}
			log.Warn("formation event stream disconnected", "err", stream.Err())
			for {
				if err := connect(); err == nil {
					continue outer
				}
				time.Sleep(100 * time.Millisecond)
			}
		}
	}()

	select {
	case <-current:
		return nil
	case <-time.After(30 * time.Second):
		return errors.New("timed out waiting for current formation list")
	}
}
Example #7
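Forwards backend up/drained events from the router, reconnecting on disconnect and exiting cleanly when r.stop is closed.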
func (r *Router) watchBackends() {
	log := r.logger.New("fn", "router.watchBackends", "router.id", r.ID)
	var events chan *router.StreamEvent
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting router event stream")
		events = make(chan *router.StreamEvent)
		opts := &router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDrained,
			},
		}
		stream, err = r.client.StreamEvents(opts, events)
		if err != nil {
			log.Error("error connecting router event stream", "err", err)
		}
		return
	}

	// make initial connection
	for {
		if err := connect(); err == nil {
			defer stream.Close()
			break
		}
		select {
		case <-r.stop:
			return
		case <-time.After(100 * time.Millisecond):
		}
	}

	for {
	eventLoop:
		for {
			select {
			case event, ok := <-events:
				if !ok {
					break eventLoop
				}
				r.events <- &RouterEvent{
					RouterID: r.ID,
					Type:     event.Event,
					Backend:  event.Backend,
				}
			case <-r.stop:
				return
			}
		}
		log.Warn("router event stream disconnected", "err", stream.Err())
		// keep trying to reconnect, unless we are told to stop
	retryLoop:
		for {
			select {
			case <-r.stop:
				return
			default:
			}

			if err := connect(); err == nil {
				break retryLoop
			}
			time.Sleep(100 * time.Millisecond)
		}
	}
}
Example #8
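Streams host job events to a caller-supplied channel in a goroutine with automatic reconnection, returning a snapshot of the currently active jobs.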
// StreamEventsTo streams all job events from the host to the given channel in
// a goroutine, returning the current list of active jobs.
func (h *Host) StreamEventsTo(ch chan *host.Event) (map[string]host.ActiveJob, error) {
	log := h.logger.New("fn", "StreamEventsTo", "host.id", h.ID)
	var events chan *host.Event
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting job event stream")
		events = make(chan *host.Event)
		stream, err = h.client.StreamEvents("all", events)
		if err != nil {
			log.Error("error connecting job event stream", "err", err)
		}
		return
	}
	if err := connect(); err != nil {
		return nil, err
	}

	log.Info("getting active jobs")
	jobs, err := h.client.ListJobs()
	if err != nil {
		log.Error("error getting active jobs", "err", err)
		return nil, err
	}
	log.Info(fmt.Sprintf("got %d active job(s) for host %s", len(jobs), h.ID))

	go func() {
		defer stream.Close()
		defer close(h.done)
		for {
		eventLoop:
			for {
				select {
				case event, ok := <-events:
					if !ok {
						break eventLoop
					}
					ch <- event
				case <-h.stop:
					return
				}
			}

			log.Warn("job event stream disconnected", "err", stream.Err())
			// keep trying to reconnect, unless we are told to stop
		retryLoop:
			for {
				select {
				case <-h.stop:
					return
				default:
				}

				if err := connect(); err == nil {
					break retryLoop
				}
				time.Sleep(100 * time.Millisecond)
			}
		}
	}()
	return jobs, nil
}
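A minimal sketch of how a caller might consume StreamEventsTo, seeding local state from the returned snapshot before applying live events; this is not from the Flynn codebase, and handleActiveJob and handleEvent are hypothetical handlers.

// Hypothetical caller: process the snapshot first, then the stream.
func watchHost(h *Host) error {
	ch := make(chan *host.Event)
	jobs, err := h.StreamEventsTo(ch)
	if err != nil {
		return err
	}
	for id, job := range jobs {
		handleActiveJob(id, job) // seed state from the active-job snapshot
	}
	for event := range ch {
		handleEvent(event) // then apply incremental updates as they arrive
	}
	return nil
}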
Example #9
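Watches discoverd leader events for the log mux, resetting and reconnecting on leader changes and write errors, with retries scheduled via a swappable timer channel.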
func (c *serviceConn) watch(srv discoverd.Service, eventc <-chan *discoverd.Event, stream stream.Stream) {
	g := grohl.NewContext(grohl.Data{"at": "logmux_service_watch"})

	var (
		// resetc is never sent on while the loop runs, so the <-reconc
		// case is dormant whenever reconc points at it; swapping in a
		// time.After channel arms a delayed reconnect attempt
		resetc                  = make(chan time.Time)
		reconc <-chan time.Time = resetc
	)
	defer close(resetc)

	for {
		select {
		case event, ok := <-eventc:
			if !ok {
				c.hangup()
				return
			}
			g.Log(grohl.Data{"status": "event", "event": event.Kind.String()})

			switch event.Kind {
			case discoverd.EventKindLeader:
				reconc = resetc

				if err := c.reset(); err != nil {
					g.Log(grohl.Data{"status": "error", "err": err.Error()})
				}

				if err := c.connect(srv); err != nil {
					g.Log(grohl.Data{"status": "error", "err": err.Error()})
					reconc = time.After(100 * time.Millisecond)
				}
			default:
			}
		case err := <-c.errc:
			g.Log(grohl.Data{"status": "write-error", "err": err.Error()})
			reconc = resetc

			if err := c.reset(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}

			if err := c.connect(srv); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
				reconc = time.After(100 * time.Millisecond)
			}
		case <-reconc:
			if err := c.connect(srv); err != nil {
				g.Log(grohl.Data{"status": "reconnect-error", "err": err.Error()})
				reconc = time.After(100 * time.Millisecond)
			}
		case <-c.donec:
			if err := stream.Close(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}
			if err := c.reset(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}

			return
		case <-c.closec:
			if err := stream.Close(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}

			c.hangup()
			return
		}
	}
}
Example #10
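An integration test that deploys the controller, reconnecting the deployment event stream if the controller restart closes it, then verifies the expected jobs are running on each host.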
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// use a function to create the event stream as a new stream will be needed
	// after deploying the controller
	var events chan *ct.DeploymentEvent
	var eventStream stream.Stream
	connectStream := func() {
		events = make(chan *ct.DeploymentEvent)
		err := attempt.Strategy{
			Total: 10 * time.Second,
			Delay: 500 * time.Millisecond,
		}.Run(func() (err error) {
			eventStream, err = client.StreamDeployment(deployment.ID, events)
			return
		})
		t.Assert(err, c.IsNil)
	}
	connectStream()
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// because when the deployer deploys itself, some events will not get
	// sent)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				// reconnect the stream as it may have been closed
				// due to the controller being deployed
				debug(t, "reconnecting deployment event stream")
				connectStream()
				continue
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).ListHosts()
	t.Assert(err, c.IsNil)
	actual := make(map[string]map[string]int)
	for _, host := range hosts {
		for _, job := range host.Jobs {
			appID := job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       2,
		"deployer":  2,
		"scheduler": testCluster.Size(),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}