Example 1
func (r *Router) watchBackends() {
	log := r.logger.New("fn", "router.watchBackends", "router.id", r.ID)
	var events chan *router.StreamEvent
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting router event stream")
		events = make(chan *router.StreamEvent)
		opts := &router.StreamEventsOptions{
			EventTypes: []router.EventType{
				router.EventTypeBackendUp,
				router.EventTypeBackendDrained,
			},
		}
		stream, err = r.client.StreamEvents(opts, events)
		if err != nil {
			log.Error("error connecting router event stream", "err", err)
		}
		return
	}

	// make initial connection
	for {
		if err := connect(); err == nil {
			defer stream.Close()
			break
		}
		select {
		case <-r.stop:
			return
		case <-time.After(100 * time.Millisecond):
		}
	}

	for {
	eventLoop:
		for {
			select {
			case event, ok := <-events:
				if !ok {
					break eventLoop
				}
				r.events <- &RouterEvent{
					RouterID: r.ID,
					Type:     event.Event,
					Backend:  event.Backend,
				}
			case <-r.stop:
				return
			}
		}
		log.Warn("router event stream disconnected", "err", stream.Err())
		// keep trying to reconnect, unless we are told to stop
	retryLoop:
		for {
			select {
			case <-r.stop:
				return
			default:
			}

			if err := connect(); err == nil {
				break retryLoop
			}
			time.Sleep(100 * time.Millisecond)
		}
	}
}
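The method above only compiles against a Router with a particular shape. The sketch below is a rough reconstruction of that shape from the usage alone (the field and interface names here are hypothetical; the actual Flynn definitions may differ):

type Router struct {
	ID     string
	logger log15.Logger      // assumed: a log15-style structured logger (New/Info/Warn/Error)
	client routerClient      // see the hypothetical interface below
	stop   chan struct{}     // closed to tell the watcher to shut down
	events chan *RouterEvent // fan-in channel consumed by the owner of the Router
}

// routerClient is a hypothetical interface capturing the single client method
// watchBackends needs.
type routerClient interface {
	StreamEvents(opts *router.StreamEventsOptions, events chan *router.StreamEvent) (stream.Stream, error)
}

type RouterEvent struct {
	RouterID string
	Type     router.EventType // assumed to match the type of StreamEvent.Event
	Backend  *router.Backend  // assumed to match the type of StreamEvent.Backend
}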
Example 2
func (c *serviceConn) watch(srv discoverd.Service, eventc <-chan *discoverd.Event, stream stream.Stream) {
	g := grohl.NewContext(grohl.Data{"at": "logmux_service_watch"})

	var (
		resetc                  = make(chan time.Time)
		reconc <-chan time.Time = resetc
	)
	defer close(resetc)

	for {
		select {
		case event, ok := <-eventc:
			if !ok {
				c.hangup()
				return
			}
			g.Log(grohl.Data{"status": "event", "event": event.Kind.String()})

			switch event.Kind {
			case discoverd.EventKindLeader:
				reconc = resetc

				if err := c.reset(); err != nil {
					g.Log(grohl.Data{"status": "error", "err": err.Error()})
				}

				if err := c.connect(srv); err != nil {
					g.Log(grohl.Data{"status": "error", "err": err.Error()})
					reconc = time.After(100 * time.Millisecond)
				}
			default:
			}
		case err := <-c.errc:
			g.Log(grohl.Data{"status": "write-error", "err": err.Error()})
			reconc = resetc

			if err := c.reset(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}

			if err := c.connect(srv); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
				reconc = time.After(100 * time.Millisecond)
			}
		case <-reconc:
			if err := c.connect(srv); err != nil {
				g.Log(grohl.Data{"status": "reconnect-error", "err": err.Error()})
				reconc = time.After(100 * time.Millisecond)
			}
		case <-c.donec:
			if err := stream.Close(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}
			if err := c.reset(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}

			return
		case <-c.closec:
			if err := stream.Close(); err != nil {
				g.Log(grohl.Data{"status": "error", "err": err.Error()})
			}

			c.hangup()
			return
		}
	}
}
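The resetc/reconc pair in this example is the idiomatic way to arm and disarm a select case: nothing ever sends on resetc, so while reconc points at it the <-reconc case blocks forever, and assigning reconc a time.After channel schedules exactly one reconnect attempt. A minimal sketch of the idiom (tryConnect and done are hypothetical stand-ins, not part of the code above):

	never := make(chan time.Time) // nothing ever sends on this channel
	var retry <-chan time.Time = never

	for {
		select {
		case <-retry:
			// only reachable while a retry timer is armed
			if err := tryConnect(); err != nil {
				retry = time.After(100 * time.Millisecond) // re-arm
			} else {
				retry = never // disarm
			}
		case <-done:
			return
		}
	}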
Example 3
// StreamEventsTo streams all job events from the host to the given channel in
// a goroutine, returning the current list of active jobs.
func (h *Host) StreamEventsTo(ch chan *host.Event) (map[string]host.ActiveJob, error) {
	log := h.logger.New("fn", "StreamEventsTo", "host.id", h.ID)
	var events chan *host.Event
	var stream stream.Stream
	connect := func() (err error) {
		log.Info("connecting job event stream")
		events = make(chan *host.Event)
		stream, err = h.client.StreamEvents("all", events)
		if err != nil {
			log.Error("error connecting job event stream", "err", err)
		}
		return
	}
	if err := connect(); err != nil {
		return nil, err
	}

	log.Info("getting active jobs")
	jobs, err := h.client.ListJobs()
	if err != nil {
		log.Error("error getting active jobs", "err", err)
		return nil, err
	}
	log.Info(fmt.Sprintf("got %d active job(s) for host %s", len(jobs), h.ID))

	go func() {
		defer stream.Close()
		defer close(h.done)
		for {
		eventLoop:
			for {
				select {
				case event, ok := <-events:
					if !ok {
						break eventLoop
					}
					ch <- event
				case <-h.stop:
					return
				}
			}

			log.Warn("job event stream disconnected", "err", stream.Err())
			// keep trying to reconnect, unless we are told to stop
		retryLoop:
			for {
				select {
				case <-h.stop:
					return
				default:
				}

				if err := connect(); err == nil {
					break retryLoop
				}
				time.Sleep(100 * time.Millisecond)
			}
		}
	}()
	return jobs, nil
}
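A hypothetical caller might look like the fragment below (assuming h is an initialized *Host and the surrounding function can return an error). Note that StreamEventsTo never closes the channel it is given, so the range loop only ends when the program shuts down by other means:

	events := make(chan *host.Event)
	jobs, err := h.StreamEventsTo(events)
	if err != nil {
		return err
	}
	log.Printf("%d active job(s) at connect time", len(jobs))
	for event := range events {
		// handle each job event as it arrives; the goroutine inside
		// StreamEventsTo keeps the stream connected until h.stop is closed
		log.Printf("job event: %+v", event)
	}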
Example 4
func (s *SchedulerSuite) TestDeployController(t *c.C) {
	if testCluster == nil {
		t.Skip("cannot determine test cluster size")
	}

	// get the current controller release
	client := s.controllerClient(t)
	app, err := client.GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := client.GetAppRelease(app.ID)
	t.Assert(err, c.IsNil)

	// create a controller deployment
	release.ID = ""
	t.Assert(client.CreateRelease(release), c.IsNil)
	deployment, err := client.CreateDeployment(app.ID, release.ID)
	t.Assert(err, c.IsNil)

	// use a function to create the event stream as a new stream will be needed
	// after deploying the controller
	var events chan *ct.DeploymentEvent
	var eventStream stream.Stream
	connectStream := func() {
		events = make(chan *ct.DeploymentEvent)
		err := attempt.Strategy{
			Total: 10 * time.Second,
			Delay: 500 * time.Millisecond,
		}.Run(func() (err error) {
			eventStream, err = client.StreamDeployment(deployment.ID, events)
			return
		})
		t.Assert(err, c.IsNil)
	}
	connectStream()
	defer eventStream.Close()

	// wait for the deploy to complete (this doesn't wait for specific events
	// because some events are not sent when the deployer deploys itself)
loop:
	for {
		select {
		case e, ok := <-events:
			if !ok {
				// reconnect the stream as it may have been closed
				// due to the controller being deployed
				debug(t, "reconnecting deployment event stream")
				connectStream()
				continue
			}
			debugf(t, "got deployment event: %s %s", e.JobType, e.JobState)
			switch e.Status {
			case "complete":
				break loop
			case "failed":
				t.Fatal("the deployment failed")
			}
		case <-time.After(60 * time.Second):
			t.Fatal("timed out waiting for the deploy to complete")
		}
	}

	// check the correct controller jobs are running
	hosts, err := s.clusterClient(t).ListHosts()
	t.Assert(err, c.IsNil)
	actual := make(map[string]map[string]int)
	for _, host := range hosts {
		for _, job := range host.Jobs {
			appID := job.Metadata["flynn-controller.app"]
			if appID != app.ID {
				continue
			}
			releaseID := job.Metadata["flynn-controller.release"]
			if _, ok := actual[releaseID]; !ok {
				actual[releaseID] = make(map[string]int)
			}
			typ := job.Metadata["flynn-controller.type"]
			actual[releaseID][typ]++
		}
	}
	expected := map[string]map[string]int{release.ID: {
		"web":       2,
		"deployer":  2,
		"scheduler": testCluster.Size(),
	}}
	t.Assert(actual, c.DeepEquals, expected)
}