Beispiel #1
0
func startJob(s *State, hc *cluster.Host, job *host.Job) error {
	jobStatus := make(chan error)
	events := make(chan *host.Event)
	stream, err := hc.StreamEvents(job.ID, events)
	if err != nil {
		return err
	}
	go func() {
		defer stream.Close()
	loop:
		for {
			select {
			case e, ok := <-events:
				if !ok {
					break loop
				}
				switch e.Event {
				case "start", "stop":
					jobStatus <- nil
					return
				case "error":
					job, err := hc.GetJob(job.ID)
					if err != nil {
						jobStatus <- err
						return
					}
					if job.Error == nil {
						jobStatus <- fmt.Errorf("bootstrap: unknown error from host")
						return
					}
					jobStatus <- fmt.Errorf("bootstrap: host error while launching job: %q", *job.Error)
					return
				default:
				}
			case <-time.After(30 * time.Second):
				jobStatus <- errors.New("bootstrap: timed out waiting for job event")
				return
			}
		}
		jobStatus <- fmt.Errorf("bootstrap: host job stream disconnected unexpectedly: %q", stream.Err())
	}()

	if err := hc.AddJob(job); err != nil {
		return err
	}

	return <-jobStatus
}
Beispiel #2
0
func startJob(s *State, hc *cluster.Host, job *host.Job) (*Job, error) {
	data := &Job{HostID: hc.ID(), JobID: job.ID}

	jobStatus := make(chan error)
	events := make(chan *host.Event)
	stream, err := hc.StreamEvents(data.JobID, events)
	if err != nil {
		return nil, err
	}
	go func() {
		defer stream.Close()
		for e := range events {
			switch e.Event {
			case "start", "stop":
				jobStatus <- nil
				return
			case "error":
				job, err := hc.GetJob(data.JobID)
				if err != nil {
					jobStatus <- err
					return
				}
				if job.Error == nil {
					jobStatus <- fmt.Errorf("bootstrap: unknown error from host")
					return
				}
				jobStatus <- fmt.Errorf("bootstrap: host error while launching job: %q", *job.Error)
				return
			default:
			}
		}
		jobStatus <- fmt.Errorf("bootstrap: host job stream disconnected unexpectedly: %q", stream.Err())
	}()

	if err := hc.AddJob(job); err != nil {
		return nil, err
	}

	return data, <-jobStatus
}
Beispiel #3
0
func (c *context) watchHost(h *cluster.Host, ready chan struct{}) {
	if !c.hosts.Add(h.ID()) {
		if ready != nil {
			ready <- struct{}{}
		}
		return
	}
	defer c.hosts.Remove(h.ID())

	g := grohl.NewContext(grohl.Data{"fn": "watchHost", "host.id": h.ID()})

	c.hosts.Set(h.ID(), h)

	g.Log(grohl.Data{"at": "start"})

	ch := make(chan *host.Event)
	h.StreamEvents("all", ch)
	if ready != nil {
		ready <- struct{}{}
	}

	// Call PutJob in a goroutine so we don't block receiving job events whilst potentially
	// making multiple requests to the controller (e.g. if the controller is down).
	//
	// Use a channel (rather than spawning a goroutine per event) so that events are delivered in order.
	jobs := make(chan *ct.Job, 10)
	go func() {
		for job := range jobs {
			putJobAttempts.Run(func() error {
				if err := c.PutJob(job); err != nil {
					g.Log(grohl.Data{"at": "put_job_error", "job.id": job.ID, "state": job.State, "err": err})
					// ignore validation / not found errors
					if httphelper.IsValidationError(err) || err == controller.ErrNotFound {
						return nil
					}
					return err
				}
				g.Log(grohl.Data{"at": "put_job", "job.id": job.ID, "state": job.State})
				return nil
			})
		}
	}()

	for event := range ch {
		meta := event.Job.Job.Metadata
		appID := meta["flynn-controller.app"]
		releaseID := meta["flynn-controller.release"]
		jobType := meta["flynn-controller.type"]

		if appID == "" || releaseID == "" {
			continue
		}

		job := &ct.Job{
			ID:        event.JobID,
			AppID:     appID,
			ReleaseID: releaseID,
			Type:      jobType,
			State:     jobState(event),
			Meta:      jobMetaFromMetadata(meta),
		}
		g.Log(grohl.Data{"at": "event", "job.id": event.JobID, "event": event.Event})
		jobs <- job

		// get a read lock on the mutex to ensure we are not currently
		// syncing with the cluster
		c.mtx.RLock()
		j := c.jobs.Get(h.ID(), event.JobID)
		c.mtx.RUnlock()
		if j == nil {
			continue
		}
		j.startedAt = event.Job.StartedAt

		if event.Event != "error" && event.Event != "stop" {
			continue
		}
		g.Log(grohl.Data{"at": "remove", "job.id": event.JobID, "event": event.Event})

		c.jobs.Remove(h.ID(), event.JobID)
		go func(event *host.Event) {
			c.mtx.RLock()
			j.Formation.RestartJob(jobType, h.ID(), event.JobID)
			c.mtx.RUnlock()
		}(event)
	}
	// TODO: check error/reconnect
}