Esempio n. 1
0
func runStop(args *docopt.Args, client *cluster.Client) error {
	success := true
	clients := make(map[string]cluster.Host)
	for _, id := range args.All["ID"].([]string) {
		hostID, jobID, err := cluster.ParseJobID(id)
		if err != nil {
			fmt.Printf("could not parse %s: %s", id, err)
			success = false
			continue
		}
		hostClient, ok := clients[hostID]
		if !ok {
			var err error
			hostClient, err = client.DialHost(hostID)
			if err != nil {
				fmt.Printf("could not connect to host %s: %s\n", hostID, err)
				success = false
				continue
			}
			clients[hostID] = hostClient
		}
		if err := hostClient.StopJob(jobID); err != nil {
			fmt.Printf("could not stop job %s: %s\n", jobID, err)
			success = false
			continue
		}
		fmt.Println(jobID, "stopped")
	}
	if !success {
		return errors.New("could not stop all jobs")
	}
	return nil
}
Esempio n. 2
0
func (r *JobRepo) Add(job *ct.Job) error {
	hostID, jobID, err := cluster.ParseJobID(job.ID)
	if err != nil {
		log.Printf("Unable to parse hostID from %q", job.ID)
		return ErrNotFound
	}
	meta := metaToHstore(job.Meta)
	// TODO: actually validate
	err = r.db.QueryRow("INSERT INTO job_cache (job_id, host_id, app_id, release_id, process_type, state, meta) VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING created_at, updated_at",
		jobID, hostID, job.AppID, job.ReleaseID, job.Type, job.State, meta).Scan(&job.CreatedAt, &job.UpdatedAt)
	if postgres.IsUniquenessError(err, "") {
		err = r.db.QueryRow("UPDATE job_cache SET state = $3, updated_at = now() WHERE job_id = $1 AND host_id = $2 RETURNING created_at, updated_at",
			jobID, hostID, job.State).Scan(&job.CreatedAt, &job.UpdatedAt)
		if e, ok := err.(*pq.Error); ok && e.Code.Name() == "check_violation" {
			return ct.ValidationError{Field: "state", Message: e.Error()}
		}
	}
	if err != nil {
		return err
	}

	// create a job event, ignoring possible duplications
	err = r.db.Exec("INSERT INTO job_events (job_id, host_id, app_id, state) VALUES ($1, $2, $3, $4)", jobID, hostID, job.AppID, job.State)
	if postgres.IsUniquenessError(err, "") {
		return nil
	}
	return err
}
Esempio n. 3
0
func runLog(args *docopt.Args, client *cluster.Client) error {
	hostID, jobID, err := cluster.ParseJobID(args.String["ID"])
	if err != nil {
		return err
	}
	return getLog(hostID, jobID, client, args.Bool["-f"] || args.Bool["--follow"], os.Stdout, os.Stderr)
}
Esempio n. 4
0
func (c *controllerAPI) connectHost(ctx context.Context) (utils.HostClient, string, error) {
	params, _ := ctxhelper.ParamsFromContext(ctx)
	hostID, jobID, err := cluster.ParseJobID(params.ByName("jobs_id"))
	if err != nil {
		log.Printf("Unable to parse hostID from %q", params.ByName("jobs_id"))
		return nil, jobID, err
	}

	host, err := c.clusterClient.Host(hostID)
	return host, jobID, err
}
Esempio n. 5
0
func (s *SchedulerSuite) TestControllerRestart(t *c.C) {
	// get the current controller details
	app, err := s.controllerClient(t).GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := s.controllerClient(t).GetAppRelease("controller")
	t.Assert(err, c.IsNil)
	formation, err := s.controllerClient(t).GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	list, err := s.controllerClient(t).JobList("controller")
	t.Assert(err, c.IsNil)
	var jobs []*ct.Job
	for _, job := range list {
		if job.Type == "web" && job.State == "up" {
			jobs = append(jobs, job)
		}
	}
	t.Assert(jobs, c.HasLen, 2)
	hostID, jobID, _ := cluster.ParseJobID(jobs[0].ID)
	t.Assert(hostID, c.Not(c.Equals), "")
	t.Assert(jobID, c.Not(c.Equals), "")
	debugf(t, "current controller app[%s] host[%s] job[%s]", app.ID, hostID, jobID)

	// start another controller and wait for it to come up
	watcher, err := s.controllerClient(t).WatchJobEvents("controller", release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	debug(t, "scaling the controller up")
	formation.Processes["web"]++
	t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {"up": 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// kill the first controller and check the scheduler brings it back online
	cc := cluster.NewClientWithServices(s.discoverdClient(t).Service)
	hc, err := cc.Host(hostID)
	t.Assert(err, c.IsNil)
	debug(t, "stopping job ", jobID)
	t.Assert(hc.StopJob(jobID), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {"down": 1, "up": 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// scale back down
	debug(t, "scaling the controller down")
	formation.Processes["web"]--
	t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {"down": 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// unset the suite's client so other tests use a new client
	s.controller = nil
}
Esempio n. 6
0
func runInspect(args *docopt.Args, client *cluster.Client) error {
	hostID, jobID, err := cluster.ParseJobID(args.String["ID"])
	if err != nil {
		return err
	}
	hostClient, err := client.Host(hostID)
	if err != nil {
		return fmt.Errorf("could not connect to host %s: %s", hostID, err)
	}
	job, err := hostClient.GetJob(jobID)
	if err != nil {
		return fmt.Errorf("no such job")
	}

	printJobDesc(job, os.Stdout, !args.Bool["--omit-env"])
	return nil
}
Esempio n. 7
0
func (r *JobRepo) Add(job *ct.Job) error {
	hostID, jobID, err := cluster.ParseJobID(job.ID)
	if err != nil {
		log.Printf("Unable to parse hostID from %q", job.ID)
		return ErrNotFound
	}
	// TODO: actually validate
	err = r.db.QueryRow("INSERT INTO job_cache (job_id, host_id, app_id, release_id, process_type, state) VALUES ($1, $2, $3, $4, $5, $6) RETURNING created_at, updated_at",
		jobID, hostID, job.AppID, job.ReleaseID, job.Type, job.State).Scan(&job.CreatedAt, &job.UpdatedAt)
	if e, ok := err.(*pq.Error); ok && e.Code.Name() == "unique_violation" {
		err = r.db.QueryRow("UPDATE job_cache SET state = $3, updated_at = now() WHERE job_id = $1 AND host_id = $2 RETURNING created_at, updated_at",
			jobID, hostID, job.State).Scan(&job.CreatedAt, &job.UpdatedAt)
	}
	if err != nil {
		return err
	}
	return r.db.Exec("INSERT INTO job_events (job_id, host_id, app_id, state) VALUES ($1, $2, $3, $4)", jobID, hostID, job.AppID, job.State)
}
Esempio n. 8
0
func connectHostMiddleware(c martini.Context, params martini.Params, cl clusterClient, r ResponseHelper) {
	hostID, jobID, err := cluster.ParseJobID(params["jobs_id"])
	if err != nil {
		log.Printf("Unable to parse hostID from %q", params["jobs_id"])
		r.Error(ErrNotFound)
		return
	}
	params["jobs_id"] = jobID

	client, err := cl.DialHost(hostID)
	if err != nil {
		r.Error(err)
		return
	}
	c.MapTo(client, (*cluster.Host)(nil))

	c.Next()
	client.Close()
}
Esempio n. 9
0
func (c *context) syncJobStates() error {
	g := grohl.NewContext(grohl.Data{"fn": "syncJobStates"})
	g.Log(grohl.Data{"at": "appList"})
	apps, err := c.AppList()
	if err != nil {
		g.Log(grohl.Data{"at": "appList", "status": "error", "err": err})
		return err
	}
	for _, app := range apps {
		g.Log(grohl.Data{"at": "jobList", "app.id": app.ID})
		jobs, err := c.JobList(app.ID)
		if err != nil {
			g.Log(grohl.Data{"at": "jobList", "app.id": app.ID, "status": "error", "err": err})
			continue
		}
		for _, job := range jobs {
			gg := g.New(grohl.Data{"job.id": job.ID, "app.id": app.ID, "state": job.State})
			gg.Log(grohl.Data{"at": "checkState"})
			if job.State != "up" {
				continue
			}
			hostID, jobID, err := cluster.ParseJobID(job.ID)
			if err != nil {
				gg.Log(grohl.Data{"at": "parseJobID", "status": "error", "err": err})
				continue
			}
			if j := c.jobs.Get(hostID, jobID); j != nil {
				continue
			}
			job.State = "down"
			gg.Log(grohl.Data{"at": "putJob", "state": "down"})
			go c.PutJob(job)
		}
	}
	return nil
}
Esempio n. 10
0
func (s *SchedulerSuite) stopJob(t *c.C, id string) {
	debug(t, "stopping job ", id)
	hostID, jobID, _ := cluster.ParseJobID(id)
	hc := s.hostClient(t, hostID)
	t.Assert(hc.StopJob(jobID), c.IsNil)
}
Esempio n. 11
0
func (s *SchedulerSuite) TestControllerRestart(t *c.C) {
	// get the current controller details
	app, err := s.controllerClient(t).GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := s.controllerClient(t).GetAppRelease("controller")
	t.Assert(err, c.IsNil)
	list, err := s.controllerClient(t).JobList("controller")
	t.Assert(err, c.IsNil)
	var jobs []*ct.Job
	for _, job := range list {
		if job.Type == "web" {
			jobs = append(jobs, job)
		}
	}
	t.Assert(jobs, c.HasLen, 1)
	hostID, jobID, _ := cluster.ParseJobID(jobs[0].ID)
	t.Assert(hostID, c.Not(c.Equals), "")
	t.Assert(jobID, c.Not(c.Equals), "")
	debugf(t, "current controller app[%s] host[%s] job[%s]", app.ID, hostID, jobID)

	// start a second controller and wait for it to come up
	stream, err := s.controllerClient(t).StreamJobEvents("controller", 0)
	t.Assert(err, c.IsNil)
	debug(t, "scaling the controller up")
	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 2, "scheduler": 1},
	}), c.IsNil)
	lastID, _ := waitForJobEvents(t, stream.Events, jobEvents{"web": {"up": 1}})
	stream.Close()

	// get direct client for new controller
	var client *controller.Client
	attempts := attempt.Strategy{
		Total: 10 * time.Second,
		Delay: 500 * time.Millisecond,
	}
	t.Assert(attempts.Run(func() (err error) {
		set, err := s.discoverdClient(t).NewServiceSet("flynn-controller")
		if err != nil {
			return err
		}
		defer set.Close()
		addrs := set.Addrs()
		if len(addrs) != 2 {
			return fmt.Errorf("expected 2 controller processes, got %d", len(addrs))
		}
		addr := addrs[1]
		debug(t, "new controller address: ", addr)
		client, err = controller.NewClient("http://"+addr, s.clusterConf(t).Key)
		return
	}), c.IsNil)

	// kill the first controller and check the scheduler brings it back online
	stream, err = client.StreamJobEvents("controller", lastID)
	defer stream.Close()
	t.Assert(err, c.IsNil)
	cc, err := cluster.NewClientWithDial(nil, s.discoverdClient(t).NewServiceSet)
	t.Assert(err, c.IsNil)
	defer cc.Close()
	hc, err := cc.DialHost(hostID)
	t.Assert(err, c.IsNil)
	defer hc.Close()
	debug(t, "stopping job ", jobID)
	t.Assert(hc.StopJob(jobID), c.IsNil)
	waitForJobEvents(t, stream.Events, jobEvents{"web": {"down": 1, "up": 1}})

	// scale back down
	debug(t, "scaling the controller down")
	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1, "scheduler": 1},
	}), c.IsNil)
	waitForJobEvents(t, stream.Events, jobEvents{"web": {"down": 1}})

	// unset the suite's client so other tests use a new client
	s.controller = nil
}
Esempio n. 12
0
func (h *Helper) stopJob(t *c.C, id string) {
	debugf(t, "stopping job %s", id)
	hostID, jobID, _ := cluster.ParseJobID(id)
	hc := h.hostClient(t, hostID)
	t.Assert(hc.StopJob(jobID), c.IsNil)
}