func runStop(args *docopt.Args, client *cluster.Client) error { success := true clients := make(map[string]cluster.Host) for _, id := range args.All["ID"].([]string) { hostID, jobID, err := cluster.ParseJobID(id) if err != nil { fmt.Printf("could not parse %s: %s", id, err) success = false continue } hostClient, ok := clients[hostID] if !ok { var err error hostClient, err = client.DialHost(hostID) if err != nil { fmt.Printf("could not connect to host %s: %s\n", hostID, err) success = false continue } clients[hostID] = hostClient } if err := hostClient.StopJob(jobID); err != nil { fmt.Printf("could not stop job %s: %s\n", jobID, err) success = false continue } fmt.Println(jobID, "stopped") } if !success { return errors.New("could not stop all jobs") } return nil }
// Add records a job in the job_cache table and emits a corresponding job
// event. If the job row already exists, only its state is refreshed. An
// invalid state transition is surfaced as a ct.ValidationError; a duplicate
// job event is silently ignored.
func (r *JobRepo) Add(job *ct.Job) error {
	// job.ID is expected to be "hostID-jobID"; an unparsable ID is treated
	// as a missing job rather than a server error.
	hostID, jobID, err := cluster.ParseJobID(job.ID)
	if err != nil {
		log.Printf("Unable to parse hostID from %q", job.ID)
		return ErrNotFound
	}
	meta := metaToHstore(job.Meta)
	// TODO: actually validate
	err = r.db.QueryRow("INSERT INTO job_cache (job_id, host_id, app_id, release_id, process_type, state, meta) VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING created_at, updated_at", jobID, hostID, job.AppID, job.ReleaseID, job.Type, job.State, meta).Scan(&job.CreatedAt, &job.UpdatedAt)
	if postgres.IsUniquenessError(err, "") {
		// The (job_id, host_id) row already exists; just update its state.
		err = r.db.QueryRow("UPDATE job_cache SET state = $3, updated_at = now() WHERE job_id = $1 AND host_id = $2 RETURNING created_at, updated_at", jobID, hostID, job.State).Scan(&job.CreatedAt, &job.UpdatedAt)
		if e, ok := err.(*pq.Error); ok && e.Code.Name() == "check_violation" {
			// A DB check constraint rejected the state value/transition;
			// report it as a field-level validation error to the caller.
			return ct.ValidationError{Field: "state", Message: e.Error()}
		}
	}
	if err != nil {
		return err
	}
	// create a job event, ignoring possible duplications
	err = r.db.Exec("INSERT INTO job_events (job_id, host_id, app_id, state) VALUES ($1, $2, $3, $4)", jobID, hostID, job.AppID, job.State)
	if postgres.IsUniquenessError(err, "") {
		return nil
	}
	return err
}
func runLog(args *docopt.Args, client *cluster.Client) error { hostID, jobID, err := cluster.ParseJobID(args.String["ID"]) if err != nil { return err } return getLog(hostID, jobID, client, args.Bool["-f"] || args.Bool["--follow"], os.Stdout, os.Stderr) }
func (c *controllerAPI) connectHost(ctx context.Context) (utils.HostClient, string, error) { params, _ := ctxhelper.ParamsFromContext(ctx) hostID, jobID, err := cluster.ParseJobID(params.ByName("jobs_id")) if err != nil { log.Printf("Unable to parse hostID from %q", params.ByName("jobs_id")) return nil, jobID, err } host, err := c.clusterClient.Host(hostID) return host, jobID, err }
// TestControllerRestart checks that when a running controller web job is
// stopped, the scheduler starts a replacement: scale up to get a spare,
// kill the original, wait for the down/up event pair, then scale back down.
func (s *SchedulerSuite) TestControllerRestart(t *c.C) {
	// get the current controller details
	app, err := s.controllerClient(t).GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := s.controllerClient(t).GetAppRelease("controller")
	t.Assert(err, c.IsNil)
	formation, err := s.controllerClient(t).GetFormation(app.ID, release.ID)
	t.Assert(err, c.IsNil)
	list, err := s.controllerClient(t).JobList("controller")
	t.Assert(err, c.IsNil)
	// Only running web processes are candidates to kill.
	var jobs []*ct.Job
	for _, job := range list {
		if job.Type == "web" && job.State == "up" {
			jobs = append(jobs, job)
		}
	}
	t.Assert(jobs, c.HasLen, 2)
	hostID, jobID, _ := cluster.ParseJobID(jobs[0].ID)
	t.Assert(hostID, c.Not(c.Equals), "")
	t.Assert(jobID, c.Not(c.Equals), "")
	debugf(t, "current controller app[%s] host[%s] job[%s]", app.ID, hostID, jobID)

	// start another controller and wait for it to come up
	watcher, err := s.controllerClient(t).WatchJobEvents("controller", release.ID)
	t.Assert(err, c.IsNil)
	defer watcher.Close()
	debug(t, "scaling the controller up")
	formation.Processes["web"]++
	t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {"up": 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// kill the first controller and check the scheduler brings it back online
	cc := cluster.NewClientWithServices(s.discoverdClient(t).Service)
	hc, err := cc.Host(hostID)
	t.Assert(err, c.IsNil)
	debug(t, "stopping job ", jobID)
	t.Assert(hc.StopJob(jobID), c.IsNil)
	// Expect one down (the killed job) and one up (its replacement).
	err = watcher.WaitFor(ct.JobEvents{"web": {"down": 1, "up": 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// scale back down
	debug(t, "scaling the controller down")
	formation.Processes["web"]--
	t.Assert(s.controllerClient(t).PutFormation(formation), c.IsNil)
	err = watcher.WaitFor(ct.JobEvents{"web": {"down": 1}}, scaleTimeout, nil)
	t.Assert(err, c.IsNil)

	// unset the suite's client so other tests use a new client
	s.controller = nil
}
func runInspect(args *docopt.Args, client *cluster.Client) error { hostID, jobID, err := cluster.ParseJobID(args.String["ID"]) if err != nil { return err } hostClient, err := client.Host(hostID) if err != nil { return fmt.Errorf("could not connect to host %s: %s", hostID, err) } job, err := hostClient.GetJob(jobID) if err != nil { return fmt.Errorf("no such job") } printJobDesc(job, os.Stdout, !args.Bool["--omit-env"]) return nil }
// Add records a job in the job_cache table, refreshing the cached state if
// the row already exists, then inserts a matching job event.
func (r *JobRepo) Add(job *ct.Job) error {
	// job.ID is expected to be "hostID-jobID"; an unparsable ID is treated
	// as a missing job rather than a server error.
	hostID, jobID, err := cluster.ParseJobID(job.ID)
	if err != nil {
		log.Printf("Unable to parse hostID from %q", job.ID)
		return ErrNotFound
	}
	// TODO: actually validate
	err = r.db.QueryRow("INSERT INTO job_cache (job_id, host_id, app_id, release_id, process_type, state) VALUES ($1, $2, $3, $4, $5, $6) RETURNING created_at, updated_at", jobID, hostID, job.AppID, job.ReleaseID, job.Type, job.State).Scan(&job.CreatedAt, &job.UpdatedAt)
	if e, ok := err.(*pq.Error); ok && e.Code.Name() == "unique_violation" {
		// The (job_id, host_id) row already exists; just update its state.
		err = r.db.QueryRow("UPDATE job_cache SET state = $3, updated_at = now() WHERE job_id = $1 AND host_id = $2 RETURNING created_at, updated_at", jobID, hostID, job.State).Scan(&job.CreatedAt, &job.UpdatedAt)
	}
	if err != nil {
		return err
	}
	// NOTE(review): a duplicate job event insert surfaces as an error here
	// rather than being ignored — confirm callers tolerate that.
	return r.db.Exec("INSERT INTO job_events (job_id, host_id, app_id, state) VALUES ($1, $2, $3, $4)", jobID, hostID, job.AppID, job.State)
}
func connectHostMiddleware(c martini.Context, params martini.Params, cl clusterClient, r ResponseHelper) { hostID, jobID, err := cluster.ParseJobID(params["jobs_id"]) if err != nil { log.Printf("Unable to parse hostID from %q", params["jobs_id"]) r.Error(ErrNotFound) return } params["jobs_id"] = jobID client, err := cl.DialHost(hostID) if err != nil { r.Error(err) return } c.MapTo(client, (*cluster.Host)(nil)) c.Next() client.Close() }
// syncJobStates reconciles cached job states with the in-memory job map:
// any job the controller still reports as "up" that is no longer tracked
// locally is marked "down". Per-app failures are logged and skipped so one
// bad app does not abort the whole sync.
func (c *context) syncJobStates() error {
	g := grohl.NewContext(grohl.Data{"fn": "syncJobStates"})
	g.Log(grohl.Data{"at": "appList"})
	apps, err := c.AppList()
	if err != nil {
		g.Log(grohl.Data{"at": "appList", "status": "error", "err": err})
		return err
	}
	for _, app := range apps {
		g.Log(grohl.Data{"at": "jobList", "app.id": app.ID})
		jobs, err := c.JobList(app.ID)
		if err != nil {
			// Log and move on; remaining apps still get synced.
			g.Log(grohl.Data{"at": "jobList", "app.id": app.ID, "status": "error", "err": err})
			continue
		}
		for _, job := range jobs {
			gg := g.New(grohl.Data{"job.id": job.ID, "app.id": app.ID, "state": job.State})
			gg.Log(grohl.Data{"at": "checkState"})
			// Only jobs believed to be running need reconciling.
			if job.State != "up" {
				continue
			}
			hostID, jobID, err := cluster.ParseJobID(job.ID)
			if err != nil {
				gg.Log(grohl.Data{"at": "parseJobID", "status": "error", "err": err})
				continue
			}
			// Still tracked locally, so the "up" state is accurate.
			if j := c.jobs.Get(hostID, jobID); j != nil {
				continue
			}
			job.State = "down"
			gg.Log(grohl.Data{"at": "putJob", "state": "down"})
			// Fire-and-forget update. NOTE(review): PutJob errors are not
			// observed and the goroutine is untracked — confirm intended.
			go c.PutJob(job)
		}
	}
	return nil
}
func (s *SchedulerSuite) stopJob(t *c.C, id string) { debug(t, "stopping job ", id) hostID, jobID, _ := cluster.ParseJobID(id) hc := s.hostClient(t, hostID) t.Assert(hc.StopJob(jobID), c.IsNil) }
// TestControllerRestart checks that when the single controller web job is
// stopped, the scheduler brings a replacement online: scale up to two web
// processes, connect directly to the new one, kill the original, wait for
// the down/up event pair, then scale back down.
func (s *SchedulerSuite) TestControllerRestart(t *c.C) {
	// get the current controller details
	app, err := s.controllerClient(t).GetApp("controller")
	t.Assert(err, c.IsNil)
	release, err := s.controllerClient(t).GetAppRelease("controller")
	t.Assert(err, c.IsNil)
	list, err := s.controllerClient(t).JobList("controller")
	t.Assert(err, c.IsNil)
	var jobs []*ct.Job
	for _, job := range list {
		if job.Type == "web" {
			jobs = append(jobs, job)
		}
	}
	// Exactly one web job is expected before scaling.
	t.Assert(jobs, c.HasLen, 1)
	hostID, jobID, _ := cluster.ParseJobID(jobs[0].ID)
	t.Assert(hostID, c.Not(c.Equals), "")
	t.Assert(jobID, c.Not(c.Equals), "")
	debugf(t, "current controller app[%s] host[%s] job[%s]", app.ID, hostID, jobID)

	// start a second controller and wait for it to come up
	stream, err := s.controllerClient(t).StreamJobEvents("controller", 0)
	t.Assert(err, c.IsNil)
	debug(t, "scaling the controller up")
	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 2, "scheduler": 1},
	}), c.IsNil)
	// lastID is carried over so the next stream resumes where this left off.
	lastID, _ := waitForJobEvents(t, stream.Events, jobEvents{"web": {"up": 1}})
	stream.Close()

	// get direct client for new controller
	var client *controller.Client
	attempts := attempt.Strategy{
		Total: 10 * time.Second,
		Delay: 500 * time.Millisecond,
	}
	t.Assert(attempts.Run(func() (err error) {
		set, err := s.discoverdClient(t).NewServiceSet("flynn-controller")
		if err != nil {
			return err
		}
		defer set.Close()
		addrs := set.Addrs()
		if len(addrs) != 2 {
			return fmt.Errorf("expected 2 controller processes, got %d", len(addrs))
		}
		// The second address is assumed to be the newly started controller.
		addr := addrs[1]
		debug(t, "new controller address: ", addr)
		client, err = controller.NewClient("http://"+addr, s.clusterConf(t).Key)
		return
	}), c.IsNil)

	// kill the first controller and check the scheduler brings it back online
	stream, err = client.StreamJobEvents("controller", lastID)
	defer stream.Close()
	t.Assert(err, c.IsNil)
	cc, err := cluster.NewClientWithDial(nil, s.discoverdClient(t).NewServiceSet)
	t.Assert(err, c.IsNil)
	defer cc.Close()
	hc, err := cc.DialHost(hostID)
	t.Assert(err, c.IsNil)
	defer hc.Close()
	debug(t, "stopping job ", jobID)
	t.Assert(hc.StopJob(jobID), c.IsNil)
	// Expect one down (the killed job) and one up (its replacement).
	waitForJobEvents(t, stream.Events, jobEvents{"web": {"down": 1, "up": 1}})

	// scale back down
	debug(t, "scaling the controller down")
	t.Assert(s.controllerClient(t).PutFormation(&ct.Formation{
		AppID:     app.ID,
		ReleaseID: release.ID,
		Processes: map[string]int{"web": 1, "scheduler": 1},
	}), c.IsNil)
	waitForJobEvents(t, stream.Events, jobEvents{"web": {"down": 1}})

	// unset the suite's client so other tests use a new client
	s.controller = nil
}
func (h *Helper) stopJob(t *c.C, id string) { debugf(t, "stopping job %s", id) hostID, jobID, _ := cluster.ParseJobID(id) hc := h.hostClient(t, hostID) t.Assert(hc.StopJob(jobID), c.IsNil) }