func (c *ValidateCommand) Run(args []string) int {
	flags := c.Meta.FlagSet("validate", FlagSetNone)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Done!
	c.Ui.Output("Job validation successful")
	return 0
}
func (c *RunCommand) Run(args []string) int {
	var detach, verbose, output bool
	var checkIndexStr string

	flags := c.Meta.FlagSet("run", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")
	flags.BoolVar(&verbose, "verbose", false, "")
	flags.BoolVar(&output, "output", false, "")
	flags.StringVar(&checkIndexStr, "check-index", "", "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Check if the job is periodic.
	periodic := job.IsPeriodic()

	// Convert it to something we can use
	apiJob, err := convertStructJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	if output {
		req := api.RegisterJobRequest{Job: apiJob}
		buf, err := json.MarshalIndent(req, "", " ")
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
			return 1
		}

		c.Ui.Output(string(buf))
		return 0
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Force the region to be that of the job.
	if r := job.Region; r != "" {
		client.SetRegion(r)
	}

	// Parse the check-index
	checkIndex, enforce, err := parseCheckIndex(checkIndexStr)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing check-index value %q: %v", checkIndexStr, err))
		return 1
	}

	// Submit the job
	var evalID string
	if enforce {
		evalID, _, err = client.Jobs().EnforceRegister(apiJob, checkIndex, nil)
	} else {
		evalID, _, err = client.Jobs().Register(apiJob, nil)
	}
	if err != nil {
		if strings.Contains(err.Error(), api.RegisterEnforceIndexErrPrefix) {
			// Format the error specially if the error is due to index
			// enforcement
			matches := enforceIndexRegex.FindStringSubmatch(err.Error())
			if len(matches) == 2 {
				c.Ui.Error(matches[1]) // The matched group
				c.Ui.Error("Job not updated")
				return 1
			}
		}

		c.Ui.Error(fmt.Sprintf("Error submitting job: %s", err))
		return 1
	}

	// Check if we should enter monitor mode
	if detach || periodic {
		c.Ui.Output("Job registration successful")
		if periodic {
			now := time.Now().UTC()
			next := job.Periodic.Next(now)
			c.Ui.Output(fmt.Sprintf("Approximate next launch time: %s (%s from now)",
				formatTime(next), formatTimeDifference(now, next, time.Second)))
		} else {
			c.Ui.Output("Evaluation ID: " + evalID)
		}

		return 0
	}

	// Detach was not specified, so start monitoring
	mon := newMonitor(c.Ui, client, length)
	return mon.monitor(evalID, false)
}
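// parseCheckIndex is used above but is not part of this excerpt. A minimal
// sketch (requires "strconv"), assuming an empty flag value means "do not
// enforce the index": it returns the parsed index, whether enforcement was
// requested, and any parse error.
func parseCheckIndex(input string) (uint64, bool, error) {
	if input == "" {
		return 0, false, nil
	}

	u, err := strconv.ParseUint(input, 10, 64)
	return u, true, err
}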
func (c *PlanCommand) Run(args []string) int {
	var diff, verbose bool

	flags := c.Meta.FlagSet("plan", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&diff, "diff", true, "")
	flags.BoolVar(&verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one job
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Convert it to something we can use
	apiJob, err := convertStructJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Force the region to be that of the job.
	if r := job.Region; r != "" {
		client.SetRegion(r)
	}

	// Submit the job
	resp, _, err := client.Jobs().Plan(apiJob, diff, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error during plan: %s", err))
		return 1
	}

	// Print the diff if not disabled
	if diff {
		c.Ui.Output(fmt.Sprintf("%s\n",
			c.Colorize().Color(strings.TrimSpace(formatJobDiff(resp.Diff, verbose)))))
	}

	// Print the scheduler dry-run output
	c.Ui.Output(c.Colorize().Color("[bold]Scheduler dry-run:[reset]"))
	c.Ui.Output(c.Colorize().Color(formatDryRun(resp.FailedTGAllocs, resp.CreatedEvals)))
	c.Ui.Output("")

	// Print the job index info
	c.Ui.Output(c.Colorize().Color(formatJobModifyIndex(resp.JobModifyIndex, file)))
	return 0
}
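// formatJobModifyIndex is called above but not defined in this excerpt. A
// minimal sketch, assuming it renders the job modify index together with a
// hint about re-running the job with version verification (the exact wording
// here is an assumption, not the real output):
func formatJobModifyIndex(jobModifyIndex uint64, jobName string) string {
	return fmt.Sprintf("[bold]Job Modify Index: %d[reset]\n"+
		"To submit the job with version verification run:\n\n"+
		"nomad run -check-index %d %s",
		jobModifyIndex, jobModifyIndex, jobName)
}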
func (c *RunCommand) Run(args []string) int {
	var detach bool

	flags := c.Meta.FlagSet("run", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Convert it to something we can use
	apiJob, err := convertJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Submit the job
	evalID, _, err := client.Jobs().Register(apiJob, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error submitting job: %s", err))
		return 1
	}

	// Check if we should enter monitor mode
	if detach {
		c.Ui.Output("Job registration successful")
		c.Ui.Output("Evaluation ID: " + evalID)
		return 0
	}

	// Detach was not specified, so start monitoring
	mon := newMonitor(c.Ui, client)
	return mon.monitor(evalID)
}
func (c *RunCommand) Run(args []string) int {
	var detach, verbose, output bool

	flags := c.Meta.FlagSet("run", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")
	flags.BoolVar(&verbose, "verbose", false, "")
	flags.BoolVar(&output, "output", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Check if the job is periodic.
	periodic := job.IsPeriodic()

	// Convert it to something we can use
	apiJob, err := convertStructJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	if output {
		req := api.RegisterJobRequest{Job: apiJob}
		buf, err := json.MarshalIndent(req, "", " ")
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
			return 1
		}

		c.Ui.Output(string(buf))
		return 0
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Submit the job
	evalID, _, err := client.Jobs().Register(apiJob, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error submitting job: %s", err))
		return 1
	}

	// Check if we should enter monitor mode
	if detach || periodic {
		c.Ui.Output("Job registration successful")
		if periodic {
			c.Ui.Output(fmt.Sprintf("Approximate next launch time: %v",
				job.Periodic.Next(time.Now().UTC())))
		} else {
			c.Ui.Output("Evaluation ID: " + evalID)
		}

		return 0
	}

	// Detach was not specified, so start monitoring
	mon := newMonitor(c.Ui, client, length)
	return mon.monitor(evalID, false)
}
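// convertStructJob is called throughout the commands above but is not part of
// this excerpt. A minimal sketch, assuming the internal structs.Job and the
// API-level api.Job types are field-compatible, is a gob round-trip (requires
// "bytes" and "encoding/gob"); the convertJob call in the earlier revision is
// assumed to do the same:
func convertStructJob(in *structs.Job) (*api.Job, error) {
	// Register the interface-typed values that can appear inside a job's
	// config maps so gob can encode them.
	gob.Register([]map[string]interface{}{})
	gob.Register([]interface{}{})

	var apiJob *api.Job
	buf := new(bytes.Buffer)
	if err := gob.NewEncoder(buf).Encode(in); err != nil {
		return nil, err
	}
	if err := gob.NewDecoder(buf).Decode(&apiJob); err != nil {
		return nil, err
	}
	return apiJob, nil
}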
func handleRun() int {
	// Parse the job file
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}

	// Convert to an API struct for submission
	apiJob, err := convertStructJob(job)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed converting job: %v", err)
	}
	jobID := apiJob.ID

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	jobs := client.Jobs()

	jobSubmitters := 64
	if numJobs < jobSubmitters {
		jobSubmitters = numJobs
	}
	log.Printf("[DEBUG] nomad: using %d parallel job submitters", jobSubmitters)

	// Submit the job the requested number of times
	errCh := make(chan error, numJobs)
	stopCh := make(chan struct{})
	jobsCh := make(chan *api.Job, jobSubmitters)
	defer close(stopCh)
	for i := 0; i < jobSubmitters; i++ {
		go submitJobs(jobs, jobsCh, stopCh, errCh)
	}

	log.Printf("[DEBUG] nomad: submitting %d jobs", numJobs)
	submitting := make(map[string]*api.Job, numJobs)
	for i := 0; i < numJobs; i++ {
		copied, err := copystructure.Copy(apiJob)
		if err != nil {
			log.Fatalf("[ERR] nomad: failed to copy api job: %v", err)
		}

		// Increment the job ID
		jobCopy := copied.(*api.Job)
		jobCopy.ID = fmt.Sprintf("%s-%d", jobID, i)
		submitting[jobCopy.ID] = jobCopy
		jobsCh <- jobCopy
	}

	// Collect errors if any
	for i := 0; i < numJobs; i++ {
		select {
		case err := <-errCh:
			if err != nil {
				log.Fatalf("[ERR] nomad: failed submitting job: %v", err)
			}
		case <-stopCh:
			return 0
		}
	}

	// Get the jobs that were submitted.
	submitted, _, err := jobs.List(nil)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed listing jobs: %v", err)
	}

	// See if anything didn't get registered
	for _, job := range submitted {
		delete(submitting, job.ID)
	}

	// Resubmit anything that was missed
	for id, missed := range submitting {
		log.Printf("[DEBUG] nomad: failed submitting job %q; retrying", id)
		_, _, err := jobs.Register(missed, nil)
		if err != nil {
			log.Printf("[ERR] nomad: failed submitting job: %v", err)
		}
	}
	return 0
}
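// submitJobs is the worker goroutine started by handleRun above; it is not
// shown in this excerpt. A minimal sketch, assuming each worker registers jobs
// from jobsCh until stopCh is closed and reports every result on errCh:
func submitJobs(jobs *api.Jobs, jobsCh <-chan *api.Job, stopCh <-chan struct{}, errCh chan<- error) {
	for {
		select {
		case job := <-jobsCh:
			// Register the job and hand the result back to the collector.
			_, _, err := jobs.Register(job, nil)
			errCh <- err
		case <-stopCh:
			return
		}
	}
}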
func handleStatus() int {
	// Parse the job file to get the total expected allocs
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}
	var totalAllocs int
	for _, group := range job.TaskGroups {
		totalAllocs += (group.Count * len(group.Tasks))
	}
	totalAllocs *= numJobs
	minEvals := numJobs
	log.Printf("[DEBUG] nomad: expecting %d allocs (%d evals minimum)", totalAllocs, minEvals)

	// Determine the set of jobs we should track.
	jobs := make(map[string]struct{})
	for i := 0; i < numJobs; i++ {
		// Increment the job ID
		jobs[fmt.Sprintf("%s-%d", job.ID, i)] = struct{}{}
	}

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	evalEndpoint := client.Evaluations()

	// Set up the args
	args := &api.QueryOptions{
		AllowStale: true,
	}

	// Wait for all the evals to be complete.
	cutoff := time.Now().Add(maxWait)
	evals := make(map[string]*api.Evaluation, minEvals)
	failedEvals := make(map[string]struct{})
	blockedEvals := make(map[string]int)
EVAL_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded {
			log.Printf("[DEBUG] nomad: next eval poll in %s", waitTime)
			time.Sleep(waitTime)
		}

		// Start the query
		resp, _, err := evalEndpoint.List(args)
		if err != nil {
			// Only log and continue to skip minor errors
			log.Printf("[ERR] nomad: failed querying evals: %v", err)
			continue
		}

		// Filter out evaluations that aren't for the jobs we are tracking.
		var filter []*api.Evaluation
		for _, eval := range resp {
			if _, ok := jobs[eval.JobID]; ok {
				filter = append(filter, eval)
			}
		}

		// Wait until all evals have gone through the scheduler.
		if n := len(filter); n < minEvals {
			log.Printf("[DEBUG] nomad: expect %d evals, have %d, polling again", minEvals, n)
			continue
		}

		// Ensure that all the evals are terminal, otherwise new allocations
		// could be made.
		needPoll := false
		for _, eval := range filter {
			switch eval.Status {
			case "failed":
				failedEvals[eval.ID] = struct{}{}
			case "complete":
				evals[eval.ID] = eval
			case "canceled":
				// Do nothing since it was a redundant eval.
			case "blocked":
				blockedEvals[eval.ID]++
				tries := blockedEvals[eval.ID]
				if tries < blockedEvalTries {
					needPoll = true
				} else if tries == blockedEvalTries {
					log.Printf("[DEBUG] nomad: abandoning blocked eval %q", eval.ID)
				}
			case "pending":
				needPoll = true
			}
		}

		if needPoll && !exceeded {
			continue EVAL_POLL
		}
		break
	}

	// We now have all the evals, gather the allocations and placement times.
	// scheduleTimes is a map of alloc ID to map of desired status and time.
	scheduleTimes := make(map[string]map[string]int64, totalAllocs)
	startTimes := make(map[string]int64, totalAllocs)    // When a task was started
	receivedTimes := make(map[string]int64, totalAllocs) // When a task was received by the client
	failedAllocs := make(map[string]int64)               // Time an alloc failed
	failedReason := make(map[string]string)              // Reason an alloc failed
	pendingAllocs := make(map[string]int)                // Counts how many times the alloc was in the pending state
	first := true
ALLOC_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded && !first {
			log.Printf("[DEBUG] nomad: next alloc poll in %s", waitTime)
			time.Sleep(waitTime)
		}
		first = false

		needPoll := false
		for evalID := range evals {
			// Start the query
			resp, _, err := evalEndpoint.Allocations(evalID, args)
			if err != nil {
				// Only log and continue to skip minor errors
				log.Printf("[ERR] nomad: failed querying allocations: %v", err)
				continue
			}

			for _, alloc := range resp {
				// Capture the schedule time.
				allocTimes, ok := scheduleTimes[alloc.ID]
				if !ok {
					allocTimes = make(map[string]int64, 3)
					scheduleTimes[alloc.ID] = allocTimes
				}
				allocTimes[alloc.DesiredStatus] = alloc.CreateTime

				// Ensure that they have started or have failed.
				switch alloc.ClientStatus {
				case "failed":
					failedAllocs[alloc.ID] = alloc.CreateTime
					var failures []string
					for _, state := range alloc.TaskStates {
						if state.State == "failed" {
							failures = append(failures, state.Events[0].DriverError)
						}
					}
					failedReason[alloc.ID] = strings.Join(failures, ",")
					continue
				case "pending":
					pendingAllocs[alloc.ID]++
					tries := pendingAllocs[alloc.ID]
					if tries < pendingAllocTries {
						needPoll = true
					} else if tries == pendingAllocTries {
						log.Printf("[DEBUG] nomad: abandoning alloc %q", alloc.ID)
					}
					continue
				}

				// Detect the start time.
				for _, state := range alloc.TaskStates {
					if len(state.Events) == 0 {
						needPoll = true
					}

					for _, event := range state.Events {
						time := event.Time
						switch event.Type {
						case "Started":
							startTimes[alloc.ID] = time
						case "Received":
							receivedTimes[alloc.ID] = time
						}
					}
				}
			}
		}

		if needPoll && !exceeded {
			continue ALLOC_POLL
		}
		break
	}

	// Print the failure reasons for client allocs.
	for id, reason := range failedReason {
		log.Printf("[DEBUG] nomad: alloc id %q failed on client: %v", id, reason)
	}

	// Print the results.
	if l := len(failedEvals); l != 0 {
		fmt.Fprintf(os.Stdout, "failed_evals|%f\n", float64(l))
	}
	for time, count := range accumTimes(failedAllocs) {
		fmt.Fprintf(os.Stdout, "failed_allocs|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimes(startTimes) {
		fmt.Fprintf(os.Stdout, "running|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimes(receivedTimes) {
		fmt.Fprintf(os.Stdout, "received|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimesOn("run", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_run|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimesOn("failed", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_failed|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimesOn("stop", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_stop|%f|%d\n", float64(count), time)
	}

	// Aggregate eval trigger reasons (TriggeredBy).
	triggers := make(map[string]int, len(evals))
	for _, eval := range evals {
		triggers[eval.TriggeredBy]++
	}
	for trigger, count := range triggers {
		fmt.Fprintf(os.Stdout, "trigger:%s|%f\n", trigger, float64(count))
	}

	// Print if the scheduler changed scheduling decisions
	flips := make(map[string]map[string]int64) // alloc id -> map[flipType]time
	flipTypes := make(map[string]struct{})
	for id, decisions := range scheduleTimes {
		if len(decisions) < 2 {
			continue
		}

		// Have decision -> time:
		// 1) Invert to time -> decision
		// 2) Sort the times
		// 3) Emit the transitions
		flips[id] = make(map[string]int64)
		inverted := make(map[int64]string, len(decisions))
		times := make([]int, 0, len(decisions))
		for k, v := range decisions {
			inverted[v] = k
			times = append(times, int(v))
		}
		sort.Ints(times)

		for i := 1; i < len(times); i++ {
			// Look up the decision names directly; indexing decisions with
			// the decision name would yield a timestamp, not a status.
			from := inverted[int64(times[i-1])]
			to := inverted[int64(times[i])]
			flipType := fmt.Sprintf("%s-to-%s", from, to)
			flips[id][flipType] = int64(times[i])
			flipTypes[flipType] = struct{}{}
		}
	}

	// Iterate over the observed flip types, not the per-alloc flips map
	// (ranging over flips would yield alloc IDs instead).
	for flipType := range flipTypes {
		for time, count := range accumTimesOn(flipType, flips) {
			fmt.Fprintf(os.Stdout, "%v|%f|%d\n", flipType, float64(count), time)
		}
	}
	return 0
}
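// The polling and reporting code above relies on three helpers that are not
// part of this excerpt. Minimal sketches follow; the pollInterval constant and
// the exact accumulation semantics are assumptions.

// getSleepTime returns how long to sleep before the next poll and whether the
// cutoff has already been exceeded.
func getSleepTime(cutoff time.Time) (time.Duration, bool) {
	remaining := cutoff.Sub(time.Now())
	if remaining <= 0 {
		return 0, true
	}
	if remaining > pollInterval { // assumed package-level poll interval
		return pollInterval, false
	}
	return remaining, false
}

// accumTimes buckets a map of ID -> timestamp into timestamp -> count, which
// matches how the results are printed as (count, time) pairs.
func accumTimes(in map[string]int64) map[int64]int {
	out := make(map[int64]int, len(in))
	for _, t := range in {
		out[t]++
	}
	return out
}

// accumTimesOn does the same, counting only entries whose nested map contains
// the given key (a desired status such as "run", or a flip type).
func accumTimesOn(key string, in map[string]map[string]int64) map[int64]int {
	out := make(map[int64]int, len(in))
	for _, times := range in {
		if t, ok := times[key]; ok {
			out[t]++
		}
	}
	return out
}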