Example #1
func (c *ValidateCommand) Run(args []string) int {
	flags := c.Meta.FlagSet("validate", FlagSetNone)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Done!
	c.Ui.Output("Job validation successful")
	return 0
}
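
Since the command needs nothing beyond its Meta for flags and UI output, it can be driven directly. A minimal sketch, assuming (as in Nomad's command package) that ValidateCommand embeds a Meta whose exported Ui field is a mitchellh/cli Ui; "example.nomad" is a placeholder path, not a file from these examples:

package command

import (
	"fmt"
	"os"

	"github.com/mitchellh/cli"
)

func ExampleValidateCommand() {
	// BasicUi writes command output and errors to the given writers.
	ui := &cli.BasicUi{Writer: os.Stdout, ErrorWriter: os.Stderr}
	cmd := &ValidateCommand{Meta: Meta{Ui: ui}}

	// Run returns the process exit code: 0 when the job file parses,
	// initializes, and validates cleanly.
	code := cmd.Run([]string{"example.nomad"})
	fmt.Println("exit code:", code)
}
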
Example #2
func (c *RunCommand) Run(args []string) int {
	var detach, verbose, output bool
	var checkIndexStr string

	flags := c.Meta.FlagSet("run", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")
	flags.BoolVar(&verbose, "verbose", false, "")
	flags.BoolVar(&output, "output", false, "")
	flags.StringVar(&checkIndexStr, "check-index", "", "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Check if the job is periodic.
	periodic := job.IsPeriodic()

	// Convert it to something we can use
	apiJob, err := convertStructJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	if output {
		req := api.RegisterJobRequest{Job: apiJob}
		buf, err := json.MarshalIndent(req, "", "    ")
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
			return 1
		}

		c.Ui.Output(string(buf))
		return 0
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Force the region to be that of the job.
	if r := job.Region; r != "" {
		client.SetRegion(r)
	}

	// Parse the check-index
	checkIndex, enforce, err := parseCheckIndex(checkIndexStr)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing check-index value %q: %v", checkIndexStr, err))
		return 1
	}

	// Submit the job
	var evalID string
	if enforce {
		evalID, _, err = client.Jobs().EnforceRegister(apiJob, checkIndex, nil)
	} else {
		evalID, _, err = client.Jobs().Register(apiJob, nil)
	}
	if err != nil {
		if strings.Contains(err.Error(), api.RegisterEnforceIndexErrPrefix) {
			// Format the error specially if the error is due to index
			// enforcement
			matches := enforceIndexRegex.FindStringSubmatch(err.Error())
			if len(matches) == 2 {
				c.Ui.Error(matches[1]) // The matched group
				c.Ui.Error("Job not updated")
				return 1
			}
		}

		c.Ui.Error(fmt.Sprintf("Error submitting job: %s", err))
		return 1
	}

	// Check if we should enter monitor mode
	if detach || periodic {
		c.Ui.Output("Job registration successful")
		if periodic {
			now := time.Now().UTC()
			next := job.Periodic.Next(now)
			c.Ui.Output(fmt.Sprintf("Approximate next launch time: %s (%s from now)",
				formatTime(next), formatTimeDifference(now, next, time.Second)))
		} else {
			c.Ui.Output("Evaluation ID: " + evalID)
		}

		return 0
	}

	// Detach was not specified, so start monitoring
	mon := newMonitor(c.Ui, client, length)
	return mon.monitor(evalID, false)
}
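
This version leans on a parseCheckIndex helper that isn't shown above. A hedged sketch of what it plausibly looks like, using only strconv from the standard library: an empty flag value means "register unconditionally", and anything else must parse as a uint64 modify index:

// Sketch of parseCheckIndex as used by RunCommand above; the real helper
// is defined elsewhere in the package. The bool reports whether index
// enforcement was requested at all.
func parseCheckIndex(input string) (uint64, bool, error) {
	if input == "" {
		return 0, false, nil
	}

	u, err := strconv.ParseUint(input, 10, 64)
	return u, true, err // on error, the caller reports it and exits
}
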
Example #3
func (c *PlanCommand) Run(args []string) int {
	var diff, verbose bool

	flags := c.Meta.FlagSet("plan", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&diff, "diff", true, "")
	flags.BoolVar(&verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one job
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Convert it to something we can use
	apiJob, err := convertStructJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Force the region to be that of the job.
	if r := job.Region; r != "" {
		client.SetRegion(r)
	}

	// Submit the job
	resp, _, err := client.Jobs().Plan(apiJob, diff, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error during plan: %s", err))
		return 1
	}

	// Print the diff if not disabled
	if diff {
		c.Ui.Output(fmt.Sprintf("%s\n",
			c.Colorize().Color(strings.TrimSpace(formatJobDiff(resp.Diff, verbose)))))
	}

	// Print the scheduler dry-run output
	c.Ui.Output(c.Colorize().Color("[bold]Scheduler dry-run:[reset]"))
	c.Ui.Output(c.Colorize().Color(formatDryRun(resp.FailedTGAllocs, resp.CreatedEvals)))
	c.Ui.Output("")

	// Print the job index info
	c.Ui.Output(c.Colorize().Color(formatJobModifyIndex(resp.JobModifyIndex, file)))
	return 0
}
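
The JobModifyIndex printed at the end is what makes plan compose with run: fed back through -check-index (see Example #2), registration becomes a compare-and-swap. A hedged sketch of that handoff, reusing the Plan and EnforceRegister client calls from these examples; the helper name and error wording are illustrative only:

// planThenApply registers a job only if it has not changed since it was
// planned, surfacing concurrent updates as errors instead of clobbering them.
func planThenApply(client *api.Client, apiJob *api.Job) (string, error) {
	resp, _, err := client.Jobs().Plan(apiJob, true, nil)
	if err != nil {
		return "", fmt.Errorf("plan failed: %v", err)
	}

	// EnforceRegister succeeds only while the job's modify index still
	// matches the one the plan was computed against.
	evalID, _, err := client.Jobs().EnforceRegister(apiJob, resp.JobModifyIndex, nil)
	if err != nil {
		return "", fmt.Errorf("job changed since plan, re-plan and retry: %v", err)
	}
	return evalID, nil
}
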
Example #4
func (c *RunCommand) Run(args []string) int {
	var detach bool

	flags := c.Meta.FlagSet("run", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Convert it to something we can use
	apiJob, err := convertJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Submit the job
	evalID, _, err := client.Jobs().Register(apiJob, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error submitting job: %s", err))
		return 1
	}

	// Check if we should enter monitor mode
	if detach {
		c.Ui.Output("Job registration successful")
		c.Ui.Output("Evaluation ID: " + evalID)
		return 0
	}

	// Detach was not specified, so start monitoring
	mon := newMonitor(c.Ui, client)
	return mon.monitor(evalID)
}
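
With -detach, the caller gets only an evaluation ID back. A hedged sketch of following that evaluation through the public API client instead of the command's monitor; Evaluations().Info is the standard api package call, the status strings are the ones Example #7 matches on, and the one-second interval is an assumption:

// waitForEval polls an evaluation until it reaches a terminal status.
func waitForEval(client *api.Client, evalID string) error {
	for {
		eval, _, err := client.Evaluations().Info(evalID, nil)
		if err != nil {
			return fmt.Errorf("failed querying eval: %v", err)
		}
		switch eval.Status {
		case "complete", "failed", "canceled":
			fmt.Println("eval finished with status:", eval.Status)
			return nil
		}
		time.Sleep(time.Second)
	}
}
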
Example #5
func (c *RunCommand) Run(args []string) int {
	var detach, verbose, output bool

	flags := c.Meta.FlagSet("run", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&detach, "detach", false, "")
	flags.BoolVar(&verbose, "verbose", false, "")
	flags.BoolVar(&output, "output", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// Check that we got exactly one job file
	args = flags.Args()
	if len(args) != 1 {
		c.Ui.Error(c.Help())
		return 1
	}
	file := args[0]

	// Parse the job file
	job, err := jobspec.ParseFile(file)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing job file %s: %s", file, err))
		return 1
	}

	// Initialize any fields that need to be.
	job.InitFields()

	// Check that the job is valid
	if err := job.Validate(); err != nil {
		c.Ui.Error(fmt.Sprintf("Error validating job: %s", err))
		return 1
	}

	// Check if the job is periodic.
	periodic := job.IsPeriodic()

	// Convert it to something we can use
	apiJob, err := convertStructJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}

	if output {
		req := api.RegisterJobRequest{Job: apiJob}
		buf, err := json.MarshalIndent(req, "", "    ")
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
			return 1
		}

		c.Ui.Output(string(buf))
		return 0
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Submit the job
	evalID, _, err := client.Jobs().Register(apiJob, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error submitting job: %s", err))
		return 1
	}

	// Check if we should enter monitor mode
	if detach || periodic {
		c.Ui.Output("Job registration successful")
		if periodic {
			c.Ui.Output(fmt.Sprintf("Approximate next launch time: %v", job.Periodic.Next(time.Now().UTC())))
		} else {
			c.Ui.Output("Evaluation ID: " + evalID)
		}

		return 0
	}

	// Detach was not specified, so start monitoring
	mon := newMonitor(c.Ui, client, length)
	return mon.monitor(evalID, false)
}
Example #6
func handleRun() int {
	// Parse the job file
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}

	// Convert to an API struct for submission
	apiJob, err := convertStructJob(job)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed converting job: %v", err)
	}
	jobID := apiJob.ID

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	jobs := client.Jobs()

	jobSubmitters := 64
	if numJobs < jobSubmitters {
		jobSubmitters = numJobs
	}
	log.Printf("[DEBUG] nomad: using %d parallel job submitters", jobSubmitters)

	// Submit the job the requested number of times
	errCh := make(chan error, numJobs)
	stopCh := make(chan struct{})
	jobsCh := make(chan *api.Job, jobSubmitters)
	defer close(stopCh)
	for i := 0; i < jobSubmitters; i++ {
		go submitJobs(jobs, jobsCh, stopCh, errCh)
	}

	log.Printf("[DEBUG] nomad: submitting %d jobs", numJobs)
	submitting := make(map[string]*api.Job, numJobs)
	for i := 0; i < numJobs; i++ {
		copied, err := copystructure.Copy(apiJob)
		if err != nil {
			log.Fatalf("[ERR] nomad: failed to copy api job: %v", err)
		}

		// Increment the job ID
		jobCopy := copied.(*api.Job)
		jobCopy.ID = fmt.Sprintf("%s-%d", jobID, i)
		submitting[jobCopy.ID] = jobCopy
		jobsCh <- jobCopy
	}

	// Collect errors if any
	for i := 0; i < numJobs; i++ {
		select {
		case err := <-errCh:
			if err != nil {
				log.Fatalf("[ERR] nomad: failed submitting job: %v", err)
			}
		case <-stopCh:
			return 0
		}
	}

	// Get the jobs that were submitted.
	submitted, _, err := jobs.List(nil)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed listing jobs: %v", err)
	}

	// See if anything didn't get registered
	for _, job := range submitted {
		delete(submitting, job.ID)
	}

	// Resubmit anything that was missed
	for id, missed := range submitting {
		log.Printf("[DEBUG] nomad: failed submitting job %q; retrying", id)
		_, _, err := jobs.Register(missed, nil)
		if err != nil {
			log.Printf("[ERR] nomad: failed submitting job: %v", err)
		}
	}

	return 0
}
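
handleRun fans submissions out to submitJobs workers that aren't shown above. Given the channel types it is called with, a plausible worker is just a register loop; a hedged sketch, not the file's actual helper:

// submitJobs registers jobs from jobsCh until told to stop, reporting
// every result (nil or not) on errCh so handleRun can count completions.
func submitJobs(jobs *api.Jobs, jobsCh <-chan *api.Job, stopCh <-chan struct{}, errCh chan<- error) {
	for {
		select {
		case job := <-jobsCh:
			_, _, err := jobs.Register(job, nil)
			errCh <- err
		case <-stopCh:
			return
		}
	}
}
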
Example #7
func handleStatus() int {
	// Parse the job file to get the total expected allocs
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}
	var totalAllocs int
	for _, group := range job.TaskGroups {
		totalAllocs += (group.Count * len(group.Tasks))
	}
	totalAllocs *= numJobs
	minEvals := numJobs
	log.Printf("[DEBUG] nomad: expecting %d allocs (%d evals minimum)", totalAllocs, minEvals)

	// Determine the set of jobs we should track.
	jobs := make(map[string]struct{})
	for i := 0; i < numJobs; i++ {
		// Increment the job ID
		jobs[fmt.Sprintf("%s-%d", job.ID, i)] = struct{}{}
	}

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	evalEndpoint := client.Evaluations()

	// Set up the args
	args := &api.QueryOptions{
		AllowStale: true,
	}

	// Wait for all the evals to be complete.
	cutoff := time.Now().Add(maxWait)
	evals := make(map[string]*api.Evaluation, minEvals)
	failedEvals := make(map[string]struct{})
	blockedEvals := make(map[string]int)
EVAL_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded {
			log.Printf("[DEBUG] nomad: next eval poll in %s", waitTime)
			time.Sleep(waitTime)
		}

		// Start the query
		resp, _, err := evalEndpoint.List(args)
		if err != nil {
			// Only log and continue; minor errors shouldn't abort the poll
			log.Printf("[ERR] nomad: failed querying evals: %v", err)
			continue
		}

		// Filter out evaluations that aren't for the jobs we are tracking.
		var filter []*api.Evaluation
		for _, eval := range resp {
			if _, ok := jobs[eval.JobID]; ok {
				filter = append(filter, eval)
			}
		}

		// Wait until all evals have gone through the scheduler.
		if n := len(filter); n < minEvals {
			log.Printf("[DEBUG] nomad: expect %d evals, have %d, polling again",
				minEvals, n)
			continue
		}

		// Ensure that all the evals are terminal, otherwise new allocations
		// could be made.
		needPoll := false
		for _, eval := range filter {
			switch eval.Status {
			case "failed":
				failedEvals[eval.ID] = struct{}{}
			case "complete":
				evals[eval.ID] = eval
			case "canceled":
				// Do nothing since it was a redundant eval.
			case "blocked":
				blockedEvals[eval.ID]++
				tries := blockedEvals[eval.ID]
				if tries < blockedEvalTries {
					needPoll = true
				} else if tries == blockedEvalTries {
					log.Printf("[DEBUG] nomad: abandoning blocked eval %q", eval.ID)
				}
			case "pending":
				needPoll = true
			}
		}

		if needPoll && !exceeded {
			continue EVAL_POLL
		}

		break
	}

	// We now have all the evals, gather the allocations and placement times.

	// scheduleTimes is a map of alloc ID to a map of desired status to time.
	scheduleTimes := make(map[string]map[string]int64, totalAllocs)
	startTimes := make(map[string]int64, totalAllocs)    // When a task was started
	receivedTimes := make(map[string]int64, totalAllocs) // When a task was received by the client
	failedAllocs := make(map[string]int64)               // Time an alloc failed
	failedReason := make(map[string]string)              // Reason an alloc failed
	pendingAllocs := make(map[string]int)                // Counts how many times the alloc was in the pending state
	first := true
ALLOC_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded && !first {
			log.Printf("[DEBUG] nomad: next eval poll in %s", waitTime)
			time.Sleep(waitTime)
		}
		first = false

		needPoll := false
		for evalID := range evals {
			// Start the query
			resp, _, err := evalEndpoint.Allocations(evalID, args)
			if err != nil {
				// Only log and continue; minor errors shouldn't abort the poll
				log.Printf("[ERR] nomad: failed querying allocations: %v", err)
				continue
			}

			for _, alloc := range resp {
				// Capture the schedule time.
				allocTimes, ok := scheduleTimes[alloc.ID]
				if !ok {
					allocTimes = make(map[string]int64, 3)
					scheduleTimes[alloc.ID] = allocTimes
				}
				allocTimes[alloc.DesiredStatus] = alloc.CreateTime

				// Ensure that they have started or have failed.
				switch alloc.ClientStatus {
				case "failed":
					failedAllocs[alloc.ID] = alloc.CreateTime
					var failures []string
					for _, state := range alloc.TaskStates {
						if state.State == "failed" {
							failures = append(failures, state.Events[0].DriverError)
						}
					}
					failedReason[alloc.ID] = strings.Join(failures, ",")
					continue
				case "pending":
					pendingAllocs[alloc.ID]++
					tries := pendingAllocs[alloc.ID]
					if tries < pendingAllocTries {
						needPoll = true
					} else if tries == pendingAllocTries {
						log.Printf("[DEBUG] nomad: abandoning alloc %q", alloc.ID)
					}
					continue
				}

				// Detect the start time.
				for _, state := range alloc.TaskStates {
					if len(state.Events) == 0 {
						needPoll = true
					}

					for _, event := range state.Events {
						t := event.Time
						switch event.Type {
						case "Started":
							startTimes[alloc.ID] = t
						case "Received":
							receivedTimes[alloc.ID] = t
						}
					}
				}
			}
		}

		if needPoll && !exceeded {
			continue ALLOC_POLL
		}

		break
	}

	// Print the failure reasons for client allocs.
	for id, reason := range failedReason {
		log.Printf("[DEBUG] nomad: alloc id %q failed on client: %v", id, reason)
	}

	// Print the results.
	if l := len(failedEvals); l != 0 {
		fmt.Fprintf(os.Stdout, "failed_evals|%f\n", float64(l))
	}
	for t, count := range accumTimes(failedAllocs) {
		fmt.Fprintf(os.Stdout, "failed_allocs|%f|%d\n", float64(count), t)
	}
	for t, count := range accumTimes(startTimes) {
		fmt.Fprintf(os.Stdout, "running|%f|%d\n", float64(count), t)
	}
	for t, count := range accumTimes(receivedTimes) {
		fmt.Fprintf(os.Stdout, "received|%f|%d\n", float64(count), t)
	}
	for t, count := range accumTimesOn("run", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_run|%f|%d\n", float64(count), t)
	}
	for t, count := range accumTimesOn("failed", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_failed|%f|%d\n", float64(count), t)
	}
	for t, count := range accumTimesOn("stop", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_stop|%f|%d\n", float64(count), t)
	}

	// Aggregate the evals by what triggered them.
	triggers := make(map[string]int, len(evals))
	for _, eval := range evals {
		triggers[eval.TriggeredBy]++
	}
	for trigger, count := range triggers {
		fmt.Fprintf(os.Stdout, "trigger:%s|%f\n", trigger, float64(count))
	}

	// Print whether the scheduler changed its scheduling decisions
	flips := make(map[string]map[string]int64) // alloc id -> map[flipType]time
	flipTypes := make(map[string]struct{})
	for id, decisions := range scheduleTimes {
		if len(decisions) < 2 {
			continue
		}
		// We have decision -> time:
		// 1) invert to time -> decision
		// 2) sort the times
		// 3) record the transitions
		flips[id] = make(map[string]int64)
		inverted := make(map[int64]string, len(decisions))
		times := make([]int, 0, len(decisions))
		for k, v := range decisions {
			inverted[v] = k
			times = append(times, int(v))
		}
		sort.Ints(times)
		for i := 1; i < len(times); i++ {
			// inverted maps each time back to its decision string.
			from := inverted[int64(times[i-1])]
			to := inverted[int64(times[i])]
			flipType := fmt.Sprintf("%s-to-%s", from, to)
			flips[id][flipType] = int64(times[i])
			flipTypes[flipType] = struct{}{}
		}
	}

	for flipType := range flipTypes {
		for t, count := range accumTimesOn(flipType, flips) {
			fmt.Fprintf(os.Stdout, "%v|%f|%d\n", flipType, float64(count), t)
		}
	}

	return 0
}
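
Both polling loops rely on a getSleepTime helper that isn't shown. From its call sites it returns how long to sleep before the next poll plus whether the cutoff has passed; a hedged sketch, where pollInterval is an assumed package-level setting rather than a name from the original:

// pollInterval is an assumed default; the original may compute this differently.
var pollInterval = 10 * time.Second

// getSleepTime reports the wait before the next poll, capped so the final
// sleep does not overshoot the cutoff, and whether the cutoff has passed.
func getSleepTime(cutoff time.Time) (time.Duration, bool) {
	remaining := time.Until(cutoff)
	if remaining <= 0 {
		return 0, true
	}
	if remaining < pollInterval {
		return remaining, false
	}
	return pollInterval, false
}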