Example #1
0
// cliStart wires the service together from the global CLI flags: it creates a
// Nomad API client and a logrus logger, starts the usage collector and then
// serves the HTTP API. The process exits with status 1 when the Nomad client
// cannot be created and with status 2 when the log level does not parse.
func cliStart(c *cli.Context) {
	// The same interval is used for the client's wait time and the collector.
	waitTime := time.Duration(c.GlobalInt("wait-time")) * time.Second

	nomadCfg := nomadapi.DefaultConfig()
	nomadCfg.Address = c.GlobalString("nomad-address")
	nomadCfg.Region = c.GlobalString("nomad-region")
	nomadCfg.WaitTime = waitTime

	client, err := nomadapi.NewClient(nomadCfg)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	level, err := logrus.ParseLevel(c.GlobalString("log-level"))
	if err != nil {
		fmt.Println("incorrect log-level")
		os.Exit(2)
	}
	logger := logrus.New()
	logger.Out = os.Stderr
	logger.Level = level

	collector := usage.NewUsage(client, waitTime, logger)
	collector.Loop()

	// ListenAndServe's result is always treated as fatal.
	apiCtx := api.NewContext(c.GlobalString("api-addr"), serviceVersion, collector, logger, client)
	serveErr := api.ListenAndServe(apiCtx)
	logger.Fatal(serveErr)
}
Example #2
0
// Client builds a Nomad API client for this command. Starting from the
// library defaults, address and region are resolved with the precedence
// command line value > environment variable > default. A TLS configuration
// is attached only when at least one TLS-related option was supplied.
func (m *Meta) Client() (*api.Client, error) {
	cfg := api.DefaultConfig()

	// Address: an explicit flag wins over EnvNomadAddress.
	if m.flagAddress != "" {
		cfg.Address = m.flagAddress
	} else if addr := os.Getenv(EnvNomadAddress); addr != "" {
		cfg.Address = addr
	}

	// Region: an explicit value wins over EnvNomadRegion.
	if m.region != "" {
		cfg.Region = m.region
	} else if region := os.Getenv(EnvNomadRegion); region != "" {
		cfg.Region = region
	}

	// Leave TLSConfig untouched unless the caller asked for custom TLS.
	wantsTLS := m.insecure ||
		m.caCert != "" ||
		m.caPath != "" ||
		m.clientCert != "" ||
		m.clientKey != ""
	if wantsTLS {
		cfg.TLSConfig = &api.TLSConfig{
			CACert:     m.caCert,
			CAPath:     m.caPath,
			ClientCert: m.clientCert,
			ClientKey:  m.clientKey,
			Insecure:   m.insecure,
		}
	}

	return api.NewClient(cfg)
}
Example #3
0
// CreateNomadJob builds a single-task batch job for the given pipeline run,
// validates it, converts it to the API representation and registers it with
// Nomad through a client created from the default API configuration.
//
// The job, its task group and its task are all named after the pipeline. The
// task uses the "gypsy" driver and receives the container, pipeline name,
// run id and server URL through its driver config. Region, datacenter,
// priority and resources are fixed values.
//
// It returns the submitted job wrapped in a NomadJob, or a nil NomadJob and
// an error when validation, conversion, client creation or registration
// fails. Every failure is also logged.
func (c *Builder) CreateNomadJob(pipeline *structs.Pipeline, runId int) (*NomadJob, error) {
	config := make(map[string]interface{})
	config["container"] = pipeline.Container
	config["pipeline"] = pipeline.Name
	config["run_id"] = strconv.Itoa(runId)
	config["server_url"] = c.ServerURL
	resources := &nomadStructs.Resources{
		CPU:      1024,
		MemoryMB: 128,
	}
	task := &nomadStructs.Task{
		Name:      pipeline.Name,
		Driver:    "gypsy",
		Config:    config,
		Resources: resources,
	}
	group := &nomadStructs.TaskGroup{
		Name:          pipeline.Name,
		Count:         1,
		Tasks:         []*nomadStructs.Task{task},
		RestartPolicy: nomadStructs.NewRestartPolicy("batch"),
	}
	job := &nomadStructs.Job{
		ID:          pipeline.Name,
		Name:        pipeline.Name,
		Region:      "global",
		Priority:    50,
		Datacenters: []string{"dc1"},
		Type:        "batch",
		TaskGroups:  []*nomadStructs.TaskGroup{group},
	}
	if err := job.Validate(); err != nil {
		log.Errorf("Nomad job validation failed. Error: %s\n", err)
		return nil, err
	}
	apiJob, err := convertJob(job)
	if err != nil {
		log.Errorf("Failed to convert nomad job in api call. Error: %s\n", err)
		return nil, err
	}
	nomadConfig := nomadApi.DefaultConfig()
	nomadClient, err := nomadApi.NewClient(nomadConfig)
	if err != nil {
		log.Errorf("Error creating nomad api client: %s", err)
		// Format the error directly: feeding a pre-formatted string to
		// Errorf as the format would re-interpret any '%' in the error text.
		return nil, fmt.Errorf("Error creating nomad api client: %s", err)
	}
	evalId, _, nomadErr := nomadClient.Jobs().Register(apiJob, nil)
	if nomadErr != nil {
		log.Errorf("Error submitting job: %s", nomadErr)
		return nil, fmt.Errorf("Error submitting job: %s", nomadErr)
	}
	log.Infof("Successfully submitted nomad job. Eval id: %s\n", evalId)
	return &NomadJob{
		Pipeline: pipeline,
		Job:      job,
	}, nil
}
Example #4
0
// Client builds a Nomad API client for this command. The address comes from
// the command line flag when it is set, otherwise from EnvNomadAddress when
// that is non-empty, otherwise from the library default.
func (m *Meta) Client() (*api.Client, error) {
	conf := api.DefaultConfig()

	switch envAddr := os.Getenv(EnvNomadAddress); {
	case m.flagAddress != "":
		conf.Address = m.flagAddress
	case envAddr != "":
		conf.Address = envAddr
	}

	return api.NewClient(conf)
}
Example #5
0
// providerConfigure creates the Nomad API client used by the provider from
// the "address" and "region" values of the resource data. The returned
// interface value holds the *api.Client; a client creation failure is
// returned as a wrapped error with a nil value.
func providerConfigure(d *schema.ResourceData) (interface{}, error) {
	conf := api.DefaultConfig()
	// Both values are asserted to be strings; a different type panics here.
	conf.Address, conf.Region = d.Get("address").(string), d.Get("region").(string)

	client, err := api.NewClient(conf)
	if err == nil {
		return client, nil
	}
	return nil, fmt.Errorf("failed to configure Nomad API: %s", err)
}
Example #6
0
// handleTeardown deregisters every job that the Nomad server lists, using a
// client built from the default API configuration. Any failure terminates
// the process through log.Fatalf; otherwise it returns 0.
func handleTeardown() int {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}

	log.Printf("[DEBUG] nomad: deregistering benchmark jobs")
	jobsAPI := client.Jobs()

	listed, _, err := jobsAPI.List(nil)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed listing jobs: %v", err)
	}

	// Stop the jobs one by one; the first failure aborts the teardown.
	for i := range listed {
		_, _, err := jobsAPI.Deregister(listed[i].ID, nil)
		if err != nil {
			log.Fatalf("[ERR] nomad: failed deregistering job: %v", err)
		}
	}
	return 0
}
Example #7
0
// testServer starts a test server configured through cb and returns it
// together with an API client pointed at it and the HTTP address that client
// uses. The calling test is marked parallel the first time it requests a
// server; a client creation failure fails the test immediately.
func testServer(
	t *testing.T,
	cb testutil.ServerConfigCallback) (*testutil.TestServer, *api.Client, string) {

	// seen remembers which tests were already marked parallel, so that
	// t.Parallel is called at most once per test.
	// NOTE(review): seen is package-level state declared elsewhere; confirm
	// that access to it is safe while tests run in parallel.
	if _, marked := seen[t]; !marked {
		seen[t] = struct{}{}
		t.Parallel()
	}

	server := testutil.NewTestServer(t, cb)

	conf := api.DefaultConfig()
	conf.Address = "http://" + server.HTTPAddr

	client, err := api.NewClient(conf)
	if err != nil {
		t.Fatalf("err: %s", err)
	}
	return server, client, conf.Address
}
Example #8
0
// main benchmarks how quickly Nomad brings a requested number of allocations
// into the running state.
//
// It expects exactly one argument, the number of allocations. The job
// template (the package-level job format string) is rendered with that
// number into a temporary file and submitted with "nomad run". The
// allocation list is then polled every 100ms; the time to the first running
// allocation and every change in the running count are printed, and the
// program returns once at least the requested number is running.
//
// Errors are printed to stdout and end the program early.
func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	if len(os.Args) != 2 {
		fmt.Println("need 1 arg")
		return
	}

	total, err := strconv.Atoi(os.Args[1])
	if err != nil {
		fmt.Println("arg 1 must be number")
		return
	}

	fh, err := ioutil.TempFile("", "bench")
	if err != nil {
		fmt.Println(err.Error())
		return
	}
	defer os.Remove(fh.Name())

	jobContent := fmt.Sprintf(job, total)
	if _, err := fh.WriteString(jobContent); err != nil {
		// Do not leak the handle on the failure path.
		fh.Close()
		fmt.Println(err.Error())
		return
	}
	// A failed close can mean the job file was not fully written, so do not
	// hand it to nomad in that case.
	if err := fh.Close(); err != nil {
		fmt.Println(err.Error())
		return
	}

	isRunning := false
	allocClient := client.Allocations()

	cmd := exec.Command("nomad", "run", fh.Name())
	if err := cmd.Run(); err != nil {
		fmt.Println("nomad run failed: " + err.Error())
		return
	}
	start := time.Now()

	last := 0
	fmt.Printf("benchmarking %d allocations\n", total)
	opts := &api.QueryOptions{AllowStale: true}
	for {
		time.Sleep(100 * time.Millisecond)

		allocs, _, err := allocClient.List(opts)
		if err != nil {
			fmt.Println(err.Error())

			// keep going to paper over minor errors
			continue
		}
		now := time.Now()

		running := 0
		for _, alloc := range allocs {
			if alloc.ClientStatus != structs.AllocClientStatusRunning {
				continue
			}
			if !isRunning {
				fmt.Printf("time to first running: %s\n", now.Sub(start))
				isRunning = true
			}
			running++
		}

		if last != running {
			fmt.Printf("%d running after %s\n", running, now.Sub(start))
		}
		last = running

		// Use >= so the loop still terminates when more allocations than
		// requested are running (the list is not filtered by job).
		if running >= total {
			return
		}
	}
}
Example #9
0
// handleRun submits numJobs copies of the job described by jobFile to Nomad.
//
// The parsed job is converted to its API form and deep-copied once per
// submission, each copy getting the ID "<original id>-<index>". Copies are
// handed over a channel to at most 64 submitJobs goroutines. After one
// result per job has been collected, the job list is fetched and any copy
// that does not appear in it is registered once more. Setup and submission
// errors terminate the process through log.Fatalf; otherwise 0 is returned.
func handleRun() int {
	parsed, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}

	// The API representation is what actually gets submitted.
	baseJob, err := convertStructJob(parsed)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed converting job: %v", err)
	}
	baseID := baseJob.ID

	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	jobsAPI := client.Jobs()

	// Never start more submitters than there are jobs, and never more than 64.
	workers := numJobs
	if workers > 64 {
		workers = 64
	}
	log.Printf("[DEBUG] nomad: using %d parallel job submitters", workers)

	errCh := make(chan error, numJobs)
	stopCh := make(chan struct{})
	jobsCh := make(chan *api.Job, workers)
	defer close(stopCh)
	for w := workers; w > 0; w-- {
		go submitJobs(jobsAPI, jobsCh, stopCh, errCh)
	}

	log.Printf("[DEBUG] nomad: submitting %d jobs", numJobs)
	pending := make(map[string]*api.Job, numJobs)
	for n := 0; n < numJobs; n++ {
		dup, err := copystructure.Copy(baseJob)
		if err != nil {
			log.Fatalf("[ERR] nomad: failed to copy api job: %v", err)
		}

		// Give every copy its own ID and remember it until it is confirmed.
		clone := dup.(*api.Job)
		clone.ID = fmt.Sprintf("%s-%d", baseID, n)
		pending[clone.ID] = clone
		jobsCh <- clone
	}

	// Wait for one result per submitted job.
	for received := 0; received < numJobs; received++ {
		select {
		case submitErr := <-errCh:
			if submitErr != nil {
				log.Fatalf("[ERR] nomad: failed submitting job: %v", submitErr)
			}
		case <-stopCh:
			return 0
		}
	}

	// Whatever the server lists is confirmed; the rest is retried once.
	registered, _, err := jobsAPI.List(nil)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed listing jobs: %v", err)
	}
	for _, stub := range registered {
		delete(pending, stub.ID)
	}

	for id, missed := range pending {
		log.Printf("[DEBUG] nomad: failed submitting job %q; retrying", id)
		if _, _, err := jobsAPI.Register(missed, nil); err != nil {
			log.Printf("[ERR] nomad: failed submitting job: %v", err)
		}
	}

	return 0
}
Example #10
0
// handleStatus polls Nomad until the benchmark's evaluations and allocations
// have settled, or until maxWait has elapsed, and then prints the collected
// metrics to stdout as pipe-separated lines. It returns 0; setup errors
// terminate the process through log.Fatalf.
//
// The work happens in three phases:
//  1. Poll the evaluation list until at least numJobs evaluations exist for
//     the tracked job IDs ("<job id>-<index>") and none of them still needs
//     polling. Blocked evaluations are abandoned after blockedEvalTries
//     polls.
//  2. Poll the allocations of every completed evaluation, recording the
//     creation time per desired status, the "Received" and "Started" event
//     times and the failure reasons. Pending allocations are abandoned after
//     pendingAllocTries polls.
//  3. Print the aggregated counts, the evaluation triggers and every change
//     of desired status ("<from>-to-<to>") per allocation.
//
// Once the cutoff is exceeded, each phase performs at most one more query
// and continues with whatever data it has.
func handleStatus() int {
	// Parse the job file to get the total expected allocs
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}
	var totalAllocs int
	for _, group := range job.TaskGroups {
		totalAllocs += (group.Count * len(group.Tasks))
	}
	totalAllocs *= numJobs
	minEvals := numJobs
	log.Printf("[DEBUG] nomad: expecting %d allocs (%d evals minimum)", totalAllocs, minEvals)

	// Determine the set of jobs we should track.
	jobs := make(map[string]struct{})
	for i := 0; i < numJobs; i++ {
		// Increment the job ID
		jobs[fmt.Sprintf("%s-%d", job.ID, i)] = struct{}{}
	}

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	evalEndpoint := client.Evaluations()

	// Set up the args
	args := &api.QueryOptions{
		AllowStale: true,
	}

	// Wait for all the evals to be complete.
	cutoff := time.Now().Add(maxWait)
	evals := make(map[string]*api.Evaluation, minEvals)
	failedEvals := make(map[string]struct{})
	blockedEvals := make(map[string]int)
EVAL_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded {
			log.Printf("[DEBUG] nomad: next eval poll in %s", waitTime)
			time.Sleep(waitTime)
		}

		// Start the query
		resp, _, err := evalEndpoint.List(args)
		if err != nil {
			log.Printf("[ERR] nomad: failed querying evals: %v", err)
			if exceeded {
				// Past the cutoff nothing sleeps between iterations, so
				// retrying here would spin; give up on the evals instead.
				break EVAL_POLL
			}
			// Only log and continue to skip minor errors
			continue
		}

		// Filter out evaluations that aren't for the jobs we are tracking.
		var filter []*api.Evaluation
		for _, eval := range resp {
			if _, ok := jobs[eval.JobID]; ok {
				filter = append(filter, eval)
			}
		}

		// Wait til all evals have gone through the scheduler. Past the
		// cutoff, fall through and work with the evals that do exist rather
		// than polling forever.
		if n := len(filter); n < minEvals && !exceeded {
			log.Printf("[DEBUG] nomad: expect %d evals, have %d, polling again",
				minEvals, n)
			continue
		}

		// Ensure that all the evals are terminal, otherwise new allocations
		// could be made.
		needPoll := false
		for _, eval := range filter {
			switch eval.Status {
			case "failed":
				failedEvals[eval.ID] = struct{}{}
			case "complete":
				evals[eval.ID] = eval
			case "canceled":
				// Do nothing since it was a redundant eval.
			case "blocked":
				blockedEvals[eval.ID]++
				tries := blockedEvals[eval.ID]
				if tries < blockedEvalTries {
					needPoll = true
				} else if tries == blockedEvalTries {
					log.Printf("[DEBUG] nomad: abandoning blocked eval %q", eval.ID)
				}
			case "pending":
				needPoll = true
			}
		}

		if needPoll && !exceeded {
			continue EVAL_POLL
		}

		break
	}

	// We now have all the evals, gather the allocations and placement times.

	// scheduleTimes is a map of alloc ID to map of desired status and time.
	scheduleTimes := make(map[string]map[string]int64, totalAllocs)
	startTimes := make(map[string]int64, totalAllocs)    // When a task was started
	receivedTimes := make(map[string]int64, totalAllocs) // When a task was received by the client
	failedAllocs := make(map[string]int64)               // Time an alloc failed
	failedReason := make(map[string]string)              // Reason an alloc failed
	pendingAllocs := make(map[string]int)                // Counts how many time the alloc was in pending state
	first := true
ALLOC_POLL:
	for {
		// The first iteration queries immediately; later ones wait first.
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded && !first {
			log.Printf("[DEBUG] nomad: next alloc poll in %s", waitTime)
			time.Sleep(waitTime)
		}
		first = false

		needPoll := false
		for evalID := range evals {
			// Start the query
			resp, _, err := evalEndpoint.Allocations(evalID, args)
			if err != nil {
				// Only log and continue to skip minor errors
				log.Printf("[ERR] nomad: failed querying allocations: %v", err)
				continue
			}

			for _, alloc := range resp {
				// Capture the schedule time.
				allocTimes, ok := scheduleTimes[alloc.ID]
				if !ok {
					allocTimes = make(map[string]int64, 3)
					scheduleTimes[alloc.ID] = allocTimes
				}
				allocTimes[alloc.DesiredStatus] = alloc.CreateTime

				// Ensure that they have started or have failed.
				switch alloc.ClientStatus {
				case "failed":
					failedAllocs[alloc.ID] = alloc.CreateTime
					var failures []string
					for _, state := range alloc.TaskStates {
						// A failed task without events has no reason to
						// report; indexing it unguarded would panic.
						if state.State == "failed" && len(state.Events) > 0 {
							failures = append(failures, state.Events[0].DriverError)
						}
					}
					failedReason[alloc.ID] = strings.Join(failures, ",")
					continue
				case "pending":
					pendingAllocs[alloc.ID]++
					tries := pendingAllocs[alloc.ID]
					if tries < pendingAllocTries {
						needPoll = true
					} else if tries == pendingAllocTries {
						log.Printf("[DEBUG] nomad: abandoning alloc %q", alloc.ID)
					}
					continue
				}

				// Detect the start time.
				for _, state := range alloc.TaskStates {
					// No events yet means the task has not reported in.
					if len(state.Events) == 0 {
						needPoll = true
					}

					for _, event := range state.Events {
						switch event.Type {
						case "Started":
							startTimes[alloc.ID] = event.Time
						case "Received":
							receivedTimes[alloc.ID] = event.Time
						}
					}
				}
			}
		}

		if needPoll && !exceeded {
			continue ALLOC_POLL
		}

		break
	}

	// Print the failure reasons for client allocs.
	for id, reason := range failedReason {
		log.Printf("[DEBUG] nomad: alloc id %q failed on client: %v", id, reason)
	}

	// Print the results.
	if l := len(failedEvals); l != 0 {
		fmt.Fprintf(os.Stdout, "failed_evals|%f\n", float64(l))
	}
	for ts, count := range accumTimes(failedAllocs) {
		fmt.Fprintf(os.Stdout, "failed_allocs|%f|%d\n", float64(count), ts)
	}
	for ts, count := range accumTimes(startTimes) {
		fmt.Fprintf(os.Stdout, "running|%f|%d\n", float64(count), ts)
	}
	for ts, count := range accumTimes(receivedTimes) {
		fmt.Fprintf(os.Stdout, "received|%f|%d\n", float64(count), ts)
	}
	for ts, count := range accumTimesOn("run", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_run|%f|%d\n", float64(count), ts)
	}
	for ts, count := range accumTimesOn("failed", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_failed|%f|%d\n", float64(count), ts)
	}
	for ts, count := range accumTimesOn("stop", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_stop|%f|%d\n", float64(count), ts)
	}

	// Aggregate eval triggerbys.
	triggers := make(map[string]int, len(evals))
	for _, eval := range evals {
		triggers[eval.TriggeredBy]++
	}
	for trigger, count := range triggers {
		fmt.Fprintf(os.Stdout, "trigger:%s|%f\n", trigger, float64(count))
	}

	// Print if the scheduler changed scheduling decisions
	flips := make(map[string]map[string]int64) // alloc id -> map[flipType]time
	flipTypes := make(map[string]struct{})     // every flip type seen on any alloc
	for id, decisions := range scheduleTimes {
		if len(decisions) < 2 {
			continue
		}
		// Have decision -> time
		// 1) time -> decision
		// 2) sort times
		// 3) print transitions
		flips[id] = make(map[string]int64)
		inverted := make(map[int64]string, len(decisions))
		// NOTE(review): the int conversion truncates on platforms where int
		// is 32 bits wide.
		times := make([]int, 0, len(decisions))
		for k, v := range decisions {
			inverted[v] = k
			times = append(times, int(v))
		}
		sort.Ints(times)
		for i := 1; i < len(times); i++ {
			// inverted maps a time back to the decision name; the names,
			// not the times, make up the flip type.
			from := inverted[int64(times[i-1])]
			to := inverted[int64(times[i])]
			flipType := fmt.Sprintf("%s-to-%s", from, to)
			flips[id][flipType] = int64(times[i])
			flipTypes[flipType] = struct{}{}
		}
	}

	// Accumulate per flip type; flips itself is keyed by alloc ID.
	for flipType := range flipTypes {
		for ts, count := range accumTimesOn(flipType, flips) {
			fmt.Fprintf(os.Stdout, "%v|%f|%d\n", flipType, float64(count), ts)
		}
	}

	return 0
}