func cliStart(c *cli.Context) {
	config := nomadapi.DefaultConfig()
	config.Address = c.GlobalString("nomad-address")
	config.Region = c.GlobalString("nomad-region")
	config.WaitTime = time.Duration(c.GlobalInt("wait-time")) * time.Second

	client, err := nomadapi.NewClient(config)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	log := logrus.New()
	level, err := logrus.ParseLevel(c.GlobalString("log-level"))
	if err != nil {
		fmt.Println("incorrect log-level")
		os.Exit(2)
	}
	log.Out = os.Stderr
	log.Level = level

	usage := usage.NewUsage(client, time.Duration(c.GlobalInt("wait-time"))*time.Second, log)
	usage.Loop()

	context := api.NewContext(c.GlobalString("api-addr"), serviceVersion, usage, log, client)
	log.Fatal(api.ListenAndServe(context))
}
// Client is used to initialize and return a new API client using
// the default command line arguments and env vars.
func (m *Meta) Client() (*api.Client, error) {
	config := api.DefaultConfig()
	if v := os.Getenv(EnvNomadAddress); v != "" {
		config.Address = v
	}
	if m.flagAddress != "" {
		config.Address = m.flagAddress
	}
	if v := os.Getenv(EnvNomadRegion); v != "" {
		config.Region = v
	}
	if m.region != "" {
		config.Region = m.region
	}

	// If we need custom TLS configuration, then set it
	if m.caCert != "" || m.caPath != "" || m.clientCert != "" || m.clientKey != "" || m.insecure {
		t := &api.TLSConfig{
			CACert:     m.caCert,
			CAPath:     m.caPath,
			ClientCert: m.clientCert,
			ClientKey:  m.clientKey,
			Insecure:   m.insecure,
		}
		config.TLSConfig = t
	}

	return api.NewClient(config)
}
func (c *Builder) CreateNomadJob(pipeline *structs.Pipeline, runId int) (*NomadJob, error) {
	config := make(map[string]interface{})
	config["container"] = pipeline.Container
	config["pipeline"] = pipeline.Name
	config["run_id"] = strconv.Itoa(runId)
	config["server_url"] = c.ServerURL

	resources := &nomadStructs.Resources{
		CPU:      1024,
		MemoryMB: 128,
	}

	task := &nomadStructs.Task{
		Name:      pipeline.Name,
		Driver:    "gypsy",
		Config:    config,
		Resources: resources,
	}

	group := &nomadStructs.TaskGroup{
		Name:          pipeline.Name,
		Count:         1,
		Tasks:         []*nomadStructs.Task{task},
		RestartPolicy: nomadStructs.NewRestartPolicy("batch"),
	}

	job := &nomadStructs.Job{
		ID:          pipeline.Name,
		Name:        pipeline.Name,
		Region:      "global",
		Priority:    50,
		Datacenters: []string{"dc1"},
		Type:        "batch",
		TaskGroups:  []*nomadStructs.TaskGroup{group},
	}

	if err := job.Validate(); err != nil {
		log.Errorf("Nomad job validation failed. Error: %s\n", err)
		return nil, err
	}

	apiJob, err := convertJob(job)
	if err != nil {
		log.Errorf("Failed to convert nomad job in api call. Error: %s\n", err)
		return nil, err
	}

	nomadConfig := nomadApi.DefaultConfig()
	nomadClient, err := nomadApi.NewClient(nomadConfig)
	if err != nil {
		log.Errorf("Error creating nomad api client: %s", err)
		return nil, fmt.Errorf("error creating nomad api client: %s", err)
	}

	evalId, _, nomadErr := nomadClient.Jobs().Register(apiJob, nil)
	if nomadErr != nil {
		log.Errorf("Error submitting job: %s", nomadErr)
		return nil, fmt.Errorf("error submitting job: %s", nomadErr)
	}

	log.Infof("Successfully submitted Nomad job. Eval id: %s\n", evalId)
	return &NomadJob{
		Pipeline: pipeline,
		Job:      job,
	}, nil
}
// Client is used to initialize and return a new API client using
// the default command line arguments and env vars.
func (m *Meta) Client() (*api.Client, error) {
	config := api.DefaultConfig()
	if v := os.Getenv(EnvNomadAddress); v != "" {
		config.Address = v
	}
	if m.flagAddress != "" {
		config.Address = m.flagAddress
	}
	return api.NewClient(config)
}
func providerConfigure(d *schema.ResourceData) (interface{}, error) {
	config := api.DefaultConfig()
	config.Address = d.Get("address").(string)
	config.Region = d.Get("region").(string)

	client, err := api.NewClient(config)
	if err != nil {
		return nil, fmt.Errorf("failed to configure Nomad API: %s", err)
	}

	return client, nil
}
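// A minimal companion sketch, not part of the snippet above: once
// providerConfigure has returned the *api.Client, a Terraform resource
// function would typically receive it back through its meta argument.
// The resourceJobExists name and the "404" error check are assumptions made
// for illustration; this also assumes the strings package is imported.
func resourceJobExists(d *schema.ResourceData, meta interface{}) (bool, error) {
	client := meta.(*api.Client)

	// Ask Nomad whether the job behind this resource ID still exists.
	_, _, err := client.Jobs().Info(d.Id(), nil)
	if err != nil {
		if strings.Contains(err.Error(), "404") {
			return false, nil
		}
		return false, fmt.Errorf("error checking for job %q: %s", d.Id(), err)
	}
	return true, nil
}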
func handleTeardown() int {
	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}

	// Iterate all of the jobs and stop them
	log.Printf("[DEBUG] nomad: deregistering benchmark jobs")
	jobs, _, err := client.Jobs().List(nil)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed listing jobs: %v", err)
	}
	for _, job := range jobs {
		if _, _, err := client.Jobs().Deregister(job.ID, nil); err != nil {
			log.Fatalf("[ERR] nomad: failed deregistering job: %v", err)
		}
	}
	return 0
}
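// A hedged follow-up sketch, not part of the benchmark tool above: after
// deregistering, a caller might poll the job list until every job reports a
// terminal status before tearing the cluster down. The waitForTeardown name,
// the timeout handling, and the "dead" status check are assumptions for
// illustration.
func waitForTeardown(client *api.Client, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		jobs, _, err := client.Jobs().List(nil)
		if err != nil {
			return err
		}

		// Consider teardown complete once every remaining job is dead.
		done := true
		for _, job := range jobs {
			if job.Status != "dead" {
				done = false
				break
			}
		}
		if done {
			return nil
		}
		time.Sleep(500 * time.Millisecond)
	}
	return fmt.Errorf("jobs still running after %s", timeout)
}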
func testServer(
	t *testing.T,
	cb testutil.ServerConfigCallback) (*testutil.TestServer, *api.Client, string) {
	// Always run these tests in parallel.
	if _, ok := seen[t]; !ok {
		seen[t] = struct{}{}
		t.Parallel()
	}

	// Make a new test server
	srv := testutil.NewTestServer(t, cb)

	// Make a client
	clientConf := api.DefaultConfig()
	clientConf.Address = "http://" + srv.HTTPAddr
	client, err := api.NewClient(clientConf)
	if err != nil {
		t.Fatalf("err: %s", err)
	}
	return srv, client, clientConf.Address
}
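// A hypothetical test built on the helper above: it starts a throwaway Nomad
// agent, stops it when the test finishes, and checks that the HTTP API is
// answering. The test name and the leader check are illustrative only; they
// are not taken from the original test suite.
func TestServer_Basic(t *testing.T) {
	srv, client, url := testServer(t, nil)
	defer srv.Stop()

	// A freshly started single-node test server should report itself as leader.
	leader, err := client.Status().Leader()
	if err != nil {
		t.Fatalf("err: %s", err)
	}
	if leader == "" {
		t.Fatalf("no leader reported by server at %s", url)
	}
}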
func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	total := 0
	if len(os.Args) != 2 {
		fmt.Println("need 1 arg")
		return
	}
	if total, err = strconv.Atoi(os.Args[1]); err != nil {
		fmt.Println("arg 1 must be number")
		return
	}

	fh, err := ioutil.TempFile("", "bench")
	if err != nil {
		fmt.Println(err.Error())
		return
	}
	defer os.Remove(fh.Name())

	jobContent := fmt.Sprintf(job, total)
	if _, err := fh.WriteString(jobContent); err != nil {
		fmt.Println(err.Error())
		return
	}
	fh.Close()

	isRunning := false
	allocClient := client.Allocations()

	cmd := exec.Command("nomad", "run", fh.Name())
	if err := cmd.Run(); err != nil {
		fmt.Println("nomad run failed: " + err.Error())
		return
	}
	start := time.Now()

	last := 0
	fmt.Printf("benchmarking %d allocations\n", total)
	opts := &api.QueryOptions{AllowStale: true}
	for {
		time.Sleep(100 * time.Millisecond)

		allocs, _, err := allocClient.List(opts)
		if err != nil {
			fmt.Println(err.Error())
			// keep going to paper over minor errors
			continue
		}
		now := time.Now()

		running := 0
		for _, alloc := range allocs {
			if alloc.ClientStatus == structs.AllocClientStatusRunning {
				if !isRunning {
					fmt.Printf("time to first running: %s\n", now.Sub(start))
					isRunning = true
				}
				running++
			}
		}

		if last != running {
			fmt.Printf("%d running after %s\n", running, now.Sub(start))
		}
		last = running

		if running == total {
			return
		}
	}
}
func handleRun() int {
	// Parse the job file
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}

	// Convert to an API struct for submission
	apiJob, err := convertStructJob(job)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed converting job: %v", err)
	}
	jobID := apiJob.ID

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	jobs := client.Jobs()

	jobSubmitters := 64
	if numJobs < jobSubmitters {
		jobSubmitters = numJobs
	}
	log.Printf("[DEBUG] nomad: using %d parallel job submitters", jobSubmitters)

	// Submit the job the requested number of times
	errCh := make(chan error, numJobs)
	stopCh := make(chan struct{})
	jobsCh := make(chan *api.Job, jobSubmitters)
	defer close(stopCh)
	for i := 0; i < jobSubmitters; i++ {
		go submitJobs(jobs, jobsCh, stopCh, errCh)
	}

	log.Printf("[DEBUG] nomad: submitting %d jobs", numJobs)
	submitting := make(map[string]*api.Job, numJobs)
	for i := 0; i < numJobs; i++ {
		copy, err := copystructure.Copy(apiJob)
		if err != nil {
			log.Fatalf("[ERR] nomad: failed to copy api job: %v", err)
		}

		// Increment the job ID
		jobCopy := copy.(*api.Job)
		jobCopy.ID = fmt.Sprintf("%s-%d", jobID, i)
		submitting[jobCopy.ID] = jobCopy
		jobsCh <- jobCopy
	}

	// Collect errors if any
	for i := 0; i < numJobs; i++ {
		select {
		case err := <-errCh:
			if err != nil {
				log.Fatalf("[ERR] nomad: failed submitting job: %v", err)
			}
		case <-stopCh:
			return 0
		}
	}

	// Get the jobs that were submitted.
	submitted, _, err := jobs.List(nil)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed listing jobs: %v", err)
	}

	// See if anything didn't get registered
	for _, job := range submitted {
		delete(submitting, job.ID)
	}

	// Resubmit anything that was missed
	for id, missed := range submitting {
		log.Printf("[DEBUG] nomad: failed submitting job %q; retrying", id)
		_, _, err := jobs.Register(missed, nil)
		if err != nil {
			log.Printf("[ERR] nomad: failed submitting job: %v", err)
		}
	}

	return 0
}
func handleStatus() int {
	// Parse the job file to get the total expected allocs
	job, err := jobspec.ParseFile(jobFile)
	if err != nil {
		log.Fatalf("[ERR] nomad: failed parsing job file: %v", err)
	}
	var totalAllocs int
	for _, group := range job.TaskGroups {
		totalAllocs += (group.Count * len(group.Tasks))
	}
	totalAllocs *= numJobs
	minEvals := numJobs
	log.Printf("[DEBUG] nomad: expecting %d allocs (%d evals minimum)", totalAllocs, minEvals)

	// Determine the set of jobs we should track.
	jobs := make(map[string]struct{})
	for i := 0; i < numJobs; i++ {
		// Increment the job ID
		jobs[fmt.Sprintf("%s-%d", job.ID, i)] = struct{}{}
	}

	// Get the API client
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatalf("[ERR] nomad: failed creating nomad client: %v", err)
	}
	evalEndpoint := client.Evaluations()

	// Set up the args
	args := &api.QueryOptions{
		AllowStale: true,
	}

	// Wait for all the evals to be complete.
	cutoff := time.Now().Add(maxWait)
	evals := make(map[string]*api.Evaluation, minEvals)
	failedEvals := make(map[string]struct{})
	blockedEvals := make(map[string]int)

EVAL_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded {
			log.Printf("[DEBUG] nomad: next eval poll in %s", waitTime)
			time.Sleep(waitTime)
		}

		// Start the query
		resp, _, err := evalEndpoint.List(args)
		if err != nil {
			// Only log and continue to skip minor errors
			log.Printf("[ERR] nomad: failed querying evals: %v", err)
			continue
		}

		// Filter out evaluations that aren't for the jobs we are tracking.
		var filter []*api.Evaluation
		for _, eval := range resp {
			if _, ok := jobs[eval.JobID]; ok {
				filter = append(filter, eval)
			}
		}

		// Wait til all evals have gone through the scheduler.
		if n := len(filter); n < minEvals {
			log.Printf("[DEBUG] nomad: expect %d evals, have %d, polling again", minEvals, n)
			continue
		}

		// Ensure that all the evals are terminal, otherwise new allocations
		// could be made.
		needPoll := false
		for _, eval := range filter {
			switch eval.Status {
			case "failed":
				failedEvals[eval.ID] = struct{}{}
			case "complete":
				evals[eval.ID] = eval
			case "canceled":
				// Do nothing since it was a redundant eval.
			case "blocked":
				blockedEvals[eval.ID]++
				tries := blockedEvals[eval.ID]
				if tries < blockedEvalTries {
					needPoll = true
				} else if tries == blockedEvalTries {
					log.Printf("[DEBUG] nomad: abandoning blocked eval %q", eval.ID)
				}
			case "pending":
				needPoll = true
			}
		}

		if needPoll && !exceeded {
			continue EVAL_POLL
		}
		break
	}

	// We now have all the evals, gather the allocations and placement times.
	// scheduleTimes is a map of alloc ID to map of desired status and time.
	scheduleTimes := make(map[string]map[string]int64, totalAllocs)
	startTimes := make(map[string]int64, totalAllocs)    // When a task was started
	receivedTimes := make(map[string]int64, totalAllocs) // When a task was received by the client
	failedAllocs := make(map[string]int64)               // Time an alloc failed
	failedReason := make(map[string]string)              // Reason an alloc failed
	pendingAllocs := make(map[string]int)                // Counts how many times the alloc was in the pending state
	first := true

ALLOC_POLL:
	for {
		waitTime, exceeded := getSleepTime(cutoff)
		if !exceeded && !first {
			log.Printf("[DEBUG] nomad: next eval poll in %s", waitTime)
			time.Sleep(waitTime)
		}
		first = false

		needPoll := false
		for evalID := range evals {
			// Start the query
			resp, _, err := evalEndpoint.Allocations(evalID, args)
			if err != nil {
				// Only log and continue to skip minor errors
				log.Printf("[ERR] nomad: failed querying allocations: %v", err)
				continue
			}

			for _, alloc := range resp {
				// Capture the schedule time.
				allocTimes, ok := scheduleTimes[alloc.ID]
				if !ok {
					allocTimes = make(map[string]int64, 3)
					scheduleTimes[alloc.ID] = allocTimes
				}
				allocTimes[alloc.DesiredStatus] = alloc.CreateTime

				// Ensure that they have started or have failed.
				switch alloc.ClientStatus {
				case "failed":
					failedAllocs[alloc.ID] = alloc.CreateTime
					var failures []string
					for _, state := range alloc.TaskStates {
						if state.State == "failed" {
							failures = append(failures, state.Events[0].DriverError)
						}
					}
					failedReason[alloc.ID] = strings.Join(failures, ",")
					continue
				case "pending":
					pendingAllocs[alloc.ID]++
					tries := pendingAllocs[alloc.ID]
					if tries < pendingAllocTries {
						needPoll = true
					} else if tries == pendingAllocTries {
						log.Printf("[DEBUG] nomad: abandoning alloc %q", alloc.ID)
					}
					continue
				}

				// Detect the start time.
				for _, state := range alloc.TaskStates {
					if len(state.Events) == 0 {
						needPoll = true
					}

					for _, event := range state.Events {
						time := event.Time
						switch event.Type {
						case "Started":
							startTimes[alloc.ID] = time
						case "Received":
							receivedTimes[alloc.ID] = time
						}
					}
				}
			}
		}

		if needPoll && !exceeded {
			continue ALLOC_POLL
		}
		break
	}

	// Print the failure reasons for client allocs.
	for id, reason := range failedReason {
		log.Printf("[DEBUG] nomad: alloc id %q failed on client: %v", id, reason)
	}

	// Print the results.
	if l := len(failedEvals); l != 0 {
		fmt.Fprintf(os.Stdout, "failed_evals|%f\n", float64(l))
	}
	for time, count := range accumTimes(failedAllocs) {
		fmt.Fprintf(os.Stdout, "failed_allocs|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimes(startTimes) {
		fmt.Fprintf(os.Stdout, "running|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimes(receivedTimes) {
		fmt.Fprintf(os.Stdout, "received|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimesOn("run", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_run|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimesOn("failed", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_failed|%f|%d\n", float64(count), time)
	}
	for time, count := range accumTimesOn("stop", scheduleTimes) {
		fmt.Fprintf(os.Stdout, "placed_stop|%f|%d\n", float64(count), time)
	}

	// Aggregate the evals' TriggeredBy reasons.
	triggers := make(map[string]int, len(evals))
	for _, eval := range evals {
		triggers[eval.TriggeredBy]++
	}
	for trigger, count := range triggers {
		fmt.Fprintf(os.Stdout, "trigger:%s|%f\n", trigger, float64(count))
	}

	// Print if the scheduler changed scheduling decisions
	flips := make(map[string]map[string]int64) // alloc id -> map[flipType]time
	flipTypes := make(map[string]struct{})
	for id, decisions := range scheduleTimes {
		if len(decisions) < 2 {
			continue
		}

		// Have decision -> time
		// 1) invert to time -> decision
		// 2) sort times
		// 3) print transitions
		flips[id] = make(map[string]int64)
		inverted := make(map[int64]string, len(decisions))
		times := make([]int, 0, len(decisions))
		for k, v := range decisions {
			inverted[v] = k
			times = append(times, int(v))
		}
		sort.Ints(times)
		for i := 1; i < len(times); i++ {
			from := inverted[int64(times[i-1])]
			to := inverted[int64(times[i])]
			flipType := fmt.Sprintf("%s-to-%s", from, to)
			flips[id][flipType] = int64(times[i])
			flipTypes[flipType] = struct{}{}
		}
	}

	for flipType := range flipTypes {
		for time, count := range accumTimesOn(flipType, flips) {
			fmt.Fprintf(os.Stdout, "%v|%f|%d\n", flipType, float64(count), time)
		}
	}

	return 0
}