// Finishes the job in the Buildkite Agent API. This call will keep on retrying // forever until it finally gets a successfull response from the API. func (r *JobRunner) finishJob(finishedAt time.Time, exitStatus string, failedChunkCount int) error { r.Job.FinishedAt = finishedAt.UTC().Format(time.RFC3339Nano) r.Job.ExitStatus = exitStatus r.Job.ChunksFailedCount = failedChunkCount return retry.Do(func(s *retry.Stats) error { response, err := r.APIClient.Jobs.Finish(r.Job) if err != nil { // If the API returns with a 422, that means that we // succesfully tried to finish the job, but Buildkite // rejected the finish for some reason. This can // sometimes mean that Buildkite has cancelled the job // before we get a chance to send the final API call // (maybe this agent took too long to kill the // process). In that case, we don't want to keep trying // to finish the job forever so we'll just bail out and // go find some more work to do. if response != nil && response.StatusCode == 422 { logger.Warn("Buildkite rejected the call to finish the job (%s)", err) s.Break() } else { logger.Warn("%s (%s)", err, s) } } return err }, &retry.Config{Forever: true, Interval: 1 * time.Second}) }
func (d Download) Start() error { return retry.Do(func(s *retry.Stats) error { err := d.try() if err != nil { logger.Warn("Error trying to download %s (%s) %s", d.URL, err, s) } return err }, &retry.Config{Maximum: d.Retries, Interval: 1 * time.Second}) }
func (a *ArtifactBatchCreator) Create() ([]*api.Artifact, error) { length := len(a.Artifacts) chunks := 30 // Split into the artifacts into chunks so we're not uploading a ton of // files at once. for i := 0; i < length; i += chunks { j := i + chunks if length < j { j = length } // The artifacts that will be uploaded in this chunk theseArtiacts := a.Artifacts[i:j] // An ID is required so Buildkite can ensure this create // operation is idompotent (if we try and upload the same ID // twice, it'll just return the previous data and skip the // upload) batch := &api.ArtifactBatch{api.NewUUID(), theseArtiacts, a.UploadDestination} logger.Info("Creating (%d-%d)/%d artifacts", i, j, length) var creation *api.ArtifactBatchCreateResponse var resp *api.Response var err error // Retry the batch upload a couple of times err = retry.Do(func(s *retry.Stats) error { creation, resp, err = a.APIClient.Artifacts.Create(a.JobID, batch) if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404 || resp.StatusCode == 500) { s.Break() } if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) // Did the batch creation eventually fail? if err != nil { return nil, err } // Save the id and instructions to each artifact index := 0 for _, id := range creation.ArtifactIDs { theseArtiacts[index].ID = id theseArtiacts[index].UploadInstructions = creation.UploadInstructions index += 1 } } return a.Artifacts, nil }
func (r *JobRunner) onUploadHeaderTime(cursor int, total int, times map[string]string) { retry.Do(func(s *retry.Stats) error { _, err := r.APIClient.HeaderTimes.Save(r.Job.ID, &api.HeaderTimes{Times: times}) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 5 * time.Second}) }
// Connects the agent to the Buildkite Agent API, retrying up to 30 times if it // fails. func (a *AgentWorker) Connect() error { return retry.Do(func(s *retry.Stats) error { _, err := a.APIClient.Agents.Connect() if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) }
// Connects the agent to the Buildkite Agent API, retrying up to 30 times if it // fails. func (a *AgentWorker) Connect() error { // Update the proc title a.UpdateProcTitle("connecting") return retry.Do(func(s *retry.Stats) error { _, err := a.APIClient.Agents.Connect() if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) }
// Call when a chunk is ready for upload. It retry the chunk upload with an // interval before giving up. func (r *JobRunner) onUploadChunk(chunk *LogStreamerChunk) error { return retry.Do(func(s *retry.Stats) error { _, err := r.APIClient.Chunks.Upload(r.Job.ID, &api.Chunk{ Data: chunk.Data, Sequence: chunk.Order, }) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) }
// Starts the job in the Buildkite Agent API. We'll retry on connection-related // issues, but if a connection succeeds and we get an error response back from // Buildkite, we won't bother retrying. For example, a "no such host" will // retry, but a 422 from Buildkite won't. func (r *JobRunner) startJob(startedAt time.Time) error { r.Job.StartedAt = startedAt.UTC().Format(time.RFC3339Nano) return retry.Do(func(s *retry.Stats) error { _, err := r.APIClient.Jobs.Start(r.Job) if err != nil { if api.IsRetryableError(err) { logger.Warn("%s (%s)", err, s) } else { logger.Warn("Buildkite rejected the call to start the job (%s)", err) s.Break() } } return err }, &retry.Config{Maximum: 30, Interval: 5 * time.Second}) }
// Performs a heatbeat func (a *AgentWorker) Heartbeat() error { var beat *api.Heartbeat var err error // Retry the heartbeat a few times err = retry.Do(func(s *retry.Stats) error { beat, _, err = a.APIClient.Heartbeats.Beat() if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 5, Interval: 1 * time.Second}) if err != nil { return err } logger.Debug("Heartbeat sent at %s and received at %s", beat.SentAt, beat.ReceivedAt) return nil }
func (b *Buildkite) WriteJobMetadata(metadata map[string]string) error { client := b.agentClient for k, v := range metadata { metadatum := &buildkite.MetaData{ Key: k, Value: v, } err := retry.Do(func(s *retry.Stats) error { resp, err := client.MetaData.Set(b.jobId, metadatum) if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404) { s.Break() } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { return fmt.Errorf("error setting metadata %s: %s", k, err) } } return nil }
func (a *ArtifactBatchCreator) Create() ([]*api.Artifact, error) { length := len(a.Artifacts) chunks := 10 uploaded := []*api.Artifact{} // Split into the artifacts into chunks so we're not uploading a ton of // files at once. for i := 0; i < length; i += chunks { j := i + chunks if length < j { j = length } artifacts := a.Artifacts[i:j] logger.Info("Creating (%d-%d)/%d artifacts", i, j, length) var u []*api.Artifact var err error // Retry the batch upload a couple of times err = retry.Do(func(s *retry.Stats) error { u, _, err = a.APIClient.Artifacts.Create(a.JobID, artifacts) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { return nil, err } uploaded = append(uploaded, u...) } return uploaded, nil }
// Takes the agent template and returns a registered agent. The registered // agent includes the Access Token used to communicate with the Buildkite Agent // API func (r *AgentPool) RegisterAgent(agent *api.Agent) (*api.Agent, error) { var registered *api.Agent var err error var resp *api.Response register := func(s *retry.Stats) error { registered, resp, err = r.APIClient.Agents.Register(agent) if err != nil { if resp != nil && resp.StatusCode == 401 { logger.Warn("Buildkite rejected the registration (%s)", err) s.Break() } else { logger.Warn("%s (%s)", err, s) } } return err } err = retry.Do(register, &retry.Config{Maximum: 30, Interval: 1 * time.Second}) return registered, err }
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error { var uploader Uploader // Determine what uploader to use if a.Destination != "" { if strings.HasPrefix(a.Destination, "s3://") { uploader = new(S3Uploader) } else { return errors.New("Unknown upload destination: " + a.Destination) } } else { uploader = new(FormUploader) } // Setup the uploader err := uploader.Setup(a.Destination, a.APIClient.DebugHTTP) if err != nil { return err } // Set the URL's of the artifacts based on the uploader for _, artifact := range artifacts { artifact.URL = uploader.URL(artifact) } // Create the artifacts on Buildkite batchCreator := ArtifactBatchCreator{ APIClient: a.APIClient, JobID: a.JobID, Artifacts: artifacts, UploadDestination: a.Destination, } artifacts, err = batchCreator.Create() if err != nil { return err } // Prepare a concurrency pool to upload the artifacts p := pool.New(pool.MaxConcurrencyLimit) errors := []error{} // Create a wait group so we can make sure the uploader waits for all // the artifact states to upload before finishing var stateUploaderWaitGroup sync.WaitGroup stateUploaderWaitGroup.Add(1) // A map to keep track of artifact states and how many we've uploaded artifactsStates := make(map[string]string) artifactStatesUploaded := 0 // Spin up a gourtine that'll uploading artifact statuses every few // seconds in batches go func() { for artifactStatesUploaded < len(artifacts) { statesToUpload := make(map[string]string) // Grab all the states we need to upload, and remove // them from the tracking map for id, state := range artifactsStates { statesToUpload[id] = state delete(artifactsStates, id) } if len(statesToUpload) > 0 { artifactStatesUploaded += len(statesToUpload) for id, state := range statesToUpload { logger.Debug("Artifact `%s` has state `%s`", id, state) } // Update the states of the artifacts in bulk. err = retry.Do(func(s *retry.Stats) error { _, err = a.APIClient.Artifacts.Update(a.JobID, statesToUpload) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { logger.Error("Error uploading artifact states: %s", err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() } logger.Debug("Uploaded %d artfact states (%d/%d)", len(statesToUpload), artifactStatesUploaded, len(artifacts)) } // Check again for states to upload in a few seconds time.Sleep(1 * time.Second) } stateUploaderWaitGroup.Done() }() for _, artifact := range artifacts { // Create new instance of the artifact for the goroutine // See: http://golang.org/doc/effective_go.html#channels artifact := artifact p.Spawn(func() { // Show a nice message that we're starting to upload the file logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize) // Upload the artifact and then set the state depending // on whether or not it passed. We'll retry the upload // a couple of times before giving up. err = retry.Do(func(s *retry.Stats) error { err := uploader.Upload(artifact) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) var state string // Did the upload eventually fail? if err != nil { logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() state = "error" } else { state = "finished" } artifactsStates[artifact.ID] = state }) } // Wait for the pool to finish p.Wait() // Wait for the statuses to finish uploading stateUploaderWaitGroup.Wait() if len(errors) > 0 { logger.Fatal("There were errors with uploading some of the artifacts") } return nil }
} } // Create the API client client := agent.APIClient{ Endpoint: cfg.Endpoint, Token: cfg.AgentAccessToken, }.Create() // Generate a UUID that will identifiy this pipeline change. We // do this outside of the retry loop because we want this UUID // to be the same for each attempt at updating the pipeline. uuid := api.NewUUID() // Retry the pipeline upload a few times before giving up err = retry.Do(func(s *retry.Stats) error { _, err = client.Pipelines.Upload(cfg.Job, &api.Pipeline{UUID: uuid, Data: input, FileName: filename, Replace: cfg.Replace}) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 5, Interval: 1 * time.Second}) if err != nil { logger.Fatal("Failed to upload and process pipeline: %s", err) } logger.Info("Successfully uploaded and parsed pipeline config") }, }
// Create the API client client := agent.APIClient{ Endpoint: cfg.Endpoint, Token: cfg.AgentAccessToken, }.Create() // Find the meta data value var metaData *api.MetaData var err error var resp *api.Response err = retry.Do(func(s *retry.Stats) error { metaData, resp, err = client.MetaData.Get(cfg.Job, cfg.Key) // Don't bother retrying if the response was one of these statuses if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404 || resp.StatusCode == 400) { s.Break() } if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 5 * time.Second}) if err != nil { logger.Fatal("Failed to get meta-data: %s", err) } // Output the value to STDOUT fmt.Print(metaData.Value) }, }
// Create the API client client := agent.APIClient{ Endpoint: cfg.Endpoint, Token: cfg.AgentAccessToken, }.Create() // Create the meta data to set metaData := &api.MetaData{ Key: cfg.Key, Value: cfg.Value, } // Set the meta data err := retry.Do(func(s *retry.Stats) error { resp, err := client.MetaData.Set(cfg.Job, metaData) if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404) { s.Break() } if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { logger.Fatal("Failed to set meta-data: %s", err) } }, }
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error { var uploader Uploader // Determine what uploader to use if a.Destination != "" { if strings.HasPrefix(a.Destination, "s3://") { uploader = new(S3Uploader) } else { return errors.New("Unknown upload destination: " + a.Destination) } } else { uploader = new(FormUploader) } // Setup the uploader err := uploader.Setup(a.Destination) if err != nil { return err } // Set the URL's of the artifacts based on the uploader for _, artifact := range artifacts { artifact.URL = uploader.URL(artifact) } // Create the artifacts on Buildkite batchCreator := ArtifactBatchCreator{ APIClient: a.APIClient, JobID: a.JobID, Artifacts: artifacts, } artifacts, err = batchCreator.Create() if err != nil { return err } p := pool.New(pool.MaxConcurrencyLimit) errors := []error{} for _, artifact := range artifacts { // Create new instance of the artifact for the goroutine // See: http://golang.org/doc/effective_go.html#channels artifact := artifact p.Spawn(func() { // Show a nice message that we're starting to upload the file logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize) // Upload the artifact and then set the state depending // on whether or not it passed. We'll retry the upload // a couple of times before giving up. err = retry.Do(func(s *retry.Stats) error { err := uploader.Upload(artifact) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { artifact.State = "error" logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() } else { artifact.State = "finished" } // Update the state of the artifact on Buildkite, we // retry this as well. err = retry.Do(func(s *retry.Stats) error { _, _, err = a.APIClient.Artifacts.Update(a.JobID, artifact) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { logger.Error("Error marking artifact %s as uploaded: %s", artifact.Path, err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() } }) } p.Wait() if len(errors) > 0 { logger.Fatal("There were errors with uploading some of the artifacts") } return nil }
// Performs a ping, which returns what action the agent should take next. func (a *AgentWorker) Ping() { // Update the proc title a.UpdateProcTitle("pinging") ping, _, err := a.APIClient.Pings.Get() if err != nil { // If a ping fails, we don't really care, because it'll // ping again after the interval. logger.Warn("Failed to ping: %s", err) return } // Should we switch endpoints? if ping.Endpoint != "" && ping.Endpoint != a.Agent.Endpoint { // Before switching to the new one, do a ping test to make sure it's // valid. If it is, switch and carry on, otherwise ignore the switch // for now. newAPIClient := APIClient{Endpoint: ping.Endpoint, Token: a.Agent.AccessToken}.Create() newPing, _, err := newAPIClient.Pings.Get() if err != nil { logger.Warn("Failed to ping the new endpoint %s - ignoring switch for now (%s)", ping.Endpoint, err) } else { // Replace the APIClient and process the new ping a.APIClient = newAPIClient a.Agent.Endpoint = ping.Endpoint ping = newPing } } // Is there a message that should be shown in the logs? if ping.Message != "" { logger.Info(ping.Message) } // Should the agent disconnect? if ping.Action == "disconnect" { a.Stop(false) return } // If we don't have a job, there's nothing to do! if ping.Job == nil { // Update the proc title a.UpdateProcTitle("idle") return } // Update the proc title a.UpdateProcTitle(fmt.Sprintf("job %s", strings.Split(ping.Job.ID, "-")[0])) logger.Info("Assigned job %s. Accepting...", ping.Job.ID) // Accept the job. We'll retry on connection related issues, but if // Buildkite returns a 422 or 500 for example, we'll just bail out, // re-ping, and try the whole process again. var accepted *api.Job retry.Do(func(s *retry.Stats) error { accepted, _, err = a.APIClient.Jobs.Accept(ping.Job) if err != nil { if api.IsRetryableError(err) { logger.Warn("%s (%s)", err, s) } else { logger.Warn("Buildkite rejected the call to accept the job (%s)", err) s.Break() } } return err }, &retry.Config{Maximum: 30, Interval: 1 * time.Second}) // If `accepted` is nil, then the job was never accepted if accepted == nil { logger.Error("Failed to accept job") return } // Now that the job has been accepted, we can start it. a.jobRunner, err = JobRunner{ Endpoint: accepted.Endpoint, Agent: a.Agent, AgentConfiguration: a.AgentConfiguration, Job: accepted, }.Create() // Was there an error creating the job runner? if err != nil { logger.Error("Failed to initialize job: %s", err) return } // Start running the job if err = a.jobRunner.Run(); err != nil { logger.Error("Failed to run job: %s", err) } // No more job, no more runner. a.jobRunner = nil }
// PostProcess sends the Artifact to Buildkite func (p *PostProcessor) PostProcess(ui packer.Ui, artifact packer.Artifact) (packer.Artifact, bool, error) { buildkiteEnabled := os.Getenv("BUILDKITE") if buildkiteEnabled != "true" { ui.Message("This Packer build is not run within Buildkite. Skipping the BuildKite post-processor.") return artifact, true, nil } client := p.client.Create() metadata := make([]buildkite.MetaData, 0, 2) builderID := artifact.BuilderId() // packer's artifact model doesn't support AMIs very well since // both a region and an id need to be encoded into a single field. // We handle it as a special case so we can make more useful metadata // in the buildkite job. // N.B. this doesn't support AMIs being created across multiple regions. if builderID == "mitchellh.amazonebs" || builderID == "mitchellh.amazoninstance" || builderID == "mitchellh.amazonchroot" { packedID := artifact.Id() parts := strings.SplitN(packedID, ":", 2) idKeyName := "artifact_ami_id" regionKeyName := "artifact_ami_region" // override key names with an optional prefixed version if p.config.Prefix != "" { idKeyName = fmt.Sprintf("%s_id", p.config.Prefix) regionKeyName = fmt.Sprintf("%s_region", p.config.Prefix) } metadata = append(metadata, buildkite.MetaData{ Key: idKeyName, Value: parts[1], }) metadata = append(metadata, buildkite.MetaData{ Key: regionKeyName, Value: parts[0], }) } else { id := artifact.Id() idKeyName := "artifact_id" // override key name with an optional prefixed version if p.config.Prefix != "" { idKeyName = fmt.Sprintf("%s_id", p.config.Prefix) } if id != "" { metadata = append(metadata, buildkite.MetaData{ Key: idKeyName, Value: id, }) } // TODO: Support uploading files as BuildKite artifacts } if len(metadata) > 0 { ui.Message("Setting metadata in BuildKite:") for _, item := range metadata { ui.Message(fmt.Sprintf("- %s: %s", item.Key, item.Value)) err := retry.Do(func(s *retry.Stats) error { resp, err := client.MetaData.Set(p.jobID, &item) if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404) { s.Break() } if err != nil { log.Println(err) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { return nil, false, err } } } return artifact, true, nil }