Пример #1
0
// Finishes the job in the Buildkite Agent API. This call will keep on retrying
// forever until it finally gets a successfull response from the API.
func (r *JobRunner) finishJob(finishedAt time.Time, exitStatus string, failedChunkCount int) error {
	r.Job.FinishedAt = finishedAt.UTC().Format(time.RFC3339Nano)
	r.Job.ExitStatus = exitStatus
	r.Job.ChunksFailedCount = failedChunkCount

	return retry.Do(func(s *retry.Stats) error {
		response, err := r.APIClient.Jobs.Finish(r.Job)
		if err != nil {
			// If the API returns with a 422, that means that we
			// succesfully tried to finish the job, but Buildkite
			// rejected the finish for some reason. This can
			// sometimes mean that Buildkite has cancelled the job
			// before we get a chance to send the final API call
			// (maybe this agent took too long to kill the
			// process). In that case, we don't want to keep trying
			// to finish the job forever so we'll just bail out and
			// go find some more work to do.
			if response != nil && response.StatusCode == 422 {
				logger.Warn("Buildkite rejected the call to finish the job (%s)", err)
				s.Break()
			} else {
				logger.Warn("%s (%s)", err, s)
			}
		}

		return err
	}, &retry.Config{Forever: true, Interval: 1 * time.Second})
}
Пример #2
0
func (d Download) Start() error {
	return retry.Do(func(s *retry.Stats) error {
		err := d.try()
		if err != nil {
			logger.Warn("Error trying to download %s (%s) %s", d.URL, err, s)
		}
		return err
	}, &retry.Config{Maximum: d.Retries, Interval: 1 * time.Second})
}
func (a *ArtifactBatchCreator) Create() ([]*api.Artifact, error) {
	length := len(a.Artifacts)
	chunks := 30

	// Split into the artifacts into chunks so we're not uploading a ton of
	// files at once.
	for i := 0; i < length; i += chunks {
		j := i + chunks
		if length < j {
			j = length
		}

		// The artifacts that will be uploaded in this chunk
		theseArtiacts := a.Artifacts[i:j]

		// An ID is required so Buildkite can ensure this create
		// operation is idompotent (if we try and upload the same ID
		// twice, it'll just return the previous data and skip the
		// upload)
		batch := &api.ArtifactBatch{api.NewUUID(), theseArtiacts, a.UploadDestination}

		logger.Info("Creating (%d-%d)/%d artifacts", i, j, length)

		var creation *api.ArtifactBatchCreateResponse
		var resp *api.Response
		var err error

		// Retry the batch upload a couple of times
		err = retry.Do(func(s *retry.Stats) error {
			creation, resp, err = a.APIClient.Artifacts.Create(a.JobID, batch)
			if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404 || resp.StatusCode == 500) {
				s.Break()
			}
			if err != nil {
				logger.Warn("%s (%s)", err, s)
			}

			return err
		}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

		// Did the batch creation eventually fail?
		if err != nil {
			return nil, err
		}

		// Save the id and instructions to each artifact
		index := 0
		for _, id := range creation.ArtifactIDs {
			theseArtiacts[index].ID = id
			theseArtiacts[index].UploadInstructions = creation.UploadInstructions
			index += 1
		}
	}

	return a.Artifacts, nil
}
Пример #4
0
func (r *JobRunner) onUploadHeaderTime(cursor int, total int, times map[string]string) {
	retry.Do(func(s *retry.Stats) error {
		_, err := r.APIClient.HeaderTimes.Save(r.Job.ID, &api.HeaderTimes{Times: times})
		if err != nil {
			logger.Warn("%s (%s)", err, s)
		}

		return err
	}, &retry.Config{Maximum: 10, Interval: 5 * time.Second})
}
Пример #5
0
// Connects the agent to the Buildkite Agent API, retrying up to 30 times if it
// fails.
func (a *AgentWorker) Connect() error {
	return retry.Do(func(s *retry.Stats) error {
		_, err := a.APIClient.Agents.Connect()
		if err != nil {
			logger.Warn("%s (%s)", err, s)
		}

		return err
	}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
}
Пример #6
0
// Connects the agent to the Buildkite Agent API, retrying up to 30 times if it
// fails.
func (a *AgentWorker) Connect() error {
	// Update the proc title
	a.UpdateProcTitle("connecting")

	return retry.Do(func(s *retry.Stats) error {
		_, err := a.APIClient.Agents.Connect()
		if err != nil {
			logger.Warn("%s (%s)", err, s)
		}

		return err
	}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
}
Пример #7
0
// Call when a chunk is ready for upload. It retry the chunk upload with an
// interval before giving up.
func (r *JobRunner) onUploadChunk(chunk *LogStreamerChunk) error {
	return retry.Do(func(s *retry.Stats) error {
		_, err := r.APIClient.Chunks.Upload(r.Job.ID, &api.Chunk{
			Data:     chunk.Data,
			Sequence: chunk.Order,
		})
		if err != nil {
			logger.Warn("%s (%s)", err, s)
		}

		return err
	}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
}
Пример #8
0
// Starts the job in the Buildkite Agent API. We'll retry on connection-related
// issues, but if a connection succeeds and we get an error response back from
// Buildkite, we won't bother retrying. For example, a "no such host" will
// retry, but a 422 from Buildkite won't.
func (r *JobRunner) startJob(startedAt time.Time) error {
	r.Job.StartedAt = startedAt.UTC().Format(time.RFC3339Nano)

	return retry.Do(func(s *retry.Stats) error {
		_, err := r.APIClient.Jobs.Start(r.Job)

		if err != nil {
			if api.IsRetryableError(err) {
				logger.Warn("%s (%s)", err, s)
			} else {
				logger.Warn("Buildkite rejected the call to start the job (%s)", err)
				s.Break()
			}
		}

		return err
	}, &retry.Config{Maximum: 30, Interval: 5 * time.Second})
}
Пример #9
0
// Performs a heatbeat
func (a *AgentWorker) Heartbeat() error {
	var beat *api.Heartbeat
	var err error

	// Retry the heartbeat a few times
	err = retry.Do(func(s *retry.Stats) error {
		beat, _, err = a.APIClient.Heartbeats.Beat()
		if err != nil {
			logger.Warn("%s (%s)", err, s)
		}
		return err
	}, &retry.Config{Maximum: 5, Interval: 1 * time.Second})

	if err != nil {
		return err
	}

	logger.Debug("Heartbeat sent at %s and received at %s", beat.SentAt, beat.ReceivedAt)
	return nil
}
Пример #10
0
func (b *Buildkite) WriteJobMetadata(metadata map[string]string) error {
	client := b.agentClient
	for k, v := range metadata {
		metadatum := &buildkite.MetaData{
			Key:   k,
			Value: v,
		}
		err := retry.Do(func(s *retry.Stats) error {
			resp, err := client.MetaData.Set(b.jobId, metadatum)
			if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404) {
				s.Break()
			}

			return err
		}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

		if err != nil {
			return fmt.Errorf("error setting metadata %s: %s", k, err)
		}
	}
	return nil
}
Пример #11
0
func (a *ArtifactBatchCreator) Create() ([]*api.Artifact, error) {
	length := len(a.Artifacts)
	chunks := 10
	uploaded := []*api.Artifact{}

	// Split into the artifacts into chunks so we're not uploading a ton of
	// files at once.
	for i := 0; i < length; i += chunks {
		j := i + chunks
		if length < j {
			j = length
		}

		artifacts := a.Artifacts[i:j]

		logger.Info("Creating (%d-%d)/%d artifacts", i, j, length)

		var u []*api.Artifact
		var err error

		// Retry the batch upload a couple of times
		err = retry.Do(func(s *retry.Stats) error {
			u, _, err = a.APIClient.Artifacts.Create(a.JobID, artifacts)
			if err != nil {
				logger.Warn("%s (%s)", err, s)
			}

			return err
		}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
		if err != nil {
			return nil, err
		}

		uploaded = append(uploaded, u...)
	}

	return uploaded, nil
}
Пример #12
0
// Takes the agent template and returns a registered agent. The registered
// agent includes the Access Token used to communicate with the Buildkite Agent
// API
func (r *AgentPool) RegisterAgent(agent *api.Agent) (*api.Agent, error) {
	var registered *api.Agent
	var err error
	var resp *api.Response

	register := func(s *retry.Stats) error {
		registered, resp, err = r.APIClient.Agents.Register(agent)
		if err != nil {
			if resp != nil && resp.StatusCode == 401 {
				logger.Warn("Buildkite rejected the registration (%s)", err)
				s.Break()
			} else {
				logger.Warn("%s (%s)", err, s)
			}
		}

		return err
	}

	err = retry.Do(register, &retry.Config{Maximum: 30, Interval: 1 * time.Second})

	return registered, err
}
Пример #13
0
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error {
	var uploader Uploader

	// Determine what uploader to use
	if a.Destination != "" {
		if strings.HasPrefix(a.Destination, "s3://") {
			uploader = new(S3Uploader)
		} else {
			return errors.New("Unknown upload destination: " + a.Destination)
		}
	} else {
		uploader = new(FormUploader)
	}

	// Setup the uploader
	err := uploader.Setup(a.Destination, a.APIClient.DebugHTTP)
	if err != nil {
		return err
	}

	// Set the URL's of the artifacts based on the uploader
	for _, artifact := range artifacts {
		artifact.URL = uploader.URL(artifact)
	}

	// Create the artifacts on Buildkite
	batchCreator := ArtifactBatchCreator{
		APIClient:         a.APIClient,
		JobID:             a.JobID,
		Artifacts:         artifacts,
		UploadDestination: a.Destination,
	}
	artifacts, err = batchCreator.Create()
	if err != nil {
		return err
	}

	// Prepare a concurrency pool to upload the artifacts
	p := pool.New(pool.MaxConcurrencyLimit)
	errors := []error{}

	// Create a wait group so we can make sure the uploader waits for all
	// the artifact states to upload before finishing
	var stateUploaderWaitGroup sync.WaitGroup
	stateUploaderWaitGroup.Add(1)

	// A map to keep track of artifact states and how many we've uploaded
	artifactsStates := make(map[string]string)
	artifactStatesUploaded := 0

	// Spin up a gourtine that'll uploading artifact statuses every few
	// seconds in batches
	go func() {
		for artifactStatesUploaded < len(artifacts) {
			statesToUpload := make(map[string]string)

			// Grab all the states we need to upload, and remove
			// them from the tracking map
			for id, state := range artifactsStates {
				statesToUpload[id] = state
				delete(artifactsStates, id)
			}

			if len(statesToUpload) > 0 {
				artifactStatesUploaded += len(statesToUpload)
				for id, state := range statesToUpload {
					logger.Debug("Artifact `%s` has state `%s`", id, state)
				}

				// Update the states of the artifacts in bulk.
				err = retry.Do(func(s *retry.Stats) error {
					_, err = a.APIClient.Artifacts.Update(a.JobID, statesToUpload)
					if err != nil {
						logger.Warn("%s (%s)", err, s)
					}

					return err
				}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

				if err != nil {
					logger.Error("Error uploading artifact states: %s", err)

					// Track the error that was raised
					p.Lock()
					errors = append(errors, err)
					p.Unlock()
				}

				logger.Debug("Uploaded %d artfact states (%d/%d)", len(statesToUpload), artifactStatesUploaded, len(artifacts))
			}

			// Check again for states to upload in a few seconds
			time.Sleep(1 * time.Second)
		}

		stateUploaderWaitGroup.Done()
	}()

	for _, artifact := range artifacts {
		// Create new instance of the artifact for the goroutine
		// See: http://golang.org/doc/effective_go.html#channels
		artifact := artifact

		p.Spawn(func() {
			// Show a nice message that we're starting to upload the file
			logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize)

			// Upload the artifact and then set the state depending
			// on whether or not it passed. We'll retry the upload
			// a couple of times before giving up.
			err = retry.Do(func(s *retry.Stats) error {
				err := uploader.Upload(artifact)
				if err != nil {
					logger.Warn("%s (%s)", err, s)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

			var state string

			// Did the upload eventually fail?
			if err != nil {
				logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err)

				// Track the error that was raised
				p.Lock()
				errors = append(errors, err)
				p.Unlock()

				state = "error"
			} else {
				state = "finished"
			}

			artifactsStates[artifact.ID] = state
		})
	}

	// Wait for the pool to finish
	p.Wait()

	// Wait for the statuses to finish uploading
	stateUploaderWaitGroup.Wait()

	if len(errors) > 0 {
		logger.Fatal("There were errors with uploading some of the artifacts")
	}

	return nil
}
Пример #14
0
			}
		}

		// Create the API client
		client := agent.APIClient{
			Endpoint: cfg.Endpoint,
			Token:    cfg.AgentAccessToken,
		}.Create()

		// Generate a UUID that will identifiy this pipeline change. We
		// do this outside of the retry loop because we want this UUID
		// to be the same for each attempt at updating the pipeline.
		uuid := api.NewUUID()

		// Retry the pipeline upload a few times before giving up
		err = retry.Do(func(s *retry.Stats) error {
			_, err = client.Pipelines.Upload(cfg.Job, &api.Pipeline{UUID: uuid, Data: input, FileName: filename, Replace: cfg.Replace})
			if err != nil {
				logger.Warn("%s (%s)", err, s)
			}

			return err
		}, &retry.Config{Maximum: 5, Interval: 1 * time.Second})
		if err != nil {
			logger.Fatal("Failed to upload and process pipeline: %s", err)
		}

		logger.Info("Successfully uploaded and parsed pipeline config")
	},
}
Пример #15
0
		// Create the API client
		client := agent.APIClient{
			Endpoint: cfg.Endpoint,
			Token:    cfg.AgentAccessToken,
		}.Create()

		// Find the meta data value
		var metaData *api.MetaData
		var err error
		var resp *api.Response
		err = retry.Do(func(s *retry.Stats) error {
			metaData, resp, err = client.MetaData.Get(cfg.Job, cfg.Key)
			// Don't bother retrying if the response was one of these statuses
			if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404 || resp.StatusCode == 400) {
				s.Break()
			}
			if err != nil {
				logger.Warn("%s (%s)", err, s)
			}

			return err
		}, &retry.Config{Maximum: 10, Interval: 5 * time.Second})
		if err != nil {
			logger.Fatal("Failed to get meta-data: %s", err)
		}

		// Output the value to STDOUT
		fmt.Print(metaData.Value)
	},
}
Пример #16
0
		// Create the API client
		client := agent.APIClient{
			Endpoint: cfg.Endpoint,
			Token:    cfg.AgentAccessToken,
		}.Create()

		// Create the meta data to set
		metaData := &api.MetaData{
			Key:   cfg.Key,
			Value: cfg.Value,
		}

		// Set the meta data
		err := retry.Do(func(s *retry.Stats) error {
			resp, err := client.MetaData.Set(cfg.Job, metaData)
			if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404) {
				s.Break()
			}
			if err != nil {
				logger.Warn("%s (%s)", err, s)
			}

			return err
		}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
		if err != nil {
			logger.Fatal("Failed to set meta-data: %s", err)
		}
	},
}
Пример #17
0
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error {
	var uploader Uploader

	// Determine what uploader to use
	if a.Destination != "" {
		if strings.HasPrefix(a.Destination, "s3://") {
			uploader = new(S3Uploader)
		} else {
			return errors.New("Unknown upload destination: " + a.Destination)
		}
	} else {
		uploader = new(FormUploader)
	}

	// Setup the uploader
	err := uploader.Setup(a.Destination)
	if err != nil {
		return err
	}

	// Set the URL's of the artifacts based on the uploader
	for _, artifact := range artifacts {
		artifact.URL = uploader.URL(artifact)
	}

	// Create the artifacts on Buildkite
	batchCreator := ArtifactBatchCreator{
		APIClient: a.APIClient,
		JobID:     a.JobID,
		Artifacts: artifacts,
	}
	artifacts, err = batchCreator.Create()
	if err != nil {
		return err
	}

	p := pool.New(pool.MaxConcurrencyLimit)
	errors := []error{}

	for _, artifact := range artifacts {
		// Create new instance of the artifact for the goroutine
		// See: http://golang.org/doc/effective_go.html#channels
		artifact := artifact

		p.Spawn(func() {
			// Show a nice message that we're starting to upload the file
			logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize)

			// Upload the artifact and then set the state depending
			// on whether or not it passed. We'll retry the upload
			// a couple of times before giving up.
			err = retry.Do(func(s *retry.Stats) error {
				err := uploader.Upload(artifact)
				if err != nil {
					logger.Warn("%s (%s)", err, s)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
			if err != nil {
				artifact.State = "error"
				logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err)

				// Track the error that was raised
				p.Lock()
				errors = append(errors, err)
				p.Unlock()
			} else {
				artifact.State = "finished"
			}

			// Update the state of the artifact on Buildkite, we
			// retry this as well.
			err = retry.Do(func(s *retry.Stats) error {
				_, _, err = a.APIClient.Artifacts.Update(a.JobID, artifact)
				if err != nil {
					logger.Warn("%s (%s)", err, s)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
			if err != nil {
				logger.Error("Error marking artifact %s as uploaded: %s", artifact.Path, err)

				// Track the error that was raised
				p.Lock()
				errors = append(errors, err)
				p.Unlock()
			}
		})
	}

	p.Wait()

	if len(errors) > 0 {
		logger.Fatal("There were errors with uploading some of the artifacts")
	}

	return nil
}
Пример #18
0
// Performs a ping, which returns what action the agent should take next.
func (a *AgentWorker) Ping() {
	// Update the proc title
	a.UpdateProcTitle("pinging")

	ping, _, err := a.APIClient.Pings.Get()
	if err != nil {
		// If a ping fails, we don't really care, because it'll
		// ping again after the interval.
		logger.Warn("Failed to ping: %s", err)
		return
	}

	// Should we switch endpoints?
	if ping.Endpoint != "" && ping.Endpoint != a.Agent.Endpoint {
		// Before switching to the new one, do a ping test to make sure it's
		// valid. If it is, switch and carry on, otherwise ignore the switch
		// for now.
		newAPIClient := APIClient{Endpoint: ping.Endpoint, Token: a.Agent.AccessToken}.Create()
		newPing, _, err := newAPIClient.Pings.Get()
		if err != nil {
			logger.Warn("Failed to ping the new endpoint %s - ignoring switch for now (%s)", ping.Endpoint, err)
		} else {
			// Replace the APIClient and process the new ping
			a.APIClient = newAPIClient
			a.Agent.Endpoint = ping.Endpoint
			ping = newPing
		}
	}

	// Is there a message that should be shown in the logs?
	if ping.Message != "" {
		logger.Info(ping.Message)
	}

	// Should the agent disconnect?
	if ping.Action == "disconnect" {
		a.Stop(false)
		return
	}

	// If we don't have a job, there's nothing to do!
	if ping.Job == nil {
		// Update the proc title
		a.UpdateProcTitle("idle")

		return
	}

	// Update the proc title
	a.UpdateProcTitle(fmt.Sprintf("job %s", strings.Split(ping.Job.ID, "-")[0]))

	logger.Info("Assigned job %s. Accepting...", ping.Job.ID)

	// Accept the job. We'll retry on connection related issues, but if
	// Buildkite returns a 422 or 500 for example, we'll just bail out,
	// re-ping, and try the whole process again.
	var accepted *api.Job
	retry.Do(func(s *retry.Stats) error {
		accepted, _, err = a.APIClient.Jobs.Accept(ping.Job)

		if err != nil {
			if api.IsRetryableError(err) {
				logger.Warn("%s (%s)", err, s)
			} else {
				logger.Warn("Buildkite rejected the call to accept the job (%s)", err)
				s.Break()
			}
		}

		return err
	}, &retry.Config{Maximum: 30, Interval: 1 * time.Second})

	// If `accepted` is nil, then the job was never accepted
	if accepted == nil {
		logger.Error("Failed to accept job")
		return
	}

	// Now that the job has been accepted, we can start it.
	a.jobRunner, err = JobRunner{
		Endpoint:           accepted.Endpoint,
		Agent:              a.Agent,
		AgentConfiguration: a.AgentConfiguration,
		Job:                accepted,
	}.Create()

	// Was there an error creating the job runner?
	if err != nil {
		logger.Error("Failed to initialize job: %s", err)
		return
	}

	// Start running the job
	if err = a.jobRunner.Run(); err != nil {
		logger.Error("Failed to run job: %s", err)
	}

	// No more job, no more runner.
	a.jobRunner = nil
}
Пример #19
0
// PostProcess sends the Artifact to Buildkite
func (p *PostProcessor) PostProcess(ui packer.Ui, artifact packer.Artifact) (packer.Artifact, bool, error) {

	buildkiteEnabled := os.Getenv("BUILDKITE")
	if buildkiteEnabled != "true" {
		ui.Message("This Packer build is not run within Buildkite. Skipping the BuildKite post-processor.")
		return artifact, true, nil
	}

	client := p.client.Create()

	metadata := make([]buildkite.MetaData, 0, 2)

	builderID := artifact.BuilderId()

	// packer's artifact model doesn't support AMIs very well since
	// both a region and an id need to be encoded into a single field.
	// We handle it as a special case so we can make more useful metadata
	// in the buildkite job.
	// N.B. this doesn't support AMIs being created across multiple regions.

	if builderID == "mitchellh.amazonebs" || builderID == "mitchellh.amazoninstance" || builderID == "mitchellh.amazonchroot" {
		packedID := artifact.Id()
		parts := strings.SplitN(packedID, ":", 2)

		idKeyName := "artifact_ami_id"
		regionKeyName := "artifact_ami_region"

		// override key names with an optional prefixed version
		if p.config.Prefix != "" {
			idKeyName = fmt.Sprintf("%s_id", p.config.Prefix)
			regionKeyName = fmt.Sprintf("%s_region", p.config.Prefix)
		}

		metadata = append(metadata, buildkite.MetaData{
			Key:   idKeyName,
			Value: parts[1],
		})
		metadata = append(metadata, buildkite.MetaData{
			Key:   regionKeyName,
			Value: parts[0],
		})
	} else {
		id := artifact.Id()

		idKeyName := "artifact_id"

		// override key name with an optional prefixed version
		if p.config.Prefix != "" {
			idKeyName = fmt.Sprintf("%s_id", p.config.Prefix)
		}

		if id != "" {
			metadata = append(metadata, buildkite.MetaData{
				Key:   idKeyName,
				Value: id,
			})
		}

		// TODO: Support uploading files as BuildKite artifacts
	}

	if len(metadata) > 0 {
		ui.Message("Setting metadata in BuildKite:")
		for _, item := range metadata {
			ui.Message(fmt.Sprintf("- %s: %s", item.Key, item.Value))
			err := retry.Do(func(s *retry.Stats) error {
				resp, err := client.MetaData.Set(p.jobID, &item)
				if resp != nil && (resp.StatusCode == 401 || resp.StatusCode == 404) {
					s.Break()
				}
				if err != nil {
					log.Println(err)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

			if err != nil {
				return nil, false, err
			}
		}
	}

	return artifact, true, nil
}