// Takes the options passed to the CLI, and creates an api.Agent record that // will be sent to the Buildkite Agent API for registration. func (r *AgentPool) CreateAgentTemplate() *api.Agent { agent := &api.Agent{ Name: r.Name, Priority: r.Priority, MetaData: r.MetaData, ScriptEvalEnabled: r.AgentConfiguration.CommandEval, Version: Version(), Build: BuildVersion(), PID: os.Getpid(), Arch: runtime.GOARCH, } // Attempt to add the EC2 meta-data if r.MetaDataEC2 { tags, err := EC2MetaData{}.Get() if err != nil { // Don't blow up if we can't find them, just show a nasty error. logger.Error(fmt.Sprintf("Failed to fetch EC2 meta-data: %s", err.Error())) } else { for tag, value := range tags { agent.MetaData = append(agent.MetaData, fmt.Sprintf("%s=%s", tag, value)) } } } // Attempt to add the EC2 tags if r.MetaDataEC2Tags { tags, err := EC2Tags{}.Get() if err != nil { // Don't blow up if we can't find them, just show a nasty error. logger.Error(fmt.Sprintf("Failed to find EC2 Tags: %s", err.Error())) } else { for tag, value := range tags { agent.MetaData = append(agent.MetaData, fmt.Sprintf("%s=%s", tag, value)) } } } var err error // Add the hostname agent.Hostname, err = os.Hostname() if err != nil { logger.Warn("Failed to find hostname: %s", err) } // Add the OS dump agent.OS, err = system.VersionDump() if err != nil { logger.Warn("Failed to find OS information: %s", err) } return agent }
// The actual log streamer worker func Worker(id int, ls *LogStreamer) { logger.Debug("[LogStreamer/Worker#%d] Worker is starting...", id) var chunk *LogStreamerChunk for { // Get the next chunk (pointer) from the queue. This will block // until something is returned. chunk = <-ls.queue // If the next chunk is nil, then there is no more work to do if chunk == nil { break } // Upload the chunk err := ls.Callback(chunk) if err != nil { atomic.AddInt32(&ls.ChunksFailedCount, 1) logger.Error("Giving up on uploading chunk %d, this will result in only a partial build log on Buildkite", chunk.Order) } // Signal to the chunkWaitGroup that this one is done ls.chunkWaitGroup.Done() } logger.Debug("[LogStreamer/Worker#%d] Worker has shutdown", id) }
// Takes the options passed to the CLI, and creates an api.Agent record that // will be sent to the Buildkite Agent API for registration. func (r *AgentPool) CreateAgentTemplate() *api.Agent { agent := &api.Agent{ Name: r.Name, Priority: r.Priority, MetaData: r.MetaData, ScriptEvalEnabled: r.AgentConfiguration.CommandEval, Version: Version(), PID: os.Getpid(), } // Attempt to add the EC2 tags if r.MetaDataEC2Tags { tags, err := EC2Tags{}.Get() if err != nil { // Don't blow up if we can't find them, just show a nasty error. logger.Error(fmt.Sprintf("Failed to find EC2 Tags: %s", err.Error())) } else { for tag, value := range tags { agent.MetaData = append(agent.MetaData, fmt.Sprintf("%s=%s", tag, value)) } } } // Add the hostname agent.Hostname, _ = os.Hostname() // Add the OS dump agent.OS, _ = OSDump() return agent }
func (r *JobRunner) Kill() error { if !r.cancelled { logger.Info("Canceling job %s", r.Job.ID) r.cancelled = true if r.process != nil { r.process.Kill() } else { logger.Error("No process to kill") } } return nil }
func (p *Process) signal(sig os.Signal) error { if p.command != nil && p.command.Process != nil { logger.Debug("[Process] Sending signal: %s to PID: %d", sig.String(), p.Pid) err := p.command.Process.Signal(syscall.SIGTERM) if err != nil { logger.Error("[Process] Failed to send signal: %s to PID: %d (%T: %v)", sig.String(), p.Pid, err, err) return err } } else { logger.Debug("[Process] No process to signal yet") } return nil }
// https://github.com/hnakamur/commango/blob/fe42b1cf82bf536ce7e24dceaef6656002e03743/os/executil/executil.go#L29 // TODO: Can this be better? func getExitStatus(waitResult error) string { exitStatus := -1 if waitResult != nil { if err, ok := waitResult.(*exec.ExitError); ok { if s, ok := err.Sys().(syscall.WaitStatus); ok { exitStatus = s.ExitStatus() } else { logger.Error("[Process] Unimplemented for system where exec.ExitError.Sys() is not syscall.WaitStatus.") } } } else { exitStatus = 0 } return fmt.Sprintf("%d", exitStatus) }
// Starts the agent worker func (a *AgentWorker) Start() error { // Mark the agent as running a.running = true // Create the intervals we'll be using pingInterval := time.Second * time.Duration(a.Agent.PingInterval) heartbeatInterval := time.Second * time.Duration(a.Agent.HearbeatInterval) // Setup and start the heartbeater go func() { // Keep the heartbeat running as long as the agent is for a.running { err := a.Heartbeat() if err != nil { logger.Error("Failed to heartbeat %s. Will try again in %s", err, heartbeatInterval) } time.Sleep(heartbeatInterval) } }() // Create the ticker and stop channels a.ticker = time.NewTicker(pingInterval) a.stop = make(chan struct{}) // Continue this loop until the the ticker is stopped, and we received // a message on the stop channel. for { a.Ping() select { case <-a.ticker.C: continue case <-a.stop: a.ticker.Stop() return nil } } // Mark the agent as not running anymore a.running = false return nil }
// Performs a ping, which returns what action the agent should take next. func (a *AgentWorker) Ping() { // Update the proc title a.UpdateProcTitle("pinging") ping, _, err := a.APIClient.Pings.Get() if err != nil { // If a ping fails, we don't really care, because it'll // ping again after the interval. logger.Warn("Failed to ping: %s", err) return } // Should we switch endpoints? if ping.Endpoint != "" && ping.Endpoint != a.Agent.Endpoint { // Before switching to the new one, do a ping test to make sure it's // valid. If it is, switch and carry on, otherwise ignore the switch // for now. newAPIClient := APIClient{Endpoint: ping.Endpoint, Token: a.Agent.AccessToken}.Create() newPing, _, err := newAPIClient.Pings.Get() if err != nil { logger.Warn("Failed to ping the new endpoint %s - ignoring switch for now (%s)", ping.Endpoint, err) } else { // Replace the APIClient and process the new ping a.APIClient = newAPIClient a.Agent.Endpoint = ping.Endpoint ping = newPing } } // Is there a message that should be shown in the logs? if ping.Message != "" { logger.Info(ping.Message) } // Should the agent disconnect? if ping.Action == "disconnect" { a.Stop(false) return } // If we don't have a job, there's nothing to do! if ping.Job == nil { // Update the proc title a.UpdateProcTitle("idle") return } // Update the proc title a.UpdateProcTitle(fmt.Sprintf("job %s", strings.Split(ping.Job.ID, "-")[0])) logger.Info("Assigned job %s. Accepting...", ping.Job.ID) // Accept the job. We'll retry on connection related issues, but if // Buildkite returns a 422 or 500 for example, we'll just bail out, // re-ping, and try the whole process again. var accepted *api.Job retry.Do(func(s *retry.Stats) error { accepted, _, err = a.APIClient.Jobs.Accept(ping.Job) if err != nil { if api.IsRetryableError(err) { logger.Warn("%s (%s)", err, s) } else { logger.Warn("Buildkite rejected the call to accept the job (%s)", err) s.Break() } } return err }, &retry.Config{Maximum: 30, Interval: 1 * time.Second}) // If `accepted` is nil, then the job was never accepted if accepted == nil { logger.Error("Failed to accept job") return } // Now that the job has been accepted, we can start it. a.jobRunner, err = JobRunner{ Endpoint: accepted.Endpoint, Agent: a.Agent, AgentConfiguration: a.AgentConfiguration, Job: accepted, }.Create() // Was there an error creating the job runner? if err != nil { logger.Error("Failed to initialize job: %s", err) return } // Start running the job if err = a.jobRunner.Run(); err != nil { logger.Error("Failed to run job: %s", err) } // No more job, no more runner. a.jobRunner = nil }
func (p *Process) Start() error { c, err := shell.CommandFromString(p.Script) if err != nil { return err } p.command = exec.Command(c.Command, c.Args...) // Copy the current processes ENV and merge in the new ones. We do this // so the sub process gets PATH and stuff. We merge our path in over // the top of the current one so the ENV from Buildkite and the agent // take precedence over the agent currentEnv := os.Environ() p.command.Env = append(currentEnv, p.Env...) var waitGroup sync.WaitGroup lineReaderPipe, lineWriterPipe := io.Pipe() multiWriter := io.MultiWriter(&p.buffer, lineWriterPipe) logger.Info("Starting to run: %s", c.String()) // Toggle between running in a pty if p.PTY { pty, err := StartPTY(p.command) if err != nil { p.ExitStatus = "1" return err } p.Pid = p.command.Process.Pid p.setRunning(true) waitGroup.Add(1) go func() { logger.Debug("[Process] Starting to copy PTY to the buffer") // Copy the pty to our buffer. This will block until it // EOF's or something breaks. _, err = io.Copy(multiWriter, pty) if e, ok := err.(*os.PathError); ok && e.Err == syscall.EIO { // We can safely ignore this error, because // it's just the PTY telling us that it closed // successfully. See: // https://github.com/buildkite/agent/pull/34#issuecomment-46080419 err = nil } if err != nil { logger.Error("[Process] PTY output copy failed with error: %T: %v", err, err) } else { logger.Debug("[Process] PTY has finished being copied to the buffer") } waitGroup.Done() }() } else { p.command.Stdout = multiWriter p.command.Stderr = multiWriter p.command.Stdin = nil err := p.command.Start() if err != nil { p.ExitStatus = "1" return err } p.Pid = p.command.Process.Pid p.setRunning(true) } logger.Info("[Process] Process is running with PID: %d", p.Pid) // Add the line callback routine to the waitGroup waitGroup.Add(1) go func() { logger.Debug("[LineScanner] Starting to read lines") reader := bufio.NewReader(lineReaderPipe) var appending []byte for { line, isPrefix, err := reader.ReadLine() if err != nil { if err == io.EOF { logger.Debug("[LineScanner] Encountered EOF") break } logger.Error("[LineScanner] Failed to read: (%T: %v)", err, err) } // If isPrefix is true, that means we've got a really // long line incoming, and we'll keep appending to it // until isPrefix is false (which means the long line // has ended. if isPrefix && appending == nil { logger.Debug("[LineScanner] Line is too long to read, going to buffer it until it finishes") appending = line continue } // Should we be appending? if appending != nil { appending = append(appending, line...) // No more isPrefix! Line is finished! if !isPrefix { logger.Debug("[LineScanner] Finished buffering long line") line = appending // Reset appending back to nil appending = nil } else { continue } } go p.LineCallback(string(line)) } logger.Debug("[LineScanner] Finished") waitGroup.Done() }() // Call the StartCallback go p.StartCallback() // Wait until the process has finished. The returned error is nil if the command runs, // has no problems copying stdin, stdout, and stderr, and exits with a zero exit status. waitResult := p.command.Wait() // Close the line writer pipe lineWriterPipe.Close() // The process is no longer running at this point p.setRunning(false) // Find the exit status of the script p.ExitStatus = getExitStatus(waitResult) logger.Info("Process with PID: %d finished with Exit Status: %s", p.Pid, p.ExitStatus) // Sometimes (in docker containers) io.Copy never seems to finish. This is a mega // hack around it. If it doesn't finish after 1 second, just continue. logger.Debug("[Process] Waiting for routines to finish") err = timeoutWait(&waitGroup) if err != nil { logger.Debug("[Process] Timed out waiting for wait group: (%T: %v)", err, err) } // No error occurred so we can return nil return nil }
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error { var uploader Uploader // Determine what uploader to use if a.Destination != "" { if strings.HasPrefix(a.Destination, "s3://") { uploader = new(S3Uploader) } else { return errors.New("Unknown upload destination: " + a.Destination) } } else { uploader = new(FormUploader) } // Setup the uploader err := uploader.Setup(a.Destination, a.APIClient.DebugHTTP) if err != nil { return err } // Set the URL's of the artifacts based on the uploader for _, artifact := range artifacts { artifact.URL = uploader.URL(artifact) } // Create the artifacts on Buildkite batchCreator := ArtifactBatchCreator{ APIClient: a.APIClient, JobID: a.JobID, Artifacts: artifacts, UploadDestination: a.Destination, } artifacts, err = batchCreator.Create() if err != nil { return err } // Prepare a concurrency pool to upload the artifacts p := pool.New(pool.MaxConcurrencyLimit) errors := []error{} // Create a wait group so we can make sure the uploader waits for all // the artifact states to upload before finishing var stateUploaderWaitGroup sync.WaitGroup stateUploaderWaitGroup.Add(1) // A map to keep track of artifact states and how many we've uploaded artifactsStates := make(map[string]string) artifactStatesUploaded := 0 // Spin up a gourtine that'll uploading artifact statuses every few // seconds in batches go func() { for artifactStatesUploaded < len(artifacts) { statesToUpload := make(map[string]string) // Grab all the states we need to upload, and remove // them from the tracking map for id, state := range artifactsStates { statesToUpload[id] = state delete(artifactsStates, id) } if len(statesToUpload) > 0 { artifactStatesUploaded += len(statesToUpload) for id, state := range statesToUpload { logger.Debug("Artifact `%s` has state `%s`", id, state) } // Update the states of the artifacts in bulk. err = retry.Do(func(s *retry.Stats) error { _, err = a.APIClient.Artifacts.Update(a.JobID, statesToUpload) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { logger.Error("Error uploading artifact states: %s", err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() } logger.Debug("Uploaded %d artfact states (%d/%d)", len(statesToUpload), artifactStatesUploaded, len(artifacts)) } // Check again for states to upload in a few seconds time.Sleep(1 * time.Second) } stateUploaderWaitGroup.Done() }() for _, artifact := range artifacts { // Create new instance of the artifact for the goroutine // See: http://golang.org/doc/effective_go.html#channels artifact := artifact p.Spawn(func() { // Show a nice message that we're starting to upload the file logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize) // Upload the artifact and then set the state depending // on whether or not it passed. We'll retry the upload // a couple of times before giving up. err = retry.Do(func(s *retry.Stats) error { err := uploader.Upload(artifact) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) var state string // Did the upload eventually fail? if err != nil { logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() state = "error" } else { state = "finished" } artifactsStates[artifact.ID] = state }) } // Wait for the pool to finish p.Wait() // Wait for the statuses to finish uploading stateUploaderWaitGroup.Wait() if len(errors) > 0 { logger.Fatal("There were errors with uploading some of the artifacts") } return nil }
func (a *ArtifactDownloader) Download() error { // Turn the download destination into an absolute path and confirm it exists downloadDestination, _ := filepath.Abs(a.Destination) fileInfo, err := os.Stat(downloadDestination) if err != nil { logger.Fatal("Could not find information about destination: %s", downloadDestination) } if !fileInfo.IsDir() { logger.Fatal("%s is not a directory", downloadDestination) } // Find the artifacts that we want to download searcher := ArtifactSearcher{BuildID: a.BuildID, APIClient: a.APIClient} artifacts, err := searcher.Search(a.Query, a.Step) if err != nil { return err } artifactCount := len(artifacts) if artifactCount == 0 { logger.Info("No artifacts found for downloading") } else { logger.Info("Found %d artifacts. Starting to download to: %s", artifactCount, downloadDestination) p := pool.New(pool.MaxConcurrencyLimit) errors := []error{} for _, artifact := range artifacts { // Create new instance of the artifact for the goroutine // See: http://golang.org/doc/effective_go.html#channels artifact := artifact p.Spawn(func() { var err error // Handle downloading from S3 and GS if strings.HasPrefix(artifact.UploadDestination, "s3://") { err = S3Downloader{ Path: artifact.Path, Bucket: artifact.UploadDestination, Destination: downloadDestination, Retries: 5, DebugHTTP: a.APIClient.DebugHTTP, }.Start() } else if strings.HasPrefix(artifact.UploadDestination, "gs://") { err = GSDownloader{ Path: artifact.Path, Bucket: artifact.UploadDestination, Destination: downloadDestination, Retries: 5, DebugHTTP: a.APIClient.DebugHTTP, }.Start() } else { err = Download{ URL: artifact.URL, Path: artifact.Path, Destination: downloadDestination, Retries: 5, DebugHTTP: a.APIClient.DebugHTTP, }.Start() } // If the downloaded encountered an error, lock // the pool, collect it, then unlock the pool // again. if err != nil { logger.Error("Failed to download artifact: %s", err) p.Lock() errors = append(errors, err) p.Unlock() } }) } p.Wait() if len(errors) > 0 { logger.Fatal("There were errors with downloading some of the artifacts") } } return nil }
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error { var uploader Uploader // Determine what uploader to use if a.Destination != "" { if strings.HasPrefix(a.Destination, "s3://") { uploader = new(S3Uploader) } else { return errors.New("Unknown upload destination: " + a.Destination) } } else { uploader = new(FormUploader) } // Setup the uploader err := uploader.Setup(a.Destination) if err != nil { return err } // Set the URL's of the artifacts based on the uploader for _, artifact := range artifacts { artifact.URL = uploader.URL(artifact) } // Create the artifacts on Buildkite batchCreator := ArtifactBatchCreator{ APIClient: a.APIClient, JobID: a.JobID, Artifacts: artifacts, } artifacts, err = batchCreator.Create() if err != nil { return err } p := pool.New(pool.MaxConcurrencyLimit) errors := []error{} for _, artifact := range artifacts { // Create new instance of the artifact for the goroutine // See: http://golang.org/doc/effective_go.html#channels artifact := artifact p.Spawn(func() { // Show a nice message that we're starting to upload the file logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize) // Upload the artifact and then set the state depending // on whether or not it passed. We'll retry the upload // a couple of times before giving up. err = retry.Do(func(s *retry.Stats) error { err := uploader.Upload(artifact) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { artifact.State = "error" logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() } else { artifact.State = "finished" } // Update the state of the artifact on Buildkite, we // retry this as well. err = retry.Do(func(s *retry.Stats) error { _, _, err = a.APIClient.Artifacts.Update(a.JobID, artifact) if err != nil { logger.Warn("%s (%s)", err, s) } return err }, &retry.Config{Maximum: 10, Interval: 1 * time.Second}) if err != nil { logger.Error("Error marking artifact %s as uploaded: %s", artifact.Path, err) // Track the error that was raised p.Lock() errors = append(errors, err) p.Unlock() } }) } p.Wait() if len(errors) > 0 { logger.Fatal("There were errors with uploading some of the artifacts") } return nil }