示例#1
0
// Takes the options passed to the CLI, and creates an api.Agent record that
// will be sent to the Buildkite Agent API for registration.
func (r *AgentPool) CreateAgentTemplate() *api.Agent {
	agent := &api.Agent{
		Name:              r.Name,
		Priority:          r.Priority,
		MetaData:          r.MetaData,
		ScriptEvalEnabled: r.AgentConfiguration.CommandEval,
		Version:           Version(),
		Build:             BuildVersion(),
		PID:               os.Getpid(),
		Arch:              runtime.GOARCH,
	}

	// Attempt to add the EC2 meta-data
	if r.MetaDataEC2 {
		tags, err := EC2MetaData{}.Get()
		if err != nil {
			// Don't blow up if we can't find them, just show a nasty error.
			logger.Error(fmt.Sprintf("Failed to fetch EC2 meta-data: %s", err.Error()))
		} else {
			for tag, value := range tags {
				agent.MetaData = append(agent.MetaData, fmt.Sprintf("%s=%s", tag, value))
			}
		}
	}

	// Attempt to add the EC2 tags
	if r.MetaDataEC2Tags {
		tags, err := EC2Tags{}.Get()
		if err != nil {
			// Don't blow up if we can't find them, just show a nasty error.
			logger.Error(fmt.Sprintf("Failed to find EC2 Tags: %s", err.Error()))
		} else {
			for tag, value := range tags {
				agent.MetaData = append(agent.MetaData, fmt.Sprintf("%s=%s", tag, value))
			}
		}
	}

	var err error

	// Add the hostname
	agent.Hostname, err = os.Hostname()
	if err != nil {
		logger.Warn("Failed to find hostname: %s", err)
	}

	// Add the OS dump
	agent.OS, err = system.VersionDump()
	if err != nil {
		logger.Warn("Failed to find OS information: %s", err)
	}

	return agent
}
示例#2
0
// The actual log streamer worker
func Worker(id int, ls *LogStreamer) {
	logger.Debug("[LogStreamer/Worker#%d] Worker is starting...", id)

	var chunk *LogStreamerChunk
	for {
		// Get the next chunk (pointer) from the queue. This will block
		// until something is returned.
		chunk = <-ls.queue

		// If the next chunk is nil, then there is no more work to do
		if chunk == nil {
			break
		}

		// Upload the chunk
		err := ls.Callback(chunk)
		if err != nil {
			atomic.AddInt32(&ls.ChunksFailedCount, 1)

			logger.Error("Giving up on uploading chunk %d, this will result in only a partial build log on Buildkite", chunk.Order)
		}

		// Signal to the chunkWaitGroup that this one is done
		ls.chunkWaitGroup.Done()
	}

	logger.Debug("[LogStreamer/Worker#%d] Worker has shutdown", id)
}
示例#3
0
// Takes the options passed to the CLI, and creates an api.Agent record that
// will be sent to the Buildkite Agent API for registration.
func (r *AgentPool) CreateAgentTemplate() *api.Agent {
	agent := &api.Agent{
		Name:              r.Name,
		Priority:          r.Priority,
		MetaData:          r.MetaData,
		ScriptEvalEnabled: r.AgentConfiguration.CommandEval,
		Version:           Version(),
		PID:               os.Getpid(),
	}

	// Attempt to add the EC2 tags
	if r.MetaDataEC2Tags {
		tags, err := EC2Tags{}.Get()
		if err != nil {
			// Don't blow up if we can't find them, just show a nasty error.
			logger.Error(fmt.Sprintf("Failed to find EC2 Tags: %s", err.Error()))
		} else {
			for tag, value := range tags {
				agent.MetaData = append(agent.MetaData, fmt.Sprintf("%s=%s", tag, value))
			}
		}
	}

	// Add the hostname
	agent.Hostname, _ = os.Hostname()

	// Add the OS dump
	agent.OS, _ = OSDump()

	return agent
}
示例#4
0
func (r *JobRunner) Kill() error {
	if !r.cancelled {
		logger.Info("Canceling job %s", r.Job.ID)
		r.cancelled = true

		if r.process != nil {
			r.process.Kill()
		} else {
			logger.Error("No process to kill")
		}
	}

	return nil
}
示例#5
0
func (p *Process) signal(sig os.Signal) error {
	if p.command != nil && p.command.Process != nil {
		logger.Debug("[Process] Sending signal: %s to PID: %d", sig.String(), p.Pid)

		err := p.command.Process.Signal(syscall.SIGTERM)
		if err != nil {
			logger.Error("[Process] Failed to send signal: %s to PID: %d (%T: %v)", sig.String(), p.Pid, err, err)
			return err
		}
	} else {
		logger.Debug("[Process] No process to signal yet")
	}

	return nil
}
示例#6
0
// https://github.com/hnakamur/commango/blob/fe42b1cf82bf536ce7e24dceaef6656002e03743/os/executil/executil.go#L29
// TODO: Can this be better?
func getExitStatus(waitResult error) string {
	exitStatus := -1

	if waitResult != nil {
		if err, ok := waitResult.(*exec.ExitError); ok {
			if s, ok := err.Sys().(syscall.WaitStatus); ok {
				exitStatus = s.ExitStatus()
			} else {
				logger.Error("[Process] Unimplemented for system where exec.ExitError.Sys() is not syscall.WaitStatus.")
			}
		}
	} else {
		exitStatus = 0
	}

	return fmt.Sprintf("%d", exitStatus)
}
示例#7
0
// Starts the agent worker
func (a *AgentWorker) Start() error {
	// Mark the agent as running
	a.running = true

	// Create the intervals we'll be using
	pingInterval := time.Second * time.Duration(a.Agent.PingInterval)
	heartbeatInterval := time.Second * time.Duration(a.Agent.HearbeatInterval)

	// Setup and start the heartbeater
	go func() {
		// Keep the heartbeat running as long as the agent is
		for a.running {
			err := a.Heartbeat()
			if err != nil {
				logger.Error("Failed to heartbeat %s. Will try again in %s", err, heartbeatInterval)
			}

			time.Sleep(heartbeatInterval)
		}
	}()

	// Create the ticker and stop channels
	a.ticker = time.NewTicker(pingInterval)
	a.stop = make(chan struct{})

	// Continue this loop until the the ticker is stopped, and we received
	// a message on the stop channel.
	for {
		a.Ping()

		select {
		case <-a.ticker.C:
			continue
		case <-a.stop:
			a.ticker.Stop()
			return nil
		}
	}

	// Mark the agent as not running anymore
	a.running = false

	return nil
}
示例#8
0
// Performs a ping, which returns what action the agent should take next.
func (a *AgentWorker) Ping() {
	// Update the proc title
	a.UpdateProcTitle("pinging")

	ping, _, err := a.APIClient.Pings.Get()
	if err != nil {
		// If a ping fails, we don't really care, because it'll
		// ping again after the interval.
		logger.Warn("Failed to ping: %s", err)
		return
	}

	// Should we switch endpoints?
	if ping.Endpoint != "" && ping.Endpoint != a.Agent.Endpoint {
		// Before switching to the new one, do a ping test to make sure it's
		// valid. If it is, switch and carry on, otherwise ignore the switch
		// for now.
		newAPIClient := APIClient{Endpoint: ping.Endpoint, Token: a.Agent.AccessToken}.Create()
		newPing, _, err := newAPIClient.Pings.Get()
		if err != nil {
			logger.Warn("Failed to ping the new endpoint %s - ignoring switch for now (%s)", ping.Endpoint, err)
		} else {
			// Replace the APIClient and process the new ping
			a.APIClient = newAPIClient
			a.Agent.Endpoint = ping.Endpoint
			ping = newPing
		}
	}

	// Is there a message that should be shown in the logs?
	if ping.Message != "" {
		logger.Info(ping.Message)
	}

	// Should the agent disconnect?
	if ping.Action == "disconnect" {
		a.Stop(false)
		return
	}

	// If we don't have a job, there's nothing to do!
	if ping.Job == nil {
		// Update the proc title
		a.UpdateProcTitle("idle")

		return
	}

	// Update the proc title
	a.UpdateProcTitle(fmt.Sprintf("job %s", strings.Split(ping.Job.ID, "-")[0]))

	logger.Info("Assigned job %s. Accepting...", ping.Job.ID)

	// Accept the job. We'll retry on connection related issues, but if
	// Buildkite returns a 422 or 500 for example, we'll just bail out,
	// re-ping, and try the whole process again.
	var accepted *api.Job
	retry.Do(func(s *retry.Stats) error {
		accepted, _, err = a.APIClient.Jobs.Accept(ping.Job)

		if err != nil {
			if api.IsRetryableError(err) {
				logger.Warn("%s (%s)", err, s)
			} else {
				logger.Warn("Buildkite rejected the call to accept the job (%s)", err)
				s.Break()
			}
		}

		return err
	}, &retry.Config{Maximum: 30, Interval: 1 * time.Second})

	// If `accepted` is nil, then the job was never accepted
	if accepted == nil {
		logger.Error("Failed to accept job")
		return
	}

	// Now that the job has been accepted, we can start it.
	a.jobRunner, err = JobRunner{
		Endpoint:           accepted.Endpoint,
		Agent:              a.Agent,
		AgentConfiguration: a.AgentConfiguration,
		Job:                accepted,
	}.Create()

	// Was there an error creating the job runner?
	if err != nil {
		logger.Error("Failed to initialize job: %s", err)
		return
	}

	// Start running the job
	if err = a.jobRunner.Run(); err != nil {
		logger.Error("Failed to run job: %s", err)
	}

	// No more job, no more runner.
	a.jobRunner = nil
}
示例#9
0
func (p *Process) Start() error {
	c, err := shell.CommandFromString(p.Script)
	if err != nil {
		return err
	}

	p.command = exec.Command(c.Command, c.Args...)

	// Copy the current processes ENV and merge in the new ones. We do this
	// so the sub process gets PATH and stuff. We merge our path in over
	// the top of the current one so the ENV from Buildkite and the agent
	// take precedence over the agent
	currentEnv := os.Environ()
	p.command.Env = append(currentEnv, p.Env...)

	var waitGroup sync.WaitGroup

	lineReaderPipe, lineWriterPipe := io.Pipe()

	multiWriter := io.MultiWriter(&p.buffer, lineWriterPipe)

	logger.Info("Starting to run: %s", c.String())

	// Toggle between running in a pty
	if p.PTY {
		pty, err := StartPTY(p.command)
		if err != nil {
			p.ExitStatus = "1"
			return err
		}

		p.Pid = p.command.Process.Pid
		p.setRunning(true)

		waitGroup.Add(1)

		go func() {
			logger.Debug("[Process] Starting to copy PTY to the buffer")

			// Copy the pty to our buffer. This will block until it
			// EOF's or something breaks.
			_, err = io.Copy(multiWriter, pty)
			if e, ok := err.(*os.PathError); ok && e.Err == syscall.EIO {
				// We can safely ignore this error, because
				// it's just the PTY telling us that it closed
				// successfully.  See:
				// https://github.com/buildkite/agent/pull/34#issuecomment-46080419
				err = nil
			}

			if err != nil {
				logger.Error("[Process] PTY output copy failed with error: %T: %v", err, err)
			} else {
				logger.Debug("[Process] PTY has finished being copied to the buffer")
			}

			waitGroup.Done()
		}()
	} else {
		p.command.Stdout = multiWriter
		p.command.Stderr = multiWriter
		p.command.Stdin = nil

		err := p.command.Start()
		if err != nil {
			p.ExitStatus = "1"
			return err
		}

		p.Pid = p.command.Process.Pid
		p.setRunning(true)
	}

	logger.Info("[Process] Process is running with PID: %d", p.Pid)

	// Add the line callback routine to the waitGroup
	waitGroup.Add(1)

	go func() {
		logger.Debug("[LineScanner] Starting to read lines")

		reader := bufio.NewReader(lineReaderPipe)

		var appending []byte

		for {
			line, isPrefix, err := reader.ReadLine()
			if err != nil {
				if err == io.EOF {
					logger.Debug("[LineScanner] Encountered EOF")
					break
				}

				logger.Error("[LineScanner] Failed to read: (%T: %v)", err, err)
			}

			// If isPrefix is true, that means we've got a really
			// long line incoming, and we'll keep appending to it
			// until isPrefix is false (which means the long line
			// has ended.
			if isPrefix && appending == nil {
				logger.Debug("[LineScanner] Line is too long to read, going to buffer it until it finishes")
				appending = line

				continue
			}

			// Should we be appending?
			if appending != nil {
				appending = append(appending, line...)

				// No more isPrefix! Line is finished!
				if !isPrefix {
					logger.Debug("[LineScanner] Finished buffering long line")
					line = appending

					// Reset appending back to nil
					appending = nil
				} else {
					continue
				}
			}

			go p.LineCallback(string(line))
		}

		logger.Debug("[LineScanner] Finished")

		waitGroup.Done()
	}()

	// Call the StartCallback
	go p.StartCallback()

	// Wait until the process has finished. The returned error is nil if the command runs,
	// has no problems copying stdin, stdout, and stderr, and exits with a zero exit status.
	waitResult := p.command.Wait()

	// Close the line writer pipe
	lineWriterPipe.Close()

	// The process is no longer running at this point
	p.setRunning(false)

	// Find the exit status of the script
	p.ExitStatus = getExitStatus(waitResult)

	logger.Info("Process with PID: %d finished with Exit Status: %s", p.Pid, p.ExitStatus)

	// Sometimes (in docker containers) io.Copy never seems to finish. This is a mega
	// hack around it. If it doesn't finish after 1 second, just continue.
	logger.Debug("[Process] Waiting for routines to finish")
	err = timeoutWait(&waitGroup)
	if err != nil {
		logger.Debug("[Process] Timed out waiting for wait group: (%T: %v)", err, err)
	}

	// No error occurred so we can return nil
	return nil
}
示例#10
0
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error {
	var uploader Uploader

	// Determine what uploader to use
	if a.Destination != "" {
		if strings.HasPrefix(a.Destination, "s3://") {
			uploader = new(S3Uploader)
		} else {
			return errors.New("Unknown upload destination: " + a.Destination)
		}
	} else {
		uploader = new(FormUploader)
	}

	// Setup the uploader
	err := uploader.Setup(a.Destination, a.APIClient.DebugHTTP)
	if err != nil {
		return err
	}

	// Set the URL's of the artifacts based on the uploader
	for _, artifact := range artifacts {
		artifact.URL = uploader.URL(artifact)
	}

	// Create the artifacts on Buildkite
	batchCreator := ArtifactBatchCreator{
		APIClient:         a.APIClient,
		JobID:             a.JobID,
		Artifacts:         artifacts,
		UploadDestination: a.Destination,
	}
	artifacts, err = batchCreator.Create()
	if err != nil {
		return err
	}

	// Prepare a concurrency pool to upload the artifacts
	p := pool.New(pool.MaxConcurrencyLimit)
	errors := []error{}

	// Create a wait group so we can make sure the uploader waits for all
	// the artifact states to upload before finishing
	var stateUploaderWaitGroup sync.WaitGroup
	stateUploaderWaitGroup.Add(1)

	// A map to keep track of artifact states and how many we've uploaded
	artifactsStates := make(map[string]string)
	artifactStatesUploaded := 0

	// Spin up a gourtine that'll uploading artifact statuses every few
	// seconds in batches
	go func() {
		for artifactStatesUploaded < len(artifacts) {
			statesToUpload := make(map[string]string)

			// Grab all the states we need to upload, and remove
			// them from the tracking map
			for id, state := range artifactsStates {
				statesToUpload[id] = state
				delete(artifactsStates, id)
			}

			if len(statesToUpload) > 0 {
				artifactStatesUploaded += len(statesToUpload)
				for id, state := range statesToUpload {
					logger.Debug("Artifact `%s` has state `%s`", id, state)
				}

				// Update the states of the artifacts in bulk.
				err = retry.Do(func(s *retry.Stats) error {
					_, err = a.APIClient.Artifacts.Update(a.JobID, statesToUpload)
					if err != nil {
						logger.Warn("%s (%s)", err, s)
					}

					return err
				}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

				if err != nil {
					logger.Error("Error uploading artifact states: %s", err)

					// Track the error that was raised
					p.Lock()
					errors = append(errors, err)
					p.Unlock()
				}

				logger.Debug("Uploaded %d artfact states (%d/%d)", len(statesToUpload), artifactStatesUploaded, len(artifacts))
			}

			// Check again for states to upload in a few seconds
			time.Sleep(1 * time.Second)
		}

		stateUploaderWaitGroup.Done()
	}()

	for _, artifact := range artifacts {
		// Create new instance of the artifact for the goroutine
		// See: http://golang.org/doc/effective_go.html#channels
		artifact := artifact

		p.Spawn(func() {
			// Show a nice message that we're starting to upload the file
			logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize)

			// Upload the artifact and then set the state depending
			// on whether or not it passed. We'll retry the upload
			// a couple of times before giving up.
			err = retry.Do(func(s *retry.Stats) error {
				err := uploader.Upload(artifact)
				if err != nil {
					logger.Warn("%s (%s)", err, s)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})

			var state string

			// Did the upload eventually fail?
			if err != nil {
				logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err)

				// Track the error that was raised
				p.Lock()
				errors = append(errors, err)
				p.Unlock()

				state = "error"
			} else {
				state = "finished"
			}

			artifactsStates[artifact.ID] = state
		})
	}

	// Wait for the pool to finish
	p.Wait()

	// Wait for the statuses to finish uploading
	stateUploaderWaitGroup.Wait()

	if len(errors) > 0 {
		logger.Fatal("There were errors with uploading some of the artifacts")
	}

	return nil
}
示例#11
0
func (a *ArtifactDownloader) Download() error {
	// Turn the download destination into an absolute path and confirm it exists
	downloadDestination, _ := filepath.Abs(a.Destination)
	fileInfo, err := os.Stat(downloadDestination)
	if err != nil {
		logger.Fatal("Could not find information about destination: %s", downloadDestination)
	}
	if !fileInfo.IsDir() {
		logger.Fatal("%s is not a directory", downloadDestination)
	}

	// Find the artifacts that we want to download
	searcher := ArtifactSearcher{BuildID: a.BuildID, APIClient: a.APIClient}
	artifacts, err := searcher.Search(a.Query, a.Step)
	if err != nil {
		return err
	}

	artifactCount := len(artifacts)

	if artifactCount == 0 {
		logger.Info("No artifacts found for downloading")
	} else {
		logger.Info("Found %d artifacts. Starting to download to: %s", artifactCount, downloadDestination)

		p := pool.New(pool.MaxConcurrencyLimit)
		errors := []error{}

		for _, artifact := range artifacts {
			// Create new instance of the artifact for the goroutine
			// See: http://golang.org/doc/effective_go.html#channels
			artifact := artifact

			p.Spawn(func() {
				var err error

				// Handle downloading from S3 and GS
				if strings.HasPrefix(artifact.UploadDestination, "s3://") {
					err = S3Downloader{
						Path:        artifact.Path,
						Bucket:      artifact.UploadDestination,
						Destination: downloadDestination,
						Retries:     5,
						DebugHTTP:   a.APIClient.DebugHTTP,
					}.Start()
				} else if strings.HasPrefix(artifact.UploadDestination, "gs://") {
					err = GSDownloader{
						Path:        artifact.Path,
						Bucket:      artifact.UploadDestination,
						Destination: downloadDestination,
						Retries:     5,
						DebugHTTP:   a.APIClient.DebugHTTP,
					}.Start()
				} else {
					err = Download{
						URL:         artifact.URL,
						Path:        artifact.Path,
						Destination: downloadDestination,
						Retries:     5,
						DebugHTTP:   a.APIClient.DebugHTTP,
					}.Start()
				}

				// If the downloaded encountered an error, lock
				// the pool, collect it, then unlock the pool
				// again.
				if err != nil {
					logger.Error("Failed to download artifact: %s", err)

					p.Lock()
					errors = append(errors, err)
					p.Unlock()
				}
			})
		}

		p.Wait()

		if len(errors) > 0 {
			logger.Fatal("There were errors with downloading some of the artifacts")
		}
	}

	return nil
}
示例#12
0
func (a *ArtifactUploader) upload(artifacts []*api.Artifact) error {
	var uploader Uploader

	// Determine what uploader to use
	if a.Destination != "" {
		if strings.HasPrefix(a.Destination, "s3://") {
			uploader = new(S3Uploader)
		} else {
			return errors.New("Unknown upload destination: " + a.Destination)
		}
	} else {
		uploader = new(FormUploader)
	}

	// Setup the uploader
	err := uploader.Setup(a.Destination)
	if err != nil {
		return err
	}

	// Set the URL's of the artifacts based on the uploader
	for _, artifact := range artifacts {
		artifact.URL = uploader.URL(artifact)
	}

	// Create the artifacts on Buildkite
	batchCreator := ArtifactBatchCreator{
		APIClient: a.APIClient,
		JobID:     a.JobID,
		Artifacts: artifacts,
	}
	artifacts, err = batchCreator.Create()
	if err != nil {
		return err
	}

	p := pool.New(pool.MaxConcurrencyLimit)
	errors := []error{}

	for _, artifact := range artifacts {
		// Create new instance of the artifact for the goroutine
		// See: http://golang.org/doc/effective_go.html#channels
		artifact := artifact

		p.Spawn(func() {
			// Show a nice message that we're starting to upload the file
			logger.Info("Uploading \"%s\" %d bytes", artifact.Path, artifact.FileSize)

			// Upload the artifact and then set the state depending
			// on whether or not it passed. We'll retry the upload
			// a couple of times before giving up.
			err = retry.Do(func(s *retry.Stats) error {
				err := uploader.Upload(artifact)
				if err != nil {
					logger.Warn("%s (%s)", err, s)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
			if err != nil {
				artifact.State = "error"
				logger.Error("Error uploading artifact \"%s\": %s", artifact.Path, err)

				// Track the error that was raised
				p.Lock()
				errors = append(errors, err)
				p.Unlock()
			} else {
				artifact.State = "finished"
			}

			// Update the state of the artifact on Buildkite, we
			// retry this as well.
			err = retry.Do(func(s *retry.Stats) error {
				_, _, err = a.APIClient.Artifacts.Update(a.JobID, artifact)
				if err != nil {
					logger.Warn("%s (%s)", err, s)
				}

				return err
			}, &retry.Config{Maximum: 10, Interval: 1 * time.Second})
			if err != nil {
				logger.Error("Error marking artifact %s as uploaded: %s", artifact.Path, err)

				// Track the error that was raised
				p.Lock()
				errors = append(errors, err)
				p.Unlock()
			}
		})
	}

	p.Wait()

	if len(errors) > 0 {
		logger.Fatal("There were errors with uploading some of the artifacts")
	}

	return nil
}