Example #1
0
// Write appends p to the writer's in-memory buffer and returns the result of
// that buffer write.
//
// Writing to a closed writer fails with an error. Every accepted call resets
// the writer's timer to w.timeout and adds len(p) to the running byte count.
// Once that count exceeds w.maxLength, a "log length exceeded" notice is sent
// through WriteAndClose and ErrWrotePastMaxLogLength is returned; p itself is
// not buffered in that case.
func (w *amqpLogWriter) Write(p []byte) (int, error) {
	if w.closed() {
		return 0, fmt.Errorf("attempted write to closed log")
	}

	debugFields := logrus.Fields{
		"length": len(p),
		"bytes":  string(p),
	}
	context.LoggerFromContext(w.ctx).WithFields(debugFields).Debug("writing bytes")

	w.timer.Reset(w.timeout)

	w.bytesWritten += len(p)
	if w.bytesWritten <= w.maxLength {
		// Still within the limit: hand the bytes to the shared buffer.
		w.bufferMutex.Lock()
		defer w.bufferMutex.Unlock()
		return w.buffer.Write(p)
	}

	// Limit exceeded: emit the final notice and refuse the write.
	limitMB := w.maxLength / 1000 / 1000
	notice := fmt.Sprintf("\n\nThe log length has exceeded the limit of %d MB (this usually means that the test suite is raising the same exception over and over).\n\nThe job has been terminated\n", limitMB)
	if _, closeErr := w.WriteAndClose([]byte(notice)); closeErr != nil {
		context.LoggerFromContext(w.ctx).WithField("err", closeErr).Error("couldn't write 'log length exceeded' error message to log")
	}
	return 0, ErrWrotePastMaxLogLength
}
// Run generates the build script for the job stored in state under
// "buildJob" and stores the result under "script".
//
// Generation is retried with exponential backoff (intervals capped at 10
// seconds, giving up after one minute). If it still fails, the job is marked
// as errored via buildJob.Error and the step halts the runner.
func (s *stepGenerateScript) Run(state multistep.StateBag) multistep.StepAction {
	buildJob := state.Get("buildJob").(Job)
	ctx := state.Get("ctx").(gocontext.Context)

	b := backoff.NewExponentialBackOff()
	b.MaxInterval = 10 * time.Second
	b.MaxElapsedTime = time.Minute

	var script []byte
	err := backoff.Retry(func() error {
		var genErr error
		script, genErr = s.generator.Generate(ctx, buildJob.RawPayload())
		return genErr
	}, b)

	if err != nil {
		context.LoggerFromContext(ctx).WithField("err", err).Error("couldn't generate build script, erroring job")
		// The job is errored here, not requeued, so the log message says so.
		if errorErr := buildJob.Error(ctx, "An error occurred while generating the build script."); errorErr != nil {
			context.LoggerFromContext(ctx).WithField("err", errorErr).Error("couldn't mark job as errored")
		}

		return multistep.ActionHalt
	}

	context.LoggerFromContext(ctx).Info("generated script")

	state.Put("script", script)

	return multistep.ActionContinue
}
// runProcessor creates one processor fed from queue, registers it in
// p.processors, and runs it; it returns once proc.Run() returns.
//
// The processor gets its own context derived from p.Context and a freshly
// generated UUID. An error from opening the jobs channel or from constructing
// the processor is logged and returned.
func (p *ProcessorPool) runProcessor(queue JobQueue) error {
	// logAndReturn records a setup failure on the pool logger and hands the
	// error back so it can be returned directly.
	logAndReturn := func(err error, msg string) error {
		context.LoggerFromContext(p.Context).WithField("err", err).Error(msg)
		return err
	}

	id := uuid.NewRandom()
	ctx := context.FromProcessor(p.Context, id.String())

	jobsChan, err := queue.Jobs(ctx)
	if err != nil {
		return logAndReturn(err, "couldn't create jobs channel")
	}

	proc, err := NewProcessor(ctx, p.Hostname, jobsChan, p.Provider, p.Generator, p.Canceller, p.HardTimeout, p.LogTimeout)
	if err != nil {
		return logAndReturn(err, "couldn't create processor")
	}
	proc.SkipShutdownOnLogTimeout = p.SkipShutdownOnLogTimeout

	p.processorsLock.Lock()
	p.processors = append(p.processors, proc)
	p.processorsLock.Unlock()

	proc.Run()
	return nil
}
Example #4
0
// processCommand decodes one worker command from delivery and, for a
// "cancel_job" command, closes the cancellation channel registered for that
// job ID.
//
// Only a JSON decoding failure is returned as an error. An unknown command
// type, an unknown job ID and an already-cancelled job are logged and
// reported as nil.
func (d *AMQPCanceller) processCommand(delivery amqp.Delivery) error {
	var command cancelCommand
	if err := json.Unmarshal(delivery.Body, &command); err != nil {
		context.LoggerFromContext(d.ctx).WithField("err", err).Error("unable to parse JSON")
		return err
	}

	if command.Type != "cancel_job" {
		context.LoggerFromContext(d.ctx).WithField("command", command.Type).Error("unknown worker command")
		return nil
	}

	d.cancelMutex.Lock()
	defer d.cancelMutex.Unlock()

	jobLogger := context.LoggerFromContext(d.ctx).WithField("command", command.Type).WithField("job", command.JobID)

	cancelChan, found := d.cancelMap[command.JobID]
	switch {
	case !found:
		jobLogger.Info("no job with this ID found on this worker")
	case tryClose(cancelChan):
		jobLogger.Info("cancelling job")
	default:
		jobLogger.Warn("job already cancelled")
	}

	return nil
}
// Run asks the provider to start an instance for the job found in state and
// stores the started instance under "instance".
//
// The start call is bounded by s.startTimeout. If it fails, the job is
// requeued and the runner is halted.
func (s *stepStartInstance) Run(state multistep.StateBag) multistep.StepAction {
	buildJob := state.Get("buildJob").(Job)
	ctx := state.Get("ctx").(gocontext.Context)

	context.LoggerFromContext(ctx).Info("starting instance")

	ctx, cancel := gocontext.WithTimeout(ctx, s.startTimeout)
	defer cancel()

	bootStarted := time.Now()

	instance, startErr := s.provider.Start(ctx, buildJob.StartAttributes())
	if startErr != nil {
		context.LoggerFromContext(ctx).WithField("err", startErr).Error("couldn't start instance")
		if requeueErr := buildJob.Requeue(); requeueErr != nil {
			context.LoggerFromContext(ctx).WithField("err", requeueErr).Error("couldn't requeue job")
		}

		return multistep.ActionHalt
	}

	bootTime := time.Since(bootStarted)
	context.LoggerFromContext(ctx).WithField("boot_time", bootTime).Info("started instance")

	state.Put("instance", instance)

	return multistep.ActionContinue
}
// Run uploads the build script stored under "script" to the instance stored
// under "instance", bounded by s.uploadTimeout.
//
// On failure it marks an upload-error metric (with a ".stalevm" suffix when
// the error is backend.ErrStaleVM), requeues the job and halts the runner.
func (s *stepUploadScript) Run(state multistep.StateBag) multistep.StepAction {
	ctx := state.Get("ctx").(gocontext.Context)
	buildJob := state.Get("buildJob").(Job)

	instance := state.Get("instance").(backend.Instance)
	script := state.Get("script").([]byte)

	ctx, cancel := gocontext.WithTimeout(ctx, s.uploadTimeout)
	defer cancel()

	uploadErr := instance.UploadScript(ctx, script)
	if uploadErr == nil {
		context.LoggerFromContext(ctx).Info("uploaded script")
		return multistep.ActionContinue
	}

	metricName := "worker.job.upload.error"
	if uploadErr == backend.ErrStaleVM {
		metricName = metricName + ".stalevm"
	}
	metrics.Mark(metricName)

	context.LoggerFromContext(ctx).WithField("err", uploadErr).Error("couldn't upload script, attemping requeue")

	if requeueErr := buildJob.Requeue(); requeueErr != nil {
		context.LoggerFromContext(ctx).WithField("err", requeueErr).Error("couldn't requeue job")
	}

	return multistep.ActionHalt
}
// pollInDirTick scans f.createdDir once and emits a fileJob on f.buildJobChan
// for every regular "*.json" entry that can be read and parsed.
//
// Each file is decoded three times: into the job payload, into the start
// attributes wrapper, and into a raw simplejson document. A file that fails
// any of these is logged and skipped. The scan stops early if ctx is done
// while waiting to hand a job to the channel.
func (f *FileJobQueue) pollInDirTick(ctx gocontext.Context) {
	logger := context.LoggerFromContext(ctx)

	entries, err := ioutil.ReadDir(f.createdDir)
	if err != nil {
		logger.WithField("err", err).Error("input directory read error")
		return
	}

	logger.WithFields(logrus.Fields{
		"entries":        entries,
		"file_job_queue": fmt.Sprintf("%p", f),
	}).Debug("entries")

	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() || !strings.HasSuffix(name, ".json") {
			continue
		}

		buildJob := &fileJob{
			createdFile: filepath.Join(f.createdDir, name),
			payload:     &JobPayload{},
		}
		startAttrs := &jobPayloadStartAttrs{Config: &backend.StartAttributes{}}

		fb, err := ioutil.ReadFile(buildJob.createdFile)
		if err != nil {
			logger.WithField("err", err).Error("input file read error")
			continue
		}

		if err := json.Unmarshal(fb, buildJob.payload); err != nil {
			logger.WithField("err", err).Error("payload JSON parse error")
			continue
		}

		// Pass the pointer itself rather than its address: decoding a JSON
		// null through a pointer-to-pointer would set startAttrs to nil and
		// make the startAttrs.Config access below panic.
		if err := json.Unmarshal(fb, startAttrs); err != nil {
			logger.WithField("err", err).Error("start attributes JSON parse error")
			continue
		}

		buildJob.rawPayload, err = simplejson.NewJson(fb)
		if err != nil {
			logger.WithField("err", err).Error("raw payload JSON parse error")
			continue
		}

		buildJob.startAttributes = startAttrs.Config
		buildJob.receivedFile = filepath.Join(f.receivedDir, name)
		buildJob.startedFile = filepath.Join(f.startedDir, name)
		buildJob.finishedFile = filepath.Join(f.finishedDir, name)
		// Swap only the trailing extension; a ".json" elsewhere in the name
		// must be left alone.
		buildJob.logFile = filepath.Join(f.logDir, strings.TrimSuffix(name, ".json")+".log")
		buildJob.bytes = fb

		// Do not block forever on a consumer that went away with ctx.
		select {
		case f.buildJobChan <- buildJob:
		case <-ctx.Done():
			return
		}
	}
}
Example #8
0
// GracefulShutdown asks the processor to stop picking up new jobs once the
// job it is currently processing has finished. It returns immediately; the
// processor has actually stopped when Run() returns.
//
// A panic raised while signalling the shutdown is recovered and logged
// instead of propagating to the caller.
func (p *Processor) GracefulShutdown() {
	defer func() {
		if recovered := recover(); recovered != nil {
			context.LoggerFromContext(p.ctx).WithField("err", recovered).Error("recovered from panic")
		}
	}()

	logger := context.LoggerFromContext(p.ctx)
	logger.Info("processor initiating graceful shutdown")
	tryClose(p.graceful)
}
Example #9
0
// writeLogAndFinishWithState writes logMessage as the final log output,
// closing the log writer, and then finishes buildJob with the given state.
//
// Both operations are always attempted; a failure of either one is logged
// and not returned.
func (s *stepRunScript) writeLogAndFinishWithState(ctx gocontext.Context, logWriter LogWriter, buildJob Job, state FinishState, logMessage string) {
	if _, writeErr := logWriter.WriteAndClose([]byte(logMessage)); writeErr != nil {
		context.LoggerFromContext(ctx).WithField("err", writeErr).Error("couldn't write final log message")
	}

	if finishErr := buildJob.Finish(state); finishErr != nil {
		context.LoggerFromContext(ctx).WithField("err", finishErr).WithField("state", state).Error("couldn't update job state")
	}
}
Example #10
0
// Cleanup finishes the job according to the script result stored in state
// under "scriptResult", if there is one.
//
// Exit code 0 finishes the job as passed, 1 as failed, and anything else as
// errored. Without a script result nothing is done. A failure to finish the
// job is logged only.
func (s *stepUpdateState) Cleanup(state multistep.StateBag) {
	buildJob := state.Get("buildJob").(Job)
	ctx := state.Get("ctx").(gocontext.Context)

	mresult, ok := state.GetOk("scriptResult")
	if !ok {
		return
	}
	result := mresult.(*backend.RunResult)

	var finishErr error
	if result.ExitCode == 0 {
		finishErr = buildJob.Finish(FinishStatePassed)
	} else if result.ExitCode == 1 {
		finishErr = buildJob.Finish(FinishStateFailed)
	} else {
		finishErr = buildJob.Finish(FinishStateErrored)
	}

	if finishErr != nil {
		context.LoggerFromContext(ctx).WithField("err", finishErr).Error("couldn't mark job as finished")
	}
}
Example #11
0
// Stop deletes the instance by running the delete and wait-for-deletion
// steps in a background goroutine, and returns the first value those steps
// report on the error channel.
//
// If ctx is done first, ctx.Err() is returned instead, and a timeout metric
// is marked when the cause is a deadline. The background steps are not
// interrupted in that case.
func (i *gceInstance) Stop(ctx gocontext.Context) error {
	logger := context.LoggerFromContext(ctx)
	state := &multistep.BasicStateBag{}

	c := &gceInstanceStopContext{ctx: ctx}

	steps := []multistep.Step{
		&gceInstanceStopMultistepWrapper{c: c, f: i.stepDeleteInstance},
		&gceInstanceStopMultistepWrapper{c: c, f: i.stepWaitForInstanceDeleted},
	}

	// Buffer the channel so a step can still deliver its result after this
	// method has returned through the ctx.Done() branch; with an unbuffered
	// channel the runner goroutine would block on that send forever.
	// NOTE(review): sized on the assumption that each step sends at most
	// once — confirm against stepDeleteInstance.
	c.errChan = make(chan error, len(steps))

	runner := &multistep.BasicRunner{Steps: steps}

	logger.WithField("instance", i.instance.Name).Info("deleting instance")
	go runner.Run(state)

	logger.Debug("selecting over error and done channels")
	select {
	case err := <-c.errChan:
		return err
	case <-ctx.Done():
		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.gce.delete.timeout")
		}
		return ctx.Err()
	}
}
Example #12
0
// apiRateLimit blocks until the shared "gce-api" rate limiter grants a call
// slot, and returns nil once it does.
//
// While waiting it retries after a random pause of up to one second. It
// gives up and returns the limiter's error after five consecutive limiter
// errors, and returns ctx.Err() if ctx is done while it is waiting. The
// number of concurrent waiters and the time spent waiting are reported as
// metrics.
func (p *gceProvider) apiRateLimit(ctx gocontext.Context) error {
	// The depth counter is updated atomically below, so read it the same way.
	metrics.Gauge("travis.worker.vm.provider.gce.rate-limit.queue", int64(atomic.LoadUint64(&p.rateLimitQueueDepth)))
	startWait := time.Now()
	defer metrics.TimeSince("travis.worker.vm.provider.gce.rate-limit", startWait)

	atomic.AddUint64(&p.rateLimitQueueDepth, 1)
	// This decrements the counter, see the docs for atomic.AddUint64
	defer atomic.AddUint64(&p.rateLimitQueueDepth, ^uint64(0))

	errCount := 0

	for {
		ok, err := p.rateLimiter.RateLimit("gce-api", p.rateLimitMaxCalls, p.rateLimitDuration)
		if err != nil {
			errCount++
			if errCount >= 5 {
				context.CaptureError(ctx, err)
				context.LoggerFromContext(ctx).WithField("err", err).Info("rate limiter errored 5 times")
				return err
			}
		} else {
			errCount = 0
		}
		if ok {
			return nil
		}

		// Pause for up to 1 second, but stop waiting as soon as the caller's
		// context is cancelled or times out.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(time.Millisecond * time.Duration(mathrand.Intn(1000))):
		}
	}
}
Example #13
0
// process runs the full step pipeline for one build job and increments
// p.ProcessedCount when the pipeline has returned.
//
// The state bag handed to the steps carries the processor hostname, the job
// and ctx. The log-silence timeout defaults to p.logTimeout and is replaced
// by the job payload's LogSilence value (in seconds) when that is non-zero.
func (p *Processor) process(ctx gocontext.Context, buildJob Job) {
	state := new(multistep.BasicStateBag)
	state.Put("hostname", p.fullHostname())
	state.Put("buildJob", buildJob)
	state.Put("ctx", ctx)

	logTimeout := p.logTimeout
	if silence := buildJob.Payload().Timeouts.LogSilence; silence != 0 {
		logTimeout = time.Duration(silence) * time.Second
	}

	runner := &multistep.BasicRunner{
		Steps: []multistep.Step{
			&stepSubscribeCancellation{canceller: p.canceller},
			&stepGenerateScript{generator: p.generator},
			&stepSendReceived{},
			&stepStartInstance{
				provider:     p.provider,
				startTimeout: p.startupTimeout,
			},
			&stepUploadScript{uploadTimeout: p.scriptUploadTimeout},
			&stepUpdateState{},
			&stepOpenLogWriter{
				logTimeout:   logTimeout,
				maxLogLength: 4500000,
			},
			&stepRunScript{
				logTimeout:               logTimeout,
				hardTimeout:              p.hardTimeout,
				skipShutdownOnLogTimeout: p.SkipShutdownOnLogTimeout,
			},
		},
	}

	logger := context.LoggerFromContext(ctx)
	logger.Info("starting job")
	runner.Run(state)
	context.LoggerFromContext(ctx).Info("finished job")
	p.ProcessedCount++
}
// Run creates a cancellation channel for the job, publishes its receive-only
// side in state under "cancelChan", and subscribes it with the canceller
// under the job's ID.
//
// If subscribing fails the job is requeued and the runner is halted.
func (s *stepSubscribeCancellation) Run(state multistep.StateBag) multistep.StepAction {
	ctx := state.Get("ctx").(gocontext.Context)
	buildJob := state.Get("buildJob").(Job)

	cancelChan := make(chan struct{})
	var receiveOnly <-chan struct{} = cancelChan
	state.Put("cancelChan", receiveOnly)

	subscribeErr := s.canceller.Subscribe(buildJob.Payload().Job.ID, cancelChan)
	if subscribeErr == nil {
		return multistep.ActionContinue
	}

	context.LoggerFromContext(ctx).WithField("err", subscribeErr).Error("couldn't subscribe to canceller")
	if requeueErr := buildJob.Requeue(); requeueErr != nil {
		context.LoggerFromContext(ctx).WithField("err", requeueErr).Error("couldn't requeue job")
	}
	return multistep.ActionHalt
}
Example #15
0
// flush drains the in-memory buffer, publishing its contents as numbered log
// parts of at most LogChunkSize bytes each.
//
// When publishing fails with an *amqp.Error the channel is reopened and the
// same part is published once more. If the channel cannot be reopened, flush
// gives up and returns. Any other publish failure is logged and the loop
// moves on to the next chunk.
func (w *amqpLogWriter) flush() {
	var buf []byte

	for {
		// Check the length and read under the same lock, so a concurrent
		// Write cannot race with either operation.
		w.bufferMutex.Lock()
		if w.buffer.Len() <= 0 {
			w.bufferMutex.Unlock()
			return
		}
		if buf == nil {
			buf = make([]byte, LogChunkSize)
		}
		n, err := w.buffer.Read(buf)
		w.bufferMutex.Unlock()
		if err != nil {
			// According to documentation, err should only be non-nil if
			// there's no data in the buffer. We've checked for this, so
			// this means that err should never be non-nil. Something is
			// very wrong if this happens, so let's abort!
			panic("non-empty buffer shouldn't return an error on Read")
		}

		part := amqpLogPart{
			JobID:   w.jobID,
			Content: string(buf[0:n]),
			Number:  w.logPartNumber,
		}
		w.logPartNumber++

		err = w.publishLogPart(part)
		if err == nil {
			continue
		}

		switch err.(type) {
		case *amqp.Error:
			if w.reopenChannel() != nil {
				context.LoggerFromContext(w.ctx).WithField("err", err).Error("couldn't publish log part and couldn't reopen channel")
				// Close or something
				return
			}

			// Only report the retry when it actually failed.
			if err = w.publishLogPart(part); err != nil {
				context.LoggerFromContext(w.ctx).WithField("err", err).Error("couldn't publish log part, even after reopening channel")
			}
		default:
			context.LoggerFromContext(w.ctx).WithField("err", err).Error("couldn't publish log part")
		}
	}
}
Example #16
0
// Run reports the job as received. A failure to do so is logged only; the
// step always lets the runner continue.
func (s *stepSendReceived) Run(state multistep.StateBag) multistep.StepAction {
	buildJob := state.Get("buildJob").(Job)
	ctx := state.Get("ctx").(gocontext.Context)

	if receivedErr := buildJob.Received(); receivedErr != nil {
		context.LoggerFromContext(ctx).WithField("err", receivedErr).Error("couldn't send received event")
	}

	return multistep.ActionContinue
}
// Cleanup stops the instance stored in state under "instance".
//
// Nothing is stopped when no instance is stored, or when "skipShutdown" is
// stored as true; in the latter case the VM is deliberately left running.
// The outcome of the stop call is logged only.
func (s *stepStartInstance) Cleanup(state multistep.StateBag) {
	ctx := state.Get("ctx").(gocontext.Context)

	instance, hasInstance := state.Get("instance").(backend.Instance)
	if !hasInstance {
		context.LoggerFromContext(ctx).Info("no instance to stop")
		return
	}

	// A missing or non-bool value leaves skip at false.
	if skip, _ := state.Get("skipShutdown").(bool); skip {
		context.LoggerFromContext(ctx).WithFields(logrus.Fields{"instance": instance}).Error("skipping shutdown, VM will be left running")
		return
	}

	stopErr := instance.Stop(ctx)
	if stopErr != nil {
		context.LoggerFromContext(ctx).WithFields(logrus.Fields{"err": stopErr, "instance": instance}).Warn("couldn't stop instance")
		return
	}
	context.LoggerFromContext(ctx).Info("stopped instance")
}
Example #18
0
// Start creates a Blue Box block for the given start attributes and waits
// until the block reports the "running" status.
//
// The block is polled every five seconds in a background goroutine. If ctx
// is done before the block is running, the block is destroyed, a timeout
// metric is marked when the cause is a deadline, and ctx.Err() is returned.
// An error from creating the block is returned as-is.
func (b *blueBoxProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) {
	password := generatePassword()
	params := goblueboxapi.BlockParams{
		Product:  b.cfg.Get("PRODUCT_ID"),
		Template: b.templateIDForLanguageGroup(startAttributes.Language, startAttributes.Group),
		Location: b.cfg.Get("LOCATION_ID"),
		Hostname: fmt.Sprintf("testing-bb-%s", uuid.NewRandom()),
		Username: "******",
		Password: password,
		IPv6Only: b.cfg.Get("IPV6_ONLY") == "true",
	}

	startBooting := time.Now()

	block, err := b.client.Blocks.Create(params)
	if err != nil {
		return nil, err
	}

	// Buffered so the poller can always hand over its result and exit, even
	// when this method has already returned through the ctx.Done() branch.
	blockReady := make(chan *goblueboxapi.Block, 1)
	go func(id string) {
		for {
			polled, err := b.client.Blocks.Get(id)
			if err == nil && polled.Status == "running" {
				blockReady <- polled
				return
			}

			// Wait before polling again, and stop polling once the caller
			// has given up; otherwise this goroutine would run forever.
			select {
			case <-ctx.Done():
				return
			case <-time.After(5 * time.Second):
			}
		}
	}(block.ID)

	select {
	case block := <-blockReady:
		metrics.TimeSince("worker.vm.provider.bluebox.boot", startBooting)
		return &blueBoxInstance{
			client:   b.client,
			block:    block,
			password: password,
		}, nil
	case <-ctx.Done():
		if block != nil {
			err := b.client.Blocks.Destroy(block.ID)
			if err != nil {
				context.LoggerFromContext(ctx).WithField("block", block).WithField("err", err).Error("could not destroy block")
			}
		}

		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.bluebox.boot.timeout")
		}
		return nil, ctx.Err()
	}
}
Example #19
0
// Run marks the job as started. A failure to do so is logged only; the step
// always lets the runner continue.
func (s *stepUpdateState) Run(state multistep.StateBag) multistep.StepAction {
	buildJob := state.Get("buildJob").(Job)
	ctx := state.Get("ctx").(gocontext.Context)

	if startedErr := buildJob.Started(); startedErr != nil {
		context.LoggerFromContext(ctx).WithField("err", startedErr).Error("couldn't mark job as started")
	}

	return multistep.ActionContinue
}
Example #20
0
// Run makes the AMQPCanceller listen on the worker command queue and
// dispatch every incoming command.
//
// It opens a channel, declares the "worker.commands" fanout exchange, binds
// a server-named queue to it and consumes deliveries until the delivery
// channel is closed. Any setup failure is logged and ends Run early. Each
// delivery is acknowledged whether or not processing it succeeded.
func (d *AMQPCanceller) Run() {
	// report logs a setup failure with the canceller's logger.
	report := func(err error, msg string) {
		context.LoggerFromContext(d.ctx).WithField("err", err).Error(msg)
	}

	amqpChan, err := d.conn.Channel()
	if err != nil {
		report(err, "couldn't open channel")
		return
	}
	defer amqpChan.Close()

	if err := amqpChan.Qos(1, 0, false); err != nil {
		report(err, "couldn't set prefetch")
		return
	}

	if err := amqpChan.ExchangeDeclare("worker.commands", "fanout", false, false, false, false, nil); err != nil {
		report(err, "couldn't declare exchange")
		return
	}

	queue, err := amqpChan.QueueDeclare("", true, false, true, false, nil)
	if err != nil {
		report(err, "couldn't declare queue")
		return
	}

	if err := amqpChan.QueueBind(queue.Name, "", "worker.commands", false, nil); err != nil {
		report(err, "couldn't bind queue to exchange")
		return
	}

	deliveries, err := amqpChan.Consume(queue.Name, "commands", false, true, false, false, nil)
	if err != nil {
		report(err, "couldn't consume queue")
		return
	}

	for delivery := range deliveries {
		if processErr := d.processCommand(delivery); processErr != nil {
			context.LoggerFromContext(d.ctx).WithField("err", processErr).WithField("delivery", delivery).Error("couldn't process delivery")
		}

		if ackErr := delivery.Ack(false); ackErr != nil {
			context.LoggerFromContext(d.ctx).WithField("err", ackErr).WithField("delivery", delivery).Error("couldn't ack delivery")
		}
	}
}
Example #21
0
// getImage picks the image to boot for the given start attributes.
//
// The "env" and "api" selector types go through imageSelect. Any other
// selector type is logged as unknown and falls back to legacyImageSelect.
func (p *gceProvider) getImage(ctx gocontext.Context, startAttributes *StartAttributes) (*compute.Image, error) {
	logger := context.LoggerFromContext(ctx)

	selector := p.imageSelectorType
	if selector == "env" || selector == "api" {
		return p.imageSelect(ctx, startAttributes)
	}

	fields := logrus.Fields{
		"selector_type": p.imageSelectorType,
	}
	logger.WithFields(fields).Warn("unknown image selector, falling back to legacy image selection")
	return p.legacyImageSelect(ctx, startAttributes)
}
// Run opens the job's log writer, applies the step's timeout and maximum log
// length to it, and stores it in state under "logWriter".
//
// If the writer cannot be opened the job is requeued and the runner is
// halted.
func (s *stepOpenLogWriter) Run(state multistep.StateBag) multistep.StepAction {
	ctx := state.Get("ctx").(gocontext.Context)
	buildJob := state.Get("buildJob").(Job)

	logWriter, openErr := buildJob.LogWriter(ctx)
	if openErr != nil {
		context.LoggerFromContext(ctx).WithField("err", openErr).Error("couldn't open a log writer")
		if requeueErr := buildJob.Requeue(); requeueErr != nil {
			context.LoggerFromContext(ctx).WithField("err", requeueErr).Error("couldn't requeue job")
		}
		return multistep.ActionHalt
	}

	// Configure the writer before anything is written through it.
	logWriter.SetTimeout(s.logTimeout)
	logWriter.SetMaxLogLength(s.maxLogLength)

	s.writeUsingWorker(state, logWriter)

	state.Put("logWriter", logWriter)

	return multistep.ActionContinue
}
Example #23
0
// stepWaitForInstanceDeleted polls the instance delete operation until it is
// done and reports the outcome on c.errChan.
//
// When i.ic.SkipStopPoll is set, nil is reported immediately. Otherwise the
// step sleeps for i.ic.StopPrePollSleep and then polls with exponential
// backoff for at most two minutes. The step halts the runner when polling
// ends with an error.
func (i *gceInstance) stepWaitForInstanceDeleted(c *gceInstanceStopContext) multistep.StepAction {
	logger := context.LoggerFromContext(c.ctx)

	// report delivers the outcome to whoever is waiting on errChan. If the
	// context is done the waiter may already have returned, so the send must
	// not block forever.
	report := func(err error) {
		select {
		case c.errChan <- err:
		case <-c.ctx.Done():
		}
	}

	if i.ic.SkipStopPoll {
		logger.Debug("skipping instance deletion polling")
		report(nil)
		return multistep.ActionContinue
	}

	logger.WithFields(logrus.Fields{
		"duration": i.ic.StopPrePollSleep,
	}).Debug("sleeping before first checking instance delete operation")

	time.Sleep(i.ic.StopPrePollSleep)

	zoneOpCall := i.client.ZoneOperations.Get(i.projectID,
		i.ic.Zone.Name, c.instanceDeleteOp.Name)

	b := backoff.NewExponentialBackOff()
	b.InitialInterval = i.ic.StopPollSleep
	b.MaxInterval = 10 * i.ic.StopPollSleep
	b.MaxElapsedTime = 2 * time.Minute

	err := backoff.Retry(func() error {
		// Rate limiting is best-effort here: apiRateLimit captures and logs
		// its own failures, and the poll goes ahead regardless.
		// NOTE(review): assumes i.provider is the *gceProvider whose
		// apiRateLimit takes a context — confirm.
		_ = i.provider.apiRateLimit(c.ctx)
		newOp, err := zoneOpCall.Do()
		if err != nil {
			return err
		}

		if newOp.Status == "DONE" {
			if newOp.Error != nil {
				return &gceOpError{Err: newOp.Error}
			}

			return nil
		}

		return errGCEInstanceDeletionNotDone
	}, b)

	report(err)

	if err != nil {
		return multistep.ActionHalt
	}

	return multistep.ActionContinue
}
Example #24
0
// Start boots a GCE instance for the given start attributes by running the
// image, script, insert and wait-for-IP steps in a background goroutine.
//
// It returns the instance delivered on the instance channel, the first error
// delivered on the error channel, or ctx.Err() if ctx is done first (marking
// a timeout metric when the cause is a deadline). When the start is
// abandoned after an instance was already recorded in the start context, a
// best-effort delete of that instance is issued before returning.
func (p *gceProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) {
	logger := context.LoggerFromContext(ctx)

	state := &multistep.BasicStateBag{}

	// The result channels are buffered so a step can still deliver its
	// outcome after this method has returned through the ctx.Done() branch;
	// with unbuffered channels the runner goroutine would block forever.
	// NOTE(review): assumes the runner halts after the first error a step
	// reports — confirm against the step implementations.
	c := &gceStartContext{
		startAttributes: startAttributes,
		ctx:             ctx,
		instChan:        make(chan Instance, 1),
		errChan:         make(chan error, 1),
	}

	runner := &multistep.BasicRunner{
		Steps: []multistep.Step{
			&gceStartMultistepWrapper{c: c, f: p.stepGetImage},
			&gceStartMultistepWrapper{c: c, f: p.stepRenderScript},
			&gceStartMultistepWrapper{c: c, f: p.stepInsertInstance},
			&gceStartMultistepWrapper{c: c, f: p.stepWaitForInstanceIP},
		},
	}

	abandonedStart := false

	defer func(c *gceStartContext) {
		if c.instance != nil && abandonedStart {
			// Cleanup is best-effort: the delete is attempted even when the
			// rate limiter gives up, and apiRateLimit captures and logs its
			// own failures.
			_ = p.apiRateLimit(ctx)
			_, _ = p.client.Instances.Delete(p.projectID, p.ic.Zone.Name, c.instance.Name).Do()
		}
	}(c)

	logger.Info("starting instance")
	go runner.Run(state)

	logger.Debug("selecting over instance, error, and done channels")
	select {
	case inst := <-c.instChan:
		return inst, nil
	case err := <-c.errChan:
		abandonedStart = true
		return nil, err
	case <-ctx.Done():
		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.gce.boot.timeout")
		}
		abandonedStart = true
		return nil, ctx.Err()
	}
}
Example #25
0
// RunScript runs "bash ~/build.sh" on the instance over SSH, streaming both
// stdout and stderr to output.
//
// The result is marked Completed only when the command ran to an exit
// status: exit 0 on success, or the status carried by an *ssh.ExitError. The
// preemption check is made after the command returns and takes precedence
// over the command's own outcome. Every other failure yields an incomplete
// result together with the error.
func (i *gceInstance) RunScript(ctx gocontext.Context, output io.Writer) (*RunResult, error) {
	// incomplete builds the "did not finish" result paired with err.
	incomplete := func(err error) (*RunResult, error) {
		return &RunResult{Completed: false}, err
	}

	client, err := i.sshClient(ctx)
	if err != nil {
		return incomplete(err)
	}
	defer client.Close()

	session, err := client.NewSession()
	if err != nil {
		return incomplete(err)
	}
	defer session.Close()

	if err := session.RequestPty("xterm", 40, 80, ssh.TerminalModes{}); err != nil {
		return incomplete(err)
	}

	session.Stdout = output
	session.Stderr = output

	runErr := session.Run("bash ~/build.sh")

	preempted, googleErr := i.isPreempted(ctx)
	if googleErr != nil {
		context.LoggerFromContext(ctx).WithField("err", googleErr).Error("couldn't determine if instance was preempted")
		// No answer from Google, so report the run as incomplete to be safe.
		return incomplete(googleErr)
	}
	if preempted {
		metrics.Mark("travis.worker.gce.preempted-instances")
		return incomplete(nil)
	}

	if runErr == nil {
		return &RunResult{Completed: true, ExitCode: 0}, nil
	}

	if exitErr, isExit := runErr.(*ssh.ExitError); isExit {
		return &RunResult{Completed: true, ExitCode: uint8(exitErr.ExitStatus())}, nil
	}
	return incomplete(runErr)
}
// Run starts poolSize processors connected to the given queue and blocks
// until all of them have finished.
//
// If any errors were recorded in p.poolErrors while the pool was being
// populated, they are logged at panic level. Otherwise Run returns nil once
// the processors' wait group is done.
func (p *ProcessorPool) Run(poolSize int, queue JobQueue) error {
	p.queue = queue
	p.poolErrors = []error{}

	for remaining := poolSize; remaining > 0; remaining-- {
		p.Incr()
	}

	if len(p.poolErrors) != 0 {
		fields := logrus.Fields{
			"pool_errors": p.poolErrors,
		}
		context.LoggerFromContext(p.Context).WithFields(fields).Panic("failed to populate pool")
	}

	p.processorsWG.Wait()

	return nil
}
Example #27
0
// sshClient dials an SSH connection over tcp6 to port 22 of the block's
// first IP address, authenticating with the instance password.
//
// It returns errNoBlueBoxIP when the block has no IP addresses. A dial
// failure marks an SSH error metric, is logged, and is returned to the
// caller.
func (i *blueBoxInstance) sshClient(ctx gocontext.Context) (*ssh.Client, error) {
	if len(i.block.IPs) == 0 {
		return nil, errNoBlueBoxIP
	}

	addr := fmt.Sprintf("[%s]:22", i.block.IPs[0].Address)
	config := &ssh.ClientConfig{
		User: "******",
		Auth: []ssh.AuthMethod{
			ssh.Password(i.password),
		},
	}

	client, dialErr := ssh.Dial("tcp6", addr, config)
	if dialErr != nil {
		metrics.Mark("worker.vm.provider.bluebox.ssh.error")
		context.LoggerFromContext(ctx).WithField("block", i.block).WithField("vsh_id", i.block.VSHID).WithField("err", dialErr).Error("error connecting to SSH")
	}

	return client, dialErr
}
Example #28
0
// legacyImageSelect finds an image by language.
//
// Two candidates are tried in order: the language mapped through the
// LANGUAGE_MAP_<LANGUAGE> config key when that key is set (otherwise the
// requested language itself), then p.defaultLanguage. The first candidate
// with an image wins. If none has one, the result of the last lookup is
// returned.
func (p *gceProvider) legacyImageSelect(ctx gocontext.Context, startAttributes *StartAttributes) (*compute.Image, error) {
	logger := context.LoggerFromContext(ctx)

	var firstChoice string
	mappedLang := fmt.Sprintf("LANGUAGE_MAP_%s", strings.ToUpper(startAttributes.Language))
	if !p.cfg.IsSet(mappedLang) {
		logger.WithFields(logrus.Fields{
			"original": startAttributes.Language,
		}).Debug("adding original language to candidates")
		firstChoice = startAttributes.Language
	} else {
		logger.WithFields(logrus.Fields{
			"original": startAttributes.Language,
			"mapped":   p.cfg.Get(mappedLang),
		}).Debug("using mapped language to candidates")
		firstChoice = p.cfg.Get(mappedLang)
	}
	candidateLangs := []string{firstChoice, p.defaultLanguage}

	var (
		image *compute.Image
		err   error
	)

	for _, candidate := range candidateLangs {
		logger.WithFields(logrus.Fields{
			"original":  startAttributes.Language,
			"candidate": candidate,
		}).Debug("searching for image matching language")

		image, err = p.imageForLanguage(candidate)
		if err != nil {
			continue
		}

		logger.WithFields(logrus.Fields{
			"candidate": candidate,
			"image":     image,
		}).Debug("found matching image for language")
		return image, err
	}

	return image, err
}
Example #29
0
// Run starts the processor. This method will not return until the processor
// is terminated, either by calling the GracefulShutdown or Terminate methods,
// or if the build jobs channel is closed.
//
// Each job runs under a context tagged with its repository slug, job ID and
// (when present) UUID, bounded by the hard timeout: p.hardTimeout, or the
// job payload's HardLimit in seconds when that is non-zero. CurrentStatus is
// "processing" while a job runs, "waiting" between jobs, and "done" on every
// path out of Run.
func (p *Processor) Run() {
	context.LoggerFromContext(p.ctx).Info("starting processor")
	defer context.LoggerFromContext(p.ctx).Info("processor done")

	for {
		// Check for termination first, so that a pending shutdown always
		// wins over a job that is ready at the same time.
		select {
		case <-p.ctx.Done():
			context.LoggerFromContext(p.ctx).Info("processor is done, terminating")
			p.CurrentStatus = "done"
			return
		case <-p.graceful:
			context.LoggerFromContext(p.ctx).Info("processor is done, terminating")
			p.CurrentStatus = "done"
			return
		default:
		}

		select {
		case <-p.ctx.Done():
			context.LoggerFromContext(p.ctx).Info("processor is done, terminating")
			p.CurrentStatus = "done"
			return
		case <-p.graceful:
			context.LoggerFromContext(p.ctx).Info("processor is done, terminating")
			p.CurrentStatus = "done"
			return
		case buildJob, ok := <-p.buildJobsChan:
			if !ok {
				p.CurrentStatus = "done"
				return
			}

			hardTimeout := p.hardTimeout
			if buildJob.Payload().Timeouts.HardLimit != 0 {
				hardTimeout = time.Duration(buildJob.Payload().Timeouts.HardLimit) * time.Second
			}
			buildJob.StartAttributes().HardTimeout = hardTimeout

			ctx := context.FromJobID(context.FromRepository(p.ctx, buildJob.Payload().Repository.Slug), buildJob.Payload().Job.ID)
			if buildJob.Payload().UUID != "" {
				ctx = context.FromUUID(ctx, buildJob.Payload().UUID)
			}
			ctx, cancel := gocontext.WithTimeout(ctx, hardTimeout)
			p.LastJobID = buildJob.Payload().Job.ID
			p.CurrentStatus = "processing"
			p.process(ctx, buildJob)
			p.CurrentStatus = "waiting"
			cancel()
		}
	}
}
Example #30
0
// stepInsertInstance builds the instance definition from the start context's
// image and script and issues the insert call.
//
// On success the instance and the pending insert operation are recorded in
// c. On failure the error is reported on c.errChan and the runner is halted.
// c.bootStart is set just before the insert call.
func (p *gceProvider) stepInsertInstance(c *gceStartContext) multistep.StepAction {
	inst := p.buildInstance(c.startAttributes, c.image.SelfLink, c.script)

	context.LoggerFromContext(c.ctx).WithFields(logrus.Fields{
		"instance": inst,
	}).Debug("inserting instance")

	c.bootStart = time.Now().UTC()

	// Rate limiting is best-effort here: apiRateLimit captures and logs its
	// own failures, and the insert goes ahead regardless.
	_ = p.apiRateLimit(c.ctx)
	op, err := p.client.Instances.Insert(p.projectID, p.ic.Zone.Name, inst).Do()
	if err != nil {
		// If the context is done the receiver may already have given up, so
		// the send must not block forever.
		select {
		case c.errChan <- err:
		case <-c.ctx.Done():
		}
		return multistep.ActionHalt
	}

	c.instance = inst
	c.instanceInsertOp = op
	return multistep.ActionContinue
}