Example #1
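// apiRateLimit blocks until the rate limiter permits another GCE API call. It tracks the
// waiting-queue depth as a gauge, reports the total wait via the deferred metrics.TimeSince,
// and gives up after five consecutive rate-limiter errors.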
func (p *gceProvider) apiRateLimit(ctx gocontext.Context) error {
	metrics.Gauge("travis.worker.vm.provider.gce.rate-limit.queue", int64(atomic.LoadUint64(&p.rateLimitQueueDepth)))
	startWait := time.Now()
	defer metrics.TimeSince("travis.worker.vm.provider.gce.rate-limit", startWait)

	atomic.AddUint64(&p.rateLimitQueueDepth, 1)
	// This decrements the counter, see the docs for atomic.AddUint64
	defer atomic.AddUint64(&p.rateLimitQueueDepth, ^uint64(0))

	errCount := 0

	for {
		ok, err := p.rateLimiter.RateLimit("gce-api", p.rateLimitMaxCalls, p.rateLimitDuration)
		if err != nil {
			errCount++
			if errCount >= 5 {
				context.CaptureError(ctx, err)
				context.LoggerFromContext(ctx).WithField("err", err).Info("rate limiter errored 5 times")
				return err
			}
		} else {
			errCount = 0
		}
		if ok {
			return nil
		}

		// Sleep for up to 1 second
		time.Sleep(time.Millisecond * time.Duration(mathrand.Intn(1000)))
	}
}
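The ^uint64(0) in the deferred decrement is the subtraction idiom documented for sync/atomic: adding the all-ones value to an unsigned counter wraps around to subtracting one. A minimal standalone illustration:

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var queueDepth uint64

	atomic.AddUint64(&queueDepth, 1)          // increment
	atomic.AddUint64(&queueDepth, ^uint64(0)) // decrement: adding 2^64-1 wraps around to -1

	fmt.Println(atomic.LoadUint64(&queueDepth)) // prints 0
}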
Example #2
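// apiRateLimit is a channel-based variant: it blocks on the rate limiter's channel and
// records the queue depth and the time spent waiting.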
func (p *gceProvider) apiRateLimit() {
	atomic.AddUint64(&p.rateLimitQueueDepth, 1)
	metrics.Gauge("travis.worker.vm.provider.gce.rate-limit.queue", int64(atomic.LoadUint64(&p.rateLimitQueueDepth)))
	startWait := time.Now()
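	// Block until the rate limiter's channel yields a value, i.e. another API call is allowed.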
	<-p.rateLimiter.C
	metrics.TimeSince("travis.worker.vm.provider.gce.rate-limit", startWait)
	// This decrements the counter, see the docs for atomic.AddUint64
	atomic.AddUint64(&p.rateLimitQueueDepth, ^uint64(0))
}
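Example #3
// Start boots a Blue Box block and polls every five seconds until it reports "running".
// If the context is cancelled or times out first, the block is destroyed and the context
// error is returned; the boot duration is reported via metrics.TimeSince.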
func (b *blueBoxProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) {
	password := generatePassword()
	params := goblueboxapi.BlockParams{
		Product:  b.cfg.Get("PRODUCT_ID"),
		Template: b.templateIDForLanguageGroup(startAttributes.Language, startAttributes.Group),
		Location: b.cfg.Get("LOCATION_ID"),
		Hostname: fmt.Sprintf("testing-bb-%s", uuid.NewRandom()),
		Username: "******",
		Password: password,
		IPv6Only: b.cfg.Get("IPV6_ONLY") == "true",
	}

	startBooting := time.Now()

	block, err := b.client.Blocks.Create(params)
	if err != nil {
		return nil, err
	}

	blockReady := make(chan *goblueboxapi.Block, 1) // buffered so the poller goroutine can exit even if Start returns first
	go func(id string) {
		for {
			blk, err := b.client.Blocks.Get(id)
			if err == nil && blk.Status == "running" {
				blockReady <- blk
				return
			}

			time.Sleep(5 * time.Second)
		}
	}(block.ID)

	select {
	case block := <-blockReady:
		metrics.TimeSince("worker.vm.provider.bluebox.boot", startBooting)
		return &blueBoxInstance{
			client:   b.client,
			block:    block,
			password: password,
		}, nil
	case <-ctx.Done():
		if block != nil {
			err := b.client.Blocks.Destroy(block.ID)
			if err != nil {
				context.LoggerFromContext(ctx).WithField("block", block).WithField("err", err).Error("could not destroy block")
			}
		}

		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.bluebox.boot.timeout")
		}
		return nil, ctx.Err()
	}
}
Example #4
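// Started records when the job began running, reports the received-to-started latency,
// and publishes a job:test:start state update over AMQP.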
func (j *amqpJob) Started() error {
	j.started = time.Now()

	metrics.TimeSince("travis.worker.job.start_time", j.received)

	return j.sendStateUpdate("job:test:start", map[string]interface{}{
		"id":          j.Payload().Job.ID,
		"state":       "started",
		"received_at": j.received.UTC().Format(time.RFC3339),
		"started_at":  j.started.UTC().Format(time.RFC3339),
	})
}
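Example #5
// Start creates a Jupiter Brain instance over its HTTP API, then polls until the instance
// has an IPv4 address with a reachable SSH port. It reports overall and per-image boot
// times and stops the instance again on error or context cancellation.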
func (p *jupiterBrainProvider) Start(ctx context.Context, startAttributes *StartAttributes) (Instance, error) {
	u, err := p.baseURL.Parse("instances")
	if err != nil {
		return nil, err
	}

	imageName := p.getImageName(startAttributes)

	if imageName == "" {
		return nil, fmt.Errorf("no image alias for %#v", startAttributes)
	}

	workerctx.LoggerFromContext(ctx).WithFields(logrus.Fields{
		"image_name": imageName,
		"osx_image":  startAttributes.OsxImage,
		"language":   startAttributes.Language,
		"dist":       startAttributes.Dist,
		"group":      startAttributes.Group,
		"os":         startAttributes.OS,
	}).Info("selected image name")

	startBooting := time.Now()

	bodyPayload := map[string]map[string]string{
		"data": {
			"type":       "instances",
			"base-image": imageName,
		},
	}

	jsonBody, err := json.Marshal(bodyPayload)
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest("POST", u.String(), bytes.NewReader(jsonBody))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/vnd.api+json")

	resp, err := p.httpDo(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// Deferred calls run LIFO: the body is drained first, then closed.
	defer io.Copy(ioutil.Discard, resp.Body)

	if c := resp.StatusCode; c < 200 || c >= 300 {
		body, _ := ioutil.ReadAll(resp.Body)
		return nil, fmt.Errorf("expected 2xx from Jupiter Brain API, got %d (error: %s)", c, body)
	}

	dataPayload := &jupiterBrainDataResponse{}
	err = json.NewDecoder(resp.Body).Decode(dataPayload)
	if err != nil {
		workerctx.LoggerFromContext(ctx).WithFields(logrus.Fields{
			"err":     err,
			"payload": dataPayload,
			"body":    resp.Body,
		}).Error("couldn't decode created payload")
		return nil, fmt.Errorf("couldn't decode created payload: %s", err)
	}

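	// Assumes the API returned at least one instance; indexing an empty Data slice would panic.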
	payload := dataPayload.Data[0]

	instanceReady := make(chan *jupiterBrainInstancePayload, 1)
	errChan := make(chan error, 1)
	go func(id string) {
		u, err := p.baseURL.Parse(fmt.Sprintf("instances/%s", url.QueryEscape(id)))
		if err != nil {
			errChan <- err
			return
		}

		req, err := http.NewRequest("GET", u.String(), nil)
		if err != nil {
			errChan <- err
			return
		}

		for {
			resp, err := p.httpDo(req)
			if err != nil {
				errChan <- err
				return
			}

			if resp.StatusCode != 200 {
				body, _ := ioutil.ReadAll(resp.Body)
				errChan <- fmt.Errorf("unknown status code: %d, expected 200 (body: %q)", resp.StatusCode, string(body))
				return
			}

			dataPayload := &jupiterBrainDataResponse{}
			err = json.NewDecoder(resp.Body).Decode(dataPayload)
			if err != nil {
				errChan <- fmt.Errorf("couldn't decode refresh payload: %s", err)
				return
			}
			payload := dataPayload.Data[0]

			_, _ = io.Copy(ioutil.Discard, resp.Body)
			_ = resp.Body.Close()

			var ip net.IP
			for _, ipString := range payload.IPAddresses {
				curIP := net.ParseIP(ipString)
				if curIP.To4() != nil {
					ip = curIP
					break
				}
			}

			if ip == nil {
				time.Sleep(p.bootPollSleep)
				continue
			}

			conn, err := net.Dial("tcp", fmt.Sprintf("%s:22", ip.String()))
			if conn != nil {
				conn.Close()
			}

			if err == nil {
				instanceReady <- payload
				return
			}

			time.Sleep(p.bootPollSleep)
		}
	}(payload.ID)

	select {
	case payload := <-instanceReady:
		metrics.TimeSince("worker.vm.provider.jupiterbrain.boot", startBooting)
		normalizedImageName := string(metricNameCleanRegexp.ReplaceAll([]byte(imageName), []byte("-")))
		metrics.TimeSince(fmt.Sprintf("worker.vm.provider.jupiterbrain.boot.image.%s", normalizedImageName), startBooting)
		workerctx.LoggerFromContext(ctx).WithField("instance_uuid", payload.ID).Info("booted instance")
		return &jupiterBrainInstance{
			payload:  payload,
			provider: p,
		}, nil
	case err := <-errChan:
		instance := &jupiterBrainInstance{
			payload:  payload,
			provider: p,
		}
		instance.Stop(ctx)

		return nil, err
	case <-ctx.Done():
		if ctx.Err() == context.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.jupiterbrain.boot.timeout")
		}

		instance := &jupiterBrainInstance{
			payload:  payload,
			provider: p,
		}
		instance.Stop(ctx)

		return nil, ctx.Err()
	}
}
Example #6
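// Start inserts a GCE instance and polls the zone operation until it is DONE. If an
// instance group is configured, the instance is also added to that group. On error or
// context cancellation the inserted instance is deleted again.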
func (p *gceProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) {
	logger := context.LoggerFromContext(ctx)

	image, err := p.getImage(ctx, startAttributes)
	if err != nil {
		return nil, err
	}

	scriptBuf := bytes.Buffer{}
	err = gceStartupScript.Execute(&scriptBuf, p.ic)
	if err != nil {
		return nil, err
	}

	inst := p.buildInstance(startAttributes, image.SelfLink, scriptBuf.String())

	logger.WithFields(logrus.Fields{
		"instance": inst,
	}).Debug("inserting instance")
	op, err := p.client.Instances.Insert(p.projectID, p.ic.Zone.Name, inst).Do()
	if err != nil {
		return nil, err
	}

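	// The deferred cleanup below best-effort-deletes the instance whenever the start is abandoned.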
	abandonedStart := false

	defer func() {
		if abandonedStart {
			_, _ = p.client.Instances.Delete(p.projectID, p.ic.Zone.Name, inst.Name).Do()
		}
	}()

	startBooting := time.Now()

	// instanceReady receives the inserted instance; instChan is what the final
	// select reads from, and is swapped out below when an instance group is configured.
	instanceReady := make(chan *compute.Instance)
	instChan := instanceReady

	errChan := make(chan error)
	go func() {
		for {
			newOp, err := p.client.ZoneOperations.Get(p.projectID, p.ic.Zone.Name, op.Name).Do()
			if err != nil {
				errChan <- err
				return
			}

			if newOp.Status == "DONE" {
				if newOp.Error != nil {
					errChan <- &gceOpError{Err: newOp.Error}
					return
				}

				logger.WithFields(logrus.Fields{
					"status": newOp.Status,
					"name":   op.Name,
				}).Debug("instance is ready")

				instanceReady <- inst
				return
			}

			if newOp.Error != nil {
				logger.WithFields(logrus.Fields{
					"err":  newOp.Error,
					"name": op.Name,
				}).Error("encountered an error while waiting for instance insert operation")

				errChan <- &gceOpError{Err: newOp.Error}
				return
			}

			logger.WithFields(logrus.Fields{
				"status": newOp.Status,
				"name":   op.Name,
			}).Debug("sleeping before checking instance insert operation")

			time.Sleep(p.bootPollSleep)
		}
	}()

	if p.instanceGroup != "" {
		logger.WithFields(logrus.Fields{
			"instance":       inst,
			"instance_group": p.instanceGroup,
		}).Debug("instance group is non-empty, adding instance to group")

		origInstanceReady := instanceReady
		instChan = make(chan *compute.Instance)

		err = func() error {
			for {
				select {
				case readyInst := <-origInstanceReady:
					inst = readyInst
					logger.WithFields(logrus.Fields{
						"instance":       inst,
						"instance_group": p.instanceGroup,
					}).Debug("inserting instance into group")
					return nil
				case <-ctx.Done():
					if ctx.Err() == gocontext.DeadlineExceeded {
						metrics.Mark("worker.vm.provider.gce.boot.timeout")
					}
					abandonedStart = true

					return ctx.Err()
				default:
					logger.Debug("sleeping while waiting for instance to be ready")
					time.Sleep(p.bootPollSleep)
				}
			}
		}()

		if err != nil {
			return nil, err
		}

		inst, err = p.client.Instances.Get(p.projectID, p.ic.Zone.Name, inst.Name).Do()
		if err != nil {
			return nil, err
		}

		ref := &compute.InstanceReference{
			Instance: inst.SelfLink,
		}

		logger.WithFields(logrus.Fields{
			"ref":                ref,
			"instance_self_link": inst.SelfLink,
		}).Debug("inserting instance into group with ref")

		op, err := p.client.InstanceGroups.AddInstances(p.projectID, p.ic.Zone.Name, p.instanceGroup, &compute.InstanceGroupsAddInstancesRequest{
			Instances: []*compute.InstanceReference{ref},
		}).Do()

		if err != nil {
			abandonedStart = true
			return nil, err
		}

		logger.WithFields(logrus.Fields{
			"instance":       inst,
			"instance_group": p.instanceGroup,
		}).Debug("starting goroutine to poll for instance group addition")

		go func() {
			for {
				newOp, err := p.client.ZoneOperations.Get(p.projectID, p.ic.Zone.Name, op.Name).Do()
				if err != nil {
					errChan <- err
					return
				}

				if newOp.Status == "DONE" {
					if newOp.Error != nil {
						errChan <- &gceOpError{Err: newOp.Error}
						return
					}

					instChan <- inst
					return
				}

				if newOp.Error != nil {
					logger.WithFields(logrus.Fields{
						"err":  newOp.Error,
						"name": op.Name,
					}).Error("encountered an error while waiting for instance group addition operation")

					errChan <- &gceOpError{Err: newOp.Error}
					return
				}

				logger.WithFields(logrus.Fields{
					"status": newOp.Status,
					"name":   op.Name,
				}).Debug("sleeping before checking instance group addition operation")

				time.Sleep(p.bootPollSleep)
			}
		}()
	}

	logger.Debug("selecting over instance, error, and done channels")
	select {
	case inst := <-instChan:
		metrics.TimeSince("worker.vm.provider.gce.boot", startBooting)
		return &gceInstance{
			client:   p.client,
			provider: p,
			instance: inst,
			ic:       p.ic,

			authUser: "******",

			projectID: p.projectID,
			imageName: image.Name,
		}, nil
	case err := <-errChan:
		abandonedStart = true
		return nil, err
	case <-ctx.Done():
		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.gce.boot.timeout")
		}
		abandonedStart = true
		return nil, ctx.Err()
	}
}
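Example #7
// Generate augments the job payload with cache and host settings, POSTs it to the
// build-script generator API, and times the request. 5xx responses are treated as
// recoverable, 4xx responses are not.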
func (g *webBuildScriptGenerator) Generate(ctx gocontext.Context, payload *simplejson.Json) ([]byte, error) {
	if g.aptCacheHost != "" {
		payload.SetPath([]string{"hosts", "apt_cache"}, g.aptCacheHost)
	}
	if g.npmCacheHost != "" {
		payload.SetPath([]string{"hosts", "npm_cache"}, g.npmCacheHost)
	}

	payload.Set("paranoid", g.paranoid)
	payload.Set("fix_resolv_conf", g.fixResolvConf)
	payload.Set("fix_etc_hosts", g.fixEtcHosts)

	if g.cacheType != "" {
		payload.SetPath([]string{"cache_options", "type"}, g.cacheType)
		payload.SetPath([]string{"cache_options", "fetch_timeout"}, g.cacheFetchTimeout)
		payload.SetPath([]string{"cache_options", "push_timeout"}, g.cachePushTimeout)
		payload.SetPath([]string{"cache_options", "s3", "scheme"}, g.s3CacheOptions.scheme)
		payload.SetPath([]string{"cache_options", "s3", "region"}, g.s3CacheOptions.region)
		payload.SetPath([]string{"cache_options", "s3", "bucket"}, g.s3CacheOptions.bucket)
		payload.SetPath([]string{"cache_options", "s3", "access_key_id"}, g.s3CacheOptions.accessKeyID)
		payload.SetPath([]string{"cache_options", "s3", "secret_access_key"}, g.s3CacheOptions.secretAccessKey)
	}

	b, err := payload.Encode()
	if err != nil {
		return nil, err
	}

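	// If the generator URL carries userinfo (e.g. https://token@host/script), use it as an
	// auth token and strip it from the outgoing request URL.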
	var token string
	u, err := url.Parse(g.URL)
	if err != nil {
		return nil, err
	}
	if u.User != nil {
		token = u.User.Username()
		u.User = nil
	}

	buf := bytes.NewBuffer(b)
	req, err := http.NewRequest("POST", u.String(), buf)
	if err != nil {
		return nil, err
	}
	if token != "" {
		req.Header.Set("Authorization", "token "+token)
	}
	req.Header.Set("User-Agent", fmt.Sprintf("worker-go v=%v rev=%v d=%v", VersionString, RevisionString, GeneratedString))
	req.Header.Set("Content-Type", "application/json")

	startRequest := time.Now()

	resp, err := g.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	metrics.TimeSince("worker.job.script.api", startRequest)

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	if resp.StatusCode >= 500 {
		return nil, BuildScriptGeneratorError{error: fmt.Errorf("server error: %q", string(body)), Recover: true}
	} else if resp.StatusCode >= 400 {
		return nil, BuildScriptGeneratorError{error: fmt.Errorf("client error: %q", string(body)), Recover: false}
	}

	return body, nil
}
Example #8
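// Start creates and starts a Docker container for the job's language image, optionally
// pinned to checked-out CPU sets, and waits until it is running or the context is done.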
func (p *dockerProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) {
	logger := context.LoggerFromContext(ctx)

	cpuSets, err := p.checkoutCPUSets()
	if err != nil {
		return nil, err
	}

	imageID, imageName, err := p.imageForLanguage(startAttributes.Language)
	if err != nil {
		return nil, err
	}

	dockerConfig := &docker.Config{
		Cmd:      p.runCmd,
		Image:    imageID,
		Memory:   int64(p.runMemory),
		Hostname: fmt.Sprintf("testing-docker-%s", uuid.NewRandom()),
	}

	dockerHostConfig := &docker.HostConfig{
		Privileged: p.runPrivileged,
		Memory:     int64(p.runMemory),
	}

	if cpuSets != "" {
		dockerConfig.CPUSet = cpuSets
		dockerHostConfig.CPUSet = cpuSets
	}

	logger.WithFields(logrus.Fields{
		"config":      fmt.Sprintf("%#v", dockerConfig),
		"host_config": fmt.Sprintf("%#v", dockerHostConfig),
	}).Debug("starting container")

	container, err := p.client.CreateContainer(docker.CreateContainerOptions{
		Config:     dockerConfig,
		HostConfig: dockerHostConfig,
	})

	if err != nil {
		if container != nil {
			err := p.client.RemoveContainer(docker.RemoveContainerOptions{
				ID:            container.ID,
				RemoveVolumes: true,
				Force:         true,
			})
			if err != nil {
				logger.WithField("err", err).Error("couldn't remove container after create failure")
			}
		}

		return nil, err
	}

	startBooting := time.Now()

	err = p.client.StartContainer(container.ID, dockerHostConfig)
	if err != nil {
		return nil, err
	}

	// Buffered so the polling goroutine can exit even if Start has already returned.
	containerReady := make(chan *docker.Container, 1)
	errChan := make(chan error, 1)
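	// Poll the container state until it is running; errors and context cancellation are
	// handled by the select below. Note the loop re-inspects without a delay.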
	go func(id string) {
		for {
			container, err := p.client.InspectContainer(id)
			if err != nil {
				errChan <- err
				return
			}

			if container.State.Running {
				containerReady <- container
				return
			}
		}
	}(container.ID)

	select {
	case container := <-containerReady:
		metrics.TimeSince("worker.vm.provider.docker.boot", startBooting)
		return &dockerInstance{
			client:    p.client,
			provider:  p,
			container: container,
			imageName: imageName,
		}, nil
	case err := <-errChan:
		return nil, err
	case <-ctx.Done():
		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.docker.boot.timeout")
		}
		return nil, ctx.Err()
	}
}
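All of the examples above share one measurement pattern: capture a start time, do the work, then report the elapsed duration under a dotted metric name, either inline or deferred. A self-contained sketch of that pattern, with a hypothetical timeSince helper standing in for the metrics package:

package main

import (
	"fmt"
	"time"
)

// timeSince stands in for metrics.TimeSince: it reports the time elapsed since
// start under the given metric name. Here it simply prints the duration.
func timeSince(name string, start time.Time) {
	fmt.Printf("%s: %v\n", name, time.Since(start))
}

func doWork() {
	start := time.Now()
	defer timeSince("worker.example.work", start) // reported when doWork returns

	time.Sleep(50 * time.Millisecond) // stand-in for booting a VM or container
}

func main() {
	doWork()
}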