func (p *gceProvider) apiRateLimit(ctx gocontext.Context) error { metrics.Gauge("travis.worker.vm.provider.gce.rate-limit.queue", int64(p.rateLimitQueueDepth)) startWait := time.Now() defer metrics.TimeSince("travis.worker.vm.provider.gce.rate-limit", startWait) atomic.AddUint64(&p.rateLimitQueueDepth, 1) // This decrements the counter, see the docs for atomic.AddUint64 defer atomic.AddUint64(&p.rateLimitQueueDepth, ^uint64(0)) errCount := 0 for { ok, err := p.rateLimiter.RateLimit("gce-api", p.rateLimitMaxCalls, p.rateLimitDuration) if err != nil { errCount++ if errCount >= 5 { context.CaptureError(ctx, err) context.LoggerFromContext(ctx).WithField("err", err).Info("rate limiter errored 5 times") return err } } else { errCount = 0 } if ok { return nil } // Sleep for up to 1 second time.Sleep(time.Millisecond * time.Duration(mathrand.Intn(1000))) } }
func (p *gceProvider) apiRateLimit() { atomic.AddUint64(&p.rateLimitQueueDepth, 1) metrics.Gauge("travis.worker.vm.provider.gce.rate-limit.queue", int64(p.rateLimitQueueDepth)) startWait := time.Now() <-p.rateLimiter.C metrics.TimeSince("travis.worker.vm.provider.gce.rate-limit", startWait) // This decrements the counter, see the docs for atomic.AddUint64 atomic.AddUint64(&p.rateLimitQueueDepth, ^uint64(0)) }
func (b *blueBoxProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) { password := generatePassword() params := goblueboxapi.BlockParams{ Product: b.cfg.Get("PRODUCT_ID"), Template: b.templateIDForLanguageGroup(startAttributes.Language, startAttributes.Group), Location: b.cfg.Get("LOCATION_ID"), Hostname: fmt.Sprintf("testing-bb-%s", uuid.NewRandom()), Username: "******", Password: password, IPv6Only: b.cfg.Get("IPV6_ONLY") == "true", } startBooting := time.Now() block, err := b.client.Blocks.Create(params) if err != nil { return nil, err } blockReady := make(chan *goblueboxapi.Block) go func(id string) { for { b, err := b.client.Blocks.Get(id) if err == nil && b.Status == "running" { blockReady <- b return } time.Sleep(5 * time.Second) } }(block.ID) select { case block := <-blockReady: metrics.TimeSince("worker.vm.provider.bluebox.boot", startBooting) return &blueBoxInstance{ client: b.client, block: block, password: password, }, nil case <-ctx.Done(): if block != nil { err := b.client.Blocks.Destroy(block.ID) if err != nil { context.LoggerFromContext(ctx).WithField("block", block).WithField("err", err).Error("could not destroy block") } } if ctx.Err() == gocontext.DeadlineExceeded { metrics.Mark("worker.vm.provider.bluebox.boot.timeout") } return nil, ctx.Err() } }
func (j *amqpJob) Started() error { j.started = time.Now() metrics.TimeSince("travis.worker.job.start_time", j.received) return j.sendStateUpdate("job:test:start", map[string]interface{}{ "id": j.Payload().Job.ID, "state": "started", "received_at": j.received.UTC().Format(time.RFC3339), "started_at": j.started.UTC().Format(time.RFC3339), }) }
// Start boots a Jupiter Brain instance for the given start attributes:
// it POSTs an instance-create request, then polls the API until the
// instance reports an IPv4 address and accepts a TCP connection on port
// 22, honoring ctx cancellation while waiting.
func (p *jupiterBrainProvider) Start(ctx context.Context, startAttributes *StartAttributes) (Instance, error) {
	u, err := p.baseURL.Parse("instances")
	if err != nil {
		return nil, err
	}

	// Map the job's language/OS/group/etc. onto a concrete image name.
	imageName := p.getImageName(startAttributes)
	if imageName == "" {
		return nil, fmt.Errorf("no image alias for %#v", startAttributes)
	}

	workerctx.LoggerFromContext(ctx).WithFields(logrus.Fields{
		"image_name": imageName,
		"osx_image":  startAttributes.OsxImage,
		"language":   startAttributes.Language,
		"dist":       startAttributes.Dist,
		"group":      startAttributes.Group,
		"os":         startAttributes.OS,
	}).Info("selected image name")

	startBooting := time.Now()

	// JSON-API style create payload.
	bodyPayload := map[string]map[string]string{
		"data": {
			"type":       "instances",
			"base-image": imageName,
		},
	}

	jsonBody, err := json.Marshal(bodyPayload)
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest("POST", u.String(), bytes.NewReader(jsonBody))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/vnd.api+json")

	resp, err := p.httpDo(req)
	if err != nil {
		return nil, err
	}
	// NOTE(review): defers run LIFO, so Close executes BEFORE the
	// draining io.Copy — the drain reads from an already-closed body.
	// These two defers look like they are in swapped order.
	defer io.Copy(ioutil.Discard, resp.Body)
	defer resp.Body.Close()

	if c := resp.StatusCode; c < 200 || c >= 300 {
		body, _ := ioutil.ReadAll(resp.Body)
		return nil, fmt.Errorf("expected 2xx from Jupiter Brain API, got %d (error: %s)", c, body)
	}

	dataPayload := &jupiterBrainDataResponse{}
	err = json.NewDecoder(resp.Body).Decode(dataPayload)
	if err != nil {
		workerctx.LoggerFromContext(ctx).WithFields(logrus.Fields{
			"err":     err,
			"payload": dataPayload,
			"body":    resp.Body,
		}).Error("couldn't decode created payload")
		return nil, fmt.Errorf("couldn't decode created payload: %s", err)
	}

	payload := dataPayload.Data[0]

	// Buffered (cap 1) so the poller's single send never blocks even if
	// this function has already returned via ctx.Done().
	instanceReady := make(chan *jupiterBrainInstancePayload, 1)
	errChan := make(chan error, 1)

	// Poll the instance until it is reachable over SSH.
	// NOTE(review): this goroutine does not observe ctx — after
	// cancellation it keeps polling until the instance becomes reachable
	// or an error occurs.
	go func(id string) {
		u, err := p.baseURL.Parse(fmt.Sprintf("instances/%s", url.QueryEscape(id)))
		if err != nil {
			errChan <- err
			return
		}

		req, err := http.NewRequest("GET", u.String(), nil)
		if err != nil {
			errChan <- err
			return
		}

		for {
			resp, err := p.httpDo(req)
			if err != nil {
				errChan <- err
				return
			}

			if resp.StatusCode != 200 {
				// NOTE(review): resp.Body is read but not closed on this
				// path — possible connection leak; confirm.
				body, _ := ioutil.ReadAll(resp.Body)
				errChan <- fmt.Errorf("unknown status code: %d, expected 200 (body: %q)", resp.StatusCode, string(body))
				return
			}

			dataPayload := &jupiterBrainDataResponse{}
			err = json.NewDecoder(resp.Body).Decode(dataPayload)
			if err != nil {
				errChan <- fmt.Errorf("couldn't decode refresh payload: %s", err)
				return
			}
			payload := dataPayload.Data[0]

			// Drain and close so the HTTP transport can reuse the connection.
			_, _ = io.Copy(ioutil.Discard, resp.Body)
			_ = resp.Body.Close()

			// Pick the first IPv4 address reported for the instance.
			var ip net.IP
			for _, ipString := range payload.IPAddresses {
				curIP := net.ParseIP(ipString)
				if curIP.To4() != nil {
					ip = curIP
					break
				}
			}

			if ip == nil {
				// No IPv4 address yet; wait and refresh again.
				time.Sleep(p.bootPollSleep)
				continue
			}

			// Ready once something accepts a TCP connection on port 22.
			conn, err := net.Dial("tcp", fmt.Sprintf("%s:22", ip.String()))
			if conn != nil {
				conn.Close()
			}
			if err == nil {
				instanceReady <- payload
				return
			}

			time.Sleep(p.bootPollSleep)
		}
	}(payload.ID)

	select {
	case payload := <-instanceReady:
		metrics.TimeSince("worker.vm.provider.jupiterbrain.boot", startBooting)
		// Per-image boot-time metric; sanitize the image name for the
		// metrics backend.
		normalizedImageName := string(metricNameCleanRegexp.ReplaceAll([]byte(imageName), []byte("-")))
		metrics.TimeSince(fmt.Sprintf("worker.vm.provider.jupiterbrain.boot.image.%s", normalizedImageName), startBooting)
		workerctx.LoggerFromContext(ctx).WithField("instance_uuid", payload.ID).Info("booted instance")

		return &jupiterBrainInstance{
			payload:  payload,
			provider: p,
		}, nil
	case err := <-errChan:
		// Boot failed part-way; best-effort teardown before surfacing
		// the error.
		instance := &jupiterBrainInstance{
			payload:  payload,
			provider: p,
		}
		instance.Stop(ctx)

		return nil, err
	case <-ctx.Done():
		if ctx.Err() == context.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.jupiterbrain.boot.timeout")
		}
		// Cancelled or timed out; tear down whatever was created.
		instance := &jupiterBrainInstance{
			payload:  payload,
			provider: p,
		}
		instance.Stop(ctx)

		return nil, ctx.Err()
	}
}
// Start boots a GCE instance for the given start attributes. It inserts
// the instance, polls the zone operation until it completes, optionally
// adds the instance to a configured instance group (polling that
// operation too), and deletes the instance on the way out if the start
// is abandoned due to an error or context cancellation.
func (p *gceProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) {
	logger := context.LoggerFromContext(ctx)

	image, err := p.getImage(ctx, startAttributes)
	if err != nil {
		return nil, err
	}

	// Render the startup script the instance runs on first boot.
	scriptBuf := bytes.Buffer{}
	err = gceStartupScript.Execute(&scriptBuf, p.ic)
	if err != nil {
		return nil, err
	}

	inst := p.buildInstance(startAttributes, image.SelfLink, scriptBuf.String())

	logger.WithFields(logrus.Fields{
		"instance": inst,
	}).Debug("inserting instance")
	op, err := p.client.Instances.Insert(p.projectID, p.ic.Zone.Name, inst).Do()
	if err != nil {
		return nil, err
	}

	// If we bail out after the insert has been issued, delete the
	// instance on exit (best effort; result deliberately ignored).
	abandonedStart := false
	defer func() {
		if abandonedStart {
			_, _ = p.client.Instances.Delete(p.projectID, p.ic.Zone.Name, inst.Name).Do()
		}
	}()

	startBooting := time.Now()

	// instChan is what the final select waits on: either the
	// insert-poller's channel directly, or a second channel fed by the
	// instance-group poller below.
	var instChan chan *compute.Instance

	instanceReady := make(chan *compute.Instance)
	instChan = instanceReady

	errChan := make(chan error)

	// Poll the zone operation for the instance insert until it reports
	// DONE (success or failure).
	// NOTE(review): instanceReady/errChan are unbuffered and this
	// goroutine never watches ctx, so if Start returns via ctx.Done()
	// the sends below can block forever and leak this goroutine.
	go func() {
		for {
			newOp, err := p.client.ZoneOperations.Get(p.projectID, p.ic.Zone.Name, op.Name).Do()
			if err != nil {
				errChan <- err
				return
			}

			if newOp.Status == "DONE" {
				if newOp.Error != nil {
					errChan <- &gceOpError{Err: newOp.Error}
					return
				}

				logger.WithFields(logrus.Fields{
					"status": newOp.Status,
					"name":   op.Name,
				}).Debug("instance is ready")

				instanceReady <- inst
				return
			}

			if newOp.Error != nil {
				logger.WithFields(logrus.Fields{
					"err":  newOp.Error,
					"name": op.Name,
				}).Error("encountered an error while waiting for instance insert operation")

				errChan <- &gceOpError{Err: newOp.Error}
				return
			}

			logger.WithFields(logrus.Fields{
				"status": newOp.Status,
				"name":   op.Name,
			}).Debug("sleeping before checking instance insert operation")

			time.Sleep(p.bootPollSleep)
		}
	}()

	if p.instanceGroup != "" {
		logger.WithFields(logrus.Fields{
			"instance":       inst,
			"instance_group": p.instanceGroup,
		}).Debug("instance group is non-empty, adding instance to group")

		// Re-point the final select at a fresh channel; the group-add
		// poller below forwards the instance once membership is complete.
		origInstanceReady := instanceReady
		instChan = make(chan *compute.Instance)

		// Wait inline for the insert to finish (or ctx to end) before
		// asking the API to add the instance to the group.
		// NOTE(review): the default case makes this select spin through
		// sleep iterations instead of blocking — a plain two-case select
		// would behave the same without polling.
		err = func() error {
			for {
				select {
				case readyInst := <-origInstanceReady:
					inst = readyInst

					logger.WithFields(logrus.Fields{
						"instance":       inst,
						"instance_group": p.instanceGroup,
					}).Debug("inserting instance into group")

					return nil
				case <-ctx.Done():
					if ctx.Err() == gocontext.DeadlineExceeded {
						metrics.Mark("worker.vm.provider.gce.boot.timeout")
					}
					abandonedStart = true
					return ctx.Err()
				default:
					logger.Debug("sleeping while waiting for instance to be ready")
					time.Sleep(p.bootPollSleep)
				}
			}
		}()
		if err != nil {
			return nil, err
		}

		// Refresh the instance so SelfLink is populated for the group add.
		inst, err = p.client.Instances.Get(p.projectID, p.ic.Zone.Name, inst.Name).Do()
		if err != nil {
			return nil, err
		}

		ref := &compute.InstanceReference{
			Instance: inst.SelfLink,
		}

		logger.WithFields(logrus.Fields{
			"ref":                ref,
			"instance_self_link": inst.SelfLink,
		}).Debug("inserting instance into group with ref")

		// This op (declared with :=) shadows the outer insert op; the
		// goroutine below closes over this inner one.
		op, err := p.client.InstanceGroups.AddInstances(p.projectID, p.ic.Zone.Name, p.instanceGroup, &compute.InstanceGroupsAddInstancesRequest{
			Instances: []*compute.InstanceReference{ref},
		}).Do()
		if err != nil {
			abandonedStart = true
			return nil, err
		}

		logger.WithFields(logrus.Fields{
			"instance":       inst,
			"instance_group": p.instanceGroup,
		}).Debug("starting goroutine to poll for instance group addition")

		// Poll the group-addition operation and forward the instance to
		// instChan once it completes.
		go func() {
			for {
				newOp, err := p.client.ZoneOperations.Get(p.projectID, p.ic.Zone.Name, op.Name).Do()
				if err != nil {
					errChan <- err
					return
				}

				if newOp.Status == "DONE" {
					if newOp.Error != nil {
						errChan <- &gceOpError{Err: newOp.Error}
						return
					}

					instChan <- inst
					return
				}

				if newOp.Error != nil {
					logger.WithFields(logrus.Fields{
						"err":  newOp.Error,
						"name": op.Name,
					}).Error("encountered an error while waiting for instance group addition operation")

					errChan <- &gceOpError{Err: newOp.Error}
					return
				}

				logger.WithFields(logrus.Fields{
					"status": newOp.Status,
					"name":   op.Name,
				}).Debug("sleeping before checking instance group addition operation")

				time.Sleep(p.bootPollSleep)
			}
		}()
	}

	logger.Debug("selecting over instance, error, and done channels")
	select {
	case inst := <-instChan:
		metrics.TimeSince("worker.vm.provider.gce.boot", startBooting)

		return &gceInstance{
			client:   p.client,
			provider: p,
			instance: inst,
			ic:       p.ic,

			authUser: "******",

			projectID: p.projectID,
			imageName: image.Name,
		}, nil
	case err := <-errChan:
		abandonedStart = true
		return nil, err
	case <-ctx.Done():
		if ctx.Err() == gocontext.DeadlineExceeded {
			metrics.Mark("worker.vm.provider.gce.boot.timeout")
		}
		abandonedStart = true
		return nil, ctx.Err()
	}
}
func (g *webBuildScriptGenerator) Generate(ctx gocontext.Context, payload *simplejson.Json) ([]byte, error) { if g.aptCacheHost != "" { payload.SetPath([]string{"hosts", "apt_cache"}, g.aptCacheHost) } if g.npmCacheHost != "" { payload.SetPath([]string{"hosts", "npm_cache"}, g.npmCacheHost) } payload.Set("paranoid", g.paranoid) payload.Set("fix_resolv_conf", g.fixResolvConf) payload.Set("fix_etc_hosts", g.fixEtcHosts) if g.cacheType != "" { payload.SetPath([]string{"cache_options", "type"}, g.cacheType) payload.SetPath([]string{"cache_options", "fetch_timeout"}, g.cacheFetchTimeout) payload.SetPath([]string{"cache_options", "push_timeout"}, g.cachePushTimeout) payload.SetPath([]string{"cache_options", "s3", "scheme"}, g.s3CacheOptions.scheme) payload.SetPath([]string{"cache_options", "s3", "region"}, g.s3CacheOptions.region) payload.SetPath([]string{"cache_options", "s3", "bucket"}, g.s3CacheOptions.bucket) payload.SetPath([]string{"cache_options", "s3", "access_key_id"}, g.s3CacheOptions.accessKeyID) payload.SetPath([]string{"cache_options", "s3", "secret_access_key"}, g.s3CacheOptions.secretAccessKey) } b, err := payload.Encode() if err != nil { return nil, err } var token string u, err := url.Parse(g.URL) if err != nil { return nil, err } if u.User != nil { token = u.User.Username() u.User = nil } buf := bytes.NewBuffer(b) req, err := http.NewRequest("POST", u.String(), buf) if err != nil { return nil, err } if token != "" { req.Header.Set("Authorization", "token "+token) } req.Header.Set("User-Agent", fmt.Sprintf("worker-go v=%v rev=%v d=%v", VersionString, RevisionString, GeneratedString)) req.Header.Set("Content-Type", "application/json") startRequest := time.Now() resp, err := g.httpClient.Do(req) if err != nil { return nil, err } defer resp.Body.Close() metrics.TimeSince("worker.job.script.api", startRequest) body, err := ioutil.ReadAll(resp.Body) if err != nil { return nil, err } if resp.StatusCode >= 500 { return nil, BuildScriptGeneratorError{error: 
fmt.Errorf("server error: %q", string(body)), Recover: true} } else if resp.StatusCode >= 400 { return nil, BuildScriptGeneratorError{error: fmt.Errorf("client error: %q", string(body)), Recover: false} } return body, nil }
func (p *dockerProvider) Start(ctx gocontext.Context, startAttributes *StartAttributes) (Instance, error) { logger := context.LoggerFromContext(ctx) cpuSets, err := p.checkoutCPUSets() if err != nil && cpuSets != "" { return nil, err } imageID, imageName, err := p.imageForLanguage(startAttributes.Language) if err != nil { return nil, err } dockerConfig := &docker.Config{ Cmd: p.runCmd, Image: imageID, Memory: int64(p.runMemory), Hostname: fmt.Sprintf("testing-docker-%s", uuid.NewRandom()), } dockerHostConfig := &docker.HostConfig{ Privileged: p.runPrivileged, Memory: int64(p.runMemory), } if cpuSets != "" { dockerConfig.CPUSet = cpuSets dockerHostConfig.CPUSet = cpuSets } logger.WithFields(logrus.Fields{ "config": fmt.Sprintf("%#v", dockerConfig), "host_config": fmt.Sprintf("%#v", dockerHostConfig), }).Debug("starting container") container, err := p.client.CreateContainer(docker.CreateContainerOptions{ Config: dockerConfig, HostConfig: dockerHostConfig, }) if err != nil { if container != nil { err := p.client.RemoveContainer(docker.RemoveContainerOptions{ ID: container.ID, RemoveVolumes: true, Force: true, }) if err != nil { logger.WithField("err", err).Error("couldn't remove container after create failure") } } return nil, err } startBooting := time.Now() err = p.client.StartContainer(container.ID, dockerHostConfig) if err != nil { return nil, err } containerReady := make(chan *docker.Container) errChan := make(chan error) go func(id string) { for { container, err := p.client.InspectContainer(id) if err != nil { errChan <- err return } if container.State.Running { containerReady <- container return } } }(container.ID) select { case container := <-containerReady: metrics.TimeSince("worker.vm.provider.docker.boot", startBooting) return &dockerInstance{ client: p.client, provider: p, container: container, imageName: imageName, }, nil case err := <-errChan: return nil, err case <-ctx.Done(): if ctx.Err() == gocontext.DeadlineExceeded { 
metrics.Mark("worker.vm.provider.docker.boot.timeout") } return nil, ctx.Err() } }