func (s *endpointServiceImpl) send(ctx context.Context, data []byte) error {
	ctx = log.SetField(ctx, "endpointURL", s.url)
	return retryCall(ctx, "endpoint.send", func() error {
		startTime := clock.Now(ctx)

		log.Debugf(ctx, "Pushing message to endpoint.")
		req, err := http.NewRequest("POST", s.url, bytes.NewReader(data))
		if err != nil {
			log.Errorf(log.SetError(ctx, err), "Failed to create HTTP request.")
			return err
		}
		req.Header.Add("content-type", protobufContentType)
		req.Header.Add("user-agent", monitoringEndpointUserAgent)

		resp, err := s.client.Do(req)
		if err != nil {
			// Treat a client error as transient.
			log.Warningf(log.SetError(ctx, err), "Failed proxy client request.")
			return errors.WrapTransient(err)
		}
		defer resp.Body.Close()

		// Read the full response body. This will enable us to re-use the
		// connection.
		bodyData, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			log.Errorf(log.SetError(ctx, err), "Error during endpoint connection.")
			return errors.WrapTransient(err)
		}

		log.Fields{
			"status":        resp.Status,
			"statusCode":    resp.StatusCode,
			"headers":       resp.Header,
			"contentLength": resp.ContentLength,
			"body":          string(bodyData),
			"duration":      clock.Now(ctx).Sub(startTime),
		}.Debugf(ctx, "Received HTTP response from endpoint.")

		if http.StatusOK <= resp.StatusCode && resp.StatusCode < http.StatusMultipleChoices {
			log.Debugf(ctx, "Message pushed successfully.")
			return nil
		}

		err = fmt.Errorf("http: server error (%d)", resp.StatusCode)
		if resp.StatusCode >= http.StatusInternalServerError {
			err = errors.WrapTransient(err)
		}
		log.Fields{
			log.ErrorKey: err,
			"status":     resp.Status,
			"statusCode": resp.StatusCode,
		}.Warningf(ctx, "Proxy error.")
		return err
	})
}
// New returns a new EventLogger. If cfg.Interval is not set, it will be
// assigned time.Second as the default. cfg.Client will likewise be assigned
// http.DefaultClient by default.
func New(ctx context.Context, cfg Config) EventLogger {
	if cfg.Interval == 0 {
		cfg.Interval = time.Second
	}
	if cfg.Client == nil {
		cfg.Client = http.DefaultClient
	}
	ret := &evtLogger{
		cfg:   cfg,
		evtCh: make(chan *crit_event.LogRequestLite_LogEventLite, maxEvts),
		src: &crit_event.InfraEventSource{
			HostName:      proto.String(cfg.Hostname),
			AppengineName: proto.String(cfg.AppengineName),
			ServiceName:   proto.String(cfg.Service),
		},
	}
	go func() {
		if err := ret.start(ctx); err != nil {
			logging.Errorf(ctx, "Error: %v", err)
		}
	}()
	return ret
}
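// A minimal construction sketch (illustrative, not part of the package): the
// Endpoint URL and service names are placeholders. Interval and Client are
// left unset to exercise the defaults New fills in above.
func ExampleNew() {
	ctx := context.Background()
	l := New(ctx, Config{
		Endpoint: "https://logs.example.com/log", // placeholder endpoint URL
		Hostname: "builder-1",                    // placeholder host name
		Service:  "example-service",              // placeholder service name
	})
	_ = l // Events are then submitted through the returned logger's Log method.
}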
func (l *evtLogger) Log(ctx context.Context, evt *crit_event.ChromeInfraEvent) {
	// Get this as soon as possible, so proto encoding time etc. doesn't bump
	// the logged event time.
	clck := clock.Get(ctx)
	// EventTimeMs is in milliseconds, so convert from nanoseconds.
	now := clck.Now().UnixNano() / int64(time.Millisecond)

	evt.EventSource = l.src
	// TODO: Consider making this a func parameter.
	evt.TimestampKind = crit_event.ChromeInfraEvent_POINT.Enum()
	if trace, ok := ctx.Value(TraceID).(string); ok {
		evt.TraceId = proto.String(trace)
	}
	// TODO:
	// evt.SpanId = // ?
	// evt.ParentId = // ?

	// Named "payload" rather than "bytes" to avoid shadowing the bytes package.
	payload, err := proto.Marshal(evt)
	if err != nil {
		logging.Errorf(ctx, "Error marshaling ChromeInfraEvent: %v", err)
		return
	}

	l.evtCh <- &crit_event.LogRequestLite_LogEventLite{
		EventTimeMs:     proto.Int64(now),
		EventCode:       proto.Int32(0),
		EventFlowId:     proto.Int32(0),
		SourceExtension: payload,
	}
}
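// A call sketch (illustrative): TraceID is the context key Log checks above;
// the trace value and the empty event payload are placeholders.
func exampleLog(ctx context.Context, l *evtLogger) {
	ctx = context.WithValue(ctx, TraceID, "trace-1234") // hypothetical trace ID
	// Log stamps the event time, source, and timestamp kind itself; any other
	// event fields are omitted here.
	l.Log(ctx, &crit_event.ChromeInfraEvent{})
}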
// process runs in its own goroutine and continuously processes data in the
// configured subscription.
func (a *application) process(ctx context.Context) {
	for !a.isShutdown() {
		// Although we call pullAckMessages without backoff or a throttle, the
		// calls to the Pub/Sub service use the retry library's exponential
		// backoff, so we don't need to implement DoS protection at this level.
		err := a.pubsub.pullAckMessages(ctx, func(msgs []*pubsub.Message) {
			log.Fields{
				"count": len(msgs),
			}.Infof(ctx, "Pull()ed messages from subscription.")
			if err := a.proxyMessages(ctx, msgs); err != nil {
				log.Errorf(log.SetError(ctx, err), "Error sending messages to proxy.")
			}
		})
		if err == errNoMessages {
			log.Fields{
				"delay": noMessageDelay,
			}.Debugf(ctx, "Received no messages; sleeping for next round.")
			a.sleepWithInterrupt(ctx, noMessageDelay)
		} else if err != nil {
			log.Errorf(log.SetError(ctx, err), "process: Failed to Pull() round of messages.")
		}
	}
}
// retryCall is a wrapper around the retry library that logs information about
// the retry attempts.
func retryCall(ctx context.Context, title string, f func() error) error {
	log.Debugf(ctx, "Executing retriable call %s.", title)
	err := retry.Retry(ctx, func() error {
		return f()
	}, func(err error, delay time.Duration) {
		log.Fields{
			log.ErrorKey: err,
			"delay":      delay,
		}.Warningf(ctx, "Transient error encountered during %s; retrying.", title)
	})
	if err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to %s.", title)
		return err
	}
	return nil
}
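// A usage sketch for retryCall (illustrative): only errors wrapped as
// transient (e.g. via errors.WrapTransient, as send does above) are retried;
// other errors fail immediately. doPing is a hypothetical helper.
func examplePing(ctx context.Context) error {
	return retryCall(ctx, "ping", func() error {
		if err := doPing(); err != nil {
			// Assume a failed ping may resolve itself; mark it transient so
			// the retry library backs off and tries again.
			return errors.WrapTransient(err)
		}
		return nil
	})
}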
func (c *endpointConfig) createService(ctx context.Context) (endpointService, error) {
	if c.url == "" {
		return nil, errors.New("endpoint: you must supply a monitoring endpoint")
	}

	authenticator := auth.NewAuthenticator(auth.Options{
		Method:                 auth.ServiceAccountMethod,
		Scopes:                 endpointScopes,
		ServiceAccountJSONPath: c.serviceAccountJSONPath,
		Logger:                 log.Get(ctx),
	})
	client, err := auth.AuthenticatedClient(auth.SilentLogin, authenticator)
	if err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to configure endpoint client.")
		return nil, errors.New("endpoint: failed to configure endpoint client")
	}

	return &endpointServiceImpl{
		endpointConfig: *c,
		client:         client,
	}, nil
}
// newPubSubClient instantiates a new Pub/Sub client.
//
// This method will also perform authentication and set up the
// topic/subscription if it isn't already set up.
func newPubSubClient(ctx context.Context, config pubsubConfig, svc pubSubService) (*pubsubClient, error) {
	if config.subscription == "" {
		return nil, errors.New("pubsub: you must supply a subscription")
	}
	if config.batchSize <= 0 {
		return nil, errors.New("pubsub: batch size must be at least 1")
	} else if config.batchSize > maxSubscriptionPullSize {
		return nil, fmt.Errorf("pubsub: batch size cannot exceed %d", maxSubscriptionPullSize)
	}

	p := pubsubClient{
		pubsubConfig: &config,
		service:      svc,
	}

	// Ensure that our subscription (and topic) exist.
	if err := p.setupSubscription(ctx); err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to set up subscription.")
		return nil, err
	}

	return &p, nil
}
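// A construction sketch (illustrative; the field values are placeholders, and
// svc is any pubSubService implementation):
func exampleNewPubSubClient(ctx context.Context, svc pubSubService) (*pubsubClient, error) {
	return newPubSubClient(ctx, pubsubConfig{
		subscription: "my-subscription", // placeholder name
		topic:        "my-topic",        // placeholder name
		batchSize:    64,                // must be in [1, maxSubscriptionPullSize]
		create:       true,              // let setupSubscription create missing resources
	}, svc)
}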
// setupSubscription asserts that the configured subscription exists. In doing
// so, it also asserts that the client credentials are valid with respect to
// the configured project/subscription.
//
// If the subscription doesn't exist, this method can create the subscription
// and (if missing) its topic, if the "create" flag is set.
func (p *pubsubClient) setupSubscription(ctx context.Context) error {
	exists := false
	log.Fields{
		"subscription": p.subscription,
	}.Infof(ctx, "Checking for subscription existence.")
	err := retryCall(ctx, "SubExists()", func() error {
		var err error
		exists, err = p.service.SubExists(p.subscription)
		return p.wrapTransient(err)
	})
	if err != nil {
		log.Warningf(log.SetError(ctx, err),
			"Failed to test for subscription; assuming it doesn't exist.")
	}
	if exists {
		return nil
	}

	if !p.create {
		return errors.New("pubsub: subscription doesn't exist, not configured to create")
	}

	// Create the subscription if it doesn't exist.
	if p.topic == "" {
		log.Errorf(ctx, "Cannot create subscription; no topic was specified.")
		return errors.New("pubsub: cannot create subscription")
	}

	// Test if the topic exists...
	log.Fields{
		"topic": p.topic,
	}.Infof(ctx, "Checking for topic existence.")
	err = retryCall(ctx, "TopicExists()", func() error {
		var err error
		exists, err = p.service.TopicExists(p.topic)
		return p.wrapTransient(err)
	})
	if err != nil {
		log.Warningf(log.SetError(ctx, err),
			"Failed to test for topic; assuming it doesn't exist.")
	}
	if !exists {
		log.Fields{
			"topic": p.topic,
		}.Infof(ctx, "Creating topic.")
		err := retryCall(ctx, "CreateTopic()", func() error {
			return p.service.CreateTopic(p.topic)
		})
		if err != nil {
			log.Warningf(log.SetError(ctx, err), "Failed to create topic.")
			return errors.New("pubsub: cannot create topic")
		}
	}

	log.Fields{
		"topic":        p.topic,
		"subscription": p.subscription,
	}.Infof(ctx, "Creating pull subscription for topic.")
	if err := retryCall(ctx, "CreateSub()", func() error {
		return p.service.CreatePullSub(p.subscription, p.topic)
	}); err != nil {
		log.Warningf(log.SetError(ctx, err), "Failed to create subscription.")
		return errors.New("pubsub: failed to create subscription")
	}
	return nil
}
// start runs the event flushing loop. It does not return until either an
// error occurs or ctx is cancelled.
func (l *evtLogger) start(ctx context.Context) error {
	defer close(l.evtCh)
	evtBuf := []*crit_event.LogRequestLite_LogEventLite{}
	clck := clock.Get(ctx)

	flush := func(evts []*crit_event.LogRequestLite_LogEventLite) error {
		// TODO: Auth, retry/backoff etc.
		req := &crit_event.LogRequestLite{
			// RequestTimeMs is in milliseconds, so convert from nanoseconds.
			RequestTimeMs: proto.Int64(clck.Now().UnixNano() / int64(time.Millisecond)),
			LogSourceName: proto.String("CHROME_INFRA"),
			LogEvent:      evts,
		}
		logging.Infof(ctx, "Flushing %v events\n", len(req.LogEvent))
		b, err := proto.Marshal(req)
		if err != nil {
			return err
		}
		resp, err := l.cfg.Client.Post(l.cfg.Endpoint, "application/octet-stream", bytes.NewBuffer(b))
		if err != nil {
			return err
		}
		defer resp.Body.Close()
		if resp.StatusCode >= 400 {
			b, err := httputil.DumpResponse(resp, true)
			logging.Errorf(ctx, "Error body: %v %v", string(b), err)
			return fmt.Errorf("http status %d: %s", resp.StatusCode, l.cfg.Endpoint)
		}
		return nil
	}

	// Loop:
	// - If you can read an event off l.evtCh, append it to the array of events.
	// - If you can't read an event off l.evtCh, and it's been at least
	//   cfg.Interval since the last flush, then flush the buffered events,
	//   if any.
	// - If ctx.Done() returns something, try one last flush and exit.
	//
	// Ways this can act poorly:
	// - If l.evtCh is at capacity and a client attempts to log an event, that
	//   client will block. This is unlikely to happen because the select
	//   statement spends very little time on anything else, including the
	//   flush operation.
	// - If flush operations take longer than the ticker delay, then multiple
	//   simultaneous flush operations can stack up.
	// - If a flush operation fails, there is no retry mechanism (yet); it
	//   just logs an error locally.
	ticker := clck.After(l.cfg.Interval)
	for {
		select {
		case evt := <-l.evtCh:
			evtBuf = append(evtBuf, evt)
		case <-ticker:
			if len(evtBuf) > 0 {
				// Flush in a separate goroutine so we can free this one up to
				// keep reading off of l.evtCh and not block clients.
				// TODO(seanmccullough): add retry logic on POST failure, but
				// make sure to not leak memory. This may mean dropping events
				// or persisting them to disk.
				go func(evts []*crit_event.LogRequestLite_LogEventLite) {
					if err := flush(evts); err != nil {
						logging.Errorf(ctx, "Error flushing: %v", err)
					}
				}(evtBuf)
				// The above goroutine has a copy of evtBuf, so we can now
				// reset it safely.
				evtBuf = []*crit_event.LogRequestLite_LogEventLite{}
			}
			ticker = clck.After(l.cfg.Interval)
		case <-ctx.Done():
			if len(evtBuf) > 0 {
				if err := flush(evtBuf); err != nil {
					logging.Errorf(ctx, "Error flushing: %v", err)
				}
			}
			logging.Infof(ctx, "Event logger exiting.")
			return nil
		}
	}
}
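// A shutdown sketch (illustrative): cancelling the context passed to New ends
// the loop above after one final flush. The Endpoint value is a placeholder.
func exampleShutdown() {
	ctx, cancel := context.WithCancel(context.Background())
	l := New(ctx, Config{Endpoint: "https://logs.example.com/log"}) // placeholder
	_ = l
	// ... log events via l ...
	cancel() // start() flushes any buffered events, then returns.
}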
// mainImpl is the main execution function.
func mainImpl(args []string) int {
	// Use all available cores.
	if os.Getenv("GOMAXPROCS") == "" {
		runtime.GOMAXPROCS(runtime.NumCPU())
	}

	// Install a console logger by default.
	ctx := context.Background()
	ctx = gologger.Use(ctx)

	loggerConfig := newLoggerConfig() // Internal logging config (cloud logging).
	logConfig := log.Config{Level: log.Debug}
	config := config{}

	fs := flag.CommandLine
	config.addFlags(fs)
	loggerConfig.addFlags(fs)
	logConfig.AddFlags(fs)
	fs.Parse(args)

	// TODO(dnj): Fix this once LUCI logging CL lands.
	ctx = log.SetLevel(ctx, logConfig.Level)
	ctx = logConfig.Set(ctx)

	// Load authenticated client.
	client, err := config.createAuthenticatedClient(ctx)
	if err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to create authenticated service client.")
		return 1
	}

	// Set up local logging configuration.
	ctx, logFlushFunc, err := loggerConfig.use(ctx, client)
	if err == nil {
		defer logFlushFunc()
	} else {
		log.Warningf(log.SetError(ctx, err), "Failed to set up cloud logging.")
	}

	app := newApplication(config)
	if err := app.loadServices(ctx, client); err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to initialize services.")
		return 1
	}

	// Set up interrupt handler.
	signalC := make(chan os.Signal, 1)
	go func() {
		triggered := false
		for sig := range signalC {
			if !triggered {
				triggered = true
				log.Infof(log.SetField(ctx, "signal", sig), "Received signal; starting shutdown.")
				app.shutdown()
			} else {
				// Triggered multiple times; immediately shut down.
				os.Exit(1)
			}
		}
	}()
	signal.Notify(signalC, os.Interrupt, os.Kill)
	defer func() {
		signal.Stop(signalC)
		close(signalC)
	}()

	log.Infof(ctx, "Starting application execution...")
	if err := app.run(ctx); err != nil {
		log.Errorf(log.SetError(ctx, err), "Error during application execution.")
		return 1
	}
	return 0
}