示例#1
0
func (s *endpointServiceImpl) send(ctx context.Context, data []byte) error {
	ctx = log.SetField(ctx, "endpointURL", s.url)
	return retryCall(ctx, "endpoint.send", func() error {
		startTime := clock.Now(ctx)

		log.Debugf(ctx, "Pushing message to endpoint.")
		req, err := http.NewRequest("POST", s.url, bytes.NewReader(data))
		if err != nil {
			log.Errorf(log.SetError(ctx, err), "Failed to create HTTP request.")
			return err
		}
		req.Header.Add("content-type", protobufContentType)
		req.Header.Add("user-agent", monitoringEndpointUserAgent)

		resp, err := s.client.Do(req)
		if err != nil {
			// Treat a client error as transient.
			log.Warningf(log.SetError(ctx, err), "Failed proxy client request.")
			return errors.WrapTransient(err)
		}
		defer resp.Body.Close()

		// Read the full response body. This will enable us to re-use the
		// connection.
		bodyData, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			log.Errorf(log.SetError(ctx, err), "Error during endpoint connection.")
			return errors.WrapTransient(err)
		}

		log.Fields{
			"status":        resp.Status,
			"statusCode":    resp.StatusCode,
			"headers":       resp.Header,
			"contentLength": resp.ContentLength,
			"body":          string(bodyData),
			"duration":      clock.Now(ctx).Sub(startTime),
		}.Debugf(ctx, "Received HTTP response from endpoint.")

		if http.StatusOK <= resp.StatusCode && resp.StatusCode < http.StatusMultipleChoices {
			log.Debugf(ctx, "Message pushed successfully.")
			return nil
		}

		err = fmt.Errorf("http: server error (%d)", resp.StatusCode)
		if resp.StatusCode >= http.StatusInternalServerError {
			err = errors.WrapTransient(err)
		}

		log.Fields{
			log.ErrorKey: err,
			"status":     resp.Status,
			"statusCode": resp.StatusCode,
		}.Warningf(ctx, "Proxy error.")
		return err
	})
}
示例#2
0
// New returns a new EventLogger. If cfg.Interval is not set,
// it will be assigned time.Second as the default. cfg.Client
// will likewise be asinged http.DefaultClient by default.
func New(ctx context.Context, cfg Config) EventLogger {
	if cfg.Interval == time.Duration(0) {
		cfg.Interval = time.Second
	}
	if cfg.Client == nil {
		cfg.Client = http.DefaultClient
	}

	ret := &evtLogger{
		cfg:   cfg,
		evtCh: make(chan *crit_event.LogRequestLite_LogEventLite, maxEvts),
		src: &crit_event.InfraEventSource{
			HostName:      proto.String(cfg.Hostname),
			AppengineName: proto.String(cfg.AppengineName),
			ServiceName:   proto.String(cfg.Service),
		},
	}

	go func() {
		if err := ret.start(ctx); err != nil {
			logging.Errorf(ctx, "Error: %v", err)
		}
	}()

	return ret
}
示例#3
0
func (l *evtLogger) Log(ctx context.Context, evt *crit_event.ChromeInfraEvent) {
	// Get this as soon as possible, so proto encoding time etc
	// doesn't bump the logged event time.
	clck := clock.Get(ctx)
	now := clck.Now().Unix()

	evt.EventSource = l.src
	// TODO: Consider making this a func parameter.
	evt.TimestampKind = crit_event.ChromeInfraEvent_POINT.Enum()

	if trace, ok := ctx.Value(TraceID).(string); ok {
		evt.TraceId = proto.String(trace)
	}

	// TODO:
	//	evt.SpanId = // ?
	//	evt.ParentId = // ?

	bytes, err := proto.Marshal(evt)
	if err != nil {
		logging.Errorf(ctx, "Error marshaling ChromeInfraEvent: %v", err.Error())
		return
	}

	l.evtCh <- &crit_event.LogRequestLite_LogEventLite{
		EventTimeMs:     proto.Int64(now),
		EventCode:       proto.Int32(0),
		EventFlowId:     proto.Int32(0),
		SourceExtension: bytes,
	}
}
示例#4
0
// process runs in its own goroutine and continuously processes data in the
// configured subscription.
func (a *application) process(ctx context.Context) {
	for !a.isShutdown() {
		// Although we call pullAckMessages without backoff or a throttle, the calls
		// to the Pub/Sub service use retry library's exponential backoff, so we
		// don't need to implement DoS protection at this level.
		err := a.pubsub.pullAckMessages(ctx, func(msgs []*pubsub.Message) {
			log.Fields{
				"count": len(msgs),
			}.Infof(ctx, "Pull()ed messages from subscription.")

			if err := a.proxyMessages(ctx, msgs); err != nil {
				log.Errorf(log.SetError(ctx, err), "Error sending messages to proxy.")
			}
		})
		if err == errNoMessages {
			log.Fields{
				"delay": noMessageDelay,
			}.Debugf(ctx, "Received no messages; sleeping for next round.")
			a.sleepWithInterrupt(ctx, noMessageDelay)
		} else if err != nil {
			log.Errorf(log.SetError(ctx, err), "process: Failed to Pull() round of messages.")
		}
	}
}
示例#5
0
// retryCall is a wrapper around the retry library that logs information about
// the retry attempts.
func retryCall(ctx context.Context, title string, f func() error) error {
	log.Debugf(ctx, "Executing retriable call %s.", title)
	err := retry.Retry(ctx, func() error {
		return f()
	}, func(err error, delay time.Duration) {
		log.Fields{
			log.ErrorKey: err,
			"delay":      delay,
		}.Warningf(ctx, "Transient error encountered during %s; retrying.", title)
	})
	if err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to %s.", title)
		return err
	}
	return nil
}
示例#6
0
func (c *endpointConfig) createService(ctx context.Context) (endpointService, error) {
	if c.url == "" {
		return nil, errors.New("endpoint: you must supply a monitoring endpoint")
	}

	authenticator := auth.NewAuthenticator(
		auth.Options{
			Method:                 auth.ServiceAccountMethod,
			Scopes:                 endpointScopes,
			ServiceAccountJSONPath: c.serviceAccountJSONPath,
			Logger:                 log.Get(ctx),
		})
	client, err := auth.AuthenticatedClient(auth.SilentLogin, authenticator)
	if err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to configure endpoint client.")
		return nil, errors.New("endpoint: failed to configure endpoint client")
	}

	return &endpointServiceImpl{
		endpointConfig: *c,
		client:         client,
	}, nil
}
示例#7
0
// newPubSubClient instantiates a new Pub/Sub client.
//
// This method will also perform authentication and setup the topic/subscription
// if it isn't already set up.
func newPubSubClient(ctx context.Context, config pubsubConfig, svc pubSubService) (*pubsubClient, error) {
	if config.subscription == "" {
		return nil, errors.New("pubsub: you must supply a subscription")
	}
	if config.batchSize <= 0 {
		return nil, errors.New("pubsub: batch size must be at least 1")
	} else if config.batchSize > maxSubscriptionPullSize {
		return nil, fmt.Errorf("pubsub: batch size cannot exceed %d", maxSubscriptionPullSize)
	}

	p := pubsubClient{
		pubsubConfig: &config,
		service:      svc,
	}

	// Ensure that our Subscription (and topic) exist.
	if err := p.setupSubscription(ctx); err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to set up subscription.")
		return nil, err
	}

	return &p, nil
}
示例#8
0
// setupSubscription asserts that the configured subscription exists. In doing
// so, it also asserts that the client credentials are valid with respect to the
// configured project/subscription.
//
// If the subscription doesn't exist, this method can create the subscription
// and (if missing) its topic, if the "create" flag is set.
func (p *pubsubClient) setupSubscription(ctx context.Context) error {
	exists := false
	log.Fields{
		"subscription": p.topic,
	}.Infof(ctx, "Checking for subscription existence.")
	err := retryCall(ctx, "SubExists()", func() error {
		var err error
		exists, err = p.service.SubExists(p.subscription)
		return p.wrapTransient(err)
	})
	if err != nil {
		log.Warningf(log.SetError(ctx, err),
			"Failed to test for subscription; assuming it doesn't exist.")
	}
	if exists {
		return nil
	}

	if !p.create {
		return errors.New("pubsub: subscription doesn't exist, not configured to create")
	}

	// Create the subscription if it doesn't exist.
	if p.topic == "" {
		log.Errorf(ctx, "Cannot create subscription; no topic was specified.")
		return errors.New("pubsub: cannot create subscription")
	}

	// Test if the topic exists...
	log.Fields{
		"topic": p.topic,
	}.Infof(ctx, "Checking for topic existence.")
	err = retryCall(ctx, "TopicExists()", func() error {
		var err error
		exists, err = p.service.TopicExists(p.topic)
		return p.wrapTransient(err)
	})
	if err != nil {
		log.Warningf(log.SetError(ctx, err),
			"Failed to test for topic; assuming it doesn't exist.")
	}

	if !exists {
		log.Fields{
			"topic": p.topic,
		}.Infof(ctx, "Creating topic.")
		err := retryCall(ctx, "CreateTopic()", func() error {
			return p.service.CreateTopic(p.topic)
		})
		if err != nil {
			log.Warningf(log.SetError(ctx, err),
				"Failed to create topic.")
			return errors.New("pubsub: cannot create topic")
		}
	}

	log.Fields{
		"topic":        p.topic,
		"subscription": p.subscription,
	}.Infof(ctx, "Creating pull subscription for topic.")
	if err := retryCall(ctx, "CreateSub()", func() error {
		return p.service.CreatePullSub(p.subscription, p.topic)
	}); err != nil {
		log.Warningf(log.SetError(ctx, err),
			"Failed to test for subscription; assuming it doesn't exist.")
		return errors.New("pubsub: failed to create subscription")
	}

	return nil
}
示例#9
0
// Start runs the event flushing loop. It does not return until either
// an error occurs or ctx.Done().
func (l *evtLogger) start(ctx context.Context) error {
	defer close(l.evtCh)

	evtBuf := []*crit_event.LogRequestLite_LogEventLite{}
	clck := clock.Get(ctx)

	flush := func(evts []*crit_event.LogRequestLite_LogEventLite) error {
		// TODO: Auth, retry/backoff etc.
		req := &crit_event.LogRequestLite{
			RequestTimeMs: proto.Int64(clck.Now().Unix()),
			LogSourceName: proto.String("CHROME_INFRA"),
			LogEvent:      evts,
		}

		logging.Infof(ctx, "Flushing %v events\n", len(req.LogEvent))

		b, err := proto.Marshal(req)
		if err != nil {
			return err
		}

		resp, err := l.cfg.Client.Post(l.cfg.Endpoint, "application/octet-stream", bytes.NewBuffer(b))
		if err != nil {
			return err
		}

		defer resp.Body.Close()
		if resp.StatusCode >= 400 {
			b, err := httputil.DumpResponse(resp, true)
			logging.Errorf(ctx, "Error body: %v %v", string(b), err)
			return fmt.Errorf("http status %d: %s", resp.StatusCode, l.cfg.Endpoint)
		}

		return nil
	}

	// Loop:
	// - If you can read an event off ch, append it to the array of events.
	// - If you can't read an event off ch, and it's been at least 1s since the
	// last flush call, then flush the events if there are any buffered up.
	// - if ctx.Done() returns something, try one last flush and exit.

	// Ways this can act poorly:
	// - If the capacity of l.evtCh is full and a client attempts to log an event,
	// that client will block.  This is unlikely to happen because the select
	// statement spends very little time on anything else, including the flush
	// operation.
	// - If flush operations take longer than the flushTicker delay, then
	// multiple simultaneous flush operations can stack up.
	// - If flush operation fail, there is no retry mechanism (yet) and they
	// will just log an error locally.
	ticker := clck.After(l.cfg.Interval)
	for {
		select {
		case evt := <-l.evtCh:
			evtBuf = append(evtBuf, evt)
		case <-ticker:
			if len(evtBuf) > 0 {
				// Flush in a separate goroutine so we can free this
				// one up to keep reading off of l.evtCh and not block
				// clients.  TODO(seanmccullough): add retry logic on
				// POST failure, but make sure to not leak memory. This
				// may mean dropping events or persisting them to disk.
				go func(evts []*crit_event.LogRequestLite_LogEventLite) {
					if err := flush(evts); err != nil {
						logging.Errorf(ctx, "Error flushing: %v", err.Error())
					}
				}(evtBuf)
				// The above goroutine has a copy of evtBuf, so
				// we can now reset it safely.
				evtBuf = []*crit_event.LogRequestLite_LogEventLite{}
			}
			ticker = clck.After(time.Second)
		case <-ctx.Done():
			if len(evtBuf) > 0 {
				if err := flush(evtBuf); err != nil {
					logging.Errorf(ctx, "Error flushing: %v", err.Error())
				}
			}
			logging.Infof(ctx, "Event logger exiting.")
			return nil
		}
	}
}
示例#10
0
// mainImpl is the main execution function.
func mainImpl(args []string) int {
	// Use all of teh corez.
	if os.Getenv("GOMAXPROCS") == "" {
		runtime.GOMAXPROCS(runtime.NumCPU())
	}

	// Install a console logger by default.
	ctx := context.Background()
	ctx = gologger.Use(ctx)

	loggerConfig := newLoggerConfig() // Internal logging config (cloud logging).
	logConfig := log.Config{Level: log.Debug}
	config := config{}

	fs := flag.CommandLine
	config.addFlags(fs)
	loggerConfig.addFlags(fs)
	logConfig.AddFlags(fs)
	fs.Parse(args)

	// TODO(dnj): Fix this once LUCI logging CL lands.
	ctx = log.SetLevel(ctx, logConfig.Level)
	ctx = logConfig.Set(ctx)

	// Load authenticated client.
	client, err := config.createAuthenticatedClient(ctx)
	if err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to create authenticated service client.")
		return 1
	}

	// Setup local logging configuration.
	ctx, logFlushFunc, err := loggerConfig.use(ctx, client)
	if err == nil {
		defer logFlushFunc()
	} else {
		log.Warningf(log.SetError(ctx, err), "Failed to setup cloud logging.")
	}

	app := newApplication(config)
	if err := app.loadServices(ctx, client); err != nil {
		log.Errorf(log.SetError(ctx, err), "Failed to initialize services.")
		return 1
	}

	// Set up interrupt handler.
	signalC := make(chan os.Signal, 1)
	go func() {
		triggered := false
		for sig := range signalC {
			if !triggered {
				triggered = true

				log.Infof(log.SetField(ctx, "signal", sig),
					"Received signal; starting shutdown.")
				app.shutdown()
			} else {
				// Triggered multiple times; immediately shut down.
				os.Exit(1)
			}
		}
	}()
	signal.Notify(signalC, os.Interrupt, os.Kill)
	defer func() {
		signal.Stop(signalC)
		close(signalC)
	}()

	log.Infof(ctx, "Starting application execution...")
	if err := app.run(ctx); err != nil {
		log.Errorf(log.SetError(ctx, err), "Error during application execution.")
		return 1
	}

	return 0
}