Example #1
func (l *evtLogger) Log(ctx context.Context, evt *crit_event.ChromeInfraEvent) {
	// Capture the timestamp as early as possible, so proto encoding time
	// etc. doesn't skew the logged event time. EventTimeMs expects
	// milliseconds since the epoch.
	clck := clock.Get(ctx)
	now := clck.Now().UnixNano() / int64(time.Millisecond)

	evt.EventSource = l.src
	// TODO: Consider making this a func parameter.
	evt.TimestampKind = crit_event.ChromeInfraEvent_POINT.Enum()

	if trace, ok := ctx.Value(TraceID).(string); ok {
		evt.TraceId = proto.String(trace)
	}

	// TODO:
	//	evt.SpanId = // ?
	//	evt.ParentId = // ?

	bytes, err := proto.Marshal(evt)
	if err != nil {
		logging.Errorf(ctx, "Error marshaling ChromeInfraEvent: %v", err.Error())
		return
	}

	l.evtCh <- &crit_event.LogRequestLite_LogEventLite{
		EventTimeMs:     proto.Int64(now),
		EventCode:       proto.Int32(0),
		EventFlowId:     proto.Int32(0),
		SourceExtension: bytes,
	}
}
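
For reference, here is a minimal caller sketch (not part of the original examples). It assumes an *evtLogger has been constructed and started elsewhere, and that TraceID is the context key Log checks; the logExample name and the trace-ID value are purely illustrative.

// logExample shows one hypothetical way to drive Log.
func logExample(ctx context.Context, l *evtLogger) {
	// Attach a trace ID so Log can copy it into evt.TraceId.
	ctx = context.WithValue(ctx, TraceID, "example-trace-id")

	// Log fills in EventSource, TimestampKind and TraceId itself; any other
	// payload fields would be set by the caller before this call.
	evt := &crit_event.ChromeInfraEvent{}
	l.Log(ctx, evt)
}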
Example #2
// start runs the event flushing loop. It does not return until ctx is
// cancelled; flush errors are logged but do not stop the loop.
func (l *evtLogger) start(ctx context.Context) error {
	defer close(l.evtCh)

	evtBuf := []*crit_event.LogRequestLite_LogEventLite{}
	clck := clock.Get(ctx)

	flush := func(evts []*crit_event.LogRequestLite_LogEventLite) error {
		// TODO: Auth, retry/backoff etc.
		req := &crit_event.LogRequestLite{
			RequestTimeMs: proto.Int64(clck.Now().UnixNano() / int64(time.Millisecond)),
			LogSourceName: proto.String("CHROME_INFRA"),
			LogEvent:      evts,
		}

		logging.Infof(ctx, "Flushing %d events", len(req.LogEvent))

		b, err := proto.Marshal(req)
		if err != nil {
			return err
		}

		resp, err := l.cfg.Client.Post(l.cfg.Endpoint, "application/octet-stream", bytes.NewBuffer(b))
		if err != nil {
			return err
		}

		defer resp.Body.Close()
		if resp.StatusCode >= 400 {
			b, err := httputil.DumpResponse(resp, true)
			logging.Errorf(ctx, "Error body: %v %v", string(b), err)
			return fmt.Errorf("http status %d: %s", resp.StatusCode, l.cfg.Endpoint)
		}

		return nil
	}

	// Loop:
	// - If an event can be read off l.evtCh, append it to the buffer.
	// - If the ticker fires, flush any buffered events in a separate
	// goroutine and re-arm the ticker.
	// - If ctx is cancelled, attempt one final synchronous flush and exit.

	// Ways this can behave poorly:
	// - If l.evtCh is full when a client attempts to log an event, that
	// client will block. This is unlikely to happen because the select
	// statement spends very little time on anything else, including the
	// flush operation.
	// - If flush operations take longer than the ticker interval, multiple
	// simultaneous flush operations can stack up.
	// - If a flush operation fails, there is no retry mechanism (yet); the
	// error is only logged locally.
	ticker := clck.After(l.cfg.Interval)
	for {
		select {
		case evt := <-l.evtCh:
			evtBuf = append(evtBuf, evt)
		case <-ticker:
			if len(evtBuf) > 0 {
				// Flush in a separate goroutine so we can free this
				// one up to keep reading off of l.evtCh and not block
				// clients.  TODO(seanmccullough): add retry logic on
				// POST failure, but make sure to not leak memory. This
				// may mean dropping events or persisting them to disk.
				go func(evts []*crit_event.LogRequestLite_LogEventLite) {
					if err := flush(evts); err != nil {
						logging.Errorf(ctx, "Error flushing: %v", err.Error())
					}
				}(evtBuf)
				// The goroutine above received its own copy of the evtBuf
				// slice header, so reassigning evtBuf to a fresh slice here
				// is safe.
				evtBuf = []*crit_event.LogRequestLite_LogEventLite{}
			}
			ticker = clck.After(l.cfg.Interval)
		case <-ctx.Done():
			if len(evtBuf) > 0 {
				if err := flush(evtBuf); err != nil {
					logging.Errorf(ctx, "Error flushing: %v", err.Error())
				}
			}
			logging.Infof(ctx, "Event logger exiting.")
			return nil
		}
	}
}
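
And a minimal lifecycle sketch (again, not from the original code): it assumes some constructor, not shown here, has produced a ready *evtLogger; the runLogger name is hypothetical.

// runLogger runs start in its own goroutine and shuts it down cleanly.
func runLogger(parent context.Context, l *evtLogger) {
	ctx, cancel := context.WithCancel(parent)
	defer cancel()

	done := make(chan struct{})
	go func() {
		// start blocks until ctx is cancelled, performing a final flush
		// before it returns.
		if err := l.start(ctx); err != nil {
			logging.Errorf(ctx, "event logger stopped: %v", err)
		}
		close(done)
	}()

	// ... application work that calls l.Log(ctx, evt) goes here ...

	cancel() // triggers the final flush in start
	<-done   // wait for start to exit before returning
}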