// instrumentedHandler wraps the handler to provide instrumentation func (ep *Endpoint) instrumentedHandler(req *Request) (proto.Message, perrors.Error) { start := time.Now() var err perrors.Error var m proto.Message // In a defer in case the handler panics defer func() { stats.Record(ep, err, time.Since(start)) if err == nil { inst.Timing(1.0, "success."+ep.Name, time.Since(start)) return } switch err.Type() { case perrors.ErrorBadRequest, perrors.ErrorNotFound: // Ignore errors that are caused by clients // TODO: consider a new stat for clienterror? inst.Timing(1.0, "success."+ep.Name, time.Since(start)) return default: inst.Timing(1.0, "error."+ep.Name, time.Since(start)) } }() traceIn(req) // check auth, only call handler if auth passes err = ep.Authoriser.Authorise(req) if err == nil { req.Auth().SetAuthorised(true) m, err = ep.Handler(req) } traceOut(req, m, err, time.Since(start)) return m, err }
// commonLogHandler will log to w using the Apache common log format // http://httpd.apache.org/docs/2.2/logs.html#common // If w is nil, nothing will be logged func commonLogHandler(w io.Writer, h Handler) Handler { if w == nil { return h } return func(req *Request) (proto.Message, errors.Error) { var userId string if req.Auth() != nil && req.Auth().AuthUser() != nil { userId = req.Auth().AuthUser().Id } var err errors.Error var m proto.Message // In defer in case the handler panics defer func() { status := uint32(200) if err != nil { status = err.HttpCode() } size := 0 if m != nil { log.Debug(m.String()) size = len(m.String()) } fmt.Fprintf(w, "%s - %s [%s] \"%s %s %s\" %d %d\n", req.From(), userId, time.Now().Format("02/Jan/2006:15:04:05 -0700"), "GET", // Treat them all as GET's at the moment req.Endpoint(), "HTTP/1.0", // Has to be HTTP or apachetop ignores it status, size, ) }() // Execute the actual handler m, err = h(req) return m, err } }
// traceRsp decides if we want to trigger a trace event (when processing response) and if so deals with it func (c *client) traceRsp(req *Request, rsp *Response, err errors.Error, d time.Duration) { if req.shouldTrace() { e := &traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_REP.Enum(), MessageId: proto.String(req.MessageID()), From: proto.String(req.From()), FromEndpoint: proto.String(req.FromEndpoint()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), ParentMessageId: proto.String(req.ParentMessageID()), Hostname: proto.String(c.hostname), Az: proto.String(c.az), Payload: proto.String(""), // @todo Duration: proto.Int64(int64(d)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), } if err != nil { e.ErrorCode = proto.String(err.Code()) e.ErrorDescription = proto.String(err.Description()) } trace.Send(e) } }
// traceOut traces a request outbound from a service handler func traceOut(req *Request, msg proto.Message, err perrors.Error, d time.Duration) { if req.shouldTrace() { e := &traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_OUT.Enum(), MessageId: proto.String(req.MessageID()), ParentMessageId: proto.String(req.ParentMessageID()), From: proto.String(req.From()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), Hostname: proto.String(hostname), Az: proto.String(az), Payload: proto.String(""), // @todo HandlerInstanceId: proto.String(InstanceID), Duration: proto.Int64(int64(d)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), } if err != nil { e.ErrorCode = proto.String(err.Code()) e.ErrorDescription = proto.String(err.Description()) } trace.Send(e) } }
// HandleRequest and send back response func HandleRequest(req *Request) { defer func() { if r := recover(); r != nil { log.Criticalf("[Server] Panic \"%v\" when handling request: (id: %s, endpoint: %s, content-type: %s,"+ " content-length: %d)", r, req.MessageID(), req.Destination(), req.delivery.ContentType, len(req.delivery.Body)) inst.Counter(1.0, "runtime.panic", 1) publishFailure(r) debug.PrintStack() } }() if len(req.Service()) > 0 && req.Service() != Name { log.Criticalf(`[Server] Message meant for "%s" not "%s"`, req.Service(), Name) return } reqProcessor: switch { case req.isHeartbeat(): if dsc.IsConnected() { log.Tracef("[Server] Inbound heartbeat from: %s", req.ReplyTo()) dsc.hb.beat() raven.SendResponse(PongResponse(req), InstanceID) } else { log.Warnf("[Server] Not connected but heartbeat from: %s", req.ReplyTo()) } case req.IsPublication(): log.Tracef("[Server] Inbound publication on topic: %s", req.Topic()) if endpoint, ok := reg.find(req.Topic()); ok { // Match + call handler if data, err := endpoint.unmarshalRequest(req); err != nil { log.Warnf("[Server] Failed to unmarshal published message: %s", err.Error()) break reqProcessor } else { req.unmarshaledData = data } if _, err := commonLogHandler(commonLogger, endpoint.instrumentedHandler)(req); err != nil { // don't do anything on error apart from log - it's a pub sub call so no response required log.Warnf("[Server] Failed to process published message: %v", err) } } default: log.Tracef("[Server] Inbound message %s from %s", req.MessageID(), req.ReplyTo()) // Match a handler endpoint, ok := reg.find(req.Endpoint()) if !ok { if rsp, err := ErrorResponse(req, errors.InternalServerError("com.hailocab.kernel.handler.missing", fmt.Sprintf("No handler registered for %s", req.Destination()))); err != nil { log.Criticalf("[Server] Unable to build response: %v", err) } else { raven.SendResponse(rsp, InstanceID) } return } // Unmarshal the request data var ( reqData, rspData proto.Message err errors.Error ) if reqData, err = endpoint.unmarshalRequest(req); err == nil { req.unmarshaledData = reqData } // Call handler - constraining the max concurrent requests handled if err == nil { callerName := req.From() if callerName == "" { callerName = "unknown" } tokenBucketName := fmt.Sprintf("server.tokens.%s", callerName) reqsBucketName := fmt.Sprintf("server.inflightrequests.%s", callerName) tokC := tokensChan(callerName) select { case t := <-tokC: func() { defer func() { atomic.AddUint64(&inFlightRequests, ^uint64(0)) // This is actually a subtraction tokC <- t // Return the token to the pool }() nowInFlight := atomic.AddUint64(&inFlightRequests, 1) // Update active request counters inst.Gauge(1.0, tokenBucketName, len(tokC)) inst.Gauge(1.0, reqsBucketName, int(nowInFlight)) rspData, err = commonLogHandler(commonLogger, endpoint.instrumentedHandler)(req) }() case <-time.After(time.Duration(endpoint.Mean) * time.Millisecond): inst.Gauge(1.0, tokenBucketName, len(tokC)) inst.Counter(1.0, "server.error.capacity", 1) err = errors.InternalServerError("com.hailocab.kernel.server.capacity", fmt.Sprintf("Server %v out of capacity", Name)) } } // Check response type matches what's registered if err == nil && rspData != nil { _, rspProtoT := endpoint.ProtoTypes() rspDataT := reflect.TypeOf(rspData) if rspProtoT != nil && rspProtoT != rspDataT { err = errors.InternalServerError("com.hailocab.kernel.server.mismatchedprotocol", fmt.Sprintf("Mismatched response protocol. %s != %s", rspDataT.String(), rspProtoT.String())) } } if err != nil { inst.Counter(1.0, fmt.Sprintf("server.error.%s", err.Code()), 1) switch err.Type() { case errors.ErrorBadRequest, errors.ErrorForbidden, errors.ErrorNotFound: log.Debugf("[Server] Handler error %s calling %v.%v from %v: %v", err.Type(), req.Service(), req.Endpoint(), req.From(), err) case errors.ErrorInternalServer: go publishError(req, err) fallthrough default: log.Errorf("[Server] Handler error %s calling %v.%v from %v: %v", err.Type(), req.Service(), req.Endpoint(), req.From(), err) } if rsp, err := ErrorResponse(req, err); err != nil { log.Criticalf("[Server] Unable to build response: %v", err) } else { raven.SendResponse(rsp, InstanceID) } return } if rsp, err := ReplyResponse(req, rspData); err != nil { if rsp, err2 := ErrorResponse(req, errors.InternalServerError("com.hailocab.kernel.marshal.error", fmt.Sprintf("Could not marshal response %v", err))); err2 != nil { log.Criticalf("[Server] Unable to build error response: %v", err2) } else { // Send the error response raven.SendResponse(rsp, InstanceID) } } else { // Send the succesful response raven.SendResponse(rsp, InstanceID) } } }
// publishError publishes an event when a handler returns an error func publishError(req *Request, e errors.Error) { if !PublishErrors { return } stacktrace := "" if e.MultiStack() != nil { stacktrace = e.MultiStack().String() } msg := map[string]interface{}{ "created": time.Now(), "service": Name, "version": Version, "azName": az, "hostname": hostname, "instanceId": InstanceID, "error": e.Error(), "type": e.Type(), "code": e.Code(), "description": e.Description(), "httpCode": e.HttpCode(), "context": e.Context(), "traceId": req.TraceID(), "stacktrace": stacktrace, } payload, err := json.Marshal(msg) if err != nil { log.Errorf("[Server] Failed to JSON encode error event: %v", err) } if err = nsq.Publish(errorTopic, payload); err != nil { log.Errorf("[Server] Failed to publish error event: %v", err) } }