// publishError publishes an event when a handler returns an error func publishError(req *Request, e errors.Error) { if !PublishErrors { return } stacktrace := "" if e.MultiStack() != nil { stacktrace = e.MultiStack().String() } msg := map[string]interface{}{ "created": time.Now(), "service": Name, "version": Version, "azName": az, "hostname": hostname, "instanceId": InstanceID, "error": e.Error(), "type": e.Type(), "code": e.Code(), "description": e.Description(), "httpCode": e.HttpCode(), "context": e.Context(), "traceId": req.TraceID(), "stacktrace": stacktrace, } payload, err := json.Marshal(msg) if err != nil { log.Errorf("[Server] Failed to JSON encode error event: %v", err) } if err = nsq.Publish(errorTopic, payload); err != nil { log.Errorf("[Server] Failed to publish error event: %v", err) } }
// traceRsp decides if we want to trigger a trace event (when processing response) and if so deals with it func (c *client) traceRsp(req *Request, rsp *Response, err errors.Error, d time.Duration) { if req.shouldTrace() { e := &traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_REP.Enum(), MessageId: proto.String(req.MessageID()), From: proto.String(req.From()), FromEndpoint: proto.String(req.FromEndpoint()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), ParentMessageId: proto.String(req.ParentMessageID()), Hostname: proto.String(c.hostname), Az: proto.String(c.az), Payload: proto.String(""), // @todo Duration: proto.Int64(int64(d)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), } if err != nil { e.ErrorCode = proto.String(err.Code()) e.ErrorDescription = proto.String(err.Description()) } trace.Send(e) } }
// traceOut traces a request outbound from a service handler func traceOut(req *Request, msg proto.Message, err perrors.Error, d time.Duration) { if req.shouldTrace() { e := &traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_OUT.Enum(), MessageId: proto.String(req.MessageID()), ParentMessageId: proto.String(req.ParentMessageID()), From: proto.String(req.From()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), Hostname: proto.String(hostname), Az: proto.String(az), Payload: proto.String(""), // @todo HandlerInstanceId: proto.String(InstanceID), Duration: proto.Int64(int64(d)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), } if err != nil { e.ErrorCode = proto.String(err.Code()) e.ErrorDescription = proto.String(err.Description()) } trace.Send(e) } }
// HandleRequest and send back response func HandleRequest(req *Request) { defer func() { if r := recover(); r != nil { log.Criticalf("[Server] Panic \"%v\" when handling request: (id: %s, endpoint: %s, content-type: %s,"+ " content-length: %d)", r, req.MessageID(), req.Destination(), req.delivery.ContentType, len(req.delivery.Body)) inst.Counter(1.0, "runtime.panic", 1) publishFailure(r) debug.PrintStack() } }() if len(req.Service()) > 0 && req.Service() != Name { log.Criticalf(`[Server] Message meant for "%s" not "%s"`, req.Service(), Name) return } reqProcessor: switch { case req.isHeartbeat(): if dsc.IsConnected() { log.Tracef("[Server] Inbound heartbeat from: %s", req.ReplyTo()) dsc.hb.beat() raven.SendResponse(PongResponse(req), InstanceID) } else { log.Warnf("[Server] Not connected but heartbeat from: %s", req.ReplyTo()) } case req.IsPublication(): log.Tracef("[Server] Inbound publication on topic: %s", req.Topic()) if endpoint, ok := reg.find(req.Topic()); ok { // Match + call handler if data, err := endpoint.unmarshalRequest(req); err != nil { log.Warnf("[Server] Failed to unmarshal published message: %s", err.Error()) break reqProcessor } else { req.unmarshaledData = data } if _, err := commonLogHandler(commonLogger, endpoint.instrumentedHandler)(req); err != nil { // don't do anything on error apart from log - it's a pub sub call so no response required log.Warnf("[Server] Failed to process published message: %v", err) } } default: log.Tracef("[Server] Inbound message %s from %s", req.MessageID(), req.ReplyTo()) // Match a handler endpoint, ok := reg.find(req.Endpoint()) if !ok { if rsp, err := ErrorResponse(req, errors.InternalServerError("com.hailocab.kernel.handler.missing", fmt.Sprintf("No handler registered for %s", req.Destination()))); err != nil { log.Criticalf("[Server] Unable to build response: %v", err) } else { raven.SendResponse(rsp, InstanceID) } return } // Unmarshal the request data var ( reqData, rspData proto.Message err errors.Error ) if reqData, err = endpoint.unmarshalRequest(req); err == nil { req.unmarshaledData = reqData } // Call handler - constraining the max concurrent requests handled if err == nil { callerName := req.From() if callerName == "" { callerName = "unknown" } tokenBucketName := fmt.Sprintf("server.tokens.%s", callerName) reqsBucketName := fmt.Sprintf("server.inflightrequests.%s", callerName) tokC := tokensChan(callerName) select { case t := <-tokC: func() { defer func() { atomic.AddUint64(&inFlightRequests, ^uint64(0)) // This is actually a subtraction tokC <- t // Return the token to the pool }() nowInFlight := atomic.AddUint64(&inFlightRequests, 1) // Update active request counters inst.Gauge(1.0, tokenBucketName, len(tokC)) inst.Gauge(1.0, reqsBucketName, int(nowInFlight)) rspData, err = commonLogHandler(commonLogger, endpoint.instrumentedHandler)(req) }() case <-time.After(time.Duration(endpoint.Mean) * time.Millisecond): inst.Gauge(1.0, tokenBucketName, len(tokC)) inst.Counter(1.0, "server.error.capacity", 1) err = errors.InternalServerError("com.hailocab.kernel.server.capacity", fmt.Sprintf("Server %v out of capacity", Name)) } } // Check response type matches what's registered if err == nil && rspData != nil { _, rspProtoT := endpoint.ProtoTypes() rspDataT := reflect.TypeOf(rspData) if rspProtoT != nil && rspProtoT != rspDataT { err = errors.InternalServerError("com.hailocab.kernel.server.mismatchedprotocol", fmt.Sprintf("Mismatched response protocol. %s != %s", rspDataT.String(), rspProtoT.String())) } } if err != nil { inst.Counter(1.0, fmt.Sprintf("server.error.%s", err.Code()), 1) switch err.Type() { case errors.ErrorBadRequest, errors.ErrorForbidden, errors.ErrorNotFound: log.Debugf("[Server] Handler error %s calling %v.%v from %v: %v", err.Type(), req.Service(), req.Endpoint(), req.From(), err) case errors.ErrorInternalServer: go publishError(req, err) fallthrough default: log.Errorf("[Server] Handler error %s calling %v.%v from %v: %v", err.Type(), req.Service(), req.Endpoint(), req.From(), err) } if rsp, err := ErrorResponse(req, err); err != nil { log.Criticalf("[Server] Unable to build response: %v", err) } else { raven.SendResponse(rsp, InstanceID) } return } if rsp, err := ReplyResponse(req, rspData); err != nil { if rsp, err2 := ErrorResponse(req, errors.InternalServerError("com.hailocab.kernel.marshal.error", fmt.Sprintf("Could not marshal response %v", err))); err2 != nil { log.Criticalf("[Server] Unable to build error response: %v", err2) } else { // Send the error response raven.SendResponse(rsp, InstanceID) } } else { // Send the succesful response raven.SendResponse(rsp, InstanceID) } } }