// publishFailure publishes a failure/panic event to be monitored. func publishFailure(r interface{}) { var p string switch r.(type) { case string: p = r.(string) case error: p = fmt.Sprintf("%v", r.(error)) default: p = "Unknown panic" } b := make([]byte, 1024) runtime.Stack(b, true) if err := client.Pub("com.hailocab.monitor.failure", &fproto.Failure{ ServiceName: proto.String(Name), ServiceVersion: proto.Uint64(Version), AzName: proto.String(az), Hostname: proto.String(hostname), InstanceId: proto.String(InstanceID), Timestamp: proto.Int64(time.Now().Unix()), Uptime: proto.Int64(int64(time.Since(serviceStarted).Seconds())), Type: proto.String("PANIC"), Reason: proto.String(p), Stack: proto.String(string(b)), }); err != nil { log.Errorf("[Server] Failed to publish failure event: %v", err) } }
// rusage assembles the resource-usage section of the platform stats.
// The CPU time fields are read through getCpuUsage and the remaining fields
// through getGaugeVal, all against s.registry under the "rusage.*" names.
func (s *stats) rusage() *pstats.RusageStats {
	usage := new(pstats.RusageStats)
	usage.UserTime = proto.Float32(getCpuUsage(s.registry, "rusage.UserTime"))
	usage.SystemTime = proto.Float32(getCpuUsage(s.registry, "rusage.SystemTime"))
	usage.MaxRss = proto.Int64(getGaugeVal(s.registry, "rusage.MaxRss"))
	usage.InBlock = proto.Int64(getGaugeVal(s.registry, "rusage.InBlock"))
	usage.OuBlock = proto.Int64(getGaugeVal(s.registry, "rusage.OuBlock"))
	return usage
}
// get returns a snapshot of platform stats func (s *stats) get(status string) *pstats.PlatformStats { rusageStats := s.rusage() runtimeStats := s.runtime() endpointStats := s.endpoints() return &pstats.PlatformStats{ ServiceName: proto.String(ServiceName), ServiceVersion: proto.Uint64(ServiceVersion), ServiceType: proto.String(ServiceType), AzName: proto.String(AzName), Hostname: proto.String(hostname), InstanceId: proto.String(InstanceID), Status: proto.String(status), Timestamp: proto.Int64(time.Now().Unix()), Uptime: proto.Int64(int64(time.Since(s.startTime).Seconds())), Rusage: rusageStats, Runtime: runtimeStats, Endpoints: endpointStats, } }
// traceAttemptTimeout decides if we want to trigger a trace event for an attempt timeout, and processes it func (c *client) traceAttemptTimeout(req *Request, attemptNum int, timeout time.Duration) { if req.shouldTrace() { desc := fmt.Sprintf("Attempt %v timeout talking to '%s.%s' after '%v' for '%s'", attemptNum, req.Service(), req.Endpoint(), timeout, req.MessageID()) trace.Send(&traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_ATTEMPT_TIMEOUT.Enum(), MessageId: proto.String(req.MessageID()), From: proto.String(req.From()), FromEndpoint: proto.String(req.FromEndpoint()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), ParentMessageId: proto.String(req.ParentMessageID()), Hostname: proto.String(c.hostname), Az: proto.String(c.az), Payload: proto.String(""), // @todo ErrorCode: proto.String("com.hailocab.kernel.platform.attemptTimeout"), ErrorDescription: proto.String(desc), Duration: proto.Int64(int64(timeout)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), }) } }
// healthCheckSampleToProto converts a health check definition and one of its
// samples into the protobuf HealthCheck message.
//
// Identity fields (id, service name/version, hostname, instance id, priority)
// come from hc; the timestamp (Unix seconds), health flag, error description
// and measurements come from sample.
func healthCheckSampleToProto(hc *HealthCheck, sample *Sample) *hcproto.HealthCheck {
	out := new(hcproto.HealthCheck)
	out.Timestamp = proto.Int64(sample.At.Unix())
	out.HealthCheckId = proto.String(hc.Id)
	out.ServiceName = proto.String(hc.ServiceName)
	out.ServiceVersion = proto.Uint64(hc.ServiceVersion)
	out.Hostname = proto.String(hc.Hostname)
	out.InstanceId = proto.String(hc.InstanceId)
	out.IsHealthy = proto.Bool(sample.IsHealthy)
	out.ErrorDescription = proto.String(sample.ErrorDescription)
	out.Measurements = mapToProto(sample.Measurements)
	out.Priority = hcproto.HealthCheck_Priority(hc.Priority).Enum()
	return out
}
// traceRsp decides if we want to trigger a trace event (when processing response) and if so deals with it func (c *client) traceRsp(req *Request, rsp *Response, err errors.Error, d time.Duration) { if req.shouldTrace() { e := &traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_REP.Enum(), MessageId: proto.String(req.MessageID()), From: proto.String(req.From()), FromEndpoint: proto.String(req.FromEndpoint()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), ParentMessageId: proto.String(req.ParentMessageID()), Hostname: proto.String(c.hostname), Az: proto.String(c.az), Payload: proto.String(""), // @todo Duration: proto.Int64(int64(d)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), } if err != nil { e.ErrorCode = proto.String(err.Code()) e.ErrorDescription = proto.String(err.Description()) } trace.Send(e) } }
// traceOut traces a request outbound from a service handler func traceOut(req *Request, msg proto.Message, err perrors.Error, d time.Duration) { if req.shouldTrace() { e := &traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_OUT.Enum(), MessageId: proto.String(req.MessageID()), ParentMessageId: proto.String(req.ParentMessageID()), From: proto.String(req.From()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), Hostname: proto.String(hostname), Az: proto.String(az), Payload: proto.String(""), // @todo HandlerInstanceId: proto.String(InstanceID), Duration: proto.Int64(int64(d)), PersistentTrace: proto.Bool(req.TraceShouldPersist()), } if err != nil { e.ErrorCode = proto.String(err.Code()) e.ErrorDescription = proto.String(err.Description()) } trace.Send(e) } }
// traceReq decides if we want to trigger a trace event (when sending a request) and if so deals with it func (c *client) traceReq(req *Request) { if req.shouldTrace() { trace.Send(&traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_REQ.Enum(), MessageId: proto.String(req.MessageID()), ParentMessageId: proto.String(req.ParentMessageID()), From: proto.String(req.From()), FromEndpoint: proto.String(req.FromEndpoint()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), Hostname: proto.String(c.hostname), Az: proto.String(c.az), Payload: proto.String(""), // @todo PersistentTrace: proto.Bool(req.TraceShouldPersist()), }) } }
// traceIn traces a request inbound to a service to handle func traceIn(req *Request) { if req.shouldTrace() { go trace.Send(&traceproto.Event{ Timestamp: proto.Int64(time.Now().UnixNano()), TraceId: proto.String(req.TraceID()), Type: traceproto.Event_IN.Enum(), MessageId: proto.String(req.MessageID()), ParentMessageId: proto.String(req.ParentMessageID()), From: proto.String(req.From()), To: proto.String(fmt.Sprintf("%v.%v", req.Service(), req.Endpoint())), Hostname: proto.String(hostname), Az: proto.String(az), Payload: proto.String(""), // @todo HandlerInstanceId: proto.String(InstanceID), PersistentTrace: proto.Bool(req.TraceShouldPersist()), }) } }
func TestResponder(t *testing.T) { stub := &Stub{ Service: mockFooService, Endpoint: mockHealthEndpoint, Responder: func(invocation int, req *client.Request) (proto.Message, errors.Error) { if invocation == 1 { return &hcproto.Response{ Healthchecks: []*hcproto.HealthCheck{ &hcproto.HealthCheck{ Timestamp: proto.Int64(1403629015), ServiceName: proto.String("foo"), ServiceVersion: proto.Uint64(1403629015), Hostname: proto.String("localhost"), InstanceId: proto.String("foobar"), HealthCheckId: proto.String("boom"), IsHealthy: proto.Bool(true), }, }, }, nil } return nil, errors.InternalServerError("only.one.allowed", "First call only works") }, } mock := NewMock().Stub(stub) caller := mock.Caller() req, _ := client.NewRequest(mockFooService, mockHealthEndpoint, &hcproto.Request{}) rsp := &hcproto.Response{} e := caller(req, rsp) assert.Nil(t, e, "Expecting our mocked call to be intercepted and stubbed response returned, got err: %v", e) assert.Len(t, rsp.GetHealthchecks(), 1, "Response does not contain our mocked content: no healthchecks") // now repeat, and we SHOULD get an error e = caller(req, rsp) assert.NotNil(t, e, "Expecting our mocked call to be intercepted and error response returned on 2nd call") assert.Equal(t, e.Code(), "only.one.allowed", "Expecting code 'only.one.allowed', got '%s'", e.Code()) }
func TestMockCallerPopulatesResponse(t *testing.T) { req, _ := client.NewRequest(mockFooService, mockHealthEndpoint, &hcproto.Request{}) stub := &Stub{ Service: mockFooService, Endpoint: mockHealthEndpoint, Response: &hcproto.Response{ Healthchecks: []*hcproto.HealthCheck{ &hcproto.HealthCheck{ Timestamp: proto.Int64(1403629015), ServiceName: proto.String("foo"), ServiceVersion: proto.Uint64(1403629015), Hostname: proto.String("localhost"), InstanceId: proto.String("foobar"), HealthCheckId: proto.String("boom"), IsHealthy: proto.Bool(true), }, }, }, } mock := NewMock().Stub(stub) caller := mock.Caller() rsp := &hcproto.Response{} e := caller(req, rsp) assert.Nil(t, e, "Expecting our mocked call to be intercepted and stubbed response returned, got err: %v", e) // ensure stub has what we expect assert.Len(t, stub.matched, 1, "Expecting 1 match payload to be stored after execution") assert.Equal(t, stub.CountCalls(), 1, "CountCalls should return 1 too") assert.Len(t, rsp.GetHealthchecks(), 1, "Response does not contain our mocked content: no healthchecks") }