func monitor(port host.Port, container *ContainerInit, env map[string]string, log log15.Logger) (discoverd.Heartbeater, error) { config := port.Service client := discoverd.NewClientWithURL(env["DISCOVERD"]) client.Logger = logger.New("component", "discoverd") if config.Create { // TODO: maybe reuse maybeAddService() from the client log.Info("creating service") if err := client.AddService(config.Name, nil); err != nil { if !hh.IsObjectExistsError(err) { log.Error("error creating service", "err", err) return nil, fmt.Errorf("something went wrong with discoverd: %s", err) } } } inst := &discoverd.Instance{ Addr: fmt.Sprintf("%s:%v", env["EXTERNAL_IP"], port.Port), Proto: port.Proto, } // add discoverd.EnvInstanceMeta if present for k, v := range env { if _, ok := discoverd.EnvInstanceMeta[k]; !ok { continue } if inst.Meta == nil { inst.Meta = make(map[string]string) } inst.Meta[k] = v } // no checker, but we still want to register a service if config.Check == nil { log.Info("registering instance", "instance", inst) return client.RegisterInstance(config.Name, inst) } var check health.Check switch config.Check.Type { case "tcp": check = &health.TCPCheck{Addr: inst.Addr} case "http", "https": check = &health.HTTPCheck{ URL: fmt.Sprintf("%s://%s%s", config.Check.Type, inst.Addr, config.Check.Path), Host: config.Check.Host, StatusCode: config.Check.Status, MatchBytes: []byte(config.Check.Match), } default: // unsupported checker type return nil, fmt.Errorf("unsupported check type: %s", config.Check.Type) } log.Info("adding healthcheck", "type", config.Check.Type, "interval", config.Check.Interval, "threshold", config.Check.Threshold) reg := health.Registration{ Registrar: client, Service: config.Name, Instance: inst, Monitor: health.Monitor{ Interval: config.Check.Interval, Threshold: config.Check.Threshold, Logger: log.New("component", "monitor"), }.Run, Check: check, Logger: log, } if config.Check.KillDown { reg.Events = make(chan health.MonitorEvent) go func() { if config.Check.StartTimeout == 0 { config.Check.StartTimeout = 10 * time.Second } start := false lastStatus := health.MonitorStatusDown var mtx sync.Mutex maybeKill := func() { if lastStatus == health.MonitorStatusDown { log.Warn("killing the job") container.Signal(int(syscall.SIGKILL), &struct{}{}) } } go func() { // ignore events for the first StartTimeout interval <-time.After(config.Check.StartTimeout) mtx.Lock() defer mtx.Unlock() maybeKill() // check if the app is down start = true }() for e := range reg.Events { log.Info("got health monitor event", "status", e.Status) mtx.Lock() lastStatus = e.Status if !start { mtx.Unlock() continue } maybeKill() mtx.Unlock() } }() } return reg.Register(), nil }
func (s *ClientSuite) TestWatchReconnect(c *C) { c.Skip("fix discoverd watch reconnect") // FIXME(benbjohnson) raftPort, err := testutil.RandomPort() c.Assert(err, IsNil) httpPort, err := testutil.RandomPort() c.Assert(err, IsNil) // clientA is used to register services and instances, and remains connected clientA, cleanup := testutil.SetupDiscoverd(c) defer cleanup() // clientB is connected to the server which will be restarted, and is used to // test that the watch generates the correct events after reconnecting clientB, killDiscoverd := testutil.BootDiscoverd(c, raftPort, httpPort) defer func() { killDiscoverd() }() // create a service with manual leader and some metadata service := "foo" config := &discoverd.ServiceConfig{LeaderType: discoverd.LeaderTypeManual} c.Assert(clientA.AddService(service, config), IsNil) serviceMeta := &discoverd.ServiceMeta{Data: []byte(`{"foo": "bar"}`)} c.Assert(clientA.Service(service).SetMeta(serviceMeta), IsNil) register := func(client *discoverd.Client, addr string, meta map[string]string) (discoverd.Heartbeater, *discoverd.Instance) { inst := &discoverd.Instance{Addr: addr, Proto: "tcp", Meta: meta} hb, err := client.RegisterInstance(service, inst) c.Assert(err, IsNil) return hb, inst } waitForEvent := func(events chan *discoverd.Event, addr string, kind discoverd.EventKind) { for { select { case e := <-events: if e.Kind == kind && (addr == "" || addr == e.Instance.Addr) { return } case <-time.After(10 * time.Second): c.Fatalf("timed out wating for %s event", kind) } } } waitForWatchState := func(ch chan discoverd.WatchState, state discoverd.WatchState) { for { select { case s := <-ch: if s == state { return } case <-time.After(10 * time.Second): c.Fatalf("timed out waiting for watch %s state", state) } } } // register three services register(clientA, ":1111", nil) hb2, _ := register(clientA, ":2222", map[string]string{"foo": "bar"}) hb3, _ := register(clientA, ":3333", nil) // create watches using both clients so we can synchronize assertions eventsA := make(chan *discoverd.Event) watchA, err := clientA.Service(service).Watch(eventsA) c.Assert(err, IsNil) defer watchA.Close() waitForEvent(eventsA, "", discoverd.EventKindCurrent) eventsB := make(chan *discoverd.Event) watchB, err := clientB.Service(service).Watch(eventsB) c.Assert(err, IsNil) defer watchB.Close() waitForEvent(eventsB, "", discoverd.EventKindCurrent) // kill clientB's server and wait for the watch to disconnect stateCh := make(chan discoverd.WatchState) watchB.(*discoverd.Watch).SetStateChannel(stateCh) killDiscoverd() waitForWatchState(stateCh, discoverd.WatchStateDisconnected) // make some changes using clientA // change some metadata c.Assert(hb2.SetMeta(map[string]string{"foo": "baz"}), IsNil) waitForEvent(eventsA, ":2222", discoverd.EventKindUpdate) // register a new instance _, inst := register(clientA, ":4444", nil) waitForEvent(eventsA, ":4444", discoverd.EventKindUp) // set a new leader clientA.Service(service).SetLeader(inst.ID) waitForEvent(eventsA, ":4444", discoverd.EventKindLeader) // unregister an instance hb3.Close() waitForEvent(eventsA, ":3333", discoverd.EventKindDown) // update the service metadata serviceMeta.Data = []byte(`{"foo": "baz"}`) c.Assert(clientA.Service(service).SetMeta(serviceMeta), IsNil) waitForEvent(eventsA, "", discoverd.EventKindServiceMeta) // restart clientB's server and wait for the watch to reconnect _, killDiscoverd = testutil.RunDiscoverdServer(c, raftPort, httpPort) waitForWatchState(stateCh, discoverd.WatchStateConnected) type expectedEvent struct { Addr string Kind discoverd.EventKind ServiceMeta *discoverd.ServiceMeta } assertCurrent := func(events chan *discoverd.Event, expected []*expectedEvent) { count := 0 isExpected := func(event *discoverd.Event) bool { for _, e := range expected { if e.Kind != event.Kind { continue } switch event.Kind { case discoverd.EventKindServiceMeta: if reflect.DeepEqual(event.ServiceMeta, e.ServiceMeta) { return true } default: if event.Instance != nil && event.Instance.Addr == e.Addr { return true } } } return false } for { select { case event := <-events: if event.Kind == discoverd.EventKindCurrent { if count != len(expected) { c.Fatalf("expected %d events, got %d", len(expected), count) } return } if !isExpected(event) { c.Fatalf("unexpected event: %+v", event) } count++ case <-time.After(10 * time.Second): c.Fatal("timed out waiting for events") } } } // check watchB emits missed events assertCurrent(eventsB, []*expectedEvent{ {Addr: ":2222", Kind: discoverd.EventKindUpdate}, {Addr: ":4444", Kind: discoverd.EventKindUp}, {Addr: ":4444", Kind: discoverd.EventKindLeader}, {Kind: discoverd.EventKindServiceMeta, ServiceMeta: serviceMeta}, {Addr: ":3333", Kind: discoverd.EventKindDown}, }) }