// redial continually connects to the URL, exiting the program when no longer possible func redial(ctx context.Context, url, exchange string) chan chan session { sessions := make(chan chan session) go func() { sess := make(chan session) defer close(sessions) for { select { case sessions <- sess: case <-ctx.Done(): log.Info("shutting down session factory") return } connected := false var conn *amqp.Connection var ch *amqp.Channel var err error for !connected { log.Debug("dialing amqp url: %s", url) conn, err = amqp.Dial(url) if err != nil { log.Error(3, "cannot (re)dial: %v: %q", err, url) time.Sleep(time.Second) continue } log.Debug("connected to %s", url) log.Debug("creating new channel on AMQP connection.") ch, err = conn.Channel() if err != nil { log.Error(3, "cannot create channel: %v", err) conn.Close() time.Sleep(time.Second) continue } log.Debug("Ensuring that %s topic exchange exists on AMQP server.", exchange) if err := ch.ExchangeDeclare(exchange, "topic", true, false, false, false, nil); err != nil { log.Error(3, "cannot declare topic exchange: %v", err) conn.Close() time.Sleep(time.Second) } log.Debug("Successfully connected to RabbitMQ.") connected = true } select { case sess <- session{conn, ch}: case <-ctx.Done(): log.Info("shutting down new session") return } } }() return sessions }
// publish publishes messages to a reconnecting session to a topic exchange. // It receives from the application specific source of messages. func publish(sessions chan chan session, exchange string, messages <-chan Message) { var ( running bool reading = messages pending = make(chan Message, 1) confirm = make(chan amqp.Confirmation, 1) ) for session := range sessions { log.Debug("waiting for new session to be established.") pub := <-session // publisher confirms for this channel/connection if err := pub.Confirm(false); err != nil { log.Info("publisher confirms not supported") close(confirm) // confirms not supported, simulate by always nacking } else { pub.NotifyPublish(confirm) } log.Info("Event publisher started...") for { var body Message select { case confirmed := <-confirm: if !confirmed.Ack { log.Error(3, "nack message %d, body: %q", confirmed.DeliveryTag, string(body.Payload)) } reading = messages case body = <-pending: err := pub.Publish(exchange, body.RoutingKey, false, false, amqp.Publishing{ Body: body.Payload, }) // Retry failed delivery on the next session if err != nil { pending <- body pub.Close() break } case body, running = <-reading: // all messages consumed if !running { return } // work on pending delivery until ack'd pending <- body reading = nil } } } }
func Publish(metrics []*schema.MetricData) error { if globalProducer == nil { log.Debug("droping %d metrics as publishing is disbaled", len(metrics)) return nil } if len(metrics) == 0 { return nil } subslices := schema.Reslice(metrics, 3500) for _, subslice := range subslices { id := time.Now().UnixNano() data, err := msg.CreateMsg(subslice, id, msg.FormatMetricDataArrayMsgp) if err != nil { log.Fatal(4, "Fatal error creating metric message: %s", err) } metricsPublished.Inc(int64(len(subslice))) messagesPublished.Inc(1) messagesSize.Value(int64(len(data))) metricsPerMessage.Value(int64(len(subslice))) pre := time.Now() err = globalProducer.Publish(topic, data) publishDuration.Value(time.Since(pre)) if err != nil { log.Fatal(4, "can't publish to nsqd: %s", err) } log.Info("published metrics %d size=%d", id, len(data)) } //globalProducer.Stop() return nil }
func (a *AgentSession) Start() error { if err := a.saveDbSession(); err != nil { log.Error(3, "unable to add agentSession to DB. %s", err.Error()) a.close() return err } log.Debug("setting handler for disconnect event.") if err := a.SocketSession.On("disconnect", a.OnDisconnect()); err != nil { log.Error(3, "failed to bind disconnect event. %s", err.Error()) a.close() return err } log.Debug("setting handler for catalog event.") if err := a.SocketSession.On("catalog", a.HandleCatalog()); err != nil { log.Error(3, "failed to bind catalog event handler. %s", err.Error()) a.close() return err } log.Info("starting session %s", a.SocketSession.Id) go a.SocketSession.Start() // run background tasks for this session. go a.sendHeartbeat() go a.sendTaskListPeriodically() a.sendTaskList() return nil }
func deleteAgentSession(sess *session, a *model.AgentSession) ([]event.Event, error) { events := make([]event.Event, 0) var rawSql = "DELETE FROM agent_session WHERE id=?" _, err := sess.Exec(rawSql, a.Id) if err != nil { return nil, err } // we query here to prevent race conditions when agents dicsonnect from one task-server node // and connect to another. The new connection may establish before the old connection times out. total, err := sess.Where("agent_session.agent_id=?", a.AgentId).Count(&model.AgentSession{}) if err != nil { return nil, err } if total == 0 { agent, err := getAgentById(sess, a.AgentId, 0) if err != nil { return nil, err } log.Info("Agent %s has no sessions. Marking as offline.", agent.Name) agent.Online = false agent.OnlineChange = time.Now() sess.UseBool("online") _, err = sess.Id(agent.Id).Update(agent) if err != nil { return nil, err } events = append(events, &event.AgentOffline{Ts: time.Now(), Payload: agent}) } return events, nil }
func deleteAgentSessionsByServer(sess *session, server string) ([]event.Event, error) { events := make([]event.Event, 0) var rawSql = "DELETE FROM agent_session WHERE server=?" _, err := sess.Exec(rawSql, server) if err != nil { return nil, err } // Get agents that are now offline. nowOffline, err := onlineAgentsWithNoSession(sess) if err != nil { return nil, err } if len(nowOffline) > 0 { agentIds := make([]int64, len(nowOffline)) for i, a := range nowOffline { a.Online = false a.OnlineChange = time.Now() agentIds[i] = a.Id log.Info("Agent %s has no sessions. Marking as offline.", a.Name) } sess.UseBool("online") update := map[string]interface{}{"online": false, "online_change": time.Now()} _, err = sess.Table(&model.Agent{}).In("id", agentIds).Update(update) if err != nil { return nil, err } for _, a := range nowOffline { events = append(events, &event.AgentOffline{Ts: time.Now(), Payload: a}) } } return events, nil }
func connect(u *url.URL) (*websocket.Conn, error) { log.Info("connecting to %s", u.String()) header := make(http.Header) header.Set("Authorization", fmt.Sprintf("Bearer %s", *apiKey)) conn, _, err := websocket.DefaultDialer.Dial(u.String(), header) return conn, err }
func getEngine(dbType, dbConnectStr string) (*xorm.Engine, error) { switch dbType { case "sqlite3": case "mysql": default: return nil, fmt.Errorf("unknown DB type. %s", dbType) } log.Info("Database: %v", dbType) return xorm.NewEngine(dbType, dbConnectStr) }
func (s *Scheduler) Refresh(checks []*m.CheckWithSlug) { log.Info("refreshing checks, there are %d", len(checks)) seenChecks := make(map[int64]struct{}) s.Lock() for _, c := range checks { if !c.Enabled { continue } seenChecks[c.Id] = struct{}{} if existing, ok := s.Checks[c.Id]; ok { log.Debug("checkId=%d already running", c.Id) if c.Updated.After(existing.Check.Updated) { log.Info("syncing update to checkId=%d", c.Id) err := existing.Update(c, s.Healthy) if err != nil { log.Error(3, "Unable to update check instance for checkId=%d", c.Id, err) existing.Stop() delete(s.Checks, c.Id) } } } else { log.Debug("new check definition found for checkId=%d.", c.Id) instance, err := NewCheckInstance(c, s.Healthy) if err != nil { log.Error(3, "Unabled to create new check instance for checkId=%d.", c.Id, err) } else { s.Checks[c.Id] = instance } } } for id, instance := range s.Checks { if _, ok := seenChecks[id]; !ok { log.Info("checkId=%d no longer scheduled to this probe, removing it.", id) instance.Stop() delete(s.Checks, id) } } s.Unlock() log.Debug("refresh complete") return }
func (s *Scheduler) Remove(check *m.CheckWithSlug) { log.Info("removing %s check for %s", check.Type, check.Slug) s.Lock() if existing, ok := s.Checks[check.Id]; !ok { log.Warn("recieved remove event for check that is not currently running. checkId=%d", check.Id) } else { existing.Stop() delete(s.Checks, check.Id) } s.Unlock() return }
// bindHandlers registers all socket.io event handlers for the probe's
// connection to the controller: reconnect-on-disconnect, check lifecycle
// events (refresh/created/updated/removed), the ready handshake and fatal
// controller errors.
func bindHandlers(client *gosocketio.Client, controllerUrl *url.URL, jobScheduler *scheduler.Scheduler, interrupt chan os.Signal) {
	client.On(gosocketio.OnDisconnection, func(c *gosocketio.Channel) {
		log.Error(3, "Disconnected from remote server.")
		//reconnect: redial in a loop until a connection is established, then
		//re-register all handlers on the fresh client.
		connected := false
		var err error
		for !connected {
			client, err = gosocketio.Dial(controllerUrl.String(), transport.GetDefaultWebsocketTransport())
			if err != nil {
				log.Error(3, err.Error())
				time.Sleep(time.Second * 2)
			} else {
				connected = true
				bindHandlers(client, controllerUrl, jobScheduler, interrupt)
			}
		}
	})
	client.On("refresh", func(c *gosocketio.Channel, checks []*m.CheckWithSlug) {
		// public probes also run the built-in public checks; `check := c`
		// copies the loop variable so each appended pointer is distinct.
		if probe.Self.Public {
			for _, c := range PublicChecks {
				check := c
				checks = append(checks, &check)
			}
		}
		jobScheduler.Refresh(checks)
	})
	client.On("created", func(c *gosocketio.Channel, check m.CheckWithSlug) {
		jobScheduler.Create(&check)
	})
	client.On("updated", func(c *gosocketio.Channel, check m.CheckWithSlug) {
		jobScheduler.Update(&check)
	})
	client.On("removed", func(c *gosocketio.Channel, check m.CheckWithSlug) {
		jobScheduler.Remove(&check)
	})
	client.On("ready", func(c *gosocketio.Channel, event m.ProbeReadyPayload) {
		log.Info("server sent ready event. ProbeId=%d", event.Collector.Id)
		probe.Self = event.Collector
		// remember our socket id so a later reconnect can resume the session.
		queryParams := controllerUrl.Query()
		queryParams["lastSocketId"] = []string{event.SocketId}
		controllerUrl.RawQuery = queryParams.Encode()
	})
	client.On("error", func(c *gosocketio.Channel, reason string) {
		log.Error(3, "Controller emitted an error. %s", reason)
		// a controller error is treated as fatal: closing interrupt unblocks main.
		close(interrupt)
	})
}
// sendData drains t.dataChan forever, snappy-compressing each payload and
// POSTing it to the TSDB endpoint, retrying each request until it succeeds.
// Every 10 seconds it logs a summary of payloads/bytes sent since the last
// summary.
func (t *Tsdb) sendData() {
	counter := 0
	bytesSent := 0
	last := time.Now()
	ticker := time.NewTicker(time.Second * 10)
	for {
		select {
		case <-ticker.C:
			if counter > 0 {
				log.Info("published %d (%d bytes) payloads in last %f seconds", counter, bytesSent, time.Since(last).Seconds())
				counter = 0
				bytesSent = 0
				last = time.Now()
			}
		case data := <-t.dataChan:
			u := t.Url.String() + data.Path
			// frame-compress the payload before sending.
			body := new(bytes.Buffer)
			snappyBody := snappy.NewWriter(body)
			snappyBody.Write(data.Body)
			snappyBody.Close()
			req, err := http.NewRequest("POST", u, body)
			if err != nil {
				log.Error(3, "failed to create request payload. ", err)
				break
			}
			req.Header.Set("Content-Type", "rt-metric-binary-snappy")
			req.Header.Set("Authorization", "Bearer "+t.ApiKey)
			var reqBytesSent int
			sent := false
			// retry until the payload is delivered; on failure, re-fill the
			// body buffer since the failed attempt may have consumed it.
			// NOTE(review): the same *http.Request is reused across retries;
			// this relies on req.Body still referencing the refilled buffer —
			// verify send() does not close or replace the body.
			for !sent {
				reqBytesSent = body.Len()
				if err := send(req); err != nil {
					log.Error(3, err.Error())
					time.Sleep(time.Second)
					body.Reset()
					snappyBody := snappy.NewWriter(body)
					snappyBody.Write(data.Body)
					snappyBody.Close()
				} else {
					sent = true
					log.Debug("sent %d bytes", reqBytesSent)
				}
			}
			bytesSent += reqBytesSent
			counter++
		}
	}
}
func NewCheckInstance(c *m.CheckWithSlug, probeHealthy bool) (*CheckInstance, error) { log.Info("Creating new CheckInstance for %s check for %s", c.Type, c.Slug) executor, err := GetCheck(c.Type, c.Settings) if err != nil { return nil, err } instance := &CheckInstance{ Check: c, Exec: executor, State: m.EvalResultUnknown, } if probeHealthy { go instance.Run() } return instance, nil }
func (s *Scheduler) Create(check *m.CheckWithSlug) { log.Info("creating %s check for %s", check.Type, check.Slug) s.Lock() if existing, ok := s.Checks[check.Id]; ok { log.Warn("recieved create event for check that is already running. checkId=%d", check.Id) existing.Stop() delete(s.Checks, check.Id) } instance, err := NewCheckInstance(check, s.Healthy) if err != nil { log.Error(3, "Unabled to create new check instance for checkId=%d.", check.Id, err) } else { s.Checks[check.Id] = instance } s.Unlock() return }
// Run starts the periodic execution loop for this check: it sleeps until the
// next tick aligned to the check's frequency and offset, then runs the check
// once and keeps running it on every ticker tick until the ticker is stopped.
func (c *CheckInstance) Run() {
	c.Lock()
	log.Info("Starting execution loop for %s check for %s, Frequency: %d, Offset: %d", c.Check.Type, c.Check.Slug, c.Check.Frequency, c.Check.Offset)
	// align the first run to the (frequency + offset) schedule.
	now := time.Now().Unix()
	waitTime := ((c.Check.Frequency + c.Check.Offset) - (now % c.Check.Frequency)) % c.Check.Frequency
	if waitTime == c.Check.Offset {
		// already exactly on schedule: run immediately.
		waitTime = 0
	}
	log.Debug("executing %s check for %s in %d seconds", c.Check.Type, c.Check.Slug, waitTime)
	if waitTime > 0 {
		// NOTE(review): this sleep runs while holding the instance lock, so
		// any caller that takes the lock (e.g. Stop/Update) can block for up
		// to a full period — confirm this is intended.
		time.Sleep(time.Second * time.Duration(waitTime))
	}
	c.Ticker = time.NewTicker(time.Duration(c.Check.Frequency) * time.Second)
	c.Unlock()
	c.run(time.Now())
	// loop ends when the Ticker is stopped and its channel drained elsewhere.
	for t := range c.Ticker.C {
		c.run(t)
	}
}
// relocateRouteAnyTasks re-assigns "route by any" tasks pinned to the given
// agent to a randomly chosen candidate agent that can serve them, emitting a
// TaskUpdated event for each task that is actually moved.
func relocateRouteAnyTasks(sess *session, agent *model.AgentDTO) ([]event.Event, error) {
	events := make([]event.Event, 0)
	// get list of tasks.
	var twm taskWithMetrics
	sess.Join("LEFT", "task_metric", "task.id = task_metric.task_id")
	sess.Join("INNER", "route_by_any_index", "route_by_any_index.task_id = task.id").Where("route_by_any_index.agent_id=?", agent.Id)
	sess.Cols("`task_metric`.*", "`task`.*")
	err := sess.Find(&twm)
	if err != nil {
		return nil, err
	}
	tasks := twm.ToTaskDTO()
	if len(tasks) == 0 {
		return nil, nil
	}
	for _, t := range tasks {
		candidates, err := taskRouteAnyCandidates(sess, t.Id)
		if err != nil {
			return nil, err
		}
		if len(candidates) == 0 {
			log.Error(3, "Cant re-locate task %d, no online agents capable of providing requested metrics.", t.Id)
			continue
		}
		// pick a new agent at random from the candidates.
		newAgent := candidates[rand.Intn(len(candidates))]
		if newAgent == agent.Id {
			// the original agent is itself a candidate again, so leave the
			// assignment as-is.
			log.Debug("No need to re-allocated task as the agent it was running on is back online")
			continue
		}
		_, err = sess.Exec("UPDATE route_by_any_index set agent_id=? where task_id=?", newAgent, t.Id)
		if err != nil {
			return nil, err
		}
		log.Info("Task %d rescheduled to agent %d", t.Id, newAgent)
		// Last and Current carry the same task payload: only the agent
		// assignment changed, not the task definition.
		e := new(event.TaskUpdated)
		e.Ts = time.Now()
		e.Payload.Last = t
		e.Payload.Current = t
		events = append(events, e)
	}
	return events, nil
}
func (s *Scheduler) Update(check *m.CheckWithSlug) { log.Info("updating %s check for %s", check.Type, check.Slug) s.Lock() if existing, ok := s.Checks[check.Id]; !ok { log.Warn("recieved update event for check that is not currently running. checkId=%d", check.Id) instance, err := NewCheckInstance(check, s.Healthy) if err != nil { log.Error(3, "Unabled to create new check instance for checkId=%d. %s", check.Id, err) } else { s.Checks[check.Id] = instance } } else { err := existing.Update(check, s.Healthy) if err != nil { log.Error(3, "Unable to update check instance for checkId=%d, %s", check.Id, err) existing.Stop() delete(s.Checks, check.Id) } } s.Unlock() return }
// subscribe consumes deliveries from an exclusive queue from a fanout exchange and sends to the application specific messages chan. func subscribe(sessions chan chan session, exchange string, messages chan<- Message) { for session := range sessions { log.Debug("waiting for new session to be established.") sub := <-session log.Debug("declaring new ephemeral Queue %v", sub) q, err := sub.QueueDeclare("", false, true, true, false, nil) if err != nil { log.Error(3, "cannot consume from exclusive: %v", err) sub.Close() continue } log.Debug("binding queue %s to routingKey #", q.Name) routingKey := "#" if err := sub.QueueBind(q.Name, routingKey, exchange, false, nil); err != nil { log.Error(3, "cannot consume without a binding to exchange: %q, %v", exchange, err) sub.Close() continue } deliveries, err := sub.Consume(q.Name, "", false, true, false, false, nil) if err != nil { log.Error(3, "cannot consume from queue: %q, %v", q.Name, err) sub.Close() continue } log.Info("subscribed to rabbitmq %s exchange...", exchange) for msg := range deliveries { log.Debug("new message received from rabbitmq") messages <- Message{RoutingKey: msg.RoutingKey, Payload: msg.Body} sub.Ack(msg.DeliveryTag, false) } } }
func (t *TaskCache) Sync() { tasksByName := make(map[string]*model.TaskDTO) t.Lock() for _, task := range t.Tasks { name := fmt.Sprintf("raintank-apps:%d", task.Id) tasksByName[name] = task log.Debug("seen %s", name) err := t.addTask(task) if err != nil { log.Error(3, err.Error()) } } for name := range t.SnapTasks { if _, ok := tasksByName[name]; !ok { log.Info("%s not in taskList. removing from snap.") if err := t.removeSnapTask(name); err != nil { log.Error(3, "failed to remove snapTask. %s", name) } } } t.Unlock() }
func (c *CheckInstance) run(t time.Time) { if !c.LastRun.IsZero() { delta := time.Since(c.LastRun) freq := time.Duration(c.Check.Frequency) * time.Second if delta > (freq + time.Duration(100)*time.Millisecond) { log.Warn("check is running late by %d milliseconds", delta/time.Millisecond) } } c.Lock() c.LastRun = t c.Unlock() desc := fmt.Sprintf("%s check for %s", c.Check.Type, c.Check.Slug) log.Debug("Running %s", desc) results, err := c.Exec.Run() var metrics []*schema.MetricData if err != nil { log.Error(3, "Failed to execute %s", desc, err) return } else { metrics = results.Metrics(t, c.Check) log.Debug("got %d metrics for %s", len(metrics), desc) // check if we need to send any events. Events are sent on state change, or if the error reason has changed // or the check has been in an error state for 10minutes. newState := m.EvalResultOK if msg := results.ErrorMsg(); msg != "" { log.Debug("%s failed: %s", desc, msg) newState = m.EvalResultCrit if (c.State != newState) || (msg != c.LastError) || (time.Since(c.StateChange) > time.Minute*10) { c.State = newState c.LastError = msg c.StateChange = time.Now() //send Error event. log.Info("%s is in error state", desc) event := schema.ProbeEvent{ EventType: "monitor_state", OrgId: c.Check.OrgId, Severity: "ERROR", Source: "monitor_collector", Timestamp: t.UnixNano() / int64(time.Millisecond), Message: msg, Tags: map[string]string{ "endpoint": c.Check.Slug, "collector": probe.Self.Slug, "monitor_type": string(c.Check.Type), }, } publisher.Publisher.AddEvent(&event) } } else if c.State != newState { c.State = newState c.StateChange = time.Now() //send OK event. 
log.Info("%s is now in OK state", desc) event := schema.ProbeEvent{ EventType: "monitor_state", OrgId: c.Check.OrgId, Severity: "OK", Source: "monitor_collector", Timestamp: t.UnixNano() / int64(time.Millisecond), Message: "Monitor now Ok.", Tags: map[string]string{ "endpoint": c.Check.Slug, "collector": probe.Self.Slug, "monitor_type": string(c.Check.Type), }, } publisher.Publisher.AddEvent(&event) } } // set or ok_state, error_state metrics. okState := 0.0 errState := 0.0 if c.State == m.EvalResultCrit { errState = 1 } else { okState = 1 } metrics = append(metrics, &schema.MetricData{ OrgId: int(c.Check.OrgId), Name: fmt.Sprintf("worldping.%s.%s.%s.ok_state", c.Check.Slug, probe.Self.Slug, c.Check.Type), Metric: fmt.Sprintf("worldping.%s.ok_state", c.Check.Type), Interval: int(c.Check.Frequency), Unit: "state", Mtype: "gauge", Time: t.Unix(), Tags: []string{ fmt.Sprintf("endpoint:%s", c.Check.Slug), fmt.Sprintf("monitor_type:%s", c.Check.Type), fmt.Sprintf("probe:%s", probe.Self.Slug), }, Value: okState, }, &schema.MetricData{ OrgId: int(c.Check.OrgId), Name: fmt.Sprintf("worldping.%s.%s.%s.error_state", c.Check.Slug, probe.Self.Slug, c.Check.Type), Metric: fmt.Sprintf("worldping.%s.error_state", c.Check.Type), Interval: int(c.Check.Frequency), Unit: "state", Mtype: "gauge", Time: t.Unix(), Tags: []string{ fmt.Sprintf("endpoint:%s", c.Check.Slug), fmt.Sprintf("monitor_type:%s", c.Check.Type), fmt.Sprintf("probe:%s", probe.Self.Slug), }, Value: errState, }) for _, m := range metrics { m.SetId() } //publish metrics to TSDB publisher.Publisher.Add(metrics) }
// main wires up configuration, logging, the snap client and the websocket
// session to the controller, then blocks until interrupted.
func main() {
	flag.Parse()

	// Set 'cfile' here if *confFile exists, because we should only try and
	// parse the conf file if it exists. If we try and parse the default
	// conf file location when it's not there, we (unsurprisingly) get a
	// panic.
	var cfile string
	if _, err := os.Stat(*confFile); err == nil {
		cfile = *confFile
	}

	// Still parse globalconf, though, even if the config file doesn't exist
	// because we want to be able to use environment variables.
	conf, err := globalconf.NewWithOptions(&globalconf.Options{
		Filename:  cfile,
		EnvPrefix: "TASKAGENT_",
	})
	if err != nil {
		panic(fmt.Sprintf("error with configuration file: %s", err))
	}
	conf.ParseAll()

	log.NewLogger(0, "console", fmt.Sprintf(`{"level": %d, "formatting":true}`, *logLevel))
	// workaround for https://github.com/grafana/grafana/issues/4055
	switch *logLevel {
	case 0:
		log.Level(log.TRACE)
	case 1:
		log.Level(log.DEBUG)
	case 2:
		log.Level(log.INFO)
	case 3:
		log.Level(log.WARN)
	case 4:
		log.Level(log.ERROR)
	case 5:
		log.Level(log.CRITICAL)
	case 6:
		log.Level(log.FATAL)
	}

	if *showVersion {
		fmt.Printf("task-agent (built with %s, git hash %s)\n", runtime.Version(), GitHash)
		return
	}

	if *nodeName == "" {
		log.Fatal(4, "name must be set.")
	}

	snapUrl, err := url.Parse(*snapUrlStr)
	if err != nil {
		log.Fatal(4, "could not parse snapUrl. %s", err)
	}
	snapClient, err := snap.NewClient(*nodeName, *tsdbAddr, *apiKey, snapUrl)
	if err != nil {
		log.Fatal(4, err.Error())
	}
	InitTaskCache(snapClient)

	interrupt := make(chan os.Signal, 1)
	signal.Notify(interrupt, os.Interrupt)
	shutdownStart := make(chan struct{})

	controllerUrl, err := url.Parse(*serverAddr)
	if err != nil {
		log.Fatal(4, err.Error())
	}
	controllerUrl.Path = path.Clean(controllerUrl.Path + fmt.Sprintf("/socket/%s/%d", *nodeName, Version))
	if controllerUrl.Scheme != "ws" && controllerUrl.Scheme != "wss" {
		log.Fatal(4, "invalid server address. scheme must be ws or wss. was %s", controllerUrl.Scheme)
	}

	conn, err := connect(controllerUrl)
	if err != nil {
		log.Fatal(4, "unable to connect to server on url %s: %s", controllerUrl.String(), err)
	}

	//create new session, allow 1000 events to be queued in the writeQueue before Emit() blocks.
	sess := session.NewSession(conn, 1000)
	sess.On("disconnect", func() {
		// on disconnect, reconnect: redial once per second until connected or
		// until shutdown begins.
		ticker := time.NewTicker(time.Second)
		connected := false
		for !connected {
			select {
			case <-shutdownStart:
				ticker.Stop()
				return
			case <-ticker.C:
				conn, err := connect(controllerUrl)
				if err == nil {
					sess.Conn = conn
					connected = true
					go sess.Start()
				}
			}
		}
		ticker.Stop()
	})
	sess.On("heartbeat", func(body []byte) {
		log.Debug("recieved heartbeat event. %s", body)
	})
	sess.On("taskList", HandleTaskList())
	sess.On("taskUpdate", HandleTaskUpdate())
	sess.On("taskAdd", HandleTaskAdd())
	sess.On("taskRemove", HandleTaskRemove())
	go sess.Start()

	//periodically send an Updated Catalog.
	go SendCatalog(sess, snapClient, shutdownStart)
	// connect to the snap server and monitor that it is up.
	go snapClient.Run()

	//wait for interupt Signal.
	<-interrupt
	log.Info("interrupt")
	close(shutdownStart)
	sess.Close()
	return
}
// main wires up configuration, logging, the job scheduler, the TSDB publisher
// and the socket.io connection to the controller, then blocks until
// interrupted.
func main() {
	flag.Parse()

	// Set 'cfile' here if *confFile exists, because we should only try and
	// parse the conf file if it exists. If we try and parse the default
	// conf file location when it's not there, we (unsurprisingly) get a
	// panic.
	var cfile string
	if _, err := os.Stat(*confFile); err == nil {
		cfile = *confFile
	}

	// Still parse globalconf, though, even if the config file doesn't exist
	// because we want to be able to use environment variables.
	conf, err := globalconf.NewWithOptions(&globalconf.Options{
		Filename:  cfile,
		EnvPrefix: "RTPROBE_",
	})
	if err != nil {
		panic(fmt.Sprintf("error with configuration file: %s", err))
	}
	conf.ParseAll()

	log.NewLogger(0, "console", fmt.Sprintf(`{"level": %d, "formatting":true}`, *logLevel))
	// workaround for https://github.com/grafana/grafana/issues/4055
	switch *logLevel {
	case 0:
		log.Level(log.TRACE)
	case 1:
		log.Level(log.DEBUG)
	case 2:
		log.Level(log.INFO)
	case 3:
		log.Level(log.WARN)
	case 4:
		log.Level(log.ERROR)
	case 5:
		log.Level(log.CRITICAL)
	case 6:
		log.Level(log.FATAL)
	}

	if *showVersion {
		fmt.Printf("raintank-probe (built with %s, git hash %s)\n", runtime.Version(), GitHash)
		return
	}

	if *nodeName == "" {
		log.Fatal(4, "name must be set.")
	}

	// public checks are optional; failure to read or parse the file is only
	// logged, not fatal.
	file, err := ioutil.ReadFile(*publicChecksFile)
	if err != nil {
		log.Error(3, "Could not read publicChecks file. %s", err.Error())
	} else {
		err = json.Unmarshal(file, &PublicChecks)
		if err != nil {
			log.Error(3, "Could not parse publicChecks file. %s", err.Error())
		}
	}

	jobScheduler := scheduler.New(*healthHosts)
	go jobScheduler.CheckHealth()

	interrupt := make(chan os.Signal, 1)
	signal.Notify(interrupt, os.Interrupt)

	controllerUrl, err := url.Parse(*serverAddr)
	if err != nil {
		log.Fatal(4, err.Error())
	}
	controllerUrl.Path = path.Clean(controllerUrl.Path + "/socket.io")
	version := strings.Split(GitHash, "-")[0]
	controllerUrl.RawQuery = fmt.Sprintf("EIO=3&transport=websocket&apiKey=%s&name=%s&version=%s", *apiKey, url.QueryEscape(*nodeName), version)
	if controllerUrl.Scheme != "ws" && controllerUrl.Scheme != "wss" {
		log.Fatal(4, "invalid server address. scheme must be ws or wss. was %s", controllerUrl.Scheme)
	}

	tsdbUrl, err := url.Parse(*tsdbAddr)
	if err != nil {
		log.Fatal(4, "Invalid TSDB url.", err)
	}
	// NOTE(review): this checks HasPrefix but appends "/" at the END of the
	// path — a trailing-slash normalization would use HasSuffix. Verify which
	// was intended.
	if !strings.HasPrefix(tsdbUrl.Path, "/") {
		tsdbUrl.Path += "/"
	}
	publisher.Init(tsdbUrl, *apiKey, *concurrency)

	client, err := gosocketio.Dial(controllerUrl.String(), transport.GetDefaultWebsocketTransport())
	if err != nil {
		log.Fatal(4, "unable to connect to server on url %s: %s", controllerUrl.String(), err)
	}
	bindHandlers(client, controllerUrl, jobScheduler, interrupt)

	//wait for interupt Signal.
	<-interrupt
	log.Info("interrupt")
	jobScheduler.Close()
	client.Close()
	return
}