func metricsJson(ctx *Context) { defer ctx.Req.Request.Body.Close() if ctx.Req.Request.Body != nil { body, err := ioutil.ReadAll(ctx.Req.Request.Body) if err != nil { log.Error(3, "unable to read requst body. %s", err) } metrics := make([]*schema.MetricData, 0) err = json.Unmarshal(body, &metrics) if err != nil { ctx.JSON(400, fmt.Sprintf("unable to parse request body. %s", err)) return } if !ctx.IsAdmin { for _, m := range metrics { m.OrgId = int(ctx.OrgId) m.SetId() } } err = metric_publish.Publish(metrics) if err != nil { log.Error(3, "failed to publush metrics. %s", err) ctx.JSON(500, err) return } ctx.JSON(200, "ok") return } ctx.JSON(400, "no data included in request.") }
func (a *AgentSession) Start() error { if err := a.saveDbSession(); err != nil { log.Error(3, "unable to add agentSession to DB. %s", err.Error()) a.close() return err } log.Debug("setting handler for disconnect event.") if err := a.SocketSession.On("disconnect", a.OnDisconnect()); err != nil { log.Error(3, "failed to bind disconnect event. %s", err.Error()) a.close() return err } log.Debug("setting handler for catalog event.") if err := a.SocketSession.On("catalog", a.HandleCatalog()); err != nil { log.Error(3, "failed to bind catalog event handler. %s", err.Error()) a.close() return err } log.Info("starting session %s", a.SocketSession.Id) go a.SocketSession.Start() // run background tasks for this session. go a.sendHeartbeat() go a.sendTaskListPeriodically() a.sendTaskList() return nil }
func (t *TaskCache) UpdateTasks(tasks []*model.TaskDTO) { seenTaskIds := make(map[int64]struct{}) t.Lock() for _, task := range tasks { seenTaskIds[task.Id] = struct{}{} err := t.addTask(task) if err != nil { log.Error(3, err.Error()) } } tasksToDel := make([]*model.TaskDTO, 0) for id, task := range t.Tasks { if _, ok := seenTaskIds[id]; !ok { tasksToDel = append(tasksToDel, task) } } t.Unlock() if len(tasksToDel) > 0 { for _, task := range tasksToDel { if err := t.RemoveTask(task); err != nil { log.Error(3, "Failed to remove task %d", task.Id) } } } }
// redial continually connects to the URL, exiting the program when no longer possible func redial(ctx context.Context, url, exchange string) chan chan session { sessions := make(chan chan session) go func() { sess := make(chan session) defer close(sessions) for { select { case sessions <- sess: case <-ctx.Done(): log.Info("shutting down session factory") return } connected := false var conn *amqp.Connection var ch *amqp.Channel var err error for !connected { log.Debug("dialing amqp url: %s", url) conn, err = amqp.Dial(url) if err != nil { log.Error(3, "cannot (re)dial: %v: %q", err, url) time.Sleep(time.Second) continue } log.Debug("connected to %s", url) log.Debug("creating new channel on AMQP connection.") ch, err = conn.Channel() if err != nil { log.Error(3, "cannot create channel: %v", err) conn.Close() time.Sleep(time.Second) continue } log.Debug("Ensuring that %s topic exchange exists on AMQP server.", exchange) if err := ch.ExchangeDeclare(exchange, "topic", true, false, false, false, nil); err != nil { log.Error(3, "cannot declare topic exchange: %v", err) conn.Close() time.Sleep(time.Second) } log.Debug("Successfully connected to RabbitMQ.") connected = true } select { case sess <- session{conn, ch}: case <-ctx.Done(): log.Info("shutting down new session") return } } }() return sessions }
func Auth(adminKey, keyString string) (*SignedInUser, error) { if keyString == adminKey { return &SignedInUser{ Role: ROLE_ADMIN, OrgId: 1, OrgName: "Admin", OrgSlug: "admin", IsAdmin: true, key: keyString, }, nil } // check the cache log.Debug("Checking cache for apiKey") user, cached := cache.Get(keyString) if user != nil { log.Debug("valid key cached") return user, nil } if cached { log.Debug("invalid key cached") return nil, ErrInvalidApiKey } //validate the API key against grafana.net payload := url.Values{} payload.Add("token", keyString) res, err := http.PostForm("https://grafana.net/api/api-keys/check", payload) if err != nil { log.Error(3, "failed to check apiKey. %s", err) return nil, err } body, err := ioutil.ReadAll(res.Body) log.Debug("apiKey check response was: %s", body) res.Body.Close() if res.StatusCode != 200 { //add the invalid key to the cache log.Debug("Caching invalidKey response for %d seconds", invalidTTL/time.Second) cache.Set(keyString, nil, invalidTTL) return nil, ErrInvalidApiKey } user = &SignedInUser{key: keyString} err = json.Unmarshal(body, user) if err != nil { log.Error(3, "failed to parse api-keys/check response. %s", err) return nil, err } // add the user to the cache. log.Debug("Caching validKey response for %d seconds", validTTL/time.Second) cache.Set(keyString, user, validTTL) return user, nil }
func metricsBinary(ctx *Context, compressed bool) { var body io.ReadCloser var err error if compressed { body, err = gzip.NewReader(ctx.Req.Request.Body) if err != nil { ctx.JSON(400, err.Error()) return } } else { body = ctx.Req.Request.Body } defer body.Close() if ctx.Req.Request.Body != nil { body, err := ioutil.ReadAll(body) if err != nil { log.Error(3, "unable to read requst body. %s", err) ctx.JSON(500, err) return } metricData := new(msg.MetricData) err = metricData.InitFromMsg(body) if err != nil { log.Error(3, "payload not metricData. %s", err) ctx.JSON(500, err) return } err = metricData.DecodeMetricData() if err != nil { log.Error(3, "failed to unmarshal metricData. %s", err) ctx.JSON(500, err) return } if !ctx.IsAdmin { for _, m := range metricData.Metrics { m.OrgId = int(ctx.OrgId) m.SetId() } } err = metric_publish.Publish(metricData.Metrics) if err != nil { log.Error(3, "failed to publush metrics. %s", err) ctx.JSON(500, err) return } ctx.JSON(200, "ok") return } ctx.JSON(400, "no data included in request.") }
func bindHandlers(client *gosocketio.Client, controllerUrl *url.URL, jobScheduler *scheduler.Scheduler, interrupt chan os.Signal) { client.On(gosocketio.OnDisconnection, func(c *gosocketio.Channel) { log.Error(3, "Disconnected from remote server.") //reconnect connected := false var err error for !connected { client, err = gosocketio.Dial(controllerUrl.String(), transport.GetDefaultWebsocketTransport()) if err != nil { log.Error(3, err.Error()) time.Sleep(time.Second * 2) } else { connected = true bindHandlers(client, controllerUrl, jobScheduler, interrupt) } } }) client.On("refresh", func(c *gosocketio.Channel, checks []*m.CheckWithSlug) { if probe.Self.Public { for _, c := range PublicChecks { check := c checks = append(checks, &check) } } jobScheduler.Refresh(checks) }) client.On("created", func(c *gosocketio.Channel, check m.CheckWithSlug) { jobScheduler.Create(&check) }) client.On("updated", func(c *gosocketio.Channel, check m.CheckWithSlug) { jobScheduler.Update(&check) }) client.On("removed", func(c *gosocketio.Channel, check m.CheckWithSlug) { jobScheduler.Remove(&check) }) client.On("ready", func(c *gosocketio.Channel, event m.ProbeReadyPayload) { log.Info("server sent ready event. ProbeId=%d", event.Collector.Id) probe.Self = event.Collector queryParams := controllerUrl.Query() queryParams["lastSocketId"] = []string{event.SocketId} controllerUrl.RawQuery = queryParams.Encode() }) client.On("error", func(c *gosocketio.Channel, reason string) { log.Error(3, "Controller emitted an error. %s", reason) close(interrupt) }) }
func HandleTaskUpdate() interface{} { return func(data []byte) { task := model.TaskDTO{} err := json.Unmarshal(data, &task) if err != nil { log.Error(3, "failed to decode taskUpdate payload. %s", err) return } log.Debug("TaskUpdate. %s", data) if err := GlobalTaskCache.AddTask(&task); err != nil { log.Error(3, "failed to add task to cache. %s", err) } } }
func HandleTaskRemove() interface{} { return func(data []byte) { task := model.TaskDTO{} err := json.Unmarshal(data, &task) if err != nil { log.Error(3, "failed to decode taskAdd payload. %s", err) return } log.Debug("Removing Task. %s", data) if err := GlobalTaskCache.RemoveTask(&task); err != nil { log.Error(3, "failed to remove task from cache. %s", err) } } }
func emitMetrics(sess *session.Session, snapClient *snap.Client) { catalog, err := snapClient.GetSnapMetrics() if err != nil { log.Error(3, err.Error()) return } body, err := json.Marshal(catalog) if err != nil { log.Error(3, err.Error()) return } e := &message.Event{Event: "catalog", Payload: body} sess.Emit(e) }
func (t *Tsdb) sendData() { counter := 0 bytesSent := 0 last := time.Now() ticker := time.NewTicker(time.Second * 10) for { select { case <-ticker.C: if counter > 0 { log.Info("published %d (%d bytes) payloads in last %f seconds", counter, bytesSent, time.Since(last).Seconds()) counter = 0 bytesSent = 0 last = time.Now() } case data := <-t.dataChan: u := t.Url.String() + data.Path body := new(bytes.Buffer) snappyBody := snappy.NewWriter(body) snappyBody.Write(data.Body) snappyBody.Close() req, err := http.NewRequest("POST", u, body) if err != nil { log.Error(3, "failed to create request payload. ", err) break } req.Header.Set("Content-Type", "rt-metric-binary-snappy") req.Header.Set("Authorization", "Bearer "+t.ApiKey) var reqBytesSent int sent := false for !sent { reqBytesSent = body.Len() if err := send(req); err != nil { log.Error(3, err.Error()) time.Sleep(time.Second) body.Reset() snappyBody := snappy.NewWriter(body) snappyBody.Write(data.Body) snappyBody.Close() } else { sent = true log.Debug("sent %d bytes", reqBytesSent) } } bytesSent += reqBytesSent counter++ } } }
func (t *Tsdb) Flush() { t.Lock() if len(t.Metrics) == 0 { t.Unlock() return } metrics := make([]*schema.MetricData, len(t.Metrics)) copy(metrics, t.Metrics) t.Metrics = t.Metrics[:0] t.Unlock() // Write the metrics to our HTTP server. log.Debug("writing %d metrics to API", len(metrics)) batches := schema.Reslice(metrics, maxMetricsPerFlush*2) for _, batch := range batches { id := time.Now().UnixNano() body, err := msg.CreateMsg(batch, id, msg.FormatMetricDataArrayMsgp) if err != nil { log.Error(3, "unable to convert metrics to MetricDataArrayMsgp.", "error", err) return } t.dataChan <- tsdbData{Path: "metrics", Body: body} log.Debug("%d metrics queud for delivery", len(batch)) } }
func socket(ctx *Context) { agentName := ctx.Params(":agent") agentVer := ctx.ParamsInt64(":ver") //TODO: add auth owner := ctx.OrgId agent, err := connectedAgent(agentName, owner) if err != nil { log.Debug("agent cant connect. %s", err) ctx.JSON(400, err.Error()) return } c, err := upgrader.Upgrade(ctx.Resp, ctx.Req.Request, nil) if err != nil { log.Error(3, "upgrade:", err) return } log.Debug("agent %s connected.", agent.Name) sess := agent_session.NewSession(agent, agentVer, c) ActiveSockets.NewSocket(sess) sess.Start() //block until connection closes. <-sess.Done ActiveSockets.DeleteSocket(sess) }
func (a *AgentSession) sendTaskList() { log.Debug("sending TaskUpdate to %s", a.SocketSession.Id) tasks, err := sqlstore.GetAgentTasks(a.Agent) if err != nil { log.Error(3, "failed to get task list. %s", err) return } body, err := json.Marshal(&tasks) if err != nil { log.Error(3, "failed to Marshal task list to json. %s", err) return } e := &message.Event{Event: "taskList", Payload: body} err = a.SocketSession.Emit(e) if err != nil { log.Error(3, "failed to emit taskList event. %s", err) } }
func GetAgents(ctx *Context, query model.GetAgentsQuery) { query.OrgId = ctx.OrgId agents, err := sqlstore.GetAgents(&query) if err != nil { log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } ctx.JSON(200, rbody.OkResp("agents", agents)) }
// publish publishes messages to a reconnecting session to a topic exchange. // It receives from the application specific source of messages. func publish(sessions chan chan session, exchange string, messages <-chan Message) { var ( running bool reading = messages pending = make(chan Message, 1) confirm = make(chan amqp.Confirmation, 1) ) for session := range sessions { log.Debug("waiting for new session to be established.") pub := <-session // publisher confirms for this channel/connection if err := pub.Confirm(false); err != nil { log.Info("publisher confirms not supported") close(confirm) // confirms not supported, simulate by always nacking } else { pub.NotifyPublish(confirm) } log.Info("Event publisher started...") for { var body Message select { case confirmed := <-confirm: if !confirmed.Ack { log.Error(3, "nack message %d, body: %q", confirmed.DeliveryTag, string(body.Payload)) } reading = messages case body = <-pending: err := pub.Publish(exchange, body.RoutingKey, false, false, amqp.Publishing{ Body: body.Payload, }) // Retry failed delivery on the next session if err != nil { pending <- body pub.Close() break } case body, running = <-reading: // all messages consumed if !running { return } // work on pending delivery until ack'd pending <- body reading = nil } } } }
func (s *Scheduler) Refresh(checks []*m.CheckWithSlug) { log.Info("refreshing checks, there are %d", len(checks)) seenChecks := make(map[int64]struct{}) s.Lock() for _, c := range checks { if !c.Enabled { continue } seenChecks[c.Id] = struct{}{} if existing, ok := s.Checks[c.Id]; ok { log.Debug("checkId=%d already running", c.Id) if c.Updated.After(existing.Check.Updated) { log.Info("syncing update to checkId=%d", c.Id) err := existing.Update(c, s.Healthy) if err != nil { log.Error(3, "Unable to update check instance for checkId=%d", c.Id, err) existing.Stop() delete(s.Checks, c.Id) } } } else { log.Debug("new check definition found for checkId=%d.", c.Id) instance, err := NewCheckInstance(c, s.Healthy) if err != nil { log.Error(3, "Unabled to create new check instance for checkId=%d.", c.Id, err) } else { s.Checks[c.Id] = instance } } } for id, instance := range s.Checks { if _, ok := seenChecks[id]; !ok { log.Info("checkId=%d no longer scheduled to this probe, removing it.", id) instance.Stop() delete(s.Checks, id) } } s.Unlock() log.Debug("refresh complete") return }
func HandleTaskList() interface{} { return func(data []byte) { tasks := make([]*model.TaskDTO, 0) err := json.Unmarshal(data, &tasks) if err != nil { log.Error(3, "failed to decode taskUpdate payload. %s", err) return } log.Debug("TaskList. %s", data) GlobalTaskCache.UpdateTasks(tasks) } }
func GetAgentMetrics(ctx *Context) { id := ctx.ParamsInt64(":id") owner := ctx.OrgId agent, err := sqlstore.GetAgentById(id, owner) if err != nil { log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } if agent == nil { ctx.JSON(200, rbody.ErrResp(404, fmt.Errorf("agent not found"))) return } metrics, err := sqlstore.GetAgentMetrics(agent) if err != nil { log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } ctx.JSON(200, rbody.OkResp("metrics", metrics)) }
func (s *Scheduler) Update(check *m.CheckWithSlug) { log.Info("updating %s check for %s", check.Type, check.Slug) s.Lock() if existing, ok := s.Checks[check.Id]; !ok { log.Warn("recieved update event for check that is not currently running. checkId=%d", check.Id) instance, err := NewCheckInstance(check, s.Healthy) if err != nil { log.Error(3, "Unabled to create new check instance for checkId=%d. %s", check.Id, err) } else { s.Checks[check.Id] = instance } } else { err := existing.Update(check, s.Healthy) if err != nil { log.Error(3, "Unable to update check instance for checkId=%d, %s", check.Id, err) existing.Stop() delete(s.Checks, check.Id) } } s.Unlock() return }
// subscribe consumes deliveries from an exclusive queue from a fanout exchange and sends to the application specific messages chan. func subscribe(sessions chan chan session, exchange string, messages chan<- Message) { for session := range sessions { log.Debug("waiting for new session to be established.") sub := <-session log.Debug("declaring new ephemeral Queue %v", sub) q, err := sub.QueueDeclare("", false, true, true, false, nil) if err != nil { log.Error(3, "cannot consume from exclusive: %v", err) sub.Close() continue } log.Debug("binding queue %s to routingKey #", q.Name) routingKey := "#" if err := sub.QueueBind(q.Name, routingKey, exchange, false, nil); err != nil { log.Error(3, "cannot consume without a binding to exchange: %q, %v", exchange, err) sub.Close() continue } deliveries, err := sub.Consume(q.Name, "", false, true, false, false, nil) if err != nil { log.Error(3, "cannot consume from queue: %q, %v", q.Name, err) sub.Close() continue } log.Info("subscribed to rabbitmq %s exchange...", exchange) for msg := range deliveries { log.Debug("new message received from rabbitmq") messages <- Message{RoutingKey: msg.RoutingKey, Payload: msg.Body} sub.Ack(msg.DeliveryTag, false) } } }
func (t *TaskCache) Sync() { tasksByName := make(map[string]*model.TaskDTO) t.Lock() for _, task := range t.Tasks { name := fmt.Sprintf("raintank-apps:%d", task.Id) tasksByName[name] = task log.Debug("seen %s", name) err := t.addTask(task) if err != nil { log.Error(3, err.Error()) } } for name := range t.SnapTasks { if _, ok := tasksByName[name]; !ok { log.Info("%s not in taskList. removing from snap.") if err := t.removeSnapTask(name); err != nil { log.Error(3, "failed to remove snapTask. %s", name) } } } t.Unlock() }
func (a *AgentSession) HandleCatalog() interface{} { return func(body []byte) { catalog := make([]*rbody.Metric, 0) if err := json.Unmarshal(body, &catalog); err != nil { log.Error(3, err.Error()) return } log.Debug("Received catalog for session %s: %s", a.SocketSession.Id, body) metrics := make([]*model.Metric, len(catalog)) for i, m := range catalog { metrics[i] = &model.Metric{ OrgId: a.Agent.OrgId, Public: a.Agent.Public, Namespace: m.Namespace, Version: int64(m.Version), Policy: m.Policy, } } err := sqlstore.AddMissingMetricsForAgent(a.Agent, metrics) if err != nil { log.Error(3, "failed to update metrics in DB. %s", err) } } }
func (a *AgentSession) sendHeartbeat() { ticker := time.NewTicker(time.Second * 2) for { select { case <-a.Shutdown: log.Debug("session ended stopping heartbeat.") return case t := <-ticker.C: e := &message.Event{Event: "heartbeat", Payload: []byte(t.String())} err := a.SocketSession.Emit(e) if err != nil { log.Error(3, "failed to emit heartbeat event. %s", err) } } } }
func AddAgent(ctx *Context, agent model.AgentDTO) { if !agent.ValidName() { ctx.JSON(400, "invalde agent Name. must match /^[0-9a-Z_-]+$/") return } agent.Id = 0 //need to add suport for middelware context with AUTH/ agent.OrgId = ctx.OrgId err := sqlstore.AddAgent(&agent) if err != nil { log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } ctx.JSON(200, rbody.OkResp("agent", agent)) }
func GetAgentById(ctx *Context) { id := ctx.ParamsInt64(":id") owner := ctx.OrgId agent, err := sqlstore.GetAgentById(id, owner) if err == model.AgentNotFound { ctx.JSON(200, rbody.ErrResp(404, fmt.Errorf("agent not found"))) return } if err != nil { log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } ctx.JSON(200, rbody.OkResp("agent", agent)) }
func (s *Scheduler) Create(check *m.CheckWithSlug) { log.Info("creating %s check for %s", check.Type, check.Slug) s.Lock() if existing, ok := s.Checks[check.Id]; ok { log.Warn("recieved create event for check that is already running. checkId=%d", check.Id) existing.Stop() delete(s.Checks, check.Id) } instance, err := NewCheckInstance(check, s.Healthy) if err != nil { log.Error(3, "Unabled to create new check instance for checkId=%d.", check.Id, err) } else { s.Checks[check.Id] = instance } s.Unlock() return }
func SendCatalog(sess *session.Session, snapClient *snap.Client, shutdownStart chan struct{}) { ticker := time.NewTicker(time.Minute * 5) for { select { case <-shutdownStart: return case <-ticker.C: emitMetrics(sess, snapClient) case <-snapClient.ConnectChan: log.Debug("connected to SNAP. re-indexing task list") if err := GlobalTaskCache.IndexSnapTasks(); err != nil { log.Error(3, "failed to add task to cache. %s", err) } emitMetrics(sess, snapClient) } } }
func DeleteAgent(ctx *Context) { id := ctx.ParamsInt64(":id") owner := ctx.OrgId err := sqlstore.DeleteAgent(id, owner) if err != nil { if err == model.AgentNotFound { ctx.JSON(200, rbody.ErrResp(404, fmt.Errorf("agent not found"))) return } log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } ActiveSockets.CloseSocketByAgentId(id) ctx.JSON(200, rbody.OkResp("agent", nil)) }
func UpdateAgent(ctx *Context, agent model.AgentDTO) { if !agent.ValidName() { ctx.JSON(200, rbody.ErrResp(400, fmt.Errorf("invalid agent Name. must match /^[0-9a-Z_-]+$/"))) return } if agent.Id == 0 { ctx.JSON(200, rbody.ErrResp(400, fmt.Errorf("agent ID not set."))) return } //need to add suport for middelware context with AUTH/ agent.OrgId = ctx.OrgId err := sqlstore.UpdateAgent(&agent) if err != nil { log.Error(3, err.Error()) ctx.JSON(200, rbody.ErrResp(500, err)) return } ctx.JSON(200, rbody.OkResp("agent", agent)) }