Beispiel #1
0
func (t *Tsdb) Flush() {
	t.Lock()
	if len(t.Metrics) == 0 {
		t.Unlock()
		return
	}
	metrics := make([]*schema.MetricData, len(t.Metrics))
	copy(metrics, t.Metrics)
	t.Metrics = t.Metrics[:0]
	t.Unlock()

	// Write the metrics to our HTTP server.
	log.Debug("writing %d metrics to API", len(metrics))
	batches := schema.Reslice(metrics, maxMetricsPerFlush*2)
	for _, batch := range batches {
		id := time.Now().UnixNano()
		body, err := msg.CreateMsg(batch, id, msg.FormatMetricDataArrayMsgp)
		if err != nil {
			log.Error(3, "unable to convert metrics to MetricDataArrayMsgp.", "error", err)
			return
		}
		t.dataChan <- tsdbData{Path: "metrics", Body: body}
		log.Debug("%d metrics queud for delivery", len(batch))
	}
}
Beispiel #2
0
func socket(ctx *Context) {
	agentName := ctx.Params(":agent")
	agentVer := ctx.ParamsInt64(":ver")
	//TODO: add auth
	owner := ctx.OrgId
	agent, err := connectedAgent(agentName, owner)
	if err != nil {
		log.Debug("agent cant connect. %s", err)
		ctx.JSON(400, err.Error())
		return
	}

	c, err := upgrader.Upgrade(ctx.Resp, ctx.Req.Request, nil)
	if err != nil {
		log.Error(3, "upgrade:", err)
		return
	}

	log.Debug("agent %s connected.", agent.Name)

	sess := agent_session.NewSession(agent, agentVer, c)
	ActiveSockets.NewSocket(sess)
	sess.Start()
	//block until connection closes.
	<-sess.Done
	ActiveSockets.DeleteSocket(sess)
}
Beispiel #3
0
func (t *Tsdb) Run() {
	for i := 0; i < t.Concurrency; i++ {
		go t.sendData()
	}

	ticker := time.NewTicker(time.Second)
	last := time.Now()
	for {
		select {
		case <-ticker.C:
			if time.Since(last) >= time.Second {
				log.Debug("no flushes in last 1second. Flushing now.")
				last = time.Now()
				t.Flush()
				log.Debug("flush took %f seconds", time.Since(last).Seconds())
			}
		case <-t.flushMetrics:
			log.Debug("flush trigger received.")
			last = time.Now()
			t.Flush()
			log.Debug("flush took %f seconds", time.Since(last).Seconds())
		case <-t.flushEvents:
			t.SendEvents()
		case <-t.closeChan:
			close(t.dataChan)
			return
		}
	}
}
Beispiel #4
0
func (s *socketList) EmitTask(task *model.TaskDTO, event string) error {
	log.Debug("sending %s task event to connected agents.", event)
	agents, err := sqlstore.GetAgentsForTask(task)
	log.Debug("Task has %d agents. %v", len(agents), agents)
	if err != nil {
		return err
	}
	body, err := json.Marshal(task)
	if err != nil {
		return err
	}
	e := &message.Event{
		Event:   event,
		Payload: body,
	}
	sent := false
	s.Lock()

	for _, id := range agents {
		if as, ok := s.Sockets[id]; ok {
			log.Debug("sending %s event to agent %d", event, id)
			as.SocketSession.Emit(e)
			sent = true
		} else {
			log.Debug("agent %d is not connected to this server.", id)
		}
	}
	s.Unlock()
	if !sent {
		log.Debug("no connected agents for task %d.", task.Id)
	}
	return nil
}
func (t *TaskCache) addTask(task *model.TaskDTO) error {
	t.Tasks[task.Id] = task
	if !t.initialized {
		return nil
	}
	snapTaskName := fmt.Sprintf("raintank-apps:%d", task.Id)
	snapTask, ok := t.SnapTasks[snapTaskName]
	if !ok {
		log.Debug("New task recieved %s", snapTaskName)
		snapTask, err := t.c.CreateSnapTask(task, snapTaskName)
		if err != nil {
			return err
		}
		t.SnapTasks[snapTaskName] = snapTask
	} else {
		log.Debug("task %s already in the cache.", snapTaskName)
		if task.Updated.After(time.Unix(snapTask.CreationTimestamp, 0)) {
			log.Debug("%s needs to be updated", snapTaskName)
			// need to update task.
			if err := t.c.RemoveSnapTask(snapTask); err != nil {
				return err
			}
			snapTask, err := t.c.CreateSnapTask(task, snapTaskName)
			if err != nil {
				return err
			}
			t.SnapTasks[snapTaskName] = snapTask
		}
	}

	return nil
}
func (a *AgentSession) Start() error {
	if err := a.saveDbSession(); err != nil {
		log.Error(3, "unable to add agentSession to DB. %s", err.Error())
		a.close()
		return err
	}

	log.Debug("setting handler for disconnect event.")
	if err := a.SocketSession.On("disconnect", a.OnDisconnect()); err != nil {
		log.Error(3, "failed to bind disconnect event. %s", err.Error())
		a.close()
		return err
	}

	log.Debug("setting handler for catalog event.")
	if err := a.SocketSession.On("catalog", a.HandleCatalog()); err != nil {
		log.Error(3, "failed to bind catalog event handler. %s", err.Error())
		a.close()
		return err
	}

	log.Info("starting session %s", a.SocketSession.Id)
	go a.SocketSession.Start()

	// run background tasks for this session.
	go a.sendHeartbeat()
	go a.sendTaskListPeriodically()
	a.sendTaskList()
	return nil
}
Beispiel #7
0
// redial continually connects to the URL, exiting the program when no longer possible
func redial(ctx context.Context, url, exchange string) chan chan session {
	sessions := make(chan chan session)

	go func() {
		sess := make(chan session)
		defer close(sessions)

		for {
			select {
			case sessions <- sess:
			case <-ctx.Done():
				log.Info("shutting down session factory")
				return
			}

			connected := false
			var conn *amqp.Connection
			var ch *amqp.Channel
			var err error
			for !connected {
				log.Debug("dialing amqp url: %s", url)
				conn, err = amqp.Dial(url)
				if err != nil {
					log.Error(3, "cannot (re)dial: %v: %q", err, url)
					time.Sleep(time.Second)
					continue
				}
				log.Debug("connected to %s", url)

				log.Debug("creating new channel on AMQP connection.")
				ch, err = conn.Channel()
				if err != nil {
					log.Error(3, "cannot create channel: %v", err)
					conn.Close()
					time.Sleep(time.Second)
					continue
				}
				log.Debug("Ensuring that %s topic exchange exists on AMQP server.", exchange)
				if err := ch.ExchangeDeclare(exchange, "topic", true, false, false, false, nil); err != nil {
					log.Error(3, "cannot declare topic exchange: %v", err)
					conn.Close()
					time.Sleep(time.Second)
				}
				log.Debug("Successfully connected to RabbitMQ.")
				connected = true
			}

			select {
			case sess <- session{conn, ch}:
			case <-ctx.Done():
				log.Info("shutting down new session")
				return
			}
		}
	}()

	return sessions
}
func (a *AgentSession) cleanup() {
	//remove agentSession from DB.
	if a.dbSession != nil {
		log.Debug("deleting agent_session for %s from DB", a.Agent.Name)
		sqlstore.DeleteAgentSession(a.dbSession)
	} else {
		log.Debug("agent_session for %s has no db session.", a.Agent.Name)
	}
}
Beispiel #9
0
func (s *socketList) NewSocket(a *agent_session.AgentSession) {
	s.Lock()
	existing, ok := s.Sockets[a.Agent.Id]
	if ok {
		log.Debug("new connection for agent %d - %s, closing existing session", a.Agent.Id, a.Agent.Name)
		existing.Close()
	}
	log.Debug("Agent %d is connected to this server.", a.Agent.Id)
	s.Sockets[a.Agent.Id] = a
	s.Unlock()
}
func (a *AgentSession) close() {
	if !a.closing {
		a.closing = true
		close(a.Shutdown)
		log.Debug("closing websocket")
		a.SocketSession.Close()
		log.Debug("websocket closed")

		a.cleanup()
		close(a.Done)
	}
}
Beispiel #11
0
func Publish(metrics []*schema.MetricData) error {
	if globalProducer == nil {
		log.Debug("droping %d metrics as publishing is disbaled", len(metrics))
		return nil
	}
	if len(metrics) == 0 {
		return nil
	}

	subslices := schema.Reslice(metrics, 3500)

	for _, subslice := range subslices {
		id := time.Now().UnixNano()
		data, err := msg.CreateMsg(subslice, id, msg.FormatMetricDataArrayMsgp)
		if err != nil {
			log.Fatal(4, "Fatal error creating metric message: %s", err)
		}
		metricsPublished.Inc(int64(len(subslice)))
		messagesPublished.Inc(1)
		messagesSize.Value(int64(len(data)))
		metricsPerMessage.Value(int64(len(subslice)))
		pre := time.Now()
		err = globalProducer.Publish(topic, data)
		publishDuration.Value(time.Since(pre))
		if err != nil {
			log.Fatal(4, "can't publish to nsqd: %s", err)
		}
		log.Info("published metrics %d size=%d", id, len(data))
	}

	//globalProducer.Stop()
	return nil
}
Beispiel #12
0
func (t *Tsdb) Add(metrics []*schema.MetricData) {
	log.Debug("received %d new metrics", len(metrics))
	t.Lock()
	t.Metrics = append(t.Metrics, metrics...)
	numMetrics := len(t.Metrics)
	t.Unlock()
	if numMetrics > maxMetricsPerFlush {
		//non-blocking send on the channel. If there is already
		// an item in the channel we dont need to add another.
		select {
		default:
			log.Debug("flushMetrics channel blocked.")
		case t.flushMetrics <- struct{}{}:
		}
	}
}
Beispiel #13
0
func (s *socketList) CloseSocketByAgentId(id int64) {
	s.Lock()
	existing, ok := s.Sockets[id]
	if ok {
		existing.Close()
		log.Debug("removing session for Agent %d from socketList.", id)
		delete(s.Sockets, id)
	}
	s.Unlock()
}
Beispiel #14
0
func (s *socketList) CloseSocket(a *agent_session.AgentSession) {
	s.Lock()
	existing, ok := s.Sockets[a.Agent.Id]
	if ok {
		existing.Close()
		log.Debug("removing session for Agent %d from socketList.", a.Agent.Id)
		delete(s.Sockets, a.Agent.Id)
	}
	s.Unlock()
}
Beispiel #15
0
// publish publishes messages to a reconnecting session to a topic exchange.
// It receives from the application specific source of messages.
func publish(sessions chan chan session, exchange string, messages <-chan Message) {
	var (
		running bool
		reading = messages
		pending = make(chan Message, 1)
		confirm = make(chan amqp.Confirmation, 1)
	)

	for session := range sessions {
		log.Debug("waiting for new session to be established.")
		pub := <-session

		// publisher confirms for this channel/connection
		if err := pub.Confirm(false); err != nil {
			log.Info("publisher confirms not supported")
			close(confirm) // confirms not supported, simulate by always nacking
		} else {
			pub.NotifyPublish(confirm)
		}

		log.Info("Event publisher started...")

		for {
			var body Message
			select {
			case confirmed := <-confirm:
				if !confirmed.Ack {
					log.Error(3, "nack message %d, body: %q", confirmed.DeliveryTag, string(body.Payload))
				}
				reading = messages

			case body = <-pending:
				err := pub.Publish(exchange, body.RoutingKey, false, false, amqp.Publishing{
					Body: body.Payload,
				})
				// Retry failed delivery on the next session
				if err != nil {
					pending <- body
					pub.Close()
					break
				}

			case body, running = <-reading:
				// all messages consumed
				if !running {
					return
				}
				// work on pending delivery until ack'd
				pending <- body
				reading = nil
			}
		}
	}
}
Beispiel #16
0
func (s *Scheduler) Refresh(checks []*m.CheckWithSlug) {
	log.Info("refreshing checks, there are %d", len(checks))
	seenChecks := make(map[int64]struct{})
	s.Lock()
	for _, c := range checks {
		if !c.Enabled {
			continue
		}
		seenChecks[c.Id] = struct{}{}
		if existing, ok := s.Checks[c.Id]; ok {
			log.Debug("checkId=%d already running", c.Id)
			if c.Updated.After(existing.Check.Updated) {
				log.Info("syncing update to checkId=%d", c.Id)
				err := existing.Update(c, s.Healthy)
				if err != nil {
					log.Error(3, "Unable to update check instance for checkId=%d", c.Id, err)
					existing.Stop()
					delete(s.Checks, c.Id)
				}
			}
		} else {
			log.Debug("new check definition found for checkId=%d.", c.Id)
			instance, err := NewCheckInstance(c, s.Healthy)
			if err != nil {
				log.Error(3, "Unabled to create new check instance for checkId=%d.", c.Id, err)
			} else {
				s.Checks[c.Id] = instance
			}
		}
	}
	for id, instance := range s.Checks {
		if _, ok := seenChecks[id]; !ok {
			log.Info("checkId=%d no longer scheduled to this probe, removing it.", id)
			instance.Stop()
			delete(s.Checks, id)
		}
	}
	s.Unlock()
	log.Debug("refresh complete")
	return
}
func HandleTaskList() interface{} {
	return func(data []byte) {
		tasks := make([]*model.TaskDTO, 0)
		err := json.Unmarshal(data, &tasks)
		if err != nil {
			log.Error(3, "failed to decode taskUpdate payload. %s", err)
			return
		}
		log.Debug("TaskList. %s", data)
		GlobalTaskCache.UpdateTasks(tasks)
	}
}
func (t *TaskCache) RemoveTask(task *model.TaskDTO) error {
	t.Lock()
	defer t.Unlock()
	snapTaskName := fmt.Sprintf("raintank-apps:%d", task.Id)
	log.Debug("removing snap task %s", snapTaskName)
	if err := t.removeSnapTask(snapTaskName); err != nil {
		return err
	}

	delete(t.Tasks, task.Id)
	return nil
}
Beispiel #19
0
func (t *Tsdb) AddEvent(event *schema.ProbeEvent) {
	t.Lock()
	t.Events = append(t.Events, event)
	t.Unlock()
	//non-blocking send on the channel. If there is already
	// an item in the channel we dont need to add another.
	select {
	default:
		log.Debug("flushEvents channel blocked.")
	case t.flushEvents <- struct{}{}:
	}
}
func (a *AgentSession) sendTaskListPeriodically() {
	ticker := time.NewTicker(time.Second * 60)
	for {
		select {
		case <-a.Shutdown:
			log.Debug("session ended stopping taskListPeriodically.")
			return
		case <-ticker.C:
			a.sendTaskList()
		}
	}
}
func (t *TaskCache) removeSnapTask(taskName string) error {
	snapTask, ok := t.SnapTasks[taskName]
	if !ok {
		log.Debug("task to remove not in cache. %s", taskName)
	} else {
		if err := t.c.RemoveSnapTask(snapTask); err != nil {
			return err
		}
		delete(t.SnapTasks, taskName)
	}

	return nil
}
func HandleTaskUpdate() interface{} {
	return func(data []byte) {
		task := model.TaskDTO{}
		err := json.Unmarshal(data, &task)
		if err != nil {
			log.Error(3, "failed to decode taskUpdate payload. %s", err)
			return
		}
		log.Debug("TaskUpdate. %s", data)
		if err := GlobalTaskCache.AddTask(&task); err != nil {
			log.Error(3, "failed to add task to cache. %s", err)
		}
	}
}
func HandleTaskRemove() interface{} {
	return func(data []byte) {
		task := model.TaskDTO{}
		err := json.Unmarshal(data, &task)
		if err != nil {
			log.Error(3, "failed to decode taskAdd payload. %s", err)
			return
		}
		log.Debug("Removing Task. %s", data)
		if err := GlobalTaskCache.RemoveTask(&task); err != nil {
			log.Error(3, "failed to remove task from cache. %s", err)
		}
	}
}
Beispiel #24
0
func Publish(event *schema.ProbeEvent) error {
	if globalProducer == nil {
		log.Debug("droping event as publishing is disbaled")
		return nil
	}

	id := time.Now().UnixNano()
	data, err := msg.CreateProbeEventMsg(event, id, msg.FormatProbeEventMsgp)
	if err != nil {
		log.Fatal(4, "Fatal error creating event message: %s", err)
	}
	eventsPublished.Inc(1)
	messagesSize.Value(int64(len(data)))
	pre := time.Now()
	err = globalProducer.Publish(topic, data)
	publishDuration.Value(time.Since(pre))
	if err != nil {
		log.Fatal(4, "can't publish to nsqd: %s", err)
	}
	log.Debug("published event %d", id)

	return nil
}
Beispiel #25
0
// subscribe consumes deliveries from an exclusive queue from a fanout exchange and sends to the application specific messages chan.
func subscribe(sessions chan chan session, exchange string, messages chan<- Message) {
	for session := range sessions {
		log.Debug("waiting for new session to be established.")
		sub := <-session

		log.Debug("declaring new ephemeral Queue %v", sub)
		q, err := sub.QueueDeclare("", false, true, true, false, nil)
		if err != nil {
			log.Error(3, "cannot consume from exclusive: %v", err)
			sub.Close()
			continue
		}

		log.Debug("binding queue %s to routingKey #", q.Name)
		routingKey := "#"
		if err := sub.QueueBind(q.Name, routingKey, exchange, false, nil); err != nil {
			log.Error(3, "cannot consume without a binding to exchange: %q, %v", exchange, err)
			sub.Close()
			continue
		}

		deliveries, err := sub.Consume(q.Name, "", false, true, false, false, nil)
		if err != nil {
			log.Error(3, "cannot consume from queue: %q, %v", q.Name, err)
			sub.Close()
			continue
		}

		log.Info("subscribed to rabbitmq %s exchange...", exchange)

		for msg := range deliveries {
			log.Debug("new message received from rabbitmq")
			messages <- Message{RoutingKey: msg.RoutingKey, Payload: msg.Body}
			sub.Ack(msg.DeliveryTag, false)
		}
	}
}
Beispiel #26
0
func Auth(adminKey, keyString string) (*SignedInUser, error) {
	if keyString == adminKey {
		return &SignedInUser{
			Role:    ROLE_ADMIN,
			OrgId:   1,
			OrgName: "Admin",
			OrgSlug: "admin",
			IsAdmin: true,
			key:     keyString,
		}, nil
	}

	// check the cache
	log.Debug("Checking cache for apiKey")
	user, cached := cache.Get(keyString)
	if user != nil {
		log.Debug("valid key cached")
		return user, nil
	}
	if cached {
		log.Debug("invalid key cached")
		return nil, ErrInvalidApiKey
	}

	//validate the API key against grafana.net
	payload := url.Values{}
	payload.Add("token", keyString)
	res, err := http.PostForm("https://grafana.net/api/api-keys/check", payload)

	if err != nil {
		log.Error(3, "failed to check apiKey. %s", err)
		return nil, err
	}
	body, err := ioutil.ReadAll(res.Body)
	log.Debug("apiKey check response was: %s", body)
	res.Body.Close()
	if res.StatusCode != 200 {
		//add the invalid key to the cache
		log.Debug("Caching invalidKey response for %d seconds", invalidTTL/time.Second)
		cache.Set(keyString, nil, invalidTTL)

		return nil, ErrInvalidApiKey
	}

	user = &SignedInUser{key: keyString}
	err = json.Unmarshal(body, user)
	if err != nil {
		log.Error(3, "failed to parse api-keys/check response. %s", err)
		return nil, err
	}

	// add the user to the cache.
	log.Debug("Caching validKey response for %d seconds", validTTL/time.Second)
	cache.Set(keyString, user, validTTL)
	return user, nil
}
Beispiel #27
0
func (t *Tsdb) sendData() {
	counter := 0
	bytesSent := 0
	last := time.Now()
	ticker := time.NewTicker(time.Second * 10)
	for {
		select {
		case <-ticker.C:
			if counter > 0 {
				log.Info("published %d (%d bytes) payloads in last %f seconds", counter, bytesSent, time.Since(last).Seconds())
				counter = 0
				bytesSent = 0
				last = time.Now()
			}
		case data := <-t.dataChan:
			u := t.Url.String() + data.Path
			body := new(bytes.Buffer)
			snappyBody := snappy.NewWriter(body)
			snappyBody.Write(data.Body)
			snappyBody.Close()
			req, err := http.NewRequest("POST", u, body)
			if err != nil {
				log.Error(3, "failed to create request payload. ", err)
				break
			}
			req.Header.Set("Content-Type", "rt-metric-binary-snappy")
			req.Header.Set("Authorization", "Bearer "+t.ApiKey)
			var reqBytesSent int
			sent := false
			for !sent {
				reqBytesSent = body.Len()
				if err := send(req); err != nil {
					log.Error(3, err.Error())
					time.Sleep(time.Second)
					body.Reset()
					snappyBody := snappy.NewWriter(body)
					snappyBody.Write(data.Body)
					snappyBody.Close()
				} else {
					sent = true
					log.Debug("sent %d bytes", reqBytesSent)
				}
			}
			bytesSent += reqBytesSent
			counter++
		}
	}
}
func (a *AgentSession) sendHeartbeat() {
	ticker := time.NewTicker(time.Second * 2)
	for {
		select {
		case <-a.Shutdown:
			log.Debug("session ended stopping heartbeat.")
			return
		case t := <-ticker.C:
			e := &message.Event{Event: "heartbeat", Payload: []byte(t.String())}
			err := a.SocketSession.Emit(e)
			if err != nil {
				log.Error(3, "failed to emit heartbeat event. %s", err)
			}
		}
	}
}
Beispiel #29
0
func SendCatalog(sess *session.Session, snapClient *snap.Client, shutdownStart chan struct{}) {
	ticker := time.NewTicker(time.Minute * 5)
	for {
		select {
		case <-shutdownStart:
			return
		case <-ticker.C:
			emitMetrics(sess, snapClient)
		case <-snapClient.ConnectChan:
			log.Debug("connected to SNAP. re-indexing task list")
			if err := GlobalTaskCache.IndexSnapTasks(); err != nil {
				log.Error(3, "failed to add task to cache. %s", err)
			}
			emitMetrics(sess, snapClient)
		}
	}
}
func (t *TaskCache) IndexSnapTasks() error {
	log.Debug("running indexSnapTasks")
	tasks, err := t.c.GetSnapTasks()
	if err != nil {
		return err
	}
	t.Lock()
	t.SnapTasks = make(map[string]*rbody.ScheduledTask)
	for _, task := range tasks {
		t.SnapTasks[task.Name] = task
	}
	if !t.initialized {
		t.initialized = true
	}
	t.Unlock()
	t.Sync()
	return nil
}