예제 #1
0
func Publish(metrics []*schema.MetricData) error {
	if globalProducer == nil {
		log.Debug("droping %d metrics as publishing is disbaled", len(metrics))
		return nil
	}
	if len(metrics) == 0 {
		return nil
	}

	subslices := schema.Reslice(metrics, 3500)

	for _, subslice := range subslices {
		id := time.Now().UnixNano()
		data, err := msg.CreateMsg(subslice, id, msg.FormatMetricDataArrayMsgp)
		if err != nil {
			log.Fatal(4, "Fatal error creating metric message: %s", err)
		}
		metricsPublished.Inc(int64(len(subslice)))
		messagesPublished.Inc(1)
		messagesSize.Value(int64(len(data)))
		metricsPerMessage.Value(int64(len(subslice)))
		pre := time.Now()
		err = globalProducer.Publish(topic, data)
		publishDuration.Value(time.Since(pre))
		if err != nil {
			log.Fatal(4, "can't publish to nsqd: %s", err)
		}
		log.Info("published metrics %d size=%d", id, len(data))
	}

	//globalProducer.Stop()
	return nil
}
예제 #2
0
func NewEngine(dbType, dbConnectStr string, enableLog bool) {
	x, err := getEngine(dbType, dbConnectStr)

	if err != nil {
		log.Fatal(3, "Sqlstore: Fail to connect to database: %v", err)
	}
	err = SetEngine(x, enableLog)
	if err != nil {
		log.Fatal(3, "fail to initialize orm engine: %v", err)
	}
}
예제 #3
0
func Init(metrics met.Backend, t string, addr string, enabled bool) {
	if !enabled {
		return
	}
	topic = t
	cfg := nsq.NewConfig()
	cfg.UserAgent = fmt.Sprintf("raintank-apps-server")
	var err error
	globalProducer, err = nsq.NewProducer(addr, cfg)
	if err != nil {
		log.Fatal(4, "failed to initialize nsq producer for events. %s", err)
	}
	err = globalProducer.Ping()
	if err != nil {
		log.Fatal(4, "can't connect to nsqd: %s", err)
	}
	eventsPublished = metrics.NewCount("eventpublisher.events-published")
	messagesSize = metrics.NewMeter("eventpublisher.message_size", 0)
	publishDuration = metrics.NewTimer("eventpublisher.publish_duration", 0)
}
예제 #4
0
func Publish(event *schema.ProbeEvent) error {
	if globalProducer == nil {
		log.Debug("droping event as publishing is disbaled")
		return nil
	}

	id := time.Now().UnixNano()
	data, err := msg.CreateProbeEventMsg(event, id, msg.FormatProbeEventMsgp)
	if err != nil {
		log.Fatal(4, "Fatal error creating event message: %s", err)
	}
	eventsPublished.Inc(1)
	messagesSize.Value(int64(len(data)))
	pre := time.Now()
	err = globalProducer.Publish(topic, data)
	publishDuration.Value(time.Since(pre))
	if err != nil {
		log.Fatal(4, "can't publish to nsqd: %s", err)
	}
	log.Debug("published event %d", id)

	return nil
}
예제 #5
0
func main() {
	flag.Parse()
	// Set 'cfile' here if *confFile exists, because we should only try and
	// parse the conf file if it exists. If we try and parse the default
	// conf file location when it's not there, we (unsurprisingly) get a
	// panic.
	var cfile string
	if _, err := os.Stat(*confFile); err == nil {
		cfile = *confFile
	}

	// Still parse globalconf, though, even if the config file doesn't exist
	// because we want to be able to use environment variables.
	conf, err := globalconf.NewWithOptions(&globalconf.Options{
		Filename:  cfile,
		EnvPrefix: "RTPROBE_",
	})
	if err != nil {
		panic(fmt.Sprintf("error with configuration file: %s", err))
	}
	conf.ParseAll()

	log.NewLogger(0, "console", fmt.Sprintf(`{"level": %d, "formatting":true}`, *logLevel))
	// workaround for https://github.com/grafana/grafana/issues/4055
	switch *logLevel {
	case 0:
		log.Level(log.TRACE)
	case 1:
		log.Level(log.DEBUG)
	case 2:
		log.Level(log.INFO)
	case 3:
		log.Level(log.WARN)
	case 4:
		log.Level(log.ERROR)
	case 5:
		log.Level(log.CRITICAL)
	case 6:
		log.Level(log.FATAL)
	}

	if *showVersion {
		fmt.Printf("raintank-probe (built with %s, git hash %s)\n", runtime.Version(), GitHash)
		return
	}

	if *nodeName == "" {
		log.Fatal(4, "name must be set.")
	}

	file, err := ioutil.ReadFile(*publicChecksFile)
	if err != nil {
		log.Error(3, "Could not read publicChecks file. %s", err.Error())
	} else {
		err = json.Unmarshal(file, &PublicChecks)
		if err != nil {
			log.Error(3, "Could not parse publicChecks file. %s", err.Error())
		}
	}

	jobScheduler := scheduler.New(*healthHosts)
	go jobScheduler.CheckHealth()

	interrupt := make(chan os.Signal, 1)
	signal.Notify(interrupt, os.Interrupt)

	controllerUrl, err := url.Parse(*serverAddr)
	if err != nil {
		log.Fatal(4, err.Error())
	}
	controllerUrl.Path = path.Clean(controllerUrl.Path + "/socket.io")
	version := strings.Split(GitHash, "-")[0]
	controllerUrl.RawQuery = fmt.Sprintf("EIO=3&transport=websocket&apiKey=%s&name=%s&version=%s", *apiKey, url.QueryEscape(*nodeName), version)

	if controllerUrl.Scheme != "ws" && controllerUrl.Scheme != "wss" {
		log.Fatal(4, "invalid server address.  scheme must be ws or wss. was %s", controllerUrl.Scheme)
	}

	tsdbUrl, err := url.Parse(*tsdbAddr)
	if err != nil {
		log.Fatal(4, "Invalid TSDB url.", err)
	}
	if !strings.HasPrefix(tsdbUrl.Path, "/") {
		tsdbUrl.Path += "/"
	}
	publisher.Init(tsdbUrl, *apiKey, *concurrency)

	client, err := gosocketio.Dial(controllerUrl.String(), transport.GetDefaultWebsocketTransport())
	if err != nil {
		log.Fatal(4, "unable to connect to server on url %s: %s", controllerUrl.String(), err)
	}
	bindHandlers(client, controllerUrl, jobScheduler, interrupt)

	//wait for interupt Signal.
	<-interrupt
	log.Info("interrupt")
	jobScheduler.Close()
	client.Close()
	return
}
예제 #6
0
// Ping scheduler.HealthHosts to determin if this probe is healthy and should
// execute checks.  If all of the HealthHosts are experiencing issues, then
// there is likely something wrong with this probe so it should stop executing
// checks until things recover.
//
func (s *Scheduler) CheckHealth() {
	chks := make([]*checks.RaintankProbePing, len(s.HealthHosts))
	for i, host := range s.HealthHosts {
		settings := make(map[string]interface{})
		settings["timeout"] = 1.0
		settings["hostname"] = host
		chk, err := checks.NewRaintankPingProbe(settings)
		if err != nil {
			log.Fatal(4, "unable to create health check. %s", err)
		}
		chks[i] = chk
	}

	lastState := 1

	ticker := time.NewTicker(time.Second)
	var wg sync.WaitGroup
	for range ticker.C {
		resultsCh := make(chan int, len(chks))
		for i := range chks {
			check := chks[i]
			wg.Add(1)
			go func(ch chan int, chk *checks.RaintankProbePing) {
				defer wg.Done()
				results, err := chk.Run()
				if err != nil {
					ch <- 3
					return
				}
				if results.ErrorMsg() != "" {
					log.Warn("Health check to %s failed. %s", chk.Hostname, results.ErrorMsg())
					ch <- 1
					return
				}
				ch <- 0
			}(resultsCh, check)
		}
		wg.Wait()
		close(resultsCh)
		score := 0
		for r := range resultsCh {
			if r == 3 {
				// fatal error, trying to run the check.
				score = len(chks)
			} else {
				score += r
			}
		}
		newState := 0
		// if more the 50% of healthHosts are down, then we consider ourselves down.
		if float64(score) > float64(len(chks)/2.0) {
			newState = 1
		}

		if newState != lastState {
			if newState == 1 {
				// we are now unhealthy.
				s.Lock()
				log.Warn("This probe is in an unhealthy state. Stopping execution of checks.")
				s.Healthy = false
				for _, instance := range s.Checks {
					instance.Stop()
				}
				s.Unlock()
			} else {
				//we are now healthy.
				s.Lock()
				log.Warn("This probe is now healthy again. Resuming execution of checks.")
				s.Healthy = true
				for _, instance := range s.Checks {
					log.Debug("starting %s check for %s", instance.Check.Type, instance.Check.Slug)
					go instance.Run()
				}
				s.Unlock()
			}
			lastState = newState
		}

	}
}
예제 #7
0
func main() {
	flag.Parse()
	// Set 'cfile' here if *confFile exists, because we should only try and
	// parse the conf file if it exists. If we try and parse the default
	// conf file location when it's not there, we (unsurprisingly) get a
	// panic.
	var cfile string
	if _, err := os.Stat(*confFile); err == nil {
		cfile = *confFile
	}

	// Still parse globalconf, though, even if the config file doesn't exist
	// because we want to be able to use environment variables.
	conf, err := globalconf.NewWithOptions(&globalconf.Options{
		Filename:  cfile,
		EnvPrefix: "TASKAGENT_",
	})
	if err != nil {
		panic(fmt.Sprintf("error with configuration file: %s", err))
	}
	conf.ParseAll()

	log.NewLogger(0, "console", fmt.Sprintf(`{"level": %d, "formatting":true}`, *logLevel))
	// workaround for https://github.com/grafana/grafana/issues/4055
	switch *logLevel {
	case 0:
		log.Level(log.TRACE)
	case 1:
		log.Level(log.DEBUG)
	case 2:
		log.Level(log.INFO)
	case 3:
		log.Level(log.WARN)
	case 4:
		log.Level(log.ERROR)
	case 5:
		log.Level(log.CRITICAL)
	case 6:
		log.Level(log.FATAL)
	}

	if *showVersion {
		fmt.Printf("task-agent (built with %s, git hash %s)\n", runtime.Version(), GitHash)
		return
	}

	if *nodeName == "" {
		log.Fatal(4, "name must be set.")
	}

	snapUrl, err := url.Parse(*snapUrlStr)
	if err != nil {
		log.Fatal(4, "could not parse snapUrl. %s", err)
	}
	snapClient, err := snap.NewClient(*nodeName, *tsdbAddr, *apiKey, snapUrl)
	if err != nil {
		log.Fatal(4, err.Error())
	}

	InitTaskCache(snapClient)

	interrupt := make(chan os.Signal, 1)
	signal.Notify(interrupt, os.Interrupt)
	shutdownStart := make(chan struct{})

	controllerUrl, err := url.Parse(*serverAddr)
	if err != nil {
		log.Fatal(4, err.Error())
	}
	controllerUrl.Path = path.Clean(controllerUrl.Path + fmt.Sprintf("/socket/%s/%d", *nodeName, Version))

	if controllerUrl.Scheme != "ws" && controllerUrl.Scheme != "wss" {
		log.Fatal(4, "invalid server address.  scheme must be ws or wss. was %s", controllerUrl.Scheme)
	}

	conn, err := connect(controllerUrl)
	if err != nil {
		log.Fatal(4, "unable to connect to server on url %s: %s", controllerUrl.String(), err)
	}

	//create new session, allow 1000 events to be queued in the writeQueue before Emit() blocks.
	sess := session.NewSession(conn, 1000)
	sess.On("disconnect", func() {
		// on disconnect, reconnect.
		ticker := time.NewTicker(time.Second)
		connected := false
		for !connected {
			select {
			case <-shutdownStart:
				ticker.Stop()
				return
			case <-ticker.C:
				conn, err := connect(controllerUrl)
				if err == nil {
					sess.Conn = conn
					connected = true
					go sess.Start()
				}
			}
		}
		ticker.Stop()
	})

	sess.On("heartbeat", func(body []byte) {
		log.Debug("recieved heartbeat event. %s", body)
	})

	sess.On("taskList", HandleTaskList())
	sess.On("taskUpdate", HandleTaskUpdate())
	sess.On("taskAdd", HandleTaskAdd())
	sess.On("taskRemove", HandleTaskRemove())

	go sess.Start()

	//periodically send an Updated Catalog.
	go SendCatalog(sess, snapClient, shutdownStart)

	// connect to the snap server and monitor that it is up.
	go snapClient.Run()

	//wait for interupt Signal.
	<-interrupt
	log.Info("interrupt")
	close(shutdownStart)
	sess.Close()
	return
}