Example #1
// the returned error determines whether the message gets ACKed or NACKed
func (kg *KairosGateway) process(job Job) error {
	// job.msg is the raw NSQ message (Timestamp, Attempts); job.Msg is the decoded metric payload
	msg := job.msg
	messagesSize.Value(int64(len(job.Msg.Msg)))
	log.Debug("processing metrics %s %d. timestamp: %s. format: %s. attempts: %d\n", job.qualifier, job.Msg.Id, time.Unix(0, msg.Timestamp), job.Msg.Format, msg.Attempts)

	err := job.Msg.DecodeMetricData()
	if err != nil {
		log.Info("%s: skipping message", err.Error())
		return nil
	}

	metricsPerMessage.Value(int64(len(job.Msg.Metrics)))
	if !kg.dryRun {
		pre := time.Now()
		err = kg.kairos.SendMetricPointers(job.Msg.Metrics)
		if err != nil {
			metricsToKairosFail.Inc(int64(len(job.Msg.Metrics)))
			log.Warn("can't send to kairosdb: %s. retrying later", err)
		} else {
			metricsToKairosOK.Inc(int64(len(job.Msg.Metrics)))
			kairosPutDuration.Value(time.Since(pre))
		}
	}
	log.Debug("finished metrics %s %d - %d metrics sent\n", job.qualifier, job.Msg.Id, len(job.Msg.Metrics))
	return err
}
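The comment above is the contract that matters: with an NSQ-style consumer, a nil return from the handler finishes (ACKs) the message, while a non-nil return requeues (NACKs) it. A minimal, self-contained sketch of that contract (the handler here is illustrative, not from the source):

package main

import (
	"errors"
	"fmt"
)

// handler mimics the nsq.Handler contract used by process() above:
// returning nil marks the message finished (ACK); returning an error
// makes the consumer requeue it (NACK).
type handler struct{}

func (h *handler) HandleMessage(body []byte) error {
	if len(body) == 0 {
		// transient failures (like the kairosdb send error above) are
		// returned, so the message is redelivered and retried later
		return errors.New("empty body, retry later")
	}
	// permanently bad input (like an undecodable payload above) is
	// swallowed with a nil return, so it is not retried forever
	return nil
}

func main() {
	h := &handler{}
	fmt.Println(h.HandleMessage(nil))          // non-nil -> NACK, requeue
	fmt.Println(h.HandleMessage([]byte("ok"))) // nil -> ACK, done
}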
Example #2
func HandleRequest(c *middleware.Context, ds *m.DataSource) {
	var req sqlDataRequest
	body, err := ioutil.ReadAll(c.Req.Request.Body)
	if err != nil {
		c.JsonApiErr(500, "Unable to read request body", err)
		return
	}
	req.Body = body
	if err := json.Unmarshal(req.Body, &req); err != nil {
		c.JsonApiErr(400, "Unable to parse request body", err)
		return
	}

	log.Debug("SQL request: query='%v'", req.Query)

	engine, err := getEngine(ds)
	if err != nil {
		c.JsonApiErr(500, "Unable to open SQL connection", err)
		return
	}
	defer engine.Close()

	session := engine.NewSession()
	defer session.Close()

	db := session.DB()

	result, err := getData(db, &req)
	if err != nil {
		c.JsonApiErr(500, fmt.Sprintf("Data error: %v, Query: %s", err.Error(), req.Query), err)
		return
	}

	c.JSON(200, result)
}
Example #3
func (this *thunderTask) fetch() error {
	this.Avatar.timestamp = time.Now()

	log.Debug("avatar.fetch(fetch new avatar): %s", this.Url)
	req, err := http.NewRequest("GET", this.Url, nil)
	if err != nil {
		return err
	}
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/jpeg,image/png,*/*;q=0.8")
	req.Header.Set("Accept-Encoding", "deflate,sdch")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.8")
	req.Header.Set("Cache-Control", "no-cache")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36")
	resp, err := client.Do(req)

	if err != nil {
		this.Avatar.notFound = true
		return fmt.Errorf("gravatar unreachable, %v", err)
	}

	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		this.Avatar.notFound = true
		return fmt.Errorf("status code: %d", resp.StatusCode)
	}

	this.Avatar.data = &bytes.Buffer{}

	// copy straight into the bytes.Buffer; the original wrapped it in a
	// bufio.Writer that was never flushed, which could drop buffered bytes
	if _, err = io.Copy(this.Avatar.data, resp.Body); err != nil {
		return err
	}

	return nil
}
Example #4
func (k *KairosHandler) HandleMessage(m *nsq.Message) error {
	created := time.Unix(0, m.Timestamp)
	if time.Now().Add(-4 * time.Minute).After(created) {
		log.Debug("requeuing msg %s. timestamp: %s. attempts: %d\n", m.ID, created, m.Attempts)
		attempts := 3 // try 3 different hosts before giving up and requeuing
		var err error
		for attempt := 1; attempt <= attempts; attempt++ {
			err = k.trySubmit(m.Body)
			if err == nil {
				msgsToLowPrioOK.Inc(1)
				return nil // we published the msg as lowprio and can mark it as processed
			}
		}
		msgsToLowPrioFail.Inc(1)
		log.Warn("failed to publish out of date message %s as low-prio. reprocessing later\n", m.ID)
		return err
	}
	err := k.gateway.ProcessHighPrio(m)
	if err != nil {
		msgsHandleHighPrioFail.Inc(1)
	} else {
		msgsHandleHighPrioOK.Inc(1)
	}
	return err
}
Example #5
func NewApiPluginProxy(ctx *middleware.Context, proxyPath string, route *plugins.AppPluginRoute, appId string) *httputil.ReverseProxy {
	targetUrl, err := url.Parse(route.Url)
	if err != nil {
		// a broken route url is a plugin configuration error; log it rather
		// than silently proxying to an empty target
		log.Error(3, "Plugins: failed to parse route url %s: %v", route.Url, err)
	}

	director := func(req *http.Request) {

		req.URL.Scheme = targetUrl.Scheme
		req.URL.Host = targetUrl.Host
		req.Host = targetUrl.Host

		req.URL.Path = util.JoinUrlFragments(targetUrl.Path, proxyPath)

		// clear cookie headers
		req.Header.Del("Cookie")
		req.Header.Del("Set-Cookie")

		// Create an HTTP header with the context in it.
		ctxJson, err := json.Marshal(ctx.SignedInUser)
		if err != nil {
			ctx.JsonApiErr(500, "failed to marshal context to json.", err)
			return
		}

		req.Header.Add("Grafana-Context", string(ctxJson))
		// add custom headers defined in the plugin config.
		for _, header := range route.Headers {
			var contentBuf bytes.Buffer
			t, err := template.New("content").Parse(header.Content)
			if err != nil {
				ctx.JsonApiErr(500, fmt.Sprintf("could not parse header content template for header %s.", header.Name), err)
				return
			}

			//lookup appSettings
			query := m.GetAppSettingByAppIdQuery{OrgId: ctx.OrgId, AppId: appId}

			if err := bus.Dispatch(&query); err != nil {
				ctx.JsonApiErr(500, "failed to get AppSettings.", err)
				return
			}
			type templateData struct {
				JsonData       map[string]interface{}
				SecureJsonData map[string]string
			}
			data := templateData{
				JsonData:       query.Result.JsonData,
				SecureJsonData: query.Result.SecureJsonData.Decrypt(),
			}
			err = t.Execute(&contentBuf, data)
			if err != nil {
				ctx.JsonApiErr(500, fmt.Sprintf("failed to execute header content template for header %s.", header.Name), err)
				return
			}
			log.Debug("Adding header to proxy request. %s: %s", header.Name, contentBuf.String())
			req.Header.Add(header.Name, contentBuf.String())
		}
	}

	return &httputil.ReverseProxy{Director: director}
}
Example #6
func (a *AggMetric) Persist(c *Chunk) {
	log.Debug("starting to save %v", c)
	data := c.Series.Bytes()
	chunkSizeAtSave.Value(int64(len(data)))
	err := InsertMetric(a.Key, c.T0, data, *metricTTL)
	if err == nil {
		a.Lock()
		c.Saved = true
		a.Unlock()
		log.Debug("save complete. %v", c)
		chunkSaveOk.Inc(1)
	} else {
		log.Error(1, "failed to save metric to cassandra. %v, %s", c, err)
		chunkSaveFail.Inc(1)
		// TODO
	}
}
Example #7
func (mg *Migrator) Start() error {
	if mg.LogLevel <= log.INFO {
		log.Info("Migrator: Starting DB migration")
	}

	logMap, err := mg.GetMigrationLog()
	if err != nil {
		return err
	}

	for _, m := range mg.migrations {
		_, exists := logMap[m.Id()]
		if exists {
			if mg.LogLevel <= log.DEBUG {
				log.Debug("Migrator: Skipping migration: %v, Already executed", m.Id())
			}
			continue
		}

		sql := m.Sql(mg.dialect)

		record := MigrationLog{
			MigrationId: m.Id(),
			Sql:         sql,
			Timestamp:   time.Now(),
		}

		if mg.LogLevel <= log.DEBUG {
			log.Debug("Migrator: Executing SQL: \n %v \n", sql)
		}

		if err := mg.exec(m); err != nil {
			log.Error(3, "Migrator: error: \n%s:\n%s", err, sql)
			record.Error = err.Error()
			mg.x.Insert(&record)
			return err
		}

		record.Success = true
		if _, err := mg.x.Insert(&record); err != nil {
			return err
		}
	}

	return nil
}
Example #8
func indexMetric(m *schema.MetricDefinition) error {
	log.Debug("indexing %s in redis", m.Id)
	metricStr, err := json.Marshal(m)
	if err != nil {
		return err
	}
	// note: the original checked err here instead of rerr, so redis failures were never logged
	if rerr := rs.SetEx(m.Id, 300*time.Second, string(metricStr)).Err(); rerr != nil {
		log.Error(3, "redis err. %s", rerr)
	}

	log.Debug("indexing %s in elasticsearch", m.Id)
	err = Indexer.Index("metric", "metric_index", m.Id, "", "", nil, m)
	if err != nil {
		log.Error(3, "failed to send payload to BulkApi indexer. %s", err)
		return err
	}

	return nil
}
Example #9
func authenticate(data *Auth_data, b []byte) error {
	auth_url := data.Server + "/v3/auth/tokens?nocatalog"

	log.Debug("Authentication request to URL: %s", auth_url)

	log.Debug("Authentication request body: \n%s", anonymisePasswordsTokens(data, b))

	request, err := http.NewRequest("POST", auth_url, bytes.NewBuffer(b))
	if err != nil {
		return err
	}

	resp, err := GetHttpClient().Do(request)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 201 {
		return errors.New("Keystone authentication failed: " + resp.Status)
	}

	buf := new(bytes.Buffer)
	if _, err := buf.ReadFrom(resp.Body); err != nil {
		return err
	}
	strBody := buf.Bytes()
	log.Debug("Authentication response: \n%s", strBody)

	decoder := json.NewDecoder(bytes.NewReader(strBody))

	var auth_response auth_response_struct
	err = decoder.Decode(&auth_response)
	if err != nil {
		return err
	}

	data.Token = resp.Header.Get("X-Subject-Token")
	data.Expiration = auth_response.Token.Expires_at
	data.Roles = auth_response.Token.Roles

	return nil
}
Example #10
func Save(e *schema.ProbeEvent) error {
	if e.Id == "" {
		u := uuid.NewRandom()
		e.Id = u.String()
	}
	if e.Timestamp == 0 {
		// looks like this expects timestamps in milliseconds
		e.Timestamp = time.Now().UnixNano() / int64(time.Millisecond)
	}
	if err := e.Validate(); err != nil {
		return err
	}
	log.Debug("saving event to elasticsearch.")
	resp, err := es.Index("events", e.EventType, e.Id, nil, e)
	log.Debug("elasticsearch response: %v", resp)
	return err
}
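The millisecond arithmetic above is worth pinning down: UnixNano() divided by int64(time.Millisecond) (1e6) yields epoch milliseconds. A small sketch verifying the conversion round-trips within one millisecond:

package main

import (
	"fmt"
	"time"
)

func main() {
	now := time.Now()
	ms := now.UnixNano() / int64(time.Millisecond) // ns -> ms (1e6 ns per ms)
	// converting back loses only sub-millisecond precision
	back := time.Unix(0, ms*int64(time.Millisecond))
	fmt.Println(ms, now.Sub(back) < time.Millisecond) // <epoch ms> true
}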
Example #11
// don't ever call with a ts of 0, because we use 0 to mean not initialized!
func (a *AggMetric) Add(ts uint32, val float64) {
	a.Lock()
	defer a.Unlock()

	t0 := ts - (ts % a.ChunkSpan)

	currentChunk := a.getChunk(a.CurrentChunkPos)
	if currentChunk == nil {
		chunkCreate.Inc(1)
		// no data has been added to this metric at all.
		log.Debug("instantiating new circular buffer.")
		a.Chunks = append(a.Chunks, NewChunk(t0))

		if err := a.Chunks[0].Push(ts, val); err != nil {
			panic(fmt.Sprintf("FATAL ERROR: this should never happen. Pushing initial value <%d,%f> to new chunk at pos 0 failed: %q", ts, val, err))
		}

		log.Debug("created new chunk. %s:  %v", a.Key, a.Chunks[0])
	} else if t0 == currentChunk.T0 {
		if currentChunk.Saved {
			//TODO(awoods): allow the chunk to be re-opened.
			log.Error(3, "cant write to chunk that has already been saved. %s T0:%d", a.Key, currentChunk.T0)
			return
		}
		// last prior data was in same chunk as new point
		if err := a.Chunks[a.CurrentChunkPos].Push(ts, val); err != nil {
			log.Error(3, "failed to add metric to chunk for %s. %s", a.Key, err)
			return
		}
	} else if t0 < currentChunk.T0 {
		log.Error(3, "Point at %d has t0 %d, goes back into previous chunk. CurrentChunk t0: %d, LastTs: %d", ts, t0, currentChunk.T0, currentChunk.LastTs)
		return
	} else {
		currentChunk.Finish()
		go a.Persist(currentChunk)

		a.CurrentChunkPos++
		if a.CurrentChunkPos >= int(a.NumChunks) {
			a.CurrentChunkPos = 0
		}

		chunkCreate.Inc(1)
		if len(a.Chunks) < int(a.NumChunks) {
			log.Debug("adding new chunk to cirular Buffer. now %d chunks", a.CurrentChunkPos+1)
			a.Chunks = append(a.Chunks, NewChunk(t0))
		} else {
			chunkClear.Inc(1)
			log.Debug("numChunks: %d  currentPos: %d", len(a.Chunks), a.CurrentChunkPos)
			log.Debug("clearing chunk from circular buffer. %v", a.Chunks[a.CurrentChunkPos])
			a.Chunks[a.CurrentChunkPos] = NewChunk(t0)
		}
		log.Debug("created new chunk. %s: %v", a.Key, a.Chunks[a.CurrentChunkPos])

		if err := a.Chunks[a.CurrentChunkPos].Push(ts, val); err != nil {
			panic(fmt.Sprintf("FATAL ERROR: this should never happen. Pushing initial value <%d,%f> to new chunk at pos %d failed: %q", ts, val, a.CurrentChunkPos, err))
		}
	}
	a.addAggregators(ts, val)
}
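Add maintains a fixed-size ring of chunks: the buffer grows by appending until it holds NumChunks, after which CurrentChunkPos wraps to zero and the oldest chunk is overwritten. A stripped-down sketch of just that index logic (chunks reduced to their T0 values for illustration):

package main

import "fmt"

// ring models AggMetric's chunk buffer: grow by appending until full,
// then wrap the position and overwrite the oldest slot.
type ring struct {
	slots []uint32 // stands in for []*Chunk; holds each chunk's T0
	pos   int      // stands in for CurrentChunkPos
	max   int      // stands in for NumChunks
}

func (r *ring) add(t0 uint32) {
	if len(r.slots) == 0 {
		r.slots = append(r.slots, t0) // very first chunk, pos stays 0
		return
	}
	r.pos++
	if r.pos >= r.max {
		r.pos = 0 // wrap, like the CurrentChunkPos reset above
	}
	if len(r.slots) < r.max {
		r.slots = append(r.slots, t0) // buffer still growing
	} else {
		r.slots[r.pos] = t0 // buffer full: clear and replace the oldest chunk
	}
}

func main() {
	r := &ring{max: 3}
	for _, t0 := range []uint32{600, 1200, 1800, 2400, 3000} {
		r.add(t0)
		fmt.Println(r.slots, "pos:", r.pos)
	}
	// ends at [2400 3000 1800] pos: 1; the oldest chunks were overwritten
}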
Example #12
func GetMetricDefinition(id string) (*schema.MetricDefinition, error) {
	// TODO: fetch from redis before checking elasticsearch
	if v, err := rs.Get(id).Result(); err != nil && err != redis.Nil {
		log.Error(3, "The redis client bombed: %s", err)
		return nil, err
	} else if err == nil {
		def, err := schema.MetricDefinitionFromJSON([]byte(v))
		if err != nil {
			return nil, err
		}
		return def, nil
	}

	log.Debug("%s not in redis. checking elasticsearch.", id)
	res, err := es.Get("metric", "metric_index", id, nil)
	if err != nil {
		if err == elastigo.RecordNotFound {
			log.Debug("%s not in ES. %s", id, err)
		} else {
			log.Error(3, "elasticsearch query failed. %s", err)
		}
		return nil, err
	}
	// note: the original checked err here instead of rerr, so redis failures were never logged
	if rerr := rs.SetEx(id, 300*time.Second, string(*res.Source)).Err(); rerr != nil {
		log.Error(3, "redis err. %s", rerr)
	}

	def, err := schema.MetricDefinitionFromJSON(*res.Source)
	if err != nil {
		return nil, err
	}

	return def, nil
}
Example #13
func inspect(fn GraphiteReturner, job *Job, cache *lru.Cache) {
	key := fmt.Sprintf("%d-%d", job.MonitorId, job.LastPointTs.Unix())
	if found, _ := cache.ContainsOrAdd(key, true); found {
		log.Debug("Job %s already done", job)
		return
	}
	gr, err := fn(job.OrgId)
	if err != nil {
		log.Debug("Job %s: FATAL: %q", job, err)
		return
	}
	evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition)
	if err != nil {
		log.Debug("Job %s: FATAL: invalid check definition: %q", job, err)
		return
	}

	res, err := evaluator.Eval(job.LastPointTs)
	if err != nil {
		log.Debug("Job %s: FATAL: eval failed: %q", job, err)
		return
	}
	log.Debug("Job %s results: %v", job, res)
}
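The ContainsOrAdd call at the top is what makes inspect idempotent: the first caller for a given MonitorId/LastPointTs key adds it and proceeds, and every later caller sees found == true and returns early. A minimal sketch of the same dedup pattern, assuming the lru import is github.com/hashicorp/golang-lru:

package main

import (
	"fmt"

	lru "github.com/hashicorp/golang-lru"
)

func main() {
	// bounded cache, so the dedup set cannot grow without limit
	cache, err := lru.New(1000)
	if err != nil {
		panic(err)
	}
	key := "42-1456789200" // "MonitorId-LastPointTs", as built in inspect()
	for i := 0; i < 2; i++ {
		// ContainsOrAdd reports whether the key was already present and
		// inserts it if not, so only the first caller does the work
		found, _ := cache.ContainsOrAdd(key, true)
		fmt.Println("attempt", i, "already done:", found) // false, then true
	}
}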
Example #14
func LoadOrSetOffset() int {
	query := m.GetAlertSchedulerValueQuery{
		Id: "offset",
	}
	err := bus.Dispatch(&query)
	if err != nil {
		panic(fmt.Sprintf("failure querying for current offset: %q", err))
	}
	if query.Result == "" {
		log.Debug("initializing offset to default value of 30 seconds.")
		setOffset(30)
		return 30
	}
	i, err := strconv.Atoi(query.Result)
	if err != nil {
		panic(fmt.Sprintf("failure reading in offset: %q. input value was: %q", err, query.Result))
	}
	return i
}
Example #15
func (k *ESHandler) HandleMessage(m *nsq.Message) error {
	log.Debug("received message.")
	format := "unknown"
	if m.Body[0] == '\x00' {
		format = "msgFormatJson"
	}
	var id int64
	buf := bytes.NewReader(m.Body[1:9])
	binary.Read(buf, binary.BigEndian, &id)
	produced := time.Unix(0, id)

	msgsAge.Value(time.Since(produced).Nanoseconds() / 1000)
	messagesSize.Value(int64(len(m.Body)))

	event := new(schema.ProbeEvent)
	if err := json.Unmarshal(m.Body[9:], &event); err != nil {
		log.Error(3, "ERROR: failure to unmarshal message body via format %s: %s. skipping message", format, err)
		return nil
	}
	done := make(chan error, 1)
	go func() {
		pre := time.Now()
		if err := eventdef.Save(event); err != nil {
			log.Error(3, "ERROR: couldn't process %s: %s\n", event.Id, err)
			eventsToEsFail.Inc(1)
			done <- err
			return
		}
		esPutDuration.Value(time.Since(pre))
		eventsToEsOK.Inc(1)
		done <- nil
	}()

	if err := <-done; err != nil {
		msgsHandleFail.Inc(1)
		return err
	}

	msgsHandleOK.Inc(1)

	return nil
}
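Reading HandleMessage backwards gives the wire format it expects: one leading format byte ('\x00' for JSON), an 8-byte big-endian int64 that serves as both message id and produced-at UnixNano timestamp, then the JSON payload. A hedged sketch of a matching encoder, inferred from the decoder above rather than taken from the source:

package main

import (
	"bytes"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"time"
)

// encodeBody builds a body HandleMessage above can decode: byte 0 is the
// format marker, bytes 1..8 are a big-endian int64 id (a UnixNano
// timestamp, so the consumer can compute message age), then the JSON event.
func encodeBody(event interface{}) ([]byte, error) {
	payload, err := json.Marshal(event)
	if err != nil {
		return nil, err
	}
	buf := &bytes.Buffer{}
	buf.WriteByte('\x00') // format marker: JSON
	if err := binary.Write(buf, binary.BigEndian, time.Now().UnixNano()); err != nil {
		return nil, err
	}
	buf.Write(payload)
	return buf.Bytes(), nil
}

func main() {
	body, err := encodeBody(map[string]string{"event_type": "monitor_state"})
	if err != nil {
		panic(err)
	}
	fmt.Printf("header: % x  payload: %s\n", body[:9], body[9:])
}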
Example #16
func (u *S3Uploader) Upload(imageDiskPath string) (string, error) {

	s3util.DefaultConfig.AccessKey = u.accessKey
	s3util.DefaultConfig.SecretKey = u.secretKey

	header := make(http.Header)
	header.Add("x-amz-acl", "public-read")
	header.Add("Content-Type", "image/png")

	var imageUrl *url.URL
	var err error

	if imageUrl, err = url.Parse(u.bucket); err != nil {
		return "", err
	}

	// add image to url
	imageUrl.Path = path.Join(imageUrl.Path, util.GetRandomString(20)+".png")
	imageUrlString := imageUrl.String()
	log.Debug("Uploading image to s3", "url", imageUrlString)

	imgData, err := ioutil.ReadFile(imageDiskPath)
	if err != nil {
		return "", err
	}

	writer, err := s3util.Create(imageUrlString, header, nil)
	if err != nil {
		return "", err
	}

	if _, err = writer.Write(imgData); err != nil {
		writer.Close()
		return "", err
	}

	// s3util finalizes the upload when the writer is closed, so check the
	// Close error instead of discarding it in a defer
	if err = writer.Close(); err != nil {
		return "", err
	}

	return imageUrlString, nil
}
Example #17
func GetProjects(data *Projects_data) error {
	log.Info("Project list request to URL: %s", data.Server+"/v3/auth/projects")

	request, err := http.NewRequest("GET", data.Server+"/v3/auth/projects", nil)
	if err != nil {
		return err
	}
	request.Header.Add("X-Auth-Token", data.Token)

	resp, err := GetHttpClient().Do(request)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return errors.New("Keystone project-list failed: " + resp.Status)
	}

	buf := new(bytes.Buffer)
	if _, err := buf.ReadFrom(resp.Body); err != nil {
		return err
	}
	strBody := buf.Bytes()
	log.Debug("Projects response: \n%s", strBody)

	decoder := json.NewDecoder(bytes.NewReader(strBody))

	var project_response project_response_struct
	err = decoder.Decode(&project_response)
	if err != nil {
		return err
	}
	for _, project := range project_response.Projects {
		if project.Enabled {
			data.Projects = append(data.Projects, project.Name)
		}
	}
	return nil
}
Example #18
func (u *S3Uploader) Upload(imageDiskPath string) (string, error) {
	sess := session.New()
	creds := credentials.NewChainCredentials(
		[]credentials.Provider{
			&credentials.StaticProvider{Value: credentials.Value{
				AccessKeyID:     u.accessKey,
				SecretAccessKey: u.secretKey,
			}},
			&credentials.EnvProvider{},
			&ec2rolecreds.EC2RoleProvider{Client: ec2metadata.New(sess), ExpiryWindow: 5 * time.Minute},
		})
	cfg := &aws.Config{
		Region:      aws.String(u.region),
		Credentials: creds,
	}

	key := util.GetRandomString(20) + ".png"
	log.Debug("Uploading image to s3", "bucket = ", u.bucket, ", key = ", key)

	file, err := os.Open(imageDiskPath)
	if err != nil {
		return "", err
	}
	defer file.Close()

	svc := s3.New(sess, cfg)
	params := &s3.PutObjectInput{
		Bucket:      aws.String(u.bucket),
		Key:         aws.String(key),
		ACL:         aws.String(u.acl),
		Body:        file,
		ContentType: aws.String("image/png"),
	}
	_, err = svc.PutObject(params)
	if err != nil {
		return "", err
	}

	return "https://" + u.bucket + ".s3.amazonaws.com/" + key, nil
}
Example #19
func InitAppPluginRoutes(r *macaron.Macaron) {
	for _, plugin := range plugins.Apps {
		for _, route := range plugin.Routes {
			url := util.JoinUrlFragments("/api/plugin-proxy/"+plugin.Id, route.Path)
			handlers := make([]macaron.Handler, 0)
			handlers = append(handlers, middleware.Auth(&middleware.AuthOptions{
				ReqSignedIn:     true,
				ReqGrafanaAdmin: route.ReqGrafanaAdmin,
			}))

			if route.ReqRole != "" {
				if route.ReqRole == m.ROLE_ADMIN {
					handlers = append(handlers, middleware.RoleAuth(m.ROLE_ADMIN))
				} else if route.ReqRole == m.ROLE_EDITOR {
					handlers = append(handlers, middleware.RoleAuth(m.ROLE_EDITOR, m.ROLE_ADMIN))
				}
			}
			handlers = append(handlers, AppPluginRoute(route, plugin.Id))
			r.Route(url, route.Method, handlers...)
			log.Debug("Plugins: Adding proxy route %s", url)
		}
	}
}
Example #20
func QuotaReached(c *Context, target string) (bool, error) {
	if !setting.Quota.Enabled {
		return false, nil
	}

	// get the list of scopes that this target is valid for. Org, User, Global
	scopes, err := m.GetQuotaScopes(target)
	if err != nil {
		return false, err
	}

	log.Debug("checking quota for %s in scopes %v", target, scopes)

	for _, scope := range scopes {
		log.Debug("checking scope %s", scope.Name)

		switch scope.Name {
		case "global":
			if scope.DefaultLimit < 0 {
				continue
			}
			if scope.DefaultLimit == 0 {
				return true, nil
			}
			if target == "session" {
				usedSessions := getSessionCount()
				if int64(usedSessions) > scope.DefaultLimit {
					log.Debug("%d sessions active, limit is %d", usedSessions, scope.DefaultLimit)
					return true, nil
				}
				continue
			}
			query := m.GetGlobalQuotaByTargetQuery{Target: scope.Target}
			if err := bus.Dispatch(&query); err != nil {
				return true, err
			}
			if query.Result.Used >= scope.DefaultLimit {
				return true, nil
			}
		case "org":
			if !c.IsSignedIn {
				continue
			}
			query := m.GetOrgQuotaByTargetQuery{OrgId: c.OrgId, Target: scope.Target, Default: scope.DefaultLimit}
			if err := bus.Dispatch(&query); err != nil {
				return true, err
			}
			if query.Result.Limit < 0 {
				continue
			}
			if query.Result.Limit == 0 {
				return true, nil
			}

			if query.Result.Used >= query.Result.Limit {
				return true, nil
			}
		case "user":
			if !c.IsSignedIn || c.UserId == 0 {
				continue
			}
			query := m.GetUserQuotaByTargetQuery{UserId: c.UserId, Target: scope.Target, Default: scope.DefaultLimit}
			if err := bus.Dispatch(&query); err != nil {
				return true, err
			}
			if query.Result.Limit < 0 {
				continue
			}
			if query.Result.Limit == 0 {
				return true, nil
			}

			if query.Result.Used >= query.Result.Limit {
				return true, nil
			}
		}
	}

	return false, nil
}
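Each scope branch above applies the same three-way convention to a limit: negative means unlimited (skip the check), zero means the target is disabled outright, and otherwise the quota is reached once usage meets the limit. Factored out as a sketch for clarity:

package main

import "fmt"

// reached encodes the convention used by the org/user/global branches:
// limit < 0 -> unlimited, limit == 0 -> always reached (target disabled),
// otherwise reached once used >= limit.
func reached(used, limit int64) bool {
	if limit < 0 {
		return false
	}
	if limit == 0 {
		return true
	}
	return used >= limit
}

func main() {
	fmt.Println(reached(10, -1)) // false: unlimited
	fmt.Println(reached(0, 0))   // true: disabled
	fmt.Println(reached(9, 10))  // false: under quota
	fmt.Println(reached(10, 10)) // true: at quota
}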
Example #21
// execute executes an alerting job and returns any errors.
// errors are always prefixed with 'non-fatal' (i.e. an error condition where retrying the job later might fix it)
// or 'fatal', when we're sure the job will never process successfully.
func execute(fn GraphiteReturner, job *Job, cache *lru.Cache) error {
	key := fmt.Sprintf("%d-%d", job.MonitorId, job.LastPointTs.Unix())

	preConsider := time.Now()

	if found, _ := cache.ContainsOrAdd(key, true); found {
		log.Debug("T %s already done", key)
		executorNumAlreadyDone.Inc(1)
		executorConsiderJobAlreadyDone.Value(time.Since(preConsider))
		return nil
	}

	log.Debug("T %s doing", key)
	executorNumOriginalTodo.Inc(1)
	executorConsiderJobOriginalTodo.Value(time.Since(preConsider))
	gr, err := fn(job.OrgId)
	if err != nil {
		return fmt.Errorf("fatal: job %q: %q", job, err)
	}
	if gr, ok := gr.(*graphite.GraphiteContext); ok {
		gr.AssertMinSeries = job.AssertMinSeries
		gr.AssertStart = job.AssertStart
		gr.AssertStep = job.AssertStep
		gr.AssertSteps = job.AssertSteps
	}

	preExec := time.Now()
	executorJobExecDelay.Value(preExec.Sub(job.LastPointTs))
	evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition)
	if err != nil {
		// expressions should be validated before they are stored in the db!
		return fmt.Errorf("fatal: job %q: invalid check definition %q: %q", job, job.Definition, err)
	}

	res, err := evaluator.Eval(job.LastPointTs)
	log.Debug("job results - job:%v err:%v res:%v", job, err, res)
	if err != nil {
		return fmt.Errorf("Eval failed for job %q : %s", job, err.Error())
	}

	durationExec := time.Since(preExec)

	updateMonitorStateCmd := m.UpdateMonitorStateCommand{
		Id:      job.MonitorId,
		State:   res,
		Updated: job.LastPointTs,
		Checked: preExec,
	}
	if err := bus.Dispatch(&updateMonitorStateCmd); err != nil {
		//check if we failed due to deadlock.
		if err.Error() == "Error 1213: Deadlock found when trying to get lock; try restarting transaction" {
			err = bus.Dispatch(&updateMonitorStateCmd)
		}
	}
	if err != nil {
		return fmt.Errorf("non-fatal: failed to update monitor state: %q", err)
	}
	if gr, ok := gr.(*graphite.GraphiteContext); ok {
		requests := ""
		for _, trace := range gr.Traces {
			r := trace.Request
			requests += fmt.Sprintf("\ntargets: %s\nfrom:%s\nto:%s\nresponse:%s\n", r.Targets, r.Start, r.End, trace.Response)
		}
		log.Debug("Job %s state_change=%t request traces: %s", job, updateMonitorStateCmd.Affected > 0, requests)
	}
	if updateMonitorStateCmd.Affected > 0 {
		//emit a state change event.
		if job.Notifications.Enabled {
			emails := strings.Split(job.Notifications.Addresses, ",")
			// strings.Split always returns at least one element, so also catch an empty address string
			if len(emails) < 1 || emails[0] == "" {
				log.Debug("no email addresses provided. OrgId: %d monitorId: %d", job.OrgId, job.MonitorId)
			} else {
				for _, email := range emails {
					log.Info("sending email. addr=%s, orgId=%d, monitorId=%d, endpointSlug=%s, state=%s", email, job.OrgId, job.MonitorId, job.EndpointSlug, res.String())
				}
				sendCmd := m.SendEmailCommand{
					To:       emails,
					Template: "alerting_notification.html",
					Data: map[string]interface{}{
						"EndpointId":   job.EndpointId,
						"EndpointName": job.EndpointName,
						"EndpointSlug": job.EndpointSlug,
						"Settings":     job.Settings,
						"CheckType":    job.MonitorTypeName,
						"State":        res.String(),
						"TimeLastData": job.LastPointTs, // timestamp of the most recent data used
						"TimeExec":     preExec,         // when we executed the alerting rule and made the determination
					},
				}

				if err := bus.Dispatch(&sendCmd); err != nil {
					log.Info("failed to send email to %s. OrgId: %d monitorId: %d", emails, job.OrgId, job.MonitorId, err)
				}
			}
		}
	}
	//store the result in graphite.
	job.StoreResult(res)

	// the bosun api abstracts parsing, execution and graphite querying for us via 1 call.
	// we want to have some individual times
	if gr, ok := gr.(*graphite.GraphiteContext); ok {
		executorJobQueryGraphite.Value(gr.Dur)
		executorJobParseAndEval.Value(durationExec - gr.Dur)
		if gr.MissingVals > 0 {
			executorGraphiteMissingVals.Value(int64(gr.MissingVals))
		}
		if gr.EmptyResp != 0 {
			executorGraphiteEmptyResponse.Inc(int64(gr.EmptyResp))
		}
		if gr.IncompleteResp != 0 {
			executorGraphiteIncompleteResponse.Inc(int64(gr.IncompleteResp))
		}
		if gr.BadStart != 0 {
			executorGraphiteBadStart.Inc(int64(gr.BadStart))
		}
		if gr.BadStep != 0 {
			executorGraphiteBadStep.Inc(int64(gr.BadStep))
		}
		if gr.BadSteps != 0 {
			executorGraphiteBadSteps.Inc(int64(gr.BadSteps))
		}
	}

	switch res {
	case m.EvalResultOK:
		executorAlertOutcomesOk.Inc(1)
	case m.EvalResultWarn:
		executorAlertOutcomesWarn.Inc(1)
	case m.EvalResultCrit:
		executorAlertOutcomesCrit.Inc(1)
	case m.EvalResultUnknown:
		executorAlertOutcomesUnkn.Inc(1)
	}

	return nil
}
Example #22
func Executor(fn GraphiteReturner, jobQueue <-chan Job) {
	cache, err := lru.New(10000) // TODO configurable
	if err != nil {
		panic(fmt.Sprintf("Can't create LRU: %s", err.Error()))
	}
	// create series explicitly otherwise the grafana-influxdb graphs don't work if the series doesn't exist
	Stat.IncrementValue("alert-executor.alert-outcomes.ok", 0)
	Stat.IncrementValue("alert-executor.alert-outcomes.critical", 0)
	Stat.IncrementValue("alert-executor.alert-outcomes.unknown", 0)
	Stat.IncrementValue("alert-executor.graphite-emptyresponse", 0)
	Stat.TimeDuration("alert-executor.consider-job.already-done", 0)
	Stat.TimeDuration("alert-executor.consider-job.original-todo", 0)

	for job := range jobQueue {
		Stat.Gauge("alert-jobqueue-internal.items", int64(len(jobQueue)))
		Stat.Gauge("alert-jobqueue-internal.size", int64(jobQueueSize))

		key := fmt.Sprintf("%s-%d", job.Key, job.LastPointTs.Unix())

		preConsider := time.Now()

		if _, ok := cache.Get(key); ok {
			log.Debug("T %s alredy done", key)
			Stat.TimeDuration("alert-executor.consider-job.already-done", time.Since(preConsider))
			continue
		}

		log.Debug("T %s doing", key)
		Stat.TimeDuration("alert-executor.consider-job.original-todo", time.Since(preConsider))
		gr := fn(job.OrgId)

		preExec := time.Now()
		evaluator, err := NewGraphiteCheckEvaluator(gr, job.Definition)
		if err != nil {
			// expressions should be validated before they are stored in the db
			// if they fail now it's a critical error
			panic(fmt.Sprintf("received invalid check definition '%s': %s", job.Definition, err))
		}

		res, err := evaluator.Eval(job.LastPointTs)
		log.Debug("job results - job:%v err:%v res:%v", job, err, res)

		durationExec := time.Since(preExec)
		if job.State != res {
			//monitor state has changed.
			updateMonitorStateCmd := m.UpdateMonitorStateCommand{
				Id:      job.MonitorId,
				State:   res,
				Updated: job.LastPointTs,
			}
			if err := bus.Dispatch(&updateMonitorStateCmd); err != nil {
				panic(fmt.Sprintf("failed to update monitor state. %s", err.Error()))
			}
			//emit a state change event.
			if job.Notifications.Enabled {
				emails := strings.Split(job.Notifications.Addresses, ",")
				// strings.Split always returns at least one element, so also catch an empty address string
				if len(emails) < 1 || emails[0] == "" {
					log.Debug("no email addresses provided. OrgId: %d monitorId: %d", job.OrgId, job.MonitorId)
					continue
				}
				sendCmd := m.SendEmailCommand{
					To:       emails,
					Template: "alerting_notification.html",
					Data: map[string]interface{}{
						"Endpoint":  job.EndpointSlug,
						"CheckType": job.MonitorTypeName,
						"State":     res.String(),
					},
				}

				if err := bus.Dispatch(&sendCmd); err != nil {
					log.Info("failed to send email to %s. OrgId: %d monitorId: %d", emails, job.OrgId, job.MonitorId, err)
				}
			}
		}
		//store the result in graphite.
		job.StoreResult(res)

		// the bosun api abstracts parsing, execution and graphite querying for us via 1 call.
		// we want to have some individual times
		if gr, ok := gr.(*GraphiteContext); ok {
			Stat.TimeDuration("alert-executor.job_query_graphite", gr.dur)
			Stat.TimeDuration("alert-executor.job_parse-and-evaluate", durationExec-gr.dur)
			Stat.Timing("alert-executor.graphite-missingVals", int64(gr.missingVals))
			if gr.emptyResp {
				Stat.Increment("alert-executor.graphite-emptyresponse")
			}
		}

		Stat.Increment(strings.ToLower(fmt.Sprintf("alert-executor.alert-outcomes.%s", res)))

		cache.Add(key, true)
	}
}
Example #23
// note: we don't normalize/quantize/fill-unknowns
// we just serve what we know
func Get(w http.ResponseWriter, req *http.Request) {
	pre := time.Now()
	values := req.URL.Query()
	keys, ok := values["target"]
	if !ok {
		http.Error(w, "missing render arg", http.StatusBadRequest)
		return
	}
	now := time.Now()
	fromUnix := uint32(now.Add(-24 * time.Hour).Unix())
	toUnix := uint32(now.Add(time.Second).Unix())
	from := values.Get("from")
	if from != "" {
		fromUnixInt, err := strconv.Atoi(from)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		fromUnix = uint32(fromUnixInt)
	}
	to := values.Get("to")
	if to != "" {
		toUnixInt, err := strconv.Atoi(to)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		toUnix = uint32(toUnixInt)
	}
	if fromUnix >= toUnix {
		http.Error(w, "to must be higher than from", http.StatusBadRequest)
		return
	}

	out := make([]Series, len(keys))
	for i, key := range keys {
		iters := make([]*tsz.Iter, 0)
		var memIters []*tsz.Iter
		oldest := toUnix
		if metric, ok := metrics.Get(key); ok {
			oldest, memIters = metric.Get(fromUnix, toUnix)
		} else {
			memIters = make([]*tsz.Iter, 0)
		}
		if oldest > fromUnix {
			reqSpanBoth.Value(int64(toUnix - fromUnix))
			log.Debug("data load from cassandra: %s - %s from mem: %s - %s", TS(fromUnix), TS(oldest), TS(oldest), TS(toUnix))
			storeIters, err := searchCassandra(key, fromUnix, oldest)
			if err != nil {
				http.Error(w, err.Error(), http.StatusBadRequest)
				return
			}
			iters = append(iters, storeIters...)
		} else {
			reqSpanMem.Value(int64(toUnix - fromUnix))
			log.Debug("data load from mem: %s-%s, oldest (%d)", TS(fromUnix), TS(toUnix), oldest)
		}
		iters = append(iters, memIters...)
		points := make([]Point, 0)
		for _, iter := range iters {
			for iter.Next() {
				ts, val := iter.Values()
				if ts >= fromUnix && ts < toUnix {
					points = append(points, Point{val, ts})
				}
			}
		}
		out[i] = Series{
			Target:     key,
			Datapoints: points,
		}
	}
	js, err := json.Marshal(out)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	reqHandleDuration.Value(time.Since(pre))
	w.Write(js)
}
Example #24
// Get all data between the requested time ranges. From is inclusive, to is exclusive: from <= x < to.
// More data than what's requested may be included.
// Also returns the oldest point we have, so that if your query needs data before it, the caller knows when to query cassandra.
func (a *AggMetric) Get(from, to uint32) (uint32, []*tsz.Iter) {
	log.Debug("GET: %s from: %d to:%d", a.Key, from, to)
	if from >= to {
		panic("invalid request. to must > from")
	}
	a.RLock()
	defer a.RUnlock()

	newestChunk := a.getChunk(a.CurrentChunkPos)

	if newestChunk == nil {
		// we don't have any data yet.
		log.Debug("no data for requested range.")
		return math.MaxUint32, make([]*tsz.Iter, 0)
	}
	if from >= newestChunk.T0+a.ChunkSpan {
		// we have no data in the requested range.
		log.Debug("no data for requested range.")
		return math.MaxUint32, make([]*tsz.Iter, 0)
	}

	// get the oldest chunk we have.
	// eg if we have 5 chunks, N is the current chunk and n-4 is the oldest chunk.
	// -----------------------------
	// | n-4 | n-3 | n-2 | n-1 | n |  CurrentChunkPos = 4
	// -----------------------------
	// -----------------------------
	// | n | n-4 | n-3 | n-2 | n-1 |  CurrentChunkPos = 0
	// -----------------------------
	// -----------------------------
	// | n-2 | n-1 | n | n-4 | n-3 |  CurrentChunkPos = 2
	// -----------------------------
	oldestPos := a.CurrentChunkPos + 1
	if oldestPos >= len(a.Chunks) {
		oldestPos = 0
	}

	oldestChunk := a.getChunk(oldestPos)
	if oldestChunk == nil {
		log.Error(3, "unexpected nil chunk.")
		return math.MaxUint32, make([]*tsz.Iter, 0)
	}

	if to <= oldestChunk.T0 {
		// the requested time range ends before any data we have.
		log.Debug("no data for requested range")
		return oldestChunk.T0, make([]*tsz.Iter, 0)
	}

	// Find the oldest Chunk that the "from" ts falls in.  If from extends before the oldest
	// chunk, then we just use the oldest chunk.
	for from >= oldestChunk.T0+a.ChunkSpan {
		oldestPos++
		if oldestPos >= len(a.Chunks) {
			oldestPos = 0
		}
		oldestChunk = a.getChunk(oldestPos)
		if oldestChunk == nil {
			log.Error(3, "unexpected nil chunk.")
			return to, make([]*tsz.Iter, 0)
		}
	}

	// find the newest Chunk that "to" falls in.  If "to" extends to after the newest data
	// then just return the newest chunk.
	// some examples to clarify this more. assume newestChunk.T0 is at 120, then
	// for a to of 121 -> data up to (incl) 120 -> stay at this chunk, it has a point we need
	// for a to of 120 -> data up to (incl) 119 -> use older chunk
	// for a to of 119 -> data up to (incl) 118 -> use older chunk
	newestPos := a.CurrentChunkPos
	for to <= newestChunk.T0 {
		newestPos--
		if newestPos < 0 {
			newestPos += len(a.Chunks)
		}
		newestChunk = a.getChunk(newestPos)
		if newestChunk == nil {
			log.Error(3, "unexpected nil chunk.")
			return to, make([]*tsz.Iter, 0)
		}
	}

	// now just start at oldestPos and move through the Chunks circular Buffer to newestPos
	iters := make([]*tsz.Iter, 0, a.NumChunks)
	for oldestPos != newestPos {
		iters = append(iters, a.getChunk(oldestPos).Iter())
		oldestPos++
		if oldestPos >= int(a.NumChunks) {
			oldestPos = 0
		}
	}
	// add the last chunk
	iters = append(iters, a.getChunk(oldestPos).Iter())

	return oldestChunk.T0, iters
}
Example #25
// this function must only be called while holding the lock
func (a *AggMetric) addAggregators(ts uint32, val float64) {
	for _, agg := range a.aggregators {
		log.Debug("pushing value to aggregator")
		agg.Add(ts, val)
	}
}
Example #26
func (a *keystoneAuther) syncOrgRoles(username, password string, user *m.User) error {
	log.Trace("syncOrgRoles()")
	err := a.getProjectList(username, password)
	if err != nil {
		return err
	}
	log.Debug("OpenStack project_list[roles]: %v", a.project_list)

	orgsQuery := m.GetUserOrgListQuery{UserId: user.Id}
	if err := bus.Dispatch(&orgsQuery); err != nil {
		return err
	}

	handledOrgIds := map[int64]bool{}

	// update or remove org roles
	for _, org := range orgsQuery.Result {
		handledOrgIds[org.OrgId] = true
		log.Info(fmt.Sprintf("Checking Grafana org %v for roles", org.Name))

		if user_roles, ok := a.project_list[org.Name]; ok {
			// Update roles if user belongs to org
			role_name := a.getRole(user_roles)
			if role_name != "" {
				if err := a.updateGrafanaOrgUser(user.Id, org.OrgId, role_name); err != nil {
					return err
				}
			} else {
				// remove user if no permissions
				if err := a.removeGrafanaOrgUser(user.Id, org.OrgId); err != nil {
					return err
				}
			}
		} else {
			// remove role if no mappings match
			if err := a.removeGrafanaOrgUser(user.Id, org.OrgId); err != nil {
				return err
			}
		}
	}

	// add missing org roles
	for project := range a.project_list {
		if grafanaOrg, err := a.getGrafanaOrgFor(project); err != nil {
			return err
		} else {
			if _, exists := handledOrgIds[grafanaOrg.Id]; exists {
				continue
			}

			// add role
			role_name := a.getRole(a.project_list[project])
			if role_name != "" {
				cmd := m.AddOrgUserCommand{UserId: user.Id, Role: role_name, OrgId: grafanaOrg.Id}
				if err := bus.Dispatch(&cmd); err != nil {
					return err
				}
			}

			// mark this tenant as handled so we do not process it again
			handledOrgIds[grafanaOrg.Id] = true
		}
	}

	// set or unset admin permissions
	isAdmin := false
	role_map := make(map[string]bool)
	for _, role := range a.admin_roles {
		role_map[role] = true
	}
	for project := range a.project_list {
		if isAdmin {
			break
		}
		project_roles := a.project_list[project]
		for _, role := range project_roles {
			if _, ok := role_map[role]; ok {
				isAdmin = true
				break
			}
		}
	}
	if isAdmin != user.IsAdmin {
		if err := a.updateGrafanaUserPermissions(user.Id, isAdmin); err != nil {
			return err
		}
	}

	orgsQuery = m.GetUserOrgListQuery{UserId: user.Id}
	if err := bus.Dispatch(&orgsQuery); err != nil {
		return err
	}

	if len(orgsQuery.Result) == 0 {
		return errors.New("Keystone authentication failed: No grafana permissions")
	}

	match := false
	var orgid int64
	for _, org := range orgsQuery.Result {
		orgid = org.OrgId
		if user.OrgId == orgid {
			match = true
			break
		}
	}

	// set org if none is set (for new users), or if user no longer has permissions for the current org
	if user.OrgId == 1 || !match {
		cmd := m.SetUsingOrgCommand{UserId: user.Id, OrgId: orgid}
		if err := bus.Dispatch(&cmd); err != nil {
			return err
		}
	}

	return nil
}