Example No. 1
// monitorIssueTracker reads the counts for all the types of issues in the Skia
// issue tracker (code.google.com/p/skia) and stuffs the counts into Graphite.
func monitorIssueTracker() {
	c := &http.Client{
		Transport: &http.Transport{
			Dial: dialTimeout,
		},
	}

	if *useMetadata {
		*apikey = metadata.Must(metadata.ProjectGet(metadata.APIKEY))
	}

	// Create a new metrics registry for the issue tracker metrics.
	addr, err := net.ResolveTCPAddr("tcp", *graphiteServer)
	if err != nil {
		glog.Fatalln("Failed to resolve the Graphite server: ", err)
	}
	issueRegistry := metrics.NewRegistry()
	go graphite.Graphite(issueRegistry, common.SAMPLE_PERIOD, "issues", addr)

	// IssueStatus has all the info we need to capture and record a single issue status. I.e. capture
	// the count of all issues with a status of "New".
	type IssueStatus struct {
		Name   string
		Metric metrics.Gauge
		URL    string
	}

	allIssueStatusLabels := []string{
		"New", "Accepted", "Unconfirmed", "Started", "Fixed", "Verified", "Invalid", "WontFix", "Done", "Available", "Assigned",
	}

	issueStatus := []*IssueStatus{}
	for _, issueName := range allIssueStatusLabels {
		issueStatus = append(issueStatus, &IssueStatus{
			Name:   issueName,
			Metric: metrics.NewRegisteredGauge(strings.ToLower(issueName), issueRegistry),
			URL:    "https://www.googleapis.com/projecthosting/v2/projects/skia/issues?fields=totalResults&key=" + *apikey + "&status=" + issueName,
		})
	}

	liveness := imetrics.NewLiveness("issue-tracker")
	for _ = range time.Tick(ISSUE_TRACKER_PERIOD) {
		for _, issue := range issueStatus {
			resp, err := c.Get(issue.URL)
			if err != nil {
				glog.Errorf("Failed to retrieve response from %s: %s", issue.URL, err)
				continue
			}
			jsonResp := map[string]int64{}
			dec := json.NewDecoder(resp.Body)
			if err := dec.Decode(&jsonResp); err != nil {
				glog.Warningf("Failed to decode JSON response: %s", err)
				util.Close(resp.Body)
				continue
			}
			issue.Metric.Update(jsonResp["totalResults"])
			glog.Infof("Num Issues: %s - %d", issue.Name, jsonResp["totalResults"])
			if err == nil && resp.Body != nil {
				util.Close(resp.Body)
			}
		}
		liveness.Update()
	}
}
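
The example above, like the ones that follow, uses the same liveness heartbeat pattern: create the liveness metric once, do the periodic work inside a ticker loop, and call Update() only when an iteration succeeds, so the metric reports how long ago the task last ran successfully. Below is a minimal sketch of that pattern, assuming the metrics and glog packages already used in these examples; the doWork callback and the metric name are hypothetical stand-ins for the real task.

// heartbeatLoop is a minimal, hypothetical sketch of the liveness pattern
// shared by these examples. doWork stands in for the real periodic task
// (ingesting builds, probing URLs, pinging a backend, etc.).
func heartbeatLoop(doWork func() error) {
	liveness := metrics.NewLiveness("example-task")
	for _ = range time.Tick(time.Minute) {
		if err := doWork(); err != nil {
			glog.Errorf("Failed one iteration: %s", err)
			continue
		}
		// Only record success; a failing or stalled loop shows up as a growing
		// time-since-last-update on the liveness metric.
		liveness.Update()
	}
}

In each real example the success condition differs (a successful ping, a clean tile refresh, a full probe round), but the rule is the same: Update() is reached only on the success path.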
Example No. 2
// NewTraceServiceDB creates a new DB that stores the data in the BoltDB backed
// gRPC accessible traceservice.
func NewTraceServiceDB(conn *grpc.ClientConn, traceBuilder tiling.TraceBuilder) (*TsDB, error) {
	ret := &TsDB{
		conn:         conn,
		traceService: traceservice.NewTraceServiceClient(conn),
		traceBuilder: traceBuilder,
		cache:        lru.New(MAX_ID_CACHED),
		ctx:          context.Background(),
	}

	// This ping causes the client to try and reach the backend. If the backend
	// is down, it will keep trying until it's up.
	if err := ret.ping(); err != nil {
		return nil, err
	}

	go func() {
		liveness := metrics.NewLiveness("tracedb-ping")
		for _ = range time.Tick(time.Minute) {
			if ret.ping() == nil {
				liveness.Update()
			}
		}
	}()
	return ret, nil
}
Example No. 3
// NewBuilder creates a new Builder given the gitinfo, and loads Tiles from the
// traceserver running at the given address. The tiles contain the last
// 'tileSize' commits and are built from Traces of the type that traceBuilder
// returns.
func NewBuilder(git *gitinfo.GitInfo, address string, tileSize int, traceBuilder tiling.TraceBuilder) (*Builder, error) {
	conn, err := grpc.Dial(address, grpc.WithInsecure())
	if err != nil {
		return nil, fmt.Errorf("did not connect: %v", err)
	}

	// Build a tracedb.DB client.
	tracedb, err := NewTraceServiceDB(conn, traceBuilder)
	if err != nil {
		return nil, fmt.Errorf("NewTraceStore: Failed to create DB: %s", err)
	}

	ret := &Builder{
		tileSize: tileSize,
		DB:       tracedb,
		git:      git,
	}
	if err := ret.LoadTile(); err != nil {
		return nil, fmt.Errorf("NewTraceStore: Failed to load initial Tile: %s", err)
	}
	go func() {
		liveness := metrics.NewLiveness("perf-tracedb-tile-refresh")
		for _ = range time.Tick(TILE_REFRESH_DURATION) {
			if err := ret.LoadTile(); err != nil {
				glog.Errorf("Failed to refresh tile: %s", err)
			} else {
				liveness.Update()
			}
		}
	}()
	return ret, nil
}
Example No. 4
// monitorIssueTracker reads the counts for all the types of issues in the Skia
// issue tracker (bugs.chromium.org/p/skia) and stuffs the counts into Graphite.
func monitorIssueTracker(c *http.Client) {
	// Create a new metrics registry for the issue tracker metrics.
	addr, err := net.ResolveTCPAddr("tcp", *graphiteServer)
	if err != nil {
		glog.Fatalln("Failed to resolve the Graphite server: ", err)
	}
	issueRegistry := metrics.NewRegistry()
	go graphite.Graphite(issueRegistry, common.SAMPLE_PERIOD, "issues", addr)

	// IssueStatus has all the info we need to capture and record a single issue status. I.e. capture
	// the count of all issues with a status of "New".
	type IssueStatus struct {
		Name   string
		Metric metrics.Gauge
		URL    string
	}

	allIssueStatusLabels := []string{
		"New", "Accepted", "Unconfirmed", "Started", "Fixed", "Verified", "Invalid", "WontFix", "Done", "Available", "Assigned",
	}

	issueStatus := []*IssueStatus{}
	for _, issueName := range allIssueStatusLabels {
		q := url.Values{}
		q.Set("fields", "totalResults")
		q.Set("status", issueName)
		issueStatus = append(issueStatus, &IssueStatus{
			Name:   issueName,
			Metric: metrics.NewRegisteredGauge(strings.ToLower(issueName), issueRegistry),
			URL:    issues.MONORAIL_BASE_URL + "?" + q.Encode(),
		})
	}

	liveness := imetrics.NewLiveness("issue-tracker")
	for _ = range time.Tick(ISSUE_TRACKER_PERIOD) {
		for _, issue := range issueStatus {
			resp, err := c.Get(issue.URL)
			if err != nil {
				glog.Errorf("Failed to retrieve response from %s: %s", issue.URL, err)
				continue
			}
			jsonResp := map[string]int64{}
			dec := json.NewDecoder(resp.Body)
			if err := dec.Decode(&jsonResp); err != nil {
				glog.Warningf("Failed to decode JSON response: %s", err)
				util.Close(resp.Body)
				continue
			}
			issue.Metric.Update(jsonResp["totalResults"])
			glog.Infof("Num Issues: %s - %d", issue.Name, jsonResp["totalResults"])
			if err == nil && resp.Body != nil {
				util.Close(resp.Body)
			}
		}
		liveness.Update()
	}
}
Example No. 5
func main() {
	defer common.LogPanic()
	// Setup flags.
	dbConf := buildbot.DBConfigFromFlags()

	// Global init.
	common.InitWithMetrics(APP_NAME, graphiteServer)

	// Parse the time period.
	period, err := human.ParseDuration(*timePeriod)
	if err != nil {
		glog.Fatal(err)
	}

	// Initialize the buildbot database.
	if !*local {
		if err := dbConf.GetPasswordFromMetadata(); err != nil {
			glog.Fatal(err)
		}
	}
	if err := dbConf.InitDB(); err != nil {
		glog.Fatal(err)
	}

	// Initialize the BuildBucket client.
	c, err := auth.NewClient(*local, path.Join(*workdir, "oauth_token_cache"), buildbucket.DEFAULT_SCOPES...)
	if err != nil {
		glog.Fatal(err)
	}
	bb := buildbucket.NewClient(c)

	// Build the queue.
	repos := gitinfo.NewRepoMap(*workdir)
	for _, r := range REPOS {
		if _, err := repos.Repo(r); err != nil {
			glog.Fatal(err)
		}
	}
	q, err := build_queue.NewBuildQueue(period, repos, *scoreThreshold, *scoreDecay24Hr, BOT_BLACKLIST)
	if err != nil {
		glog.Fatal(err)
	}

	// Start scheduling builds in a loop.
	liveness := metrics.NewLiveness(APP_NAME)
	if err := scheduleBuilds(q, bb); err != nil {
		glog.Errorf("Failed to schedule builds: %v", err)
	}
	for _ = range time.Tick(time.Minute) {
		liveness.Update()
		if err := scheduleBuilds(q, bb); err != nil {
			glog.Errorf("Failed to schedule builds: %v", err)
		}
	}
}
Example No. 6
// IngestNewBuildsLoop continually ingests new builds.
func IngestNewBuildsLoop(workdir string) {
	lv := metrics.NewLiveness("buildbot-ingest")
	repos := gitinfo.NewRepoMap(workdir)
	for _ = range time.Tick(30 * time.Second) {
		glog.Info("Ingesting builds.")
		if err := ingestNewBuilds(repos); err != nil {
			glog.Errorf("Failed to ingest new builds: %v", err)
		} else {
			lv.Update()
		}
	}
}
Example No. 7
// newSourceMetrics instantiates a set of metrics for an input source.
func newSourceMetrics(id string, sources []Source) []*sourceMetrics {
	ret := make([]*sourceMetrics, len(sources))
	for idx, source := range sources {
		prefix := fmt.Sprintf("%s.%s", id, source.ID())
		ret[idx] = &sourceMetrics{
			liveness:       smetrics.NewLiveness(prefix + ".poll-liveness"),
			pollTimer:      metrics.NewRegisteredTimer(prefix+".poll-timer", metrics.DefaultRegistry),
			pollError:      metrics.NewRegisteredGauge(prefix+".poll-error", metrics.DefaultRegistry),
			eventsReceived: metrics.NewRegisteredMeter(prefix+".events-received", metrics.DefaultRegistry),
		}
	}
	return ret
}
Example No. 8
func main() {
	defer common.LogPanic()
	common.InitWithMetrics("probeserver", graphiteServer)

	client, err := auth.NewDefaultJWTServiceAccountClient("https://www.googleapis.com/auth/userinfo.email")
	if err != nil {
		glog.Fatalf("Failed to create client for talking to the issue tracker: %s", err)
	}
	go monitorIssueTracker(client)
	glog.Infoln("Looking for Graphite server.")
	addr, err := net.ResolveTCPAddr("tcp", *graphiteServer)
	if err != nil {
		glog.Fatalln("Failed to resolve the Graphite server: ", err)
	}
	glog.Infoln("Found Graphite server.")

	liveness := imetrics.NewLiveness("probes")

	// We have two sets of metrics, one for the probes and one for the probe
	// server itself. The server's metrics are handled by common.Init()
	probeRegistry := metrics.NewRegistry()
	go graphite.Graphite(probeRegistry, common.SAMPLE_PERIOD, *prefix, addr)

	// TODO(jcgregorio) Monitor config file and reload if it changes.
	cfg, err := readConfigFiles(*config)
	if err != nil {
		glog.Fatalln("Failed to read config file: ", err)
	}
	glog.Infoln("Successfully read config file.")
	// Register counters for each probe.
	for name, probe := range cfg {
		probe.failure = metrics.NewRegisteredGauge(name+".failure", probeRegistry)
		probe.latency = metrics.NewRegisteredGauge(name+".latency", probeRegistry)
	}

	// Create a client that uses our dialer with a timeout.
	c := &http.Client{
		Transport: &http.Transport{
			Dial: dialTimeout,
		},
	}
	probeOneRound(cfg, c)
	for _ = range time.Tick(*runEvery) {
		probeOneRound(cfg, c)
		liveness.Update()
	}
}
Example No. 9
// IngestNewBuildsLoop continually ingests new builds.
func IngestNewBuildsLoop(workdir string) {
	repos := gitinfo.NewRepoMap(workdir)
	var wg sync.WaitGroup
	for _, m := range MASTER_NAMES {
		wg.Add(1)
		go func(master string) {
			defer wg.Done()
			lv := metrics.NewLiveness(fmt.Sprintf("buildbot-ingest-%s", master))
			for _ = range time.Tick(30 * time.Second) {
				if err := ingestNewBuilds(master, repos); err != nil {
					glog.Errorf("Failed to ingest new builds: %v", err)
				} else {
					lv.Update()
				}
			}
		}(m)
	}
	wg.Wait()
}
Example No. 10
// NewTraceServiceDB creates a new DB that stores the data in the BoltDB backed
// gRPC accessible traceservice.
func NewTraceServiceDB(conn *grpc.ClientConn, traceBuilder tiling.TraceBuilder) (*TsDB, error) {
	ret := &TsDB{
		conn:         conn,
		traceService: traceservice.NewTraceServiceClient(conn),
		traceBuilder: traceBuilder,
		cache:        lru.New(MAX_ID_CACHED),
		paramsCache:  map[string]map[string]string{},
		id64Cache:    map[uint64]string{},
		ctx:          context.Background(),
	}

	// This ping causes the client to try and reach the backend. If the backend
	// is down, it will keep trying until it's up.
	if err := ret.ping(); err != nil {
		return nil, err
	}

	// Liveness metric.
	go func() {
		liveness := metrics.NewLiveness("tracedb-ping")
		for _ = range time.Tick(time.Minute) {
			if ret.ping() == nil {
				liveness.Update()
			}
		}
	}()

	// Keep the caches sizes in check.
	go func() {
		for _ = range time.Tick(15 * time.Minute) {
			ret.clearMutex.Lock()
			if len(ret.paramsCache) > MAX_ID_CACHED {
				ret.paramsCache = map[string]map[string]string{}
				glog.Warning("Had to clear paramsCache, this is unexpected. MAX_ID_CACHED too small?")
			}
			if len(ret.id64Cache) > MAX_ID_CACHED {
				ret.id64Cache = map[uint64]string{}
				glog.Warning("Had to clear id64Cache, this is unexpected. MAX_ID_CACHED too small?")
			}
			ret.clearMutex.Unlock()
		}
	}()
	return ret, nil
}
Example No. 11
func (s *StatusWatcher) calcAndWatchStatus() error {
	expChanges := make(chan []string)
	s.storages.EventBus.SubscribeAsync(expstorage.EV_EXPSTORAGE_CHANGED, func(e interface{}) {
		expChanges <- e.([]string)
	})

	tileStream := s.storages.GetTileStreamNow(2*time.Minute, false)

	lastTile := <-tileStream
	if err := s.calcStatus(lastTile); err != nil {
		return err
	}

	liveness := imetrics.NewLiveness("status-monitoring")

	go func() {
		for {
			select {
			case <-tileStream:
				tile, err := s.storages.GetLastTileTrimmed(false)
				if err != nil {
					glog.Errorf("Error retrieving tile: %s", err)
					continue
				}
				if err := s.calcStatus(tile); err != nil {
					glog.Errorf("Error calculating status: %s", err)
				} else {
					lastTile = tile
					liveness.Update()
				}
			case <-expChanges:
				storage.DrainChangeChannel(expChanges)
				if err := s.calcStatus(lastTile); err != nil {
					glog.Errorf("Error calculating tile after expectation update: %s", err)
				}
				liveness.Update()
			}
		}
	}()

	return nil
}
Example No. 12
// Init starts a new monitoring routine for the given
// ignore store that counts expired ignore rules and pushes
// that info into a metric.
func Init(store IgnoreStore) error {
	numExpired := metrics.NewRegisteredGauge("num-expired-ignore-rules", metrics.DefaultRegistry)
	liveness := imetrics.NewLiveness("expired-ignore-rules-monitoring")

	err := oneStep(store, numExpired)
	if err != nil {
		return fmt.Errorf("Unable to start monitoring ignore rules: %s", err)
	}
	go func() {
		for _ = range time.Tick(time.Minute) {
			err = oneStep(store, numExpired)
			if err != nil {
				glog.Errorf("Failed one step of monitoring ignore rules: %s", err)
				continue
			}
			liveness.Update()
		}
	}()

	return nil
}
Example No. 13
func (h *historian) start() error {
	expChanges := make(chan []string)
	h.storages.EventBus.SubscribeAsync(expstorage.EV_EXPSTORAGE_CHANGED, func(e interface{}) {
		expChanges <- e.([]string)
	})
	tileStream := h.storages.GetTileStreamNow(2*time.Minute, true)

	lastTile := <-tileStream
	if err := h.updateDigestInfo(lastTile); err != nil {
		return err
	}
	liveness := metrics.NewLiveness("digest-history-monitoring")

	// Keep processing tiles and feed them into the process channel.
	go func() {
		for {
			select {
			case tile := <-tileStream:
				if err := h.updateDigestInfo(tile); err != nil {
					glog.Errorf("Error calculating status: %s", err)
					continue
				} else {
					lastTile = tile
				}
			case <-expChanges:
				storage.DrainChangeChannel(expChanges)
				if err := h.updateDigestInfo(lastTile); err != nil {
					glog.Errorf("Error calculating tile after expectation udpate: %s", err)
					continue
				}
			}
			liveness.Update()
		}
	}()

	return nil
}
Example No. 14
// IngestNewBuildsLoop continually ingests new builds.
func IngestNewBuildsLoop(db DB, workdir string) error {
	if _, ok := db.(*localDB); !ok {
		return fmt.Errorf("Can only ingest builds with a local DB instance.")
	}
	repos := gitinfo.NewRepoMap(workdir)
	go func() {
		var wg sync.WaitGroup
		for _, m := range MASTER_NAMES {
			wg.Add(1)
			go func(master string) {
				defer wg.Done()
				lv := metrics.NewLiveness(fmt.Sprintf("buildbot-ingest-%s", master))
				for _ = range time.Tick(10 * time.Second) {
					if err := ingestNewBuilds(db.(*localDB), master, repos); err != nil {
						glog.Errorf("Failed to ingest new builds: %s", err)
					} else {
						lv.Update()
					}
				}
			}(m)
		}
		wg.Wait()
	}()
	return nil
}
Example No. 15
// NewMasterTileBuilder creates a new MasterTileBuilder given the gitinfo, and
// loads Tiles from the given DB. The tiles contain the last 'tileSize' commits
// and are refreshed every TILE_REFRESH_DURATION.
func NewMasterTileBuilder(db DB, git *gitinfo.GitInfo, tileSize int, evt *eventbus.EventBus) (MasterTileBuilder, error) {
	ret := &masterTileBuilder{
		tileSize: tileSize,
		db:       db,
		git:      git,
		evt:      evt,
	}
	if err := ret.LoadTile(); err != nil {
		return nil, fmt.Errorf("NewTraceStore: Failed to load initial Tile: %s", err)
	}
	evt.Publish(NEW_TILE_AVAILABLE_EVENT, ret.GetTile())
	go func() {
		liveness := metrics.NewLiveness("perf-tracedb-tile-refresh")
		for _ = range time.Tick(TILE_REFRESH_DURATION) {
			if err := ret.LoadTile(); err != nil {
				glog.Errorf("Failed to refresh tile: %s", err)
			} else {
				liveness.Update()
				evt.Publish(NEW_TILE_AVAILABLE_EVENT, ret.GetTile())
			}
		}
	}()
	return ret, nil
}
Example No. 16
	local          = flag.Bool("local", false, "Running locally if true. As opposed to in production.")
	oauthCacheFile = flag.String("oauth_cache_file", "", "Path to the OAuth credential cache file.")
	targetList     = flag.String("targets", "", "The targets to monitor, a space separated list.")
	codenameDbDir  = flag.String("codename_db_dir", "codenames", "The location of the leveldb database that holds the mappings between targets and their codenames.")
	period         = flag.Duration("period", 5*time.Minute, "The time between ingestion runs.")
)

var (
	// terminal_build_status are the tradefed build statuses that mean the build is done.
	terminal_build_status = []string{"complete", "error"}

	// codenameDB is a leveldb to store codenames and their deobfuscated counterparts.
	codenameDB *leveldb.DB

	// liveness is a metric for the time since last successful run through step().
	liveness = skmetics.NewLiveness("android_internal_ingest")
)

// isFinished returns true if the Build has finished running.
func isFinished(b *androidbuildinternal.Build) bool {
	return util.In(b.BuildAttemptStatus, terminal_build_status)
}

// buildFromCommit builds a buildbot.Build from the commit and the info
// returned from the Apiary API.  It also returns a key that uniquely identifies
// this build.
func buildFromCommit(build *androidbuildinternal.Build, commit *vcsinfo.ShortCommit) (string, *buildbot.Build) {
	codename := util.StringToCodeName(build.Target.Name)
	key := build.Branch + ":" + build.Target.Name + ":" + build.BuildId
	b := &buildbot.Build{
		Builder:     codename,