func main() { cfg := flag.String("c", "cfg.json", "specify config file") version := flag.Bool("v", false, "show version") versionGit := flag.Bool("vg", false, "show version and git commit log") flag.Parse() if *version { fmt.Println(g.VERSION) os.Exit(0) } if *versionGit { fmt.Println(g.VERSION, g.COMMIT) os.Exit(0) } // global config g.ParseConfig(*cfg) // init db g.InitDB() // rrdtool before api for disable loopback connection rrdtool.Start() // start api go api.Start() // start indexing index.Start() // start http server go http.Start() start_signal(os.Getpid(), g.Config()) }
func configCommonRoutes() { http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { w.Write([]byte("ok")) }) http.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(g.VERSION)) }) http.HandleFunc("/workdir", func(w http.ResponseWriter, r *http.Request) { RenderDataJson(w, file.SelfDir()) }) http.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) { RenderDataJson(w, g.Config()) }) http.HandleFunc("/config/reload", func(w http.ResponseWriter, r *http.Request) { if strings.HasPrefix(r.RemoteAddr, "127.0.0.1") { g.ParseConfig(g.ConfigFile) RenderDataJson(w, "ok") } else { RenderDataJson(w, "no privilege") } }) }
// index收到一条新上报的监控数据,尝试用于增量更新索引 func ReceiveItem(item *cmodel.GraphItem, md5 string) { if item == nil { return } uuid := item.UUID() // 已上报过的数据 if indexedItemCache.ContainsKey(md5) { old := indexedItemCache.Get(md5).(*IndexCacheItem) if uuid == old.UUID { // dsType+step没有发生变化,只更新缓存 TODO 存在线程安全的问题 old.Item = item } else { // dsType+step变化了,当成一个新的增量来处理(甚至,不用rrd文件来过滤) //indexedItemCache.Remove(md5) unIndexedItemCache.Put(md5, NewIndexCacheItem(uuid, item)) } return } // 是否有rrdtool文件存在,如果有 认为已建立索引 // 针对 索引缓存重建场景 做的优化, 结合索引全量更新 来保证一致性 rrdFileName := g.RrdFileName(g.Config().RRD.Storage, md5, item.DsType, item.Step) if g.IsRrdFileExist(rrdFileName) { indexedItemCache.Put(md5, NewIndexCacheItem(uuid, item)) return } // 缓存未命中, 放入增量更新队列 unIndexedItemCache.Put(md5, NewIndexCacheItem(uuid, item)) }
func Start() { if !g.Config().Http.Enabled { log.Println("http.Start warning, not enabled") return } if g.Config().Migrate.Enabled { http.HandleFunc("/counter/migrate", func(w http.ResponseWriter, r *http.Request) { RenderDataJson(w, rrdtool.GetCounter()) }) } addr := g.Config().Http.Listen if addr == "" { return } s := &http.Server{ Addr: addr, MaxHeaderBytes: 1 << 30, } log.Println("http listening", addr) ln, err := net.Listen("tcp", addr) if err != nil { log.Fatalln(err) return } l := ln.(*net.TCPListener) go s.Serve(TcpKeepAliveListener{l}) select { case <-Close_chan: log.Println("http recv sigout and exit...") l.Close() Close_done_chan <- 1 return } }
func fetch_rrd(client **rpc.Client, key string, addr string) error { var ( err error flag uint32 md5 string dsType string filename string step, i int rrdfile g.File ) cfg := g.Config() if flag, err = store.GraphItems.GetFlag(key); err != nil { return err } store.GraphItems.SetFlag(key, flag|g.GRAPH_F_FETCHING) md5, dsType, step, _ = g.SplitRrdCacheKey(key) filename = g.RrdFileName(cfg.RRD.Storage, md5, dsType, step) for i = 0; i < 3; i++ { err = rpc_call(*client, "Graph.GetRrd", key, &rrdfile, time.Duration(cfg.CallTimeout)*time.Millisecond) if err == nil { done := make(chan error, 1) io_task_chan <- &io_task_t{ method: IO_TASK_M_WRITE, args: &g.File{ Filename: filename, Body: rrdfile.Body[:], }, done: done, } if err = <-done; err != nil { goto out } else { flag &= ^g.GRAPH_F_MISS goto out } } else { log.Println(err) } if err == rpc.ErrShutdown { reconnection(client, addr) } } out: flag &= ^g.GRAPH_F_FETCHING store.GraphItems.SetFlag(key, flag) return err }
func CommitByKey(key string) { md5, dsType, step, err := g.SplitRrdCacheKey(key) if err != nil { return } filename := g.RrdFileName(g.Config().RRD.Storage, md5, dsType, step) items := store.GraphItems.PopAll(key) if len(items) == 0 { return } FlushFile(filename, items) }
func Start() { cfg := g.Config() var err error // check data dir if err = file.EnsureDirRW(cfg.RRD.Storage); err != nil { log.Fatalln("rrdtool.Start error, bad data dir "+cfg.RRD.Storage+",", err) } migrate_start(cfg) // sync disk go syncDisk() go ioWorker() log.Println("rrdtool.Start ok") }
func (this *Graph) GetRrd(key string, rrdfile *g.File) (err error) { if md5, dsType, step, err := g.SplitRrdCacheKey(key); err != nil { return err } else { rrdfile.Filename = g.RrdFileName(g.Config().RRD.Storage, md5, dsType, step) } items := store.GraphItems.PopAll(key) if len(items) > 0 { rrdtool.FlushFile(rrdfile.Filename, items) } rrdfile.Body, err = rrdtool.ReadFile(rrdfile.Filename) return }
func (this *Graph) Info(param cmodel.GraphInfoParam, resp *cmodel.GraphInfoResp) error { // statistics proc.GraphInfoCnt.Incr() dsType, step, exists := index.GetTypeAndStep(param.Endpoint, param.Counter) if !exists { return nil } md5 := cutils.Md5(param.Endpoint + "/" + param.Counter) filename := fmt.Sprintf("%s/%s/%s_%s_%d.rrd", g.Config().RRD.Storage, md5[0:2], md5, dsType, step) resp.ConsolFun = dsType resp.Step = step resp.Filename = filename return nil }
func send_data(client **rpc.Client, key string, addr string) error { var ( err error flag uint32 resp *cmodel.SimpleRpcResponse i int ) //remote if flag, err = store.GraphItems.GetFlag(key); err != nil { return err } cfg := g.Config() store.GraphItems.SetFlag(key, flag|g.GRAPH_F_SENDING) items := store.GraphItems.PopAll(key) items_size := len(items) if items_size == 0 { goto out } resp = &cmodel.SimpleRpcResponse{} for i = 0; i < 3; i++ { err = rpc_call(*client, "Graph.Send", items, resp, time.Duration(cfg.CallTimeout)*time.Millisecond) if err == nil { goto out } if err == rpc.ErrShutdown { reconnection(client, addr) } } // err store.GraphItems.PushAll(key, items) //flag |= g.GRAPH_F_ERR out: flag &= ^g.GRAPH_F_SENDING store.GraphItems.SetFlag(key, flag) return err }
func query_data(client **rpc.Client, addr string, args interface{}, resp interface{}) error { var ( err error i int ) for i = 0; i < 3; i++ { err = rpc_call(*client, "Graph.Query", args, resp, time.Duration(g.Config().CallTimeout)*time.Millisecond) if err == nil { break } if err == rpc.ErrShutdown { reconnection(client, addr) } } return err }
func handleItems(items []*cmodel.GraphItem) { if items == nil { return } count := len(items) if count == 0 { return } cfg := g.Config() for i := 0; i < count; i++ { if items[i] == nil { continue } dsType := items[i].DsType step := items[i].Step checksum := items[i].Checksum() key := g.FormRrdCacheKey(checksum, dsType, step) //statistics proc.GraphRpcRecvCnt.Incr() // To Graph first := store.GraphItems.First(key) if first != nil && items[i].Timestamp <= first.Timestamp { continue } store.GraphItems.PushFront(key, items[i], checksum, cfg) // To Index index.ReceiveItem(items[i], checksum) // To History store.AddItem(checksum, items[i]) } }
func FlushRRD(idx int, force bool) { begin := time.Now() atomic.StoreInt32(&flushrrd_timeout, 0) keys := store.GraphItems.KeysByIndex(idx) if len(keys) == 0 { return } for _, key := range keys { flag, _ := store.GraphItems.GetFlag(key) //write err data to local filename if force == false && g.Config().Migrate.Enabled && flag&g.GRAPH_F_MISS != 0 { if time.Since(begin) > time.Millisecond*g.FLUSH_DISK_STEP { atomic.StoreInt32(&flushrrd_timeout, 1) } PullByKey(key) } else { CommitByKey(key) } } }
func Start() { if !g.Config().Rpc.Enabled { log.Println("rpc.Start warning, not enabled") return } addr := g.Config().Rpc.Listen tcpAddr, err := net.ResolveTCPAddr("tcp", addr) if err != nil { log.Fatalf("rpc.Start error, net.ResolveTCPAddr failed, %s", err) } listener, err := net.ListenTCP("tcp", tcpAddr) if err != nil { log.Fatalf("rpc.Start error, listen %s failed, %s", addr, err) } else { log.Println("rpc.Start ok, listening on", addr) } rpc.Register(new(Graph)) go func() { var tempDelay time.Duration // how long to sleep on accept failure for { conn, err := listener.Accept() if err != nil { if tempDelay == 0 { tempDelay = 5 * time.Millisecond } else { tempDelay *= 2 } if max := 1 * time.Second; tempDelay > max { tempDelay = max } time.Sleep(tempDelay) continue } tempDelay = 0 go func() { e := connects.insert(conn) defer connects.remove(e) rpc.ServeConn(conn) }() } }() select { case <-Close_chan: log.Println("rpc, recv sigout and exiting...") listener.Close() Close_done_chan <- 1 connects.Lock() for e := connects.list.Front(); e != nil; e = e.Next() { e.Value.(net.Conn).Close() } connects.Unlock() return } }
// Query answers a graph query for param.Endpoint/param.Counter over the
// window [param.Start, param.End] and stores the result in resp.
//
// resp.Values is first set to an empty slice and resp.Endpoint/resp.Counter
// are copied from param. The function returns early with that empty result
// when the counter is unknown to the index, or when the step-aligned window
// does not span at least one step beyond start.
//
// Stored data comes either from a remote node (when migration is enabled and
// the key carries GRAPH_F_MISS) or from the local rrd file. Unless the window
// starts before rra1StartTs or no items are cached, the stored data is merged
// with the items still in the in-memory cache, and the result is normalised
// to one point per step with NaN for missing points.
//
// The returned error is always nil; errors from the remote query and from
// rrdtool.Fetch are discarded.
func (this *Graph) Query(param cmodel.GraphQueryParam, resp *cmodel.GraphQueryResponse) error {
	var (
		datas      []*cmodel.RRDData
		datas_size int
	)

	// statistics
	proc.GraphQueryCnt.Incr()

	cfg := g.Config()

	// form empty response
	resp.Values = []*cmodel.RRDData{}
	resp.Endpoint = param.Endpoint
	resp.Counter = param.Counter
	dsType, step, exists := index.GetTypeAndStep(param.Endpoint, param.Counter) // complete dsType and step
	// qstep is the step passed to rrdtool.Fetch and reported in resp.Step; a
	// non-zero param.Step overrides the indexed step. Timestamp alignment
	// below always uses the indexed step.
	qstep := step
	if param.Step != 0 {
		qstep = param.Step
	}
	if !exists {
		return nil
	}
	resp.DsType = dsType
	resp.Step = qstep

	// Align start down to a step boundary; align end down and then add one
	// step.
	start_ts := param.Start - param.Start%int64(step)
	end_ts := param.End - param.End%int64(step) + int64(step)
	if end_ts-start_ts-int64(step) < 1 {
		return nil
	}

	md5 := cutils.Md5(param.Endpoint + "/" + param.Counter)
	key := g.FormRrdCacheKey(md5, dsType, step)
	filename := g.RrdFileName(cfg.RRD.Storage, md5, dsType, step)

	// read cached items
	items, flag := store.GraphItems.FetchAll(key)
	items_size := len(items)

	if cfg.Migrate.Enabled && flag&g.GRAPH_F_MISS != 0 {
		// Hand the query to the net task channel of the node selected by
		// rrdtool.Consistent and wait for completion. The lookup error and the
		// value received from done are both discarded.
		node, _ := rrdtool.Consistent.Get(param.Endpoint + "/" + param.Counter)
		done := make(chan error, 1)
		res := &cmodel.GraphAccurateQueryResponse{}
		rrdtool.Net_task_ch[node] <- &rrdtool.Net_task_t{
			Method: rrdtool.NET_TASK_M_QUERY,
			Done:   done,
			Args:   param,
			Reply:  res,
		}
		<-done
		// fetch data from remote
		datas = res.Values
		datas_size = len(datas)
	} else {
		// read data from rrd file (the fetch error is discarded)
		datas, _ = rrdtool.Fetch(filename, param.ConsolFun, start_ts, end_ts, qstep)
		datas_size = len(datas)
	}

	// rra1StartTs is the current step boundary minus RRA1PointCnt steps.
	nowTs := time.Now().Unix()
	lastUpTs := nowTs - nowTs%int64(step)
	rra1StartTs := lastUpTs - int64(rrdtool.RRA1PointCnt*step)

	// consolidated, do not merge
	if start_ts < rra1StartTs {
		resp.Values = datas
		goto _RETURN_OK
	}

	// no cached items, do not merge
	if items_size < 1 {
		resp.Values = datas
		goto _RETURN_OK
	}

	// merge
	{
		// fmt cached items: walk from the first cached timestamp in step
		// increments, emitting one point per step inside [start_ts, end_ts].
		var val cmodel.JsonFloat
		cache := make([]*cmodel.RRDData, 0)

		ts := items[0].Timestamp
		itemEndTs := items[items_size-1].Timestamp
		itemIdx := 0
		if dsType == g.DERIVE || dsType == g.COUNTER {
			// Rate types: a point is the per-second difference between two
			// cached items exactly one step apart; a negative rate or a gap
			// yields NaN.
			for ts < itemEndTs {
				if itemIdx < items_size-1 && ts == items[itemIdx].Timestamp &&
					ts == items[itemIdx+1].Timestamp-int64(step) {
					val = cmodel.JsonFloat(items[itemIdx+1].Value-items[itemIdx].Value) / cmodel.JsonFloat(step)
					if val < 0 {
						val = cmodel.JsonFloat(math.NaN())
					}
					itemIdx++
				} else {
					// missing
					val = cmodel.JsonFloat(math.NaN())
				}

				if ts >= start_ts && ts <= end_ts {
					cache = append(cache, &cmodel.RRDData{Timestamp: ts, Value: val})
				}
				ts = ts + int64(step)
			}
		} else if dsType == g.GAUGE {
			// Gauge: the cached value is used as-is; a gap yields NaN.
			for ts <= itemEndTs {
				if itemIdx < items_size && ts == items[itemIdx].Timestamp {
					val = cmodel.JsonFloat(items[itemIdx].Value)
					itemIdx++
				} else {
					// missing
					val = cmodel.JsonFloat(math.NaN())
				}

				if ts >= start_ts && ts <= end_ts {
					cache = append(cache, &cmodel.RRDData{Timestamp: ts, Value: val})
				}
				ts = ts + int64(step)
			}
		}
		cache_size := len(cache)

		// do merging: start from the stored points that fall inside the window
		merged := make([]*cmodel.RRDData, 0)
		if datas_size > 0 {
			for _, val := range datas {
				if val.Timestamp >= start_ts && val.Timestamp <= end_ts {
					merged = append(merged, val) // data returned by rrdtool has continuous timestamps, with no skipped points
				}
			}
		}

		if cache_size > 0 {
			rrdDataSize := len(merged)
			lastTs := cache[0].Timestamp

			// find junction: the last stored point strictly older than the
			// first cached point (rrdDataIdx ends at -1 when there is none)
			rrdDataIdx := 0
			for rrdDataIdx = rrdDataSize - 1; rrdDataIdx >= 0; rrdDataIdx-- {
				if merged[rrdDataIdx].Timestamp < cache[0].Timestamp {
					lastTs = merged[rrdDataIdx].Timestamp
					break
				}
			}

			// fix missing: pad with NaN between the junction and the first
			// cached point
			for ts := lastTs + int64(step); ts < cache[0].Timestamp; ts += int64(step) {
				merged = append(merged, &cmodel.RRDData{Timestamp: ts, Value: cmodel.JsonFloat(math.NaN())})
			}

			// merge cached items to result: overwrite stored points after the
			// junction with non-NaN cached values, then append the remainder
			rrdDataIdx += 1
			for cacheIdx := 0; cacheIdx < cache_size; cacheIdx++ {
				if rrdDataIdx < rrdDataSize {
					if !math.IsNaN(float64(cache[cacheIdx].Value)) {
						merged[rrdDataIdx] = cache[cacheIdx]
					}
				} else {
					merged = append(merged, cache[cacheIdx])
				}
				rrdDataIdx++
			}
		}
		mergedSize := len(merged)

		// fmt result: exactly (end_ts-start_ts)/step points starting at
		// start_ts, with NaN wherever merged has no point for the timestamp
		ret_size := int((end_ts - start_ts) / int64(step))
		ret := make([]*cmodel.RRDData, ret_size, ret_size)
		mergedIdx := 0
		ts = start_ts
		for i := 0; i < ret_size; i++ {
			if mergedIdx < mergedSize && ts == merged[mergedIdx].Timestamp {
				ret[i] = merged[mergedIdx]
				mergedIdx++
			} else {
				ret[i] = &cmodel.RRDData{Timestamp: ts, Value: cmodel.JsonFloat(math.NaN())}
			}
			ts += int64(step)
		}
		resp.Values = ret
	}

_RETURN_OK:
	// statistics
	proc.GraphQueryItemCnt.IncrBy(int64(len(resp.Values)))
	return nil
}