func (self *EruApp) Report() { defer self.Client.Close() defer logs.Info(self.Name, self.EntryPoint, self.ID[:12], "metrics report stop") logs.Info(self.Name, self.EntryPoint, self.ID[:12], "metrics report start") for { select { case now := <-time.Tick(self.Step): go func() { info, upOk := self.updateStats() if isLimit { limitChan <- SoftLimit{upOk, self.ID, info} } if !upOk { logs.Info("Update mertic failed", self.Meta.ID[:12]) return } rate := self.calcRate(info, now) self.saveLast(info) // for safe go self.send(rate) }() case <-self.Stop: return } } }
func (self *EruApp) Report() { t := time.NewTicker(self.Step) defer t.Stop() defer self.Client.Close() defer logs.Info(self.Name, self.EntryPoint, self.ID[:12], "metrics report stop") logs.Info(self.Name, self.EntryPoint, self.ID[:12], "metrics report start") for { select { case now := <-t.C: go func() { if info, err := self.UpdateStats(self.ID); err == nil { if isLimit { limitChan <- SoftLimit{self.ID, info} } rate := self.CalcRate(info, now) self.SaveLast(info) go self.Send(rate) } else { logs.Info("Update mertic failed", self.ID[:12]) } }() case <-self.Stop: return } } }
func SetDefaultRoute(cid, gateway string) bool { lock.Lock() defer lock.Unlock() logs.Info("Set", cid[:12], "default route", gateway) container, err := g.Docker.InspectContainer(cid) if err != nil { logs.Info("RouteSetter inspect docker failed", err) return false } pid := strconv.Itoa(container.State.Pid) cmd := exec.Command("nsenter", "-t", pid, "-n", "route", "del", "default") if err := cmd.Run(); err != nil { logs.Info("Clean default route failed", err) return false } cmd = exec.Command("nsenter", "-t", pid, "-n", "route", "add", "default", "gw", gateway) if err := cmd.Run(); err != nil { logs.Info("RouteSetter set default route failed", err) return false } logs.Info("Set default route success", cid[:12], gateway) return true }
func getContainerMeta(cid string) map[string]interface{} { conn := g.GetRedisConn() defer g.ReleaseRedisConn(conn) containersKey := fmt.Sprintf("eru:agent:%s:containers:meta", g.Config.HostName) rep, err := gore.NewCommand("HGET", containersKey, cid).Run(conn) if err != nil { logs.Info("Status get meta", err) return nil } var result map[string]interface{} if rep.IsNil() { return nil } if b, err := rep.Bytes(); err != nil { logs.Info("Status get meta", err) return nil } else { if err := json.Unmarshal(b, &result); err != nil { logs.Info("Status unmarshal meta", err) return nil } } return result }
func monitor() { for event := range events { switch event.Status { case common.STATUS_DIE: logs.Debug("Status", event.Status, event.ID[:12], event.From) app.Remove(event.ID) reportContainerDeath(event.ID) case common.STATUS_START: logs.Debug("Status", event.Status, event.ID[:12], event.From) // if not in watching list, just ignore it if meta := getContainerMeta(event.ID); meta != nil && !app.Valid(event.ID) { container, err := g.Docker.InspectContainer(event.ID) if err != nil { logs.Info("Status inspect docker failed", err) break } eruApp := app.NewEruApp(container, meta) if eruApp == nil { logs.Info("Create EruApp failed") break } lenz.Attacher.Attach(&eruApp.Meta) app.Add(eruApp) reportContainerCure(event.ID) } } } }
func routeWatcher() { conn := g.GetRedisConn() defer g.ReleaseRedisConn(conn) subs := gore.NewSubscriptions(conn) defer subs.Close() subKey := fmt.Sprintf("eru:agent:%s:route", g.Config.HostName) logs.Debug("API route subscribe", subKey) subs.Subscribe(subKey) for message := range subs.Message() { if message == nil { logs.Info("API route watcher shutdown") break } command := string(message.Message) logs.Debug("API route watcher get", command) parser := strings.Split(command, "|") if len(parser) != 2 { logs.Info("API route watcher command invaild", command) continue } cid, gateway := parser[0], parser[1] if !network.SetDefaultRoute(cid, gateway) { logs.Info("Set default route failed") } } }
func (self *EruApp) updateStats() bool { statsChan := make(chan *docker.Stats) opt := docker.StatsOptions{self.ID, statsChan, false, nil, time.Duration(2 * time.Second)} go func() { if err := g.Docker.Stats(opt); err != nil { logs.Info("Get Stats Failed", err) } }() stats := <-statsChan if stats == nil { return false } self.Info["cpu_user"] = stats.CPUStats.CPUUsage.UsageInUsermode self.Info["cpu_system"] = stats.CPUStats.CPUUsage.UsageInKernelmode self.Info["cpu_usage"] = stats.CPUStats.CPUUsage.TotalUsage //FIXME in container it will get all CPUStats // for seq, d := range stats.CPUStats.CPUUsage.PercpuUsage { // self.Info[fmt.Sprintf("cpu_%d", seq)] = d // } self.Info["mem_usage"] = stats.MemoryStats.Usage self.Info["mem_max_usage"] = stats.MemoryStats.MaxUsage self.Info["mem_rss"] = stats.MemoryStats.Stats.Rss network, err := GetNetStats(self.Exec) if err != nil { logs.Info(err) return false } for k, d := range network { self.Info[k] = d } return true }
// URL /api/container/add/ func addNewContainer(req *Request) (int, interface{}) { type Data struct { Control string `json:"control"` ContainerID string `json:"container_id"` Meta map[string]interface{} `json:"meta"` } data := &Data{} decoder := json.NewDecoder(req.Body) err := decoder.Decode(data) if err != nil { return http.StatusBadRequest, JSON{"message": "wrong JSON format"} } switch data.Control { case "+": if app.Valid(data.ContainerID) { break } logs.Info("API status watch", data.ContainerID) container, err := g.Docker.InspectContainer(data.ContainerID) if err != nil { logs.Info("API status inspect docker failed", err) break } if eruApp := app.NewEruApp(container.ID, container.Name, data.Meta); eruApp != nil { app.Add(eruApp) lenz.Attacher.Attach(&eruApp.Meta) } } return http.StatusOK, JSON{"message": "ok"} }
func AddRoute(cid, CIDR string, ifc string) bool { _, err := g.Docker.InspectContainer(cid) if err != nil { logs.Info("VLanSetter inspect docker failed", err) return false } logs.Info("Add route success", cid, CIDR, ifc) return true }
func AddVLan(vethName, ips, cid string) bool { _, err := g.Docker.InspectContainer(cid) if err != nil { logs.Info("VLanSetter inspect docker failed", err) return false } logs.Info("Add VLAN device success", cid, vethName) return true }
func SetDefaultRoute(cid, gateway string) bool { _, err := g.Docker.InspectContainer(cid) if err != nil { logs.Info("VLanSetter inspect docker failed", err) return false } logs.Info("Set default route success", cid, gateway) return true }
func load() { containers, err := g.Docker.ListContainers(docker.ListContainersOptions{All: true}) if err != nil { logs.Assert(err, "List containers") } conn := g.GetRedisConn() defer g.ReleaseRedisConn(conn) containersKey := fmt.Sprintf("eru:agent:%s:containers:meta", g.Config.HostName) logs.Debug("Status get targets from", containersKey) rep, err := gore.NewCommand("HGETALL", containersKey).Run(conn) if err != nil { logs.Assert(err, "Status get targets") } if rep.IsNil() { return } targets, err := rep.Map() if err != nil { logs.Assert(err, "Status load targets") } logs.Debug("Status targets:", targets) logs.Info("Status load container") for _, container := range containers { if _, ok := targets[container.ID]; !ok { continue } status := getStatus(container.Status) if status != common.STATUS_START { reportContainerDeath(container.ID) continue } var meta map[string]interface{} if err := json.Unmarshal([]byte(targets[container.ID]), &meta); err != nil { logs.Info("Status load failed", err) continue } c, err := g.Docker.InspectContainer(container.ID) if err != nil { logs.Info("Status inspect docker failed", err) continue } if eruApp := app.NewEruApp(c, meta); eruApp != nil { lenz.Attacher.Attach(&eruApp.Meta) app.Add(eruApp) reportContainerCure(container.ID) } } }
func Streamer(route *defines.Route, logstream chan *defines.Log, stdout bool) { var types map[string]struct{} var count int64 = 0 if route.Source != nil { types = make(map[string]struct{}) for _, t := range route.Source.Types { types[t] = struct{}{} } } for logline := range logstream { if types != nil { if _, ok := types[logline.Type]; !ok { continue } } logline.Tag = route.Target.AppendTag logline.Count = count switch stdout { case true: logs.Info("Debug Output", logline) default: for offset := 0; offset < route.Backends.Len(); offset++ { addr, err := route.Backends.Get(logline.Name, offset) if err != nil { logs.Info("Get backend failed", err, logline.Name, logline.Data) break } if _, ok := upstreams[addr]; !ok { if ups, err := NewUpStream(addr); err != nil || ups == nil { route.Backends.Remove(addr) continue } else { upstreams[addr] = ups } } if err := upstreams[addr].WriteData(logline); err != nil { upstreams[addr].Close() delete(upstreams, addr) continue } //logs.Debug("Lenz Send", logline.Name, logline.EntryPoint, logline.ID, "to", addr) break } } if count == math.MaxInt64 { count = 0 } else { count++ } } }
func CloseLenz() { logs.Info("Close all lenz streamer") routes, err := Router.GetAll() if err != nil { logs.Info("Get all lenz route failed", err) return } for _, route := range routes { if !Router.Remove(route.ID) { logs.Info("Close lenz route failed", route.ID) } } }
func softOOMKill(cid string, rate float64) { logs.Debug("OOM killed", cid[:12]) conn := g.GetRedisConn() defer g.ReleaseRedisConn(conn) key := fmt.Sprintf("eru:agent:%s:container:reason", cid) if _, err := gore.NewCommand("SET", key, common.OOM_KILLED).Run(conn); err != nil { logs.Info("OOM killed set flag", err) } if err := g.Docker.StopContainer(cid, 10); err != nil { logs.Info("OOM killed failed", cid[:12]) return } logs.Info("OOM killed success", cid[:12]) }
func (self *EruApp) InitMetric() bool { var err error if self.statFile, err = os.Open(fmt.Sprintf("/proc/%d/net/dev", self.Meta.Pid)); err != nil { logs.Info("Open net stats failed", self.Meta.ID[:12]) return false } info, upOk := self.updateStats() if !upOk { logs.Info("Init mertics failed", self.Meta.ID[:12]) return false } self.Last = time.Now() self.saveLast(info) return true }
func InitTransfers() { Transfers = consistent.New() for _, transfer := range Config.Metrics.Transfers { Transfers.Add(transfer) } logs.Info("Metrics initiated") }
func HTTPServe() { restfulAPIServer := pat.New() handlers := map[string]map[string]func(*Request) (int, interface{}){ "GET": { "/profile/": profile, "/version/": version, "/api/app/list/": listEruApps, }, "POST": { "/api/container/add/": addNewContainer, "/api/container/:container_id/addvlan/": addVlanForContainer, "/api/container/:container_id/setroute/": setRouteForContainer, }, } for method, routes := range handlers { for route, handler := range routes { restfulAPIServer.Add(method, route, http.HandlerFunc(JSONWrapper(handler))) } } http.Handle("/", restfulAPIServer) logs.Info("API http server start at", g.Config.API.Addr) err := http.ListenAndServe(g.Config.API.Addr, nil) if err != nil { logs.Assert(err, "ListenAndServe: ") } }
func (self *Metric) UpdateStats(cid string) (map[string]uint64, error) { info := map[string]uint64{} statsChan := make(chan *docker.Stats) doneChan := make(chan bool) opt := docker.StatsOptions{cid, statsChan, false, doneChan, g.timeout * time.Second} go func() { if err := g.client.Stats(opt); err != nil { logs.Info("Get stats failed", cid[:12], err) } }() var stats *docker.Stats = nil select { case stats = <-statsChan: if stats == nil { return info, errors.New("Get stats failed") } case <-time.After(g.force * time.Second): doneChan <- true return info, errors.New("Get stats timeout") } info["cpu_user"] = stats.CPUStats.CPUUsage.UsageInUsermode info["cpu_system"] = stats.CPUStats.CPUUsage.UsageInKernelmode info["cpu_usage"] = stats.CPUStats.CPUUsage.TotalUsage //FIXME in container it will get all CPUStats info["mem_usage"] = stats.MemoryStats.Usage info["mem_max_usage"] = stats.MemoryStats.MaxUsage info["mem_rss"] = stats.MemoryStats.Stats.Rss if err := self.getNetStats(info); err != nil { return info, err } return info, nil }
func Limit() { if g.Config.Limit.Memory != 0 { logs.Info("App memory soft limit start") isLimit = true go calcMemoryUsage() } }
func calcMemoryUsage() { for { select { case d := <-limitChan: if !d.flag { logs.Info("Get mem stats failed", d.cid) } if v, ok := d.info["mem_usage"]; ok { usage[d.cid] = v } else { usage[d.cid] = 0 } var doCalc bool = true for id, _ := range Apps { if _, ok := usage[id]; !ok { doCalc = false break } } if doCalc { judgeMemoryUsage() } } } }
func (self *UpStream) createUDPConn() error { self.scheme = "udp" udpAddr, err := net.ResolveUDPAddr("udp", self.addr) if err != nil { logs.Info("Resolve", self.addr, "failed", err) return err } conn, err := net.DialUDP("udp", nil, udpAddr) if err != nil { logs.Info("Connect backend failed", err) return err } self.udplog = conn self.Close = self.udplog.Close return nil }
func main() { g.LoadConfig() g.InitialConn() g.InitTransfers() defer g.CloseConn() lenz.InitLenz() status.InitStatus() network.InitVlan() utils.WritePid(g.Config.PidFile) defer os.Remove(g.Config.PidFile) api.Serve() status.Load() status.StartMonitor() health.Check() app.Limit() var c = make(chan os.Signal, 1) signal.Notify(c, os.Interrupt) signal.Notify(c, syscall.SIGTERM) signal.Notify(c, syscall.SIGHUP) signal.Notify(c, syscall.SIGKILL) signal.Notify(c, syscall.SIGQUIT) logs.Info("Eru Agent Catch", <-c) }
func MakeMockedWrapper(fptr interface{}) { var maker = func(in []reflect.Value) []reflect.Value { wrapper := in[0].Elem() client := in[1] wrapperType := wrapper.Type() for i := 1; i < wrapperType.NumField(); i++ { field := wrapper.Field(i) fd, ok := client.Type().MethodByName(wrapperType.Field(i).Name) if !ok { logs.Info("Reflect Failed") continue } fdt := fd.Type f := reflect.MakeFunc(field.Type(), func(in []reflect.Value) []reflect.Value { ret := make([]reflect.Value, 0, fdt.NumOut()) for i := 0; i < fdt.NumOut(); i++ { ret = append(ret, reflect.Zero(fdt.Out(i))) } return ret }) field.Set(f) } return []reflect.Value{in[0]} } fn := reflect.ValueOf(fptr).Elem() v := reflect.MakeFunc(fn.Type(), maker) fn.Set(v) }
func InitVlan() { Devices = consistent.New() for _, device := range g.Config.VLan.Physical { Devices.Add(device) } logs.Info("Vlan initiated") }
func Marshal(obj interface{}) []byte { bytes, err := json.MarshalIndent(obj, "", " ") if err != nil { logs.Info("Utils Marshal:", err) } return bytes }
func (this *SingleConnRpcClient) insureConn() error { if this.rpcClient != nil { return nil } var err error var retry int = 1 for { if this.rpcClient != nil { return nil } this.rpcClient, err = net.JsonRpcClient("tcp", this.RpcServer, this.Timeout) if err == nil { return nil } logs.Info("Metrics rpc dial fail", err) if retry > 5 { return err } time.Sleep(time.Duration(math.Pow(2.0, float64(retry))) * time.Second) retry++ } return nil }
func (this *SingleConnRpcClient) Call(method string, args interface{}, reply interface{}) error { this.Lock() defer this.Unlock() if err := this.insureConn(); err != nil { return err } timeout := time.Duration(50 * time.Second) done := make(chan error) go func() { err := this.rpcClient.Call(method, args, reply) done <- err }() select { case <-time.After(timeout): logs.Info("Metrics rpc call timeout", this.rpcClient, this.RpcServer) this.Close() case err := <-done: if err != nil { this.Close() return err } } return nil }
func NewEruApp(container *docker.Container, extend map[string]interface{}) *EruApp { name, entrypoint, ident := utils.GetAppInfo(container.Name) if name == "" { logs.Info("Container name invaild", container.Name) return nil } logs.Debug("Eru App", name, entrypoint, ident) transfer, _ := g.Transfers.Get(container.ID, 0) client := falcon.CreateFalconClient( transfer, time.Duration(g.Config.Metrics.Timeout)*time.Millisecond, ) step := time.Duration(g.Config.Metrics.Step) * time.Second extend["hostname"] = g.Config.HostName extend["cid"] = container.ID[:12] extend["ident"] = ident tag := []string{} for k, v := range extend { tag = append(tag, fmt.Sprintf("%s=%v", k, v)) } endpoint := fmt.Sprintf("%s-%s", name, entrypoint) meta := defines.Meta{container.ID, container.State.Pid, name, entrypoint, ident, extend} metric := metric.CreateMetric(step, client, strings.Join(tag, ","), endpoint) eruApp := &EruApp{meta, metric} return eruApp }
func NewEruApp(container *docker.Container, extend map[string]interface{}) *EruApp { name, entrypoint, ident := utils.GetAppInfo(container.Name) if name == "" { logs.Info("Container name invaild", container.Name) return nil } logs.Debug("Eru App", name, entrypoint, ident) transfer, _ := g.Transfers.Get(container.ID, 0) client := defines.SingleConnRpcClient{ RpcServer: transfer, Timeout: time.Duration(g.Config.Metrics.Timeout) * time.Millisecond, } step := time.Duration(g.Config.Metrics.Step) * time.Second extend["hostname"] = g.Config.HostName extend["cid"] = container.ID[:12] extend["ident"] = ident tag := []string{} for k, v := range extend { tag = append(tag, fmt.Sprintf("%s=%v", k, v)) } endpoint := fmt.Sprintf("%s-%s", name, entrypoint) eruApp := &EruApp{ defines.Meta{container.ID, container.State.Pid, name, entrypoint, ident, extend}, defines.Metric{Step: step, Client: client, Tag: strings.Join(tag, ","), Endpoint: endpoint}, nil, } eruApp.Stop = make(chan bool) return eruApp }