func main() { initEnv() log.Printf("[GUARDKEEPER]Now begin to start guardkeeper...") if *MONITOR_INTERVAL <= 0 { log.Println("[GUARDKEEPER]the param: monitorInterval is less than 0!") os.Exit(2) } for { if !common.FileExists(signPath, signFile) { log.Println("[GUARDKEEPER]sign file not found yet, path: " + signPath + ", filename: " + signFile + " please wait...") time.Sleep(time.Duration(*MONITOR_INTERVAL) * time.Second) continue } log.Println("[GUARDKEEPER]found the sign file!") break } // hostIp := common.GetHostIp() hostName := common.GetHostName() realIp := common.GetShell("ifconfig eth0 | grep \"inet addr\" | awk '{print $2}' | awk -F: '{print $2}'") // 选用--net=bridge方式的话,则进行ip替换。 // commandCore := fmt.Sprintf("s/%s\t%s/%s\t%s/g", hostIp, hostName, realIp, hostName) // common.GetShell("sed \"" + commandCore + "\" /etc/hosts > out.tmp && cat out.tmp > /etc/hosts && rm -f out.tmp") // 选用--net=none方式的话,那么/etc/hosts里面不会有ip\tport信息。此时直接往里面插就行。 common.GetShell(fmt.Sprintf("echo \"%s\t%s\" >> /etc/hosts", realIp, hostName)) log.Println("[GUARDKEEPER]now begin to execute supervisord!") log.Fatalln("[GUARDKEEPER]Error...", common.GetShell("/usr/bin/supervisord")) }
func getHostname(id string, simpleCut bool) string { if !simpleCut { return common.GetShell("docker inspect -f {{.Config.Hostname}} " + id) } else { return id[:12] // 这边考虑到了destroy的话就没有hostname了,这时候就通过id进行分割获取。 } }
func startProcessor(msg *dockerapi.APIEvents, shortId string) { log.Println("now found a START event!") cnt, _ := strconv.Atoi(common.GetShell("docker inspect " + shortId + " | grep SERVICE_NAME | wc -l")) if cnt == 0 { return } log.Println("Step1: begin to process container info restore") storeContainerInfo(shortId) log.Println("Step2: begin to process the container network") ip := getIp(shortId) fmt.Println("IP is: " + ip) common.GetShell("sh " + SCRIPT_HOME + START_SCRIPT + " " + shortId + " " + ip) log.Println("Step3: begin to process log collection") // TODO 处理logstash、kafka topic相关脚本 }
func main() { flag.Parse() if os.Getenv("DOCKER_HOST") == "" { os.Setenv("DOCKER_HOST", "unix:///var/run/docker.sock") } common.GetShell("mkdir -p " + SCRIPT_HOME) common.GetShell("wget " + *ARGS_SCRIPT_WGET_HOME + START_SCRIPT + " && mv " + START_SCRIPT + " " + SCRIPT_HOME) common.GetShell("wget " + *ARGS_SCRIPT_WGET_HOME + STOP_SCRIPT + " && mv " + STOP_SCRIPT + " " + SCRIPT_HOME) common.GetShell("wget " + *ARGS_SCRIPT_WGET_HOME + DELETE_SCRIPT + " && mv " + DELETE_SCRIPT + " " + SCRIPT_HOME) dockerClient, _ = dockerapi.NewClientFromEnv() events := make(chan *dockerapi.APIEvents) assert(dockerClient.AddEventListener(events)) log.Println("Listening for Docker events ...") consulClient, _ = consulApi.NewClient(defaultConfig()) for msg := range events { log.Printf("get docker event: %s now... \n", msg) switch msg.Status { case "start": go startProcessor(msg, getHostname(msg.ID, true)) case "die": go dieProcessor(msg, getHostname(msg.ID, true)) case "destroy": go destroyProcessor(msg, getHostname(msg.ID, true)) } } log.Fatalln("Docker listener closed!") // TODO 需要一个守护进程。定时扫有没有忘了删除的服务 // 此处的逻辑为: 扫所有consul注册的服务,获取虚拟IP,进一步扫本机所有的容器, // 去进行一一比对。如果本机容器Name带有"mesos"字样,ip在上面存活的,且PID=0的, // 则认为服务已经停止,此时删除consul上的节点数据。 // ------ 以上为删除已被停止的容器服务注册信息 ------- // 每个宿主机上各自定时扫描汇报给cloud-server自己机器上的所有存活节点。 // 由cloud-server抓取consul-server进行比对。取出不存在列表中的consul服务信息, // 再等待下一次比对。如果下一次比对仍然没有,则判定为不存在。此时进行删除。 // ------ 以上为删除已不存在的容器服务注册信息 ------- }
func destroyProcessor(msg *dockerapi.APIEvents, shortId string) { log.Println("now found a DESTROY event! ") log.Println("Step1: begin to clear up all info abt this container") // 即使没有start和stop,去调用delete脚本也不会有问题。 common.GetShell("sh " + SCRIPT_HOME + DELETE_SCRIPT + " " + shortId) delIp(shortId) log.Println("Step2: begin to mask container info") maskContainerInfo(shortId) log.Println("Step3: begin to process log collection") // TODO }
func dieProcessor(msg *dockerapi.APIEvents, shortId string) { log.Println("now found a DIE event! ") log.Println("Step1: deregister") needDeregister := globalDeregister(msg, shortId) if !needDeregister { log.Println("No need to deregister. No Service Found") return } log.Println("Step2: begin to clear link") common.GetShell("sh " + SCRIPT_HOME + STOP_SCRIPT + " " + shortId) log.Println("Step3: begin to process log collection") }
func defaultConfig() *consulApi.Config { // 拿ip最后一位设置为1,即为宿主机ip。默认宿主机上必须有consul // Deprecated。采用OVS+none划分VLAN的方式重做二层网络。采用读取信号量文件来拿 // hostIp := serviceIp[:strings.LastIndex(serviceIp, ".")] + ".1:5000" hostIp := common.GetShell("cat "+signFile+" | head -n1") + ":8500" // hostIp := "10.14.5.14:8500" config := &consulApi.Config{ Address: hostIp, Scheme: "http", HttpClient: cleanhttp.DefaultClient(), } return config }