func isolate(w http.ResponseWriter, req *http.Request) { var steps []string vars := mux.Vars(req) n := vars["name"] log.WithFields(log.Fields{ "RemoteAddr": req.RemoteAddr, "URL": req.URL, "Name": n, }).Info("handle `/isolate/{name}` start") var err error defer func() { log.WithFields(log.Fields{ "RemoteAddr": req.RemoteAddr, "URL": req.URL, "Name": n, }).Info("handle `/isolate/{name}` end") if err != nil { w.Write([]byte(err.Error())) } if len(sm.steps) != 0 { s := "" for _, v := range steps { s = s + v } w.Write([]byte(s)) } atomic.CompareAndSwapInt64(&sm.state, 2, 0) }() if !getLock(2) { w.WriteHeader(http.StatusNotModified) err = errors.New("http isolate get lock fail") return } sm.steps = []string{} var t string sm.rwMutex.RLock() _, ok := sm.proxys[n] if ok { t = consts.Type_Proxy } else { _, ok = sm.topology.DataNodeGroup["default"][n] if ok { status := sm.topology.DataNodeGroup["default"][n].Status if status == consts.Normal { t = consts.Type_Db } } } sm.rwMutex.RUnlock() if t == consts.Type_Proxy { err := sm.consul.PutVal(prefix+consts.ActionKey, []byte(consts.ActionIsolateProxyVal+","+n)) if err != nil { w.WriteHeader(http.StatusInternalServerError) return } err = CloseProxy(n) if err != nil { w.WriteHeader(http.StatusInternalServerError) return } w.WriteHeader(http.StatusOK) sm.rwMutex.RLock() bproxys, _ := json.Marshal(sm.proxys) sm.rwMutex.RUnlock() err = sm.consul.PutVal(prefix+consts.ProxyKey, bproxys) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ProxyKey, "val": string(bproxys), "err": err.Error(), }).Error("push proxy to consul fail, continue") } err = presistent.ProxyPresist(string(bproxys)) if err != nil { log.WithFields(log.Fields{ "val": string(bproxys), "err": err.Error(), }).Error("proxy presist fail, continue") } err = sm.consul.PutVal(prefix+consts.ActionKey, []byte("")) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ActionKey, "val": "", "err": err.Error(), }).Error("push action key to consul fail, continue") } } else if t == consts.Type_Db { sm.rwMutex.RLock() status := sm.topology.DataNodeGroup["default"][n].Status sm.rwMutex.RUnlock() if status != consts.Normal { w.WriteHeader(http.StatusBadRequest) return } err := sm.consul.PutVal(prefix+consts.ActionKey, []byte(consts.ActionIsolateDbVal+","+n)) if err != nil { w.WriteHeader(http.StatusInternalServerError) return } err, steps = ha.IsolateDB(n, sm.rwMutex, sm.topology, sm.swarm) sm.steps = steps if err != nil { w.WriteHeader(http.StatusInternalServerError) return } w.WriteHeader(http.StatusOK) sm.rwMutex.RLock() btopology, _ := json.Marshal(sm.topology) sm.rwMutex.RUnlock() err = sm.consul.PutVal(prefix+consts.TopologyKey, btopology) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.TopologyKey, "val": string(btopology), "err": err.Error(), }).Error("push topology to consul fail, continue") } err = presistent.TopologyPresist(string(btopology)) if err != nil { log.WithFields(log.Fields{ "val": string(btopology), "err": err.Error(), }).Error("topology presist fail, continue") } err = sm.consul.PutVal(prefix+consts.ActionKey, []byte("")) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ActionKey, "val": "", "err": err.Error(), }).Error("push action key to consul fail, continue") } } else { w.WriteHeader(http.StatusBadRequest) err = errors.New(http.StatusText(http.StatusBadRequest)) } }
// no need lock func NewSwitchManager(context *cli.Context) error { config, err := NewConfig(context) if err != nil { return err } sm = &switchManager{ config: config, rwMutex: new(sync.RWMutex), steps: []string{}, closed: true, } if sm.config.Domain == "" { return errors.New("need Domain specify") } if sm.config.Name == "" { return errors.New("need Name specify") } if sm.config.Port == "" { return errors.New("need Port specify") } if sm.config.ProxyPort == "" { return errors.New("need ProxyPort specify") } prefix = sm.config.Domain + "/" + sm.config.Name + "/" cs, err := consul.NewConsul(context) if err != nil { sm.steps = append(sm.steps, fmt.Sprintf("Step1. New Consul fail:%s\n", err.Error())) return err } sm.steps = append(sm.steps, "Step1. New Consul success\n") sm.consul = cs swm, err := swarm.NewSwarm(context, cs) if err != nil { sm.steps = append(sm.steps, fmt.Sprintf("Step2. New Swarm fail:%s\n", err.Error())) return err } sm.steps = append(sm.steps, "Step2. New Swarm success\n") sm.swarm = swm // init from consul isInitalized, err := sm.consul.GetVal(prefix + consts.InitKey) if err == nil && isInitalized == "true" { log.Info("init from consul") err = initProxy() if err != nil { log.WithFields(log.Fields{ "err:": err.Error(), }).Error("pull proxy from consul fail, return") sm.steps = append(sm.steps, fmt.Sprintf("Step3. Init from consul fail:%s\n", err.Error())) return err } err = initTopo() if err != nil { log.WithFields(log.Fields{ "err:": err.Error(), }).Error("pull topology from consul fail, continue") sm.steps = append(sm.steps, fmt.Sprintf("Step3. Init from consul fail:%s\n", err.Error())) return err } err = initSwarm() if err != nil { log.WithFields(log.Fields{ "err:": err.Error(), }).Error("pull swarm from consul fail, continue") sm.steps = append(sm.steps, fmt.Sprintf("Step3. Init from consul fail:%s\n", err.Error())) return err } sm.steps = append(sm.steps, "Step3. Init from consul success\n") } else { sm.steps = append(sm.steps, "Step3. No init from consul\n") } sm.closed = false sm.state = 0 // recover action no need lock actionVal, err := sm.consul.GetVal(prefix + consts.ActionKey) if actionVal != "" { log.Info("recover action:%s\n", actionVal) split := strings.Split(actionVal, ",") actionVal := split[0] name := split[1] if actionVal == consts.ActionIsolateProxyVal { if sm.proxys[name].Status != consts.ProxyClose { err = CloseProxy(name) if err != nil { sm.steps = append(sm.steps, fmt.Sprintf("Step4. Init recover action %s fail:%s\n", actionVal, err.Error())) return err } bproxys, _ := json.Marshal(sm.proxys) err = sm.consul.PutVal(prefix+consts.ProxyKey, bproxys) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ProxyKey, "val": string(bproxys), "err": err.Error(), }).Error("push proxy to consul fail, continue") } err = presistent.ProxyPresist(string(bproxys)) if err != nil { log.WithFields(log.Fields{ "val": string(bproxys), "err": err.Error(), }).Error("proxy presist fail, continue") } } err := sm.consul.PutVal(prefix+consts.ActionKey, []byte("")) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ActionKey, "val": "", "err": err.Error(), }).Error("push action key to consul fail, continue") } sm.steps = append(sm.steps, "Step4. Init recover action %s success\n") } else if actionVal == consts.ActionIsolateDbVal { if sm.topology.DataNodeGroup["default"][name].Status != consts.Abnormal { err, _ = ha.IsolateDB(name, sm.rwMutex, sm.topology, sm.swarm) if err != nil { sm.steps = append(sm.steps, fmt.Sprintf("Step4. Init recover action %s fail:%s\n", actionVal, err.Error())) return err } btopology, _ := json.Marshal(sm.topology) err = sm.consul.PutVal(prefix+consts.TopologyKey, btopology) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.TopologyKey, "val": string(btopology), "err": err.Error(), }).Error("push topology to consul fail, continue") } err = presistent.TopologyPresist(string(btopology)) if err != nil { log.WithFields(log.Fields{ "val": string(btopology), "err": err.Error(), }).Error("topology presist fail, continue") } } err := sm.consul.PutVal(prefix+consts.ActionKey, []byte("")) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ActionKey, "val": "", "err": err.Error(), }).Error("push action key to consul fail, continue") } sm.steps = append(sm.steps, "Step4. Init recover action %s success\n") } else if actionVal == consts.ActionRecoverDbVal { if sm.topology.DataNodeGroup["default"][name].Status != consts.Normal { err, _ = ha.RecoverDb(name, sm.rwMutex, sm.topology, sm.swarm) if err != nil { sm.steps = append(sm.steps, fmt.Sprintf("Step4. Init recover action %s fail:%s\n", actionVal, err.Error())) return err } btopology, _ := json.Marshal(sm.topology) err = sm.consul.PutVal(prefix+consts.TopologyKey, btopology) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.TopologyKey, "val": string(btopology), "err": err.Error(), }).Error("push topology to consul fail, continue") } err = presistent.TopologyPresist(string(btopology)) if err != nil { log.WithFields(log.Fields{ "val": string(btopology), "err": err.Error(), }).Error("topology presist fail, continue") } } err := sm.consul.PutVal(prefix+consts.ActionKey, []byte("")) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.ActionKey, "val": "", "err": err.Error(), }).Error("push action key to consul fail, continue") } sm.steps = append(sm.steps, "Step4. Init recover action %s success\n") } } else { sm.steps = append(sm.steps, "Step4. No init recover action\n") } log.Info("sm init success") return nil }
func HealthCheck() { m := cmap.New() for { if atomic.LoadInt64(&sm.state) != 0 { time.Sleep(sm.config.HealthCheckInterval) continue } sm.rwMutex.RLock() if sm.topology == nil { sm.rwMutex.RUnlock() time.Sleep(sm.config.HealthCheckInterval) continue } for k, v := range sm.topology.DataNodeGroup["default"] { serviceName := k dbName := k if v.Status == consts.Normal { go func() { health, err := sm.consul.HealthCheck(serviceName) if err != nil { log.WithFields(log.Fields{ "dbName": dbName, }).Error("consul health check error, direct use swarm dbcheck") health, err = sm.swarm.DbHealthCheck(serviceName) if err != nil { log.WithFields(log.Fields{ "dbName": dbName, }).Error("swarm DbHealthCheck error") } } if !health || err != nil { log.WithFields(log.Fields{ "fail times": m.GetFail(dbName), "dbName": dbName, }).Warn("health check fail") if ok := m.Fail(dbName); ok { return } log.Error("toggle health check auto isolate") if getLock(1) { sm.steps = []string{} // maybe already isolated at before goroute? // no need rwMutex? if sm.topology.DataNodeGroup["default"][dbName].Status != consts.Normal { log.WithFields(log.Fields{ "dbName": dbName, "status": sm.topology.DataNodeGroup["default"][dbName].Status, }).Warn("db node already isolated") atomic.CompareAndSwapInt64(&sm.state, 1, 0) return } err, steps := ha.IsolateDB(dbName, sm.rwMutex, sm.topology, sm.swarm) sm.steps = steps atomic.CompareAndSwapInt64(&sm.state, 1, 0) if err != nil { log.WithFields(log.Fields{ "dbName": dbName, }).Error("health check auto isolate fail") } sm.rwMutex.RLock() btopology, _ := json.Marshal(sm.topology) sm.rwMutex.RUnlock() err = sm.consul.PutVal(prefix+consts.TopologyKey, btopology) if err != nil { log.WithFields(log.Fields{ "key": prefix + consts.TopologyKey, "val": string(btopology), "err": err.Error(), }).Error("push topology to consul fail, continue") } err = presistent.TopologyPresist(string(btopology)) if err != nil { log.WithFields(log.Fields{ "val": string(btopology), "err": err.Error(), }).Error("topology presist fail, continue") } } else { log.WithFields(log.Fields{ "dbname": dbName, }).Error("health check auto isolate get lock fail") } } else { m.Success(dbName) } }() } } sm.rwMutex.RUnlock() time.Sleep(sm.config.HealthCheckInterval) } }