예제 #1
0
파일: handler.go 프로젝트: yiduoyunQ/sm
// isolate handles `/isolate/{name}`: it takes the named proxy or database node
// out of service and then persists the resulting state.
//
// The name is looked up first in sm.proxys and then in the "default" data node
// group of sm.topology, where only nodes whose status is consts.Normal qualify.
//
// Status codes written:
//   - 304 when getLock(2) fails,
//   - 400 when the name is neither a proxy nor a normal database node,
//   - 500 when recording the action in consul or the isolation itself fails,
//   - 200 otherwise; failures while persisting the new state after that point
//     are only logged.
//
// On return the deferred function writes the error text (if any) followed by
// the steps reported by ha.IsolateDB to the response body, and releases the
// state lock if this request acquired it.
//
// NOTE(review): net/http does not permit a body on a 304 response, so the
// "get lock fail" text is presumably never delivered to the client; the status
// code is kept as is because callers may depend on it.
func isolate(w http.ResponseWriter, req *http.Request) {
	var (
		steps  []string
		err    error
		locked bool // true once this request owns the state lock
	)
	n := mux.Vars(req)["name"]
	log.WithFields(log.Fields{
		"RemoteAddr": req.RemoteAddr,
		"URL":        req.URL,
		"Name":       n,
	}).Info("handle `/isolate/{name}` start")

	defer func() {
		log.WithFields(log.Fields{
			"RemoteAddr": req.RemoteAddr,
			"URL":        req.URL,
			"Name":       n,
		}).Info("handle `/isolate/{name}` end")

		if err != nil {
			w.Write([]byte(err.Error()))
		}

		// Report the steps of this request only; sm.steps is shared state and
		// may still hold the steps of an earlier operation.
		for _, v := range steps {
			w.Write([]byte(v))
		}

		// Release the lock only if this request took it. getLock is defined
		// elsewhere; presumably it fails while another operation holds
		// sm.state, in which case an unconditional reset would free a lock
		// owned by a concurrent isolate request.
		if locked {
			atomic.CompareAndSwapInt64(&sm.state, 2, 0)
		}
	}()

	if !getLock(2) {
		w.WriteHeader(http.StatusNotModified)
		err = errors.New("http isolate get lock fail")
		return
	}
	locked = true

	sm.steps = []string{}

	// Classify the target while holding the read lock.
	var t string
	sm.rwMutex.RLock()
	if _, ok := sm.proxys[n]; ok {
		t = consts.Type_Proxy
	} else if sm.topology != nil {
		if node, ok := sm.topology.DataNodeGroup["default"][n]; ok && node.Status == consts.Normal {
			t = consts.Type_Db
		}
	}
	sm.rwMutex.RUnlock()

	if t == consts.Type_Proxy {
		// Record the pending action first; NewSwitchManager reads this key
		// back at startup to finish an interrupted isolation.
		// Fatal errors are assigned to the outer err (no `:=`) so that the
		// deferred function can report them.
		err = sm.consul.PutVal(prefix+consts.ActionKey, []byte(consts.ActionIsolateProxyVal+","+n))
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		err = CloseProxy(n)
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		w.WriteHeader(http.StatusOK)

		// From here on the request has succeeded; persistence failures are
		// logged and kept out of err so they are not appended to the 200 body.
		sm.rwMutex.RLock()
		// Marshal error is ignored, as before.
		bproxys, _ := json.Marshal(sm.proxys)
		sm.rwMutex.RUnlock()
		if perr := sm.consul.PutVal(prefix+consts.ProxyKey, bproxys); perr != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.ProxyKey,
				"val": string(bproxys),
				"err": perr.Error(),
			}).Error("push proxy to consul fail, continue")
		}
		if perr := presistent.ProxyPresist(string(bproxys)); perr != nil {
			log.WithFields(log.Fields{
				"val": string(bproxys),
				"err": perr.Error(),
			}).Error("proxy presist fail, continue")
		}
		if perr := sm.consul.PutVal(prefix+consts.ActionKey, []byte("")); perr != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.ActionKey,
				"val": "",
				"err": perr.Error(),
			}).Error("push action key to consul fail, continue")
		}
	} else if t == consts.Type_Db {
		// Re-check under the lock: the node may have changed or disappeared
		// since it was classified above.
		sm.rwMutex.RLock()
		node, ok := sm.topology.DataNodeGroup["default"][n]
		stillNormal := ok && node.Status == consts.Normal
		sm.rwMutex.RUnlock()
		if !stillNormal {
			w.WriteHeader(http.StatusBadRequest)
			return
		}
		err = sm.consul.PutVal(prefix+consts.ActionKey, []byte(consts.ActionIsolateDbVal+","+n))
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}

		err, steps = ha.IsolateDB(n, sm.rwMutex, sm.topology, sm.swarm)
		sm.steps = steps
		if err != nil {
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		w.WriteHeader(http.StatusOK)

		sm.rwMutex.RLock()
		// Marshal error is ignored, as before.
		btopology, _ := json.Marshal(sm.topology)
		sm.rwMutex.RUnlock()
		if perr := sm.consul.PutVal(prefix+consts.TopologyKey, btopology); perr != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.TopologyKey,
				"val": string(btopology),
				"err": perr.Error(),
			}).Error("push topology to consul fail, continue")
		}
		if perr := presistent.TopologyPresist(string(btopology)); perr != nil {
			log.WithFields(log.Fields{
				"val": string(btopology),
				"err": perr.Error(),
			}).Error("topology presist fail, continue")
		}
		if perr := sm.consul.PutVal(prefix+consts.ActionKey, []byte("")); perr != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.ActionKey,
				"val": "",
				"err": perr.Error(),
			}).Error("push action key to consul fail, continue")
		}
	} else {
		w.WriteHeader(http.StatusBadRequest)
		err = errors.New(http.StatusText(http.StatusBadRequest))
	}
}
예제 #2
0
파일: sm.go 프로젝트: yiduoyunQ/sm
// NewSwitchManager builds the package-level switch manager sm from the CLI
// context and prepares it for use. It accesses sm without taking sm.rwMutex or
// the state lock (original note: "no need lock").
//
// Stages, each of which appends a line to sm.steps:
//  1. create the consul client,
//  2. create the swarm client,
//  3. if the consul init key holds "true", load proxies, topology and swarm
//     state through initProxy, initTopo and initSwarm,
//  4. if the consul action key is non-empty, replay the action it records.
//
// It returns an error when the configuration cannot be built or lacks Domain,
// Name, Port or ProxyPort, when a client cannot be created, when loading from
// consul fails, or when replaying the recorded action fails.
func NewSwitchManager(context *cli.Context) error {
	config, err := NewConfig(context)
	if err != nil {
		return err
	}
	sm = &switchManager{
		config:  config,
		rwMutex: new(sync.RWMutex),
		steps:   []string{},
		closed:  true,
	}

	if sm.config.Domain == "" {
		return errors.New("need Domain specify")
	}
	if sm.config.Name == "" {
		return errors.New("need Name specify")
	}
	if sm.config.Port == "" {
		return errors.New("need Port specify")
	}
	if sm.config.ProxyPort == "" {
		return errors.New("need ProxyPort specify")
	}

	prefix = sm.config.Domain + "/" + sm.config.Name + "/"

	cs, err := consul.NewConsul(context)
	if err != nil {
		sm.steps = append(sm.steps, fmt.Sprintf("Step1. New Consul fail:%s\n", err.Error()))
		return err
	}
	sm.steps = append(sm.steps, "Step1. New Consul success\n")
	sm.consul = cs
	swm, err := swarm.NewSwarm(context, cs)
	if err != nil {
		sm.steps = append(sm.steps, fmt.Sprintf("Step2. New Swarm fail:%s\n", err.Error()))
		return err
	}
	sm.steps = append(sm.steps, "Step2. New Swarm success\n")
	sm.swarm = swm

	// Init from consul. A read error is treated like "not initialized".
	isInitalized, err := sm.consul.GetVal(prefix + consts.InitKey)
	if err == nil && isInitalized == "true" {
		log.Info("init from consul")
		err = initProxy()
		if err != nil {
			log.WithFields(log.Fields{
				"err:": err.Error(),
			}).Error("pull proxy from consul fail, return")
			sm.steps = append(sm.steps, fmt.Sprintf("Step3. Init from consul fail:%s\n", err.Error()))
			return err
		}
		err = initTopo()
		if err != nil {
			// The message used to say "continue" although the function returns.
			log.WithFields(log.Fields{
				"err:": err.Error(),
			}).Error("pull topology from consul fail, return")
			sm.steps = append(sm.steps, fmt.Sprintf("Step3. Init from consul fail:%s\n", err.Error()))
			return err
		}
		err = initSwarm()
		if err != nil {
			log.WithFields(log.Fields{
				"err:": err.Error(),
			}).Error("pull swarm from consul fail, return")
			sm.steps = append(sm.steps, fmt.Sprintf("Step3. Init from consul fail:%s\n", err.Error()))
			return err
		}
		sm.steps = append(sm.steps, "Step3. Init from consul success\n")
	} else {
		sm.steps = append(sm.steps, "Step3. No init from consul\n")
	}

	sm.closed = false
	sm.state = 0

	// Recover action, no need lock.
	actionVal, err := sm.consul.GetVal(prefix + consts.ActionKey)
	if err != nil {
		// Still best effort, but no longer silent: without the key we cannot
		// tell whether an action was left unfinished.
		log.WithFields(log.Fields{
			"key": prefix + consts.ActionKey,
			"err": err.Error(),
		}).Error("pull action key from consul fail, continue")
	}
	if actionVal == "" {
		sm.steps = append(sm.steps, "Step4. No init recover action\n")
	} else if err := recoverPendingAction(actionVal); err != nil {
		return err
	}

	log.Info("sm init success")

	return nil
}

// recoverPendingAction replays the action stored under the consul action key.
// actionVal has the form "<action>,<name>" as written by the isolate handler.
// It appends the Step4 result to sm.steps and returns an error when the value
// is malformed or the replayed operation fails. Failures while persisting the
// resulting state or clearing the action key are only logged.
func recoverPendingAction(actionVal string) error {
	log.Infof("recover action:%s", actionVal)

	// fail records the failed step and hands the error back to the caller.
	fail := func(action string, err error) error {
		sm.steps = append(sm.steps, fmt.Sprintf("Step4. Init recover action %s fail:%s\n", action, err.Error()))
		return err
	}

	split := strings.Split(actionVal, ",")
	if len(split) < 2 {
		// Previously split[1] panicked on a value without a comma.
		return fail(actionVal, fmt.Errorf("malformed action value %q, want \"<action>,<name>\"", actionVal))
	}
	action, name := split[0], split[1]

	// persistTopology pushes the current topology to consul and to local
	// storage; both are best effort.
	persistTopology := func() {
		// Marshal error is ignored, as before.
		btopology, _ := json.Marshal(sm.topology)
		if err := sm.consul.PutVal(prefix+consts.TopologyKey, btopology); err != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.TopologyKey,
				"val": string(btopology),
				"err": err.Error(),
			}).Error("push topology to consul fail, continue")
		}
		if err := presistent.TopologyPresist(string(btopology)); err != nil {
			log.WithFields(log.Fields{
				"val": string(btopology),
				"err": err.Error(),
			}).Error("topology presist fail, continue")
		}
	}

	// finish clears the action key and records the successful step.
	finish := func() {
		if err := sm.consul.PutVal(prefix+consts.ActionKey, []byte("")); err != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.ActionKey,
				"val": "",
				"err": err.Error(),
			}).Error("push action key to consul fail, continue")
		}
		// The step text used to contain a literal "%s"; format it properly.
		sm.steps = append(sm.steps, fmt.Sprintf("Step4. Init recover action %s success\n", action))
	}

	switch action {
	case consts.ActionIsolateProxyVal:
		// Skip the work if the proxy is already closed.
		if sm.proxys[name].Status != consts.ProxyClose {
			if err := CloseProxy(name); err != nil {
				return fail(action, err)
			}
			// Marshal error is ignored, as before.
			bproxys, _ := json.Marshal(sm.proxys)
			if err := sm.consul.PutVal(prefix+consts.ProxyKey, bproxys); err != nil {
				log.WithFields(log.Fields{
					"key": prefix + consts.ProxyKey,
					"val": string(bproxys),
					"err": err.Error(),
				}).Error("push proxy to consul fail, continue")
			}
			if err := presistent.ProxyPresist(string(bproxys)); err != nil {
				log.WithFields(log.Fields{
					"val": string(bproxys),
					"err": err.Error(),
				}).Error("proxy presist fail, continue")
			}
		}
		finish()
	case consts.ActionIsolateDbVal:
		// Skip the work if the node is already marked abnormal.
		if sm.topology.DataNodeGroup["default"][name].Status != consts.Abnormal {
			if err, _ := ha.IsolateDB(name, sm.rwMutex, sm.topology, sm.swarm); err != nil {
				return fail(action, err)
			}
			persistTopology()
		}
		finish()
	case consts.ActionRecoverDbVal:
		// Skip the work if the node is already back to normal.
		if sm.topology.DataNodeGroup["default"][name].Status != consts.Normal {
			if err, _ := ha.RecoverDb(name, sm.rwMutex, sm.topology, sm.swarm); err != nil {
				return fail(action, err)
			}
			persistTopology()
		}
		finish()
	default:
		// Unknown actions were silently ignored before; keep ignoring them
		// (the key is left untouched) but leave a trace in the log.
		log.WithFields(log.Fields{
			"action": action,
			"name":   name,
		}).Warn("unknown recover action, ignore")
	}
	return nil
}
예제 #3
0
// HealthCheck runs forever, probing the database nodes of the "default" data
// node group once per sm.config.HealthCheckInterval.
//
// A round is skipped while sm.state is non-zero or sm.topology is nil.
// Otherwise one goroutine is started for every node whose status is
// consts.Normal. Each goroutine asks consul for the node's health and falls
// back to the swarm check when consul returns an error. Results are counted
// in a cmap; when cmap's Fail returns false the goroutine takes the state lock
// with getLock(1), isolates the node through ha.IsolateDB and pushes the
// topology to consul and to local storage.
//
// NOTE(review): Fail presumably returns true while the node is still below
// its failure threshold — confirm against the cmap package.
func HealthCheck() {
	failures := cmap.New()

	// probe checks a single node and isolates it when required.
	probe := func(node string) {
		healthy, err := sm.consul.HealthCheck(node)
		if err != nil {
			log.WithFields(log.Fields{
				"dbName": node,
			}).Error("consul health check error, direct use swarm dbcheck")
			healthy, err = sm.swarm.DbHealthCheck(node)
			if err != nil {
				log.WithFields(log.Fields{
					"dbName": node,
				}).Error("swarm DbHealthCheck error")
			}
		}

		if healthy && err == nil {
			failures.Success(node)
			return
		}

		log.WithFields(log.Fields{
			"fail times": failures.GetFail(node),
			"dbName":     node,
		}).Warn("health check fail")
		if failures.Fail(node) {
			return
		}

		log.Error("toggle health check auto isolate")
		if !getLock(1) {
			log.WithFields(log.Fields{
				"dbname": node,
			}).Error("health check auto isolate get lock fail")
			return
		}

		sm.steps = []string{}
		// Another operation may have isolated the node since this goroutine
		// was started. The topology is read here without sm.rwMutex, exactly
		// as before.
		if status := sm.topology.DataNodeGroup["default"][node].Status; status != consts.Normal {
			log.WithFields(log.Fields{
				"dbName": node,
				"status": status,
			}).Warn("db node already isolated")
			atomic.CompareAndSwapInt64(&sm.state, 1, 0)
			return
		}

		isolateErr, steps := ha.IsolateDB(node, sm.rwMutex, sm.topology, sm.swarm)
		sm.steps = steps
		// The state lock is released before the topology is persisted.
		atomic.CompareAndSwapInt64(&sm.state, 1, 0)
		if isolateErr != nil {
			log.WithFields(log.Fields{
				"dbName": node,
			}).Error("health check auto isolate fail")
		}

		// The topology is persisted whether or not the isolation succeeded.
		sm.rwMutex.RLock()
		raw, _ := json.Marshal(sm.topology)
		sm.rwMutex.RUnlock()
		if pushErr := sm.consul.PutVal(prefix+consts.TopologyKey, raw); pushErr != nil {
			log.WithFields(log.Fields{
				"key": prefix + consts.TopologyKey,
				"val": string(raw),
				"err": pushErr.Error(),
			}).Error("push topology to consul fail, continue")
		}
		if saveErr := presistent.TopologyPresist(string(raw)); saveErr != nil {
			log.WithFields(log.Fields{
				"val": string(raw),
				"err": saveErr.Error(),
			}).Error("topology presist fail, continue")
		}
	}

	// The post statement sleeps after every round, including skipped ones.
	for ; ; time.Sleep(sm.config.HealthCheckInterval) {
		if atomic.LoadInt64(&sm.state) != 0 {
			continue
		}

		sm.rwMutex.RLock()
		if sm.topology != nil {
			for name, node := range sm.topology.DataNodeGroup["default"] {
				if node.Status != consts.Normal {
					continue
				}
				go probe(name)
			}
		}
		sm.rwMutex.RUnlock()
	}
}