func (s *testAgentSuite) testProxy(c *C) { agent := s.agentProxy proxyAddr := "127.0.0.1:19000" proxyHTTPAddr := "127.0.0.1:29000" args := make(url.Values) args.Set("addr", proxyAddr) args.Set("http_addr", proxyHTTPAddr) agent.httpCall(c, nil, "start_proxy", args.Encode(), "POST") // now the proxy will wait 3s for online, this is very long for test // maybe later we will change it. var err error for i := 0; i < 3; i++ { time.Sleep(2 * time.Second) if err = utils.Ping(proxyAddr, ""); err == nil { break } } c.Assert(err, IsNil) // kill proxy and then wait for restart s.testKillAllProcs(c, agent) for i := 0; i < 3; i++ { time.Sleep(2 * time.Second) if err = utils.Ping(proxyAddr, ""); err == nil { break } } c.Assert(err, IsNil) }
func (s *testAgentSuite) testStore(c *C, agent testAgentInfo, port int) { addr := fmt.Sprintf("127.0.0.1:%d", port) agent.httpCall(c, nil, "start_redis", fmt.Sprintf("addr=%s", url.QueryEscape(addr)), "POST") err := utils.Ping(addr, globalEnv.StoreAuth()) c.Assert(err, IsNil) // kill store and then wait 2s for restart s.testKillAllProcs(c, agent) time.Sleep(2 * time.Second) err = utils.Ping(addr, globalEnv.StoreAuth()) c.Assert(err, IsNil) }
// /check_store?addr=addr func apiCheckStore(w http.ResponseWriter, r *http.Request) { addr := r.FormValue("addr") err := utils.Ping(addr, globalEnv.StoreAuth()) if err != nil { respError(w, http.StatusServiceUnavailable, err.Error()) return } w.WriteHeader(http.StatusOK) return }
func (t *haTask) checkGroupServer(s *models.Server, ch chan<- interface{}) { // we don't check offline server /*if s.Type == models.SERVER_TYPE_OFFLINE { ch <- nil return }*/ var err error for i := 0; i < haMaxRetryNum; i++ { if err = utils.Ping(s.Addr, globalEnv.StoreAuth()); err == nil { break } err = errors.Trace(err) time.Sleep(time.Duration(haRetryDelay) * time.Second) } if err == nil && s.Type == models.SERVER_TYPE_OFFLINE { ch <- s return } if err == nil && s.Type != models.SERVER_TYPE_OFFLINE { ch <- nil return } // here means we cannot ping server ok, so we think it is down // let other help use to check log.Infof("leader check server %s in group %d err %v, let other agents help check", s.Addr, s.GroupId, err) // get all agents agents, err := getAgents() if err != nil { log.Errorf("get agents err %v", err) ch <- errors.Trace(err) return } reply := make([]interface{}, len(agents)) var wg sync.WaitGroup for i, agent := range agents { if agent.ID == agentID { // ignore itself reply[i] = nil continue } wg.Add(1) go func(i int) { defer wg.Done() resp, err := http.Get(fmt.Sprintf("http://%s/api/check_store?addr=%s", agent.Addr, s.Addr)) if err != nil { reply[i] = errors.Trace(err) return } defer resp.Body.Close() if _, err = ioutil.ReadAll(resp.Body); err != nil { reply[i] = errors.Trace(err) return } reply[i] = int(resp.StatusCode) }(i) } wg.Wait() for i, r := range reply { switch v := r.(type) { case nil: // itself, ignore case error: log.Errorf("let agent %s check %s err %v", agents[i].Addr, s.Addr, v) err = errors.Trace(v) case int: if v == http.StatusOK { log.Infof("agent %s check %s ok, maybe it is alive", agents[i].Addr, s.Addr) ch <- nil return } // here mean agent check server failed } } if err != nil { // here mean let some agent check err, maybe we cann't connect the agent // so return error to retry again ch <- errors.Trace(err) return } // if all nodes check the store server is down, we will think it is down log.Infof("all agents check server %s is down", s.Addr) ch <- s }
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error { // set slot status s, err := models.GetSlot(t.coordConn, t.productName, slotId) if err != nil { log.Error(err) return errors.Trace(err) } if s.State.Status != models.SLOT_STATUS_ONLINE && s.State.Status != models.SLOT_STATUS_MIGRATE { log.Warning("status is not online && migrate", s) return nil } from := s.GroupId if s.State.Status == models.SLOT_STATUS_MIGRATE { from = s.State.MigrateStatus.From } // cannot migrate to itself, just ignore if from == to { log.Warning("from == to, ignore", s) return nil } // make sure from group & target group exists exists, err := models.GroupExists(t.coordConn, t.productName, from) if err != nil { return errors.Trace(err) } if !exists { log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to) return errors.NotFoundf("group %d", from) } exists, err = models.GroupExists(t.coordConn, t.productName, to) if err != nil { return errors.Trace(err) } if !exists { return errors.NotFoundf("group %d", to) } /***************************************************************/ // make sure to group has at least one master server ---zjp 20160711 log.Warning("Get group %d has no master master not found") groupTo, err := models.GetGroup(t.coordConn, t.productName, to) if err != nil { log.Warning(err) return errors.Trace(err) } log.Warning("Get to master ") toMaster, err := groupTo.Master(t.coordConn) if err != nil { log.Warning(err) return errors.Trace(err) } if toMaster == nil { log.Warning("to master not found") return errors.NotFoundf("group %d has no master", to) } log.Warning("Get to master != nil") var perr error for i := 0; i < haMaxRetryNum; i++ { if perr = utils.Ping(toMaster.Addr, globalEnv.StoreAuth()); perr == nil { break } perr = errors.Trace(perr) time.Sleep(time.Duration(haRetryDelay) * time.Second) } if perr != nil { log.Warning(perr) log.Warning("To master is not online") return errors.Trace(perr) } groupFrom, err := models.GetGroup(t.coordConn, t.productName, from) if err != nil { log.Warning(err) return errors.Trace(err) } log.Warning("Get from master ") fromMaster, err := groupFrom.Master(t.coordConn) if err != nil { log.Warning(err) return errors.Trace(err) } if fromMaster == nil { log.Warning(" from master not found") return errors.NotFoundf("group %d has no master", to) } log.Warning("Get from master != nil") var Fmerr error for i := 0; i < haMaxRetryNum; i++ { if Fmerr = utils.Ping(fromMaster.Addr, globalEnv.StoreAuth()); Fmerr == nil { break } Fmerr = errors.Trace(Fmerr) time.Sleep(time.Duration(haRetryDelay) * time.Second) } if Fmerr != nil { log.Warning(Fmerr) log.Warning("From master is not online") return errors.Trace(Fmerr) } groupFrom, gerr := models.GetGroup(t.coordConn, t.productName, from) if gerr != nil { log.Warning(gerr) return errors.Trace(gerr) } /***************************************************************/ // modify slot status if err := s.SetMigrateStatus(t.coordConn, from, to); err != nil { log.Error(err) return errors.Trace(err) } err = t.slotMigrator.Migrate(s, from, to, t, func(p SlotMigrateProgress) { // on migrate slot progress if p.Remain%500 == 0 { log.Info(p) } }) if err != nil { log.Error(err) return errors.Trace(err) } // migrate done, change slot status back s.State.Status = models.SLOT_STATUS_ONLINE s.State.MigrateStatus.From = models.INVALID_ID s.State.MigrateStatus.To = models.INVALID_ID if err := s.Update(t.coordConn); err != nil { log.Error(err) return errors.Trace(err) } return nil }