Beispiel #1
0
// testProxy asks the proxy agent to start a proxy, waits until the proxy
// answers a ping, kills all processes through testKillAllProcs and then
// waits until the proxy answers a ping again.
func (s *testAgentSuite) testProxy(c *C) {
	const (
		proxyAddr     = "127.0.0.1:19000"
		proxyHTTPAddr = "127.0.0.1:29000"
	)
	agent := s.agentProxy

	params := url.Values{}
	params.Set("addr", proxyAddr)
	params.Set("http_addr", proxyHTTPAddr)
	agent.httpCall(c, nil, "start_proxy", params.Encode(), "POST")

	// waitOnline pings the proxy up to three times, sleeping 2s before each
	// attempt, and returns nil on the first success or the last ping error.
	// The proxy currently waits 3s before going online, which is long for a
	// test; this may change later.
	waitOnline := func() error {
		var pingErr error
		for attempt := 0; attempt < 3; attempt++ {
			time.Sleep(2 * time.Second)
			pingErr = utils.Ping(proxyAddr, "")
			if pingErr == nil {
				return nil
			}
		}
		return pingErr
	}

	c.Assert(waitOnline(), IsNil)

	// kill proxy and then wait for restart
	s.testKillAllProcs(c, agent)

	c.Assert(waitOnline(), IsNil)
}
Beispiel #2
0
// testStore asks the given agent to start a store on 127.0.0.1:port, checks
// that it answers a ping, kills all processes through testKillAllProcs and,
// after a 2s pause, checks that the store answers a ping again.
func (s *testAgentSuite) testStore(c *C, agent testAgentInfo, port int) {
	addr := fmt.Sprintf("127.0.0.1:%d", port)

	query := "addr=" + url.QueryEscape(addr)
	agent.httpCall(c, nil, "start_redis", query, "POST")

	// assertAlive fails the test unless the store answers a ping.
	assertAlive := func() {
		pingErr := utils.Ping(addr, globalEnv.StoreAuth())
		c.Assert(pingErr, IsNil)
	}

	assertAlive()

	// kill store and then wait 2s for restart
	s.testKillAllProcs(c, agent)
	time.Sleep(2 * time.Second)

	assertAlive()
}
Beispiel #3
0
// apiCheckStore handles /check_store?addr=addr.
//
// It pings the store named by the "addr" form value using the configured
// store auth. A failed ping is answered through respError with
// 503 Service Unavailable and the error text; a successful ping is answered
// with a bare 200 OK.
func apiCheckStore(w http.ResponseWriter, r *http.Request) {
	if pingErr := utils.Ping(r.FormValue("addr"), globalEnv.StoreAuth()); pingErr != nil {
		respError(w, http.StatusServiceUnavailable, pingErr.Error())
		return
	}

	w.WriteHeader(http.StatusOK)
}
Beispiel #4
0
// checkGroupServer checks whether the store server s is reachable and sends
// exactly one verdict on ch:
//
//   - nil: the server is considered alive (the local ping succeeded on a
//     server whose type is not offline, or some other agent answered
//     200 OK for it);
//   - s: either the server is typed offline but answered the local ping, or
//     the local ping failed and every other agent responded without
//     reporting 200 OK;
//   - an error: the agent list could not be fetched, or at least one agent
//     could not be asked, so the caller gets an error instead of a verdict.
func (t *haTask) checkGroupServer(s *models.Server, ch chan<- interface{}) {
	// Offline servers are pinged too: a reachable offline server is reported
	// by sending s (see below) instead of being skipped.
	var err error
	for i := 0; i < haMaxRetryNum; i++ {
		if err = utils.Ping(s.Addr, globalEnv.StoreAuth()); err == nil {
			break
		}

		err = errors.Trace(err)
		time.Sleep(time.Duration(haRetryDelay) * time.Second)
	}

	if err == nil {
		if s.Type == models.SERVER_TYPE_OFFLINE {
			ch <- s
		} else {
			ch <- nil
		}
		return
	}

	// here means we cannot ping server ok, so we think it is down
	// let other help use to check
	log.Infof("leader check server %s in group %d err %v, let other agents help check", s.Addr, s.GroupId, err)

	// get all agents
	agents, err := getAgents()
	if err != nil {
		log.Errorf("get agents err %v", err)
		ch <- errors.Trace(err)
		return
	}

	// reply[i] holds the outcome for agents[i]: nil for this agent itself,
	// an error when the request failed, or the HTTP status code as an int.
	reply := make([]interface{}, len(agents))

	var wg sync.WaitGroup
	for i, agent := range agents {
		if agent.ID == agentID {
			// ignore itself; reply[i] stays nil
			continue
		}

		wg.Add(1)
		go func(i int) {
			defer wg.Done()

			// Read the address through agents[i] rather than the range
			// variable: before Go 1.22 the range variable is shared by all
			// iterations, so a goroutine could otherwise query the wrong agent.
			// NOTE(review): http.Get uses http.DefaultClient, which has no
			// timeout, so an unresponsive agent blocks wg.Wait below.
			resp, reqErr := http.Get(fmt.Sprintf("http://%s/api/check_store?addr=%s", agents[i].Addr, s.Addr))
			if reqErr != nil {
				reply[i] = errors.Trace(reqErr)
				return
			}
			defer resp.Body.Close()

			// Read the body to the end before closing; only the status code is used.
			if _, reqErr = ioutil.ReadAll(resp.Body); reqErr != nil {
				reply[i] = errors.Trace(reqErr)
				return
			}

			reply[i] = int(resp.StatusCode)
		}(i)
	}

	wg.Wait()

	for i, r := range reply {
		switch v := r.(type) {
		case nil:
			// itself, ignore
		case error:
			log.Errorf("let agent %s check %s err %v", agents[i].Addr, s.Addr, v)
			err = errors.Trace(v)
		case int:
			if v == http.StatusOK {
				log.Infof("agent %s check %s ok, maybe it is alive", agents[i].Addr, s.Addr)
				ch <- nil
				return
			}
			// any other status means that agent's check of the server failed
		}
	}

	if err != nil {
		// at least one agent could not be asked (maybe we cannot connect to
		// it), so return the error so the check can be retried
		ch <- errors.Trace(err)
		return
	}

	// if all nodes check the store server is down, we will think it is down
	log.Infof("all agents check server %s is down", s.Addr)
	ch <- s
}
Beispiel #5
0
// migrateSingleSlot migrates slot slotId to group `to`.
//
// The source group is the slot's current group, or the recorded migration
// source when the slot is already in migrate status. The call is a no-op
// (returns nil) when the slot is neither online nor migrating, or when the
// source and target groups are the same.
//
// Before any state is changed it verifies that both groups exist, that each
// has a master, and that each master answers a ping (retried up to
// haMaxRetryNum times with haRetryDelay seconds between attempts). It then
// sets the slot's migrate status, runs the slot migrator, and finally
// writes the slot back with online status and cleared migrate fields.
//
// Any failure is returned as a traced error; a missing group or master is
// returned as a NotFound error naming the group in question.
func (t *MigrateTask) migrateSingleSlot(slotId int, to int) error {
	// set slot status
	s, err := models.GetSlot(t.coordConn, t.productName, slotId)
	if err != nil {
		log.Error(err)
		return errors.Trace(err)
	}
	if s.State.Status != models.SLOT_STATUS_ONLINE && s.State.Status != models.SLOT_STATUS_MIGRATE {
		log.Warning("status is not online && migrate", s)
		return nil
	}

	from := s.GroupId
	if s.State.Status == models.SLOT_STATUS_MIGRATE {
		from = s.State.MigrateStatus.From
	}

	// cannot migrate to itself, just ignore
	if from == to {
		log.Warning("from == to, ignore", s)
		return nil
	}

	// make sure from group & target group exists
	exists, err := models.GroupExists(t.coordConn, t.productName, from)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		log.Errorf("src group %d not exist when migrate from %d to %d", from, from, to)
		return errors.NotFoundf("group %d", from)
	}

	exists, err = models.GroupExists(t.coordConn, t.productName, to)
	if err != nil {
		return errors.Trace(err)
	}
	if !exists {
		return errors.NotFoundf("group %d", to)
	}

	// pingWithRetry pings addr up to haMaxRetryNum times, sleeping
	// haRetryDelay seconds after each failure. It returns nil on the first
	// success, otherwise the traced error of the last attempt.
	pingWithRetry := func(addr string) error {
		var pingErr error
		for i := 0; i < haMaxRetryNum; i++ {
			if pingErr = utils.Ping(addr, globalEnv.StoreAuth()); pingErr == nil {
				return nil
			}

			pingErr = errors.Trace(pingErr)
			time.Sleep(time.Duration(haRetryDelay) * time.Second)
		}
		return pingErr
	}

	/***************************************************************/
	// make sure both groups have a reachable master server ---zjp 20160711

	groupTo, err := models.GetGroup(t.coordConn, t.productName, to)
	if err != nil {
		log.Warning(err)
		return errors.Trace(err)
	}
	log.Warning("Get to master ")
	toMaster, err := groupTo.Master(t.coordConn)
	if err != nil {
		log.Warning(err)
		return errors.Trace(err)
	}

	if toMaster == nil {
		log.Warning("to master not found")
		return errors.NotFoundf("group %d has no master", to)
	}
	log.Warning("Get to master  != nil")

	if perr := pingWithRetry(toMaster.Addr); perr != nil {
		log.Warning(perr)
		log.Warning("To master is not online")
		return errors.Trace(perr)
	}

	groupFrom, err := models.GetGroup(t.coordConn, t.productName, from)
	if err != nil {
		log.Warning(err)
		return errors.Trace(err)
	}
	log.Warning("Get from master ")
	fromMaster, err := groupFrom.Master(t.coordConn)
	if err != nil {
		log.Warning(err)
		return errors.Trace(err)
	}

	if fromMaster == nil {
		log.Warning(" from master not found")
		// Report the source group here: it is the one lacking a master.
		return errors.NotFoundf("group %d has no master", from)
	}
	log.Warning("Get from master  != nil")

	if perr := pingWithRetry(fromMaster.Addr); perr != nil {
		log.Warning(perr)
		log.Warning("From master is not online")
		return errors.Trace(perr)
	}

	/***************************************************************/

	// modify slot status
	if err := s.SetMigrateStatus(t.coordConn, from, to); err != nil {
		log.Error(err)
		return errors.Trace(err)
	}

	err = t.slotMigrator.Migrate(s, from, to, t, func(p SlotMigrateProgress) {
		// on migrate slot progress: log only when the remaining count is a
		// multiple of 500
		if p.Remain%500 == 0 {
			log.Info(p)
		}
	})
	if err != nil {
		log.Error(err)
		return errors.Trace(err)
	}

	// migrate done, change slot status back
	s.State.Status = models.SLOT_STATUS_ONLINE
	s.State.MigrateStatus.From = models.INVALID_ID
	s.State.MigrateStatus.To = models.INVALID_ID
	if err := s.Update(t.coordConn); err != nil {
		log.Error(err)
		return errors.Trace(err)
	}

	return nil
}