Example #1
0
// updateClusterView computes the next cluster view from the current view cv
// and the reported keepersState. Changes are applied to a copy obtained with
// cv.Copy(), and that copy is returned.
//
// When cv has no master, the cluster is initialized: exactly one keeper must
// be registered, its PGState must be known and initialized, and that keeper
// becomes the master. Otherwise the current master is kept unless it is
// unhealthy or not yet converged, in which case GetBestStandby is asked for a
// replacement; if that fails or returns the current master, the master is
// left unchanged.
//
// An error is returned when the cluster cannot be initialized or when
// keepersState has no entry for the current master. Version is incremented
// and ChangeTime refreshed only when the resulting view differs from cv.
func (s *Sentinel) updateClusterView(cv *cluster.ClusterView, keepersState cluster.KeepersState) (*cluster.ClusterView, error) {
	var wantedMasterID string
	if cv.Master == "" {
		log.Debugf("trying to find initial master")
		// Initialization is only attempted with exactly one registered keeper.
		if len(keepersState) < 1 {
			return nil, fmt.Errorf("cannot init cluster, no keepers registered")
		}
		if len(keepersState) > 1 {
			return nil, fmt.Errorf("cannot init cluster, more than 1 keeper registered")
		}
		// The map holds exactly one entry at this point; the loop just extracts it.
		for id, k := range keepersState {
			if k.PGState == nil {
				return nil, fmt.Errorf("cannot init cluster using keeper %q since its pg state is unknown", id)
			}
			if !k.PGState.Initialized {
				return nil, fmt.Errorf("cannot init cluster using keeper %q since pg instance is not initializied", id)
			}
			log.Infof("initializing cluster with master: %q", id)
			wantedMasterID = id
			break
		}
	} else {
		masterID := cv.Master
		// Unless a replacement is elected below, the current master is kept.
		wantedMasterID = masterID

		masterOK := true
		master, ok := keepersState[masterID]
		if !ok {
			return nil, fmt.Errorf("keeper state for master %q not available. This shouldn't happen!", masterID)
		}
		// Pass the dump as an argument, not as the format string, so that a
		// '%' inside the dumped state is never parsed as a formatting verb.
		log.Debugf("masterState: %s", spew.Sprintf("%#v", master))

		if !master.Healthy {
			log.Infof("master is failed")
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if !s.isKeeperConverged(master, cv) {
			log.Infof("keeper %s not yet master", masterID)
			masterOK = false
		}

		if !masterOK {
			log.Infof("trying to find a standby to replace failed master")
			bestStandby, err := s.GetBestStandby(cv, keepersState, masterID)
			switch {
			case err != nil:
				// Best effort: without a candidate the current master stays.
				log.Errorf("error trying to find the best standby: %v", err)
			case bestStandby != masterID:
				log.Infof("electing new master: %q", bestStandby)
				wantedMasterID = bestStandby
			default:
				log.Infof("cannot find a good standby to replace failed master")
			}
		}
	}

	newCV := cv.Copy()
	newKeepersRole := newCV.KeepersRole

	// Add new keepersRole from keepersState
	for id := range keepersState {
		if _, ok := newKeepersRole[id]; !ok {
			if err := newKeepersRole.Add(id, ""); err != nil {
				// This shouldn't happen
				panic(err)
			}
		}
	}

	// Setup master role
	if cv.Master != wantedMasterID {
		newCV.Master = wantedMasterID
		// The master follows nobody.
		newKeepersRole[wantedMasterID].Follow = ""
	}

	// Setup standbys
	if cv.Master == wantedMasterID {
		// wanted master is the previous one: standbys are pointed at it only
		// while it is healthy and converged.
		masterState := keepersState[wantedMasterID]
		if masterState.Healthy && s.isKeeperConverged(masterState, cv) {
			for id := range newKeepersRole {
				if id == wantedMasterID {
					continue
				}
				newKeepersRole[id].Follow = wantedMasterID
			}
		}
	}

	s.updateProxyConf(cv, newCV, keepersState)

	// Bump version and change time only when something actually changed.
	if !newCV.Equals(cv) {
		newCV.Version = cv.Version + 1
		newCV.ChangeTime = time.Now()
	}
	return newCV, nil
}
Example #2
0
// updateClusterView computes the next cluster view from the current view cv
// and the reported membersState. Changes are applied to a copy obtained with
// cv.Copy(), and that copy is returned.
//
// When cv.Version is 0 the cluster is initialized: exactly one member must be
// registered, its PGState must be known, and that member becomes the master.
// Otherwise the current master is kept unless it is unhealthy or not yet
// converged, in which case GetBestStandby is asked for a replacement; if that
// fails or returns the current master, the master is left unchanged.
//
// An error is returned when the cluster cannot be initialized or when
// membersState has no entry for the current master. Version is incremented
// and ChangeTime refreshed only when the resulting view differs from cv.
func (s *Sentinel) updateClusterView(cv *cluster.ClusterView, membersState cluster.MembersState) (*cluster.ClusterView, error) {
	var wantedMasterID string
	// Cluster first initialization
	if cv.Version == 0 {
		log.Debugf("trying to find initial master")
		// Initialization is only attempted with exactly one registered member.
		if len(membersState) < 1 {
			return nil, fmt.Errorf("cannot init cluster, no members registered")
		}
		if len(membersState) > 1 {
			return nil, fmt.Errorf("cannot init cluster, more than 1 member registered")
		}
		// The map holds exactly one entry at this point; the loop just extracts it.
		for id, m := range membersState {
			if m.PGState == nil {
				return nil, fmt.Errorf("cannot init cluster using member %q since its pg state is unknown", id)
			}
			log.Infof("Initializing cluster with master: %q", id)
			wantedMasterID = id
			break
		}
	} else {
		masterID := cv.Master

		masterOK := true
		master, ok := membersState[masterID]
		if !ok {
			return nil, fmt.Errorf("member state for master %q not available. This shouldn't happen!", masterID)
		}
		// Pass the dump as an argument, not as the format string, so that a
		// '%' inside the dumped state is never parsed as a formatting verb.
		log.Debugf("masterState: %s", spew.Sprintf("%#v", master))

		if !s.isMemberHealthy(master) {
			log.Infof("master is failed")
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if !s.isMemberConverged(master, cv) {
			log.Infof("member %s not yet master", masterID)
			masterOK = false
		}

		// Unless a replacement is elected below, the current master is kept.
		wantedMasterID = masterID
		if !masterOK {
			log.Infof("trying to find a standby to replace failed master")
			bestStandby, err := s.GetBestStandby(cv, membersState, masterID)
			switch {
			case err != nil:
				// Best effort: without a candidate the current master stays.
				log.Errorf("error trying to find the best standby: %v", err)
			case bestStandby != masterID:
				log.Infof("electing new master: %q", bestStandby)
				wantedMasterID = bestStandby
			default:
				log.Infof("cannot find a good standby to replace failed master")
			}
		}
	}

	newCV := cv.Copy()
	newMembersRole := newCV.MembersRole

	// Add new members from membersState
	for id := range membersState {
		if _, ok := newMembersRole[id]; !ok {
			newMembersRole[id] = &cluster.MemberRole{}
		}
	}

	// Setup master role
	if cv.Master != wantedMasterID {
		newCV.Master = wantedMasterID
		// The master follows nobody.
		newMembersRole[wantedMasterID] = &cluster.MemberRole{Follow: ""}
	}

	// Setup standbys
	if cv.Master == wantedMasterID {
		// wanted master is the previous one: standbys are pointed at it only
		// while it is healthy and converged.
		masterState := membersState[wantedMasterID]
		if s.isMemberHealthy(masterState) && s.isMemberConverged(masterState, cv) {
			for id := range newMembersRole {
				if id == wantedMasterID {
					continue
				}
				newMembersRole[id] = &cluster.MemberRole{Follow: wantedMasterID}
			}
		}
	}

	// Bump version and change time only when something actually changed.
	if !newCV.Equals(cv) {
		newCV.Version = cv.Version + 1
		newCV.ChangeTime = time.Now()
	}
	return newCV, nil
}