示例#1
0
// updateProxyConf sets the proxy configuration of the new cluster view cv,
// using the previous view prevCV and the known keepers state.
//
// When the master differs between the two views, cv.ProxyConf is cleared.
// When the master is unchanged and isKeeperConverged reports true for its
// state against prevCV, cv.ProxyConf is pointed at the master's PostgreSQL
// listen address and port, unless prevCV already carries the same host and
// port. In every other case cv.ProxyConf is left as it is.
func (s *Sentinel) updateProxyConf(prevCV *cluster.ClusterView, cv *cluster.ClusterView, keepersState cluster.KeepersState) {
	if cv.Master != prevCV.Master {
		log.Infof("deleting proxyconf")
		// Tell proxy to close connection to old master
		cv.ProxyConf = nil
		return
	}

	masterState := keepersState[cv.Master]
	if !s.isKeeperConverged(masterState, prevCV) {
		return
	}

	host, port := masterState.PGListenAddress, masterState.PGPort
	if old := prevCV.ProxyConf; old != nil && old.Host == host && old.Port == port {
		// Same endpoint as before: nothing to publish.
		return
	}

	log.Infof("updating proxyconf to %s:%s", host, port)
	cv.ProxyConf = &cluster.ProxyConf{Host: host, Port: port}
}
示例#2
0
// NewPostgresKeeper builds a PostgresKeeper identified by id.
//
// It creates an etcd manager using the path obtained by joining
// common.EtcdBasePath with the configured cluster name, reads the current
// cluster data from it (falling back to a new cluster view when there is no
// cluster data yet) and uses the resulting cluster configuration to create
// the postgres manager.
//
// stop and end are stored in the returned keeper as they are. An error is
// returned when the etcd manager cannot be created, the cluster data cannot be
// retrieved or the postgres manager cannot be created.
func NewPostgresKeeper(id string, cfg config, stop chan bool, end chan error) (*PostgresKeeper, error) {
	etcdPath := filepath.Join(common.EtcdBasePath, cfg.clusterName)
	e, err := etcdm.NewEtcdManager(cfg.etcdEndpoints, etcdPath, common.DefaultEtcdRequestTimeout)
	if err != nil {
		return nil, fmt.Errorf("cannot create etcd manager: %v", err)
	}

	cd, _, err := e.GetClusterData()
	if err != nil {
		return nil, fmt.Errorf("error retrieving cluster data: %v", err)
	}

	var cv *cluster.ClusterView
	if cd == nil {
		cv = cluster.NewClusterView()
	} else {
		cv = cd.ClusterView
	}
	// The dump is passed as an argument, never as the format string: it holds
	// arbitrary data and any '%' in it would otherwise be read as a verb.
	log.Debugf("clusterView: %s", spew.Sprintf("%#v", cv))

	clusterConfig := cv.Config.ToConfig()
	log.Debugf("clusterConfig: %s", spew.Sprintf("%#v", clusterConfig))

	p := &PostgresKeeper{id: id,
		dataDir:         cfg.dataDir,
		e:               e,
		listenAddress:   cfg.listenAddress,
		port:            cfg.port,
		pgListenAddress: cfg.pgListenAddress,
		pgPort:          cfg.pgPort,
		clusterConfig:   clusterConfig,
		stop:            stop,
		end:             end,
	}

	// The postgres parameters are derived from the keepers currently listed as
	// followers of this keeper in the cluster view.
	followersIDs := cv.GetFollowersIDs(p.id)
	pgParameters := p.createPGParameters(followersIDs)
	pgm, err := postgresql.NewManager(id, cfg.pgBinPath, cfg.dataDir, cfg.pgConfDir, pgParameters, p.getOurConnString(), p.getOurReplConnString(), clusterConfig.PGReplUser, clusterConfig.PGReplPassword, clusterConfig.RequestTimeout)
	if err != nil {
		return nil, fmt.Errorf("cannot create postgres manager: %v", err)
	}
	p.pgm = pgm
	return p, nil
}
示例#3
0
// printTree writes, through stdout, the tree of followers rooted at id, one
// node per line, using box-drawing characters for the branches.
//
// level is the depth of id in the tree (a node at level 0 is printed without
// a branch marker), prefix is the text emitted before the branch marker of
// id, and tail tells whether id is the last child of its parent. The node
// whose id equals cv.Master is tagged with " (master)".
func printTree(id string, cv *cluster.ClusterView, level int, prefix string, tail bool) {
	out := prefix
	if level > 0 {
		if tail {
			out += "└─"
		} else {
			out += "├─"
		}
	}
	out += id
	if id == cv.Master {
		out += " (master)"
	}
	stdout(out)

	// The prefix handed to the children depends only on this node, so it is
	// computed once: a last child has nothing left to draw below it (blank
	// padding, none at all for the root), any other node needs a vertical bar
	// to reach its following siblings.
	childPrefix := prefix + "│ "
	if tail {
		childPrefix = prefix
		if level > 0 {
			childPrefix += "  "
		}
	}

	// Fetch the followers once and use the same list for both the count and
	// the iteration.
	followersIDs := cv.GetFollowersIDs(id)
	last := len(followersIDs) - 1
	for i, f := range followersIDs {
		printTree(f, cv, level+1, childPrefix, i == last)
	}
}
示例#4
0
// Start runs the keeper main loop until something is received from p.stop.
//
// It first retries, sleeping cluster.DefaultSleepInterval between attempts,
// until the cluster data can be read, derives the cluster config from it,
// loads the cluster view version, creates the postgres manager and calls Stop
// on it. It then registers the "/info" and "/pgstate" HTTP handlers, starts
// the HTTP server and keeps running postgresKeeperSM and updatePGState, each
// in its own goroutine and each rescheduled p.clusterConfig.SleepInterval
// after its previous run has finished.
//
// The outcome is reported on p.end: nil on a requested stop, an error when
// the cluster version file cannot be loaded.
func (p *PostgresKeeper) Start() {
	// These channels have one slot of buffer so that a goroutine still running
	// when Start returns can deliver its completion signal and exit instead of
	// blocking forever on a send nobody will receive. One slot is enough: a
	// new run is only scheduled after the previous completion was received.
	endSMCh := make(chan struct{}, 1)
	endPgStatecheckerCh := make(chan struct{}, 1)
	endApiCh := make(chan error, 1)

	var err error
	var cd *cluster.ClusterData
	// TODO(sgotti) make the postgres manager stateless and instantiate a
	// new one at every check loop, this will avoid the need to loop here
	// to get the clusterconfig
	for {
		cd, _, err = p.e.GetClusterData()
		if err == nil {
			break
		}
		log.Errorf("error retrieving cluster data: %v", err)
		time.Sleep(cluster.DefaultSleepInterval)
	}

	var cv *cluster.ClusterView
	if cd == nil {
		cv = cluster.NewClusterView()
	} else {
		cv = cd.ClusterView
	}
	// The dump is passed as an argument, never as the format string: it holds
	// arbitrary data and any '%' in it would otherwise be read as a verb.
	log.Debugf("clusterView: %s", spew.Sprintf("%#v", cv))

	p.clusterConfig = cv.Config.ToConfig()
	log.Debugf("clusterConfig: %s", spew.Sprintf("%#v", p.clusterConfig))

	if err := p.loadCVVersion(); err != nil {
		p.end <- fmt.Errorf("failed to load cluster version file: %v", err)
		return
	}

	// TODO(sgotti) reconfigure the various configurations options (PGRepl*
	// and RequestTimeout) after a changed cluster config
	followersIDs := cv.GetFollowersIDs(p.id)
	pgParameters := p.createPGParameters(followersIDs)
	pgm := postgresql.NewManager(p.id, cfg.pgBinPath, cfg.dataDir, cfg.pgConfDir, pgParameters, p.getOurConnString(), p.getOurReplConnString(), p.clusterConfig.PGReplUser, p.clusterConfig.PGReplPassword, p.clusterConfig.RequestTimeout)
	p.pgm = pgm

	p.pgm.Stop(true)

	http.HandleFunc("/info", p.infoHandler)
	http.HandleFunc("/pgstate", p.pgStateHandler)
	go func() {
		endApiCh <- http.ListenAndServe(fmt.Sprintf("%s:%s", p.listenAddress, p.port), nil)
	}()

	ctx, cancel := context.WithCancel(context.Background())
	// Zero-duration timers make both periodic jobs run right away.
	smTimerCh := time.NewTimer(0).C
	updatePGStateTimerCh := time.NewTimer(0).C
	for {
		select {
		case <-p.stop:
			log.Debugf("stopping stolon keeper")
			cancel()
			p.pgm.Stop(true)
			p.end <- nil
			return
		case <-smTimerCh:
			go func() {
				p.postgresKeeperSM(ctx)
				endSMCh <- struct{}{}
			}()
		case <-endSMCh:
			// Re-arm only once the previous run is over, so runs never overlap.
			smTimerCh = time.NewTimer(p.clusterConfig.SleepInterval).C
		case <-updatePGStateTimerCh:
			go func() {
				p.updatePGState(ctx)
				endPgStatecheckerCh <- struct{}{}
			}()
		case <-endPgStatecheckerCh:
			updatePGStateTimerCh = time.NewTimer(p.clusterConfig.SleepInterval).C
		case err := <-endApiCh:
			if err != nil {
				log.Fatal("ListenAndServe: ", err)
			}
			// The HTTP server ended without an error: closing p.stop makes the
			// stop case above fire on the next iteration.
			close(p.stop)
		}
	}
}
示例#5
0
// updateClusterView computes the next cluster view from the current view cv
// and the latest keepers state. The result is built on cv.Copy().
//
// Master selection:
//   - when cv has no master, exactly one keeper must be registered, with a
//     known pg state reporting an initialized instance; it becomes the master;
//   - otherwise the current master is kept, unless it is unhealthy or
//     isKeeperConverged reports false for it, in which case GetBestStandby is
//     asked for a replacement. If that fails, or returns the current master,
//     the current master is kept.
//
// Every keeper in keepersState that has no role yet gets one. When the master
// changes, only the new master's Follow is cleared; the other keepers are
// pointed at the master only once the master is unchanged, healthy and
// converged. The proxy configuration is then refreshed through
// updateProxyConf. Version is incremented and ChangeTime set only if the new
// view differs from cv.
//
// An error is returned when the cluster cannot be initialized or when the
// state of the current master is missing from keepersState.
func (s *Sentinel) updateClusterView(cv *cluster.ClusterView, keepersState cluster.KeepersState) (*cluster.ClusterView, error) {
	var wantedMasterID string
	if cv.Master == "" {
		log.Debugf("trying to find initial master")
		// Check for an initial master
		if len(keepersState) < 1 {
			return nil, fmt.Errorf("cannot init cluster, no keepers registered")
		}
		if len(keepersState) > 1 {
			return nil, fmt.Errorf("cannot init cluster, more than 1 keeper registered")
		}
		// Exactly one entry at this point: the loop only extracts it.
		for id, k := range keepersState {
			if k.PGState == nil {
				return nil, fmt.Errorf("cannot init cluster using keeper %q since its pg state is unknown", id)
			}
			if !k.PGState.Initialized {
				return nil, fmt.Errorf("cannot init cluster using keeper %q since pg instance is not initialized", id)
			}
			log.Infof("initializing cluster with master: %q", id)
			wantedMasterID = id
			break
		}
	} else {
		masterID := cv.Master
		wantedMasterID = masterID

		masterOK := true
		master, ok := keepersState[masterID]
		if !ok {
			return nil, fmt.Errorf("keeper state for master %q not available. This shouldn't happen!", masterID)
		}
		// The dump is passed as an argument, never as the format string: any
		// '%' in it would otherwise be read as a verb.
		log.Debugf("masterState: %s", spew.Sprintf("%#v", master))

		if !master.Healthy {
			log.Infof("master is failed")
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if !s.isKeeperConverged(master, cv) {
			log.Infof("keeper %s not yet master", masterID)
			masterOK = false
		}

		if !masterOK {
			log.Infof("trying to find a standby to replace failed master")
			bestStandby, err := s.GetBestStandby(cv, keepersState, masterID)
			if err != nil {
				// Not fatal: the current master stays the wanted one.
				log.Errorf("error trying to find the best standby: %v", err)
			} else {
				if bestStandby != masterID {
					log.Infof("electing new master: %q", bestStandby)
					wantedMasterID = bestStandby
				} else {
					log.Infof("cannot find a good standby to replace failed master")
				}
			}
		}
	}

	newCV := cv.Copy()
	newKeepersRole := newCV.KeepersRole

	// Add new keepersRole from keepersState
	for id := range keepersState {
		if _, ok := newKeepersRole[id]; !ok {
			if err := newKeepersRole.Add(id, ""); err != nil {
				// This shouldn't happen
				panic(err)
			}
		}
	}

	// Setup master role
	if cv.Master != wantedMasterID {
		newCV.Master = wantedMasterID
		newKeepersRole[wantedMasterID].Follow = ""
	}

	// Setup standbys
	if cv.Master == wantedMasterID {
		// wanted master is the previous one
		masterState := keepersState[wantedMasterID]
		if masterState.Healthy && s.isKeeperConverged(masterState, cv) {
			for id := range newKeepersRole {
				if id == wantedMasterID {
					continue
				}
				newKeepersRole[id].Follow = wantedMasterID
			}
		}
	}

	s.updateProxyConf(cv, newCV, keepersState)

	// Only an actual change produces a new version.
	if !newCV.Equals(cv) {
		newCV.Version = cv.Version + 1
		newCV.ChangeTime = time.Now()
	}
	return newCV, nil
}
示例#6
0
// updateClusterView computes the next cluster view from the current view cv
// and the latest members state. The result is built on cv.Copy().
//
// Master selection:
//   - when cv.Version is 0 (first initialization), exactly one member must be
//     registered, with a known pg state; it becomes the master;
//   - otherwise the current master is kept, unless isMemberHealthy or
//     isMemberConverged reports false for it, in which case GetBestStandby is
//     asked for a replacement. If that fails, or returns the current master,
//     the current master is kept.
//
// Every member in membersState that has no role yet gets an empty one. When
// the master changes, only the new master's role is reset; the other members
// are pointed at the master only once the master is unchanged, healthy and
// converged. Version is incremented and ChangeTime set only if the new view
// differs from cv.
//
// An error is returned when the cluster cannot be initialized or when the
// state of the current master is missing from membersState.
func (s *Sentinel) updateClusterView(cv *cluster.ClusterView, membersState cluster.MembersState) (*cluster.ClusterView, error) {
	var wantedMasterID string
	// Cluster first initialization
	if cv.Version == 0 {
		log.Debugf("trying to find initial master")
		// Check for an initial master
		if len(membersState) < 1 {
			return nil, fmt.Errorf("cannot init cluster, no members registered")
		}
		if len(membersState) > 1 {
			return nil, fmt.Errorf("cannot init cluster, more than 1 member registered")
		}
		// Exactly one entry at this point: the loop only extracts it.
		for id, m := range membersState {
			if m.PGState == nil {
				return nil, fmt.Errorf("cannot init cluster using member %q since its pg state is unknown", id)
			}
			log.Infof("Initializing cluster with master: %q", id)
			wantedMasterID = id
			break
		}
	} else {
		masterID := cv.Master

		masterOK := true
		master, ok := membersState[masterID]
		if !ok {
			return nil, fmt.Errorf("member state for master %q not available. This shouldn't happen!", masterID)
		}
		// The dump is passed as an argument, never as the format string: any
		// '%' in it would otherwise be read as a verb.
		log.Debugf("masterState: %s", spew.Sprintf("%#v", master))

		if !s.isMemberHealthy(master) {
			log.Infof("master is failed")
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if !s.isMemberConverged(master, cv) {
			log.Infof("member %s not yet master", masterID)
			masterOK = false
		}

		wantedMasterID = masterID
		if !masterOK {
			log.Infof("trying to find a standby to replace failed master")
			bestStandby, err := s.GetBestStandby(cv, membersState, masterID)
			if err != nil {
				// Not fatal: the current master stays the wanted one.
				log.Errorf("error trying to find the best standby: %v", err)
			} else {
				if bestStandby != masterID {
					log.Infof("electing new master: %q", bestStandby)
					wantedMasterID = bestStandby
				} else {
					log.Infof("cannot find a good standby to replace failed master")
				}
			}
		}
	}

	newCV := cv.Copy()
	newMembersRole := newCV.MembersRole

	// Add new members from membersState
	for id := range membersState {
		if _, ok := newMembersRole[id]; !ok {
			newMembersRole[id] = &cluster.MemberRole{}
		}

	}

	// Setup master role
	if cv.Master != wantedMasterID {
		newCV.Master = wantedMasterID
		newMembersRole[wantedMasterID] = &cluster.MemberRole{Follow: ""}
	}

	// Setup standbys
	if cv.Master == wantedMasterID {
		// wanted master is the previous one
		masterState := membersState[wantedMasterID]
		if s.isMemberHealthy(masterState) && s.isMemberConverged(masterState, cv) {
			for id := range newMembersRole {
				if id == wantedMasterID {
					continue
				}
				newMembersRole[id] = &cluster.MemberRole{Follow: wantedMasterID}
			}
		}
	}

	// Only an actual change produces a new version.
	if !newCV.Equals(cv) {
		newCV.Version = cv.Version + 1
		newCV.ChangeTime = time.Now()
	}
	return newCV, nil
}