// Given the current state, figure out our current role and update our xlog
// position accordingly. This is used when we assume a new role or when postgres
// comes online in order to simulate client writes to the primary, synchronous
// replication (and catch-up) on the sync, and asynchronous replication on the
// other peers.
func (p *postgresSimulatorClient) updateXlog(ds *state.DiscoverdState) {
	if ds.State == nil || !p.Online || p.Config == nil {
		return
	}
	s := ds.State

	var role state.Role
	switch {
	case s.Primary.ID == p.inst.ID:
		role = state.RolePrimary
	case s.Sync.ID == p.inst.ID:
		role = state.RoleSync
	case p.Config.Role == state.RoleAsync:
		role = state.RoleAsync
	default:
		role = state.RoleNone
	}

	// If the peer we're testing is an async or unassigned, we don't modify the
	// transaction log position at all. We act as though these are getting
	// arbitrarily far behind (since that should be fine).
	if role == state.RoleAsync || role == state.RoleNone {
		return
	}

	// If the peer we're testing is a primary, we act as though the sync
	// instantly connected and caught up, and we start taking writes immediately
	// and bump the transaction log position.
	if role == state.RolePrimary {
		if cmp, err := xlog.Compare(s.InitWAL, p.XLog); err != nil {
			panic(err)
		} else if cmp > 0 {
			panic("primary is behind the generation's initial xlog")
		}
		var err error
		p.XLog, err = xlog.Increment(p.XLog, 10)
		if err != nil {
			panic(err)
		}
		return
	}

	// The most complicated case is the sync, for which we need to schedule the
	// wal position to catch up to the primary's.
	if role != state.RoleSync {
		panic("unexpected role")
	}
	if cmp, err := xlog.Compare(s.InitWAL, p.XLog); err != nil {
		panic(err)
	} else if cmp < 0 {
		panic("sync is ahead of primary")
	}
	p.XLogWaiting = s.InitWAL
}
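The simulator above leans on the xlog package's Compare and Increment helpers, and its panics ("primary is behind the generation's initial xlog", "sync is ahead of primary") only make sense given how positions are ordered. As a rough, self-contained illustration of the semantics it assumes, positions can be treated as PostgreSQL-style "X/Y" hexadecimal log sequence numbers compared numerically; the parseLSN and compare names and the exact format here are assumptions for illustration, not the real xlog package.

// Standalone sketch of the assumed position semantics: "X/Y" hex LSN strings,
// ordered numerically. Illustration only, not the actual xlog implementation.
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseLSN converts an "X/Y" hex position into a single 64-bit offset.
func parseLSN(pos string) (uint64, error) {
	parts := strings.SplitN(pos, "/", 2)
	if len(parts) != 2 {
		return 0, fmt.Errorf("malformed position %q", pos)
	}
	hi, err := strconv.ParseUint(parts[0], 16, 32)
	if err != nil {
		return 0, err
	}
	lo, err := strconv.ParseUint(parts[1], 16, 32)
	if err != nil {
		return 0, err
	}
	return hi<<32 | lo, nil
}

// compare returns -1, 0, or 1 if x is behind, equal to, or ahead of y.
func compare(x, y string) (int, error) {
	a, err := parseLSN(x)
	if err != nil {
		return 0, err
	}
	b, err := parseLSN(y)
	if err != nil {
		return 0, err
	}
	switch {
	case a < b:
		return -1, nil
	case a > b:
		return 1, nil
	default:
		return 0, nil
	}
}

func main() {
	// The primary check above panics when the generation's InitWAL is ahead
	// of the local position, i.e. compare(initWAL, local) > 0.
	cmp, err := compare("0/00000010", "0/0000000A")
	fmt.Println(cmp, err) // 1 <nil>
}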
func (p *Postgres) waitForSync(inst *discoverd.Instance, enableWrites bool) {
	stopCh := make(chan struct{})
	doneCh := make(chan struct{})

	var cancelOnce sync.Once
	p.cancelSyncWait = func() {
		cancelOnce.Do(func() {
			close(stopCh)
			<-doneCh
		})
	}

	go func() {
		defer close(doneCh)

		startTime := time.Now().UTC()
		lastFlushed := xlog.Zero
		log := p.log.New(
			"fn", "waitForSync",
			"sync_name", inst.Meta["POSTGRES_ID"],
			"start_time", log15.Lazy{func() time.Time { return startTime }},
			"last_flushed", log15.Lazy{func() xlog.Position { return lastFlushed }},
		)

		shouldStop := func() bool {
			select {
			case <-stopCh:
				log.Debug("canceled, stopping")
				return true
			default:
				return false
			}
		}

		sleep := func() bool {
			select {
			case <-stopCh:
				log.Debug("canceled, stopping")
				return false
			case <-time.After(checkInterval):
				return true
			}
		}

		log.Info("waiting for downstream replication to catch up")

		for {
			if shouldStop() {
				return
			}
			sent, flushed, err := p.checkReplStatus(inst.Meta["POSTGRES_ID"])
			if err != nil {
				// If we can't query the replication state, we just keep trying.
				// We do not count this as part of the replication timeout.
				// Generally this means the standby hasn't started or is unable
				// to start. This means that the standby will eventually time
				// itself out and we will exit the loop since a new event will
				// be emitted when the standby leaves the cluster.
				startTime = time.Now().UTC()
				if !sleep() {
					return
				}
				continue
			}
			elapsedTime := time.Now().Sub(startTime)
			log := log.New("sent", sent, "flushed", flushed, "elapsed", elapsedTime)

			if cmp, err := xlog.Compare(lastFlushed, flushed); err != nil {
				log.Error("error parsing log locations", "err", err)
				return
			} else if lastFlushed == xlog.Zero || cmp == -1 {
				log.Debug("flushed row incremented, resetting startTime")
				startTime = time.Now().UTC()
				lastFlushed = flushed
			}

			if sent == flushed {
				log.Info("downstream caught up")
				p.setSyncedDownstream(inst)
				break
			} else if elapsedTime > p.replTimeout {
				log.Error("error checking replication status", "err", "downstream unable to make forward progress")
				return
			} else {
				log.Debug("continuing replication check")
				if !sleep() {
					return
				}
				continue
			}
		}

		if enableWrites {
			// sync caught up, enable write transactions
			if err := p.writeConfig(configData{Sync: inst.Meta["POSTGRES_ID"]}); err != nil {
				log.Error("error writing postgres.conf", "err", err)
				return
			}
			if err := p.sighup(); err != nil {
				log.Error("error calling sighup", "err", err)
				return
			}
		}
	}()
}
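waitForSync installs p.cancelSyncWait so a caller can abort the background wait exactly once and block until the goroutine has fully exited. A minimal, self-contained sketch of that stop/done channel pattern follows; the startWorker name and timings are made up for the example and are independent of the Postgres type.

// Minimal sketch of the cancel-once pattern used by waitForSync: closing
// stopCh tells the worker to stop, doneCh reports that it has exited, and
// sync.Once makes the returned cancel function safe to call more than once.
package main

import (
	"fmt"
	"sync"
	"time"
)

func startWorker() (cancel func()) {
	stopCh := make(chan struct{})
	doneCh := make(chan struct{})

	go func() {
		defer close(doneCh)
		for {
			select {
			case <-stopCh:
				fmt.Println("worker: canceled, stopping")
				return
			case <-time.After(100 * time.Millisecond):
				fmt.Println("worker: still waiting")
			}
		}
	}()

	var once sync.Once
	return func() {
		once.Do(func() {
			close(stopCh)
			<-doneCh // block until the worker has actually exited
		})
	}
}

func main() {
	cancel := startWorker()
	time.Sleep(250 * time.Millisecond)
	cancel()
	cancel() // safe: the second call is a no-op
}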
func (p *Peer) startTakeoverWithPeer(reason string, minWAL xlog.Position, newState *State) (err error) {
	log := p.log.New("fn", "startTakeoverWithPeer", "reason", reason, "min_wal", minWAL)
	log.Info("starting takeover")

	if p.updatingState != nil {
		panic("startTakeoverWithPeer with non-nil updatingState")
	}
	newState.Generation = p.Info().State.Generation + 1
	newState.Primary = p.self
	p.updatingState = newState

	if p.updatingState.Primary.Meta["POSTGRES_ID"] != p.Info().State.Primary.Meta["POSTGRES_ID"] && len(p.updatingState.Deposed) == 0 {
		panic("startTakeoverWithPeer without deposing old primary")
	}

	defer func() {
		if err == nil {
			return
		}
		p.updatingState = nil

		switch err {
		case ErrPostgresOffline:
			// If postgres is offline, it's because we haven't started yet, so
			// trigger another state evaluation after we start it.
			log.Error("failed to declare new generation, trying later", "err", err)
			p.triggerEval()
		case ErrClusterFrozen:
			log.Error("failed to declare new generation", "err", err)
		default:
			// In the event of an error, back off a bit and check state again in
			// a second. There are several transient failure modes that will resolve
			// themselves (e.g. postgres synchronous replication not yet caught up).
			log.Error("failed to declare new generation, backing off", "err", err)
			p.evalLater(1 * time.Second)
		}
	}()

	if p.Info().State.Freeze != nil {
		return ErrClusterFrozen
	}

	// In order to declare a new generation, we'll need to fetch our current
	// transaction log position, which requires that postgres be online. In most
	// cases, it will be, since we only declare a new generation as a primary or
	// a caught-up sync. During initial startup, however, we may find out
	// simultaneously that we're the primary or sync AND that the other is gone,
	// so we may attempt to declare a new generation before we've started
	// postgres. In this case, this step will fail, but we'll just skip the
	// takeover attempt until postgres is running.
	if !*p.pgOnline {
		return ErrPostgresOffline
	}

	wal, err := p.postgres.XLogPosition()
	if err != nil {
		return err
	}

	if x, err := xlog.Compare(wal, minWAL); err != nil || x < 0 {
		if err == nil {
			log.Warn("would attempt takeover but not caught up with primary yet", "found_wal", wal)
			err = ErrPeerNotCaughtUp
		}
		return err
	}
	p.updatingState.InitWAL = wal

	log.Info("declaring new generation")
	if err := p.putClusterState(); err != nil {
		return err
	}

	p.setState(p.updatingState)
	p.updatingState = nil
	p.generation = p.Info().State.Generation
	log.Info("declared new generation", "generation", p.Info().State.Generation)

	// assumePrimary() calls evalClusterState() to catch any
	// changes we missed while we were updating.
	p.assumePrimary()

	return nil
}
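The deferred handler above encodes the retry policy for a failed takeover: an offline postgres means the takeover is simply re-evaluated once postgres is up, a frozen cluster is not retried, and anything else is assumed transient and retried after a short back-off. A small sketch of that classification follows; the scheduler type, error values, and function names here are stand-ins for illustration, not the Peer's actual API.

// Sketch of the retry policy in startTakeoverWithPeer's deferred error handler.
package main

import (
	"errors"
	"fmt"
	"time"
)

var (
	errPostgresOffline = errors.New("postgres is offline")
	errClusterFrozen   = errors.New("cluster is frozen")
)

type scheduler struct{}

func (scheduler) triggerEval()              { fmt.Println("re-evaluating now") }
func (scheduler) evalLater(d time.Duration) { fmt.Printf("re-evaluating in %s\n", d) }

func handleTakeoverError(s scheduler, err error) {
	switch err {
	case nil:
		// success: nothing to reschedule
	case errPostgresOffline:
		// postgres hasn't started yet; evaluate again once it is up
		s.triggerEval()
	case errClusterFrozen:
		// a frozen cluster is an operator decision, so don't retry
	default:
		// transient failures (e.g. a sync not yet caught up) resolve on
		// their own, so back off briefly and re-check
		s.evalLater(1 * time.Second)
	}
}

func main() {
	s := scheduler{}
	handleTakeoverError(s, errPostgresOffline)
	handleTakeoverError(s, errors.New("sync not caught up"))
}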