func main() {
	mongoSession, err := mgo.Dial("127.0.0.1")
	if err != nil {
		panic(err)
	}
	defer mongoSession.Close()
	mongoSession.SetMode(mgo.Monotonic, true)

	log := zap.NewJSON(
		zap.Debug,
		zap.Fields(zap.Int("count", 1)),
		zap.Output(NewWriter(mongoSession)),
	)

	url := "http://example.local"
	tryNum := 42
	startTime := time.Now()
	for i := range [logCounts]struct{}{} {
		log.Info("Failed to fetch URL.",
			zap.String("url", url),
			zap.Int("attempt", tryNum),
			zap.Duration("backoff", time.Since(startTime)),
			zap.Int("index", i),
		)
	}
	fmt.Printf("Finished in %v\n", time.Since(startTime))
}
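The example above routes zap's output through a custom NewWriter(mongoSession), whose definition isn't shown in this listing. A minimal sketch of what such a writer could look like, assuming a zap.WriteSyncer that stores each serialized log line as a MongoDB document; the mongoWriter type and the "logs"/"entries" collection names are illustrative assumptions, not part of the original example.

import (
	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
)

// mongoWriter is a hypothetical zap.WriteSyncer backed by MongoDB.
type mongoWriter struct {
	session *mgo.Session
}

func NewWriter(session *mgo.Session) *mongoWriter {
	return &mongoWriter{session: session}
}

// Write receives one serialized log entry per call and stores it as a document.
func (w *mongoWriter) Write(p []byte) (int, error) {
	if err := w.session.DB("logs").C("entries").Insert(bson.M{"entry": string(p)}); err != nil {
		return 0, err
	}
	return len(p), nil
}

// Sync is a no-op here; each Write is already sent to the server.
func (w *mongoWriter) Sync() error {
	return nil
}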
func (conn *Connection) reader(responses chan<- Response, logger zap.Logger) {
	buffer := make([]byte, 6)
	for {
		n, err := conn.conn.Read(buffer)
		if err != nil && n < 6 {
			logger.Info("APNS: Connection error before reading complete response",
				zap.Int("connectionId", conn.id),
				zap.Int("n", n),
				zap.Error(err),
			)
			conn.shouldReconnect <- true
			return
		} else if err != nil {
			logger.Info("APNS: Connection error before reading complete response",
				zap.Int("connectionId", conn.id),
				zap.Error(err),
			)
		}
		command := uint8(buffer[0])
		if command != 8 {
			logger.Info("APNS: Something went wrong in a connection - Command should have been 8 but it had another value instead",
				zap.Int("connectionId", conn.id),
				zap.Object("commandValue", command),
			)
		}
		resp := newResponse()
		resp.Identifier = binary.BigEndian.Uint32(buffer[2:6])
		resp.Status = uint8(buffer[1])
		responses <- resp
		conn.shouldReconnect <- true
		return
	}
}
func (c *Connection) spinUntilReconnect(logger zap.Logger) {
	var backoff = time.Duration(100)
	for {
		logger.Info("APNS: Connection lost. Reconnecting",
			zap.Int("connectionId", c.id),
		)
		err := c.connect(logger)
		if err != nil {
			// Exponential backoff up to a limit
			logger.Info("APNS: Error connecting to server",
				zap.Int("connectionId", c.id),
				zap.Error(err),
			)
			backoff = backoff * 2
			if backoff > maxBackoff {
				backoff = maxBackoff
			}
			time.Sleep(backoff)
		} else {
			backoff = 100
			logger.Info("APNS: New connection established",
				zap.Int("connectionId", c.id),
			)
			break
		}
	}
}
func (cg *ConsumerGroup) topicListConsumer(topics []string, logger zap.Logger) {
	for {
		select {
		case <-cg.stopper:
			return
		default:
		}

		consumers, consumerChanges, err := cg.group.WatchInstances()
		if err != nil {
			logger.Fatal("KAFKA: FAILED to get list of registered consumer instances for replica",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(err),
			)
			return
		}

		cg.consumers = consumers
		logger.Info("KAFKA: Got currently registered consumers for replica",
			zap.Int("replicaId", cg.replicaId),
			zap.Int("numRegisteredConsumers", len(cg.consumers)),
		)

		stopper := make(chan struct{})

		for _, topic := range topics {
			cg.wg.Add(1)
			go cg.topicConsumer(topic, cg.messages, cg.errors, stopper, logger)
		}

		select {
		case <-cg.stopper:
			close(stopper)
			return
		case event := <-consumerChanges:
			if event.Err == zk.ErrSessionExpired || event.Err == zk.ErrConnectionClosed {
				logger.Info("KAFKA: Session was expired, reloading consumer for replica",
					zap.Int("replicaId", cg.replicaId),
				)
				go cg.reload(logger)
				<-cg.stopper
				close(stopper)
				return
			} else {
				logger.Info("KAFKA: Triggering rebalance due to consumer list change in replica",
					zap.Int("replicaId", cg.replicaId),
				)
				close(stopper)
				cg.wg.Wait()
			}
		}
	}
}
// TestLogError ...
func TestLogError(t *testing.T) {
	Convey("Check Log standard error", t, func() {
		buf := &bytes.Buffer{}
		logger := zap.NewJSON(zap.DebugLevel, zap.Output(zap.AddSync(buf)))
		logger.StubTime()

		LogError(logger, errors.New("message"))
		So(string(buf.Bytes()), ShouldEqual, `{"msg":"message","level":"error","ts":0,"fields":{}}`+"\n")
	})

	Convey("Check Log ErrorEx", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		buf := &bytes.Buffer{}
		logger := zap.NewJSON(zap.DebugLevel, zap.Output(zap.AddSync(buf)))
		logger.StubTime()

		LogError(logger, NewEx(zap.DebugLevel, "message", zap.String("field1", "field1 data"), zap.Int("field2", 5)))
		So(string(buf.Bytes()), ShouldEqual, `{"msg":"message","level":"debug","ts":0,"fields":{"caller":"<fake>","field1":"field1 data","field2":5}}`+"\n")
	})
}
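The LogError helper exercised by this test isn't shown in the listing. One plausible sketch, assuming it logs plain errors at the error level and *ErrorEx values at their own level with their attached fields (the original implementation may differ in detail); it only uses logger methods that appear elsewhere in these examples.

func LogError(logger zap.Logger, err error) {
	// Plain errors carry no level or fields of their own.
	werr, ok := err.(*ErrorEx)
	if !ok {
		logger.Error(err.Error())
		return
	}
	// ErrorEx values choose their own level and bring their own fields.
	switch werr.Level {
	case zap.DebugLevel:
		logger.Debug(werr.Error(), werr.Fields...)
	case zap.InfoLevel:
		logger.Info(werr.Error(), werr.Fields...)
	case zap.WarnLevel:
		logger.Warn(werr.Error(), werr.Fields...)
	default:
		logger.Error(werr.Error(), werr.Fields...)
	}
}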
func (zom *zookeeperOffsetManager) FinalizePartition(topic string, partition int32, lastOffset int64, timeout time.Duration, replicaId int, logger zap.Logger) error {
	zom.l.RLock()
	tracker := zom.offsets[topic][partition]
	zom.l.RUnlock()

	if lastOffset >= 0 {
		if lastOffset-tracker.highestProcessedOffset > 0 {
			logger.Info("ZOOKEEPER: Finalizing partition. Waiting before processing remaining messages",
				zap.Int("replicaId", replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
				zap.Int64("lastProcessedOffset", tracker.highestProcessedOffset),
				zap.Duration("waitingTimeToProcessMoreMessages", timeout/time.Second),
				zap.Int64("numMessagesToProcess", lastOffset-tracker.highestProcessedOffset),
			)
			if !tracker.waitForOffset(lastOffset, timeout) {
				return fmt.Errorf("REP %d - TIMEOUT waiting for offset %d. Last committed offset: %d", replicaId, lastOffset, tracker.lastCommittedOffset)
			}
		}

		if err := zom.commitOffset(topic, partition, tracker, logger); err != nil {
			return fmt.Errorf("REP %d - FAILED to commit offset %d to Zookeeper. Last committed offset: %d", replicaId, tracker.highestProcessedOffset, tracker.lastCommittedOffset)
		}
	}

	zom.l.Lock()
	delete(zom.offsets[topic], partition)
	zom.l.Unlock()

	return nil
}
// Stop gracefully closes the connection - it waits for the sending queue to clear, and then shuts down.
func (conn *Connection) Stop(logger zap.Logger) chan bool {
	logger.Info("APNS: Shutting down one of the connections",
		zap.Int("connectionId", conn.id),
	)
	conn.stopping <- true
	return conn.stopped
	// Thought: Don't necessarily need a channel here. Could signal finishing by closing errors?
}
func (cg *ConsumerGroup) reload(logger zap.Logger) error {
	cg.reloadMutex.Lock()
	defer cg.reloadMutex.Unlock()
	cg.singleReload.Do(func() {
		logger.Info("KAFKA: Closing down old connections for replica",
			zap.Int("replicaId", cg.replicaId),
		)
		err := cg.Close(logger)
		if err != nil {
			logger.Error("KAFKA: Failed to close consumergroup for replica",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(err),
			)
		}
		cg.Load(logger)
	})
	return nil
}
func checkStatusCode(statusCode int) error {
	if statusCode != 200 {
		lvl := zap.ErrorLevel
		// 401 Unauthorized
		// 404 Not Found
		if statusCode == 404 || statusCode == 401 {
			lvl = zap.WarnLevel
		}
		return werrors.NewEx(lvl, ErrStatusCode, zap.Int("status_code", statusCode))
	}
	return nil
}
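checkStatusCode builds on the werrors package, whose ErrorEx type carries a zap level and structured fields (see TestErrorEx further down). A minimal sketch of that shape, assuming a Msg field for the message and omitting the caller-field handling the tests expect:

// ErrorEx is an error enriched with a zap level and structured fields.
type ErrorEx struct {
	Msg    string
	Level  zap.Level
	Fields []zap.Field
}

func (e *ErrorEx) Error() string { return e.Msg }

// NewEx builds an ErrorEx at an explicit level with extra fields.
func NewEx(level zap.Level, msg string, fields ...zap.Field) error {
	return &ErrorEx{Msg: msg, Level: level, Fields: fields}
}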
func (cg *ConsumerGroup) Close(logger zap.Logger) error {
	shutdownError := AlreadyClosing
	cg.singleShutdown.Do(func() {
		defer cg.kazoo.Close()

		shutdownError = nil

		close(cg.stopper)
		cg.wg.Wait()

		if err := cg.offsetManager.Close(logger); err != nil {
			logger.Error("KAFKA: FAILED closing the offset manager for replica!",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(err),
			)
		}

		if shutdownError = cg.instance.Deregister(); shutdownError != nil {
			logger.Warn("KAFKA: Replica FAILED deregistering consumer instance",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(shutdownError),
			)
		} else {
			logger.Info("KAFKA: Replica deregistered consumer instance",
				zap.Int("replicaId", cg.replicaId),
				zap.String("instanceId", cg.instance.ID),
			)
		}

		if shutdownError = cg.consumer.Close(); shutdownError != nil {
			logger.Error("Replica FAILED closing the Sarama client",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(shutdownError),
			)
		}

		close(cg.messages)
		close(cg.errors)
		cg.instance = nil
	})

	return shutdownError
}
// Start initiates a connection to APNS and asynchronously sends notifications which have been queued.
func (conn *Connection) Start(logger zap.Logger) error {
	// Connect to APNS. The reason this is here as well as in sender is that this probably catches any unavoidable errors
	// in a synchronous fashion, while in sender it can reconnect after temporary errors (which should work most of the time.)
	err := conn.connect(logger)
	if err != nil {
		logger.Fatal("APNS: Failed to connect",
			zap.Int("connectionId", conn.id),
			zap.Error(err),
		)
		return err
	}
	// Start sender goroutine
	sent := make(chan PushNotification, 10000)
	go conn.sender(conn.queue, sent, logger)
	// Start limbo goroutine
	go conn.limbo(sent, conn.responses, conn.errors, conn.queue, logger)
	return nil
}
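Putting Start and Stop together, a caller might drive a connection roughly like this. The Connection constructor is not part of these examples, so an already-built *Connection is assumed and this is only a sketch of the lifecycle:

// Hypothetical wiring for an already-constructed APNS Connection.
func runConnection(conn *Connection, logger zap.Logger) error {
	if err := conn.Start(logger); err != nil {
		return err
	}
	// ... enqueue PushNotification values elsewhere while the connection runs ...
	stopped := conn.Stop(logger) // ask the sender to drain its queue
	<-stopped                    // block until the connection reports it has fully stopped
	return nil
}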
func (m *hostsManager) resolveURL(hostName string) (string, error) {
	hostURL := NormalizeURL(&url.URL{Scheme: "http", Host: hostName})
	response, err := http.Get(hostURL)
	if err == nil {
		err = response.Body.Close()
		if response.StatusCode != 200 {
			return "", werrors.NewFields(ErrResolveBaseURL,
				zap.Int("status_code", response.StatusCode),
				zap.String("url", hostURL))
		}
	}
	if err != nil {
		return "", werrors.NewFields(ErrGetRequest,
			zap.String("details", err.Error()),
			zap.String("url", hostURL))
	}

	return response.Request.URL.String(), nil
}
func (r *responseParser) processBody(body []byte, contentType string) (database.State, error) {
	if !isHTML(body) {
		return database.StateParseError, werrors.New(ErrBodyNotHTML)
	}

	bodyReader, err := bodyToUTF8(body, contentType)
	if err != nil {
		return database.StateEncodingError, err
	}

	node, err := html.Parse(bodyReader)
	if err != nil {
		return database.StateParseError, werrors.NewDetails(ErrHTMLParse, err)
	}

	parser, err := RunDataExtrator(r.hostMng, node, r.meta.GetURL())
	if err != nil {
		return database.StateParseError, err
	}
	if !parser.MetaTagIndex {
		return database.StateNoFollow, werrors.NewLevel(zap.InfoLevel, WarnPageNotIndexed)
	}

	parser.WrongURLsToLog(r.logger)

	var buf bytes.Buffer
	err = bodyMinification(node, &buf)
	if err != nil {
		return database.StateParseError, err
	}

	r.URLs = parser.URLs
	r.meta.SetContent(proxy.NewContent(buf.Bytes(), parser.GetTitle()))

	r.logger.Debug(DbgBodySize, zap.Int("size", buf.Len()))

	return database.StateSuccess, nil
}
func (s *Sentinel) updateCluster(cd *cluster.ClusterData) (*cluster.ClusterData, error) {
	newcd := cd.DeepCopy()
	switch cd.Cluster.Status.Phase {
	case cluster.ClusterPhaseInitializing:
		switch *cd.Cluster.DefSpec().InitMode {
		case cluster.ClusterInitModeNew:
			// Is there already a keeper chosen to be the new master?
			if cd.Cluster.Status.Master == "" {
				log.Info("trying to find initial master")
				k, err := s.findInitialKeeper(cd)
				if err != nil {
					return nil, fmt.Errorf("cannot choose initial master: %v", err)
				}
				log.Info("initializing cluster", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModeNew,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := cd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exist. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has correctly initialized
				switch s.dbConvergenceState(db, cd.Cluster.DefSpec().InitTimeout.Duration) {
				case Converged:
					if db.Status.Healthy {
						log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
						// Set db initMode to none, not needed but just a security measure
						db.Spec.InitMode = cluster.DBInitModeNone
						// Don't include previous config anymore
						db.Spec.IncludeConfig = false
						// Replace reported pg parameters in cluster spec
						if *cd.Cluster.DefSpec().MergePgParameters {
							newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
						}
						// Cluster initialized, switch to Normal state
						newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
					}
				case Converging:
					log.Info("waiting for db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
				case ConvergenceFailed:
					log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Empty DBs
					newcd.DBs = cluster.DBs{}
					// Unset master so another keeper can be chosen
					newcd.Cluster.Status.Master = ""
				}
			}
		case cluster.ClusterInitModeExisting:
			if cd.Cluster.Status.Master == "" {
				wantedKeeper := cd.Cluster.DefSpec().ExistingConfig.KeeperUID
				log.Info("trying to use keeper as initial master", zap.String("keeper", wantedKeeper))
				k, ok := cd.Keepers[wantedKeeper]
				if !ok {
					return nil, fmt.Errorf("keeper %q state not available", wantedKeeper)
				}
				log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModeExisting,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := newcd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exist. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has correctly initialized
				if db.Status.Healthy && s.dbConvergenceState(db, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == Converged {
					log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Don't include previous config anymore
					db.Spec.IncludeConfig = false
					// Replace reported pg parameters in cluster spec
					if *cd.Cluster.DefSpec().MergePgParameters {
						newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
					}
					// Cluster initialized, switch to Normal state
					newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
				}
			}
		case cluster.ClusterInitModePITR:
			// Is there already a keeper chosen to be the new master?
			if cd.Cluster.Status.Master == "" {
				log.Info("trying to find initial master")
				k, err := s.findInitialKeeper(cd)
				if err != nil {
					return nil, fmt.Errorf("cannot choose initial master: %v", err)
				}
				log.Info("initializing cluster using selected keeper as master db owner", zap.String("keeper", k.UID))
				db := &cluster.DB{
					UID:        s.UIDFn(),
					Generation: cluster.InitialGeneration,
					ChangeTime: time.Now(),
					Spec: &cluster.DBSpec{
						KeeperUID:     k.UID,
						InitMode:      cluster.DBInitModePITR,
						PITRConfig:    cd.Cluster.DefSpec().PITRConfig,
						Role:          common.RoleMaster,
						Followers:     []string{},
						IncludeConfig: *cd.Cluster.DefSpec().MergePgParameters,
					},
				}
				newcd.DBs[db.UID] = db
				newcd.Cluster.Status.Master = db.UID
				log.Debug("newcd dump", zap.String("newcd", spew.Sdump(newcd)))
			} else {
				db, ok := cd.DBs[cd.Cluster.Status.Master]
				if !ok {
					panic(fmt.Errorf("db %q object doesn't exist. This shouldn't happen", cd.Cluster.Status.Master))
				}
				// Check that the db chosen to be the master has correctly initialized
				// TODO(sgotti) set a timeout (the max time for a restore operation)
				switch s.dbConvergenceState(db, 0) {
				case Converged:
					if db.Status.Healthy {
						log.Info("db initialized", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
						// Set db initMode to none, not needed but just a security measure
						db.Spec.InitMode = cluster.DBInitModeNone
						// Don't include previous config anymore
						db.Spec.IncludeConfig = false
						// Replace reported pg parameters in cluster spec
						if *cd.Cluster.DefSpec().MergePgParameters {
							newcd.Cluster.Spec.PGParameters = db.Status.PGParameters
						}
						// Cluster initialized, switch to Normal state
						newcd.Cluster.Status.Phase = cluster.ClusterPhaseNormal
					}
				case Converging:
					log.Info("waiting for db to converge", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
				case ConvergenceFailed:
					log.Info("db failed to initialize", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					// Empty DBs
					newcd.DBs = cluster.DBs{}
					// Unset master so another keeper can be chosen
					newcd.Cluster.Status.Master = ""
				}
			}
		default:
			return nil, fmt.Errorf("unknown init mode %q", cd.Cluster.DefSpec().InitMode)
		}
	case cluster.ClusterPhaseNormal:
		// TODO(sgotti) When keeper removal is implemented, remove DBs for nonexistent keepers

		// Calculate current master status
		curMasterDBUID := cd.Cluster.Status.Master
		wantedMasterDBUID := curMasterDBUID

		masterOK := true
		curMasterDB := cd.DBs[curMasterDBUID]
		if curMasterDB == nil {
			return nil, fmt.Errorf("db for keeper %q not available. This shouldn't happen!", curMasterDBUID)
		}
		log.Debug("db dump", zap.String("db", spew.Sdump(curMasterDB)))

		if !curMasterDB.Status.Healthy {
			log.Info("master db is failed", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID))
			masterOK = false
		}

		// Check that the wanted master is in master state (i.e. check that promotion from standby to master happened)
		if s.dbConvergenceState(curMasterDB, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == ConvergenceFailed {
			log.Info("db not converged", zap.String("db", curMasterDB.UID), zap.String("keeper", curMasterDB.Spec.KeeperUID))
			masterOK = false
		}

		if !masterOK {
			log.Info("trying to find a new master to replace failed master")
			bestNewMasters := s.findBestNewMasters(cd, curMasterDB)
			if len(bestNewMasters) == 0 {
				log.Error("no eligible masters")
			} else {
				// If synchronous replication is enabled, only choose the new master among the synchronous replication standbys.
				var bestNewMasterDB *cluster.DB
				if *cd.Cluster.DefSpec().SynchronousReplication {
					// If only fake synchronous standbys are defined we cannot choose any standby
					onlyFake := true
					for _, dbUID := range curMasterDB.Spec.SynchronousStandbys {
						if dbUID != fakeStandbyName {
							onlyFake = false
						}
					}
					if !onlyFake {
						if !util.CompareStringSlice(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys) {
							log.Warn("cannot choose synchronous standby since the latest master reported synchronous standbys are different from the db spec ones", zap.Object("reported", spew.Sdump(curMasterDB.Status.SynchronousStandbys)), zap.Object("spec", spew.Sdump(curMasterDB.Spec.SynchronousStandbys)))
						} else {
							for _, nm := range bestNewMasters {
								if util.StringInSlice(curMasterDB.Spec.SynchronousStandbys, nm.UID) {
									bestNewMasterDB = nm
									break
								}
							}
						}
					}
				} else {
					bestNewMasterDB = bestNewMasters[0]
				}
				if bestNewMasterDB != nil {
					log.Info("electing db as the new master", zap.String("db", bestNewMasterDB.UID), zap.String("keeper", bestNewMasterDB.Spec.KeeperUID))
					wantedMasterDBUID = bestNewMasterDB.UID
				} else {
					log.Error("no eligible masters")
				}
			}
		}

		// New master elected
		if curMasterDBUID != wantedMasterDBUID {
			// Maintain the current role, remove followers
			oldMasterdb := newcd.DBs[curMasterDBUID]
			oldMasterdb.Spec.Followers = []string{}

			newcd.Cluster.Status.Master = wantedMasterDBUID
			newMasterDB := newcd.DBs[wantedMasterDBUID]
			newMasterDB.Spec.Role = common.RoleMaster
			newMasterDB.Spec.FollowConfig = nil

			// Tell proxy that there's currently no active master
			newcd.Proxy.Spec.MasterDBUID = ""
			newcd.Proxy.ChangeTime = time.Now()

			// Set up synchronous standbys to the ones of the previous master (replacing ourselves with the previous master)
			if *cd.Cluster.DefSpec().SynchronousReplication {
				newMasterDB.Spec.SynchronousStandbys = []string{}
				for _, dbUID := range oldMasterdb.Spec.SynchronousStandbys {
					if dbUID != newMasterDB.UID {
						newMasterDB.Spec.SynchronousStandbys = append(newMasterDB.Spec.SynchronousStandbys, dbUID)
					} else {
						newMasterDB.Spec.SynchronousStandbys = append(newMasterDB.Spec.SynchronousStandbys, oldMasterdb.UID)
					}
				}
				if len(newMasterDB.Spec.SynchronousStandbys) == 0 {
					newMasterDB.Spec.SynchronousStandbys = []string{fakeStandbyName}
				}
			}
		}

		// TODO(sgotti) Wait for the proxies being converged (closed connections to old master)?

		// Setup standbys, do this only when there's no master change
		if curMasterDBUID == wantedMasterDBUID {
			masterDB := newcd.DBs[curMasterDBUID]
			// Set standbys to follow master only if it's healthy and converged
			if masterDB.Status.Healthy && s.dbConvergenceState(masterDB, cd.Cluster.DefSpec().ConvergenceTimeout.Duration) == Converged {
				// Tell proxy that there's a new active master
				newcd.Proxy.Spec.MasterDBUID = wantedMasterDBUID
				newcd.Proxy.ChangeTime = time.Now()

				// Remove old masters
				toRemove := []*cluster.DB{}
				for _, db := range newcd.DBs {
					if db.UID == wantedMasterDBUID {
						continue
					}
					if s.dbType(newcd, db.UID) != dbTypeMaster {
						continue
					}
					log.Info("removing old master db", zap.String("db", db.UID))
					toRemove = append(toRemove, db)
				}
				for _, db := range toRemove {
					delete(newcd.DBs, db.UID)
				}

				// Remove invalid dbs
				toRemove = []*cluster.DB{}
				for _, db := range newcd.DBs {
					if db.UID == wantedMasterDBUID {
						continue
					}
					if s.dbValidity(newcd, db.UID) != dbValidityInvalid {
						continue
					}
					log.Info("removing invalid db", zap.String("db", db.UID))
					toRemove = append(toRemove, db)
				}
				for _, db := range toRemove {
					delete(newcd.DBs, db.UID)
				}

				goodStandbys, failedStandbys, convergingStandbys := s.validStandbysByStatus(newcd)
				goodStandbysCount := len(goodStandbys)
				failedStandbysCount := len(failedStandbys)
				convergingStandbysCount := len(convergingStandbys)
				log.Debug("standbys states", zap.Int("good", goodStandbysCount), zap.Int("failed", failedStandbysCount), zap.Int("converging", convergingStandbysCount))

				// Setup synchronous standbys
				if *cd.Cluster.DefSpec().SynchronousReplication {
					// Make a map of synchronous standbys starting from the current ones
					synchronousStandbys := map[string]struct{}{}
					for _, dbUID := range masterDB.Spec.SynchronousStandbys {
						// Filter out the fake standby
						if dbUID == fakeStandbyName {
							continue
						}
						synchronousStandbys[dbUID] = struct{}{}
					}

					// Check that the current synchronous standbys are healthy, or remove them
					toRemove := map[string]struct{}{}
					for dbUID := range synchronousStandbys {
						if _, ok := goodStandbys[dbUID]; !ok {
							log.Info("removing failed synchronous standby", zap.String("masterDB", masterDB.UID), zap.String("db", dbUID))
							toRemove[dbUID] = struct{}{}
						}
					}
					for dbUID := range toRemove {
						delete(synchronousStandbys, dbUID)
					}

					// Remove synchronous standbys in excess
					if uint16(len(synchronousStandbys)) > *cd.Cluster.DefSpec().MaxSynchronousStandbys {
						rc := len(synchronousStandbys) - int(*cd.Cluster.DefSpec().MaxSynchronousStandbys)
						removedCount := 0
						toRemove = map[string]struct{}{}
						for dbUID := range synchronousStandbys {
							if removedCount >= rc {
								break
							}
							log.Info("removing synchronous standby in excess", zap.String("masterDB", masterDB.UID), zap.String("db", dbUID))
							toRemove[dbUID] = struct{}{}
							removedCount++
						}
						for dbUID := range toRemove {
							delete(synchronousStandbys, dbUID)
						}
					}

					// Try to add missing standbys up to *cd.Cluster.DefSpec().MaxSynchronousStandbys
					bestStandbys := s.findBestStandbys(newcd, curMasterDB)
					ac := int(*cd.Cluster.DefSpec().MaxSynchronousStandbys) - len(synchronousStandbys)
					addedCount := 0
					for _, bestStandby := range bestStandbys {
						if addedCount >= ac {
							break
						}
						if _, ok := synchronousStandbys[bestStandby.UID]; ok {
							continue
						}
						log.Info("adding synchronous standby", zap.String("masterDB", masterDB.UID), zap.String("synchronousStandbyDB", bestStandby.UID))
						synchronousStandbys[bestStandby.UID] = struct{}{}
						addedCount++
					}

					// If there aren't enough real synchronous standbys, add a fake synchronous standby
					// because we have to be strict and make the master block transactions until
					// MinSynchronousStandbys real standbys are available
					if len(synchronousStandbys) < int(*cd.Cluster.DefSpec().MinSynchronousStandbys) {
						log.Info("using a fake synchronous standby since there are not enough real standbys available", zap.String("masterDB", masterDB.UID), zap.Int("required", int(*cd.Cluster.DefSpec().MinSynchronousStandbys)))
						synchronousStandbys[fakeStandbyName] = struct{}{}
					}

					masterDB.Spec.SynchronousStandbys = []string{}
					for dbUID := range synchronousStandbys {
						masterDB.Spec.SynchronousStandbys = append(masterDB.Spec.SynchronousStandbys, dbUID)
					}

					// Sort synchronousStandbys so we can compare the slice regardless of its order
					sort.Sort(sort.StringSlice(masterDB.Spec.SynchronousStandbys))
				}

				// NotFailed != Good since there can be some dbs that are converging;
				// it's the total number of standbys minus the failed standbys,
				// or the sum of good + converging standbys
				notFailedStandbysCount := goodStandbysCount + convergingStandbysCount

				// Remove dbs in excess if we have a good number >= MaxStandbysPerSender
				if uint16(goodStandbysCount) >= *cd.Cluster.DefSpec().MaxStandbysPerSender {
					toRemove := []*cluster.DB{}
					// Remove all non good standbys
					for _, db := range newcd.DBs {
						if s.dbType(newcd, db.UID) != dbTypeStandby {
							continue
						}
						if _, ok := goodStandbys[db.UID]; !ok {
							log.Info("removing non good standby", zap.String("db", db.UID))
							toRemove = append(toRemove, db)
						}
					}
					// Remove good standbys in excess
					nr := int(uint16(goodStandbysCount) - *cd.Cluster.DefSpec().MaxStandbysPerSender)
					i := 0
					for _, db := range goodStandbys {
						if i >= nr {
							break
						}
						// Don't remove standbys marked as synchronous standbys
						if util.StringInSlice(masterDB.Spec.SynchronousStandbys, db.UID) {
							continue
						}
						log.Info("removing good standby in excess", zap.String("db", db.UID))
						toRemove = append(toRemove, db)
						i++
					}
					for _, db := range toRemove {
						delete(newcd.DBs, db.UID)
					}
				} else {
					// Add new dbs to substitute failed dbs. We don't remove a failed db until the
					// number of good dbs is >= MaxStandbysPerSender since they can come back.

					// Define new dbs if there are available keepers.
					// nc can be negative if MaxStandbysPerSender has been lowered
					nc := int(*cd.Cluster.DefSpec().MaxStandbysPerSender - uint16(notFailedStandbysCount))

					// Add missing DBs until MaxStandbysPerSender
					freeKeepers := s.freeKeepers(newcd)
					nf := len(freeKeepers)
					for i := 0; i < nc && i < nf; i++ {
						freeKeeper := freeKeepers[i]
						db := &cluster.DB{
							UID:        s.UIDFn(),
							Generation: cluster.InitialGeneration,
							ChangeTime: time.Now(),
							Spec: &cluster.DBSpec{
								KeeperUID:    freeKeeper.UID,
								InitMode:     cluster.DBInitModeResync,
								Role:         common.RoleStandby,
								Followers:    []string{},
								FollowConfig: &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID},
							},
						}
						newcd.DBs[db.UID] = db
						log.Info("added new standby db", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
					}
				}

				// Reconfigure all standbys as followers of the current master
				for _, db := range newcd.DBs {
					if s.dbType(newcd, db.UID) != dbTypeStandby {
						continue
					}
					db.Spec.Role = common.RoleStandby
					// Remove followers
					db.Spec.Followers = []string{}
					db.Spec.FollowConfig = &cluster.FollowConfig{Type: cluster.FollowTypeInternal, DBUID: wantedMasterDBUID}
				}

				// Set followers for master DB
				masterDB.Spec.Followers = []string{}
				for _, db := range newcd.DBs {
					if masterDB.UID == db.UID {
						continue
					}
					fc := db.Spec.FollowConfig
					if fc != nil {
						if fc.Type == cluster.FollowTypeInternal && fc.DBUID == wantedMasterDBUID {
							masterDB.Spec.Followers = append(masterDB.Spec.Followers, db.UID)
						}
					}
				}
				// Sort followers so the slice won't be considered changed due to different order of the same entries.
				sort.Strings(masterDB.Spec.Followers)
			}
		}
	default:
		return nil, fmt.Errorf("unknown cluster phase %s", cd.Cluster.Status.Phase)
	}

	// Copy the clusterSpec parameters to the dbSpec
	s.setDBSpecFromClusterSpec(newcd)

	// Update generation on DBs if they have changed
	for dbUID, db := range newcd.DBs {
		prevDB, ok := cd.DBs[dbUID]
		if !ok {
			continue
		}
		if !reflect.DeepEqual(db.Spec, prevDB.Spec) {
			log.Debug("db spec changed, updating generation", zap.String("prevDB", spew.Sdump(prevDB.Spec)), zap.String("db", spew.Sdump(db.Spec)))
			db.Generation++
			db.ChangeTime = time.Now()
		}
	}

	return newcd, nil
}
// TestErrorEx ...
func TestErrorEx(t *testing.T) {
	Convey("Check Caller", t, func() {
		caller := helperGetCaller()
		So(caller, ShouldStartWith, `[errors_test.go:`)
		So(caller, ShouldEndWith, `werrors.helperGetCaller`)
	})

	Convey("Check stack error", t, func() {
		originalSkip := callerSkip
		callerSkip = 1e3
		defer func() { callerSkip = originalSkip }()
		So(helperGetCaller(), ShouldEqual, ErrFailedGetCaller)
	})

	Convey("Check NewEx", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		lvl := zap.InfoLevel
		err := NewEx(lvl, msg, zap.String("field1", "field1 data"), zap.Int("field2", 5))
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, lvl)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>"), zap.String("field1", "field1 data"), zap.Int("field2", 5)})
	})

	Convey("Check New", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		err := New(msg)
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, zap.ErrorLevel)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>")})
	})

	Convey("Check NewLevel", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		lvl := zap.WarnLevel
		err := NewLevel(lvl, msg)
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, lvl)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>")})
	})

	Convey("Check NewDetails", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		details := "details message"
		err := NewDetails(msg, errors.New(details))
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, zap.ErrorLevel)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>"), zap.String("details", details)})
	})

	Convey("Check NewFields", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		err := NewFields(msg, zap.String("field1", "field1 data"), zap.Int("field2", 5))
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, zap.ErrorLevel)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>"), zap.String("field1", "field1 data"), zap.Int("field2", 5)})
	})

	Convey("Check AddFields for base error", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		err := errors.New(msg)
		err = AddFields(err, zap.String("field1", "field1 data"), zap.Int("field2", 5))
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, zap.ErrorLevel)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>"), zap.String("field1", "field1 data"), zap.Int("field2", 5)})
	})

	Convey("Check AddFields for ErrorEx", t, func() {
		callerTest = true
		defer func() { callerTest = false }()
		msg := "message"
		err := NewFields(msg, zap.String("field1", "field1 data"))
		err = AddFields(err, zap.Int("field2", 5))
		So(err.Error(), ShouldEqual, msg)
		werr, ok := err.(*ErrorEx)
		So(ok, ShouldBeTrue)
		So(werr.Level, ShouldEqual, zap.ErrorLevel)
		So(werr.Error(), ShouldEqual, msg)
		So(werr.Fields, ShouldResemble, []zap.Field{zap.String("caller", "<fake>"), zap.String("field1", "field1 data"), zap.Int("field2", 5)})
	})
}
// Consumes a partition
func (cg *ConsumerGroup) partitionConsumer(topic string, partition int32, messages chan<- *sarama.ConsumerMessage, errors chan<- *sarama.ConsumerError, wg *sync.WaitGroup, stopper <-chan struct{}, logger zap.Logger) {
	defer wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	for maxRetries, tries := 3, 0; tries < maxRetries; tries++ {
		if err := cg.instance.ClaimPartition(topic, partition); err == nil {
			break
		} else if err == kazoo.ErrPartitionClaimedByOther && tries+1 < maxRetries {
			time.Sleep(1 * time.Second)
		} else {
			logger.Warn("KAFKA: Replica FAILED to claim partition",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
				zap.Error(err),
			)
			return
		}
	}
	defer cg.instance.ReleasePartition(topic, partition)

	nextOffset, err := cg.offsetManager.InitializePartition(topic, partition)
	if err != nil {
		logger.Error("KAFKA: Replica FAILED to determine initial offset",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
		return
	}

	if nextOffset >= 0 {
		logger.Info("KAFKA: Replica partition consumer starting at offset",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Int64("nextOffset", nextOffset),
		)
	} else {
		nextOffset = cg.config.Offsets.Initial
		if nextOffset == sarama.OffsetOldest {
			logger.Info("KAFKA: Replica partition consumer starting at the oldest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		} else if nextOffset == sarama.OffsetNewest {
			logger.Info("KAFKA: Replica partition consumer listening for new messages only",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		}
	}

	consumer, err := cg.consumer.ConsumePartition(topic, partition, nextOffset)
	if err == sarama.ErrOffsetOutOfRange {
		logger.Warn("KAFKA: Replica partition consumer offset out of range",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
		)
		// If the offset is out of range, simplistically decide whether to use OffsetNewest or OffsetOldest:
		// if the configuration specified OffsetOldest, then switch to the oldest available offset, else
		// switch to the newest available offset.
		if cg.config.Offsets.Initial == sarama.OffsetOldest {
			nextOffset = sarama.OffsetOldest
			logger.Info("KAFKA: Replica partition consumer offset reset to oldest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		} else {
			nextOffset = sarama.OffsetNewest
			logger.Info("KAFKA: Replica partition consumer offset reset to newest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		}
		// Retry the ConsumePartition with the adjusted offset
		consumer, err = cg.consumer.ConsumePartition(topic, partition, nextOffset)
	}
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to start partition consumer",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
		return
	}
	defer consumer.Close()

	err = nil
	var lastOffset int64 = -1 // aka unknown

partitionConsumerLoop:
	for {
		select {
		case <-stopper:
			break partitionConsumerLoop

		case err := <-consumer.Errors():
			for {
				select {
				case errors <- err:
					continue partitionConsumerLoop

				case <-stopper:
					break partitionConsumerLoop
				}
			}

		case message := <-consumer.Messages():
			for {
				select {
				case <-stopper:
					break partitionConsumerLoop

				case messages <- message:
					lastOffset = message.Offset
					continue partitionConsumerLoop
				}
			}
		}
	}

	logger.Info("KAFKA: Replica is stopping partition consumer at offset",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int64("partition", int64(partition)),
		zap.Int64("lastOffset", lastOffset),
	)

	if err = cg.offsetManager.FinalizePartition(topic, partition, lastOffset, cg.config.Offsets.ProcessingTimeout, cg.replicaId, logger); err != nil {
		logger.Fatal("KAFKA: Replica error trying to stop partition consumer",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
	}

	logger.Info("KAFKA: Replica successfully stopped partition",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int64("partition", int64(partition)),
	)
}
func (cg *ConsumerGroup) topicConsumer(topic string, messages chan<- *sarama.ConsumerMessage, errors chan<- *sarama.ConsumerError, stopper <-chan struct{}, logger zap.Logger) {
	defer cg.wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	logger.Info("KAFKA: Replica started consumer for topic",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
	)

	// Fetch a list of partition IDs
	partitions, err := cg.kazoo.Topic(topic).Partitions()
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to get list of partitions for topic",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Error(err),
		)
		cg.errors <- &sarama.ConsumerError{
			Topic:     topic,
			Partition: -1,
			Err:       err,
		}
		return
	}

	partitionLeaders, err := retrievePartitionLeaders(partitions)
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to get leaders of partitions for topic",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Error(err),
		)
		cg.errors <- &sarama.ConsumerError{
			Topic:     topic,
			Partition: -1,
			Err:       err,
		}
		return
	}

	dividedPartitions := dividePartitionsBetweenConsumers(cg.consumers, partitionLeaders)
	myPartitions := dividedPartitions[cg.instance.ID]

	logger.Info("KAFKA: Replica is claiming partitions",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int("claimedPartitions", len(myPartitions)),
		zap.Int("numPartitionLeaders", len(partitionLeaders)),
	)

	// Consume all the assigned partitions
	var wg sync.WaitGroup
	myPartitionsStr := ""
	for _, pid := range myPartitions {
		myPartitionsStr += fmt.Sprintf("%d ", pid.ID)
		wg.Add(1)
		go cg.partitionConsumer(topic, pid.ID, messages, errors, &wg, stopper, logger)
	}
	logger.Info("KAFKA: Retrieved replica's partitions",
		zap.Int("replicaId", cg.replicaId),
		zap.String("myPartitions", myPartitionsStr),
	)

	wg.Wait()

	logger.Info("KAFKA: Replica stopped consumer of a topic",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
	)
}
func (conn *Connection) sender(queue <-chan PushNotification, sent chan PushNotification, logger zap.Logger) {
	i := 0
	stopping := false
	defer conn.conn.Close()
	defer conn.connAux.Close()
	logger.Info("APNS: Starting sender for connection",
		zap.Int("connectionId", conn.id),
	)
	for {
		select {
		case pn, ok := <-conn.queue:
			if !ok {
				logger.Info("APNS: Connection not okay; queue closed",
					zap.Int("connectionId", conn.id),
				)
				// That means the Connection is stopped
				// close sent?
				return
			}
			// This means we saw a response; connection is over.
			select {
			case <-conn.shouldReconnect:
				conn.conn.Close()
				conn.conn = nil
				conn.connAux.Close()
				conn.connAux = nil
				conn.spinUntilReconnect(logger)
			default:
			}
			// Then send the push notification
			pn.Priority = 10
			payload, err := pn.ToBytes()
			if err != nil {
				logger.Info("APNS: Connection Error",
					zap.Int("connectionId", conn.id),
					zap.Error(err),
				)
				// Should report this on the bad notifications channel probably
			} else {
				if conn.conn == nil {
					conn.spinUntilReconnect(logger)
				}
				_, err = conn.conn.Write(payload)
				if err != nil {
					logger.Info("APNS: Error writing payload",
						zap.Error(err),
					)
					go func() {
						conn.shouldReconnect <- true
					}()
					// Disconnect?
				} else {
					i++
					sent <- pn
					if stopping && len(queue) == 0 {
						conn.senderFinished <- true
					}
				}
			}
		case <-conn.stopping:
			logger.Info("APNS: Connection sender - Got a stop message",
				zap.Int("connectionId", conn.id),
			)
			stopping = true
			if len(queue) == 0 {
				logger.Info("APNS: Connection sender - Stopping because ran out of things to send. Let's see if limbo is empty",
					zap.Int("connectionId", conn.id),
				)
				conn.senderFinished <- true
			}
		case <-conn.ackFinished:
			logger.Info("APNS: Connection sender - limbo is empty",
				zap.Int("connectionId", conn.id),
			)
			if len(queue) == 0 {
				logger.Info("APNS: Connection sender - limbo is empty and so am I",
					zap.Int("connectionId", conn.id),
				)
				close(sent)
				return
			}
		}
	}
}
func (conn *Connection) limbo(sent <-chan PushNotification, responses chan Response, errors chan BadPushNotification, queue chan PushNotification, logger zap.Logger) {
	stopping := false
	limbo := make([]timedPushNotification, 0, SentBufferSize)
	ticker := time.NewTicker(1 * time.Second)
	for {
		select {
		case pn, ok := <-sent:
			limbo = append(limbo, pn.timed())
			stopping = false
			if !ok {
				logger.Info("APNS: Connection limbo - sent is closed, so sender is done. So am I, then",
					zap.Int("connectionId", conn.id),
				)
				close(errors)
				conn.stopped <- true
				return
			}
		case <-conn.senderFinished:
			// senderFinished means the sender thinks it's done.
			// However, sender might not be - limbo could resend some, if there are any left here.
			// So we just take note of this until limbo is empty too.
			stopping = true
		case resp, ok := <-responses:
			if !ok {
				// If the responses channel is closed,
				// that means we're shutting down the connection.
			}
			for i, pn := range limbo {
				if pn.Identifier == resp.Identifier {
					if resp.Status != 10 {
						// It was an error, we should report this on the error channel
						bad := BadPushNotification{PushNotification: pn.PushNotification, Status: resp.Status}
						errors <- bad
					}
					if len(limbo) > i {
						toRequeue := len(limbo) - (i + 1)
						if toRequeue > 0 {
							conn.requeue(limbo[i+1:])
							// We resent some notifications: that means we should wait for sender to tell us it's done, again.
							stopping = false
						}
					}
				}
			}
			limbo = make([]timedPushNotification, 0, SentBufferSize)
		case <-ticker.C:
			flushed := false
			for i := range limbo {
				if limbo[i].After(time.Now().Add(-TimeoutSeconds * time.Second)) {
					if i > 0 {
						newLimbo := make([]timedPushNotification, len(limbo[i:]), SentBufferSize)
						copy(newLimbo, limbo[i:])
						limbo = newLimbo
						flushed = true
						break
					}
				}
			}
			if !flushed {
				limbo = make([]timedPushNotification, 0, SentBufferSize)
			}
			if stopping && len(limbo) == 0 {
				// sender() is finished and so is limbo - so the connection is done.
				logger.Info("APNS: Connection limbo - I've flushed all my notifications. Tell sender I'm done",
					zap.Int("connectionId", conn.id),
				)
				conn.ackFinished <- true
			}
		}
	}
}
func (cg *ConsumerGroup) Load(logger zap.Logger) error {
	var kz *kazoo.Kazoo
	var err error
	if kz, err = kazoo.NewKazoo(cg.zookeeper, cg.config.Zookeeper); err != nil {
		return err
	}

	logger.Info("KAFKA: Getting broker list for replica",
		zap.Int("replicaId", cg.replicaId),
	)
	brokers, err := kz.BrokerList()
	if err != nil {
		kz.Close()
		return err
	}

	group := kz.Consumergroup(cg.config.ClientID)
	instance := group.NewInstance()

	var consumer sarama.Consumer
	if consumer, err = sarama.NewConsumer(brokers, cg.config.Config); err != nil {
		kz.Close()
		return err
	}

	cg.kazoo = kz
	cg.group = group
	cg.instance = instance
	cg.messages = make(chan *sarama.ConsumerMessage, cg.config.ChannelBufferSize)
	cg.consumer = consumer
	cg.singleShutdown = sync.Once{}
	cg.errors = make(chan *sarama.ConsumerError, cg.config.ChannelBufferSize)
	cg.stopper = make(chan struct{})

	if exists, err := cg.group.Exists(); err != nil {
		logger.Fatal("KAFKA: Replica failed to check existence of consumergroup",
			zap.Int("replicaId", cg.replicaId),
			zap.Error(err),
		)
		consumer.Close()
		kz.Close()
		return err
	} else if !exists {
		logger.Info("KAFKA: Consumergroup does not exist, creating it",
			zap.Int("replicaId", cg.replicaId),
			zap.String("consumerGroupName", cg.group.Name),
		)
		if err := cg.group.Create(); err != nil {
			logger.Fatal("KAFKA: Failed to create consumergroup in Zookeeper",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(err),
			)
			consumer.Close()
			kz.Close()
			return err
		}
	}

	if err := cg.instance.Register(cg.topics); err != nil {
		logger.Fatal("KAFKA: Failed to create consumer instance",
			zap.Int("replicaId", cg.replicaId),
			zap.Error(err),
		)
		return err
	} else {
		logger.Info("KAFKA: Consumer instance registered",
			zap.Int("replicaId", cg.replicaId),
		)
	}

	offsetConfig := OffsetManagerConfig{
		CommitInterval:   cg.config.Offsets.CommitInterval,
		EnableAutoCommit: cg.config.EnableOffsetAutoCommit,
	}
	cg.offsetManager = NewZookeeperOffsetManager(cg, &offsetConfig, logger)

	go cg.topicListConsumer(cg.topics, logger)

	return nil
}
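All of the consumer-group methods above take a zap.Logger explicitly. With the pre-1.0 zap API used throughout this listing, a caller could build one and drive the Load/Close lifecycle roughly as follows; cg is assumed to be an already-constructed *ConsumerGroup, and the static replicaId field is purely illustrative.

// Minimal sketch: construct a JSON logger and run Load/Close around it.
func runConsumerGroup(cg *ConsumerGroup) error {
	logger := zap.NewJSON(
		zap.InfoLevel,
		zap.Fields(zap.Int("replicaId", 1)), // illustrative static field
	)
	if err := cg.Load(logger); err != nil {
		return err
	}
	// ... consume from the group's message and error channels here ...
	return cg.Close(logger)
}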