Example #1
0
func (cg *ConsumerGroup) topicListConsumer(topics []string, logger zap.Logger) {
	for {
		select {
		case <-cg.stopper:
			return
		default:
		}

		consumers, consumerChanges, err := cg.group.WatchInstances()
		if err != nil {
			logger.Fatal("KAFKA: FAILED to get list of registered consumer instances for replica",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(err),
			)
			return
		}

		cg.consumers = consumers
		logger.Info("KAFKA: Got currently registered consumers for replica",
			zap.Int("replicaId", cg.replicaId),
			zap.Int("numRegisteredConsumers", len(cg.consumers)),
		)

		stopper := make(chan struct{})

		for _, topic := range topics {
			cg.wg.Add(1)
			go cg.topicConsumer(topic, cg.messages, cg.errors, stopper, logger)
		}

		select {
		case <-cg.stopper:
			close(stopper)
			return

		case event := <-consumerChanges:
			if event.Err == zk.ErrSessionExpired || event.Err == zk.ErrConnectionClosed {
				logger.Info("KAFKA: Session was expired, reloading consumer for replica",
					zap.Int("replicaId", cg.replicaId),
				)
				go cg.reload(logger)
				<-cg.stopper
				close(stopper)
				return
			} else {
				logger.Info("KAFKA: Triggering rebalance due to consumer list change in replica",
					zap.Int("replicaId", cg.replicaId),
				)
				close(stopper)
				cg.wg.Wait()
			}
		}
	}
}
Example #2
0
//Start initiates a connection to APNS and asnchronously sends notifications which have been queued.
func (conn *Connection) Start(logger zap.Logger) error {
	//Connect to APNS. The reason this is here as well as in sender is that this probably catches any unavoidable errors in a synchronous fashion, while in sender it can reconnect after temporary errors (which should work most of the time.)
	err := conn.connect(logger)
	if err != nil {
		logger.Fatal("APNS: Failed to connect",
			zap.Int("connectionId", conn.id),
			zap.Error(err),
		)
		return err
	}
	//Start sender goroutine
	sent := make(chan PushNotification, 10000)
	go conn.sender(conn.queue, sent, logger)
	//Start limbo goroutine
	go conn.limbo(sent, conn.responses, conn.errors, conn.queue, logger)
	return nil
}
Example #3
0
func (conn *Connection) connect(logger zap.Logger) error {
	if conn.conn != nil {
		conn.conn.Close()
	}
	if conn.connAux != nil {
		conn.connAux.Close()
	}

	var cert tls.Certificate
	var err error
	if len(conn.CertificateBase64) == 0 && len(conn.KeyBase64) == 0 {
		// The user did not specify raw block contents, so check the filesystem.
		cert, err = tls.LoadX509KeyPair(conn.CertificateFile, conn.KeyFile)
	} else {
		// The user provided the raw block contents, so use that.
		cert, err = tls.X509KeyPair([]byte(conn.CertificateBase64), []byte(conn.KeyBase64))
	}

	if err != nil {
		logger.Fatal("APNS: Failed to obtain certificate",
			zap.Error(err),
		)
		return err
	}

	conf := &tls.Config{
		Certificates: []tls.Certificate{cert},
		ServerName:   strings.Split(conn.Gateway, ":")[0],
	}

	connAux, err := net.Dial("tcp", conn.Gateway)
	if err != nil {
		logger.Fatal("APNS: Failed while dialing gateway",
			zap.String("gateway", conn.Gateway),
			zap.Error(err),
		)
		return err
	}
	tlsConn := tls.Client(connAux, conf)
	err = tlsConn.Handshake()
	if err != nil {
		logger.Fatal("APNS: Failed while handshaking",
			zap.Error(err),
		)
		_ = tlsConn.Close()
		return err
	}
	conn.conn = tlsConn
	conn.connAux = connAux
	//Start reader goroutine
	go conn.reader(conn.responses, logger)
	return nil
}
Example #4
0
// Consumes a partition
func (cg *ConsumerGroup) partitionConsumer(topic string, partition int32, messages chan<- *sarama.ConsumerMessage, errors chan<- *sarama.ConsumerError, wg *sync.WaitGroup, stopper <-chan struct{}, logger zap.Logger) {
	defer wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	for maxRetries, tries := 3, 0; tries < maxRetries; tries++ {
		if err := cg.instance.ClaimPartition(topic, partition); err == nil {
			break
		} else if err == kazoo.ErrPartitionClaimedByOther && tries+1 < maxRetries {
			time.Sleep(1 * time.Second)
		} else {
			logger.Warn("KAFKA: Replica FAILED to claim partition",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
				zap.Error(err),
			)
			return
		}
	}
	defer cg.instance.ReleasePartition(topic, partition)

	nextOffset, err := cg.offsetManager.InitializePartition(topic, partition)

	if err != nil {
		logger.Error("KAFKA: Replica FAILED to determine initial offset",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
		return
	}

	if nextOffset >= 0 {
		logger.Info("KAFKA: Replica partition consumer starting at offset",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Int64("nextOffset", nextOffset),
		)
	} else {
		nextOffset = cg.config.Offsets.Initial
		if nextOffset == sarama.OffsetOldest {
			logger.Info("KAFKA: Replica partition consumer starting at the oldest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		} else if nextOffset == sarama.OffsetNewest {
			logger.Info("KAFKA: Replica partition consumer listening for new messages only",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		}
	}

	consumer, err := cg.consumer.ConsumePartition(topic, partition, nextOffset)
	if err == sarama.ErrOffsetOutOfRange {
		logger.Warn("KAFKA: Replica partition consumer offset out of Range",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
		)
		// if the offset is out of range, simplistically decide whether to use OffsetNewest or OffsetOldest
		// if the configuration specified offsetOldest, then switch to the oldest available offset, else
		// switch to the newest available offset.
		if cg.config.Offsets.Initial == sarama.OffsetOldest {
			nextOffset = sarama.OffsetOldest
			logger.Info("KAFKA: Replica partition consumer offset reset to oldest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		} else {
			nextOffset = sarama.OffsetNewest
			logger.Info("KAFKA: Replica partition consumer offset reset to newest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		}
		// retry the consumePartition with the adjusted offset
		consumer, err = cg.consumer.ConsumePartition(topic, partition, nextOffset)
	}
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to start partition consumer",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
		return
	}
	defer consumer.Close()

	err = nil
	var lastOffset int64 = -1 // aka unknown
partitionConsumerLoop:
	for {
		select {
		case <-stopper:
			break partitionConsumerLoop

		case err := <-consumer.Errors():
			for {
				select {
				case errors <- err:
					continue partitionConsumerLoop

				case <-stopper:
					break partitionConsumerLoop
				}
			}

		case message := <-consumer.Messages():
			for {
				select {
				case <-stopper:
					break partitionConsumerLoop

				case messages <- message:
					lastOffset = message.Offset
					continue partitionConsumerLoop
				}
			}
		}
	}

	logger.Info("KAFKA: Replica is stopping partition consumer at offset",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int64("partition", int64(partition)),
		zap.Int64("lastOffset", lastOffset),
	)
	if err = cg.offsetManager.FinalizePartition(topic, partition, lastOffset, cg.config.Offsets.ProcessingTimeout, cg.replicaId, logger); err != nil {
		logger.Fatal("KAFKA: Replica error trying to stop partition consumer",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
	}
	logger.Info("KAFKA: Replica successfully stoped partition",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int64("partition", int64(partition)),
	)
}
Example #5
0
func (cg *ConsumerGroup) topicConsumer(topic string, messages chan<- *sarama.ConsumerMessage, errors chan<- *sarama.ConsumerError, stopper <-chan struct{}, logger zap.Logger) {
	defer cg.wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	logger.Info("KAFKA: Replica started consumer for topic",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
	)

	// Fetch a list of partition IDs
	partitions, err := cg.kazoo.Topic(topic).Partitions()
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to get list of partitions for topic",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Error(err),
		)
		cg.errors <- &sarama.ConsumerError{
			Topic:     topic,
			Partition: -1,
			Err:       err,
		}
		return
	}

	partitionLeaders, err := retrievePartitionLeaders(partitions)
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to get leaders of partitions for topic",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Error(err),
		)
		cg.errors <- &sarama.ConsumerError{
			Topic:     topic,
			Partition: -1,
			Err:       err,
		}
		return
	}

	dividedPartitions := dividePartitionsBetweenConsumers(cg.consumers, partitionLeaders)
	myPartitions := dividedPartitions[cg.instance.ID]
	logger.Info("KAFKA: Replica is claiming partitions",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int("claimedPartitions", len(myPartitions)),
		zap.Int("numPartitionLeaders", len(partitionLeaders)),
	)
	// Consume all the assigned partitions
	var wg sync.WaitGroup
	myPartitionsStr := ""
	for _, pid := range myPartitions {
		myPartitionsStr += fmt.Sprintf("%d ", pid.ID)
		wg.Add(1)
		go cg.partitionConsumer(topic, pid.ID, messages, errors, &wg, stopper, logger)
	}
	logger.Info("KAFKA: Retrieved replica's partitions",
		zap.Int("replicaId", cg.replicaId),
		zap.String("myPartitions", myPartitionsStr),
	)
	wg.Wait()
	logger.Info("KAFKA: Replica stopped consumer of a topic",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
	)
}
Example #6
0
func (cg *ConsumerGroup) Load(logger zap.Logger) error {
	var kz *kazoo.Kazoo
	var err error
	if kz, err = kazoo.NewKazoo(cg.zookeeper, cg.config.Zookeeper); err != nil {
		return err
	}

	logger.Info("KAFKA: Getting broker list for replica",
		zap.Int("replicaId", cg.replicaId),
	)
	brokers, err := kz.BrokerList()
	if err != nil {
		kz.Close()
		return err
	}

	group := kz.Consumergroup(cg.config.ClientID)
	instance := group.NewInstance()

	var consumer sarama.Consumer
	if consumer, err = sarama.NewConsumer(brokers, cg.config.Config); err != nil {
		kz.Close()
		return err
	}

	cg.kazoo = kz
	cg.group = group
	cg.instance = instance
	cg.messages = make(chan *sarama.ConsumerMessage, cg.config.ChannelBufferSize)
	cg.consumer = consumer
	cg.singleShutdown = sync.Once{}
	cg.errors = make(chan *sarama.ConsumerError, cg.config.ChannelBufferSize)
	cg.stopper = make(chan struct{})

	if exists, err := cg.group.Exists(); err != nil {
		logger.Fatal("KAFKA: Replica failed to check existence of consumergroup",
			zap.Int("replicaId", cg.replicaId),
			zap.Error(err),
		)
		consumer.Close()
		kz.Close()
		return err
	} else if !exists {
		logger.Info("KAFKA: Consumergroup does not exist, creating it",
			zap.Int("replicaId", cg.replicaId),
			zap.String("consumerGroupName", cg.group.Name),
		)
		if err := cg.group.Create(); err != nil {
			logger.Fatal("KAFKA: Failed to create consumergroup in Zookeeper",
				zap.Int("replicaId", cg.replicaId),
				zap.Error(err),
			)
			consumer.Close()
			kz.Close()
			return err
		}
	}

	if err := cg.instance.Register(cg.topics); err != nil {
		logger.Fatal("KAFKA: Failed to create consumer instance",
			zap.Int("replicaId", cg.replicaId),
			zap.Error(err),
		)
		return err
	} else {
		logger.Info("KAFKA: Consumer instance registered",
			zap.Int("replicaId", cg.replicaId),
		)
	}

	offsetConfig := OffsetManagerConfig{
		CommitInterval:   cg.config.Offsets.CommitInterval,
		EnableAutoCommit: cg.config.EnableOffsetAutoCommit,
	}
	cg.offsetManager = NewZookeeperOffsetManager(cg, &offsetConfig, logger)
	go cg.topicListConsumer(cg.topics, logger)

	return nil
}