Example #1
func ExampleBroker_Consumer() {
	broker := NewBroker()
	msg := &proto.Message{Value: []byte("first")}

	// mock server actions, pushing data through the consumer
	go func() {
		consumer, _ := broker.Consumer(kafka.NewConsumerConf("my-topic", 0))
		c := consumer.(*Consumer)
		// it is possible to send messages through the consumer...
		c.Messages <- msg

		// every consumer fetch call blocks until there is either a message or an
		// error ready to return; this way we can test slow consumers
		time.Sleep(time.Millisecond * 20)

		// ...as well as to push errors to mock a failure
		c.Errors <- errors.New("expected error is expected")
	}()

	// the test broker never fails to create a consumer
	consumer, _ := broker.Consumer(kafka.NewConsumerConf("my-topic", 0))

	m, err := consumer.Consume()
	if err == nil {
		fmt.Printf("Value: %q\n", m.Value)
	}
	if _, err = consumer.Consume(); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	// output:
	//
	// Value: "first"
	// Error: expected error is expected
}
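
The mock broker above can also drive code under test that loops on Consume until an error occurs. Below is a minimal sketch of that pattern; countUntilError and TestCountUntilError are hypothetical names, it assumes the same package and imports as the example above (errors, testing, time, kafka, proto), and it assumes Broker.Consumer returns the kafka.Consumer interface, as the type assertion in the example suggests.

// countUntilError is a hypothetical helper: it drains a consumer until the
// first error and reports how many messages were read before it.
func countUntilError(c kafka.Consumer) int {
	n := 0
	for {
		if _, err := c.Consume(); err != nil {
			return n
		}
		n++
	}
}

func TestCountUntilError(t *testing.T) {
	broker := NewBroker()
	consumer, _ := broker.Consumer(kafka.NewConsumerConf("my-topic", 0))
	c := consumer.(*Consumer)

	// feed two messages, then an error, mirroring the example above; the short
	// sleep keeps the error from racing ahead of the messages
	go func() {
		c.Messages <- &proto.Message{Value: []byte("a")}
		c.Messages <- &proto.Message{Value: []byte("b")}
		time.Sleep(time.Millisecond * 20)
		c.Errors <- errors.New("done")
	}()

	if got := countUntilError(consumer); got != 2 {
		t.Fatalf("want 2 messages before the error, got %d", got)
	}
}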
Example #2
// setup is the initial worker that initializes the claim structure. Until this is done,
// our internal state is inconsistent.
func (c *claim) setup() {
	c.lock.Lock()
	defer c.lock.Unlock()

	// If the current offset is behind the earliest known offset, we must
	// fast-forward to the earliest known
	if c.offsets.Current < c.offsets.Earliest {
		log.Warningf("[%s:%d] consumer fast-forwarding from %d to %d",
			c.topic, c.partID, c.offsets.Current, c.offsets.Earliest)
		c.offsets.Current = c.offsets.Earliest
	}

	// Since it's claimed, we now want to heartbeat with the last seen offset
	err := c.marshal.Heartbeat(c.topic, c.partID, c.offsets.Current)
	if err != nil {
		log.Errorf("[%s:%d] consumer failed to heartbeat: %s", c.topic, c.partID, err)
		go c.Release()
		return
	}
	c.lastHeartbeat = time.Now().Unix()

	// Set up Kafka consumer
	consumerConf := kafka.NewConsumerConf(c.topic, int32(c.partID))
	consumerConf.StartOffset = c.offsets.Current
	consumerConf.MaxFetchSize = c.marshal.cluster.options.MaxMessageSize
	consumerConf.RequestTimeout = c.marshal.cluster.options.ConsumeRequestTimeout
	// Do not retry. If we get back no data, we'll do our own retries.
	consumerConf.RetryLimit = 0

	kafkaConsumer, err := c.marshal.cluster.broker.Consumer(consumerConf)
	if err != nil {
		log.Errorf("[%s:%d] consumer failed to create Kafka Consumer: %s",
			c.topic, c.partID, err)
		go c.Release()
		return
	}
	c.kafkaConsumer = kafkaConsumer

	// Start our maintenance goroutines that keep this system healthy
	go c.messagePump()

	// Totally done, let the world know and move on
	log.Infof("[%s:%d] consumer %s claimed at offset %d (is %d behind)",
		c.topic, c.partID, c.marshal.clientID, c.offsets.Current, c.offsets.Latest-c.offsets.Current)
}
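
For comparison, the configuration pattern setup() uses, an explicit StartOffset and RetryLimit = 0 with retry policy left to the caller, can be sketched against a directly dialed broker as below. This is only an illustration: consumeOneFrom, the client ID, and the address/topic/partition arguments are placeholders rather than part of the package above, and it assumes the same kafka and proto packages imported by these examples.

// consumeOneFrom is a hypothetical sketch of the consumer configuration used in
// setup(): start from an explicit offset and disable the client's internal
// retries, leaving any retry policy to the caller.
func consumeOneFrom(addr, topic string, partition int32, offset int64) (*proto.Message, error) {
	broker, err := kafka.Dial([]string{addr}, kafka.NewBrokerConf("example-client"))
	if err != nil {
		return nil, err
	}
	defer broker.Close()

	conf := kafka.NewConsumerConf(topic, partition)
	conf.StartOffset = offset
	conf.RetryLimit = 0 // no internal retries; the caller decides when to retry

	consumer, err := broker.Consumer(conf)
	if err != nil {
		return nil, err
	}
	return consumer.Consume()
}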
Example #3
func TestProducerBrokenConnection(t *testing.T) {
	IntegrationTest(t)

	topics := []string{"Topic3", "Topic4"}

	cluster := NewKafkaCluster("kafka-docker/", 4)
	if err := cluster.Start(); err != nil {
		t.Fatalf("cannot start kafka cluster: %s", err)
	}
	defer func() {
		_ = cluster.Stop()
	}()

	bconf := kafka.NewBrokerConf("producer-broken-connection")
	addrs, err := cluster.KafkaAddrs()
	if err != nil {
		t.Fatalf("cannot get kafka address: %s", err)
	}
	broker, err := kafka.Dial(addrs, bconf)
	if err != nil {
		t.Fatalf("cannot connect to cluster (%q): %s", addrs, err)
	}
	defer broker.Close()

	// produce a big message to force a TCP buffer flush
	m := proto.Message{
		Value: []byte(strings.Repeat("producer broken connection message ", 1000)),
	}
	pconf := kafka.NewProducerConf()
	producer := broker.Producer(pconf)

	// send a message to all topics to make sure producing works
	for _, name := range topics {
		if _, err := producer.Produce(name, 0, &m); err != nil {
			t.Fatalf("cannot produce to %q: %s", name, err)
		}
	}

	// kill two kafka broker containers and publish to both topics - writes to
	// any partition that lost its leader should be retried internally, because
	// the lack of a leader makes the request fail
	//
	// the requests should not succeed until the nodes are back - bring them
	// back after a small delay and make sure producing was successful
	containers, err := cluster.Containers()
	if err != nil {
		t.Fatalf("cannot get containers: %s", err)
	}
	var stopped []*Container
	for _, container := range containers {
		if container.RunningKafka() {
			if err := container.Kill(); err != nil {
				t.Fatalf("cannot kill %q kafka container: %s", container.ID, err)
			}
			stopped = append(stopped, container)
		}
		if len(stopped) == 2 {
			break
		}
	}

	// bring stopped containers back
	errc := make(chan error)
	go func() {
		time.Sleep(500 * time.Millisecond)
		for _, container := range stopped {
			if err := container.Start(); err != nil {
				errc <- err
			}
		}
		close(errc)
	}()

	// publish to all topics again - these requests should be retried until the
	// stopped brokers are back and the writes succeed
	for _, name := range topics {
		if _, err := producer.Produce(name, 0, &m); err != nil {
			t.Errorf("cannot produce to %q: %s", name, err)
		}
	}

	for err := range errc {
		t.Errorf("cannot start container: %s", err)
	}

	// make sure data was persisted
	for _, name := range topics {
		consumer, err := broker.Consumer(kafka.NewConsumerConf(name, 0))
		if err != nil {
			t.Errorf("cannot create consumer for %q: %s", name, err)
			continue
		}
		for i := 0; i < 2; i++ {
			if _, err := consumer.Consume(); err != nil {
				t.Errorf("cannot consume %d message from %q: %s", i, name, err)
			}
		}
	}
}
Example #4
// consumeFromKafka will start consuming messages from Kafka and writing them to the given
// channel forever. It is important that this method closes the "out" channel when it's done,
// as that instructs the downstream goroutine to exit.
func (c *KafkaCluster) consumeFromKafka(partID int, out chan message, startOldest bool) {
	var err error
	var alive bool
	var offsetFirst, offsetNext int64

	// Exit logic -- make sure downstream knows we exited.
	defer func() {
		log.Debugf("[%s] rationalize[%d]: terminating.", c.name, partID)
		close(out)
	}()

	// Try to connect to Kafka. This may sleep and retry for a while, since the
	// broker could be temporarily down.
	retry := &backoff.Backoff{Min: 500 * time.Millisecond, Jitter: true}
	for ; true; time.Sleep(retry.Duration()) {
		// Figure out how many messages are in this topic. This can fail if the broker handling
		// this partition is down, so we will loop.
		offsetFirst, err = c.broker.OffsetEarliest(MarshalTopic, int32(partID))
		if err != nil {
			log.Errorf("[%s] rationalize[%d]: failed to get offset: %s", c.name, partID, err)
			continue
		}
		offsetNext, err = c.broker.OffsetLatest(MarshalTopic, int32(partID))
		if err != nil {
			log.Errorf("[%s] rationalize[%d]: failed to get offset: %s", c.name, partID, err)
			continue
		}
		log.Debugf("[%s] rationalize[%d]: offsets %d to %d",
			c.name, partID, offsetFirst, offsetNext)

		// TODO: Is there a case where the latest offset is X>0 but there is no data in
		// the partition? does the offset reset to 0?
		if offsetNext == 0 || offsetFirst == offsetNext {
			alive = true
			c.rationalizers.Done()
		}
		break
	}
	retry.Reset()

	// Assume we're starting at the oldest offset for consumption
	consumerConf := kafka.NewConsumerConf(MarshalTopic, int32(partID))
	consumerConf.RetryErrLimit = 1 // Do not retry
	consumerConf.StartOffset = kafka.StartOffsetOldest
	consumerConf.RequestTimeout = c.options.MarshalRequestTimeout
	consumerConf.RetryWait = c.options.MarshalRequestRetryWait

	// Given the offsets of this partition, we arbitrarily pick a start point that
	// is ~100,000 from the end if there is more than that available. This only
	// applies if startOldest is false, i.e., we didn't run into a "message too new"
	// situation.
	checkMessageTs := false
	if !startOldest && offsetNext-offsetFirst > 100000 {
		checkMessageTs = true
		consumerConf.StartOffset = offsetNext - 100000
		log.Infof("[%s] rationalize[%d]: fast forwarding to offset %d.",
			c.name, partID, consumerConf.StartOffset)
	}

	consumer, err := c.broker.Consumer(consumerConf)
	if err != nil {
		// Unfortunately this is a termination error, as without being able to consume this
		// partition we can't effectively rationalize.
		log.Errorf("[%s] rationalize[%d]: Failed to create consumer: %s", c.name, partID, err)
		c.Terminate()
		return
	}

	// Consume messages forever, or until told to quit.
	for !c.Terminated() {
		msgb, err := consumer.Consume()
		if err != nil {
			// The internal consumer will do a number of retries. If we get an error here,
			// we're probably in the middle of a partition handoff. We should pause so we
			// don't hammer the cluster, but otherwise continue.
			log.Warningf("[%s] rationalize[%d]: failed to consume: %s", c.name, partID, err)
			time.Sleep(retry.Duration())
			continue
		}
		retry.Reset()

		msg, err := decode(msgb.Value)
		if err != nil {
			// Invalid message in the stream. This should never happen, but if it does,
			// just continue on.
			// TODO: We should probably think about this. If we end up in a situation where
			// one version of this software has a bug that writes invalid messages, it could
			// be doing things we don't anticipate. Of course, crashing all consumers
			// reading that partition is also bad.
			log.Errorf("[%s] rationalize[%d]: %s", c.name, partID, err)

			// In the case where the first message is invalid, we still need to notify
			// that we're alive now
			if !alive {
				alive = true
				c.rationalizers.Done()
			}
			continue
		}

		// If we are on our first message and we started at a non-zero offset, check
		// that its timestamp is older than a given threshold. If it's too new, our
		// ~100,000-offset fast-forward didn't work, so start over from the oldest offset.
		// TODO: This could be a binary search or something.
		if checkMessageTs {
			if int64(msg.Timestamp()) > time.Now().Unix()-HeartbeatInterval*2 {
				log.Warningf("[%s] rationalize[%d]: rewinding, fast-forwarded message was too new",
					c.name, partID)
				go c.consumeFromKafka(partID, out, true)
				return // terminate self.
			}
			checkMessageTs = false
		}

		log.Debugf("[%s] rationalize[%d]: @%d: [%s]", c.name, partID, msgb.Offset, msg.Encode())
		out <- msg

		// This is a one-time thing that fires the first time the rationalizer comes up
		// and makes sure we actually process all of the messages.
		if !alive && msgb.Offset >= offsetNext-1 {
			for len(out) > 0 {
				time.Sleep(100 * time.Millisecond)
			}
			log.Infof("[%s] rationalize[%d]: reached offset %d, now alive",
				c.name, partID, msgb.Offset)
			alive = true
			c.rationalizers.Done()
		}
	}
}
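
The doc comment on consumeFromKafka stresses that closing the out channel is what tells the downstream goroutine to exit, but that downstream side is not shown here. The standard Go pattern it relies on is a range loop that terminates once the channel is closed; the self-contained sketch below uses placeholder names and string messages rather than the package's own types.

package main

import "fmt"

// drain stands in for the downstream goroutine: it processes values until the
// producer closes the channel, at which point the range loop ends.
func drain(in <-chan string, done chan<- struct{}) {
	for msg := range in {
		fmt.Println("processed:", msg)
	}
	close(done) // signal that the downstream side has fully exited
}

func main() {
	out := make(chan string)
	done := make(chan struct{})
	go drain(out, done)

	// the producer side mirrors consumeFromKafka: send values, then close the
	// channel on exit so the downstream goroutine knows to stop
	for _, msg := range []string{"a", "b", "c"} {
		out <- msg
	}
	close(out)
	<-done
}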