Example #1
0
// TestProducerBrokenConnection checks that producing keeps working across a
// partial cluster outage: it produces one message to every topic, kills two
// Kafka containers, restarts them after a short delay while producing a second
// message to every topic, and finally consumes two messages from partition 0
// of every topic to confirm both rounds were persisted.
func TestProducerBrokenConnection(t *testing.T) {
	IntegrationTest(t)

	topics := []string{"Topic3", "Topic4"}

	cluster := NewKafkaCluster("kafka-docker/", 4)
	if err := cluster.Start(); err != nil {
		t.Fatalf("cannot start kafka cluster: %s", err)
	}
	defer func() {
		// best-effort teardown: a failing Stop must not mask the test result
		_ = cluster.Stop()
	}()

	bconf := kafka.NewBrokerConf("producer-broken-connection")
	addrs, err := cluster.KafkaAddrs()
	if err != nil {
		t.Fatalf("cannot get kafka address: %s", err)
	}
	broker, err := kafka.Dial(addrs, bconf)
	if err != nil {
		t.Fatalf("cannot connect to cluster (%q): %s", addrs, err)
	}
	defer broker.Close()

	// produce big message to enforce TCP buffer flush
	m := proto.Message{
		Value: []byte(strings.Repeat("producer broken connection message ", 1000)),
	}
	pconf := kafka.NewProducerConf()
	producer := broker.Producer(pconf)

	// send message to all topics to make sure it's working
	for _, name := range topics {
		if _, err := producer.Produce(name, 0, &m); err != nil {
			t.Fatalf("cannot produce to %q: %s", name, err)
		}
	}

	// kill two kafka containers and publish to all topics again - requests
	// that hit a partition without a leader are expected to fail and be
	// retried
	//
	// request should not succeed until nodes are back - bring them back after
	// small delay and make sure producing was successful
	containers, err := cluster.Containers()
	if err != nil {
		t.Fatalf("cannot get containers: %s", err)
	}
	var stopped []*Container
	for _, container := range containers {
		if container.RunningKafka() {
			if err := container.Kill(); err != nil {
				t.Fatalf("cannot kill %q kafka container: %s", container.ID, err)
			}
			stopped = append(stopped, container)
		}
		if len(stopped) == 2 {
			break
		}
	}

	// bring stopped containers back
	//
	// The channel is buffered with one slot per stopped container so that a
	// failed restart never blocks this goroutine: every container gets its
	// restart attempt even while the test goroutine is still busy producing
	// and not yet receiving from errc.
	errc := make(chan error, len(stopped))
	go func() {
		defer close(errc)
		time.Sleep(500 * time.Millisecond)
		for _, container := range stopped {
			if err := container.Start(); err != nil {
				errc <- err
			}
		}
	}()

	// produce to all topics again while the killed containers are down or
	// being restarted
	for _, name := range topics {
		if _, err := producer.Produce(name, 0, &m); err != nil {
			t.Errorf("cannot produce to %q: %s", name, err)
		}
	}

	// wait for the restart goroutine to finish and report any restart failure
	for err := range errc {
		t.Errorf("cannot start container: %s", err)
	}

	// make sure data was persisted: each topic received one message before
	// and one message after the containers were killed
	for _, name := range topics {
		consumer, err := broker.Consumer(kafka.NewConsumerConf(name, 0))
		if err != nil {
			t.Errorf("cannot create consumer for %q: %s", name, err)
			continue
		}
		for i := 0; i < 2; i++ {
			if _, err := consumer.Consume(); err != nil {
				t.Errorf("cannot consume %d message from %q: %s", i, name, err)
			}
		}
	}
}
Example #2
0
// Dial returns a new cluster object which can be used to instantiate a number of Marshalers
// that all use the same cluster. The name is used to prefix this cluster's log lines, brokers
// is handed to kafka.Dial to establish the connection, and options is stored on the cluster
// (its BrokerConnectionLimit is also applied to the broker configuration).
//
// Dial blocks until the initial metadata fetch has completed and every rationalizer has
// signalled that it is alive. It returns an error if the connection cannot be established,
// if the initial metadata fetch fails, or if the marshal topic has no partitions; in the
// latter two cases the broker connection is closed before returning.
func Dial(name string, brokers []string, options MarshalOptions) (*KafkaCluster, error) {
	// Connect to Kafka
	brokerConf := kafka.NewBrokerConf("PortalMarshal")
	brokerConf.MetadataRefreshFrequency = time.Hour
	brokerConf.ConnectionLimit = options.BrokerConnectionLimit
	brokerConf.LeaderRetryLimit = 1 // Do not retry
	broker, err := kafka.Dial(brokers, brokerConf)
	if err != nil {
		return nil, err
	}

	c := &KafkaCluster{
		quit:          new(int32),
		rsteps:        new(int32),
		name:          name,
		options:       options,
		lock:          &sync.RWMutex{},
		rationalizers: &sync.WaitGroup{},
		broker:        broker,
		producer:      broker.Producer(kafka.NewProducerConf()),
		topics:        make(map[string]int),
		groups:        make(map[string]map[string]*topicState),
		pausedGroups:  make(map[string]time.Time),
		jitters:       make(chan time.Duration, 100),
	}

	// Do an initial metadata fetch, this will block a bit
	err = c.refreshMetadata()
	if err != nil {
		// The cluster is never handed to the caller, so release the connection here.
		broker.Close()
		return nil, fmt.Errorf("Failed to get metadata: %s", err)
	}

	// If there is no marshal topic, then we can't run. The admins must go create the topic
	// before they can use this library. Please see the README.
	c.partitions = c.getTopicPartitions(MarshalTopic)
	if c.partitions == 0 {
		// Same as above: nobody else can close the broker once we return nil.
		broker.Close()
		return nil, errors.New("Marshalling topic not found. Please see the documentation.")
	}

	// Now we start a goroutine to start consuming each of the partitions in the marshal
	// topic. Note that this doesn't handle increasing the partition count on that topic
	// without stopping all consumers.
	c.rationalizers.Add(c.partitions)
	for id := 0; id < c.partitions; id++ {
		go c.rationalize(id, c.kafkaConsumerChannel(id))
	}

	// A jitter calculator, just fills a channel with random numbers so that other
	// people don't have to build their own random generator. It is important that
	// these values be somewhat less than the HeartbeatInterval as we use this for
	// jittering our heartbeats. Each value lies in [HeartbeatInterval/4,
	// 3*HeartbeatInterval/4) seconds.
	//
	// NOTE(review): this loop has no exit condition; once the channel buffer is full the
	// goroutine stays parked on the send for as long as nobody reads from c.jitters.
	go func() {
		rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
		for {
			jitter := rnd.Intn(HeartbeatInterval/2) + (HeartbeatInterval / 4)
			c.jitters <- time.Duration(jitter) * time.Second
		}
	}()

	// Now start the metadata refreshing goroutine
	go func() {
		for !c.Terminated() {
			time.Sleep(<-c.jitters)
			log.Infof("[%s] Refreshing topic metadata.", c.name)
			if err := c.refreshMetadata(); err != nil {
				// Surface the failure instead of dropping it; the partition check below
				// still runs against whatever metadata the cluster currently holds.
				log.Errorf("[%s] Failed to refresh topic metadata: %s", c.name, err)
			}

			// See if the number of partitions in the marshal topic changed. This is bad if
			// it happens, since it means we can no longer coordinate correctly.
			if c.getTopicPartitions(MarshalTopic) != c.partitions {
				log.Errorf("[%s] Marshal topic partition count changed. Terminating!", c.name)
				c.Terminate()
			}
		}
	}()

	// Wait for all rationalizers to come alive
	log.Infof("[%s] Waiting for all rationalizers to come alive.", c.name)
	c.rationalizers.Wait()
	log.Infof("[%s] All rationalizers alive, KafkaCluster now alive.", c.name)

	return c, nil
}