Пример #1
1
func (zom *zookeeperOffsetManager) commitOffset(topic string, partition int32, tracker *partitionOffsetTracker, logger zap.Logger) error {
	err := tracker.commit(func(offset int64) error {
		if offset >= 0 {
			return zom.cg.group.CommitOffset(topic, partition, offset+1)
		} else {
			return nil
		}
	})

	if err != nil {
		logger.Warn("ZOOKEEPER: FAILED to commit offset",
			zap.Int64("highestProcessedOffset", tracker.highestProcessedOffset),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
		)
	} else if zom.config.VerboseLogging {
		logger.Debug("ZOOKEEPER: Committed offset",
			zap.Int64("lastCommittedOffset", tracker.lastCommittedOffset),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
		)
	}

	return err
}
Пример #2
0
func (zom *zookeeperOffsetManager) FinalizePartition(topic string, partition int32, lastOffset int64, timeout time.Duration, replicaId int, logger zap.Logger) error {
	zom.l.RLock()
	tracker := zom.offsets[topic][partition]
	zom.l.RUnlock()

	if lastOffset >= 0 {
		if lastOffset-tracker.highestProcessedOffset > 0 {
			logger.Info("ZOOKEEPER: Finalizing partition. Waiting before processing remaining messages",
				zap.Int("replicaId", replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
				zap.Int64("lastProcessedOffset", tracker.highestProcessedOffset),
				zap.Duration("waitingTimeToProcessMoreMessages", timeout/time.Second),
				zap.Int64("numMessagesToProcess", lastOffset-tracker.highestProcessedOffset),
			)
			if !tracker.waitForOffset(lastOffset, timeout) {
				return fmt.Errorf("REP %d - TIMEOUT waiting for offset %d. Last committed offset: %d", replicaId, lastOffset, tracker.lastCommittedOffset)
			}
		}

		if err := zom.commitOffset(topic, partition, tracker, logger); err != nil {
			return fmt.Errorf("REP %d - FAILED to commit offset %d to Zookeeper. Last committed offset: %d", replicaId, tracker.highestProcessedOffset, tracker.lastCommittedOffset)
		}
	}

	zom.l.Lock()
	delete(zom.offsets[topic], partition)
	zom.l.Unlock()

	return nil
}
Пример #3
0
func (cg *ConsumerGroup) GetBatchOfMessages(batchSize int, logger zap.Logger) []string {
	offsets := make(map[string]map[int32]int64)
	groupOfMessages := []string{}
	if batchSize == 0 {
		return groupOfMessages
	}
	counter := 0
	for {
		if counter == batchSize {
			break
		}
		cg.reloadMutex.Lock()
		select {
		case message := <-cg.messages:
			if offsets[message.Topic] == nil {
				offsets[message.Topic] = make(map[int32]int64)
			}
			if offsets[message.Topic][message.Partition] != 0 && offsets[message.Topic][message.Partition] != message.Offset-1 {
				logger.Error("KAFKA: Unexpected offset for message topic and partition",
					zap.String("messageTopic", message.Topic),
					zap.Int64("messagePartition", int64(message.Partition)),
					zap.Int64("offsetExpected", offsets[message.Topic][message.Partition]+1),
					zap.Int64("offsetFound", message.Offset),
					zap.Int64("offsetDifference", message.Offset-offsets[message.Topic][message.Partition]+1),
				)
				continue
			}
			groupOfMessages = append(groupOfMessages, string(message.Value))
			offsets[message.Topic][message.Partition] = message.Offset
			cg.CommitUpto(message)
			counter += 1
			cg.reloadMutex.Unlock()
		default:
			cg.reloadMutex.Unlock()
			continue
		}
	}
	return groupOfMessages
}
Пример #4
0
func (r *request) get(u *url.URL) (int64, error) {
	urlStr := u.String()
	r.urls = make(map[string]sql.NullInt64)
	hostID, robotOk := r.hostMng.CheckURL(u)
	r.meta = proxy.NewMeta(hostID, urlStr, nil)

	if !hostID.Valid {
		r.meta.SetState(database.StateExternal)
	}
	if !robotOk {
		r.meta.SetState(database.StateDisabledByRobotsTxt)
		log.Printf("INFO: URL %s blocked by robot.txt", urlStr)
		return 0, nil
	}

	startTime := time.Now()
	request := &http.Request{
		Method:     "GET",
		URL:        u,
		Proto:      "HTTP/1.1",
		ProtoMajor: 1,
		ProtoMinor: 1,
		Header: map[string][]string{
			"User-Agent":      {"Mozilla/5.0 (compatible; GoWebSearch/0.1)"},
			"Accept":          {"text/html;q=0.9,*/*;q=0.1"},
			"Accept-Encoding": {"gzip;q=0.9,identity;q=0.5,*;q=0.1"},
			"Accept-Language": {"ru-RU,ru;q=0.9,en-US;q=0.2,en;q=0.1"},
			"Accept-Charset":  {"utf-8;q=0.9,windows-1251;q=0.8,koi8-r;q=0.7,*;q=0.1"},
		},
		Body: nil,
		Host: u.Host,
	}

	response, err := r.client.Do(request)
	if err != nil {
		r.meta.SetState(database.StateConnectError)
		return 0, err
	}

	if r.meta.GetState() != database.StateSuccess {
		// here or early - logging!!!
		return 0, nil
	}

	// hostID, robotOk := r.hostMng.CheckURL(u)
	// 		if !robotOk {
	// 		r.meta.SetState(database.StateDisabledByRobotsTxt)
	// 		return fmt.Errorf("INFO: URL %s blocked by robot.txt", NormalizeURL(&copyURL))
	// 	}

	loggerURL := r.logger.With(zap.String("url", r.meta.GetURL()))

	RequestDurationMs := int64(time.Since(startTime) / time.Millisecond)
	loggerURL.Debug(DbgRequestDuration, zap.Int64("duration", RequestDurationMs))

	parser := newResponseParser(loggerURL, r.hostMng, r.meta)
	err = parser.Run(response)
	if err == nil {
		r.urls = parser.URLs
	} else {
		werrors.LogError(loggerURL, err)
		err = nil
	}

	return parser.BodyDurationMs, err
}
Пример #5
0
// Consumes a partition
func (cg *ConsumerGroup) partitionConsumer(topic string, partition int32, messages chan<- *sarama.ConsumerMessage, errors chan<- *sarama.ConsumerError, wg *sync.WaitGroup, stopper <-chan struct{}, logger zap.Logger) {
	defer wg.Done()

	select {
	case <-stopper:
		return
	default:
	}

	for maxRetries, tries := 3, 0; tries < maxRetries; tries++ {
		if err := cg.instance.ClaimPartition(topic, partition); err == nil {
			break
		} else if err == kazoo.ErrPartitionClaimedByOther && tries+1 < maxRetries {
			time.Sleep(1 * time.Second)
		} else {
			logger.Warn("KAFKA: Replica FAILED to claim partition",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
				zap.Error(err),
			)
			return
		}
	}
	defer cg.instance.ReleasePartition(topic, partition)

	nextOffset, err := cg.offsetManager.InitializePartition(topic, partition)

	if err != nil {
		logger.Error("KAFKA: Replica FAILED to determine initial offset",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
		return
	}

	if nextOffset >= 0 {
		logger.Info("KAFKA: Replica partition consumer starting at offset",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Int64("nextOffset", nextOffset),
		)
	} else {
		nextOffset = cg.config.Offsets.Initial
		if nextOffset == sarama.OffsetOldest {
			logger.Info("KAFKA: Replica partition consumer starting at the oldest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		} else if nextOffset == sarama.OffsetNewest {
			logger.Info("KAFKA: Replica partition consumer listening for new messages only",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		}
	}

	consumer, err := cg.consumer.ConsumePartition(topic, partition, nextOffset)
	if err == sarama.ErrOffsetOutOfRange {
		logger.Warn("KAFKA: Replica partition consumer offset out of Range",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
		)
		// if the offset is out of range, simplistically decide whether to use OffsetNewest or OffsetOldest
		// if the configuration specified offsetOldest, then switch to the oldest available offset, else
		// switch to the newest available offset.
		if cg.config.Offsets.Initial == sarama.OffsetOldest {
			nextOffset = sarama.OffsetOldest
			logger.Info("KAFKA: Replica partition consumer offset reset to oldest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		} else {
			nextOffset = sarama.OffsetNewest
			logger.Info("KAFKA: Replica partition consumer offset reset to newest available offset",
				zap.Int("replicaId", cg.replicaId),
				zap.String("topic", topic),
				zap.Int64("partition", int64(partition)),
			)
		}
		// retry the consumePartition with the adjusted offset
		consumer, err = cg.consumer.ConsumePartition(topic, partition, nextOffset)
	}
	if err != nil {
		logger.Fatal("KAFKA: Replica FAILED to start partition consumer",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
		return
	}
	defer consumer.Close()

	err = nil
	var lastOffset int64 = -1 // aka unknown
partitionConsumerLoop:
	for {
		select {
		case <-stopper:
			break partitionConsumerLoop

		case err := <-consumer.Errors():
			for {
				select {
				case errors <- err:
					continue partitionConsumerLoop

				case <-stopper:
					break partitionConsumerLoop
				}
			}

		case message := <-consumer.Messages():
			for {
				select {
				case <-stopper:
					break partitionConsumerLoop

				case messages <- message:
					lastOffset = message.Offset
					continue partitionConsumerLoop
				}
			}
		}
	}

	logger.Info("KAFKA: Replica is stopping partition consumer at offset",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int64("partition", int64(partition)),
		zap.Int64("lastOffset", lastOffset),
	)
	if err = cg.offsetManager.FinalizePartition(topic, partition, lastOffset, cg.config.Offsets.ProcessingTimeout, cg.replicaId, logger); err != nil {
		logger.Fatal("KAFKA: Replica error trying to stop partition consumer",
			zap.Int("replicaId", cg.replicaId),
			zap.String("topic", topic),
			zap.Int64("partition", int64(partition)),
			zap.Error(err),
		)
	}
	logger.Info("KAFKA: Replica successfully stoped partition",
		zap.Int("replicaId", cg.replicaId),
		zap.String("topic", topic),
		zap.Int64("partition", int64(partition)),
	)
}
Пример #6
0
func (s *Sentinel) findBestStandby(cd *cluster.ClusterData, masterDB *cluster.DB) (*cluster.DB, error) {
	var bestDB *cluster.DB
	for _, db := range cd.DBs {
		if db.UID == masterDB.UID {
			log.Debug("ignoring db since it's the current master", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
			continue
		}
		if db.Status.SystemID != masterDB.Status.SystemID {
			log.Debug("ignoring db since the postgres systemdID is different that the master one", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID), zap.String("dbSystemdID", db.Status.SystemID), zap.String("masterSystemID", masterDB.Status.SystemID))
			continue

		}
		if !db.Status.Healthy {
			log.Debug("ignoring db since it's not healthy", zap.String("db", db.UID), zap.String("keeper", db.Spec.KeeperUID))
			continue
		}
		if db.Status.CurrentGeneration != db.Generation {
			log.Debug("ignoring keeper since its generation is different that the current one", zap.String("db", db.UID), zap.Int64("currentGeneration", db.Status.CurrentGeneration), zap.Int64("generation", db.Generation))
			continue
		}
		if db.Status.TimelineID != masterDB.Status.TimelineID {
			log.Debug("ignoring keeper since its pg timeline is different than master timeline", zap.String("db", db.UID), zap.Uint64("dbTimeline", db.Status.TimelineID), zap.Uint64("masterTimeline", masterDB.Status.TimelineID))
			continue
		}
		if bestDB == nil {
			bestDB = db
			continue
		}
		if db.Status.XLogPos > bestDB.Status.XLogPos {
			bestDB = db
		}
	}
	if bestDB == nil {
		return nil, fmt.Errorf("no standbys available")
	}
	return bestDB, nil
}
Пример #7
0
func (r *responseParser) timeTrack(start time.Time) {
	r.BodyDurationMs = int64(time.Since(start) / time.Millisecond)
	r.logger.Debug(DbgBodyProcessingDuration, zap.Int64("duration", r.BodyDurationMs))
}