// calculateChangedBlocks works out which index blocks must be loaded to serve the range between
// fromSeq and channelClock.
//
//   - blocksByKey holds every IndexBlock to be retrieved, keyed by block key, with no duplicates.
//   - blocksByVb holds one vbBlockSet per vbucket that has new entries, appended in the order the
//     vbuckets are visited in channelClock.Value().  Block sets for different vbuckets may reference
//     the same IndexBlock (i.e. when those vbuckets share a partition).
//
// A vbucket is skipped when its fromSeq sequence is already at or beyond the channel clock's
// sequence.  The returned error is always nil.
func (b *BitFlagStorage) calculateChangedBlocks(fromSeq base.SequenceClock, channelClock base.SequenceClock) (blocksByKey map[string]IndexBlock, blocksByVb []vbBlockSet, err error) {

	blocksByKey = make(map[string]IndexBlock, 1)
	blocksByVb = make([]vbBlockSet, 0)

	for i, channelSeq := range channelClock.Value() {
		vb := uint16(i)
		sinceSeq := fromSeq.GetSequence(vb)
		if sinceSeq >= channelSeq {
			// Nothing new in the channel for this vbucket.
			continue
		}

		vbSet := vbBlockSet{vbNo: vb}
		partition := b.partitions.VbMap[vb]
		for _, idx := range generateBitFlagBlockIndexes(b.channelName, sinceSeq, channelSeq, partition) {
			key := getIndexBlockKey(b.channelName, idx, partition)
			// Create each block only once; later vbuckets in the same partition reuse it.
			if _, known := blocksByKey[key]; !known {
				blocksByKey[key] = newBitFlagBufferBlockForKey(key, b.channelName, idx, partition, b.partitions.VbPositionMaps[partition])
			}
			vbSet.blocks = append(vbSet.blocks, blocksByKey[key])
		}
		blocksByVb = append(blocksByVb, vbSet)
	}
	return blocksByKey, blocksByVb, nil
}
// GetChanges collects the channel's log entries between fromSeq and toSeq.  It determines the
// changed index blocks, bulk loads them, gathers entries from each block for the per-vbucket range
// (fromSeq sequence + 1) to (toSeq sequence), and hands the collected keys and entries to
// bulkLoadEntries, whose result is returned.
func (b *BitFlagStorage) GetChanges(fromSeq base.SequenceClock, toSeq base.SequenceClock) ([]*LogEntry, error) {

	// Work out which blocks have changed, then load them in bulk.
	blocksByKey, blocksByVb, err := b.calculateChangedBlocks(fromSeq, toSeq)
	if err != nil {
		return nil, err
	}
	b.bulkLoadBlocks(blocksByKey)

	// Walk the vbucket sets and their blocks from last to first, so entries are gathered in
	// reverse order ahead of deduplication once the full entries have been retrieved.
	entries := []*LogEntry{}
	entryKeys := []string{}
	for i := len(blocksByVb); i > 0; i-- {
		vbSet := blocksByVb[i-1]
		rangeStart := fromSeq.GetSequence(vbSet.vbNo) + 1
		rangeEnd := toSeq.GetSequence(vbSet.vbNo)

		for j := len(vbSet.blocks); j > 0; j-- {
			found, keys := vbSet.blocks[j-1].GetEntries(vbSet.vbNo, rangeStart, rangeEnd, true)
			entries = append(entries, found...)
			entryKeys = append(entryKeys, keys...)
		}
	}

	// Bulk retrieval of the individual entries; deduplication and reordering into ascending
	// vbucket and sequence order happen there.
	return b.bulkLoadEntries(entryKeys, entries), nil
}
// calculateHash reduces a vector clock to a hash value by summing its per-vbucket values, with each
// value and the final sum masked by s.modMinus1 (presumably equivalent to modulo s.mod when mod is a
// power of two - confirm against where modMinus1 is set).
//
// This intentionally does not spread clocks randomly across hash values.  Because the vbucket values
// within a clock increase monotonically, successive clocks produce increasing, non-colliding hash
// values until the mod value is reached.  The goal is to tune the mod to the update frequency (and so
// to the expiry of previously used hash values), so that a previously used hash value (n) is obsolete
// by the time the clock wraps around to (mod + n).
func (s *sequenceHasher) calculateHash(clock base.SequenceClock) uint64 {

	mask := s.modMinus1
	var total uint64
	for _, seq := range clock.Value() {
		total += seq & mask
	}
	return total & mask
}
// pollForChanges compares newChannelClock against the channel clock seen at the previous poll and
// refreshes the cached last-polled state when the clock has moved.
//
// Returns:
//   - hasChanges: true when the channel clock advanced and the last-polled state was updated, or
//     when the clock is unchanged but more than kMaxEmptyPollCount polls have happened since the
//     poll count was last reset.
//   - cancelPolling: true when more than kMaxUnreadPollCount polls have gone unread, in which case
//     nothing else is evaluated.
//
// If updating the last-polled state fails, the error is logged and (false, false) is returned.
func (k *kvChannelIndex) pollForChanges(stableClock base.SequenceClock, newChannelClock base.SequenceClock) (hasChanges bool, cancelPolling bool) {

	changeCacheExpvars.Add(fmt.Sprintf("pollCount-%s", k.channelName), 1)

	// The overall poll count is bumped on every poll, whether or not changes are found.
	polls := atomic.AddUint32(&k.pollCount, 1)
	unread := atomic.LoadUint32(&k.unreadPollCount)
	switch {
	case unread > kMaxUnreadPollCount:
		// A notify was sent, but kMaxUnreadPollCount polls have passed without anyone calling
		// getChanges.  Assume nobody is listening and cancel polling for this channel.
		return false, true
	case unread > 0:
		// Give listeners more time to call getChanges, but keep counting.
		atomic.AddUint32(&k.unreadPollCount, 1)
	}

	k.lastPolledLock.Lock()
	defer k.lastPolledLock.Unlock()

	// First poll: seed the last-polled clocks from the channel's current clock.
	if k.lastPolledChannelClock == nil {
		k.lastPolledChannelClock = k.clock.Copy()
		k.lastPolledSince = k.clock.Copy()
		k.lastPolledValidTo = k.clock.Copy()
	}

	if clockAdvanced := newChannelClock.AnyAfter(k.lastPolledChannelClock); !clockAdvanced {
		// Channel clock unchanged - only validTo moves, based on the new stable sequence.
		k.lastPolledValidTo.SetTo(stableClock)
		// Once the empty poll count is exceeded, report hasChanges=true to trigger the
		// "is anyone listening" check.
		return polls > kMaxEmptyPollCount, false
	}

	// The clock has changed - load the changes and store them as the last polled set.
	err := k.updateLastPolled(stableClock, newChannelClock)
	if err != nil {
		base.Warn("Error updating last polled for channel %s: %v", k.channelName, err)
		return false, false
	}

	// There are changes - start the unread counter if it isn't already running.
	if unread == 0 {
		atomic.AddUint32(&k.unreadPollCount, 1)
	}
	return true, false
}
// getChannelClock returns the channel clock while holding the last-polled read lock.  When a
// last-polled channel clock exists, a new clock set to that value is returned, so the caller does
// not share the polled instance.  Otherwise the clock is obtained from loadChannelClock, and any
// error from that call is returned with a nil clock.
func (k *kvChannelIndex) getChannelClock() (base.SequenceClock, error) {

	k.lastPolledLock.RLock()
	defer k.lastPolledLock.RUnlock()

	// Not polling yet - fall back to loading the clock.
	if k.lastPolledChannelClock == nil {
		loaded, err := k.loadChannelClock()
		if err != nil {
			return nil, err
		}
		return loaded, nil
	}

	// Polling is active - hand back a copy of the polled clock.
	snapshot := base.NewSequenceClockImpl()
	snapshot.SetTo(k.lastPolledChannelClock)
	return snapshot, nil

}
// calculateChanged identifies the vbuckets whose sequence in toClock is greater than their sequence
// in sinceClock.  It returns those vbucket numbers as changedVbs, and groups their (since, to) ranges
// by partition in changedPartitions, a slice of PartitionRange pointers indexed by partition number.
// Partitions with no changed vbuckets are left as nil entries.
func (ds *DenseStorageReader) calculateChanged(sinceClock, toClock base.SequenceClock) (changedVbs []uint16, changedPartitions []*PartitionRange) {

	changedVbs = []uint16{}
	changedPartitions = make([]*PartitionRange, ds.partitions.PartitionCount())
	for i, upperSeq := range toClock.Value() {
		vb := uint16(i)
		lowerSeq := sinceClock.GetSequence(vb)
		if lowerSeq >= upperSeq {
			// No new sequences for this vbucket.
			continue
		}
		changedVbs = append(changedVbs, vb)

		// Allocate the partition's range lazily, the first time one of its vbuckets changes.
		slot := ds.partitions.PartitionForVb(vb)
		if changedPartitions[slot] == nil {
			fresh := NewPartitionRange()
			changedPartitions[slot] = &fresh
		}
		changedPartitions[slot].SetRange(vb, lowerSeq, upperSeq)
	}
	return changedVbs, changedPartitions
}
// checkLastPolled tries to serve a changes request from the cached results of the last poll.
//
// It returns nil when no poll state is available, or when any vbucket sequence in since differs
// from the last polled since value and falls outside the range [lastPolledSince, lastPolledValidTo]
// for that vbucket.  When since equals the last polled since value for every vbucket, a copy of the
// full last polled change set is returned.  Otherwise only the last polled entries whose sequence
// is greater than the since value for their vbucket are returned.
func (k *kvChannelIndex) checkLastPolled(since base.SequenceClock) (results []*LogEntry) {

	k.lastPolledLock.RLock()
	defer k.lastPolledLock.RUnlock()

	if k.lastPolledSince == nil || k.lastPolledValidTo == nil {
		return nil
	}

	polledSince := k.lastPolledSince.Value()
	polledValidTo := k.lastPolledValidTo.Value()
	requested := since.Value()

	exactMatch := true
	for vb, seq := range requested {
		polledSeq := polledSince[vb]
		if seq == polledSeq {
			continue
		}
		exactMatch = false
		if seq < polledSeq || seq > polledValidTo[vb] {
			// The poll results aren't sufficient for this request - return the empty set.
			return nil
		}
	}

	// The last polled results are sufficient to serve this request.
	if !exactMatch {
		// Since values differ - keep only the entries later than the requested since value.
		for _, entry := range k.lastPolledChanges {
			if entry.Sequence > requested[entry.VbNo] {
				results = append(results, entry)
			}
		}
		return results
	}

	// Exact match on the since values - return the entire last polled change set, copied so the
	// result doesn't break if k.lastPolledChanges gets updated mid-request.
	// TODO: come up with a solution that doesn't make as much GC work on every checkLastPolled hit.
	snapshot := make([]*LogEntry, len(k.lastPolledChanges))
	copy(snapshot, k.lastPolledChanges)
	return snapshot
}
// getChanges returns the set of index entries for the channel more recent than the specified
// since SequenceClock.  Index entries with sequence values greater than the index stable sequence
// are not returned.
//
// Calling it resets both poll counters, marking the channel as still in use.  A not-found error
// when fetching the channel clock is treated as an empty clock; any other error is returned.
func (k *kvChannelIndex) getChanges(since base.SequenceClock) ([]*LogEntry, error) {

	// Someone is still interested in this channel - reset poll counts.
	atomic.StoreUint32(&k.pollCount, 0)
	atomic.StoreUint32(&k.unreadPollCount, 0)

	chanClock, err := k.getChannelClock()
	if err != nil {
		// Note: gocb returns "Key not found.", go-couchbase returns "MCResponse status=KEY_ENOENT, opcode=GET, opaque=0, msg: Not found".
		// Key not found is identified by string matching for now - go-couchbase/gocb really need a
		// gets API that distinguishes errors from key not found more robustly.
		if !IsNotFoundError(err) {
			return nil, err
		}
		// No stored clock yet - proceed with an empty one.
		chanClock = base.NewSequenceClockImpl()
	}

	// A requested clock later than the channel clock means there is nothing new to return.
	if since.AllAfter(chanClock) {
		base.LogTo("DIndex+", "requested clock is later than channel clock - no new changes to report")
		return nil, nil
	}

	// Serve from the results of the last poll when checkLastPolled returns any entries for this
	// since value.
	lastPolledResults := k.checkLastPolled(since)
	if len(lastPolledResults) > 0 {
		indexExpvars.Add("getChanges_lastPolled_hit", 1)
		return lastPolledResults, nil
	}
	indexExpvars.Add("getChanges_lastPolled_miss", 1)

	// Cache miss - read the changes from channel storage.
	return k.channelStorage.GetChanges(since, chanClock)
}
// GetHash returns the string form of the sequence hash for clock.
//
// The hash value comes from calculateHash; the collision index is the position of the clock within
// the set of clocks stored for that hash value.  If the clock is already present in the cached set,
// its existing index is used.  Otherwise the clock is appended to the set and written to the bucket
// with a CAS write, re-checking and re-appending against the stored value if the CAS fails.
//
// Returns an error when clock is nil, when loading the cached clocks fails, when marshalling the
// updated clocks fails, or when the bucket write fails with anything other than "Already Exists".
func (s *sequenceHasher) GetHash(clock base.SequenceClock) (string, error) {

	if clock == nil {
		return "", errors.New("Can't calculate hash for nil clock")
	}

	hashValue := s.calculateHash(clock)

	// Load stored clocks for this hash, to see if it's already been defined.
	// Note: getCacheValue and load are handled as separate operations to optimize locking.
	//   1. getCacheValue locks the cache, and retrieves the current cache entry (or creates a new empty entry if not found)
	//   2. cachedValue.load locks the entry, and loads from the DB if no previous entry is found
	cachedValue := s.getCacheValue(hashValue)
	cachedClocks, err := cachedValue.load(s.loadClocks)
	if err != nil {
		return "", err
	}

	// Check whether the cached clocks for the hash value match our clock
	exists, index := cachedClocks.Contains(clock.Value())
	if exists {
		seqHash := sequenceHash{
			hashValue:      hashValue,
			collisionIndex: uint16(index),
		}
		indexExpvars.Add("seqHash_getHash_hits", 1)
		return seqHash.String(), nil
	}

	// Didn't find a match in cache - update the index and the cache.  Get a write lock on the index value
	// first, to ensure only one goroutine on this SG attempts to write.  writeCas handling below handles
	// the case where other SGs are updating the value concurrently
	indexExpvars.Add("seqHash_getHash_misses", 1)

	// First copy the clock value, to ensure we store a non-mutable version in the cache
	clockValue := make([]uint64, len(clock.Value()))
	copy(clockValue, clock.Value())

	updateErr := func() error {
		cachedValue.lock.Lock()
		defer cachedValue.lock.Unlock()

		// If the number of cached clocks has changed, check whether someone else has added this clock
		// while we waited for the lock
		if len(cachedValue.clocks.Sequences) > len(cachedClocks.Sequences) {
			exists, index = cachedValue.clocks.Contains(clockValue)
			if exists {
				return nil
			}
		}

		// Add our clock to the cached clocks for this hash
		existingClocks := cachedValue.clocks
		existingClocks.Sequences = append(existingClocks.Sequences, clockValue)

		// Update the hash entry in the bucket
		key := kHashPrefix + strconv.FormatUint(hashValue, 10)
		// The marshal error gets its own name so that it does not shadow the outer err: the
		// WriteCasRaw result below must land in the outer err for the "Already Exists" check
		// after the closure to see it.
		initialValue, marshalErr := existingClocks.Marshal()
		index = len(existingClocks.Sequences) - 1
		if marshalErr != nil {
			return marshalErr
		}
		_, err = base.WriteCasRaw(s.bucket, key, initialValue, existingClocks.cas, int(s.hashExpiry), func(value []byte) (updatedValue []byte, err error) {
			// Note: The following is invoked upon cas failure - may be called multiple times
			base.LogTo("DIndex+", "CAS fail - reapplying changes for hash storage for key: %s", key)
			var sClocks storedClocks
			err = sClocks.Unmarshal(value)
			if err != nil {
				base.Warn("Error unmarshalling hash storage during update: %v", err)
				return nil, err
			}
			exists, index = sClocks.Contains(clockValue)
			if exists {
				// return empty byte array to cancel the update
				return []byte{}, nil
			}
			// Not found - add
			sClocks.Sequences = append(sClocks.Sequences, clockValue)
			base.LogTo("DIndex+", "Reattempting stored hash write for key %s:", key)
			index = len(sClocks.Sequences) - 1
			return sClocks.Marshal()
		})
		return nil
	}()

	if updateErr != nil {
		return "", updateErr
	}

	indexExpvars.Add("writeCasRaw_hash", 1)

	// err now carries the WriteCasRaw result; "Already Exists" is tolerated.
	if err != nil && err.Error() != "Already Exists" {
		return "", err
	}

	seqHash := &sequenceHash{
		hashValue:      hashValue,
		collisionIndex: uint16(index),
	}
	return seqHash.String(), nil
}
// AddToClock applies every entry of this PartitionClock to seqClock, calling SetSequence with each
// vbucket number and its sequence.
func (clock PartitionClock) AddToClock(seqClock base.SequenceClock) {
	for vb, sequence := range clock {
		seqClock.SetSequence(vb, sequence)
	}
}