// Calculate the set of index blocks that need to be loaded.
//   blocksByVb stores which blocks need to be processed for each vbucket, in ascending vbucket order.  Multiple vb map
//     values can point to the same IndexBlock (i.e. when those vbs share a partition).
//   blocksByKey stores all IndexBlocks to be retrieved, indexed by block key - no duplicates
func (b *BitFlagStorage) calculateChangedBlocks(fromSeq base.SequenceClock, channelClock base.SequenceClock) (blocksByKey map[string]IndexBlock, blocksByVb []vbBlockSet, err error) {

	blocksByKey = make(map[string]IndexBlock, 1)
	blocksByVb = make([]vbBlockSet, 0)

	for vbNo, clockVbSeq := range channelClock.Value() {
		fromVbSeq := fromSeq.GetSequence(uint16(vbNo))
		// Verify that the requested from value is less than the channel clock sequence (there are
		// new entries for this vbucket in the channel)
		if fromVbSeq >= clockVbSeq {
			continue
		}

		blockSet := vbBlockSet{vbNo: uint16(vbNo)}
		partition := b.partitions.VbMap[uint16(vbNo)]
		for _, blockIndex := range generateBitFlagBlockIndexes(b.channelName, fromVbSeq, clockVbSeq, partition) {
			blockKey := getIndexBlockKey(b.channelName, blockIndex, partition)
			block, found := blocksByKey[blockKey]
			if !found {
				block = newBitFlagBufferBlockForKey(blockKey, b.channelName, blockIndex, partition, b.partitions.VbPositionMaps[partition])
				blocksByKey[blockKey] = block
			}
			blockSet.blocks = append(blockSet.blocks, block)
		}
		blocksByVb = append(blocksByVb, blockSet)
	}

	return blocksByKey, blocksByVb, nil
}
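// A minimal, self-contained sketch of the dedup pattern used by calculateChangedBlocks: vbuckets that
// share a partition resolve to the same block key, so each vbucket's block list holds a pointer to the
// shared block instance while blocksByKey keeps a single copy per key. demoBlock, demoBlockSet and
// demoCalculateBlocks are hypothetical names used only for this illustration (assumes the fmt import).
type demoBlock struct{ key string }

type demoBlockSet struct {
	vbNo   uint16
	blocks []*demoBlock
}

func demoCalculateBlocks(partitionForVb []uint16) (blocksByKey map[string]*demoBlock, blocksByVb []demoBlockSet) {
	blocksByKey = make(map[string]*demoBlock)
	blocksByVb = make([]demoBlockSet, 0)
	for vbNo, partition := range partitionForVb { // slice index == vbucket number, so ascending vb order
		key := fmt.Sprintf("block-p%d", partition) // one block per partition in this simplified sketch
		block, found := blocksByKey[key]
		if !found {
			block = &demoBlock{key: key}
			blocksByKey[key] = block
		}
		blocksByVb = append(blocksByVb, demoBlockSet{vbNo: uint16(vbNo), blocks: []*demoBlock{block}})
	}
	return blocksByKey, blocksByVb
}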
func (b *BitFlagStorage) GetChanges(fromSeq base.SequenceClock, toSeq base.SequenceClock) ([]*LogEntry, error) {

	// Determine which blocks have changed, and load those blocks
	blocksByKey, blocksByVb, err := b.calculateChangedBlocks(fromSeq, toSeq)
	if err != nil {
		return nil, err
	}
	b.bulkLoadBlocks(blocksByKey)

	// For each vbucket, create the entries from the blocks.  Create in reverse sequence order, for
	// deduplication once we've retrieved the full entry
	entries := make([]*LogEntry, 0)
	entryKeys := make([]string, 0)
	for blockSetIndex := len(blocksByVb) - 1; blockSetIndex >= 0; blockSetIndex-- {
		blockSet := blocksByVb[blockSetIndex]
		vbNo := blockSet.vbNo
		blocks := blockSet.blocks
		fromVbSeq := fromSeq.GetSequence(vbNo) + 1
		toVbSeq := toSeq.GetSequence(vbNo)

		for blockIndex := len(blocks) - 1; blockIndex >= 0; blockIndex-- {
			blockEntries, keys := blocks[blockIndex].GetEntries(vbNo, fromVbSeq, toVbSeq, true)
			entries = append(entries, blockEntries...)
			entryKeys = append(entryKeys, keys...)
		}
	}

	// Bulk retrieval of individual entries.  Performs deduplication, and reordering into ascending vb and sequence order
	results := b.bulkLoadEntries(entryKeys, entries)

	return results, nil
}
// calculateHash does a simple sum of the vector clock values, modulo s.mod.  This intentionally
// doesn't generate a random distribution of clocks to hash values.  Instead, since vbucket values within the
// clock are monotonically increasing, it's going to generate increasing, non-colliding hash values
// until s.mod is reached.  The goal is to tune s.mod to the update frequency (and so the expiry
// of previously used hash values), so that the previously used hash value (n) is obsolete by the time
// the clock wraps around to (mod + n).
func (s *sequenceHasher) calculateHash(clock base.SequenceClock) uint64 {
	var sum uint64
	for _, value := range clock.Value() {
		sum += value & s.modMinus1
	}
	return sum & s.modMinus1
}
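// A minimal, standalone sketch of the masking arithmetic above, assuming s.mod is a power of two so
// that (x & modMinus1) is equivalent to (x % mod). demoMod, demoModMinus1 and demoHash are hypothetical
// names that exist only for this example.
const demoMod = uint64(1024)      // assumed power-of-two modulus
const demoModMinus1 = demoMod - 1 // mask equivalent to % demoMod

func demoHash(clockValues []uint64) uint64 {
	var sum uint64
	for _, v := range clockValues {
		sum += v & demoModMinus1 // same as v % demoMod
	}
	return sum & demoModMinus1 // same as sum % demoMod
}

// Because the per-vbucket sequences only increase, successive clocks map to successive hash values:
// demoHash([]uint64{5, 10, 3}) == 18 and demoHash([]uint64{6, 10, 3}) == 19. A collision with the first
// clock only reappears once the summed sequences have advanced by demoMod (1024).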
func (k *kvChannelIndex) pollForChanges(stableClock base.SequenceClock, newChannelClock base.SequenceClock) (hasChanges bool, cancelPolling bool) {

	changeCacheExpvars.Add(fmt.Sprintf("pollCount-%s", k.channelName), 1)

	// Increment the overall poll count since the last changes request (regardless of whether there have been polled changes)
	totalPollCount := atomic.AddUint32(&k.pollCount, 1)
	unreadPollCount := atomic.LoadUint32(&k.unreadPollCount)
	if unreadPollCount > kMaxUnreadPollCount {
		// We've sent a notify, but had (kMaxUnreadPollCount) polls without anyone calling getChanges.
		// Assume nobody is listening for updates - cancel polling for this channel
		return false, true
	}

	if unreadPollCount > 0 {
		// Give listeners more time to call getChanges, but increment the unread poll count
		atomic.AddUint32(&k.unreadPollCount, 1)
	}

	k.lastPolledLock.Lock()
	defer k.lastPolledLock.Unlock()

	// First poll handling
	if k.lastPolledChannelClock == nil {
		k.lastPolledChannelClock = k.clock.Copy()
		k.lastPolledSince = k.clock.Copy()
		k.lastPolledValidTo = k.clock.Copy()
	}

	if !newChannelClock.AnyAfter(k.lastPolledChannelClock) {
		// No changes to channel clock - update validTo based on the new stable sequence
		k.lastPolledValidTo.SetTo(stableClock)
		// If we've exceeded the empty poll count, return hasChanges=true to trigger the "is
		// anyone listening" check
		if totalPollCount > kMaxEmptyPollCount {
			return true, false
		} else {
			return false, false
		}
	}

	// The clock has changed - load the changes and store in last polled
	if err := k.updateLastPolled(stableClock, newChannelClock); err != nil {
		base.Warn("Error updating last polled for channel %s: %v", k.channelName, err)
		return false, false
	}

	// We have changes - increment unread counter if we haven't already
	if unreadPollCount == 0 {
		atomic.AddUint32(&k.unreadPollCount, 1)
	}
	return true, false
}
func (k *kvChannelIndex) getChannelClock() (base.SequenceClock, error) {

	var channelClock base.SequenceClock
	var err error
	// If we're polling, return a copy of the last polled channel clock
	k.lastPolledLock.RLock()
	defer k.lastPolledLock.RUnlock()
	if k.lastPolledChannelClock != nil {
		channelClock = base.NewSequenceClockImpl()
		channelClock.SetTo(k.lastPolledChannelClock)
	} else {
		channelClock, err = k.loadChannelClock()
		if err != nil {
			return nil, err
		}
	}
	return channelClock, nil
}
// calculateChanged identifies which vbuckets have changed after sinceClock until toClock, groups these
// by partition, and returns them as an array of PartitionRanges, indexed by partition number.
func (ds *DenseStorageReader) calculateChanged(sinceClock, toClock base.SequenceClock) (changedVbs []uint16, changedPartitions []*PartitionRange) {

	changedVbs = make([]uint16, 0)
	changedPartitions = make([]*PartitionRange, ds.partitions.PartitionCount())
	for vbNoInt, toSeq := range toClock.Value() {
		vbNo := uint16(vbNoInt)
		sinceSeq := sinceClock.GetSequence(vbNo)
		if sinceSeq < toSeq {
			changedVbs = append(changedVbs, vbNo)
			partitionNo := ds.partitions.PartitionForVb(vbNo)
			if changedPartitions[partitionNo] == nil {
				partitionRange := NewPartitionRange()
				changedPartitions[partitionNo] = &partitionRange
			}
			changedPartitions[partitionNo].SetRange(vbNo, sinceSeq, toSeq)
		}
	}
	return changedVbs, changedPartitions
}
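// A minimal sketch of how the calculateChanged return values might be consumed. demoReadChangedPartitions
// is a hypothetical helper; it only shows the nil-check pattern - changedPartitions is sized to the full
// partition count and entries stay nil for partitions with no changed vbuckets.
func demoReadChangedPartitions(changedPartitions []*PartitionRange) (changedPartitionCount int) {
	for _, partitionRange := range changedPartitions {
		if partitionRange == nil {
			continue // no vbuckets in this partition changed between sinceClock and toClock
		}
		changedPartitionCount++
	}
	return changedPartitionCount
}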
func (k *kvChannelIndex) checkLastPolled(since base.SequenceClock) (results []*LogEntry) {

	k.lastPolledLock.RLock()
	defer k.lastPolledLock.RUnlock()

	if k.lastPolledValidTo == nil || k.lastPolledSince == nil {
		return results
	}

	matchesLastPolledSince := true
	lastPolledValue := k.lastPolledSince.Value()
	validToValue := k.lastPolledValidTo.Value()
	sinceValue := since.Value()
	for vb, sequence := range sinceValue {
		lastPolledVbValue := lastPolledValue[vb]
		if sequence != lastPolledVbValue {
			matchesLastPolledSince = false
			if sequence < lastPolledVbValue || sequence > validToValue[vb] {
				// Poll results aren't sufficient for this request - return an empty set
				return results
			}
		}
	}

	// The last polled results are sufficient to serve this request.  If there's a match on the since values,
	// return the entire last polled changes.  If not, filter the last polled changes and return all entries greater
	// than the since value.
	if matchesLastPolledSince {
		// TODO: come up with a solution that doesn't create as much GC work on every checkLastPolled hit,
		// but doesn't break when k.lastPolledChanges gets updated mid-request.
		results := make([]*LogEntry, len(k.lastPolledChanges))
		copy(results, k.lastPolledChanges)
		return results
	} else {
		for _, entry := range k.lastPolledChanges {
			if entry.Sequence > sinceValue[entry.VbNo] {
				results = append(results, entry)
			}
		}
	}
	return results
}
// Returns the set of index entries for the channel more recent than the
// specified since SequenceClock.  Index entries with sequence values greater than
// the index stable sequence are not returned.
func (k *kvChannelIndex) getChanges(since base.SequenceClock) ([]*LogEntry, error) {

	var results []*LogEntry

	// Someone is still interested in this channel - reset poll counts
	atomic.StoreUint32(&k.pollCount, 0)
	atomic.StoreUint32(&k.unreadPollCount, 0)

	chanClock, err := k.getChannelClock()
	if err != nil {
		// Note: gocb returns "Key not found.", go-couchbase returns "MCResponse status=KEY_ENOENT, opcode=GET, opaque=0, msg: Not found".
		// Using string matching to identify key-not-found for now - we really need a better API in go-couchbase/gocb for gets that
		// lets us distinguish between errors and key-not-found more robustly than string matching.
		if IsNotFoundError(err) {
			// Initialize chanClock as an empty clock
			chanClock = base.NewSequenceClockImpl()
		} else {
			return results, err
		}
	}

	// If the requested clock is later than the channel clock, return empty
	if since.AllAfter(chanClock) {
		base.LogTo("DIndex+", "requested clock is later than channel clock - no new changes to report")
		return results, nil
	}

	// If the since value is more recent than the last polled clock, return the results from the
	// last polling.  Has the potential to return values earlier than since and later than
	// lastPolledClock, but these duplicates will be ignored by replication.  Could validate
	// greater-than-since inside this if clause, but leaving that out as a performance optimization for now.
	if lastPolledResults := k.checkLastPolled(since); len(lastPolledResults) > 0 {
		indexExpvars.Add("getChanges_lastPolled_hit", 1)
		return lastPolledResults, nil
	}
	indexExpvars.Add("getChanges_lastPolled_miss", 1)

	return k.channelStorage.GetChanges(since, chanClock)
}
func (s *sequenceHasher) GetHash(clock base.SequenceClock) (string, error) {

	if clock == nil {
		return "", errors.New("Can't calculate hash for nil clock")
	}

	hashValue := s.calculateHash(clock)

	// Load stored clocks for this hash, to see if it's already been defined.
	// Note: getCacheValue and load are handled as separate operations to optimize locking.
	//   1. getCacheValue locks the cache, and retrieves the current cache entry (or creates a new empty entry if not found)
	//   2. cachedValue.load locks the entry, and loads from the DB if no previous entry is found
	cachedValue := s.getCacheValue(hashValue)
	cachedClocks, err := cachedValue.load(s.loadClocks)
	if err != nil {
		return "", err
	}

	// Check whether the cached clocks for the hash value match our clock
	exists, index := cachedClocks.Contains(clock.Value())
	if exists {
		seqHash := sequenceHash{
			hashValue:      hashValue,
			collisionIndex: uint16(index),
		}
		indexExpvars.Add("seqHash_getHash_hits", 1)
		return seqHash.String(), nil
	}

	// Didn't find a match in cache - update the index and the cache.  Get a write lock on the index value
	// first, to ensure only one goroutine on this SG attempts to write.  The writeCas handling below handles
	// the case where other SGs are updating the value concurrently.
	indexExpvars.Add("seqHash_getHash_misses", 1)

	// First copy the clock value, to ensure we store a non-mutable version in the cache
	clockValue := make([]uint64, len(clock.Value()))
	copy(clockValue, clock.Value())

	updateErr := func() error {
		cachedValue.lock.Lock()
		defer cachedValue.lock.Unlock()

		// If the number of cached clocks has changed, check whether someone else has added this clock
		// while we waited for the lock
		if len(cachedValue.clocks.Sequences) > len(cachedClocks.Sequences) {
			exists, index = cachedValue.clocks.Contains(clockValue)
			if exists {
				return nil
			}
		}

		// Add our clock to the cached clocks for this hash
		existingClocks := cachedValue.clocks
		existingClocks.Sequences = append(existingClocks.Sequences, clockValue)

		// Update the hash entry in the bucket
		key := kHashPrefix + strconv.FormatUint(hashValue, 10)
		initialValue, err := existingClocks.Marshal()
		index = len(existingClocks.Sequences) - 1
		if err != nil {
			return err
		}
		_, err = base.WriteCasRaw(s.bucket, key, initialValue, existingClocks.cas, int(s.hashExpiry), func(value []byte) (updatedValue []byte, err error) {
			// Note: The following is invoked upon cas failure - may be called multiple times
			base.LogTo("DIndex+", "CAS fail - reapplying changes for hash storage for key: %s", key)
			var sClocks storedClocks
			err = sClocks.Unmarshal(value)
			if err != nil {
				base.Warn("Error unmarshalling hash storage during update", err)
				return nil, err
			}
			exists, index = sClocks.Contains(clockValue)
			if exists {
				// Return an empty byte array to cancel the update
				return []byte{}, nil
			}
			// Not found - add
			sClocks.Sequences = append(sClocks.Sequences, clockValue)
			base.LogTo("DIndex+", "Reattempting stored hash write for key %s:", key)
			index = len(sClocks.Sequences) - 1
			return sClocks.Marshal()
		})
		if err != nil && err.Error() != "Already Exists" {
			// "Already Exists" means another writer has already stored an equivalent value - not an error here
			return err
		}
		return nil
	}()

	if updateErr != nil {
		return "", updateErr
	}

	indexExpvars.Add("writeCasRaw_hash", 1)

	seqHash := &sequenceHash{
		hashValue:      hashValue,
		collisionIndex: uint16(index),
	}
	return seqHash.String(), nil
}
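// A minimal, self-contained sketch of the read-modify-write pattern that the WriteCasRaw call above
// relies on. demoCasStore and demoWriteCas are hypothetical stand-ins, not the real base package API;
// the point is that the callback re-reads the latest value and reapplies the change on every CAS
// mismatch, and can return an empty slice to cancel the write (e.g. when another writer already stored
// an equivalent value).
type demoCasStore struct {
	value []byte
	cas   uint64
}

func demoWriteCas(store *demoCasStore, newValue []byte, cas uint64, reapply func(current []byte) ([]byte, error)) error {
	for {
		if store.cas == cas {
			// CAS matches - commit the write and bump the cas value
			store.value, store.cas = newValue, store.cas+1
			return nil
		}
		// CAS mismatch: let the caller rebuild the value from the latest stored copy
		updated, err := reapply(store.value)
		if err != nil {
			return err
		}
		if len(updated) == 0 {
			return nil // caller cancelled the update
		}
		newValue, cas = updated, store.cas
	}
}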
func (clock PartitionClock) AddToClock(seqClock base.SequenceClock) {
	for vbNo, seq := range clock {
		seqClock.SetSequence(vbNo, seq)
	}
}
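// A minimal usage sketch for AddToClock, assuming PartitionClock is a vbucket-to-sequence map (as the
// range above suggests). demoAddToClock is a hypothetical helper used only for illustration.
func demoAddToClock() base.SequenceClock {
	partitionClock := PartitionClock{0: 15, 1: 20, 7: 3}
	channelClock := base.NewSequenceClockImpl()
	partitionClock.AddToClock(channelClock)
	// channelClock now reports 15, 20 and 3 for vbuckets 0, 1 and 7; all other vbuckets remain at 0
	return channelClock
}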