// calculateChangedBlocks calculates the set of index blocks that need to be loaded.
// blocksByVb stores which blocks need to be processed for each vbucket, in ascending vbucket order. Multiple vb map
// values can point to the same IndexBlock (i.e. when those vbs share a partition).
// blocksByKey stores all IndexBlocks to be retrieved, indexed by block key - no duplicates.
func (b *BitFlagStorage) calculateChangedBlocks(fromSeq base.SequenceClock, channelClock base.SequenceClock) (blocksByKey map[string]IndexBlock, blocksByVb []vbBlockSet, err error) {

	blocksByKey = make(map[string]IndexBlock, 1)
	blocksByVb = make([]vbBlockSet, 0)

	for vbNo, clockVbSeq := range channelClock.Value() {
		fromVbSeq := fromSeq.GetSequence(uint16(vbNo))
		// Verify that the requested from value is less than the channel clock sequence (there are
		// new entries for this vbucket in the channel)
		if fromVbSeq >= clockVbSeq {
			continue
		}

		blockSet := vbBlockSet{vbNo: uint16(vbNo)}
		partition := b.partitions.VbMap[uint16(vbNo)]
		for _, blockIndex := range generateBitFlagBlockIndexes(b.channelName, fromVbSeq, clockVbSeq, partition) {
			blockKey := getIndexBlockKey(b.channelName, blockIndex, partition)
			block, found := blocksByKey[blockKey]
			if !found {
				block = newBitFlagBufferBlockForKey(blockKey, b.channelName, blockIndex, partition, b.partitions.VbPositionMaps[partition])
				blocksByKey[blockKey] = block
			}
			blockSet.blocks = append(blockSet.blocks, block)
		}
		blocksByVb = append(blocksByVb, blockSet)
	}

	return blocksByKey, blocksByVb, nil
}
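// The sketch below is illustrative only and not part of the index package: it shows the same
// group-and-dedup pattern calculateChangedBlocks uses, with plain strings standing in for
// IndexBlock so it stands alone. Each vbucket keeps its own ordered slice of blocks, while
// vbuckets that resolve to the same block key share a single entry in the byKey map.
func exampleGroupBlocksByKeySketch(keysByVb map[uint16][]string) (byKey map[string]string, byVb map[uint16][]string) {
	byKey = make(map[string]string)
	byVb = make(map[uint16][]string)
	for vb, blockKeys := range keysByVb {
		for _, blockKey := range blockKeys {
			block, found := byKey[blockKey]
			if !found {
				// Stand-in for newBitFlagBufferBlockForKey - the block is created once per key
				block = "block for " + blockKey
				byKey[blockKey] = block
			}
			// Every vbucket that needs this block references the same shared entry
			byVb[vb] = append(byVb[vb], block)
		}
	}
	return byKey, byVb
}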
// calculateHash does a simple sum of the vector clock values, modulo s.mod. This intentionally
// doesn't generate a random distribution of clocks to hash values. Instead, since vbucket values within the
// clock are monotonically increasing, it's going to generate increasing, non-colliding hash values
// until the s.mod value is reached. The goal is to tune s.mod to the update frequency (and so the expiry
// of previously used hash values), so that a previously used hash value (n) is obsolete by the time
// the clock wraps around to (mod + n).
func (s *sequenceHasher) calculateHash(clock base.SequenceClock) uint64 {

	var sum uint64
	// Masking with s.modMinus1 is equivalent to taking the value modulo s.mod when s.mod is a power of two
	for _, value := range clock.Value() {
		sum += value & s.modMinus1
	}
	return sum & s.modMinus1
}
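// Hypothetical, standalone sketch (illustrative names only) of the masking arithmetic calculateHash
// relies on via s.modMinus1: when mod is a power of two, masking with mod-1 is equivalent to a
// modulo. For example, with mod=256 a clock of [100, 200, 300] hashes to
// (100 + 200 + (300 & 255)) & 255 = (100 + 200 + 44) & 255 = 344 & 255 = 88.
func exampleClockHashSketch(clockValue []uint64, mod uint64) uint64 {
	modMinus1 := mod - 1 // assumes mod is a power of two
	var sum uint64
	for _, value := range clockValue {
		sum += value & modMinus1
	}
	return sum & modMinus1
}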
// calculateChanged identifies which vbuckets have changed between sinceClock and toClock, groups these
// by partition, and returns them as an array of PartitionRanges, indexed by partition number.
func (ds *DenseStorageReader) calculateChanged(sinceClock, toClock base.SequenceClock) (changedVbs []uint16, changedPartitions []*PartitionRange) {

	changedVbs = make([]uint16, 0)
	changedPartitions = make([]*PartitionRange, ds.partitions.PartitionCount())

	for vbNoInt, toSeq := range toClock.Value() {
		vbNo := uint16(vbNoInt)
		sinceSeq := sinceClock.GetSequence(vbNo)
		if sinceSeq < toSeq {
			changedVbs = append(changedVbs, vbNo)
			partitionNo := ds.partitions.PartitionForVb(vbNo)
			if changedPartitions[partitionNo] == nil {
				partitionRange := NewPartitionRange()
				changedPartitions[partitionNo] = &partitionRange
			}
			changedPartitions[partitionNo].SetRange(vbNo, sinceSeq, toSeq)
		}
	}
	return changedVbs, changedPartitions
}
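// Hypothetical, standalone sketch of the grouping calculateChanged performs, using a plain map in
// place of the PartitionRange slice. With sinceClock=[5,0,7], toClock=[5,3,9] and all vbuckets in
// partition 0, only vb 1 (0<3) and vb 2 (7<9) are changed, and both ranges land in partition 0's entry.
func exampleChangedVbsByPartitionSketch(sinceClock, toClock []uint64, partitionForVb func(uint16) uint16) (changedVbs []uint16, ranges map[uint16][][3]uint64) {
	ranges = make(map[uint16][][3]uint64) // partition -> list of [vbNo, sinceSeq, toSeq]
	for vbNoInt, toSeq := range toClock {
		vbNo := uint16(vbNoInt)
		sinceSeq := sinceClock[vbNoInt]
		if sinceSeq < toSeq {
			changedVbs = append(changedVbs, vbNo)
			partitionNo := partitionForVb(vbNo)
			ranges[partitionNo] = append(ranges[partitionNo], [3]uint64{uint64(vbNo), sinceSeq, toSeq})
		}
	}
	return changedVbs, ranges
}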
func (k *kvChannelIndex) checkLastPolled(since base.SequenceClock) (results []*LogEntry) {

	k.lastPolledLock.RLock()
	defer k.lastPolledLock.RUnlock()

	if k.lastPolledValidTo == nil || k.lastPolledSince == nil {
		return results
	}

	matchesLastPolledSince := true
	lastPolledValue := k.lastPolledSince.Value()
	validToValue := k.lastPolledValidTo.Value()
	sinceValue := since.Value()
	for vb, sequence := range sinceValue {
		lastPolledVbValue := lastPolledValue[vb]
		if sequence != lastPolledVbValue {
			matchesLastPolledSince = false
			if sequence < lastPolledVbValue || sequence > validToValue[vb] {
				// Poll results aren't sufficient for this request - return empty set
				return results
			}
		}
	}

	// The last polled results are sufficient to serve this request. If there's a match on the since values,
	// return the entire last polled changes. If not, filter the last polled changes and return all entries greater
	// than the since value.
	if matchesLastPolledSince {
		// TODO: Come up with a solution that doesn't generate as much GC work on every checkLastPolled hit,
		//       but doesn't break when k.lastPolledChanges gets updated mid-request.
		results := make([]*LogEntry, len(k.lastPolledChanges))
		copy(results, k.lastPolledChanges)
		return results
	} else {
		for _, entry := range k.lastPolledChanges {
			if entry.Sequence > sinceValue[entry.VbNo] {
				results = append(results, entry)
			}
		}
	}
	return results
}
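// Hypothetical, standalone sketch of the per-vbucket decision checkLastPolled makes, using plain
// uint64 sequences instead of clocks. For each vbucket the since value either matches the last
// polled since exactly (the cached changes can be returned wholesale), falls inside the
// (lastPolledSince, validTo] window (the cached changes can be filtered), or falls outside the
// window (the cached poll results can't serve the request at all).
func exampleLastPolledUsableSketch(since, lastPolledSince, validTo uint64) (usable bool, exactMatch bool) {
	if since == lastPolledSince {
		return true, true
	}
	if since < lastPolledSince || since > validTo {
		return false, false
	}
	return true, false
}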
func (s *sequenceHasher) GetHash(clock base.SequenceClock) (string, error) {

	if clock == nil {
		return "", errors.New("Can't calculate hash for nil clock")
	}

	hashValue := s.calculateHash(clock)

	// Load stored clocks for this hash, to see if it's already been defined.
	// Note: getCacheValue and load are handled as separate operations to optimize locking.
	//   1. getCacheValue locks the cache, and retrieves the current cache entry (or creates a new empty entry if not found)
	//   2. cachedValue.load locks the entry, and loads from the DB if no previous entry is found
	cachedValue := s.getCacheValue(hashValue)
	cachedClocks, err := cachedValue.load(s.loadClocks)
	if err != nil {
		return "", err
	}

	// Check whether the cached clocks for the hash value match our clock
	exists, index := cachedClocks.Contains(clock.Value())
	if exists {
		seqHash := sequenceHash{hashValue: hashValue, collisionIndex: uint16(index)}
		indexExpvars.Add("seqHash_getHash_hits", 1)
		return seqHash.String(), nil
	}

	// Didn't find a match in cache - update the index and the cache. Get a write lock on the index value
	// first, to ensure only one goroutine on this SG attempts to write. WriteCasRaw handling below covers
	// the case where other SGs are updating the value concurrently.
	indexExpvars.Add("seqHash_getHash_misses", 1)

	// First copy the clock value, to ensure we store a non-mutable version in the cache
	clockValue := make([]uint64, len(clock.Value()))
	copy(clockValue, clock.Value())

	updateErr := func() error {
		cachedValue.lock.Lock()
		defer cachedValue.lock.Unlock()

		// If the number of cached clocks has changed, check whether someone else has added this clock
		// while we waited for the lock
		if len(cachedValue.clocks.Sequences) > len(cachedClocks.Sequences) {
			exists, index = cachedValue.clocks.Contains(clockValue)
			if exists {
				return nil
			}
		}

		// Add our clock to the cached clocks for this hash
		existingClocks := cachedValue.clocks
		existingClocks.Sequences = append(existingClocks.Sequences, clockValue)

		// Update the hash entry in the bucket
		key := kHashPrefix + strconv.FormatUint(hashValue, 10)
		initialValue, marshalErr := existingClocks.Marshal()
		index = len(existingClocks.Sequences) - 1
		if marshalErr != nil {
			return marshalErr
		}
		_, casErr := base.WriteCasRaw(s.bucket, key, initialValue, existingClocks.cas, int(s.hashExpiry), func(value []byte) (updatedValue []byte, err error) {
			// Note: The following is invoked upon cas failure - may be called multiple times
			base.LogTo("DIndex+", "CAS fail - reapplying changes for hash storage for key: %s", key)
			var sClocks storedClocks
			err = sClocks.Unmarshal(value)
			if err != nil {
				base.Warn("Error unmarshalling hash storage during update", err)
				return nil, err
			}
			exists, index = sClocks.Contains(clockValue)
			if exists {
				// Return empty byte array to cancel the update
				return []byte{}, nil
			}
			// Not found - add our clock and reattempt the write
			sClocks.Sequences = append(sClocks.Sequences, clockValue)
			base.LogTo("DIndex+", "Reattempting stored hash write for key %s:", key)
			index = len(sClocks.Sequences) - 1
			return sClocks.Marshal()
		})
		return casErr
	}()

	indexExpvars.Add("writeCasRaw_hash", 1)

	// Tolerate "Already Exists" - the clock has already been stored for this hash value
	if updateErr != nil && updateErr.Error() != "Already Exists" {
		return "", updateErr
	}

	seqHash := &sequenceHash{hashValue: hashValue, collisionIndex: uint16(index)}
	return seqHash.String(), nil
}
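// Hypothetical, standalone sketch (illustrative names only) of the collision-index idea GetHash
// relies on: several distinct clocks can sum to the same hashValue, so each stored clock's position
// in the per-hash list is recorded as a collision index, and (hashValue, collisionIndex) together
// identify one clock. The sketch appends the clock if it isn't already stored and returns its index.
func exampleHashCollisionIndexSketch(stored [][]uint64, clockValue []uint64) (updated [][]uint64, collisionIndex int) {
	for i, existing := range stored {
		if len(existing) != len(clockValue) {
			continue
		}
		match := true
		for vb, seq := range existing {
			if clockValue[vb] != seq {
				match = false
				break
			}
		}
		if match {
			return stored, i // already stored - reuse its collision index
		}
	}
	return append(stored, clockValue), len(stored) // new entry goes at the end of the list
}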