//check for presence of large snapshot in a TS and set the flag
func (ss *StreamState) checkLargeSnapshot(ts *common.TsVbuuid) {

	largeSnapshotThreshold := ss.config["settings.largeSnapshotThreshold"].Uint64()
	for _, s := range ts.Snapshots {
		if s[1]-s[0] > largeSnapshotThreshold {
			ts.SetLargeSnapshot(true)
			return
		}
	}

}
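// A minimal, self-contained sketch (illustrative only, not part of the indexer
// code) of the predicate used above: a snapshot is considered "large" when its
// marker span (end - begin) exceeds the configured threshold. It assumes
// snapEnd >= snapBegin, as the snapshot markers above do.
func isLargeSnapshot(snapBegin, snapEnd, threshold uint64) bool {
	return snapEnd-snapBegin > threshold
}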
// FromTsVbuuid converts timestamp from common.TsVbuuid to protobuf
// format.
func (ts *TsVbuuid) FromTsVbuuid(nativeTs *c.TsVbuuid) *TsVbuuid {
	for _, vbno := range nativeTs.GetVbnos() {
		s := nativeTs.Snapshots[vbno]
		snapshot := NewSnapshot(s[0], s[1])
		ts.Snapshots = append(ts.Snapshots, snapshot)
		ts.Vbnos = append(ts.Vbnos, uint32(vbno))
		ts.Seqnos = append(ts.Seqnos, nativeTs.Seqnos[vbno])
		ts.Vbuuids = append(ts.Vbuuids, nativeTs.Vbuuids[vbno])
	}
	return ts
}
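// Illustrative usage sketch (not part of the original code) showing how the
// conversion above is typically driven, mirroring kvSender.restartVbuckets
// further below: allocate an empty protobuf TsVbuuid and populate it from the
// native timestamp. The "default" pool name is a placeholder for whatever pool
// the caller uses.
func exampleToProtoTs(nativeTs *c.TsVbuuid, numVbuckets int) *TsVbuuid {
	protoTs := NewTsVbuuid("default", nativeTs.Bucket, numVbuckets)
	return protoTs.FromTsVbuuid(nativeTs)
}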
func compareIfActiveTsEqual(origTs, compTs *c.TsVbuuid) bool {

	vbnosOrig := origTs.GetVbnos()
	vbnosComp := compTs.GetVbnos()

	//guard against differing vbucket counts before the element-wise compare
	if len(vbnosOrig) != len(vbnosComp) {
		return false
	}

	for i, vb := range vbnosOrig {
		if vbnosComp[i] != vb {
			return false
		}
	}
	return true

}
//If a snapshot marker has been received but no mutation for that snapshot,
//the repairTs seqno will be outside the snapshot marker range and
//DCP will refuse to accept such seqno for restart. Such VBs need to
//use lastFlushTs or restartTs.
func (ss *StreamState) adjustNonSnapAlignedVbs(repairTs *common.TsVbuuid,
	streamId common.StreamId, bucket string) {

	for _, vbno := range repairTs.GetVbnos() {

		seqno := repairTs.Seqnos[vbno]
		snapBegin := repairTs.Snapshots[vbno][0]
		snapEnd := repairTs.Snapshots[vbno][1]
		if !(seqno >= snapBegin && seqno <= snapEnd) {

			// First, use the last flushed TS seqno if available
			if fts, ok := ss.streamBucketLastFlushedTsMap[streamId][bucket]; ok && fts != nil {
				repairTs.Snapshots[vbno][0] = fts.Snapshots[vbno][0]
				repairTs.Snapshots[vbno][1] = fts.Snapshots[vbno][1]
				repairTs.Seqnos[vbno] = fts.Seqnos[vbno]
				snapBegin = repairTs.Snapshots[vbno][0]
				snapEnd = repairTs.Snapshots[vbno][1]
			}

			// If the last flushed TS is still out-of-bound, use the last
			// snap-aligned flushed TS if available
			if !(repairTs.Seqnos[vbno] >= snapBegin && repairTs.Seqnos[vbno] <= snapEnd) {
				if fts, ok := ss.streamBucketLastSnapAlignFlushedTsMap[streamId][bucket]; ok && fts != nil {
					repairTs.Snapshots[vbno][0] = fts.Snapshots[vbno][0]
					repairTs.Snapshots[vbno][1] = fts.Snapshots[vbno][1]
					repairTs.Seqnos[vbno] = fts.Seqnos[vbno]
					snapBegin = repairTs.Snapshots[vbno][0]
					snapEnd = repairTs.Snapshots[vbno][1]
				}
			}

			// If the last snap-aligned flushed TS is not available, use restartTs
			if !(repairTs.Seqnos[vbno] >= snapBegin && repairTs.Seqnos[vbno] <= snapEnd) {
				if rts, ok := ss.streamBucketRestartTsMap[streamId][bucket]; ok && rts != nil {
					//if no flush has been done yet, use restart TS
					repairTs.Snapshots[vbno][0] = rts.Snapshots[vbno][0]
					repairTs.Snapshots[vbno][1] = rts.Snapshots[vbno][1]
					repairTs.Seqnos[vbno] = rts.Seqnos[vbno]
					snapBegin = repairTs.Snapshots[vbno][0]
					snapEnd = repairTs.Snapshots[vbno][1]
				}
			}

			//if the seqno is still not within the snapshot range, it is likely
			//a bug in state management
			if !(repairTs.Seqnos[vbno] >= snapBegin && repairTs.Seqnos[vbno] <= snapEnd) {
				common.CrashOnError(errors.New("No Valid Restart Seqno Found"))
			}
		}
	}
}
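// A small self-contained sketch (illustrative only) of the predicate applied
// repeatedly above: a seqno is usable for a DCP restart only when it lies
// within the snapshot marker range [snapBegin, snapEnd].
func seqnoWithinSnapshot(seqno, snapBegin, snapEnd uint64) bool {
	return seqno >= snapBegin && seqno <= snapEnd
}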
func isSnapshotConsistent(
	ss IndexSnapshot, cons common.Consistency, reqTs *common.TsVbuuid) bool {

	if snapTs := ss.Timestamp(); snapTs != nil {
		if cons == common.QueryConsistency && snapTs.AsRecent(reqTs) {
			return true
		} else if cons == common.SessionConsistency {
			if ss.IsEpoch() && reqTs.IsEpoch() {
				return true
			}
			if snapTs.CheckCrc64(reqTs) && snapTs.AsRecentTs(reqTs) {
				return true
			}
			// don't return an error because the client might be ahead of
			// the indexer in receiving a rollback.
			// return nil, ErrVbuuidMismatch
			return false
		}
		return true // AnyConsistency
	}
	return false
}
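// Illustrative sketch (not the indexer API) of the session-consistency rule in
// the branch above, reduced to booleans the caller would compute: a snapshot
// can serve a session-consistent request when the request's vbuuids match
// (crc64) and the snapshot is at least as recent as the request timestamp, or
// when both timestamps are still at epoch (no mutations seen yet).
func sessionConsistent(bothEpoch, crcMatch, snapAsRecent bool) bool {
	return bothEpoch || (crcMatch && snapAsRecent)
}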
func (s *storageMgr) createSnapshotWorker(streamId common.StreamId, bucket string,
	tsVbuuid *common.TsVbuuid, indexSnapMap IndexSnapMap, numVbuckets int,
	indexInstMap common.IndexInstMap, indexPartnMap IndexPartnMap, stats *IndexerStats) {

	defer destroyIndexSnapMap(indexSnapMap)

	var needsCommit bool
	snapType := tsVbuuid.GetSnapType()
	if snapType == common.DISK_SNAP {
		needsCommit = true
	}

	var wg sync.WaitGroup
	//for every index managed by this indexer
	for idxInstId, partnMap := range indexPartnMap {
		// Create snapshots for all indexes in parallel
		wg.Add(1)
		go func(idxInstId common.IndexInstId, partnMap PartitionInstMap) {
			defer wg.Done()

			idxInst := indexInstMap[idxInstId]
			idxStats := stats.indexes[idxInst.InstId]
			lastIndexSnap := indexSnapMap[idxInstId]
			//if index belongs to the flushed bucket and stream
			if idxInst.Defn.Bucket == bucket &&
				idxInst.Stream == streamId &&
				idxInst.State != common.INDEX_STATE_DELETED {

				// List of snapshots for reading current timestamp
				var isSnapCreated bool = true

				partnSnaps := make(map[common.PartitionId]PartitionSnapshot)
				//for all partitions managed by this indexer
				for partnId, partnInst := range partnMap {
					var lastPartnSnap PartitionSnapshot

					if lastIndexSnap != nil {
						lastPartnSnap = lastIndexSnap.Partitions()[partnId]
					}
					sc := partnInst.Sc

					sliceSnaps := make(map[SliceId]SliceSnapshot)
					//create snapshot for all the slices
					for _, slice := range sc.GetAllSlices() {

						var latestSnapshot Snapshot
						if lastIndexSnap != nil && lastIndexSnap.Partitions() != nil {
							lastSliceSnap := lastPartnSnap.Slices()[slice.Id()]
							latestSnapshot = lastSliceSnap.Snapshot()
						}

						//if flush timestamp is greater than last
						//snapshot timestamp, create a new snapshot
						snapTs := NewTimestamp(numVbuckets)
						if latestSnapshot != nil {
							snapTsVbuuid := latestSnapshot.Timestamp()
							snapTs = getSeqTsFromTsVbuuid(snapTsVbuuid)
						}

						ts := getSeqTsFromTsVbuuid(tsVbuuid)

						//if the flush TS is greater than the last snapshot TS
						//and the slice has some changes. Skip only the in-memory
						//snapshot in case of unchanged data.
						if latestSnapshot == nil || (ts.GreaterThan(snapTs) &&
							(slice.IsDirty() || needsCommit)) {

							newTsVbuuid := tsVbuuid.Copy()
							var err error
							var info SnapshotInfo
							var newSnapshot Snapshot

							logging.Tracef("StorageMgr::handleCreateSnapshot Creating New Snapshot "+
								"Index: %v PartitionId: %v SliceId: %v Commit: %v", idxInstId, partnId,
								slice.Id(), needsCommit)

							snapCreateStart := time.Now()
							if info, err = slice.NewSnapshot(newTsVbuuid, needsCommit); err != nil {
								logging.Errorf("StorageMgr::handleCreateSnapshot Error "+
									"Creating new snapshot Slice Index: %v Slice: %v. Skipped. Error %v",
									idxInstId, slice.Id(), err)
								isSnapCreated = false
								common.CrashOnError(err)
								continue
							}
							snapCreateDur := time.Since(snapCreateStart)

							idxStats := stats.indexes[idxInstId]
							idxStats.numSnapshots.Add(1)
							if needsCommit {
								idxStats.numCommits.Add(1)
							}

							snapOpenStart := time.Now()
							if newSnapshot, err = slice.OpenSnapshot(info); err != nil {
								logging.Errorf("StorageMgr::handleCreateSnapshot Error Creating Snapshot "+
									"for Index: %v Slice: %v. Skipped. Error %v", idxInstId, slice.Id(), err)
								isSnapCreated = false
								common.CrashOnError(err)
								continue
							}
							snapOpenDur := time.Since(snapOpenStart)

							logging.Infof("StorageMgr::handleCreateSnapshot Added New Snapshot Index: %v "+
								"PartitionId: %v SliceId: %v Crc64: %v (%v) SnapCreateDur %v SnapOpenDur %v",
								idxInstId, partnId, slice.Id(), tsVbuuid.Crc64, info, snapCreateDur, snapOpenDur)

							ss := &sliceSnapshot{
								id:   slice.Id(),
								snap: newSnapshot,
							}
							sliceSnaps[slice.Id()] = ss
						} else {
							// Increment reference
							latestSnapshot.Open()
							ss := &sliceSnapshot{
								id:   slice.Id(),
								snap: latestSnapshot,
							}
							sliceSnaps[slice.Id()] = ss
							logging.Warnf("StorageMgr::handleCreateSnapshot Skipped Creating New Snapshot for Index %v "+
								"PartitionId %v SliceId %v. No New Mutations. IsDirty %v",
								idxInstId, partnId, slice.Id(), slice.IsDirty())
							logging.Debugf("StorageMgr::handleCreateSnapshot SnapTs %v FlushTs %v", snapTs, ts)
							continue
						}
					}

					ps := &partitionSnapshot{
						id:     partnId,
						slices: sliceSnaps,
					}
					partnSnaps[partnId] = ps
				}

				is := &indexSnapshot{
					instId: idxInstId,
					ts:     tsVbuuid.Copy(),
					partns: partnSnaps,
				}

				if isSnapCreated {
					s.updateSnapMapAndNotify(is, idxStats)
				} else {
					DestroyIndexSnapshot(is)
				}
				s.updateSnapIntervalStat(idxStats)
			}
		}(idxInstId, partnMap)
	}

	wg.Wait()

	s.supvRespch <- &MsgMutMgrFlushDone{mType: STORAGE_SNAP_DONE,
		streamId: streamId,
		bucket:   bucket,
		ts:       tsVbuuid}
}
func (k *kvSender) restartVbuckets(streamId c.StreamId, restartTs *c.TsVbuuid,
	connErrVbs []Vbucket, respCh MsgChannel, stopCh StopChannel) {

	addrs, err := k.getProjAddrsForVbuckets(restartTs.Bucket, restartTs.GetVbnos())
	if err != nil {
		logging.Errorf("KVSender::restartVbuckets %v %v Error in fetching cluster info %v",
			streamId, restartTs.Bucket, err)
		respCh <- &MsgError{
			err: Error{code: ERROR_KVSENDER_STREAM_REQUEST_ERROR,
				severity: FATAL,
				cause:    err}}

		return
	}

	//convert TS to protobuf format
	var protoRestartTs *protobuf.TsVbuuid
	numVbuckets := k.config["numVbuckets"].Int()
	protoTs := protobuf.NewTsVbuuid(DEFAULT_POOL, restartTs.Bucket, numVbuckets)
	protoRestartTs = protoTs.FromTsVbuuid(restartTs)

	var rollbackTs *protobuf.TsVbuuid
	topic := getTopicForStreamId(streamId)
	rollback := false

	fn := func(r int, err error) error {

		for _, addr := range addrs {
			ap := newProjClient(addr)

			if res, ret := k.sendRestartVbuckets(ap, topic, connErrVbs, protoRestartTs); ret != nil {
				//retry for all errors
				logging.Errorf("KVSender::restartVbuckets %v %v Error Received %v from %v",
					streamId, restartTs.Bucket, ret, addr)
				err = ret
			} else {
				rollbackTs = updateRollbackTsFromResponse(restartTs.Bucket, rollbackTs, res)
			}
		}

		if rollbackTs != nil && checkVbListInTS(protoRestartTs.GetVbnos(), rollbackTs) {
			//if rollback, no need to retry
			rollback = true
			return nil
		} else {
			return err
		}
	}

	rh := c.NewRetryHelper(MAX_KV_REQUEST_RETRY, time.Second, BACKOFF_FACTOR, fn)
	err = rh.Run()

	//if any of the requested vb is in rollback ts, send rollback
	//msg to caller
	if rollback {
		//convert from protobuf to native format
		nativeTs := rollbackTs.ToTsVbuuid(numVbuckets)

		respCh <- &MsgRollback{streamId: streamId,
			rollbackTs: nativeTs}
	} else if err != nil {
		//if there is a topicMissing/genServer.Closed error, a fresh
		//MutationTopicRequest is required.
		if err.Error() == projClient.ErrorTopicMissing.Error() ||
			err.Error() == c.ErrorClosed.Error() ||
			err.Error() == projClient.ErrorInvalidBucket.Error() {
			respCh <- &MsgKVStreamRepair{
				streamId: streamId,
				bucket:   restartTs.Bucket,
			}
		} else {
			respCh <- &MsgError{
				err: Error{code: ERROR_KVSENDER_STREAM_REQUEST_ERROR,
					severity: FATAL,
					cause:    err}}
		}
	} else {
		respCh <- &MsgSuccess{}
	}
}
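// Self-contained sketch (illustrative only, not the actual RetryHelper
// implementation) of the retry-with-backoff pattern that c.NewRetryHelper
// provides above: run the closure until it succeeds or the retries are
// exhausted, growing the sleep interval by the backoff factor after each
// failed attempt.
func retryWithBackoff(maxRetries int, interval time.Duration, factor int,
	fn func(attempt int) error) error {

	var err error
	for attempt := 0; attempt <= maxRetries; attempt++ {
		if err = fn(attempt); err == nil {
			return nil
		}
		time.Sleep(interval)
		interval *= time.Duration(factor)
	}
	return err
}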
//If a snapshot marker has been received but no mutation for that snapshot,
//the repairTs seqno will be outside the snapshot marker range and
//DCP will refuse to accept such seqno for restart. Such VBs need to
//use lastFlushTs or restartTs.
//
func (ss *StreamState) adjustNonSnapAlignedVbs(repairTs *common.TsVbuuid,
	streamId common.StreamId, bucket string, repairVbs []Vbucket, forStreamRepair bool) {

	// The caller either provides a vector of vbs to repair, or this function will
	// look for any vb in the repairTs that has a non-zero vbuuid.
	if repairVbs == nil {
		for _, vbno := range repairTs.GetVbnos() {
			repairVbs = append(repairVbs, Vbucket(vbno))
		}
	}

	logging.Infof("StreamState::adjustNonSnapAlignedVbs\n\t"+
		"Bucket %v StreamId %v Vbuckets %v.", bucket, streamId, repairVbs)

	for _, vbno := range repairVbs {

		if !(repairTs.Seqnos[vbno] >= repairTs.Snapshots[vbno][0] &&
			repairTs.Seqnos[vbno] <= repairTs.Snapshots[vbno][1]) ||
			repairTs.Vbuuids[vbno] == 0 {

			// First, use the last flushed TS seqno if available
			if fts, ok := ss.streamBucketLastFlushedTsMap[streamId][bucket]; ok && fts != nil {
				repairTs.Snapshots[vbno][0] = fts.Snapshots[vbno][0]
				repairTs.Snapshots[vbno][1] = fts.Snapshots[vbno][1]
				repairTs.Seqnos[vbno] = fts.Seqnos[vbno]
				repairTs.Vbuuids[vbno] = fts.Vbuuids[vbno]
			}

			// If the last flushed TS is still out-of-bound, use the last
			// snap-aligned flushed TS if available
			if !(repairTs.Seqnos[vbno] >= repairTs.Snapshots[vbno][0] &&
				repairTs.Seqnos[vbno] <= repairTs.Snapshots[vbno][1]) ||
				repairTs.Vbuuids[vbno] == 0 {
				if fts, ok := ss.streamBucketLastSnapAlignFlushedTsMap[streamId][bucket]; ok && fts != nil {
					repairTs.Snapshots[vbno][0] = fts.Snapshots[vbno][0]
					repairTs.Snapshots[vbno][1] = fts.Snapshots[vbno][1]
					repairTs.Seqnos[vbno] = fts.Seqnos[vbno]
					repairTs.Vbuuids[vbno] = fts.Vbuuids[vbno]
				}
			}

			// If the timestamp used for open stream is available, use it
			if !(repairTs.Seqnos[vbno] >= repairTs.Snapshots[vbno][0] &&
				repairTs.Seqnos[vbno] <= repairTs.Snapshots[vbno][1]) ||
				repairTs.Vbuuids[vbno] == 0 {
				if rts, ok := ss.streamBucketOpenTsMap[streamId][bucket]; ok && rts != nil {
					repairTs.Snapshots[vbno][0] = rts.Snapshots[vbno][0]
					repairTs.Snapshots[vbno][1] = rts.Snapshots[vbno][1]
					repairTs.Seqnos[vbno] = rts.Seqnos[vbno]
					repairTs.Vbuuids[vbno] = rts.Vbuuids[vbno]
				}
			}

			// If the open stream TS is not available, use restartTs
			if !(repairTs.Seqnos[vbno] >= repairTs.Snapshots[vbno][0] &&
				repairTs.Seqnos[vbno] <= repairTs.Snapshots[vbno][1]) ||
				repairTs.Vbuuids[vbno] == 0 {
				if rts, ok := ss.streamBucketRestartTsMap[streamId][bucket]; ok && rts != nil {
					//if no flush has been done yet, use restart TS
					repairTs.Snapshots[vbno][0] = rts.Snapshots[vbno][0]
					repairTs.Snapshots[vbno][1] = rts.Snapshots[vbno][1]
					repairTs.Seqnos[vbno] = rts.Seqnos[vbno]
					repairTs.Vbuuids[vbno] = rts.Vbuuids[vbno]
				}
			}

			//if the seqno is still not within the snapshot range or the vbuuid
			//is invalid, fall back to a zero timestamp
			if !(repairTs.Seqnos[vbno] >= repairTs.Snapshots[vbno][0] &&
				repairTs.Seqnos[vbno] <= repairTs.Snapshots[vbno][1]) ||
				repairTs.Vbuuids[vbno] == 0 {
				repairTs.Snapshots[vbno][0] = 0
				repairTs.Snapshots[vbno][1] = 0
				repairTs.Seqnos[vbno] = 0
				repairTs.Vbuuids[vbno] = 0
			}
		}
	}
}
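// Illustrative sketch (not part of the indexer) of the fallback order applied
// above for a vb whose seqno is not snap-aligned or whose vbuuid is zero: the
// candidate timestamps (last flushed, last snap-aligned flushed, open-stream,
// restart) are tried in priority order, and the first usable entry wins;
// otherwise the vb falls back to a zero timestamp.
func pickRepairEntry(candidates [][4]uint64) [4]uint64 {
	for _, cand := range candidates { // candidate = [seqno, snapBegin, snapEnd, vbuuid]
		if cand[3] != 0 && cand[0] >= cand[1] && cand[0] <= cand[2] {
			return cand
		}
	}
	return [4]uint64{} // zero timestamp: seqno, snapshot markers and vbuuid all zero
}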