// Range scan index between low and high. func (c *GsiClient) Range( defnID uint64, low, high common.SecondaryKey, inclusion Inclusion, distinct bool, limit int64, cons common.Consistency, vector *TsConsistency, callb ResponseHandler) (err error) { if c.bridge == nil { return ErrorClientUninitialized } // check whether the index is present and available. if _, err = c.bridge.IndexState(defnID); err != nil { protoResp := &protobuf.ResponseStream{ Err: &protobuf.Error{Error: proto.String(err.Error())}, } callb(protoResp) return } begin := time.Now() err = c.doScan( defnID, func(qc *GsiScanClient, index *common.IndexDefn) (error, bool) { var err error vector, err = c.getConsistency(cons, vector, index.Bucket) if err != nil { return err, false } if c.bridge.IsPrimary(uint64(index.DefnId)) { var l, h []byte // primary keys are plain sequence of binary. if low != nil && len(low) > 0 { l = []byte(low[0].(string)) } if high != nil && len(high) > 0 { h = []byte(high[0].(string)) } return qc.RangePrimary( uint64(index.DefnId), l, h, inclusion, distinct, limit, cons, vector, callb) } // dealing with secondary index. return qc.Range( uint64(index.DefnId), low, high, inclusion, distinct, limit, cons, vector, callb) }) if err != nil { // callback with error resp := &protobuf.ResponseStream{ Err: &protobuf.Error{Error: proto.String(err.Error())}, } callb(resp) } fmsg := "Range %v - elapsed(%v) err(%v)" logging.Verbosef(fmsg, defnID, time.Since(begin), err) return }
// CountRange to count number entries in the given range. func (c *GsiClient) CountRange( defnID uint64, low, high common.SecondaryKey, inclusion Inclusion, cons common.Consistency, vector *TsConsistency) (count int64, err error) { if c.bridge == nil { return count, ErrorClientUninitialized } // check whether the index is present and available. if _, err := c.bridge.IndexState(defnID); err != nil { return 0, err } begin := time.Now() err = c.doScan( defnID, func(qc *GsiScanClient, index *common.IndexDefn) (error, bool) { var err error vector, err = c.getConsistency(cons, vector, index.Bucket) if err != nil { return err, false } count, err = qc.CountRange( uint64(index.DefnId), low, high, inclusion, cons, vector) return err, false }) fmsg := "CountRange %v - elapsed(%v) err(%v)" logging.Verbosef(fmsg, defnID, time.Since(begin), err) return count, err }
//send the actual MutationStreamRequest on adminport func (k *kvSender) sendMutationTopicRequest(ap *projClient.Client, topic string, reqTimestamps *protobuf.TsVbuuid, instances []*protobuf.Instance) (*protobuf.TopicResponse, error) { logging.Infof("KVSender::sendMutationTopicRequest Projector %v Topic %v %v \n\tInstances %v", ap, topic, reqTimestamps.GetBucket(), instances) logging.LazyVerbosef("KVSender::sendMutationTopicRequest RequestTS %v", reqTimestamps.Repr) endpointType := "dataport" if res, err := ap.MutationTopicRequest(topic, endpointType, []*protobuf.TsVbuuid{reqTimestamps}, instances); err != nil { logging.Fatalf("KVSender::sendMutationTopicRequest Projector %v Topic %v %v \n\tUnexpected Error %v", ap, topic, reqTimestamps.GetBucket(), err) return res, err } else { logging.Infof("KVSender::sendMutationTopicRequest Success Projector %v Topic %v %v InstanceIds %v", ap, topic, reqTimestamps.GetBucket(), res.GetInstanceIds()) if logging.IsEnabled(logging.Verbose) { logging.Verbosef("KVSender::sendMutationTopicRequest ActiveTs %v \n\tRollbackTs %v", debugPrintTs(res.GetActiveTimestamps(), reqTimestamps.GetBucket()), debugPrintTs(res.GetRollbackTimestamps(), reqTimestamps.GetBucket())) } return res, nil } }
//Drain will keep flushing the mutation queue till caller closes //the stop channel without actually persisting the mutations //Can be stopped anytime by closing the StopChannel. //Any error condition is reported back on the MsgChannel. //Caller can wait on MsgChannel after closing StopChannel to get notified //about shutdown completion. func (f *flusher) Drain(q MutationQueue, streamId common.StreamId, bucket string, stopch StopChannel) MsgChannel { logging.Verbosef("Flusher::Drain %v %v", streamId, bucket) msgch := make(MsgChannel) go f.flushQueue(q, streamId, bucket, nil, nil, false, stopch, msgch) return msgch }
//DrainUptoTS will flush the mutation queue upto the Timestamp //provided without actually persisting it. //Can be stopped anytime by closing the StopChannel. //Sends SUCCESS on the MsgChannel when its done flushing till timestamp. //Any error condition is reported back on the MsgChannel. //Caller can wait on MsgChannel after closing StopChannel to get notified //about shutdown completion. func (f *flusher) DrainUptoTS(q MutationQueue, streamId common.StreamId, bucket string, ts Timestamp, changeVec []bool, stopch StopChannel) MsgChannel { logging.Verbosef("Flusher::DrainUptoTS %v %v Timestamp: %v", streamId, bucket, ts) msgch := make(MsgChannel) go f.flushQueue(q, streamId, bucket, ts, changeVec, false, stopch, msgch) return msgch }
func (k *kvSender) sendRestartVbuckets(ap *projClient.Client, topic string, connErrVbs []Vbucket, restartTs *protobuf.TsVbuuid) (*protobuf.TopicResponse, error) { logging.Infof("KVSender::sendRestartVbuckets Projector %v Topic %v %v", ap, topic, restartTs.GetBucket()) logging.LazyVerbosef("KVSender::sendRestartVbuckets RestartTs %v", restartTs.Repr) //Shutdown the vbucket before restart if there was a ConnErr. If the vbucket is already //running, projector will ignore the request otherwise if len(connErrVbs) != 0 { logging.Infof("KVSender::sendRestartVbuckets ShutdownVbuckets %v Topic %v %v ConnErrVbs %v", ap, topic, restartTs.GetBucket(), connErrVbs) // Only shutting down the Vb that receieve connection error. It is probably not harmful // to shutdown every VB in the repairTS, including those that only receive StreamEnd. // But due to network / projecctor latency, a VB StreamBegin may be coming on the way // for those VB (especially when RepairStream has already retried a couple of times). // So shutting all VB in restartTs may unnecessarily causing race condition and // make the protocol longer to converge. ShutdownVbuckets should have no effect on // projector that does not own the Vb. shutdownTs := k.computeShutdownTs(restartTs, connErrVbs) logging.Infof("KVSender::sendRestartVbuckets ShutdownVbuckets Projector %v Topic %v %v \n\tShutdownTs %v", ap, topic, restartTs.GetBucket(), shutdownTs.Repr()) if err := ap.ShutdownVbuckets(topic, []*protobuf.TsVbuuid{shutdownTs}); err != nil { logging.Errorf("KVSender::sendRestartVbuckets Unexpected Error During "+ "ShutdownVbuckets Request for Projector %v Topic %v. Err %v.", ap, topic, err) //all shutdownVbuckets errors are treated as success as it is a best-effort call. //RestartVbuckets errors will be acted upon. } } if res, err := ap.RestartVbuckets(topic, []*protobuf.TsVbuuid{restartTs}); err != nil { logging.Fatalf("KVSender::sendRestartVbuckets Unexpected Error During "+ "Restart Vbuckets Request for Projector %v Topic %v %v . Err %v.", ap, topic, restartTs.GetBucket(), err) return res, err } else { logging.Infof("KVSender::sendRestartVbuckets Success Projector %v Topic %v %v", ap, topic, restartTs.GetBucket()) if logging.IsEnabled(logging.Verbose) { logging.Verbosef("KVSender::sendRestartVbuckets \nActiveTs %v \nRollbackTs %v", debugPrintTs(res.GetActiveTimestamps(), restartTs.GetBucket()), debugPrintTs(res.GetRollbackTimestamps(), restartTs.GetBucket())) } return res, nil } }
//Persist will keep flushing the mutation queue till caller closes //the stop channel. This function will be used when: //1. Flushing Backfill Catchup Queue // //Can be stopped anytime by closing the StopChannel. //Any error condition is reported back on the MsgChannel. //Caller can wait on MsgChannel after closing StopChannel to get notified //about shutdown completion. func (f *flusher) Persist(q MutationQueue, streamId common.StreamId, bucket string, indexInstMap common.IndexInstMap, indexPartnMap IndexPartnMap, stopch StopChannel) MsgChannel { logging.Verbosef("Flusher::Persist %v %v", streamId, bucket) f.indexInstMap = common.CopyIndexInstMap(indexInstMap) f.indexPartnMap = CopyIndexPartnMap(indexPartnMap) msgch := make(MsgChannel) go f.flushQueue(q, streamId, bucket, nil, nil, true, stopch, msgch) return msgch }
//PersistUptoTS will flush the mutation queue upto the //Timestamp provided. This function will be used when: //1. Flushing Maintenance Queue //2. Flushing Maintenance Catchup Queue //3. Flushing Backfill Queue // //Can be stopped anytime by closing StopChannel. //Sends SUCCESS on the MsgChannel when its done flushing till timestamp. //Any error condition is reported back on the MsgChannel. //Caller can wait on MsgChannel after closing StopChannel to get notified //about shutdown completion. func (f *flusher) PersistUptoTS(q MutationQueue, streamId common.StreamId, bucket string, indexInstMap common.IndexInstMap, indexPartnMap IndexPartnMap, ts Timestamp, changeVec []bool, stopch StopChannel) MsgChannel { logging.Verbosef("Flusher::PersistUptoTS %v %v Timestamp: %v", streamId, bucket, ts) f.indexInstMap = common.CopyIndexInstMap(indexInstMap) f.indexPartnMap = CopyIndexPartnMap(indexPartnMap) msgch := make(MsgChannel) go f.flushQueue(q, streamId, bucket, ts, changeVec, true, stopch, msgch) return msgch }
// Lookup scan index between low and high. func (c *GsiClient) Lookup( defnID uint64, values []common.SecondaryKey, distinct bool, limit int64, cons common.Consistency, vector *TsConsistency, callb ResponseHandler) (err error) { if c.bridge == nil { return ErrorClientUninitialized } // check whether the index is present and available. if _, err = c.bridge.IndexState(defnID); err != nil { protoResp := &protobuf.ResponseStream{ Err: &protobuf.Error{Error: proto.String(err.Error())}, } callb(protoResp) return } begin := time.Now() err = c.doScan( defnID, func(qc *GsiScanClient, index *common.IndexDefn) (error, bool) { var err error vector, err = c.getConsistency(cons, vector, index.Bucket) if err != nil { return err, false } return qc.Lookup( uint64(index.DefnId), values, distinct, limit, cons, vector, callb) }) if err != nil { // callback with error resp := &protobuf.ResponseStream{ Err: &protobuf.Error{Error: proto.String(err.Error())}, } callb(resp) } fmsg := "Lookup %v - elapsed(%v) err(%v)" logging.Verbosef(fmsg, defnID, time.Since(begin), err) return }
// return adminports for all known indexers. func getIndexerAdminports(cinfo *common.ClusterInfoCache) ([]string, error) { iAdminports := make([]string, 0) for _, node := range cinfo.GetNodesByServiceType("indexAdmin") { status, err := cinfo.GetNodeStatus(node) if err != nil { return nil, err } logging.Verbosef("node %v status: %q", node, status) if status == "healthy" || status == "active" || status == "warmup" { adminport, err := cinfo.GetServiceAddress(node, "indexAdmin") if err != nil { return nil, err } iAdminports = append(iAdminports, adminport) } else { logging.Warnf("node %v status: %q", node, status) } } return iAdminports, nil }
func (s *scanCoordinator) handleCountRequest(req *ScanRequest, w ScanResponseWriter, is IndexSnapshot, t0 time.Time) { var rows uint64 var err error stopch := make(StopChannel) cancelCb := NewCancelCallback(req, func(e error) { err = e close(stopch) }) cancelCb.Run() defer cancelCb.Done() for _, s := range GetSliceSnapshots(is) { var r uint64 snap := s.Snapshot() if len(req.Keys) > 0 { r, err = snap.CountLookup(req.Keys, stopch) } else if req.Low.Bytes() == nil && req.Low.Bytes() == nil { r, err = snap.CountTotal(stopch) } else { r, err = snap.CountRange(req.Low, req.High, req.Incl, stopch) } if err != nil { break } rows += r } if s.tryRespondWithError(w, req, err) { return } logging.Verbosef("%s RESPONSE count:%d status:ok", req.LogPrefix, rows) err = w.Count(rows) s.handleError(req.LogPrefix, err) }
func (s *scanCoordinator) serverCallback(protoReq interface{}, conn net.Conn, cancelCh <-chan interface{}) { req, err := s.newRequest(protoReq, cancelCh) w := NewProtoWriter(req.ScanType, conn) defer func() { s.handleError(req.LogPrefix, w.Done()) req.Done() }() logging.Verbosef("%s REQUEST %s", req.LogPrefix, req) if req.Consistency != nil { logging.LazyVerbose(func() string { return fmt.Sprintf("%s requested timestamp: %s => %s Crc64 => %v", req.LogPrefix, strings.ToLower(req.Consistency.String()), ScanTStoString(req.Ts), req.Ts.GetCrc64()) }) } if s.tryRespondWithError(w, req, err) { return } req.Stats.numRequests.Add(1) t0 := time.Now() is, err := s.getRequestedIndexSnapshot(req) if s.tryRespondWithError(w, req, err) { return } defer DestroyIndexSnapshot(is) logging.LazyVerbose(func() string { return fmt.Sprintf("%s snapshot timestamp: %s", req.LogPrefix, ScanTStoString(is.Timestamp())) }) s.processRequest(req, w, is, t0) }
func (ss *StreamState) setHWTFromRestartTs(streamId common.StreamId, bucket string) { logging.Debugf("StreamState::setHWTFromRestartTs Stream %v "+ "Bucket %v", streamId, bucket) if bucketRestartTs, ok := ss.streamBucketRestartTsMap[streamId]; ok { if restartTs, ok := bucketRestartTs[bucket]; ok && restartTs != nil { //update HWT ss.streamBucketHWTMap[streamId][bucket] = restartTs.Copy() //update Last Flushed Ts ss.streamBucketLastFlushedTsMap[streamId][bucket] = restartTs.Copy() logging.Verbosef("StreamState::setHWTFromRestartTs HWT Set For "+ "Bucket %v StreamId %v. TS %v.", bucket, streamId, restartTs) } else { logging.Warnf("StreamState::setHWTFromRestartTs RestartTs Not Found For "+ "Bucket %v StreamId %v. No Value Set.", bucket, streamId) } } }
// This function gets the list of vb and seqno to repair stream. // Termination condition for stream repair: // 1) All vb are in StreamBegin state // 2) All vb have ref count == 1 // 3) There is no error in stream repair func (ss *StreamState) getRepairTsForBucket(streamId common.StreamId, bucket string) (*common.TsVbuuid, bool, []Vbucket) { // always repair if the last repair is not successful anythingToRepair := ss.streamBucketRestartVbErrMap[streamId][bucket] numVbuckets := ss.config["numVbuckets"].Int() repairTs := common.NewTsVbuuid(bucket, numVbuckets) var shutdownVbs []Vbucket = nil var repairVbs []Vbucket = nil var count = 0 hwtTs := ss.streamBucketHWTMap[streamId][bucket] hasConnError := ss.hasConnectionError(streamId, bucket) // First step : Find out if there is any StreamEnd or ConnError on any vb. for i, s := range ss.streamBucketVbStatusMap[streamId][bucket] { if s == VBS_STREAM_END || s == VBS_CONN_ERROR { repairVbs = ss.addRepairTs(repairTs, hwtTs, Vbucket(i), repairVbs) count++ anythingToRepair = true if hasConnError { // Make sure that we shutdown vb for BOTH StreamEnd and // ConnErr. This is to ensure to cover the case where // indexer may miss a StreamBegin from the new owner // due to connection error. Dataport will not be able // to tell indexer that vb needs to start since // StreamBegin never arrives. shutdownVbs = append(shutdownVbs, Vbucket(i)) } } } // Second step: Find out if any StreamEnd over max retry limit. If so, // add it to ShutdownVbs (for shutdown/restart). Only need to do this // if there is no vb marked with conn error because vb with StreamEnd // would already be in shutdownVbs, if there is connErr. if !hasConnError { for i, s := range ss.streamBucketVbStatusMap[streamId][bucket] { if s == VBS_STREAM_END { vbs := ss.streamBucketRestartVbRetryMap[streamId][bucket] vbs[i] = Seqno(int(vbs[i]) + 1) if int(vbs[i]) > REPAIR_RETRY_BEFORE_SHUTDOWN { logging.Infof("StreamState::getRepairTsForBucket\n\t"+ "Bucket %v StreamId %v Vbucket %v repair is being retried for %v times.", bucket, streamId, i, vbs[i]) ss.clearRestartVbRetry(streamId, bucket, Vbucket(i)) shutdownVbs = append(shutdownVbs, Vbucket(i)) } } } } // Third step: If there is nothing to repair, then double check if every vb has // exactly one vb owner. If not, then the accounting is wrong (most likely due // to connection error). Make the vb as ConnErr and continue to repair. // Note: Do not check for VBS_INIT. RepairMissingStreamBegin will ensure that // indexer is getting all StreamBegin. if !anythingToRepair { for i, s := range ss.streamBucketVbStatusMap[streamId][bucket] { count := ss.streamBucketVbRefCountMap[streamId][bucket][i] if count != 1 && s != VBS_INIT { logging.Infof("StreamState::getRepairTsForBucket\n\t"+ "Bucket %v StreamId %v Vbucket %v have ref count (%v != 1). Convert to CONN_ERROR.", bucket, streamId, i, count) // Make it a ConnErr such that subsequent retry will // force a shutdown/restart sequence. ss.makeConnectionError(streamId, bucket, Vbucket(i)) repairVbs = ss.addRepairTs(repairTs, hwtTs, Vbucket(i), repairVbs) count++ shutdownVbs = append(shutdownVbs, Vbucket(i)) anythingToRepair = true } } } // Forth Step: If there is something to repair, but indexer has received StreamBegin for // all vb, then retry with the last timestamp. if anythingToRepair && count == 0 { logging.Infof("StreamState::getRepairTsForBucket\n\t"+ "Bucket %v StreamId %v previous repair fails. Retry using previous repairTs", bucket, streamId) ts := ss.streamBucketRestartVbTsMap[streamId][bucket] if ts != nil { repairTs = ts.Copy() } else { repairTs = hwtTs.Copy() } shutdownVbs = nil vbnos := repairTs.GetVbnos() for _, vbno := range vbnos { shutdownVbs = append(shutdownVbs, Vbucket(vbno)) } } if !anythingToRepair { ss.streamBucketRestartVbTsMap[streamId][bucket] = nil ss.clearRestartVbError(streamId, bucket) } else { ss.streamBucketRestartVbTsMap[streamId][bucket] = repairTs.Copy() } ss.adjustNonSnapAlignedVbs(repairTs, streamId, bucket, repairVbs, true) logging.Verbosef("StreamState::getRepairTsForBucket\n\t"+ "Bucket %v StreamId %v repairTS %v", bucket, streamId, repairTs) return repairTs, anythingToRepair, shutdownVbs }
//NewForestDBSlice initiailizes a new slice with forestdb backend. //Both main and back index gets initialized with default config. //Slice methods are not thread-safe and application needs to //handle the synchronization. The only exception being Insert and //Delete can be called concurrently. //Returns error in case slice cannot be initialized. func NewForestDBSlice(path string, sliceId SliceId, idxDefnId common.IndexDefnId, idxInstId common.IndexInstId, isPrimary bool, sysconf common.Config, idxStats *IndexStats) (*fdbSlice, error) { info, err := os.Stat(path) if err != nil || err == nil && info.IsDir() { os.Mkdir(path, 0777) } filepath := newFdbFile(path, false) slice := &fdbSlice{} slice.idxStats = idxStats slice.get_bytes = platform.NewAlignedInt64(0) slice.insert_bytes = platform.NewAlignedInt64(0) slice.delete_bytes = platform.NewAlignedInt64(0) slice.extraSnapDataSize = platform.NewAlignedInt64(0) slice.flushedCount = platform.NewAlignedUint64(0) slice.committedCount = platform.NewAlignedUint64(0) config := forestdb.DefaultConfig() config.SetDurabilityOpt(forestdb.DRB_ASYNC) memQuota := sysconf["settings.memory_quota"].Uint64() logging.Debugf("NewForestDBSlice(): buffer cache size %d", memQuota) config.SetBufferCacheSize(memQuota) logging.Debugf("NewForestDBSlice(): buffer cache size %d", memQuota) prob := sysconf["settings.max_writer_lock_prob"].Int() config.SetMaxWriterLockProb(uint8(prob)) walSize := sysconf["settings.wal_size"].Uint64() config.SetWalThreshold(walSize) logging.Verbosef("NewForestDBSlice(): max writer lock prob %d", prob) logging.Verbosef("NewForestDBSlice(): wal size %d", walSize) kvconfig := forestdb.DefaultKVStoreConfig() retry: if slice.dbfile, err = forestdb.Open(filepath, config); err != nil { if err == forestdb.RESULT_NO_DB_HEADERS { logging.Warnf("NewForestDBSlice(): Open failed with no_db_header error...Resetting the forestdb file") os.Remove(filepath) goto retry } return nil, err } slice.config = config slice.sysconf = sysconf //open a separate file handle for compaction if slice.compactFd, err = forestdb.Open(filepath, config); err != nil { return nil, err } config.SetOpenFlags(forestdb.OPEN_FLAG_RDONLY) if slice.statFd, err = forestdb.Open(filepath, config); err != nil { return nil, err } slice.numWriters = sysconf["numSliceWriters"].Int() slice.main = make([]*forestdb.KVStore, slice.numWriters) for i := 0; i < slice.numWriters; i++ { if slice.main[i], err = slice.dbfile.OpenKVStore("main", kvconfig); err != nil { return nil, err } } //create a separate back-index for non-primary indexes if !isPrimary { slice.back = make([]*forestdb.KVStore, slice.numWriters) for i := 0; i < slice.numWriters; i++ { if slice.back[i], err = slice.dbfile.OpenKVStore("back", kvconfig); err != nil { return nil, err } } } // Make use of default kvstore provided by forestdb if slice.meta, err = slice.dbfile.OpenKVStore("default", kvconfig); err != nil { return nil, err } slice.path = path slice.currfile = filepath slice.idxInstId = idxInstId slice.idxDefnId = idxDefnId slice.id = sliceId sliceBufSize := sysconf["settings.sliceBufSize"].Uint64() slice.cmdCh = make(chan interface{}, sliceBufSize) slice.workerDone = make([]chan bool, slice.numWriters) slice.stopCh = make([]DoneChannel, slice.numWriters) slice.isPrimary = isPrimary for i := 0; i < slice.numWriters; i++ { slice.stopCh[i] = make(DoneChannel) slice.workerDone[i] = make(chan bool) go slice.handleCommandsWorker(i) } logging.Infof("ForestDBSlice:NewForestDBSlice Created New Slice Id %v IndexInstId %v "+ "WriterThreads %v", sliceId, idxInstId, slice.numWriters) slice.setCommittedCount() return slice, nil }