// Goroutine that runs the feed
func (feed *UprFeed) run() {
	retryInterval := initialRetryInterval
	bucketOK := true
	for {
		// Connect to the UPR feed of each server node:
		if bucketOK {
			// Run until one of the sub-feeds fails:
			select {
			case <-feed.killSwitch:
			case <-feed.quit:
				return
			}
			//feed.closeNodeFeeds()
			retryInterval = initialRetryInterval
		}

		if feed.closing {
			// we have been asked to shut down
			return
		}

		// On error, try to refresh the bucket in case the list of nodes changed:
		logging.Infof("go-couchbase: UPR connection lost; reconnecting to bucket %q in %v",
			feed.bucket.Name, retryInterval)

		if err := feed.bucket.Refresh(); err != nil {
			// if we fail to refresh the bucket, exit the feed
			// MB-14917
			logging.Infof("Unable to refresh bucket %s ", err.Error())
			close(feed.output)
			feed.outputClosed = true
			feed.closeNodeFeeds()
			return
		}

		// this will only connect to nodes that are not connected or changed
		// user will have to reconnect the stream
		err := feed.connectToNodes()
		if err != nil {
			logging.Infof("Unable to connect to nodes; exiting")
			close(feed.output)
			feed.outputClosed = true
			feed.closeNodeFeeds()
			return
		}
		bucketOK = true // connectToNodes succeeded; err is nil here

		select {
		case <-time.After(retryInterval):
		case <-feed.quit:
			return
		}
		if retryInterval *= 2; retryInterval > maximumRetryInterval {
			retryInterval = maximumRetryInterval
		}
	}
}
// Goroutine that forwards Upr events from a single node's feed to the aggregate feed.
func (feed *UprFeed) forwardUprEvents(nodeFeed *FeedInfo, killSwitch chan bool, host string) {
	singleFeed := nodeFeed.uprFeed

	defer func() {
		feed.wg.Done()
		if r := recover(); r != nil {
			// if the feed is not closing, re-throw the panic
			if !feed.outputClosed && !feed.closing {
				panic(r)
			} else {
				logging.Errorf("Panic recovered. Since the feed is closed, exiting gracefully")
			}
		}
	}()

	for {
		select {
		case <-nodeFeed.quit:
			nodeFeed.connected = false
			return

		case event, ok := <-singleFeed.C:
			if !ok {
				if singleFeed.Error != nil {
					logging.Errorf("go-couchbase: Upr feed from %s failed: %v", host, singleFeed.Error)
				}
				killSwitch <- true
				return
			}
			if feed.outputClosed {
				// someone closed the node feed
				logging.Infof("Node feed closed, returning from forwardUprEvents")
				return
			}
			feed.output <- event
			if event.Status == gomemcached.NOT_MY_VBUCKET {
				logging.Infof("Got a NOT_MY_VBUCKET error; refreshing bucket")
				if err := feed.bucket.Refresh(); err != nil {
					logging.Errorf("Unable to refresh bucket %s ", err.Error())
					feed.closeNodeFeeds()
					return
				}
				// this will only connect to nodes that are not connected or changed
				// user will have to reconnect the stream
				if err := feed.connectToNodes(); err != nil {
					logging.Errorf("Unable to connect to nodes %s", err.Error())
					return
				}
			}
		}
	}
}
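// The killSwitch channel used above is a simple fan-in: any one of the
// per-node forwarder goroutines can signal the supervising run loop to
// tear everything down and reconnect. A minimal sketch of the pattern;
// the function and its parameters are hypothetical, not package API:
func exampleKillSwitchFanIn(hosts []string, forward func(host string, kill chan<- bool)) {
	// buffered so a failing forwarder never blocks on the send
	killSwitch := make(chan bool, len(hosts))
	for _, h := range hosts {
		go forward(h, killSwitch) // each forwarder sends true when its feed dies
	}
	<-killSwitch // the first failure wakes the supervisor, which reconnects
}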
// HealthyNodes returns the list of healthy nodes.
func (b *Bucket) HealthyNodes() []Node {
	nodes := []Node{}

	for _, n := range b.Nodes() {
		if n.Status == "healthy" && n.CouchAPIBase != "" {
			nodes = append(nodes, n)
		}
		if n.Status != "healthy" { // log non-healthy node
			logging.Infof("Non-healthy node; node details:")
			logging.Infof("Hostname=%v, Status=%v, CouchAPIBase=%v, ThisNode=%v",
				n.Hostname, n.Status, n.CouchAPIBase, n.ThisNode)
		}
	}

	return nodes
}
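// Usage sketch: pick any healthy node, e.g. to direct a view request at
// its CouchAPIBase. The helper itself is hypothetical, not package API:
func exampleFirstHealthyNode(b *Bucket) (Node, bool) {
	healthy := b.HealthyNodes()
	if len(healthy) == 0 {
		return Node{}, false
	}
	// every returned node is "healthy" and has a non-empty CouchAPIBase
	return healthy[0], true
}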
// slowLog logs a warning when a server call takes longer than
// SlowServerCallWarningThreshold; callers defer it with the call's
// start time.
func slowLog(startTime time.Time, format string, args ...interface{}) {
	if elapsed := time.Since(startTime); elapsed > SlowServerCallWarningThreshold {
		pc, _, _, _ := runtime.Caller(2)
		caller := runtime.FuncForPC(pc).Name()
		logging.Infof("go-couchbase: "+format+" in "+caller+" took "+elapsed.String(), args...)
	}
}
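// Usage sketch, mirroring how doBulkGet (below) defers slowLog at the
// top of a server call. The function name here is illustrative only:
func exampleTimedCall(keys []string) {
	defer slowLog(time.Now(), "call to exampleTimedCall(%d keys)", len(keys))
	// ... perform the actual server call; the warning fires only if the
	// elapsed time exceeds SlowServerCallWarningThreshold ...
}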
// Goroutine that runs the feed
func (feed *TapFeed) run() {
	retryInterval := initialRetryInterval
	bucketOK := true
	for {
		// Connect to the TAP feed of each server node:
		if bucketOK {
			killSwitch, err := feed.connectToNodes()
			if err == nil {
				// Run until one of the sub-feeds fails:
				select {
				case <-killSwitch:
				case <-feed.quit:
					return
				}
				feed.closeNodeFeeds()
				retryInterval = initialRetryInterval
			}
		}

		// On error, try to refresh the bucket in case the list of nodes changed:
		logging.Infof("go-couchbase: TAP connection lost; reconnecting to bucket %q in %v",
			feed.bucket.Name, retryInterval)
		err := feed.bucket.Refresh()
		bucketOK = err == nil

		select {
		case <-time.After(retryInterval):
		case <-feed.quit:
			return
		}
		if retryInterval *= 2; retryInterval > maximumRetryInterval {
			retryInterval = maximumRetryInterval
		}
	}
}
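// Both run loops above (UPR and TAP) use the same capped exponential
// backoff: double the wait after each failed reconnect, reset it after
// a success, and cap it at maximumRetryInterval. A minimal standalone
// sketch of that pattern; the function and its parameters are
// hypothetical, and connect is assumed to block until the feed fails:
func exampleFeedRetryLoop(connect func() error, quit chan struct{}) {
	retryInterval := initialRetryInterval
	for {
		if err := connect(); err == nil {
			retryInterval = initialRetryInterval // reset after a successful run
		}
		select {
		case <-time.After(retryInterval): // back off before retrying
		case <-quit:
			return
		}
		if retryInterval *= 2; retryInterval > maximumRetryInterval {
			retryInterval = maximumRetryInterval
		}
	}
}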
// PutDDoc installs a design document.
func (b *Bucket) PutDDoc(docname string, value interface{}) error {
	var Err error

	maxRetries, err := b.getMaxRetries()
	if err != nil {
		return err
	}

	lastNode := START_NODE_ID

	for retryCount := 0; retryCount < maxRetries; retryCount++ {

		Err = nil

		ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname)
		if err != nil {
			return err
		}

		lastNode = selectedNode

		logging.Infof("Trying with selected node %d", selectedNode)
		j, err := json.Marshal(value)
		if err != nil {
			return err
		}

		req, err := http.NewRequest("PUT", ddocU, bytes.NewReader(j))
		if err != nil {
			return err
		}
		req.Header.Set("Content-Type", "application/json")
		err = maybeAddAuth(req, b.authHandler(false /* bucket not yet locked */))
		if err != nil {
			return err
		}

		res, err := doHTTPRequest(req)
		if err != nil {
			return err
		}

		if res.StatusCode != 201 {
			body, _ := ioutil.ReadAll(res.Body)
			Err = fmt.Errorf("error installing view: %v / %s", res.Status, body)
			logging.Errorf("Error in PutDDoc %v. Retrying...", Err)
			res.Body.Close()
			b.Refresh()
			continue
		}

		res.Body.Close()
		break
	}

	return Err
}
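// Usage sketch: install a simple design document. Any JSON-marshalable
// value works; the design-doc name and view definition here are
// illustrative only:
func examplePutDDoc(b *Bucket) error {
	ddoc := map[string]interface{}{
		"views": map[string]interface{}{
			"by_type": map[string]interface{}{
				"map": "function (doc, meta) { emit(doc.type, null); }",
			},
		},
	}
	return b.PutDDoc("mydesign", ddoc)
}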
func (feed *UprFeed) closeNodeFeeds() {
	for _, f := range feed.nodeFeeds {
		logging.Infof("Sending close to forwardUprEvents")
		close(f.quit)
		f.uprFeed.Close()
	}
	feed.nodeFeeds = nil
}
// GetFailoverLogs gets the failover logs for a set of vbucket ids.
func (b *Bucket) GetFailoverLogs(vBuckets []uint16) (FailoverLog, error) {
	// map vbids to their corresponding hosts
	vbHostList := make(map[string][]uint16)
	vbm := b.VBServerMap()
	if len(vbm.VBucketMap) < len(vBuckets) {
		return nil, fmt.Errorf("vbmap smaller than vbucket list: %v vs. %v",
			vbm.VBucketMap, vBuckets)
	}

	for _, vb := range vBuckets {
		masterID := vbm.VBucketMap[vb][0]
		master := b.getMasterNode(masterID)
		if master == "" {
			return nil, fmt.Errorf("No master found for vb %d", vb)
		}

		vbList := vbHostList[master]
		if vbList == nil {
			vbList = make([]uint16, 0)
		}
		vbList = append(vbList, vb)
		vbHostList[master] = vbList
	}

	failoverLogMap := make(FailoverLog)
	for _, serverConn := range b.getConnPools(false /* not already locked */) {

		vbList := vbHostList[serverConn.host]
		if vbList == nil {
			continue
		}

		mc, err := serverConn.Get()
		if err != nil {
			logging.Infof("No free connections for vblist %v", vbList)
			return nil, fmt.Errorf("No free connections for host %s", serverConn.host)
		}
		// close the connection so that it doesn't get reused for upr data
		// connection
		defer mc.Close()
		failoverlogs, err := mc.UprGetFailoverLog(vbList)
		if err != nil {
			return nil, fmt.Errorf("Error getting failover log %s host %s",
				err.Error(), serverConn.host)
		}

		for vb, log := range failoverlogs {
			failoverLogMap[vb] = *log
		}
	}

	return failoverLogMap, nil
}
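// Usage sketch: fetch and log the failover logs for a caller-chosen set
// of vbuckets. FailoverLog is keyed by vbucket id, as built above; the
// helper itself is hypothetical:
func exampleGetFailoverLogs(b *Bucket, vbs []uint16) error {
	logs, err := b.GetFailoverLogs(vbs)
	if err != nil {
		return err
	}
	for vb, flog := range logs {
		logging.Infof("vb %d failover log: %v", vb, flog)
	}
	return nil
}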
// GetDDoc retrieves a specific design doc.
func (b *Bucket) GetDDoc(docname string, into interface{}) error {
	var Err error
	var res *http.Response

	maxRetries, err := b.getMaxRetries()
	if err != nil {
		return err
	}

	lastNode := START_NODE_ID
	for retryCount := 0; retryCount < maxRetries; retryCount++ {

		Err = nil
		ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname)
		if err != nil {
			return err
		}

		lastNode = selectedNode
		logging.Infof("Trying with selected node %d", selectedNode)

		req, err := http.NewRequest("GET", ddocU, nil)
		if err != nil {
			return err
		}
		req.Header.Set("Content-Type", "application/json")
		err = maybeAddAuth(req, b.authHandler(false /* bucket not yet locked */))
		if err != nil {
			return err
		}

		res, err = doHTTPRequest(req)
		if err != nil {
			return err
		}
		if res.StatusCode != 200 {
			body, _ := ioutil.ReadAll(res.Body)
			Err = fmt.Errorf("error reading view: %v / %s", res.Status, body)
			logging.Errorf("Error in GetDDoc %v. Retrying...", Err)
			b.Refresh()
			res.Body.Close()
			continue
		}

		defer res.Body.Close()
		break
	}

	if Err != nil {
		return Err
	}

	d := json.NewDecoder(res.Body)
	return d.Decode(into)
}
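// Usage sketch: read a design document back into a generic map. Pairs
// with examplePutDDoc above; the names are illustrative only:
func exampleGetDDoc(b *Bucket) (map[string]interface{}, error) {
	var ddoc map[string]interface{}
	if err := b.GetDDoc("mydesign", &ddoc); err != nil {
		return nil, err
	}
	return ddoc, nil
}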
func (b *Bucket) OPJobPoll() {
	ok := true
	for ok {
		select {
		case job := <-OPJobChan:
			pool := b.getConnPoolByHost(job.hostname, false /* bucket not already locked */)
			if pool == nil {
				errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid}
				errRes.err = fmt.Errorf("Pool not found for host %v", job.hostname)
				errRes.job = job
				job.errorChan <- errRes
				continue
			}
			conn, err := pool.Get()
			if err != nil {
				errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid}
				errRes.err = fmt.Errorf("Unable to get connection from pool %v", err)
				errRes.job = job
				job.errorChan <- errRes
				continue
			}

			res, err := conn.ObserveSeq(job.vb, job.vbuuid)
			if err != nil {
				pool.Return(conn) // return the connection even when the command fails
				errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid}
				errRes.err = fmt.Errorf("Command failed %v", err)
				errRes.job = job
				job.errorChan <- errRes
				continue
			}
			pool.Return(conn)

			job.lastPersistedSeqNo = res.LastPersistedSeqNo
			job.currentSeqNo = res.CurrentSeqNo
			job.failover = res.Failover

			job.resultChan <- job
		case <-OPJobDone:
			logging.Infof("Observe Persist Poller exiting")
			ok = false
		}
	}
	wg.Done()
}
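// OPJobPoll is one worker in a small pool: jobs arrive on the shared
// OPJobChan, and a signal on OPJobDone stops each poller. A minimal
// sketch of the same channel-based worker pattern with purely local
// names (all hypothetical, not the package's own globals):
func exampleStartPollers(n int, jobs <-chan func()) (stop func()) {
	done := make(chan struct{})
	var wg sync.WaitGroup // assumes the sync import
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				select {
				case job := <-jobs:
					job() // process one observe/persist job
				case <-done:
					return
				}
			}
		}()
	}
	// callers invoke stop() to halt all pollers and wait for them to exit
	return func() { close(done); wg.Wait() }
}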
func (b *Bucket) GetNodeList(vb uint16) []string {

	vbm := b.VBServerMap()
	// vb is an index into VBucketMap, so it must be strictly less than
	// the map's length
	if int(vb) >= len(vbm.VBucketMap) {
		logging.Infof("vbmap smaller than vblist")
		return nil
	}

	nodes := make([]string, len(vbm.VBucketMap[vb]))
	for i := 0; i < len(vbm.VBucketMap[vb]); i++ {
		n := vbm.VBucketMap[vb][i]
		if n < 0 {
			continue
		}

		node := b.getMasterNode(n)
		if len(node) > 1 {
			nodes[i] = node
		}
	}
	return nodes
}
func (b *Bucket) ObserveAndPersistPoll(vb uint16, vbuuid uint64, seqNo uint64) (err error, failover bool) {
	b.RLock()
	ds := b.ds
	b.RUnlock()

	if ds == nil {
		return
	}

	nj := 0 // total number of jobs
	resultChan := make(chan *ObservePersistJob, 10)
	errChan := make(chan *OPErrResponse, 10)

	nodes := b.GetNodeList(vb)
	// nodes[ds.Observe] and nodes[ds.Persist] are indexed below, so both
	// must be valid indices into the node list
	if int(ds.Observe) >= len(nodes) || int(ds.Persist) >= len(nodes) {
		return fmt.Errorf("Not enough healthy nodes in the cluster"), false
	}

	logging.Infof("Node list %v", nodes)

	if ds.Observe >= ObserveReplicateOne {
		// create a job for each host
		for i := ObserveReplicateOne; i < ds.Observe+1; i++ {
			opJob := ObservePersistPool.Get()
			opJob.vb = vb
			opJob.vbuuid = vbuuid
			opJob.jobType = OBSERVE
			opJob.hostname = nodes[i]
			opJob.resultChan = resultChan
			opJob.errorChan = errChan

			OPJobChan <- opJob
			nj++
		}
	}

	if ds.Persist >= PersistMaster {
		for i := PersistMaster; i < ds.Persist+1; i++ {
			opJob := ObservePersistPool.Get()
			opJob.vb = vb
			opJob.vbuuid = vbuuid
			opJob.jobType = PERSIST
			opJob.hostname = nodes[i]
			opJob.resultChan = resultChan
			opJob.errorChan = errChan

			OPJobChan <- opJob
			nj++
		}
	}

	ok := true
	for ok {
		select {
		case res := <-resultChan:
			jobDone := false
			if res.failover == 0 {
				// no failover
				if res.jobType == PERSIST {
					if res.lastPersistedSeqNo >= seqNo {
						jobDone = true
					}
				} else {
					if res.currentSeqNo >= seqNo {
						jobDone = true
					}
				}

				if jobDone {
					nj--
					ObservePersistPool.Put(res)
				} else {
					// requeue this job
					OPJobChan <- res
				}
			} else {
				// Not currently handling failover scenarios TODO
				nj--
				ObservePersistPool.Put(res)
				failover = true
			}

			if nj == 0 {
				// done with all the jobs
				ok = false
				close(resultChan)
				close(errChan)
			}

		case Err := <-errChan:
			logging.Errorf("Error in Observe/Persist %v", Err.err)
			err = fmt.Errorf("Error in Observe/Persist job %v", Err.err)
			nj--
			ObservePersistPool.Put(Err.job)
			if nj == 0 {
				close(resultChan)
				close(errChan)
				ok = false
			}
		}
	}

	return
}
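// Usage sketch: block until a mutation with a known vbucket, vbuuid and
// sequence number satisfies the bucket's durability settings. The
// helper and its argument values are placeholders:
func exampleDurabilityWait(b *Bucket, vb uint16, vbuuid, seqNo uint64) error {
	err, failover := b.ObserveAndPersistPoll(vb, vbuuid, seqNo)
	if failover {
		// a failover happened while polling; the mutation may have been lost
		logging.Errorf("failover while waiting on vb %d seq %d", vb, seqNo)
	}
	return err
}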
func (b *Bucket) doBulkGet(vb uint16, keys []string,
	ch chan<- map[string]*gomemcached.MCResponse, ech chan<- error) {
	if SlowServerCallWarningThreshold > 0 {
		defer slowLog(time.Now(), "call to doBulkGet(%d, %d keys)", vb, len(keys))
	}

	rv := _STRING_MCRESPONSE_POOL.Get()
	attempts := 0
	done := false
	for attempts < MaxBulkRetries && !done {

		// vb is an index into VBucketMap, so it must be strictly less
		// than the map's length
		if int(vb) >= len(b.VBServerMap().VBucketMap) {
			// fatal
			logging.Errorf("go-couchbase: vbmap smaller than requested vbucket number. vb %d vbmap len %d",
				vb, len(b.VBServerMap().VBucketMap))
			err := fmt.Errorf("vbmap smaller than requested vbucket")
			ech <- err
			return
		}

		masterID := b.VBServerMap().VBucketMap[vb][0]
		attempts++

		if masterID < 0 {
			// fatal
			logging.Errorf("No master node available for vb %d", vb)
			err := fmt.Errorf("No master node available for vb %d", vb)
			ech <- err
			return
		}

		// This stack frame exists to ensure we can clean up
		// connection at a reasonable time.
		err := func() error {
			pool := b.getConnPool(masterID)
			conn, err := pool.Get()
			if err != nil {
				if isAuthError(err) {
					logging.Errorf("Fatal auth error %v", err)
					ech <- err
					return err
				} else if isConnError(err) {
					// for a connection error, refresh right away
					b.Refresh()
				}
				logging.Infof("Pool Get returned %v", err)
				// retry
				return nil
			}

			err = conn.GetBulk(vb, keys, rv)
			pool.Return(conn)

			switch err := err.(type) {
			case *gomemcached.MCResponse:
				if err.Status == gomemcached.NOT_MY_VBUCKET {
					b.Refresh()
					// retry
					return nil
				}
				return err
			case error:
				if !isConnError(err) {
					ech <- err
					ch <- rv
					return err
				} else if strings.EqualFold(err.Error(), "Bounds") {
					// We got an out-of-bounds error; retry the operation
					return nil
				}

				logging.Errorf("Connection Error: %s. Refreshing bucket", err.Error())
				b.Refresh()
				// retry
				return nil
			}

			done = true
			return nil
		}()

		if err != nil {
			return
		}
	}

	if attempts == MaxBulkRetries {
		ech <- fmt.Errorf("bulkget exceeded MaxBulkRetries for vbucket %d", vb)
	}

	ch <- rv
}
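// doBulkGet wraps each attempt in an anonymous function so the deferred
// cleanup runs at the end of that attempt rather than piling up until
// the retry loop finishes. The same pattern in isolation; the function
// and its parameters are hypothetical:
func exampleScopedCleanup(maxAttempts int, attempt func() (io.Closer, error)) {
	for i := 0; i < maxAttempts; i++ {
		func() {
			res, err := attempt()
			if err != nil {
				return // give up this attempt; the loop retries
			}
			defer res.Close() // runs when this attempt ends, not when the loop ends
			// ... use res for this attempt's work ...
		}()
	}
}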
func (b *Bucket) UpdateBucket() error {

	var failures int
	var returnErr error

	for {

		if failures == MAX_RETRY_COUNT {
			logging.Errorf("Maximum failures reached. Exiting loop...")
			return fmt.Errorf("Max failures reached. Last Error %v", returnErr)
		}

		nodes := b.Nodes()
		if len(nodes) < 1 {
			return fmt.Errorf("No healthy nodes found")
		}

		startNode := rand.Intn(len(nodes))
		node := nodes[(startNode)%len(nodes)]

		streamUrl := fmt.Sprintf("http://%s/pools/default/bucketsStreaming/%s", node.Hostname, b.GetName())
		logging.Infof("Trying with %s", streamUrl)
		req, err := http.NewRequest("GET", streamUrl, nil)
		if err != nil {
			return err
		}

		// Lock here to avoid having the pool closed under us.
		b.RLock()
		err = maybeAddAuth(req, b.pool.client.ah)
		b.RUnlock()
		if err != nil {
			return err
		}

		res, err := doHTTPRequestForUpdate(req)
		if err != nil {
			return err
		}

		if res.StatusCode != 200 {
			bod, _ := ioutil.ReadAll(io.LimitReader(res.Body, 512))
			logging.Errorf("Failed to connect to host, unexpected status code: %v. Body %s", res.StatusCode, bod)
			res.Body.Close()
			returnErr = fmt.Errorf("Failed to connect to host. Status %v Body %s", res.StatusCode, bod)
			failures++
			continue
		}

		dec := json.NewDecoder(res.Body)

		tmpb := &Bucket{}
		for {

			err := dec.Decode(&tmpb)
			if err != nil {
				returnErr = err
				res.Body.Close()
				break
			}

			// if we got here, reset the failure count
			failures = 0

			b.Lock()

			// mark all the old connection pools for deletion
			pools := b.getConnPools(true /* already locked */)
			for _, pool := range pools {
				if pool != nil {
					pool.inUse = false
				}
			}

			newcps := make([]*connectionPool, len(tmpb.VBSMJson.ServerList))
			for i := range newcps {
				// get the old connection pool and check if it is still valid
				pool := b.getConnPoolByHost(tmpb.VBSMJson.ServerList[i], true /* bucket already locked */)
				if pool != nil && !pool.inUse {
					// if the hostname and index are unchanged then reuse this pool
					newcps[i] = pool
					pool.inUse = true
					continue
				}
				// else create a new pool
				if b.ah != nil {
					newcps[i] = newConnectionPool(
						tmpb.VBSMJson.ServerList[i],
						b.ah, PoolSize, PoolOverflow)
				} else {
					newcps[i] = newConnectionPool(
						tmpb.VBSMJson.ServerList[i],
						b.authHandler(true /* bucket already locked */),
						PoolSize, PoolOverflow)
				}
			}

			b.replaceConnPools2(newcps, true /* bucket already locked */)

			tmpb.ah = b.ah
			b.vBucketServerMap = unsafe.Pointer(&tmpb.VBSMJson)
			b.nodeList = unsafe.Pointer(&tmpb.NodesJSON)
			b.Unlock()

			logging.Infof("Got new configuration for bucket %s", b.GetName())
		}

		// we are here because of an error
		failures++
		continue
	}
	return nil
}
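// UpdateBucket relies on the streaming bucket endpoint: the server holds
// the connection open and writes a fresh JSON config whenever the
// topology changes, so a json.Decoder in a loop yields one Bucket
// snapshot per change. A minimal sketch of just that decode loop; the
// helper is hypothetical and the swap step is elided:
func exampleStreamConfigs(res *http.Response) error {
	defer res.Body.Close()
	dec := json.NewDecoder(res.Body)
	for {
		config := &Bucket{}
		if err := dec.Decode(config); err != nil {
			return err // the server closed the stream or sent bad JSON
		}
		// ... swap in the new vbucket map and connection pools, as above ...
	}
}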