Example #1
// Goroutine that runs the feed
func (feed *UprFeed) run() {
	retryInterval := initialRetryInterval
	bucketOK := true
	for {
		// Connect to the UPR feed of each server node:
		if bucketOK {
			// Run until one of the sub-feeds fails:
			select {
			case <-feed.killSwitch:
			case <-feed.quit:
				return
			}
			//feed.closeNodeFeeds()
			retryInterval = initialRetryInterval
		}

		if feed.closing {
			// we have been asked to shut down
			return
		}

		// On error, try to refresh the bucket in case the list of nodes changed:
		logging.Infof("go-couchbase: UPR connection lost; reconnecting to bucket %q in %v",
			feed.bucket.Name, retryInterval)

		if err := feed.bucket.Refresh(); err != nil {
			// if we fail to refresh the bucket, exit the feed
			// MB-14917
			logging.Infof("Unable to refresh bucket %s ", err.Error())
			close(feed.output)
			feed.outputClosed = true
			feed.closeNodeFeeds()
			return
		}

		// this will only connect to nodes that are not connected or changed
		// user will have to reconnect the stream
		err := feed.connectToNodes()
		if err != nil {
			logging.Infof("Unable to connect to nodes..exit ")
			close(feed.output)
			feed.outputClosed = true
			feed.closeNodeFeeds()
			return
		}
		bucketOK = true // connectToNodes succeeded if we reached this point

		select {
		case <-time.After(retryInterval):
		case <-feed.quit:
			return
		}
		if retryInterval *= 2; retryInterval > maximumRetryInterval {
			retryInterval = maximumRetryInterval
		}
	}
}
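The retry loop above uses capped exponential backoff: the interval doubles after every failed reconnect attempt and is clamped at a maximum. A minimal, self-contained sketch of that arithmetic; the interval constants here are illustrative stand-ins for the package's initialRetryInterval and maximumRetryInterval.

package main

import (
	"fmt"
	"time"
)

const (
	initialRetryInterval = 1 * time.Second  // assumed value, for illustration
	maximumRetryInterval = 30 * time.Second // assumed value, for illustration
)

// nextRetryInterval doubles the interval and caps it, mirroring the
// arithmetic at the bottom of run().
func nextRetryInterval(cur time.Duration) time.Duration {
	if cur *= 2; cur > maximumRetryInterval {
		cur = maximumRetryInterval
	}
	return cur
}

func main() {
	d := initialRetryInterval
	for i := 0; i < 6; i++ {
		fmt.Println(d) // 1s 2s 4s 8s 16s 30s
		d = nextRetryInterval(d)
	}
}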
Example #2
// Goroutine that forwards Upr events from a single node's feed to the aggregate feed.
func (feed *UprFeed) forwardUprEvents(nodeFeed *FeedInfo, killSwitch chan bool, host string) {
	singleFeed := nodeFeed.uprFeed

	defer func() {
		feed.wg.Done()
		if r := recover(); r != nil {
			// If the feed is not closing, re-throw the panic
			if !feed.outputClosed && !feed.closing {
				panic(r)
			} else {
				logging.Errorf("Recovered from panic; feed is closing, exiting gracefully")
			}
		}
	}()

	for {
		select {
		case <-nodeFeed.quit:
			nodeFeed.connected = false
			return

		case event, ok := <-singleFeed.C:
			if !ok {
				if singleFeed.Error != nil {
					logging.Errorf("go-couchbase: Upr feed from %s failed: %v", host, singleFeed.Error)
				}
				killSwitch <- true
				return
			}
			if feed.outputClosed {
				// someone closed the node feed
				logging.Infof("Node feed closed, returning from forwardUprEvents")
				return
			}
			feed.output <- event
			if event.Status == gomemcached.NOT_MY_VBUCKET {
				logging.Infof(" Got a not my vbucket error !! ")
				if err := feed.bucket.Refresh(); err != nil {
					logging.Errorf("Unable to refresh bucket %s ", err.Error())
					feed.closeNodeFeeds()
					return
				}
				// this will only connect to nodes that are not connected or changed
				// user will have to reconnect the stream
				if err := feed.connectToNodes(); err != nil {
					logging.Errorf("Unable to connect to nodes %s", err.Error())
					return
				}

			}
		}
	}
}
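The killSwitch handoff above is a fan-in: every node feed shares one channel, and the supervisor in run() reacts to whichever failure signal arrives first. A minimal sketch of that pattern, with illustrative names that are not part of the go-couchbase API:

package main

import (
	"fmt"
	"time"
)

func nodeFeed(id int, killSwitch chan<- bool) {
	time.Sleep(time.Duration(100*id) * time.Millisecond)
	killSwitch <- true // report failure, as forwardUprEvents does
}

func main() {
	killSwitch := make(chan bool, 3) // buffered so late senders never block
	for i := 1; i <= 3; i++ {
		go nodeFeed(i, killSwitch)
	}
	<-killSwitch // the first failure wakes the supervisor
	fmt.Println("a sub-feed failed; the supervisor would reconnect here")
}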
Example #3
// HealthyNodes returns the list of healthy nodes.
func (b *Bucket) HealthyNodes() []Node {
	nodes := []Node{}

	for _, n := range b.Nodes() {
		if n.Status == "healthy" && n.CouchAPIBase != "" {
			nodes = append(nodes, n)
		}
		if n.Status != "healthy" { // log non-healthy node
			logging.Infof("Non-healthy node; node details:")
			logging.Infof("Hostname=%v, Status=%v, CouchAPIBase=%v, ThisNode=%v", n.Hostname, n.Status, n.CouchAPIBase, n.ThisNode)
		}
	}

	return nodes
}
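A hedged usage sketch: open a bucket and list its healthy nodes. The connection URL, pool name, and bucket name below are placeholders.

package main

import (
	"log"

	"github.com/couchbase/go-couchbase"
)

func main() {
	bucket, err := couchbase.GetBucket("http://localhost:8091/", "default", "default")
	if err != nil {
		log.Fatal(err)
	}
	defer bucket.Close()

	for _, n := range bucket.HealthyNodes() {
		log.Printf("healthy node: %s (ThisNode=%v)", n.Hostname, n.ThisNode)
	}
}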
Example #4
func slowLog(startTime time.Time, format string, args ...interface{}) {
	if elapsed := time.Since(startTime); elapsed > SlowServerCallWarningThreshold {
		pc, _, _, _ := runtime.Caller(2)
		caller := runtime.FuncForPC(pc).Name()
		logging.Infof("go-couchbase: "+format+" in "+caller+" took "+elapsed.String(), args...)
	}
}
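slowLog is designed to be deferred at the top of a server call: time.Now() is evaluated when the defer statement runs, so the elapsed time covers the whole call (Example #13 uses it exactly this way). A simplified, self-contained analogue without the runtime.Caller lookup:

package main

import (
	"log"
	"time"
)

var slowCallThreshold = 50 * time.Millisecond // stand-in for SlowServerCallWarningThreshold

func slowLog(startTime time.Time, what string) {
	if elapsed := time.Since(startTime); elapsed > slowCallThreshold {
		log.Printf("%s took %v", what, elapsed)
	}
}

func doWork() {
	defer slowLog(time.Now(), "doWork") // time.Now() captured here, logged on return
	time.Sleep(80 * time.Millisecond)
}

func main() {
	doWork()
}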
Example #5
// Goroutine that runs the feed
func (feed *TapFeed) run() {
	retryInterval := initialRetryInterval
	bucketOK := true
	for {
		// Connect to the TAP feed of each server node:
		if bucketOK {
			killSwitch, err := feed.connectToNodes()
			if err == nil {
				// Run until one of the sub-feeds fails:
				select {
				case <-killSwitch:
				case <-feed.quit:
					return
				}
				feed.closeNodeFeeds()
				retryInterval = initialRetryInterval
			}
		}

		// On error, try to refresh the bucket in case the list of nodes changed:
		logging.Infof("go-couchbase: TAP connection lost; reconnecting to bucket %q in %v",
			feed.bucket.Name, retryInterval)
		err := feed.bucket.Refresh()
		bucketOK = err == nil

		select {
		case <-time.After(retryInterval):
		case <-feed.quit:
			return
		}
		if retryInterval *= 2; retryInterval > maximumRetryInterval {
			retryInterval = maximumRetryInterval
		}
	}
}
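The select over time.After and feed.quit is an interruptible sleep: the loop waits out the retry interval but still reacts immediately to shutdown. A minimal sketch of that idiom:

package main

import (
	"fmt"
	"time"
)

// sleepOrQuit waits for d unless a quit signal arrives first,
// mirroring the select at the bottom of run().
func sleepOrQuit(d time.Duration, quit <-chan struct{}) bool {
	select {
	case <-time.After(d):
		return true // slept the full interval
	case <-quit:
		return false // interrupted
	}
}

func main() {
	quit := make(chan struct{})
	go func() {
		time.Sleep(50 * time.Millisecond)
		close(quit)
	}()
	fmt.Println("slept fully:", sleepOrQuit(time.Second, quit))
}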
Example #6
// PutDDoc installs a design document.
func (b *Bucket) PutDDoc(docname string, value interface{}) error {

	var Err error

	maxRetries, err := b.getMaxRetries()
	if err != nil {
		return err
	}

	lastNode := START_NODE_ID

	for retryCount := 0; retryCount < maxRetries; retryCount++ {

		Err = nil

		ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname)
		if err != nil {
			return err
		}

		lastNode = selectedNode

		logging.Infof(" Trying with selected node %d", selectedNode)
		j, err := json.Marshal(value)
		if err != nil {
			return err
		}

		req, err := http.NewRequest("PUT", ddocU, bytes.NewReader(j))
		if err != nil {
			return err
		}
		req.Header.Set("Content-Type", "application/json")
		err = maybeAddAuth(req, b.authHandler(false /* bucket not yet locked */))
		if err != nil {
			return err
		}

		res, err := doHTTPRequest(req)
		if err != nil {
			return err
		}

		if res.StatusCode != 201 {
			body, _ := ioutil.ReadAll(res.Body)
			Err = fmt.Errorf("error installing view: %v / %s",
				res.Status, body)
			logging.Errorf(" Error in PutDDOC %v. Retrying...", Err)
			res.Body.Close()
			b.Refresh()
			continue
		}

		res.Body.Close()
		break
	}

	return Err
}
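A hedged usage sketch for PutDDoc; the URL, bucket, design-document name, and view definition are placeholders. Any JSON-marshalable value works, since PutDDoc marshals it before issuing the PUT.

package main

import (
	"log"

	"github.com/couchbase/go-couchbase"
)

func main() {
	bucket, err := couchbase.GetBucket("http://localhost:8091/", "default", "default")
	if err != nil {
		log.Fatal(err)
	}
	defer bucket.Close()

	// A design document with a single view, built as a plain map.
	ddoc := map[string]interface{}{
		"views": map[string]interface{}{
			"by_type": map[string]interface{}{
				"map": "function (doc, meta) { emit(doc.type, null); }",
			},
		},
	}
	if err := bucket.PutDDoc("mydesign", ddoc); err != nil {
		log.Fatalf("PutDDoc failed: %v", err)
	}
}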
Example #7
func (feed *UprFeed) closeNodeFeeds() {
	for _, f := range feed.nodeFeeds {
		logging.Infof(" Sending close to forwardUprEvent ")
		close(f.quit)
		f.uprFeed.Close()
	}
	feed.nodeFeeds = nil
}
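Closing f.quit, rather than sending on it, broadcasts shutdown: every goroutine blocked on <-f.quit unblocks at once. A minimal sketch of the idiom:

package main

import (
	"fmt"
	"sync"
)

func main() {
	quit := make(chan struct{})
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			<-quit // unblocks for every receiver when quit is closed
			fmt.Println("feed", id, "shutting down")
		}(i)
	}
	close(quit)
	wg.Wait()
}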
Example #8
// GetFailoverLogs returns the failover logs for a set of vbucket ids.
func (b *Bucket) GetFailoverLogs(vBuckets []uint16) (FailoverLog, error) {

	// map vbids to their corresponding hosts
	vbHostList := make(map[string][]uint16)
	vbm := b.VBServerMap()
	if len(vbm.VBucketMap) < len(vBuckets) {
		return nil, fmt.Errorf("vbmap smaller than vbucket list: %v vs. %v",
			vbm.VBucketMap, vBuckets)
	}

	for _, vb := range vBuckets {
		masterID := vbm.VBucketMap[vb][0]
		master := b.getMasterNode(masterID)
		if master == "" {
			return nil, fmt.Errorf("No master found for vb %d", vb)
		}

		vbList := vbHostList[master]
		if vbList == nil {
			vbList = make([]uint16, 0)
		}
		vbList = append(vbList, vb)
		vbHostList[master] = vbList
	}

	failoverLogMap := make(FailoverLog)
	for _, serverConn := range b.getConnPools(false /* not already locked */) {

		vbList := vbHostList[serverConn.host]
		if vbList == nil {
			continue
		}

		mc, err := serverConn.Get()
		if err != nil {
			logging.Infof("No Free connections for vblist %v", vbList)
			return nil, fmt.Errorf("No Free connections for host %s",
				serverConn.host)

		}
		// close the connection so that it doesn't get reused for upr data
		// connection
		defer mc.Close()
		failoverlogs, err := mc.UprGetFailoverLog(vbList)
		if err != nil {
			return nil, fmt.Errorf("Error getting failover log %s host %s",
				err.Error(), serverConn.host)

		}

		for vb, log := range failoverlogs {
			failoverLogMap[vb] = *log
		}
	}

	return failoverLogMap, nil
}
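A hedged usage sketch: request failover logs for a few vbuckets and walk the returned map. Connection details are placeholders.

package main

import (
	"log"

	"github.com/couchbase/go-couchbase"
)

func main() {
	bucket, err := couchbase.GetBucket("http://localhost:8091/", "default", "default")
	if err != nil {
		log.Fatal(err)
	}
	defer bucket.Close()

	flogs, err := bucket.GetFailoverLogs([]uint16{0, 1, 2})
	if err != nil {
		log.Fatalf("GetFailoverLogs failed: %v", err)
	}
	for vb, flog := range flogs {
		log.Printf("vb %d has %d failover log entries", vb, len(flog))
	}
}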
Example #9
// GetDDoc retrieves a specific design doc.
func (b *Bucket) GetDDoc(docname string, into interface{}) error {
	var Err error
	var res *http.Response

	maxRetries, err := b.getMaxRetries()
	if err != nil {
		return err
	}

	lastNode := START_NODE_ID
	for retryCount := 0; retryCount < maxRetries; retryCount++ {

		Err = nil
		ddocU, selectedNode, err := b.ddocURLNext(lastNode, docname)
		if err != nil {
			return err
		}

		lastNode = selectedNode
		logging.Infof(" Trying with selected node %d", selectedNode)

		req, err := http.NewRequest("GET", ddocU, nil)
		if err != nil {
			return err
		}
		req.Header.Set("Content-Type", "application/json")
		err = maybeAddAuth(req, b.authHandler(false /* bucket not yet locked */))
		if err != nil {
			return err
		}

		res, err = doHTTPRequest(req)
		if err != nil {
			return err
		}
		if res.StatusCode != 200 {
			body, _ := ioutil.ReadAll(res.Body)
			Err = fmt.Errorf("error reading view: %v / %s",
				res.Status, body)
			logging.Errorf(" Error in GetDDOC %v Retrying...", Err)
			b.Refresh()
			res.Body.Close()
			continue
		}
		defer res.Body.Close()
		break
	}

	if Err != nil {
		return Err
	}

	d := json.NewDecoder(res.Body)
	return d.Decode(into)
}
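The matching read side of Example #6: GetDDoc decodes the stored design document into any JSON-unmarshalable target. A hedged sketch with placeholder names:

package main

import (
	"log"

	"github.com/couchbase/go-couchbase"
)

func main() {
	bucket, err := couchbase.GetBucket("http://localhost:8091/", "default", "default")
	if err != nil {
		log.Fatal(err)
	}
	defer bucket.Close()

	var ddoc map[string]interface{}
	if err := bucket.GetDDoc("mydesign", &ddoc); err != nil {
		log.Fatalf("GetDDoc failed: %v", err)
	}
	log.Printf("design doc: %v", ddoc)
}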
Example #10
func (b *Bucket) OPJobPoll() {

	ok := true
	for ok {
		select {
		case job := <-OPJobChan:
			pool := b.getConnPoolByHost(job.hostname, false /* bucket not already locked */)
			if pool == nil {
				errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid}
				errRes.err = fmt.Errorf("Pool not found for host %v", job.hostname)
				errRes.job = job
				job.errorChan <- errRes
				continue
			}
			conn, err := pool.Get()
			if err != nil {
				errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid}
				errRes.err = fmt.Errorf("Unable to get connection from pool %v", err)
				errRes.job = job
				job.errorChan <- errRes
				continue
			}

			res, err := conn.ObserveSeq(job.vb, job.vbuuid)
			if err != nil {
				errRes := &OPErrResponse{vb: job.vb, vbuuid: job.vbuuid}
				errRes.err = fmt.Errorf("Command failed %v", err)
				errRes.job = job
				job.errorChan <- errRes
				continue

			}
			pool.Return(conn)
			job.lastPersistedSeqNo = res.LastPersistedSeqNo
			job.currentSeqNo = res.CurrentSeqNo
			job.failover = res.Failover

			job.resultChan <- job
		case <-OPJobDone:
			logging.Infof("Observe Persist Poller exitting")
			ok = false
		}
	}
	wg.Done()
}
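The poller is a long-lived worker draining a shared job channel until a done signal arrives. A minimal analogue of that loop with illustrative names; note that, like the original, it may exit while jobs are still queued, because the select picks either ready case.

package main

import (
	"fmt"
	"sync"
)

func poll(jobs <-chan int, done <-chan struct{}, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		select {
		case j := <-jobs:
			fmt.Println("processed job", j)
		case <-done:
			fmt.Println("poller exiting")
			return
		}
	}
}

func main() {
	jobs := make(chan int, 4)
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go poll(jobs, done, &wg)
	for i := 0; i < 3; i++ {
		jobs <- i
	}
	close(done)
	wg.Wait()
}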
Example #11
func (b *Bucket) GetNodeList(vb uint16) []string {

	vbm := b.VBServerMap()
	if int(vb) >= len(vbm.VBucketMap) {
		logging.Infof("vbucket number out of range of vbmap")
		return nil
	}

	nodes := make([]string, len(vbm.VBucketMap[vb]))
	for i := 0; i < len(vbm.VBucketMap[vb]); i++ {
		n := vbm.VBucketMap[vb][i]
		if n < 0 {
			continue
		}

		node := b.getMasterNode(n)
		if len(node) > 1 {
			nodes[i] = node
		}
	}
	return nodes
}
Example #12
func (b *Bucket) ObserveAndPersistPoll(vb uint16, vbuuid uint64, seqNo uint64) (err error, failover bool) {
	b.RLock()
	ds := b.ds
	b.RUnlock()

	if ds == nil {
		return
	}

	nj := 0 // total number of jobs
	resultChan := make(chan *ObservePersistJob, 10)
	errChan := make(chan *OPErrResponse, 10)

	nodes := b.GetNodeList(vb)
	if int(ds.Observe) > len(nodes) || int(ds.Persist) > len(nodes) {
		return fmt.Errorf("Not enough healthy nodes in the cluster"), false
	}

	logging.Infof("Node list %v", nodes)

	if ds.Observe >= ObserveReplicateOne {
		// create a job for each host
		for i := ObserveReplicateOne; i < ds.Observe+1; i++ {
			opJob := ObservePersistPool.Get()
			opJob.vb = vb
			opJob.vbuuid = vbuuid
			opJob.jobType = OBSERVE
			opJob.hostname = nodes[i]
			opJob.resultChan = resultChan
			opJob.errorChan = errChan

			OPJobChan <- opJob
			nj++

		}
	}

	if ds.Persist >= PersistMaster {
		for i := PersistMaster; i < ds.Persist+1; i++ {
			opJob := ObservePersistPool.Get()
			opJob.vb = vb
			opJob.vbuuid = vbuuid
			opJob.jobType = PERSIST
			opJob.hostname = nodes[i]
			opJob.resultChan = resultChan
			opJob.errorChan = errChan

			OPJobChan <- opJob
			nj++

		}
	}

	ok := true
	for ok {
		select {
		case res := <-resultChan:
			jobDone := false
			if res.failover == 0 {
				// no failover
				if res.jobType == PERSIST {
					if res.lastPersistedSeqNo >= seqNo {
						jobDone = true
					}

				} else {
					if res.currentSeqNo >= seqNo {
						jobDone = true
					}
				}

				if jobDone {
					nj--
					ObservePersistPool.Put(res)
				} else {
					// requeue this job
					OPJobChan <- res
				}

			} else {
				// Not currently handling failover scenarios TODO
				nj--
				ObservePersistPool.Put(res)
				failover = true
			}

			if nj == 0 {
				// done with all the jobs
				ok = false
				close(resultChan)
				close(errChan)
			}

		case Err := <-errChan:
			logging.Errorf("Error in Observe/Persist %v", Err.err)
			err = fmt.Errorf("Error in Observe/Persist job %v", Err.err)
			nj--
			ObservePersistPool.Put(Err.job)
			if nj == 0 {
				close(resultChan)
				close(errChan)
				ok = false
			}
		}
	}

	return
}
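The heart of this function is a counting fan-in: nj tracks outstanding jobs, and the select loop drains the result and error channels until the count reaches zero. A stripped-down sketch of that bookkeeping:

package main

import (
	"fmt"
)

func main() {
	results := make(chan int, 4)
	errs := make(chan error, 4)

	nj := 3 // outstanding jobs
	for i := 0; i < nj; i++ {
		go func(id int) { results <- id }(i)
	}

	for nj > 0 {
		select {
		case r := <-results:
			fmt.Println("job", r, "done")
			nj--
		case err := <-errs:
			fmt.Println("job failed:", err)
			nj--
		}
	}
}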
Example #13
func (b *Bucket) doBulkGet(vb uint16, keys []string,
	ch chan<- map[string]*gomemcached.MCResponse, ech chan<- error) {
	if SlowServerCallWarningThreshold > 0 {
		defer slowLog(time.Now(), "call to doBulkGet(%d, %d keys)", vb, len(keys))
	}

	rv := _STRING_MCRESPONSE_POOL.Get()
	attempts := 0
	done := false
	for attempts < MaxBulkRetries && !done {

		if len(b.VBServerMap().VBucketMap) <= int(vb) {
			//fatal
			logging.Errorf("go-couchbase: vbmap smaller than requested vbucket number. vb %d vbmap len %d", vb, len(b.VBServerMap().VBucketMap))
			err := fmt.Errorf("vbmap smaller than requested vbucket")
			ech <- err
			return
		}

		masterID := b.VBServerMap().VBucketMap[vb][0]
		attempts++

		if masterID < 0 {
			// fatal
			logging.Errorf("No master node available for vb %d", vb)
			err := fmt.Errorf("No master node available for vb %d", vb)
			ech <- err
			return
		}

		// This stack frame exists to ensure we can clean up
		// connection at a reasonable time.
		err := func() error {
			pool := b.getConnPool(masterID)
			conn, err := pool.Get()
			if err != nil {
				if isAuthError(err) {
					logging.Errorf(" Fatal Auth Error %v", err)
					ech <- err
					return err
				} else if isConnError(err) {
					// for a connection error, refresh right away
					b.Refresh()
				}
				logging.Infof("Pool Get returned %v", err)
				// retry
				return nil
			}

			err = conn.GetBulk(vb, keys, rv)
			pool.Return(conn)

			switch err.(type) {
			case *gomemcached.MCResponse:
				st := err.(*gomemcached.MCResponse).Status
				if st == gomemcached.NOT_MY_VBUCKET {
					b.Refresh()
					// retry
					err = nil
				}
				return err
			case error:
				if !isConnError(err) {
					ech <- err
					ch <- rv
					return err
				} else if strings.EqualFold(err.Error(), "Bounds") {
					// We got an out of bound error, retry the operation
					return nil
				}

				logging.Errorf("Connection Error: %s. Refreshing bucket", err.Error())
				b.Refresh()
				// retry
				return nil
			}

			done = true
			return nil
		}()

		if err != nil {
			return
		}
	}

	if attempts == MaxBulkRetries {
		ech <- fmt.Errorf("bulkget exceeded MaxBulkRetries for vbucket %d", vb)
	}

	ch <- rv
}
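doBulkGet reports through a pair of channels: a result map on ch or an error on ech. A simplified sketch of the caller side of that contract, with stand-in types:

package main

import (
	"fmt"
)

func worker(ch chan<- map[string]string, ech chan<- error) {
	ch <- map[string]string{"key": "value"} // or, on failure: ech <- err
}

func main() {
	ch := make(chan map[string]string, 1)
	ech := make(chan error, 1)
	go worker(ch, ech)

	select {
	case rv := <-ch:
		fmt.Println("got", len(rv), "results")
	case err := <-ech:
		fmt.Println("bulk get failed:", err)
	}
}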
Example #14
func (b *Bucket) UpdateBucket() error {

	var failures int
	var returnErr error

	for {

		if failures == MAX_RETRY_COUNT {
			logging.Errorf(" Maximum failures reached. Exiting loop...")
			return fmt.Errorf("Max failures reached. Last Error %v", returnErr)
		}

		nodes := b.Nodes()
		if len(nodes) < 1 {
			return fmt.Errorf("No healthy nodes found")
		}

		startNode := rand.Intn(len(nodes))
		node := nodes[(startNode)%len(nodes)]

		streamUrl := fmt.Sprintf("http://%s/pools/default/bucketsStreaming/%s", node.Hostname, b.GetName())
		logging.Infof(" Trying with %s", streamUrl)
		req, err := http.NewRequest("GET", streamUrl, nil)
		if err != nil {
			return err
		}

		// Lock here to avoid having pool closed under us.
		b.RLock()
		err = maybeAddAuth(req, b.pool.client.ah)
		b.RUnlock()
		if err != nil {
			return err
		}

		res, err := doHTTPRequestForUpdate(req)
		if err != nil {
			return err
		}

		if res.StatusCode != 200 {
			bod, _ := ioutil.ReadAll(io.LimitReader(res.Body, 512))
			logging.Errorf("Failed to connect to host, unexpected status code: %v. Body %s", res.StatusCode, bod)
			res.Body.Close()
			returnErr = fmt.Errorf("Failed to connect to host. Status %v Body %s", res.StatusCode, bod)
			failures++
			continue
		}

		dec := json.NewDecoder(res.Body)

		tmpb := &Bucket{}
		for {

			err := dec.Decode(&tmpb)
			if err != nil {
				returnErr = err
				res.Body.Close()
				break
			}

			// if we got here, reset failure count
			failures = 0
			b.Lock()

			// mark all the old connection pools for deletion
			pools := b.getConnPools(true /* already locked */)
			for _, pool := range pools {
				if pool != nil {
					pool.inUse = false
				}
			}

			newcps := make([]*connectionPool, len(tmpb.VBSMJson.ServerList))
			for i := range newcps {
				// get the old connection pool and check if it is still valid
				pool := b.getConnPoolByHost(tmpb.VBSMJson.ServerList[i], true /* bucket already locked */)
				if pool != nil && pool.inUse == false {
					// if the hostname and index is unchanged then reuse this pool
					newcps[i] = pool
					pool.inUse = true
					continue
				}
				// else create a new pool
				if b.ah != nil {
					newcps[i] = newConnectionPool(
						tmpb.VBSMJson.ServerList[i],
						b.ah, PoolSize, PoolOverflow)

				} else {
					newcps[i] = newConnectionPool(
						tmpb.VBSMJson.ServerList[i],
						b.authHandler(true /* bucket already locked */), PoolSize, PoolOverflow)
				}
			}

			b.replaceConnPools2(newcps, true /* bucket already locked */)

			tmpb.ah = b.ah
			b.vBucketServerMap = unsafe.Pointer(&tmpb.VBSMJson)
			b.nodeList = unsafe.Pointer(&tmpb.NodesJSON)
			b.Unlock()

			logging.Infof("Got new configuration for bucket %s", b.GetName())

		}
		// we are here because of an error
		failures++
		continue

	}
	return nil
}
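The inner loop relies on json.Decoder's ability to read a stream of concatenated JSON values, which is how the bucketsStreaming endpoint delivers configuration updates. A self-contained sketch of that decoding pattern:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

type config struct {
	Name string `json:"name"`
}

func main() {
	// Stand-in for the streaming response body.
	stream := strings.NewReader(`{"name":"rev1"} {"name":"rev2"}`)
	dec := json.NewDecoder(stream)
	for {
		var c config
		if err := dec.Decode(&c); err != nil {
			break // io.EOF when the stream ends
		}
		fmt.Println("got configuration:", c.Name)
	}
}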