// reestablishRegion will continually attempt to reestablish a connection to a
// given region
func (c *Client) reestablishRegion(reg *regioninfo.Info) {
	// The meta client is not kept in the region client cache.
	if reg != c.metaRegionInfo {
		// This region is inaccessible, and a new client will be created, so the
		// client will be removed from the region client cache.
		c.clients.del(reg)
	}
	for {
		log.WithFields(log.Fields{
			"Table":      reg.Table,
			"RegionName": reg.RegionName,
			"StartKey":   reg.StartKey,
			"StopKey":    reg.StopKey,
		}).Warn("Attempting to re-establish region.")
		// A new context is created here because this is not specific to any
		// request that the user of gohbase initiated, and is instead an
		// internal goroutine that may be servicing any number of requests
		// initiated by the user.
		ctx, _ := context.WithTimeout(context.Background(), regionLookupTimeout)

		var err error
		if reg == c.metaRegionInfo {
			// If we're looking for the meta region..
			err = c.locateMeta(ctx) // .. look it up in ZooKeeper.
		} else {
			// Otherwise do a normal meta lookup.
			_, _, err = c.locateRegion(ctx, reg.Table, reg.StartKey)
		}
		if err == nil {
			reg.MarkAvailable()
			return
		}
		// TODO: Make this configurable, or verify that it's a sane number
		time.Sleep(time.Millisecond * 100)
	}
}
// waitOnRegion blocks until reg becomes available again (or the RPC's
// deadline expires) and then re-sends the RPC.
func (c *Client) waitOnRegion(rpc hrpc.Call, reg *regioninfo.Info) (proto.Message, error) {
	ch := reg.GetAvailabilityChan()
	if ch == nil {
		// WTF, this region is available? Maybe it was marked as such
		// since waitOnRegion was called.
		return c.sendRPC(rpc)
	}
	// The region is unavailable. Wait for it to become available,
	// or for the deadline to be exceeded.
	select {
	case <-ch:
		return c.sendRPC(rpc)
	case <-rpc.GetContext().Done():
		return nil, ErrDeadline
	}
}
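// NOTE: illustrative sketch only, not the actual regioninfo implementation.
// reestablishRegion and waitOnRegion above rely on three primitives of
// regioninfo.Info: MarkUnavailable, MarkAvailable and GetAvailabilityChan.
// One simple way to provide them is a mutex-guarded channel that exists only
// while the region is unavailable; closing it wakes every goroutine blocked
// in waitOnRegion. The type and field names below are made up, and a "sync"
// import is assumed.
type availability struct {
	mu sync.Mutex
	// ch is non-nil while the region is unavailable and is closed (then set
	// back to nil) once the region becomes available again.
	ch chan struct{}
}

// MarkUnavailable reports whether the caller was the first to mark the
// region unavailable, so that only one goroutine starts reestablishRegion.
func (a *availability) MarkUnavailable() bool {
	a.mu.Lock()
	defer a.mu.Unlock()
	if a.ch != nil {
		return false // already marked by someone else
	}
	a.ch = make(chan struct{})
	return true
}

// MarkAvailable closes the channel, releasing every waiter.
func (a *availability) MarkAvailable() {
	a.mu.Lock()
	defer a.mu.Unlock()
	if a.ch != nil {
		close(a.ch)
		a.ch = nil
	}
}

// GetAvailabilityChan returns nil when the region is available.
func (a *availability) GetAvailabilityChan() chan struct{} {
	a.mu.Lock()
	defer a.mu.Unlock()
	return a.ch
}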
// establishRegion connects to the region server (or master) at the given host
// and port and, if that fails or no address is known yet, keeps re-locating
// originalReg and retrying with exponential backoff until the region can be
// marked available.
func (c *Client) establishRegion(originalReg *regioninfo.Info, host string, port uint16) {
	var err error
	reg := originalReg
	backoff := backoffStart
	for {
		ctx, _ := context.WithTimeout(context.Background(), regionLookupTimeout)
		if port != 0 && err == nil {
			// If this isn't the admin or meta region, check if a client
			// for this host/port already exists
			if c.clientType != AdminClient && reg != c.metaRegionInfo {
				client := c.clients.checkForClient(host, port)
				if client != nil {
					// There's already a client, add it to the
					// cache and mark the new region as available.
					c.clients.put(reg, client)
					originalReg.MarkAvailable()
					return
				}
			}
			// Make this channel buffered so that if we time out we don't
			// block the newRegion goroutine forever.
			ch := make(chan newRegResult, 1)
			var clientType region.ClientType
			if c.clientType == StandardClient {
				clientType = region.RegionClient
			} else {
				clientType = region.MasterClient
			}
			go newRegion(ctx, ch, clientType, host, port, c.rpcQueueSize, c.flushInterval)

			select {
			case res := <-ch:
				if res.Err == nil {
					if c.clientType == AdminClient {
						c.adminClient = res.Client
					} else if reg == c.metaRegionInfo {
						c.metaClient = res.Client
					} else {
						c.clients.put(reg, res.Client)
						if reg != originalReg {
							// Here `reg' is guaranteed to be available, so we
							// must publish the region->client mapping first,
							// because as soon as we add it to the key->region
							// mapping here, concurrent readers are gonna want
							// to find the client.
							c.regions.put(reg.RegionName, reg)
						}
					}
					originalReg.MarkAvailable()
					return
				} else {
					err = res.Err
				}
			case <-ctx.Done():
				err = ErrDeadline
			}
		}
		if err != nil {
			if err == TableNotFound {
				c.regions.del(originalReg.RegionName)
				originalReg.MarkAvailable()
				return
			}
			// This will be hit if either there was an error locating the
			// region, or the region was located but there was an error
			// connecting to it.
			backoff, err = sleepAndIncreaseBackoff(ctx, backoff)
			if err != nil {
				continue
			}
		}
		if c.clientType == AdminClient {
			host, port, err = c.zkLookup(ctx, zk.Master)
		} else if reg == c.metaRegionInfo {
			host, port, err = c.zkLookup(ctx, zk.Meta)
		} else {
			reg, host, port, err = c.locateRegion(ctx, originalReg.Table, originalReg.StartKey)
		}
	}
}
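// NOTE: illustrative sketch only; the real helper may differ. establishRegion
// retries with exponential backoff via sleepAndIncreaseBackoff, and a helper
// with that signature can be as simple as the following: sleep for the current
// backoff (aborting early if the lookup context expires) and double it for the
// next attempt. The 30-second cap is an assumption, not taken from the
// original.
func sleepAndIncreaseBackoff(ctx context.Context, backoff time.Duration) (time.Duration, error) {
	select {
	case <-time.After(backoff):
	case <-ctx.Done():
		// The region lookup context expired while we were sleeping.
		return backoff, ctx.Err()
	}
	const maxBackoff = 30 * time.Second
	if backoff < maxBackoff {
		backoff *= 2
	}
	return backoff, nil
}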
// sendRPCToRegion queues the RPC on the client serving reg, waits for the
// result and, depending on the kind of failure, marks either this region or
// every region on its client as unavailable before blocking until the region
// comes back.
func (c *Client) sendRPCToRegion(rpc hrpc.Call, reg *regioninfo.Info) (proto.Message, error) {
	// On the first sendRPC to the meta or admin regions, a goroutine must be
	// manually kicked off for the meta or admin region client
	if c.adminClient == nil && reg == c.adminRegionInfo && !c.adminRegionInfo.IsUnavailable() ||
		c.metaClient == nil && reg == c.metaRegionInfo && !c.metaRegionInfo.IsUnavailable() {
		c.regionsLock.Lock()
		if reg == c.metaRegionInfo && !c.metaRegionInfo.IsUnavailable() ||
			reg == c.adminRegionInfo && !c.adminRegionInfo.IsUnavailable() {
			reg.MarkUnavailable()
			go c.reestablishRegion(reg)
		}
		c.regionsLock.Unlock()
	}

	// The region was in the cache, check
	// if the region is marked as available
	if !reg.IsUnavailable() {
		// The region is available

		rpc.SetRegion(reg)

		// Queue the RPC to be sent to the region
		client := c.clientFor(reg)
		var err error
		if client == nil {
			err = errors.New("no client for this region")
		} else {
			err = client.QueueRPC(rpc)
		}

		if err != nil {
			// There was an error queueing the RPC.
			// Mark the region as unavailable.
			first := reg.MarkUnavailable()
			// If this was the first goroutine to mark the region as
			// unavailable, start a goroutine to reestablish a connection
			if first {
				go c.reestablishRegion(reg)
			}
			// Block until the region becomes available.
			return c.waitOnRegion(rpc, reg)
		}

		// Wait for the response
		var res hrpc.RPCResult
		select {
		case res = <-rpc.GetResultChan():
		case <-rpc.GetContext().Done():
			return nil, ErrDeadline
		}

		// Check for errors
		if _, ok := res.Error.(region.RetryableError); ok {
			// There's an error specific to this region, but
			// our region client is fine. Mark this region as
			// unavailable (as opposed to all regions sharing
			// the client), and start a goroutine to reestablish
			// it.
			first := reg.MarkUnavailable()
			if first {
				go c.reestablishRegion(reg)
			}
			if reg != c.metaRegionInfo && reg != c.adminRegionInfo {
				// The client won't be in the cache if this is the
				// meta or admin region
				c.clients.del(reg)
			}
			return c.waitOnRegion(rpc, reg)
		} else if _, ok := res.Error.(region.UnrecoverableError); ok {
			// If it was an unrecoverable error, the region client is
			// considered dead.
			if reg == c.metaRegionInfo || reg == c.adminRegionInfo {
				// If this is the admin client or the meta table, mark the
				// region as unavailable and start up a goroutine to
				// reconnect if it wasn't already marked as such.
				first := reg.MarkUnavailable()
				if first {
					go c.reestablishRegion(reg)
				}
			} else {
				// Else this is a normal region. Mark all the regions
				// sharing this region's client as unavailable, and start
				// a goroutine to reconnect for each of them.
				downregions := c.clients.clientDown(reg)
				for _, downreg := range downregions {
					go c.reestablishRegion(downreg)
				}
			}
			// Fall through to the case of the region being unavailable,
			// which will result in blocking until it's available again.
			return c.waitOnRegion(rpc, reg)
		} else {
			// RPC was successfully sent, or an unknown type of error
			// occurred. In either case, return the results.
			return res.Msg, res.Error
		}
	}
	return c.waitOnRegion(rpc, reg)
}
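// NOTE: illustrative sketch only; the real helper may differ. clientFor, as
// used in sendRPCToRegion above, only needs to hand back the admin or meta
// client for those two special regions and otherwise consult the region
// client cache, returning nil when no client is known for the region. The
// cache lookup method used below is assumed, not taken from the original.
func (c *Client) clientFor(reg *regioninfo.Info) *region.Client {
	if reg == c.adminRegionInfo {
		return c.adminClient
	}
	if reg == c.metaRegionInfo {
		return c.metaClient
	}
	// Hypothetical cache lookup keyed by region.
	return c.clients.clientFor(reg)
}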