// rpcCallTablet will execute the RPC on the remote server.
func (client *GoRpcTabletManagerClient) rpcCallTablet(ctx context.Context, tablet *topo.TabletInfo, name string, args, reply interface{}) error {
	// create the RPC client, using ctx.Deadline if set, or no timeout.
	var connectTimeout time.Duration
	deadline, ok := ctx.Deadline()
	if ok {
		connectTimeout = deadline.Sub(time.Now())
		if connectTimeout < 0 {
			return fmt.Errorf("Timeout connecting to TabletManager.%v on %v", name, tablet.Alias)
		}
	}
	rpcClient, err := bsonrpc.DialHTTP("tcp", tablet.Addr(), connectTimeout, nil)
	if err != nil {
		return fmt.Errorf("RPC error for %v: %v", tablet.Alias, err.Error())
	}
	defer rpcClient.Close()

	// use the context Done() channel. Will handle context timeout.
	call := rpcClient.Go(ctx, "TabletManager."+name, args, reply, nil)
	select {
	case <-ctx.Done():
		return fmt.Errorf("Timeout waiting for TabletManager.%v to %v", name, tablet.Alias)
	case <-call.Done:
		if call.Error != nil {
			return fmt.Errorf("Remote error for %v: %v", tablet.Alias, call.Error.Error())
		}
		return nil
	}
}
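// rpcCallTablet is typically wrapped by one small, typed method per
// tablet-manager action. A minimal sketch of such a wrapper, assuming a
// "Ping" action that echoes a string payload (the action name and payload
// here are illustrative assumptions, not necessarily the exact wire names):
func (client *GoRpcTabletManagerClient) Ping(ctx context.Context, tablet *topo.TabletInfo) error {
	// reply must be a pointer the RPC layer can decode into.
	var result string
	return client.rpcCallTablet(ctx, tablet, "Ping", "payload", &result)
}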
// Snapshot starts a TabletManager.Snapshot streaming RPC on the remote
// tablet. It returns a channel of log events and a function that, once the
// log channel is closed, returns the final SnapshotReply and any RPC error.
func (client *GoRpcTabletManagerClient) Snapshot(ctx context.Context, tablet *topo.TabletInfo, sa *actionnode.SnapshotArgs, waitTime time.Duration) (<-chan *logutil.LoggerEvent, tmclient.SnapshotReplyFunc, error) {
	rpcClient, err := bsonrpc.DialHTTP("tcp", tablet.Addr(), waitTime, nil)
	if err != nil {
		return nil, nil, err
	}

	logstream := make(chan *logutil.LoggerEvent, 10)
	rpcstream := make(chan *gorpcproto.SnapshotStreamingReply, 10)
	result := &actionnode.SnapshotReply{}

	c := rpcClient.StreamGo("TabletManager.Snapshot", sa, rpcstream)
	go func() {
		for ssr := range rpcstream {
			if ssr.Log != nil {
				logstream <- ssr.Log
			}
			if ssr.Result != nil {
				*result = *ssr.Result
			}
		}
		close(logstream)
		rpcClient.Close()
	}()
	return logstream, func() (*actionnode.SnapshotReply, error) {
		return result, c.Error
	}, nil
}
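// A caller-side sketch of how the Snapshot return values might be consumed:
// drain the log channel (closed by the goroutine above once the stream ends),
// then call the reply function for the final result. The wrapper name and the
// 30s dial timeout are illustrative assumptions.
func snapshotAndWait(ctx context.Context, client *GoRpcTabletManagerClient, wr *wrangler.Wrangler, tablet *topo.TabletInfo, args *actionnode.SnapshotArgs) (*actionnode.SnapshotReply, error) {
	logstream, replyFunc, err := client.Snapshot(ctx, tablet, args, 30*time.Second)
	if err != nil {
		return nil, err
	}
	for e := range logstream {
		wr.Logger().Infof("%v", e)
	}
	// the final result is only complete after the log channel is closed
	return replyFunc()
}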
// runSqlCommands will send the sql commands to the remote tablet.
func runSqlCommands(wr *wrangler.Wrangler, ti *topo.TabletInfo, commands []string, abort chan struct{}, disableBinLogs bool) error {
	for _, command := range commands {
		command, err := fillStringTemplate(command, map[string]string{"DatabaseName": ti.DbName()})
		if err != nil {
			return fmt.Errorf("fillStringTemplate failed: %v", err)
		}

		ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
		_, err = wr.TabletManagerClient().ExecuteFetch(ctx, ti, command, 0, false, disableBinLogs)
		cancel()
		if err != nil {
			return err
		}

		// check on abort
		select {
		case <-abort:
			return nil
		default:
		}
	}

	return nil
}
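// fillStringTemplate is referenced above but not shown in this section. A
// minimal sketch of what it might look like, assuming the standard
// text/template and bytes packages:
func fillStringTemplate(tmpl string, vars interface{}) (string, error) {
	myTemplate, err := template.New("").Parse(tmpl)
	if err != nil {
		return "", err
	}
	data := new(bytes.Buffer)
	if err := myTemplate.Execute(data, vars); err != nil {
		return "", err
	}
	return data.String(), nil
}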
// updateReplicationGraphForPromotedSlave makes sure the newly promoted slave
// is correctly represented in the replication graph
func (agent *ActionAgent) updateReplicationGraphForPromotedSlave(ctx context.Context, tablet *topo.TabletInfo) error {
	// Update tablet regardless - trend towards consistency.
	tablet.State = topo.STATE_READ_WRITE
	tablet.Type = topo.TYPE_MASTER
	tablet.Parent.Cell = ""
	tablet.Parent.Uid = topo.NO_TABLET
	tablet.Health = nil
	err := topo.UpdateTablet(ctx, agent.TopoServer, tablet)
	if err != nil {
		return err
	}

	// NOTE(msolomon) A serving graph update is required, but in
	// order for the shard to be consistent the old master must be
	// scrapped first. That is externally coordinated by the
	// wrangler reparent action.

	// Insert the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}
	return nil
}
func (wr *Wrangler) checkMasterElect(ti *topo.TabletInfo) error {
	// Check the master-elect is fit for duty - call out for hardware checks.
	// if the server was already serving live traffic, it's probably good
	if ti.IsInServingGraph() {
		return nil
	}
	return wr.ExecuteOptionalTabletInfoHook(ti, hook.NewSimpleHook("preflight_serving_type"))
}
// newTabletNodeFromTabletInfo builds a TabletNode from a TabletInfo. An
// invalid port map is logged but does not prevent building the node.
func newTabletNodeFromTabletInfo(ti *topo.TabletInfo) *TabletNode {
	if err := ti.ValidatePortmap(); err != nil {
		log.Errorf("ValidatePortmap(%v): %v", ti.Alias, err)
	}
	return &TabletNode{
		Host:  ti.Hostname,
		Port:  ti.Portmap["vt"],
		Alias: ti.Alias,
	}
}
// applySqlShard applies a given SQL change on a given tablet alias. It allows executing arbitrary
// SQL statements, but doesn't return any results, so it's only useful for SQL statements
// that would be run for their effects (e.g., CREATE).
// It works by applying the SQL statement on the shard's master tablet with replication turned on.
// Thus it should be used only for changes that can be applied on a live instance without causing issues;
// it shouldn't be used for anything that will require a pivot.
// The SQL statement string is expected to have {{.DatabaseName}} in place of the actual db name.
func (wr *Wrangler) applySqlShard(tabletInfo *topo.TabletInfo, change string) error {
	filledChange, err := fillStringTemplate(change, map[string]string{"DatabaseName": tabletInfo.DbName()})
	if err != nil {
		return fmt.Errorf("fillStringTemplate failed: %v", err)
	}
	ctx, cancel := context.WithTimeout(wr.ctx, 30*time.Second)
	defer cancel()
	// Need to make sure that we enable binlog, since we're only applying the statement on masters.
	_, err = wr.tmc.ExecuteFetch(ctx, tabletInfo, filledChange, 0, false, false)
	return err
}
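// A hypothetical call site for applySqlShard, showing the {{.DatabaseName}}
// placeholder in the change string; the helper name and table definition are
// illustrative.
func createLookupTable(wr *Wrangler, masterTabletInfo *topo.TabletInfo) error {
	change := "CREATE TABLE `{{.DatabaseName}}`.lookup_example (id BIGINT PRIMARY KEY, msg VARCHAR(64))"
	return wr.applySqlShard(masterTabletInfo, change)
}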
// Restore starts a TabletManager.Restore streaming RPC on the remote tablet.
// It returns a channel of log events and a function that closes the
// connection and returns the final RPC error.
func (client *GoRpcTabletManagerClient) Restore(ctx context.Context, tablet *topo.TabletInfo, sa *actionnode.RestoreArgs, waitTime time.Duration) (<-chan *logutil.LoggerEvent, tmclient.ErrFunc, error) {
	rpcClient, err := bsonrpc.DialHTTP("tcp", tablet.Addr(), waitTime, nil)
	if err != nil {
		return nil, nil, err
	}

	logstream := make(chan *logutil.LoggerEvent, 10)
	c := rpcClient.StreamGo("TabletManager.Restore", sa, logstream)
	return logstream, func() error {
		rpcClient.Close()
		return c.Error
	}, nil
}
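// Consuming the Restore stream mirrors the Snapshot case: drain the log
// channel, then call the error function once it is closed. A hedged sketch
// (the wrapper name and 30s dial timeout are illustrative assumptions):
func restoreAndWait(ctx context.Context, client *GoRpcTabletManagerClient, wr *wrangler.Wrangler, tablet *topo.TabletInfo, args *actionnode.RestoreArgs) error {
	logstream, errFunc, err := client.Restore(ctx, tablet, args, 30*time.Second)
	if err != nil {
		return err
	}
	for e := range logstream {
		wr.Logger().Infof("%v", e)
	}
	return errFunc()
}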
// UpdateTablet writes the tablet record back to ZooKeeper, using
// existingVersion for optimistic locking, and dispatches a TabletChange
// event on success. It returns the new version of the record.
func (zkts *Server) UpdateTablet(tablet *topo.TabletInfo, existingVersion int64) (int64, error) {
	zkTabletPath := TabletPathForAlias(tablet.Alias)
	stat, err := zkts.zconn.Set(zkTabletPath, tablet.Json(), int(existingVersion))
	if err != nil {
		if zookeeper.IsError(err, zookeeper.ZBADVERSION) {
			err = topo.ErrBadVersion
		} else if zookeeper.IsError(err, zookeeper.ZNONODE) {
			err = topo.ErrNoNode
		}
		return 0, err
	}

	event.Dispatch(&events.TabletChange{
		Tablet: *tablet.Tablet,
		Status: "updated",
	})
	return int64(stat.Version()), nil
}
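// A hedged sketch of using the returned version for optimistic concurrency:
// re-read the tablet and retry the write when the server reports
// topo.ErrBadVersion. GetTablet and a Version accessor on TabletInfo are
// assumed here to return the record together with the version it was read at.
func updateTabletWithRetry(zkts *Server, alias topo.TabletAlias, update func(*topo.Tablet) error) error {
	for {
		ti, err := zkts.GetTablet(alias)
		if err != nil {
			return err
		}
		if err := update(ti.Tablet); err != nil {
			return err
		}
		if _, err := zkts.UpdateTablet(ti, ti.Version()); err != topo.ErrBadVersion {
			// success, or a non-retryable error
			return err
		}
		// lost the race with a concurrent writer, read and try again
	}
}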
// executeFetchLoop loops over the provided insertChannel
// and sends the commands to the provided tablet.
func executeFetchLoop(wr *wrangler.Wrangler, ti *topo.TabletInfo, insertChannel chan string, abort chan struct{}, disableBinLogs bool) error {
	for {
		select {
		case cmd, ok := <-insertChannel:
			if !ok {
				// no more to read, we're done
				return nil
			}
			cmd = "INSERT INTO `" + ti.DbName() + "`." + cmd
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			_, err := wr.TabletManagerClient().ExecuteFetch(ctx, ti, cmd, 0, false, disableBinLogs)
			cancel()
			if err != nil {
				return fmt.Errorf("ExecuteFetch failed: %v", err)
			}
		case <-abort:
			// FIXME(alainjobart): note this select case
			// could be starved here, and we might miss
			// the abort in some corner cases.
			return nil
		}
	}
}
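// A producer-side sketch for executeFetchLoop: the caller writes INSERT
// fragments (everything after "INSERT INTO `db`.", i.e. table name plus
// columns and VALUES) and closes the channel to signal completion. The
// helper name, table name and rows below are illustrative.
func feedInserts(wr *wrangler.Wrangler, ti *topo.TabletInfo, abort chan struct{}) error {
	insertChannel := make(chan string, 10)
	errChan := make(chan error, 1)
	go func() {
		errChan <- executeFetchLoop(wr, ti, insertChannel, abort, true /* disableBinLogs */)
	}()
	for i := 0; i < 3; i++ {
		insertChannel <- fmt.Sprintf("my_table (id, msg) VALUES (%v, 'row %v')", i, i)
	}
	close(insertChannel)
	return <-errChan
}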
// change a tablet type to RESTORE and set all the other arguments.
// from now on, we can go to:
// - back to IDLE if we don't use the tablet at all (after for instance
//   a successful ReserveForRestore but a failed Snapshot)
// - to SCRAP if something in the process on the target host fails
// - to SPARE if the clone works
func (agent *ActionAgent) changeTypeToRestore(ctx context.Context, tablet, sourceTablet *topo.TabletInfo, parentAlias topo.TabletAlias, keyRange key.KeyRange) error {
	// run the optional preflight_assigned hook
	hk := hook.NewSimpleHook("preflight_assigned")
	topotools.ConfigureTabletHook(hk, agent.TabletAlias)
	if err := hk.ExecuteOptional(); err != nil {
		return err
	}

	// change the type
	tablet.Parent = parentAlias
	tablet.Keyspace = sourceTablet.Keyspace
	tablet.Shard = sourceTablet.Shard
	tablet.Type = topo.TYPE_RESTORE
	tablet.KeyRange = keyRange
	tablet.DbNameOverride = sourceTablet.DbNameOverride
	if err := topo.UpdateTablet(ctx, agent.TopoServer, tablet); err != nil {
		return err
	}

	// and create the replication graph items
	return topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet.Tablet)
}
// findChunks returns an array of chunks to use for splitting up a table
// into multiple data chunks. It only works for tables with a primary key
// (and the primary key first column is a numeric type).
// The array will always look like:
// "", "value1", "value2", ""
// A non-split table will just return:
// "", ""
func findChunks(wr *wrangler.Wrangler, ti *topo.TabletInfo, td *myproto.TableDefinition, minTableSizeForSplit uint64, sourceReaderCount int) ([]string, error) {
	result := []string{"", ""}

	// eliminate a few cases we don't split tables for
	if len(td.PrimaryKeyColumns) == 0 {
		// no primary key, what can we do?
		return result, nil
	}
	if td.DataLength < minTableSizeForSplit {
		// table is too small to split up
		return result, nil
	}

	// get the min and max of the leading column of the primary key
	query := fmt.Sprintf("SELECT MIN(%v), MAX(%v) FROM %v.%v", td.PrimaryKeyColumns[0], td.PrimaryKeyColumns[0], ti.DbName(), td.Name)
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	qr, err := wr.TabletManagerClient().ExecuteFetch(ctx, ti, query, 1, true, false)
	cancel()
	if err != nil {
		wr.Logger().Infof("Not splitting table %v into multiple chunks: %v", td.Name, err)
		return result, nil
	}
	if len(qr.Rows) != 1 {
		wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot get min and max", td.Name)
		return result, nil
	}
	if qr.Rows[0][0].IsNull() || qr.Rows[0][1].IsNull() {
		wr.Logger().Infof("Not splitting table %v into multiple chunks, min or max is NULL: %v %v", td.Name, qr.Rows[0][0], qr.Rows[0][1])
		return result, nil
	}

	switch qr.Fields[0].Type {
	case mproto.VT_TINY, mproto.VT_SHORT, mproto.VT_LONG, mproto.VT_LONGLONG, mproto.VT_INT24:
		minNumeric := sqltypes.MakeNumeric(qr.Rows[0][0].Raw())
		maxNumeric := sqltypes.MakeNumeric(qr.Rows[0][1].Raw())
		if qr.Rows[0][0].Raw()[0] == '-' {
			// signed values, use int64
			min, err := minNumeric.ParseInt64()
			if err != nil {
				wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, minNumeric, err)
				return result, nil
			}
			max, err := maxNumeric.ParseInt64()
			if err != nil {
				wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, maxNumeric, err)
				return result, nil
			}
			interval := (max - min) / int64(sourceReaderCount)
			if interval == 0 {
				wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min)
				return result, nil
			}

			result = make([]string, sourceReaderCount+1)
			result[0] = ""
			result[sourceReaderCount] = ""
			for i := int64(1); i < int64(sourceReaderCount); i++ {
				result[i] = fmt.Sprintf("%v", min+interval*i)
			}
			return result, nil
		}

		// unsigned values, use uint64
		min, err := minNumeric.ParseUint64()
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, minNumeric, err)
			return result, nil
		}
		max, err := maxNumeric.ParseUint64()
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, maxNumeric, err)
			return result, nil
		}
		interval := (max - min) / uint64(sourceReaderCount)
		if interval == 0 {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min)
			return result, nil
		}

		result = make([]string, sourceReaderCount+1)
		result[0] = ""
		result[sourceReaderCount] = ""
		for i := uint64(1); i < uint64(sourceReaderCount); i++ {
			result[i] = fmt.Sprintf("%v", min+interval*i)
		}
		return result, nil

	case mproto.VT_FLOAT, mproto.VT_DOUBLE:
		min, err := strconv.ParseFloat(qr.Rows[0][0].String(), 64)
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, qr.Rows[0][0], err)
			return result, nil
		}
		max, err := strconv.ParseFloat(qr.Rows[0][1].String(), 64)
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, qr.Rows[0][1].String(), err)
			return result, nil
		}
		interval := (max - min) / float64(sourceReaderCount)
		if interval == 0 {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min)
			return result, nil
		}

		result = make([]string, sourceReaderCount+1)
		result[0] = ""
		result[sourceReaderCount] = ""
		for i := 1; i < sourceReaderCount; i++ {
			result[i] = fmt.Sprintf("%v", min+interval*float64(i))
		}
		return result, nil
	}

	wr.Logger().Infof("Not splitting table %v into multiple chunks, primary key not numeric", td.Name)
	return result, nil
}
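// The chunk boundaries returned by findChunks ("", "v1", ..., "") are
// typically turned into per-reader range predicates on the leading primary
// key column, with an empty string meaning an open end. A hedged sketch of
// that translation (the helper name and exact SQL shape are assumptions):
func chunkWhereClauses(pkColumn string, chunks []string) []string {
	clauses := make([]string, 0, len(chunks)-1)
	for i := 0; i < len(chunks)-1; i++ {
		start, end := chunks[i], chunks[i+1]
		switch {
		case start == "" && end == "":
			// single chunk, no restriction
			clauses = append(clauses, "")
		case start == "":
			clauses = append(clauses, fmt.Sprintf("WHERE %v < %v", pkColumn, end))
		case end == "":
			clauses = append(clauses, fmt.Sprintf("WHERE %v >= %v", pkColumn, start))
		default:
			clauses = append(clauses, fmt.Sprintf("WHERE %v >= %v AND %v < %v", pkColumn, start, pkColumn, end))
		}
	}
	return clauses
}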