func commandRestoreFromBackup(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("The RestoreFromBackup command requires the <tablet alias> argument.") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } stream, err := wr.TabletManagerClient().RestoreFromBackup(ctx, tabletInfo.Tablet) if err != nil { return err } for { e, err := stream.Recv() switch err { case nil: logutil.LogEvent(wr.Logger(), e) case io.EOF: return nil default: return err } } }
func commandVtTabletBegin(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("the <tablet_alias> argument is required for the VtTabletBegin command") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } conn, err := tabletconn.GetDialer()(ctx, tabletInfo.Tablet, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close() transactionID, err := conn.Begin(ctx) if err != nil { return fmt.Errorf("Begin failed: %v", err) } result := map[string]int64{ "transaction_id": transactionID, } return printJSON(wr.Logger(), result) }
func commandVtTabletRollback(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 2 { return fmt.Errorf("the <tablet_alias> and <transaction_id> arguments are required for the VtTabletRollback command") } transactionID, err := strconv.ParseInt(subFlags.Arg(1), 10, 64) if err != nil { return err } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } conn, err := tabletconn.GetDialer()(tabletInfo.Tablet, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close(ctx) return conn.Rollback(ctx, &querypb.Target{ Keyspace: tabletInfo.Tablet.Keyspace, Shard: tabletInfo.Tablet.Shard, TabletType: tabletInfo.Tablet.Type, }, transactionID) }
func zkResolveWildcards(wr *wrangler.Wrangler, args []string) ([]string, error) { zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return args, nil } return zk.ResolveWildcards(zkts.GetZConn(), args) }
// StartActionLoop will start the action loop for a fake tablet, // using ft.FakeMysqlDaemon as the backing mysqld. func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) { if ft.Agent != nil { t.Fatalf("Agent for %v is already running", ft.Tablet.Alias) } // Listen on a random port var err error ft.Listener, err = net.Listen("tcp", ":0") if err != nil { t.Fatalf("Cannot listen: %v", err) } port := ft.Listener.Addr().(*net.TCPAddr).Port // create a test agent on that port, and re-read the record // (it has new ports and IP) ft.Agent = tabletmanager.NewTestActionAgent(context.Background(), wr.TopoServer(), ft.Tablet.Alias, port, ft.FakeMysqlDaemon) ft.Tablet = ft.Agent.Tablet().Tablet // create the RPC server ft.RPCServer = rpcplus.NewServer() gorpctmserver.RegisterForTest(ft.RPCServer, ft.Agent) // create the HTTP server, serve the server from it handler := http.NewServeMux() bsonrpc.ServeCustomRPC(handler, ft.RPCServer, false) ft.HTTPServer = http.Server{ Handler: handler, } go ft.HTTPServer.Serve(ft.Listener) }
// FindHealthyRdonlyEndPoint returns a random healthy endpoint. // Since we don't want to use them all, we require at least // minHealthyEndPoints servers to be healthy. // May block up to -wait_for_healthy_rdonly_endpoints_timeout. func FindHealthyRdonlyEndPoint(ctx context.Context, wr *wrangler.Wrangler, cell, keyspace, shard string) (*topodatapb.TabletAlias, error) { busywaitCtx, busywaitCancel := context.WithTimeout(ctx, *WaitForHealthyEndPointsTimeout) defer busywaitCancel() // create a discovery healthcheck, wait for it to have one rdonly // endpoints at this point healthCheck := discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout, "" /* statsSuffix */) watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), healthCheck, cell, keyspace, shard, *healthCheckTopologyRefresh, 5 /*topoReadConcurrency*/) defer watcher.Stop() defer healthCheck.Close() if err := discovery.WaitForEndPoints(ctx, healthCheck, cell, keyspace, shard, []topodatapb.TabletType{topodatapb.TabletType_RDONLY}); err != nil { return nil, fmt.Errorf("error waiting for rdonly endpoints for (%v,%v/%v): %v", cell, keyspace, shard, err) } var healthyEndpoints []*topodatapb.EndPoint for { select { case <-busywaitCtx.Done(): return nil, fmt.Errorf("Not enough endpoints to choose from in (%v,%v/%v), have %v healthy ones, need at least %v Context Error: %v", cell, keyspace, shard, len(healthyEndpoints), *minHealthyEndPoints, busywaitCtx.Err()) default: } addrs := healthCheck.GetEndPointStatsFromTarget(keyspace, shard, topodatapb.TabletType_RDONLY) healthyEndpoints = make([]*topodatapb.EndPoint, 0, len(addrs)) for _, addr := range addrs { // Note we do not check the 'Serving' flag here. // This is mainly to avoid the case where we run a // Diff between a source and destination, and the source // is not serving (disabled by TabletControl). // When we switch the tablet to 'worker', it will // go back to serving state. if addr.Stats == nil || addr.Stats.HealthError != "" || addr.Stats.SecondsBehindMaster > 30 { continue } healthyEndpoints = append(healthyEndpoints, addr.EndPoint) } if len(healthyEndpoints) >= *minHealthyEndPoints { break } deadlineForLog, _ := busywaitCtx.Deadline() wr.Logger().Infof("Waiting for enough endpoints to become available. available: %v required: %v Waiting up to %.1f more seconds.", len(healthyEndpoints), *minHealthyEndPoints, deadlineForLog.Sub(time.Now()).Seconds()) // Block for 1 second because 2 seconds is the -health_check_interval flag value in integration tests. timer := time.NewTimer(1 * time.Second) select { case <-busywaitCtx.Done(): timer.Stop() case <-timer.C: } } // random server in the list is what we want index := rand.Intn(len(healthyEndpoints)) return &topodatapb.TabletAlias{ Cell: cell, Uid: healthyEndpoints[index].Uid, }, nil }
func commandListShardActions(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) (string, error) { subFlags.Parse(args) if subFlags.NArg() != 1 { log.Fatalf("action ListShardActions requires <keyspace/shard|zk shard path>") } keyspace, shard := shardParamToKeyspaceShard(subFlags.Arg(0)) return "", listActionsByShard(wr.TopoServer(), keyspace, shard) }
// StartActionLoop will start the action loop for a fake tablet, // using ft.FakeMysqlDaemon as the backing mysqld. func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) { if ft.Agent != nil { t.Fatalf("Agent for %v is already running", ft.Tablet.Alias) } // Listen on a random port for gRPC var err error ft.Listener, err = net.Listen("tcp", ":0") if err != nil { t.Fatalf("Cannot listen: %v", err) } gRPCPort := int32(ft.Listener.Addr().(*net.TCPAddr).Port) // if needed, listen on a random port for HTTP vtPort := ft.Tablet.PortMap["vt"] if ft.StartHTTPServer { ft.HTTPListener, err = net.Listen("tcp", ":0") if err != nil { t.Fatalf("Cannot listen on http port: %v", err) } handler := http.NewServeMux() ft.HTTPServer = http.Server{ Handler: handler, } go ft.HTTPServer.Serve(ft.HTTPListener) vtPort = int32(ft.HTTPListener.Addr().(*net.TCPAddr).Port) } // create a test agent on that port, and re-read the record // (it has new ports and IP) ft.Agent = tabletmanager.NewTestActionAgent(context.Background(), wr.TopoServer(), ft.Tablet.Alias, vtPort, gRPCPort, ft.FakeMysqlDaemon) ft.Tablet = ft.Agent.Tablet() // create the gRPC server ft.RPCServer = grpc.NewServer() grpctmserver.RegisterForTest(ft.RPCServer, ft.Agent) go ft.RPCServer.Serve(ft.Listener) // and wait for it to serve, so we don't start using it before it's // ready. timeout := 5 * time.Second step := 10 * time.Millisecond c := tmclient.NewTabletManagerClient() for timeout >= 0 { ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) err := c.Ping(ctx, topo.NewTabletInfo(ft.Agent.Tablet(), -1)) cancel() if err == nil { break } time.Sleep(step) timeout -= step } if timeout < 0 { panic("StartActionLoop failed.") } }
// FindHealthyRdonlyEndPoint returns a random healthy endpoint. // Since we don't want to use them all, we require at least // minHealthyEndPoints servers to be healthy. // May block up to -wait_for_healthy_rdonly_endpoints_timeout. func FindHealthyRdonlyEndPoint(ctx context.Context, wr *wrangler.Wrangler, cell, keyspace, shard string) (*topodatapb.TabletAlias, error) { busywaitCtx, busywaitCancel := context.WithTimeout(ctx, *WaitForHealthyEndPointsTimeout) defer busywaitCancel() var healthyEndpoints []*topodatapb.EndPoint for { select { case <-busywaitCtx.Done(): return nil, fmt.Errorf("Not enough endpoints to choose from in (%v,%v/%v), have %v healthy ones, need at least %v Context Error: %v", cell, keyspace, shard, len(healthyEndpoints), *minHealthyEndPoints, busywaitCtx.Err()) default: } shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) endPoints, _, err := wr.TopoServer().GetEndPoints(shortCtx, cell, keyspace, shard, topodatapb.TabletType_RDONLY) cancel() if err != nil { if err == topo.ErrNoNode { // If the node doesn't exist, count that as 0 available rdonly instances. endPoints = &topodatapb.EndPoints{} } else { return nil, fmt.Errorf("GetEndPoints(%v,%v,%v,rdonly) failed: %v", cell, keyspace, shard, err) } } healthyEndpoints = make([]*topodatapb.EndPoint, 0, len(endPoints.Entries)) for _, entry := range endPoints.Entries { if len(entry.HealthMap) == 0 { healthyEndpoints = append(healthyEndpoints, entry) } } if len(healthyEndpoints) < *minHealthyEndPoints { deadlineForLog, _ := busywaitCtx.Deadline() wr.Logger().Infof("Waiting for enough endpoints to become available. available: %v required: %v Waiting up to %.1f more seconds.", len(healthyEndpoints), *minHealthyEndPoints, deadlineForLog.Sub(time.Now()).Seconds()) // Block for 1 second because 2 seconds is the -health_check_interval flag value in integration tests. timer := time.NewTimer(1 * time.Second) select { case <-busywaitCtx.Done(): timer.Stop() case <-timer.C: } } else { break } } // random server in the list is what we want index := rand.Intn(len(healthyEndpoints)) return &topodatapb.TabletAlias{ Cell: cell, Uid: healthyEndpoints[index].Uid, }, nil }
// StartActionLoop will start the action loop for a fake tablet, // using ft.FakeMysqlDaemon as the backing mysqld. func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) { if ft.Done != nil { t.Fatalf("ActionLoop for %v is already running", ft.Tablet.Alias) } ft.Done = make(chan struct{}, 1) go func() { wr.TopoServer().ActionEventLoop(ft.Tablet.Alias, func(actionPath, data string) error { actionNode, err := actionnode.ActionNodeFromJson(data, actionPath) if err != nil { t.Fatalf("ActionNodeFromJson failed: %v\n%v", err, data) } ta := actor.NewTabletActor(nil, ft.FakeMysqlDaemon, wr.TopoServer(), ft.Tablet.Alias) if err := ta.HandleAction(actionPath, actionNode.Action, actionNode.ActionGuid, false); err != nil { // action may just fail for any good reason t.Logf("HandleAction failed for %v: %v", actionNode.Action, err) } // this part would also be done by the agent tablet, err := wr.TopoServer().GetTablet(ft.Tablet.Alias) if err != nil { t.Logf("Cannot get tablet: %v", err) } else { updatedTablet := actor.CheckTabletMysqlPort(wr.TopoServer(), ft.FakeMysqlDaemon, tablet) if updatedTablet != nil { t.Logf("Updated tablet record") } } return nil }, ft.Done) }() }
func commandStaleActions(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) (string, error) { maxStaleness := subFlags.Duration("max-staleness", 5*time.Minute, "how long since the last modification before an action considered stale") purge := subFlags.Bool("purge", false, "purge stale actions") if err := subFlags.Parse(args); err != nil { return "", err } if subFlags.NArg() == 0 { return "", fmt.Errorf("action StaleActions requires <zk action path>") } zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return "", fmt.Errorf("StaleActions requires a zktopo.Server") } zkPaths, err := resolveWildcards(wr, subFlags.Args()) if err != nil { return "", err } var errCount sync2.AtomicInt32 wg := sync.WaitGroup{} for _, apath := range zkPaths { wg.Add(1) go func(zkActionPath string) { defer wg.Done() staleActions, err := staleActions(zkts, zkActionPath, *maxStaleness) if err != nil { errCount.Add(1) wr.Logger().Errorf("can't check stale actions: %v %v", zkActionPath, err) return } for _, action := range staleActions { wr.Logger().Printf("%v\n", fmtAction(action)) } if *purge && len(staleActions) > 0 { err := zkts.PurgeActions(zkActionPath, actionnode.ActionNodeCanBePurged) if err != nil { errCount.Add(1) wr.Logger().Errorf("can't purge stale actions: %v %v", zkActionPath, err) return } } }(apath) } wg.Wait() if errCount.Get() > 0 { return "", fmt.Errorf("some errors occurred, check the log") } return "", nil }
func commandVtTabletExecute(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { transactionID := subFlags.Int("transaction_id", 0, "transaction id to use, if inside a transaction.") bindVariables := newBindvars(subFlags) keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to") shard := subFlags.String("shard", "", "shard the tablet belongs to") tabletType := subFlags.String("tablet_type", "unknown", "tablet type we expect from the tablet (use unknown to use sessionId)") connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") json := subFlags.Bool("json", false, "Output JSON instead of human-readable table") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 2 { return fmt.Errorf("the <tablet_alias> and <sql> arguments are required for the VtTabletExecute command") } tt, err := topoproto.ParseTabletType(*tabletType) if err != nil { return err } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } ep, err := topo.TabletEndPoint(tabletInfo.Tablet) if err != nil { return fmt.Errorf("cannot get EndPoint from tablet record: %v", err) } conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, tt, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close() qr, err := conn.Execute(ctx, subFlags.Arg(1), *bindVariables, int64(*transactionID)) if err != nil { return fmt.Errorf("Execute failed: %v", err) } if *json { return printJSON(wr.Logger(), qr) } printQueryResult(loggerWriter{wr.Logger()}, qr) return nil }
// findHealthyEndPoint returns the first healthy endpoint. func findHealthyEndPoint(wr *wrangler.Wrangler, cell, keyspace, shard string) (topo.TabletAlias, error) { endPoints, err := wr.TopoServer().GetEndPoints(cell, keyspace, shard, topo.TYPE_RDONLY) if err != nil { return topo.TabletAlias{}, fmt.Errorf("GetEndPoints(%v,%v,%v,rdonly) failed: %v", cell, keyspace, shard, err) } for _, entry := range endPoints.Entries { if len(entry.Health) == 0 { // first healthy server is what we want return topo.TabletAlias{ Cell: cell, Uid: entry.Uid, }, nil } } return topo.TabletAlias{}, fmt.Errorf("No endpoint to chose from in (%v,%v/%v)", cell, keyspace, shard) }
func commandVtTabletStreamHealth(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { count := subFlags.Int("count", 1, "number of responses to wait for") connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("The <tablet alias> argument is required for the VtTabletStreamHealth command.") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } ep, err := topo.TabletEndPoint(tabletInfo.Tablet) if err != nil { return fmt.Errorf("cannot get EndPoint from tablet record: %v", err) } // pass in a non-UNKNOWN tablet type to not use sessionId conn, err := tabletconn.GetDialer()(ctx, ep, "", "", pb.TabletType_MASTER, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } stream, errFunc, err := conn.StreamHealth(ctx) if err != nil { return err } for i := 0; i < *count; i++ { shr, ok := <-stream if !ok { return fmt.Errorf("stream ended early: %v", errFunc()) } data, err := json.Marshal(shr) if err != nil { wr.Logger().Errorf("cannot json-marshal structure: %v", err) } else { wr.Logger().Printf("%v\n", string(data)) } } return nil }
func commandVtTabletUpdateStream(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { count := subFlags.Int("count", 1, "number of responses to wait for") timestamp := subFlags.Int("timestamp", 0, "timestamp to start the stream from") position := subFlags.String("position", "", "position to start the stream from") connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("The <tablet alias> argument is required for the VtTabletUpdateStream command.") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } conn, err := tabletconn.GetDialer()(tabletInfo.Tablet, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } stream, err := conn.UpdateStream(ctx, &querypb.Target{ Keyspace: tabletInfo.Tablet.Keyspace, Shard: tabletInfo.Tablet.Shard, TabletType: tabletInfo.Tablet.Type, }, *position, int64(*timestamp)) if err != nil { return err } for i := 0; i < *count; i++ { se, err := stream.Recv() if err != nil { return fmt.Errorf("stream ended early: %v", err) } data, err := json.Marshal(se) if err != nil { wr.Logger().Errorf("cannot json-marshal structure: %v", err) } else { wr.Logger().Printf("%v\n", string(data)) } } return nil }
func commandVtTabletExecute(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { transactionID := subFlags.Int("transaction_id", 0, "transaction id to use, if inside a transaction.") bindVariables := newBindvars(subFlags) connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") options := subFlags.String("options", "", "execute options values as a text encoded proto of the ExecuteOptions structure") json := subFlags.Bool("json", false, "Output JSON instead of human-readable table") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 2 { return fmt.Errorf("the <tablet_alias> and <sql> arguments are required for the VtTabletExecute command") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } executeOptions, err := parseExecuteOptions(*options) if err != nil { return err } conn, err := tabletconn.GetDialer()(tabletInfo.Tablet, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close(ctx) qr, err := conn.Execute(ctx, &querypb.Target{ Keyspace: tabletInfo.Tablet.Keyspace, Shard: tabletInfo.Tablet.Shard, TabletType: tabletInfo.Tablet.Type, }, subFlags.Arg(1), *bindVariables, int64(*transactionID), executeOptions) if err != nil { return fmt.Errorf("Execute failed: %v", err) } if *json { return printJSON(wr.Logger(), qr) } printQueryResult(loggerWriter{wr.Logger()}, qr) return nil }
func commandDemoteMaster(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("action DemoteMaster requires <tablet alias|zk tablet path>") } tabletAlias, err := tabletParamToTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(tabletAlias) if err != nil { return err } return wr.TabletManagerClient().DemoteMaster(tabletInfo, wr.ActionTimeout()) }
// FindHealthyRdonlyTablet returns a random healthy RDONLY tablet. // Since we don't want to use them all, we require at least // minHealthyRdonlyTablets servers to be healthy. // May block up to -wait_for_healthy_rdonly_tablets_timeout. func FindHealthyRdonlyTablet(ctx context.Context, wr *wrangler.Wrangler, healthCheck discovery.HealthCheck, cell, keyspace, shard string, minHealthyRdonlyTablets int) (*topodatapb.TabletAlias, error) { if healthCheck == nil { // No healthcheck instance provided. Create one. healthCheck = discovery.NewHealthCheck(*remoteActionsTimeout, *healthcheckRetryDelay, *healthCheckTimeout) watcher := discovery.NewShardReplicationWatcher(wr.TopoServer(), healthCheck, cell, keyspace, shard, *healthCheckTopologyRefresh, discovery.DefaultTopoReadConcurrency) defer watcher.Stop() defer healthCheck.Close() } healthyTablets, err := waitForHealthyRdonlyTablets(ctx, wr, healthCheck, cell, keyspace, shard, minHealthyRdonlyTablets, *waitForHealthyTabletsTimeout) if err != nil { return nil, err } // random server in the list is what we want index := rand.Intn(len(healthyTablets)) return healthyTablets[index].Tablet.Alias, nil }
func commandDemoteMaster(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("action DemoteMaster requires <tablet alias>") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } _, err = wr.TabletManagerClient().DemoteMaster(ctx, tabletInfo.Tablet) return err }
// findChecker: // - find a rdonly instance in the keyspace / shard // - mark it as checker // - tag it with our worker process func findChecker(wr *wrangler.Wrangler, cleaner *wrangler.Cleaner, cell, keyspace, shard string) (topo.TabletAlias, error) { endPoints, err := wr.TopoServer().GetEndPoints(cell, keyspace, shard, topo.TYPE_RDONLY) if err != nil { return topo.TabletAlias{}, fmt.Errorf("GetEndPoints(%v,%v,%v,rdonly) failed: %v", cell, keyspace, shard, err) } if len(endPoints.Entries) == 0 { return topo.TabletAlias{}, fmt.Errorf("No endpoint to chose from in (%v,%v/%v)", cell, keyspace, shard) } tabletAlias := topo.TabletAlias{ Cell: cell, Uid: endPoints.Entries[0].Uid, } // We add the tag before calling ChangeSlaveType, so the destination // vttablet reloads the worker URL when it reloads the tablet. ourURL := servenv.ListeningURL.String() log.Infof("Adding tag[worker]=%v to tablet %v", ourURL, tabletAlias) if err := wr.TopoServer().UpdateTabletFields(tabletAlias, func(tablet *topo.Tablet) error { if tablet.Tags == nil { tablet.Tags = make(map[string]string) } tablet.Tags["worker"] = ourURL return nil }); err != nil { return topo.TabletAlias{}, err } // we remove the tag *before* calling ChangeSlaveType back, so // we need to record this tag change after the change slave // type change in the cleaner. defer wrangler.RecordTabletTagAction(cleaner, tabletAlias, "worker", "") log.Infof("Changing tablet %v to 'checker'", tabletAlias) wr.ResetActionTimeout(30 * time.Second) if err := wr.ChangeType(tabletAlias, topo.TYPE_CHECKER, false /*force*/); err != nil { return topo.TabletAlias{}, err } // Record a clean-up action to take the tablet back to rdonly. // We will alter this one later on and let the tablet go back to // 'spare' if we have stopped replication for too long on it. wrangler.RecordChangeSlaveTypeAction(cleaner, tabletAlias, topo.TYPE_RDONLY) return tabletAlias, nil }
// Does a topo lookup for a single shard, and returns: // 1. Slice of all tablet aliases for the shard. // 2. Map of tablet alias : tablet record for all tablets. func resolveRefreshTabletsForShard(ctx context.Context, keyspace, shard string, wr *wrangler.Wrangler) (refreshAliases []*topodatapb.TabletAlias, refreshTablets map[topodatapb.TabletAlias]*topo.TabletInfo, err error) { // Keep a long timeout, because we really don't want the copying to succeed, and then the worker to fail at the end. shortCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) refreshAliases, err = wr.TopoServer().FindAllTabletAliasesInShard(shortCtx, keyspace, shard) cancel() if err != nil { return nil, nil, fmt.Errorf("cannot find all refresh target tablets in %v/%v: %v", keyspace, shard, err) } wr.Logger().Infof("Found %v refresh target aliases in shard %v/%v", len(refreshAliases), keyspace, shard) shortCtx, cancel = context.WithTimeout(ctx, 5*time.Minute) refreshTablets, err = wr.TopoServer().GetTabletMap(shortCtx, refreshAliases) cancel() if err != nil { return nil, nil, fmt.Errorf("cannot read all refresh target tablets in %v/%v: %v", keyspace, shard, err) } return refreshAliases, refreshTablets, nil }
func keyspacesWithOverlappingShards(ctx context.Context, wr *wrangler.Wrangler) ([]map[string]string, error) { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) keyspaces, err := wr.TopoServer().GetKeyspaces(shortCtx) cancel() if err != nil { return nil, fmt.Errorf("failed to get list of keyspaces: %v", err) } wg := sync.WaitGroup{} mu := sync.Mutex{} // protects result result := make([]map[string]string, 0, len(keyspaces)) rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout) osList, err := topotools.FindOverlappingShards(shortCtx, wr.TopoServer(), keyspace) cancel() if err != nil { rec.RecordError(err) return } mu.Lock() for _, os := range osList { result = append(result, map[string]string{ "Keyspace": os.Left[0].Keyspace(), "Shard": os.Left[0].ShardName(), }) } mu.Unlock() }(keyspace) } wg.Wait() if rec.HasErrors() { return nil, rec.Error() } if len(result) == 0 { return nil, fmt.Errorf("There are no keyspaces with overlapping shards") } return result, nil }
func commandVtTabletBegin(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to") shard := subFlags.String("shard", "", "shard the tablet belongs to") tabletType := subFlags.String("tablet_type", "unknown", "tablet type we expect from the tablet (use unknown to use sessionId)") connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 1 { return fmt.Errorf("the <tablet_alias> argument is required for the VtTabletBegin command") } tt, err := topoproto.ParseTabletType(*tabletType) if err != nil { return err } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } ep, err := topo.TabletEndPoint(tabletInfo.Tablet) if err != nil { return fmt.Errorf("cannot get EndPoint from tablet record: %v", err) } conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, tt, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close() transactionID, err := conn.Begin(ctx) if err != nil { return fmt.Errorf("Begin failed: %v", err) } result := map[string]int64{ "transaction_id": transactionID, } return printJSON(wr, result) }
// FindWorkerTablet will: // - find a rdonly instance in the keyspace / shard // - mark it as worker // - tag it with our worker process func FindWorkerTablet(ctx context.Context, wr *wrangler.Wrangler, cleaner *wrangler.Cleaner, cell, keyspace, shard string) (*topodatapb.TabletAlias, error) { tabletAlias, err := FindHealthyRdonlyEndPoint(ctx, wr, cell, keyspace, shard) if err != nil { return nil, err } // We add the tag before calling ChangeSlaveType, so the destination // vttablet reloads the worker URL when it reloads the tablet. ourURL := servenv.ListeningURL.String() wr.Logger().Infof("Adding tag[worker]=%v to tablet %v", ourURL, topoproto.TabletAliasString(tabletAlias)) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) _, err = wr.TopoServer().UpdateTabletFields(shortCtx, tabletAlias, func(tablet *topodatapb.Tablet) error { if tablet.Tags == nil { tablet.Tags = make(map[string]string) } tablet.Tags["worker"] = ourURL return nil }) cancel() if err != nil { return nil, err } // Using "defer" here because we remove the tag *before* calling // ChangeSlaveType back, so we need to record this tag change after the change // slave type change in the cleaner. defer wrangler.RecordTabletTagAction(cleaner, tabletAlias, "worker", "") wr.Logger().Infof("Changing tablet %v to '%v'", topoproto.TabletAliasString(tabletAlias), topodatapb.TabletType_WORKER) shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout) err = wr.ChangeSlaveType(shortCtx, tabletAlias, topodatapb.TabletType_WORKER) cancel() if err != nil { return nil, err } // Record a clean-up action to take the tablet back to rdonly. // We will alter this one later on and let the tablet go back to // 'spare' if we have stopped replication for too long on it. wrangler.RecordChangeSlaveTypeAction(cleaner, tabletAlias, topodatapb.TabletType_RDONLY) return tabletAlias, nil }
func commandPruneActionLogs(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { keepCount := subFlags.Int("keep-count", 10, "count to keep") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() == 0 { return fmt.Errorf("action PruneActionLogs requires <zk action log path> ...") } paths, err := resolveWildcards(wr, subFlags.Args()) if err != nil { return err } zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return fmt.Errorf("PruneActionLogs requires a zktopo.Server") } var errCount sync2.AtomicInt32 wg := sync.WaitGroup{} for _, zkActionLogPath := range paths { wg.Add(1) go func(zkActionLogPath string) { defer wg.Done() purgedCount, err := zkts.PruneActionLogs(zkActionLogPath, *keepCount) if err == nil { wr.Logger().Infof("%v pruned %v", zkActionLogPath, purgedCount) } else { wr.Logger().Errorf("%v pruning failed: %v", zkActionLogPath, err) errCount.Add(1) } }(zkActionLogPath) } wg.Wait() if errCount.Get() > 0 { return fmt.Errorf("some errors occurred, check the log") } return nil }
// FindWorkerTablet will: // - find a rdonly instance in the keyspace / shard // - mark it as worker // - tag it with our worker process func FindWorkerTablet(ctx context.Context, wr *wrangler.Wrangler, cleaner *wrangler.Cleaner, tsc *discovery.TabletStatsCache, cell, keyspace, shard string, minHealthyRdonlyTablets int) (*topodatapb.TabletAlias, error) { tabletAlias, err := FindHealthyRdonlyTablet(ctx, wr, tsc, cell, keyspace, shard, minHealthyRdonlyTablets) if err != nil { return nil, err } // We add the tag before calling ChangeSlaveType, so the destination // vttablet reloads the worker URL when it reloads the tablet. ourURL := servenv.ListeningURL.String() wr.Logger().Infof("Adding tag[worker]=%v to tablet %v", ourURL, topoproto.TabletAliasString(tabletAlias)) shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) _, err = wr.TopoServer().UpdateTabletFields(shortCtx, tabletAlias, func(tablet *topodatapb.Tablet) error { if tablet.Tags == nil { tablet.Tags = make(map[string]string) } tablet.Tags["worker"] = ourURL tablet.Tags["drain_reason"] = "Used by vtworker" return nil }) cancel() if err != nil { return nil, err } // Using "defer" here because we remove the tag *before* calling // ChangeSlaveType back, so we need to record this tag change after the change // slave type change in the cleaner. defer wrangler.RecordTabletTagAction(cleaner, tabletAlias, "worker", "") defer wrangler.RecordTabletTagAction(cleaner, tabletAlias, "drain_reason", "") wr.Logger().Infof("Changing tablet %v to '%v'", topoproto.TabletAliasString(tabletAlias), topodatapb.TabletType_DRAINED) shortCtx, cancel = context.WithTimeout(ctx, *remoteActionsTimeout) err = wr.ChangeSlaveType(shortCtx, tabletAlias, topodatapb.TabletType_DRAINED) cancel() if err != nil { return nil, err } // Record a clean-up action to take the tablet back to rdonly. wrangler.RecordChangeSlaveTypeAction(cleaner, tabletAlias, topodatapb.TabletType_DRAINED, topodatapb.TabletType_RDONLY) return tabletAlias, nil }
func commandPurgeActions(wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) (string, error) { subFlags.Parse(args) if subFlags.NArg() == 0 { log.Fatalf("action PurgeActions requires <zk action path> ...") } zkts, ok := wr.TopoServer().(*zktopo.Server) if !ok { return "", fmt.Errorf("PurgeActions requires a zktopo.Server") } zkActionPaths, err := resolveWildcards(wr, subFlags.Args()) if err != nil { return "", err } for _, zkActionPath := range zkActionPaths { err := zkts.PurgeActions(zkActionPath, tm.ActionNodeCanBePurged) if err != nil { return "", err } } return "", nil }
func commandVtTabletRollback(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to") shard := subFlags.String("shard", "", "shard the tablet belongs to") tabletType := subFlags.String("tablet_type", "unknown", "tablet type we expect from the tablet (use unknown to use sessionId)") connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 2 { return fmt.Errorf("the <tablet_alias> and <transaction_id> arguments are required for the VtTabletRollback command") } transactionID, err := strconv.ParseInt(subFlags.Arg(1), 10, 64) if err != nil { return err } tt, err := topo.ParseTabletType(*tabletType) if err != nil { return err } tabletAlias, err := topo.ParseTabletAliasString(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } ep, err := topo.TabletEndPoint(tabletInfo.Tablet) if err != nil { return fmt.Errorf("cannot get EndPoint from tablet record: %v", err) } conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, tt, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close() return conn.Rollback(ctx, transactionID) }
// keyspacesWithServedFrom returns all the keyspaces that have ServedFrom set // to one value. func keyspacesWithServedFrom(ctx context.Context, wr *wrangler.Wrangler) ([]string, error) { shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) keyspaces, err := wr.TopoServer().GetKeyspaces(shortCtx) cancel() if err != nil { return nil, fmt.Errorf("failed to get list of keyspaces: %v", err) } wg := sync.WaitGroup{} mu := sync.Mutex{} // protects result result := make([]string, 0, len(keyspaces)) rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) ki, err := wr.TopoServer().GetKeyspace(shortCtx, keyspace) cancel() if err != nil { rec.RecordError(fmt.Errorf("failed to get details for keyspace '%v': %v", keyspace, err)) return } if len(ki.ServedFroms) > 0 { mu.Lock() result = append(result, keyspace) mu.Unlock() } }(keyspace) } wg.Wait() if rec.HasErrors() { return nil, rec.Error() } if len(result) == 0 { return nil, fmt.Errorf("there are no keyspaces with ServedFrom") } return result, nil }
func commandVtTabletExecute(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { transactionID := subFlags.Int("transaction_id", 0, "transaction id to use, if inside a transaction.") bindVariables := newBindvars(subFlags) keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to") shard := subFlags.String("shard", "", "shard the tablet belongs to") connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client") if err := subFlags.Parse(args); err != nil { return err } if subFlags.NArg() != 2 { return fmt.Errorf("the <tablet_alis> and <sql> arguments are required for the VtTabletExecute command") } tabletAlias, err := topo.ParseTabletAliasString(subFlags.Arg(0)) if err != nil { return err } tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias) if err != nil { return err } ep, err := tabletInfo.EndPoint() if err != nil { return fmt.Errorf("cannot get EndPoint from tablet record: %v", err) } // pass in empty keyspace and shard to not ask for sessionId conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, *connectTimeout) if err != nil { return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err) } defer conn.Close() qr, err := conn.Execute(ctx, subFlags.Arg(1), *bindVariables, int64(*transactionID)) if err != nil { return fmt.Errorf("Execute failed: %v", err) } wr.Logger().Printf("%v\n", jscfg.ToJSON(qr)) return nil }