// ExecuteVtworkerCommand is part of the pb.VtworkerServer interface.
func (s *VtworkerServer) ExecuteVtworkerCommand(args *pb.ExecuteVtworkerCommandRequest, stream pbs.Vtworker_ExecuteVtworkerCommandServer) (err error) {
	// Please note that this panic handler catches only panics occurring in the code below.
	// The actual execution of the vtworker command takes place in a new goroutine
	// (started in Instance.setAndStartWorker()) which has its own panic handler.
	defer servenv.HandlePanic("vtworker", &err)

	// create a logger, send the result back to the caller
	logstream := logutil.NewChannelLogger(10)
	logger := logutil.NewTeeLogger(logstream, logutil.NewMemoryLogger())

	// send logs to the caller
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		for e := range logstream {
			// Note we don't interrupt the loop here, as
			// we still need to flush and finish the
			// command, even if the channel to the client
			// has been broken. We'll just keep trying.
			stream.Send(&pb.ExecuteVtworkerCommandResponse{
				Event: &pbl.Event{
					Time: &pbl.Time{
						Seconds:     e.Time.Unix(),
						Nanoseconds: int32(e.Time.Nanosecond()),
					},
					Level: pbl.Level(e.Level),
					File:  e.File,
					Line:  int64(e.Line),
					Value: e.Value,
				},
			})
		}
		wg.Done()
	}()

	// create the wrangler
	wr := s.wi.CreateWrangler(logger)

	// execute the command
	if len(args.Args) >= 1 && args.Args[0] == "Reset" {
		err = s.wi.Reset()
	} else {
		// Make sure we use the global "err" variable and do not redeclare it in this scope.
		var worker worker.Worker
		var done chan struct{}
		worker, done, err = s.wi.RunCommand(args.Args, wr, false /*runFromCli*/)
		if err == nil {
			err = s.wi.WaitForCommand(worker, done)
		}
	}

	// close the log channel, and wait for them all to be sent
	close(logstream)
	wg.Wait()

	return err
}
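// The function above relies on a drain-then-close discipline: the command
// tees its log events into a buffered channel, a single goroutine forwards
// them, and the channel is closed only after the last write, so wg.Wait()
// guarantees every event was forwarded. Below is a minimal, self-contained
// sketch of that discipline; all names in it are illustrative, not from the
// repo.
func drainThenCloseSketch(forward func(string)) {
	events := make(chan string, 10) // buffered, like logutil.NewChannelLogger(10)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for e := range events {
			forward(e) // keep forwarding even if the receiver misbehaves
		}
	}()
	events <- "command started"
	events <- "command finished"
	close(events) // no writes may happen after this point
	wg.Wait()     // all buffered events have been forwarded
}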
// Instantiate is part of the workflow.Factory interface.
func (f *SleepWorkflowFactory) Instantiate(w *workflowpb.Workflow) (Workflow, error) {
	data := &SleepWorkflowData{}
	if err := json.Unmarshal(w.Data, data); err != nil {
		return nil, err
	}
	return &SleepWorkflow{
		data:   data,
		logger: logutil.NewMemoryLogger(),
	}, nil
}
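// Hypothetical inverse of Instantiate above, showing the JSON round-trip it
// assumes: the workflow's Data field carries marshaled SleepWorkflowData.
// "newSleepWorkflowProto" is illustrative, not code from the repo.
func newSleepWorkflowProto(data *SleepWorkflowData) (*workflowpb.Workflow, error) {
	b, err := json.Marshal(data)
	if err != nil {
		return nil, err
	}
	return &workflowpb.Workflow{
		Data: b,
	}, nil
}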
// setAndStartWorker will set the current worker.
// We always log to both the memory logger (for display on the web) and the
// console logger (for records / display of a command-line worker).
func (wi *Instance) setAndStartWorker(wrk Worker, wr *wrangler.Wrangler) (chan struct{}, error) {
	wi.currentWorkerMutex.Lock()
	defer wi.currentWorkerMutex.Unlock()
	if wi.currentWorker != nil {
		return nil, fmt.Errorf("A worker is already in progress: %v", wi.currentWorker)
	}

	wi.currentWorker = wrk
	wi.currentMemoryLogger = logutil.NewMemoryLogger()
	wi.currentContext, wi.currentCancelFunc = context.WithCancel(wi.backgroundContext)
	wi.lastRunError = nil
	done := make(chan struct{})
	wranglerLogger := wr.Logger()
	if wr == wi.wr {
		// If it's the default wrangler, do not reuse its logger because it may have been set before.
		// Reusing it would result in an endless recursion.
		wranglerLogger = logutil.NewConsoleLogger()
	}
	wr.SetLogger(logutil.NewTeeLogger(wi.currentMemoryLogger, wranglerLogger))

	// one go function runs the worker, changes state when done
	go func() {
		log.Infof("Starting worker...")
		var err error

		// Catch all panics and always save the execution state at the end.
		defer func() {
			// The recovery code is a copy of servenv.HandlePanic().
			if x := recover(); x != nil {
				err = fmt.Errorf("uncaught %v panic: %v", "vtworker", x)
			}

			wi.currentWorkerMutex.Lock()
			wi.currentContext = nil
			wi.currentCancelFunc = nil
			wi.lastRunError = err
			wi.currentWorkerMutex.Unlock()
			close(done)
		}()

		// run will take a long time
		err = wrk.Run(wi.currentContext)
	}()

	return done, nil
}
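// Hypothetical caller sketch for setAndStartWorker above: start the worker,
// block on the returned channel, then read the saved run error under the
// same mutex the worker goroutine takes before closing 'done'.
// "runWorkerAndWait" is illustrative, not code from the repo.
func runWorkerAndWait(wi *Instance, wrk Worker, wr *wrangler.Wrangler) error {
	done, err := wi.setAndStartWorker(wrk, wr)
	if err != nil {
		return err
	}
	// The worker goroutine records lastRunError before closing 'done'.
	<-done
	wi.currentWorkerMutex.Lock()
	defer wi.currentWorkerMutex.Unlock()
	return wi.lastRunError
}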
func TestShard(t *testing.T) {
	cell := "cell1"
	keyspace := "ks1"
	shard := "sh1"
	ctx := context.Background()
	ts := zktestserver.New(t, []string{cell})

	// Create a Keyspace / Shard
	if err := ts.CreateKeyspace(ctx, keyspace, &topodatapb.Keyspace{}); err != nil {
		t.Fatalf("CreateKeyspace failed: %v", err)
	}
	if err := ts.CreateShard(ctx, keyspace, shard); err != nil {
		t.Fatalf("CreateShard failed: %v", err)
	}

	// Hack the zookeeper backend to create an error for GetShard.
	zconn := ts.Impl.(*zktestserver.TestServer).Impl.(*zktopo.Server).GetZConn()
	if _, err := zconn.Set(path.Join(zktopo.GlobalKeyspacesPath, keyspace, "shards", shard), []byte{}, -1); err != nil {
		t.Fatalf("failed to hack the shard: %v", err)
	}

	// Create the workflow, run the validator.
	w := &Workflow{
		logger: logutil.NewMemoryLogger(),
	}
	sv := &ShardValidator{}
	if err := sv.Audit(ctx, ts, w); err != nil {
		t.Fatalf("Audit failed: %v", err)
	}
	if len(w.fixers) != 1 {
		t.Fatalf("fixer not added: %v", w.fixers)
	}
	if !strings.Contains(w.fixers[0].message, "bad shard data") {
		t.Errorf("bad message: %v ", w.fixers[0].message)
	}

	// Run Delete, make sure the entry is removed.
	if err := w.fixers[0].fixer.Action(ctx, "Delete"); err != nil {
		t.Fatalf("Action failed: %v", err)
	}
	shards, err := ts.GetShardNames(ctx, keyspace)
	if err != nil || len(shards) != 0 {
		t.Errorf("bad GetShardNames output: %v %v ", shards, err)
	}
}
// ExecuteVtworkerCommand is part of the pb.VtworkerServer interface.
func (s *VtworkerServer) ExecuteVtworkerCommand(args *pb.ExecuteVtworkerCommandRequest, stream pbs.Vtworker_ExecuteVtworkerCommandServer) (err error) {
	// Please note that this panic handler catches only panics occurring in the code below.
	// The actual execution of the vtworker command takes place in a new goroutine
	// (started in Instance.setAndStartWorker()) which has its own panic handler.
	defer servenv.HandlePanic("vtworker", &err)

	// create a logger, send the result back to the caller
	logstream := logutil.NewChannelLogger(10)
	logger := logutil.NewTeeLogger(logstream, logutil.NewMemoryLogger())

	// send logs to the caller
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		for e := range logstream {
			// Note we don't interrupt the loop here, as
			// we still need to flush and finish the
			// command, even if the channel to the client
			// has been broken. We'll just keep trying.
			stream.Send(&pb.ExecuteVtworkerCommandResponse{
				Event: e,
			})
		}
		wg.Done()
	}()

	// create the wrangler
	wr := s.wi.CreateWrangler(logger)

	// execute the command
	worker, done, err := s.wi.RunCommand(args.Args, wr, false /*runFromCli*/)
	if err == nil && worker != nil && done != nil {
		err = s.wi.WaitForCommand(worker, done)
	}

	// close the log channel, and wait for them all to be sent
	close(logstream)
	wg.Wait()

	return err
}
// setAndStartWorker will set the current worker.
// We always log to both the memory logger (for display on the web) and the
// console logger (for records / display of a command-line worker).
func setAndStartWorker(wrk worker.Worker) (chan struct{}, error) {
	currentWorkerMutex.Lock()
	defer currentWorkerMutex.Unlock()
	if currentWorker != nil {
		return nil, fmt.Errorf("A worker is already in progress: %v", currentWorker)
	}

	currentWorker = wrk
	currentMemoryLogger = logutil.NewMemoryLogger()
	currentDone = make(chan struct{})
	wr.SetLogger(logutil.NewTeeLogger(currentMemoryLogger, logutil.NewConsoleLogger()))

	// one go function runs the worker, closes 'done' when done
	go func() {
		log.Infof("Starting worker...")
		wrk.Run()
		close(currentDone)
	}()

	return currentDone, nil
}
func TestRebuildShard(t *testing.T) {
	ctx := context.Background()
	cells := []string{"test_cell"}
	logger := logutil.NewMemoryLogger()

	// Set up topology.
	ts := zktopo.NewTestServer(t, cells)
	si, err := GetOrCreateShard(ctx, ts, testKeyspace, testShard)
	if err != nil {
		t.Fatalf("GetOrCreateShard: %v", err)
	}
	si.Cells = append(si.Cells, cells[0])
	if err := topo.UpdateShard(ctx, ts, si); err != nil {
		t.Fatalf("UpdateShard: %v", err)
	}
	masterInfo := addTablet(ctx, t, ts, 1, cells[0], topo.TYPE_MASTER)
	replicaInfo := addTablet(ctx, t, ts, 2, cells[0], topo.TYPE_REPLICA)

	// Do an initial rebuild.
	if _, err := RebuildShard(ctx, logger, ts, testKeyspace, testShard, cells, time.Minute); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Check initial state.
	ep, _, err := ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_MASTER)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}
	ep, _, err = ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}

	// Make a change.
	masterInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ctx, ts, masterInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	if _, err := RebuildShard(ctx, logger, ts, testKeyspace, testShard, cells, time.Minute); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Make another change.
	replicaInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ctx, ts, replicaInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	if _, err := RebuildShard(ctx, logger, ts, testKeyspace, testShard, cells, time.Minute); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Check that the rebuild picked up both changes.
	if _, _, err := ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_MASTER); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("first change wasn't picked up by second rebuild")
	}
	if _, _, err := ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_REPLICA); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("second change was overwritten by first rebuild finishing late")
	}
}
func TestFixShardReplication(t *testing.T) {
	cell := "cell1"
	keyspace := "ks1"
	shard := "shard1"
	ctx := context.Background()
	ts := zktestserver.New(t, []string{cell})

	// Create a tablet.
	alias := &topodatapb.TabletAlias{
		Cell: cell,
		Uid:  1,
	}
	tablet := &topodatapb.Tablet{
		Keyspace: keyspace,
		Shard:    shard,
		Alias:    alias,
	}
	if err := ts.CreateTablet(ctx, tablet); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}

	// Make sure it's in the ShardReplication.
	sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard)
	if err != nil {
		t.Fatalf("GetShardReplication failed: %v", err)
	}
	if len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, alias) {
		t.Errorf("Missing or wrong alias in ShardReplication: %v", sri)
	}

	// Run FixShardReplication, should do nothing.
	logger := logutil.NewMemoryLogger()
	if err := topo.FixShardReplication(ctx, ts, logger, cell, keyspace, shard); err != nil {
		t.Errorf("FixShardReplication failed: %v", err)
	}
	sri, err = ts.GetShardReplication(ctx, cell, keyspace, shard)
	if err != nil {
		t.Fatalf("GetShardReplication failed: %v", err)
	}
	if len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, alias) {
		t.Errorf("Missing or wrong alias in ShardReplication: %v", sri)
	}
	if !strings.Contains(logger.String(), "All entries in replication graph are valid") {
		t.Errorf("Wrong log: %v", logger.String())
	}

	// Add a bogus entry: a non-existing tablet.
	if err := ts.UpdateShardReplicationFields(ctx, cell, keyspace, shard, func(sr *topodatapb.ShardReplication) error {
		sr.Nodes = append(sr.Nodes, &topodatapb.ShardReplication_Node{
			TabletAlias: &topodatapb.TabletAlias{
				Cell: cell,
				Uid:  2,
			},
		})
		return nil
	}); err != nil {
		t.Fatalf("UpdateShardReplicationFields failed: %v", err)
	}
	logger.Clear()
	if err := topo.FixShardReplication(ctx, ts, logger, cell, keyspace, shard); err != nil {
		t.Errorf("FixShardReplication failed: %v", err)
	}
	sri, err = ts.GetShardReplication(ctx, cell, keyspace, shard)
	if err != nil {
		t.Fatalf("GetShardReplication failed: %v", err)
	}
	if len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, alias) {
		t.Errorf("Missing or wrong alias in ShardReplication: %v", sri)
	}
	if !strings.Contains(logger.String(), "but does not exist, removing it") {
		t.Errorf("Wrong log: %v", logger.String())
	}

	// Add a bogus entry: a tablet with the wrong keyspace.
	if err := ts.CreateTablet(ctx, &topodatapb.Tablet{
		Keyspace: "other" + keyspace,
		Shard:    shard,
		Alias: &topodatapb.TabletAlias{
			Cell: cell,
			Uid:  3,
		},
	}); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}
	if err := ts.UpdateShardReplicationFields(ctx, cell, keyspace, shard, func(sr *topodatapb.ShardReplication) error {
		sr.Nodes = append(sr.Nodes, &topodatapb.ShardReplication_Node{
			TabletAlias: &topodatapb.TabletAlias{
				Cell: cell,
				Uid:  3,
			},
		})
		return nil
	}); err != nil {
		t.Fatalf("UpdateShardReplicationFields failed: %v", err)
	}
	logger.Clear()
	if err := topo.FixShardReplication(ctx, ts, logger, cell, keyspace, shard); err != nil {
		t.Errorf("FixShardReplication failed: %v", err)
	}
	sri, err = ts.GetShardReplication(ctx, cell, keyspace, shard)
	if err != nil {
		t.Fatalf("GetShardReplication failed: %v", err)
	}
	if len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, alias) {
		t.Errorf("Missing or wrong alias in ShardReplication: %v", sri)
	}
	if !strings.Contains(logger.String(), "but has wrong keyspace/shard/cell, removing it") {
		t.Errorf("Wrong log: %v", logger.String())
	}
}
// setAndStartWorker will set the current worker.
// We always log to both the memory logger (for display on the web) and the
// console logger (for records / display of a command-line worker).
func (wi *Instance) setAndStartWorker(wrk Worker, wr *wrangler.Wrangler) (chan struct{}, error) {
	wi.currentWorkerMutex.Lock()
	defer wi.currentWorkerMutex.Unlock()

	if wi.currentContext != nil {
		return nil, vterrors.FromError(vtrpcpb.ErrorCode_TRANSIENT_ERROR,
			fmt.Errorf("A worker job is already in progress: %v", wi.currentWorker))
	}

	if wi.currentWorker != nil {
		// During the grace period after the last run stopped, we answer with a retryable error.
		const gracePeriod = 1 * time.Minute
		gracePeriodEnd := wi.lastRunStopTime.Add(gracePeriod)
		if time.Now().Before(gracePeriodEnd) {
			return nil, vterrors.FromError(vtrpcpb.ErrorCode_TRANSIENT_ERROR,
				fmt.Errorf("A worker job was recently stopped (%f seconds ago): %v",
					time.Now().Sub(wi.lastRunStopTime).Seconds(),
					wi.currentWorker))
		}

		// QUERY_NOT_SERVED = FailedPrecondition => manual resolution required.
		return nil, vterrors.FromError(vtrpcpb.ErrorCode_QUERY_NOT_SERVED,
			fmt.Errorf("The worker job was stopped %.1f minutes ago, but not reset. You have to reset it manually. Job: %v",
				time.Now().Sub(wi.lastRunStopTime).Minutes(),
				wi.currentWorker))
	}

	wi.currentWorker = wrk
	wi.currentMemoryLogger = logutil.NewMemoryLogger()
	wi.currentContext, wi.currentCancelFunc = context.WithCancel(wi.backgroundContext)
	wi.lastRunError = nil
	wi.lastRunStopTime = time.Unix(0, 0)
	done := make(chan struct{})
	wranglerLogger := wr.Logger()
	if wr == wi.wr {
		// If it's the default wrangler, do not reuse its logger because it may have been set before.
		// Reusing it would result in an endless recursion.
		wranglerLogger = logutil.NewConsoleLogger()
	}
	wr.SetLogger(logutil.NewTeeLogger(wi.currentMemoryLogger, wranglerLogger))

	// one go function runs the worker, changes state when done
	go func() {
		log.Infof("Starting worker...")
		var err error

		// Catch all panics and always save the execution state at the end.
		defer func() {
			// The recovery code is a copy of servenv.HandlePanic().
			if x := recover(); x != nil {
				log.Errorf("uncaught vtworker panic: %v\n%s", x, tb.Stack(4))
				err = fmt.Errorf("uncaught vtworker panic: %v", x)
			}

			wi.currentWorkerMutex.Lock()
			wi.currentContext = nil
			wi.currentCancelFunc = nil
			wi.lastRunError = err
			wi.lastRunStopTime = time.Now()
			wi.currentWorkerMutex.Unlock()
			close(done)
		}()

		// run will take a long time
		err = wrk.Run(wi.currentContext)
	}()

	return done, nil
}
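// Hypothetical client-side sketch of the grace-period contract above: a
// TRANSIENT_ERROR means "retry later", while QUERY_NOT_SERVED means a manual
// Reset is required. The string check stands in for inspecting the vtrpcpb
// error code; everything here is illustrative, not code from the repo.
func startWithRetrySketch(wi *Instance, wrk Worker, wr *wrangler.Wrangler) (chan struct{}, error) {
	for attempt := 0; attempt < 5; attempt++ {
		done, err := wi.setAndStartWorker(wrk, wr)
		if err == nil {
			return done, nil
		}
		if strings.Contains(err.Error(), "but not reset") {
			return nil, err // FailedPrecondition: requires a manual Reset
		}
		time.Sleep(10 * time.Second) // transient: the grace period may expire
	}
	return nil, fmt.Errorf("worker still busy after retries")
}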
func initAPI(ctx context.Context, ts topo.Server, actions *ActionRepository, realtimeStats *realtimeStats) {
	tabletHealthCache := newTabletHealthCache(ts)
	tmClient := tmclient.NewTabletManagerClient()

	// Cells
	handleCollection("cells", func(r *http.Request) (interface{}, error) {
		if getItemPath(r.URL.Path) != "" {
			return nil, errors.New("cells can only be listed, not retrieved")
		}
		return ts.GetKnownCells(ctx)
	})

	// Keyspaces
	handleCollection("keyspaces", func(r *http.Request) (interface{}, error) {
		keyspace := getItemPath(r.URL.Path)
		switch r.Method {
		case "GET":
			// List all keyspaces.
			if keyspace == "" {
				return ts.GetKeyspaces(ctx)
			}
			// Get the keyspace record.
			k, err := ts.GetKeyspace(ctx, keyspace)
			// Pass the embedded proto directly or jsonpb will panic.
			return k.Keyspace, err
		// Perform an action on a keyspace.
		case "POST":
			if keyspace == "" {
				return nil, errors.New("A POST request needs a keyspace in the URL")
			}
			if err := r.ParseForm(); err != nil {
				return nil, err
			}
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("A POST request must specify action")
			}
			return actions.ApplyKeyspaceAction(ctx, action, keyspace, r), nil
		default:
			return nil, fmt.Errorf("unsupported HTTP method: %v", r.Method)
		}
	})

	// Shards
	handleCollection("shards", func(r *http.Request) (interface{}, error) {
		shardPath := getItemPath(r.URL.Path)
		if !strings.Contains(shardPath, "/") {
			return nil, fmt.Errorf("invalid shard path: %q", shardPath)
		}
		parts := strings.SplitN(shardPath, "/", 2)
		keyspace := parts[0]
		shard := parts[1]

		// List the shards in a keyspace.
		if shard == "" {
			return ts.GetShardNames(ctx, keyspace)
		}

		// Perform an action on a shard.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			}
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			}
			return actions.ApplyShardAction(ctx, action, keyspace, shard, r), nil
		}

		// Get the shard record.
		si, err := ts.GetShard(ctx, keyspace, shard)
		// Pass the embedded proto directly or jsonpb will panic.
		return si.Shard, err
	})

	// SrvKeyspace
	handleCollection("srv_keyspace", func(r *http.Request) (interface{}, error) {
		keyspacePath := getItemPath(r.URL.Path)
		parts := strings.SplitN(keyspacePath, "/", 2)

		// Request was incorrectly formatted.
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid srvkeyspace path: %q expected path: /srv_keyspace/<cell>/<keyspace>", keyspacePath)
		}

		cell := parts[0]
		keyspace := parts[1]

		if cell == "local" {
			if *localCell == "" {
				return nil, fmt.Errorf("local cell requested, but not specified. Please set with -cell flag")
			}
			cell = *localCell
		}

		// If a keyspace is provided then return the specified srvkeyspace.
		if keyspace != "" {
			srvKeyspace, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
			if err != nil {
				return nil, fmt.Errorf("Can't get server keyspace: %v", err)
			}
			return srvKeyspace, nil
		}

		// Else return the srvKeyspace from all keyspaces.
		srvKeyspaces := make(map[string]interface{})
		keyspaceNamesList, err := ts.GetSrvKeyspaceNames(ctx, cell)
		if err != nil {
			return nil, fmt.Errorf("can't get list of SrvKeyspaceNames for cell %q: GetSrvKeyspaceNames returned: %v", cell, err)
		}
		for _, keyspaceName := range keyspaceNamesList {
			if err := addSrvkeyspace(ctx, ts, cell, keyspaceName, srvKeyspaces); err != nil {
				return nil, err
			}
		}
		return srvKeyspaces, nil
	})

	// Tablets
	handleCollection("tablets", func(r *http.Request) (interface{}, error) {
		tabletPath := getItemPath(r.URL.Path)

		// List tablets based on query params.
		if tabletPath == "" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			}
			shardRef := r.FormValue("shard")
			cell := r.FormValue("cell")

			if shardRef != "" {
				// Look up by keyspace/shard, and optionally cell.
				keyspace, shard, err := topoproto.ParseKeyspaceShard(shardRef)
				if err != nil {
					return nil, err
				}
				if cell != "" {
					result, err := ts.FindAllTabletAliasesInShardByCell(ctx, keyspace, shard, []string{cell})
					if err != nil && err != topo.ErrPartialResult {
						return result, err
					}
					return result, nil
				}
				result, err := ts.FindAllTabletAliasesInShard(ctx, keyspace, shard)
				if err != nil && err != topo.ErrPartialResult {
					return result, err
				}
				return result, nil
			}

			// Get all tablets in a cell.
			if cell == "" {
				return nil, errors.New("cell param required")
			}
			return ts.GetTabletsByCell(ctx, cell)
		}

		// Get tablet health.
		if parts := strings.Split(tabletPath, "/"); len(parts) == 2 && parts[1] == "health" {
			tabletAlias, err := topoproto.ParseTabletAlias(parts[0])
			if err != nil {
				return nil, err
			}
			return tabletHealthCache.Get(ctx, tabletAlias)
		}

		tabletAlias, err := topoproto.ParseTabletAlias(tabletPath)
		if err != nil {
			return nil, err
		}

		// Perform an action on a tablet.
		if r.Method == "POST" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			}
			action := r.FormValue("action")
			if action == "" {
				return nil, errors.New("must specify action")
			}
			return actions.ApplyTabletAction(ctx, action, tabletAlias, r), nil
		}

		// Get the tablet record.
		t, err := ts.GetTablet(ctx, tabletAlias)
		// Pass the embedded proto directly or jsonpb will panic.
		return t.Tablet, err
	})

	// Healthcheck real time status per (cell, keyspace, tablet type, metric).
	handleCollection("tablet_statuses", func(r *http.Request) (interface{}, error) {
		targetPath := getItemPath(r.URL.Path)

		// Get the heatmap data based on query parameters.
		if targetPath == "" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			}
			keyspace := r.FormValue("keyspace")
			cell := r.FormValue("cell")
			tabletType := r.FormValue("type")
			_, err := topoproto.ParseTabletType(tabletType)
			// Excluding the case where parsing fails because "all" tablet types was chosen.
			if err != nil && tabletType != "all" {
				return nil, fmt.Errorf("invalid tablet type: %v ", err)
			}
			metric := r.FormValue("metric")

			// Set default values if none were specified in the query params.
			if keyspace == "" {
				keyspace = "all"
			}
			if cell == "" {
				cell = "all"
			}
			if tabletType == "" {
				tabletType = "all"
			}
			if metric == "" {
				metric = "health"
			}

			if realtimeStats == nil {
				return nil, fmt.Errorf("realtimeStats not initialized")
			}
			heatmap, err := realtimeStats.heatmapData(keyspace, cell, tabletType, metric)
			if err != nil {
				return nil, fmt.Errorf("couldn't get heatmap data: %v", err)
			}
			return heatmap, nil
		}
		return nil, fmt.Errorf("invalid target path: %q expected path: ?keyspace=<keyspace>&cell=<cell>&type=<type>&metric=<metric>", targetPath)
	})

	handleCollection("tablet_health", func(r *http.Request) (interface{}, error) {
		tabletPath := getItemPath(r.URL.Path)
		parts := strings.SplitN(tabletPath, "/", 2)

		// Request was incorrectly formatted.
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid tablet_health path: %q expected path: /tablet_health/<cell>/<uid>", tabletPath)
		}

		if realtimeStats == nil {
			return nil, fmt.Errorf("realtimeStats not initialized")
		}

		cell := parts[0]
		uidStr := parts[1]
		uid, err := topoproto.ParseUID(uidStr)
		if err != nil {
			return nil, fmt.Errorf("incorrect uid: %v", err)
		}

		tabletAlias := topodatapb.TabletAlias{
			Cell: cell,
			Uid:  uid,
		}
		tabletStat, err := realtimeStats.tabletStats(&tabletAlias)
		if err != nil {
			return nil, fmt.Errorf("could not get tabletStats: %v", err)
		}
		return tabletStat, nil
	})

	handleCollection("topology_info", func(r *http.Request) (interface{}, error) {
		targetPath := getItemPath(r.URL.Path)

		// Retrieve topology information (keyspaces, cells, and types) based on query params.
		if targetPath == "" {
			if err := r.ParseForm(); err != nil {
				return nil, err
			}
			keyspace := r.FormValue("keyspace")
			cell := r.FormValue("cell")

			// Set default values if none were specified in the query params.
			if keyspace == "" {
				keyspace = "all"
			}
			if cell == "" {
				cell = "all"
			}

			if realtimeStats == nil {
				return nil, fmt.Errorf("realtimeStats not initialized")
			}
			return realtimeStats.topologyInfo(keyspace, cell), nil
		}
		return nil, fmt.Errorf("invalid target path: %q expected path: ?keyspace=<keyspace>&cell=<cell>", targetPath)
	})

	// Vtctl Command
	http.HandleFunc(apiPrefix+"vtctl/", func(w http.ResponseWriter, r *http.Request) {
		if err := acl.CheckAccessHTTP(r, acl.ADMIN); err != nil {
			httpErrorf(w, r, "Access denied")
			return
		}
		var args []string
		resp := struct {
			Error  string
			Output string
		}{}
		if err := unmarshalRequest(r, &args); err != nil {
			httpErrorf(w, r, "can't unmarshal request: %v", err)
			return
		}

		logstream := logutil.NewMemoryLogger()

		wr := wrangler.New(logstream, ts, tmClient)
		// TODO(enisoc): Context for run command should be request-scoped.
		err := vtctl.RunCommand(ctx, wr, args)
		if err != nil {
			resp.Error = err.Error()
		}
		resp.Output = logstream.String()
		data, err := json.MarshalIndent(resp, "", " ")
		if err != nil {
			httpErrorf(w, r, "json error: %v", err)
			return
		}
		w.Header().Set("Content-Type", jsonContentType)
		w.Write(data)
	})

	// Schema Change
	http.HandleFunc(apiPrefix+"schema/apply", func(w http.ResponseWriter, r *http.Request) {
		if err := acl.CheckAccessHTTP(r, acl.ADMIN); err != nil {
			httpErrorf(w, r, "Access denied")
			return
		}
		req := struct {
			Keyspace, SQL       string
			SlaveTimeoutSeconds int
		}{}
		if err := unmarshalRequest(r, &req); err != nil {
			httpErrorf(w, r, "can't unmarshal request: %v", err)
			return
		}
		if req.SlaveTimeoutSeconds <= 0 {
			req.SlaveTimeoutSeconds = 10
		}

		logger := logutil.NewCallbackLogger(func(ev *logutilpb.Event) {
			w.Write([]byte(logutil.EventString(ev)))
		})
		wr := wrangler.New(logger, ts, tmClient)

		executor := schemamanager.NewTabletExecutor(wr, time.Duration(req.SlaveTimeoutSeconds)*time.Second)

		schemamanager.Run(ctx, schemamanager.NewUIController(req.SQL, req.Keyspace, w), executor)
	})
}
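// Hypothetical client-side sketch for the schema/apply endpoint above. The
// payload shape mirrors the handler's request struct; the host, port, and
// apiPrefix value ("/api/") are assumptions for illustration only.
func applySchemaSketch() error {
	body, err := json.Marshal(struct {
		Keyspace, SQL       string
		SlaveTimeoutSeconds int
	}{
		Keyspace:            "test_keyspace",
		SQL:                 "ALTER TABLE t ADD COLUMN c INT",
		SlaveTimeoutSeconds: 30,
	})
	if err != nil {
		return err
	}
	resp, err := http.Post("http://localhost:15000/api/schema/apply", "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// The handler streams log events as they are produced.
	_, err = io.Copy(os.Stdout, resp.Body)
	return err
}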
func TestRebuildShardRace(t *testing.T) {
	cells := []string{"test_cell"}
	logger := logutil.NewMemoryLogger()
	timeout := 10 * time.Second
	interrupted := make(chan struct{})

	// Set up topology.
	ts := zktopo.NewTestServer(t, cells)
	f := faketopo.New(t, logger, ts, cells)
	defer f.TearDown()

	keyspace := faketopo.TestKeyspace
	shard := faketopo.TestShard
	master := f.AddTablet(1, "test_cell", topo.TYPE_MASTER, nil)
	f.AddTablet(2, "test_cell", topo.TYPE_REPLICA, master)

	// Do an initial rebuild.
	if _, err := RebuildShard(logger, f.Topo, keyspace, shard, cells, timeout, interrupted); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Check initial state.
	ep, err := ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_MASTER)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}
	ep, err = ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}

	// Install a hook that hands out locks out of order to simulate a race.
	trigger := make(chan struct{})
	stalled := make(chan struct{})
	done := make(chan struct{})
	wait := make(chan bool, 2)
	wait <- true  // first guy waits for trigger
	wait <- false // second guy doesn't wait
	ts.HookLockSrvShardForAction = func() {
		if <-wait {
			close(stalled)
			<-trigger
		}
	}

	// Make a change and start a rebuild that will stall when it tries to get
	// the SrvShard lock.
	masterInfo := f.GetTablet(1)
	masterInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ts, masterInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	go func() {
		if _, err := RebuildShard(logger, f.Topo, keyspace, shard, cells, timeout, interrupted); err != nil {
			t.Fatalf("RebuildShard: %v", err)
		}
		close(done)
	}()

	// Wait for the first rebuild to stall.
	<-stalled

	// While the first rebuild is stalled, make another change and start a
	// rebuild that doesn't stall.
	replicaInfo := f.GetTablet(2)
	replicaInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ts, replicaInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	if _, err := RebuildShard(logger, f.Topo, keyspace, shard, cells, timeout, interrupted); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Now that the second rebuild is done, un-stall the first rebuild and wait
	// for it to finish.
	close(trigger)
	<-done

	// Check that the rebuild picked up both changes.
	if _, err := ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_MASTER); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("first change wasn't picked up by second rebuild")
	}
	if _, err := ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_REPLICA); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("second change was overwritten by first rebuild finishing late")
	}
}
func TestRebuildShardRace(t *testing.T) {
	ctx := context.Background()
	cells := []string{"test_cell"}
	logger := logutil.NewMemoryLogger()

	// Set up topology.
	ts := zktopo.NewTestServer(t, cells)
	si, err := GetOrCreateShard(ctx, ts, testKeyspace, testShard)
	if err != nil {
		t.Fatalf("GetOrCreateShard: %v", err)
	}
	si.Cells = append(si.Cells, cells[0])
	if err := topo.UpdateShard(ctx, ts, si); err != nil {
		t.Fatalf("UpdateShard: %v", err)
	}
	masterInfo := addTablet(ctx, t, ts, 1, cells[0], topo.TYPE_MASTER)
	replicaInfo := addTablet(ctx, t, ts, 2, cells[0], topo.TYPE_REPLICA)

	// Do an initial rebuild.
	if _, err := RebuildShard(ctx, logger, ts, testKeyspace, testShard, cells, time.Minute); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Check initial state.
	ep, _, err := ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_MASTER)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}
	ep, _, err = ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}

	// Install a hook that hands out locks out of order to simulate a race.
	trigger := make(chan struct{})
	stalled := make(chan struct{})
	done := make(chan struct{})
	wait := make(chan bool, 2)
	wait <- true  // first guy waits for trigger
	wait <- false // second guy doesn't wait
	ts.HookLockSrvShardForAction = func() {
		if <-wait {
			close(stalled)
			<-trigger
		}
	}

	// Make a change and start a rebuild that will stall when it
	// tries to get the SrvShard lock.
	masterInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ctx, ts, masterInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	go func() {
		if _, err := RebuildShard(ctx, logger, ts, testKeyspace, testShard, cells, time.Minute); err != nil {
			t.Fatalf("RebuildShard: %v", err)
		}
		close(done)
	}()

	// Wait for the first rebuild to stall.
	<-stalled

	// While the first rebuild is stalled, make another change and start a
	// rebuild that doesn't stall.
	replicaInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ctx, ts, replicaInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	if _, err := RebuildShard(ctx, logger, ts, testKeyspace, testShard, cells, time.Minute); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Now that the second rebuild is done, un-stall the first rebuild and wait
	// for it to finish.
	close(trigger)
	<-done

	// Check that the rebuild picked up both changes.
	if _, _, err := ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_MASTER); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("first change wasn't picked up by second rebuild")
	}
	if _, _, err := ts.GetEndPoints(ctx, cells[0], testKeyspace, testShard, topo.TYPE_REPLICA); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("second change was overwritten by first rebuild finishing late")
	}
}
// Instantiate is part of the workflow.Factory interface.
func (f *WorkflowFactory) Instantiate(w *workflowpb.Workflow) (workflow.Workflow, error) {
	return &Workflow{
		logger: logutil.NewMemoryLogger(),
	}, nil
}