Example #1
// synchronizeReplication phase:
// 1 - ask the subset slave to stop replication
// 2 - sleep for 5 seconds
// 3 - ask the superset slave to stop replication
// Note this is not 100% correct, but good enough for now
func (worker *SQLDiffWorker) synchronizeReplication() error {
	worker.setState(SQLDiffSynchronizeReplication)

	// stop replication on subset slave
	worker.wr.Logger().Infof("Stopping replication on subset slave %v", worker.subset.alias)
	subsetTablet, err := worker.wr.TopoServer().GetTablet(worker.subset.alias)
	if err != nil {
		return err
	}
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	err = worker.wr.TabletManagerClient().StopSlave(ctx, subsetTablet)
	cancel()
	if err != nil {
		return fmt.Errorf("Cannot stop slave %v: %v", worker.subset.alias, err)
	}
	if worker.CheckInterrupted() {
		return topo.ErrInterrupted
	}

	// change the cleaner actions from ChangeSlaveType(rdonly)
	// to StartSlave() + ChangeSlaveType(spare)
	wrangler.RecordStartSlaveAction(worker.cleaner, subsetTablet, 30*time.Second)
	action, err := wrangler.FindChangeSlaveTypeActionByTarget(worker.cleaner, worker.subset.alias)
	if err != nil {
		return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", worker.subset.alias, err)
	}
	action.TabletType = topo.TYPE_SPARE

	// sleep for a few seconds
	time.Sleep(5 * time.Second)
	if worker.CheckInterrupted() {
		return topo.ErrInterrupted
	}

	// stop replication on superset slave
	worker.wr.Logger().Infof("Stopping replication on superset slave %v", worker.superset.alias)
	supersetTablet, err := worker.wr.TopoServer().GetTablet(worker.superset.alias)
	if err != nil {
		return err
	}
	ctx, cancel = context.WithTimeout(context.TODO(), 30*time.Second)
	err = worker.wr.TabletManagerClient().StopSlave(ctx, supersetTablet)
	cancel()
	if err != nil {
		return fmt.Errorf("Cannot stop slave %v: %v", worker.superset.alias, err)
	}

	// change the cleaner actions from ChangeSlaveType(rdonly)
	// to StartSlave() + ChangeSlaveType(spare)
	wrangler.RecordStartSlaveAction(worker.cleaner, supersetTablet, 30*time.Second)
	action, err = wrangler.FindChangeSlaveTypeActionByTarget(worker.cleaner, worker.superset.alias)
	if err != nil {
		return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", worker.superset.alias, err)
	}
	action.TabletType = topo.TYPE_SPARE

	return nil
}
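Both RPCs above follow the same shape: give each call its own 30-second context and release it afterwards, even when the call fails. A minimal generic sketch of that shape (not taken from the Vitess sources; assumes the usual context and time imports):

// callWithTimeout runs f under its own deadline and always releases the context.
func callWithTimeout(d time.Duration, f func(ctx context.Context) error) error {
	ctx, cancel := context.WithTimeout(context.Background(), d)
	defer cancel()
	return f(ctx)
}

// Hypothetical usage with the tablet manager client from the snippet above:
//	err := callWithTimeout(30*time.Second, func(ctx context.Context) error {
//		return worker.wr.TabletManagerClient().StopSlave(ctx, subsetTablet)
//	})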
Example #2
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'checker' pointing back to us
// - get the aliases of all the targets
func (vscw *VerticalSplitCloneWorker) findTargets() error {
	vscw.setState(stateVSCFindTargets)

	// find an appropriate endpoint in the source shard
	var err error
	vscw.sourceAlias, err = findChecker(vscw.wr, vscw.cleaner, vscw.cell, vscw.sourceKeyspace, "0")
	if err != nil {
		return fmt.Errorf("cannot find checker for %v/%v/0: %v", vscw.cell, vscw.sourceKeyspace, err)
	}
	vscw.wr.Logger().Infof("Using tablet %v as the source", vscw.sourceAlias)

	// get the tablet info for it
	vscw.sourceTablet, err = vscw.wr.TopoServer().GetTablet(vscw.sourceAlias)
	if err != nil {
		return fmt.Errorf("cannot read tablet %v: %v", vscw.sourceTablet, err)
	}

	// stop replication on it
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	err = vscw.wr.TabletManagerClient().StopSlave(ctx, vscw.sourceTablet)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot stop replication on tablet %v: %v", vscw.sourceAlias, err)
	}

	wrangler.RecordStartSlaveAction(vscw.cleaner, vscw.sourceTablet, 30*time.Second)
	action, err := wrangler.FindChangeSlaveTypeActionByTarget(vscw.cleaner, vscw.sourceAlias)
	if err != nil {
		return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", vscw.sourceAlias, err)
	}
	action.TabletType = topo.TYPE_SPARE

	return vscw.findMasterTargets()
}
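findTargets registers undo actions (StartSlave, then ChangeSlaveType back to spare) with a cleaner before it mutates replication state. The wrangler.Cleaner API itself is not shown on this page; as an illustration only, the record-then-replay idea can be sketched with a slice of closures executed in reverse order:

// cleanupList is an illustrative stand-in for a cleaner: it records undo
// actions and replays them last-in-first-out, keeping the first error.
type cleanupList struct {
	actions []func() error
}

func (c *cleanupList) Record(f func() error) {
	c.actions = append(c.actions, f)
}

func (c *cleanupList) CleanUp() error {
	var firstErr error
	for i := len(c.actions) - 1; i >= 0; i-- {
		if err := c.actions[i](); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}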
Example #3
func main() {
	flag.Parse()
	client, err := zrpc.Dial("tcp://127.0.0.1:1337")
	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	runs := 10000

	for i := 0; i < runs; i++ {
		// Create the request and response
		req := &pb.ReverseRequest{
			NormalString: proto.String("teststring"),
		}
		resp := &pb.ReverseResponse{}

		// Create the context and pass request timeout and service name
		ctx, cancel := context.WithTimeout(context.Background(), time.Second*1)
		ctx = zrpc.NewServiceNameContext(ctx, "reverseservice")
		if err := client.Call(ctx, req, resp); err != nil {
			log.Println("error:", err)
		} else {
			log.Println("received:", resp)
		}
		cancel() // release this iteration's context promptly

		log.Printf("%d goroutines", runtime.NumGoroutine())
		// time.Sleep(time.Millisecond * 500)
	}

	totalTime := time.Since(start)

	log.Printf("Performed %d reqs in %s (avg %s)", runs, totalTime, totalTime/time.Duration(runs))
}
Example #4
// runSqlCommands will send the sql commands to the remote tablet.
func runSqlCommands(wr *wrangler.Wrangler, ti *topo.TabletInfo, commands []string, abort chan struct{}, disableBinLogs bool) error {
	for _, command := range commands {
		command, err := fillStringTemplate(command, map[string]string{"DatabaseName": ti.DbName()})
		if err != nil {
			return fmt.Errorf("fillStringTemplate failed: %v", err)
		}

		ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
		_, err = wr.TabletManagerClient().ExecuteFetch(ctx, ti, command, 0, false, disableBinLogs)
		cancel()
		if err != nil {
			return err
		}

		// check on abort
		select {
		case <-abort:
			return nil
		default:
		}
	}

	return nil
}
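runSqlCommands checks an abort channel between statements while each statement runs under its own 30-second context. A hedged alternative sketch (not the vtworker code) funnels both signals through a single context, so a timeout and an abort are handled the same way; exec is a placeholder for the real ExecuteFetch call:

func runCommandsCtx(ctx context.Context, commands []string, exec func(context.Context, string) error) error {
	for _, command := range commands {
		// Stop early if the caller cancelled or the deadline passed.
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := exec(ctx, command); err != nil {
			return err
		}
	}
	return nil
}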
Example #5
func main() {
	const numRequests = 3
	// create a channel to capture our results
	forecasts := make(chan *openweathermap.Forecast, numRequests)

	// create our channel of requests
	requests := make(chan par.RequestFunc, numRequests)
	requests <- findById(4288809, forecasts) // Covington, VA
	requests <- findById(4288809, forecasts)
	requests <- findById(4140963, forecasts) // DC
	close(requests)                          // important to remember to close the channel

	// resolver := par.Requests(requests).WithRedundancy(1)
	resolver := par.Requests(requests).WithConcurrency(numRequests)
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	// ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	err := resolver.DoWithContext(ctx)
	cancel()
	ok(err)

	// the forecasts channel now contains all our forecasts
	close(forecasts)
	cities := map[string]*openweathermap.Forecast{}
	for forecast := range forecasts {
		cities[forecast.Name] = forecast
	}
}
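The par package resolves a channel of request functions under one shared timeout. A comparable fan-out can be sketched with the standard golang.org/x/sync/errgroup package; fetchForecast is a placeholder for a lookup such as findById above, and the durations are arbitrary (assumes context, time and errgroup imports):

// fetchForecast stands in for a real per-city lookup.
func fetchForecast(ctx context.Context, id int) error {
	select {
	case <-time.After(100 * time.Millisecond): // simulate work
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func fetchAll(ids []int) error {
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	g, ctx := errgroup.WithContext(ctx)
	for _, id := range ids {
		id := id // capture the loop variable for the goroutine
		g.Go(func() error {
			return fetchForecast(ctx, id)
		})
	}
	return g.Wait() // first error, if any, once all goroutines finish
}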
Example #6
// BasicMngrHandler can be used in a "manager" ServeHTTP once the required interfaces,
// e.g. authmodel.UserManager, authmodel.GroupManager, conf.Configurator, have been initialized.
func BasicMngrHandler(authCtx *AuthContext, rw http.ResponseWriter, req *http.Request, cond *Condition, fn HandleFunc) {
	var cancel context.CancelFunc
	authCtx.Context, cancel = context.WithTimeout(context.Background(), HandleTimeout)
	defer cancel()

	authCtx.req = req
	token := strings.TrimPrefix(req.Header.Get("Authorization"), "Bearer ")
	authCtx.saveToken(token)
	authCtx.saveId(mux.Vars(req)["user_id"])

	authCtx.Notifications = DEFAULT_NOTIFICATOR
	authCtx.Logs = DEFAULT_LOGGER

	rw.Header().Set("Content-Type", "application/json; charset=utf-8")
	if cond.RequiredPri != nil || cond.Owner {
		_, err := authCtx.ValidCurrentUser(cond.Owner, cond.RequiredPri)
		if err != nil {
			JSONError(rw, err.Error(), http.StatusForbidden)
			return
		}
	}

	status, err := fn(authCtx, rw, req)
	if err != nil {
		authCtx.Logs.Errorf("HTTP %d: %q", status, err)
		JSONError(rw, err.Error(), status)
	}
}
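BasicMngrHandler derives its timeout from context.Background(). On Go 1.7+ the request already carries a context, so a small middleware sketch (not part of this package) could derive the deadline from r.Context() instead, letting client disconnects cancel the work as well:

func withTimeout(h http.Handler, d time.Duration) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		ctx, cancel := context.WithTimeout(r.Context(), d)
		defer cancel()
		h.ServeHTTP(w, r.WithContext(ctx))
	})
}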
Example #7
func (h handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	ctx := context.Background()
	ctx, cancel := context.WithTimeout(ctx, RequestTimeout)
	defer cancel()
	ctx = reqid.NewContext(ctx, reqid.New())
	log.Printf(ctx, "http request: remote=%q method=%q url=%q",
		r.RemoteAddr, r.Method, r.URL)
	h(ctx, w, r)
}
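reqid.NewContext attaches a request ID to the context for downstream logging. A hedged sketch of what such helpers typically look like, using an unexported key type; the real reqid package may differ:

type requestIDKey struct{}

// newRequestIDContext returns a copy of ctx carrying the request ID.
func newRequestIDContext(ctx context.Context, id string) context.Context {
	return context.WithValue(ctx, requestIDKey{}, id)
}

// requestIDFromContext extracts the request ID, if one was set.
func requestIDFromContext(ctx context.Context) (string, bool) {
	id, ok := ctx.Value(requestIDKey{}).(string)
	return id, ok
}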
Example #8
func (tm *TabletManager) TabletExternallyReparented(ctx context.Context, args *gorpcproto.TabletExternallyReparentedArgs, reply *rpc.Unused) error {
	// TODO(alainjobart) we should forward the RPC deadline from
	// the original gorpc call. Until we support that, use a
	// reasonable hard-coded value.
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	defer cancel()
	return tm.agent.RpcWrapLock(ctx, actionnode.TABLET_ACTION_EXTERNALLY_REPARENTED, args, reply, false, func() error {
		return tm.agent.TabletExternallyReparented(ctx, args.ExternalID)
	})
}
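The TODO above wants the caller's RPC deadline forwarded instead of the hard-coded 30 seconds. One hedged way to sketch that fallback (not the eventual Vitess implementation): keep an existing deadline when there is one, otherwise impose the default.

// withDefaultTimeout preserves an existing deadline on ctx and only adds one
// when the caller did not set any; either way the caller should cancel.
func withDefaultTimeout(ctx context.Context, d time.Duration) (context.Context, context.CancelFunc) {
	if _, ok := ctx.Deadline(); ok {
		return context.WithCancel(ctx)
	}
	return context.WithTimeout(ctx, d)
}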
Example #9
// applySqlShard applies a given SQL change on a given tablet alias. It allows executing arbitrary
// SQL statements, but doesn't return any results, so it's only useful for SQL statements
// that would be run for their effects (e.g., CREATE).
// It works by applying the SQL statement on the shard's master tablet with replication turned on.
// Thus it should be used only for changes that can be applied to a live instance without causing issues;
// it shouldn't be used for anything that will require a pivot.
// The SQL statement string is expected to have {{.DatabaseName}} in place of the actual db name.
func (wr *Wrangler) applySqlShard(tabletInfo *topo.TabletInfo, change string) error {
	filledChange, err := fillStringTemplate(change, map[string]string{"DatabaseName": tabletInfo.DbName()})
	if err != nil {
		return fmt.Errorf("fillStringTemplate failed: %v", err)
	}
	ctx, cancel := context.WithTimeout(wr.ctx, 30*time.Second)
	defer cancel()
	// Need to make sure that we enable binlog, since we're only applying the statement on masters.
	_, err = wr.tmc.ExecuteFetch(ctx, tabletInfo, filledChange, 0, false, false)
	return err
}
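applySqlShard leans on a fillStringTemplate helper to substitute {{.DatabaseName}}. A plausible sketch of such a helper with text/template (the actual Vitess helper may differ in details; assumes bytes and text/template imports):

func fillStringTemplate(tmpl string, vars interface{}) (string, error) {
	t, err := template.New("sql").Parse(tmpl)
	if err != nil {
		return "", err
	}
	var buf bytes.Buffer
	if err := t.Execute(&buf, vars); err != nil {
		return "", err
	}
	return buf.String(), nil
}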
Example #10
// New creates a new Wrangler object.
//
// actionTimeout: how long should we wait for an action to complete?
// - if using wrangler for just one action, this is set properly
//   upon wrangler creation.
// - if re-using wrangler multiple times, call ResetActionTimeout before
//   every action. Do not use this too much, just for corner cases.
//   It is just much easier to create a new Wrangler object per action.
//
// lockTimeout: how long should we wait for the initial lock to start
// a complex action?  This is distinct from actionTimeout because most
// of the time, we want to immediately know that our action will
// fail. However, automated actions will need some time to arbitrate
// the locks.
func New(logger logutil.Logger, ts topo.Server, actionTimeout, lockTimeout time.Duration) *Wrangler {
	ctx, cancel := context.WithTimeout(context.Background(), actionTimeout)
	return &Wrangler{
		logger:      logger,
		ts:          ts,
		tmc:         tmclient.NewTabletManagerClient(),
		ctx:         ctx,
		cancel:      cancel,
		deadline:    time.Now().Add(actionTimeout),
		lockTimeout: lockTimeout,
	}
}
Example #11
// handleSearch handles URLs like /search?q=golang&timeout=1s by forwarding the
// query to google.Search. If the query param includes timeout, the search is
// canceled after that duration elapses.
func handleSearch(w http.ResponseWriter, req *http.Request) {
	// ctx is the Context for this handler. Calling cancel closes the
	// ctx.Done channel, which is the cancellation signal for requests
	// started by this handler.
	var (
		ctx    context.Context
		cancel context.CancelFunc
	)
	timeout, err := time.ParseDuration(req.FormValue("timeout"))
	if err == nil {
		// The request has a timeout, so create a context that is
		// canceled automatically when the timeout expires.
		ctx, cancel = context.WithTimeout(context.Background(), timeout)
	} else {
		ctx, cancel = context.WithCancel(context.Background())
	}
	defer cancel() // Cancel ctx as soon as handleSearch returns.

	// Check the search query.
	query := req.FormValue("q")
	if query == "" {
		http.Error(w, "no query", http.StatusBadRequest)
		return
	}

	// Store the user IP in ctx for use by code in other packages.
	userIP, err := userip.FromRequest(req)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	ctx = userip.NewContext(ctx, userIP)

	// Run the Google search and print the results.
	start := time.Now()
	results, err := google.Search(ctx, query)
	elapsed := time.Since(start)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if err := resultsTemplate.Execute(w, struct {
		Results          google.Results
		Timeout, Elapsed time.Duration
	}{
		Results: results,
		Timeout: timeout,
		Elapsed: elapsed,
	}); err != nil {
		log.Print(err)
		return
	}
}
Example #12
func (s *BaseSuite) SetUpTest(c *C, clientAddr string) {
	if s.client == nil {
		log.Println("setting up client at:", clientAddr)
		s.client, _ = Dial(clientAddr)
	}

	s.req = &pb.ReverseRequest{
		NormalString: proto.String("test"),
	}
	s.resp = &pb.ReverseResponse{}
	ctx, _ := context.WithTimeout(context.Background(), time.Second*3)
	s.ctx = NewServiceNameContext(ctx, "reverseservice")
}
Example #13
func ExampleWithTimeout() {
	// Pass a context with a timeout to tell a blocking function that it
	// should abandon its work after the timeout elapses.
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	select {
	case <-time.After(200 * time.Millisecond):
		fmt.Println("overslept")
	case <-ctx.Done():
		fmt.Println(ctx.Err()) // prints "context deadline exceeded"
	}
	// Output:
	// context deadline exceeded
}
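A companion sketch in the same style shows context.WithDeadline, which the timeout variant above wraps; the durations are arbitrary (assumes context, fmt and time imports):

func ExampleWithDeadline() {
	d := time.Now().Add(100 * time.Millisecond)
	ctx, cancel := context.WithDeadline(context.Background(), d)
	// Even though the deadline will expire on its own, releasing the context
	// as soon as we are done with it is good practice.
	defer cancel()

	select {
	case <-time.After(200 * time.Millisecond):
		fmt.Println("overslept")
	case <-ctx.Done():
		fmt.Println(ctx.Err())
	}
	// Output:
	// context deadline exceeded
}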
Example #14
// findTargets phase:
// - find one rdonly in the source shard
// - mark it as 'checker' pointing back to us
// - get the aliases of all the targets
func (scw *SplitCloneWorker) findTargets() error {
	scw.setState(stateSCFindTargets)
	var err error

	// find an appropriate endpoint in the source shards
	scw.sourceAliases = make([]topo.TabletAlias, len(scw.sourceShards))
	for i, si := range scw.sourceShards {
		scw.sourceAliases[i], err = findChecker(scw.wr, scw.cleaner, scw.cell, si.Keyspace(), si.ShardName())
		if err != nil {
			return fmt.Errorf("cannot find checker for %v/%v/%v: %v", scw.cell, si.Keyspace(), si.ShardName(), err)
		}
		scw.wr.Logger().Infof("Using tablet %v as source for %v/%v", scw.sourceAliases[i], si.Keyspace(), si.ShardName())
	}

	// get the tablet info for them, and stop their replication
	scw.sourceTablets = make([]*topo.TabletInfo, len(scw.sourceAliases))
	for i, alias := range scw.sourceAliases {
		scw.sourceTablets[i], err = scw.wr.TopoServer().GetTablet(alias)
		if err != nil {
			return fmt.Errorf("cannot read tablet %v: %v", alias, err)
		}

		ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
		err = scw.wr.TabletManagerClient().StopSlave(ctx, scw.sourceTablets[i])
		cancel()
		if err != nil {
			return fmt.Errorf("cannot stop replication on tablet %v: %v", alias, err)
		}

		wrangler.RecordStartSlaveAction(scw.cleaner, scw.sourceTablets[i], 30*time.Second)
		action, err := wrangler.FindChangeSlaveTypeActionByTarget(scw.cleaner, alias)
		if err != nil {
			return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", alias, err)
		}
		action.TabletType = topo.TYPE_SPARE
	}

	return scw.findMasterTargets()
}
Example #15
// executeFetchLoop loops over the provided insertChannel
// and sends the commands to the provided tablet.
func executeFetchLoop(wr *wrangler.Wrangler, ti *topo.TabletInfo, insertChannel chan string, abort chan struct{}, disableBinLogs bool) error {
	for {
		select {
		case cmd, ok := <-insertChannel:
			if !ok {
				// no more to read, we're done
				return nil
			}
			cmd = "INSERT INTO `" + ti.DbName() + "`." + cmd
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			_, err := wr.TabletManagerClient().ExecuteFetch(ctx, ti, cmd, 0, false, disableBinLogs)
			cancel()
			if err != nil {
				return fmt.Errorf("ExecuteFetch failed: %v", err)
			}
		case <-abort:
			// FIXME(alainjobart): note this select case
			// could be starved here, and we might miss
			// the abort in some corner cases.
			return nil
		}
	}
}
Example #16
// copy phase:
//	- copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (vscw *VerticalSplitCloneWorker) copy() error {
	vscw.setState(stateVSCCopy)

	// get source schema
	sourceSchemaDefinition, err := vscw.wr.GetSchema(vscw.sourceAlias, vscw.tables, nil, true)
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", vscw.sourceAlias, err)
	}
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter")
	}
	vscw.wr.Logger().Infof("Source tablet has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	vscw.mu.Lock()
	vscw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		vscw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount,
		}
	}
	vscw.startTime = time.Now()
	vscw.mu.Unlock()

	// Count rows
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		vscw.tableStatus[i].mu.Lock()
		if td.Type == myproto.TABLE_BASE_TABLE {
			vscw.tableStatus[i].rowCount = td.RowCount
		} else {
			vscw.tableStatus[i].isView = true
		}
		vscw.tableStatus[i].mu.Unlock()
	}

	// In parallel, setup the channels to send SQL data chunks to for each destination tablet.
	//
	// mu protects the abort channel for closing, and firstError
	mu := sync.Mutex{}
	abort := make(chan struct{})
	var firstError error

	processError := func(format string, args ...interface{}) {
		vscw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if abort != nil {
			close(abort)
			abort = nil
			firstError = fmt.Errorf(format, args...)
		}
		mu.Unlock()
	}

	// since we're writing only to masters, we need to enable bin logs so that replication happens
	disableBinLogs := false

	insertChannels := make([]chan string, len(vscw.destinationAliases))
	destinationWaitGroup := sync.WaitGroup{}
	for i, tabletAlias := range vscw.destinationAliases {
		// we create one channel per destination tablet.  It
		// is sized to have a buffer of a maximum of
		// destinationWriterCount * 2 items, to hopefully
		// always have data. We then have
		// destinationWriterCount go routines reading from it.
		insertChannels[i] = make(chan string, vscw.destinationWriterCount*2)

		go func(ti *topo.TabletInfo, insertChannel chan string) {
			for j := 0; j < vscw.destinationWriterCount; j++ {
				destinationWaitGroup.Add(1)
				go func() {
					defer destinationWaitGroup.Done()

					if err := executeFetchLoop(vscw.wr, ti, insertChannel, abort, disableBinLogs); err != nil {
						processError("executeFetchLoop failed: %v", err)
					}
				}()
			}
		}(vscw.destinationTablets[tabletAlias], insertChannels[i])
	}

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	sema := sync2.NewSemaphore(vscw.sourceReaderCount, 0)
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TABLE_VIEW {
			continue
		}

		chunks, err := findChunks(vscw.wr, vscw.sourceTablet, td, vscw.minTableSizeForSplit, vscw.sourceReaderCount)
		if err != nil {
			return err
		}
		vscw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

		for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
			sourceWaitGroup.Add(1)
			go func(td *myproto.TableDefinition, tableIndex, chunkIndex int) {
				defer sourceWaitGroup.Done()

				sema.Acquire()
				defer sema.Release()

				vscw.tableStatus[tableIndex].threadStarted()

				// build the query, and start the streaming
				selectSQL := buildSQLFromChunks(vscw.wr, td, chunks, chunkIndex, vscw.sourceAlias.String())
				qrr, err := NewQueryResultReaderForTablet(vscw.wr.TopoServer(), vscw.sourceAlias, selectSQL)
				if err != nil {
					processError("NewQueryResultReaderForTablet failed: %v", err)
					return
				}
				defer qrr.Close()

				// process the data
				if err := vscw.processData(td, tableIndex, qrr, insertChannels, vscw.destinationPackCount, abort); err != nil {
					processError("QueryResultReader failed: %v", err)
				}
				vscw.tableStatus[tableIndex].threadDone()
			}(td, tableIndex, chunkIndex)
		}
	}
	sourceWaitGroup.Wait()

	for _, c := range insertChannels {
		close(c)
	}
	destinationWaitGroup.Wait()
	if firstError != nil {
		return firstError
	}

	// then create and populate the blp_checkpoint table
	if vscw.strategy.PopulateBlpCheckpoint {
		// get the current position from the source
		ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
		status, err := vscw.wr.TabletManagerClient().SlaveStatus(ctx, vscw.sourceTablet)
		cancel()
		if err != nil {
			return err
		}

		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if vscw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BLP_FLAG_DONT_START
		}
		queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))
		for _, tabletAlias := range vscw.destinationAliases {
			destinationWaitGroup.Add(1)
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				vscw.wr.Logger().Infof("Making and populating blp_checkpoint table on tablet %v", ti.Alias)
				if err := runSqlCommands(vscw.wr, ti, queries, abort, disableBinLogs); err != nil {
					processError("blp_checkpoint queries failed on tablet %v: %v", ti.Alias, err)
				}
			}(vscw.destinationTablets[tabletAlias])
		}
		destinationWaitGroup.Wait()
		if firstError != nil {
			return firstError
		}
	}

	// Now we're done with data copy, update the shard's source info.
	if vscw.strategy.SkipSetSourceShards {
		vscw.wr.Logger().Infof("Skipping setting SourceShard on destination shard.")
	} else {
		vscw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", vscw.destinationKeyspace, vscw.destinationShard)
		if err := vscw.wr.SetSourceShards(vscw.destinationKeyspace, vscw.destinationShard, []topo.TabletAlias{vscw.sourceAlias}, vscw.tables); err != nil {
			return fmt.Errorf("Failed to set source shards: %v", err)
		}
	}

	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for _, tabletAlias := range vscw.reloadAliases {
		destinationWaitGroup.Add(1)
		go func(ti *topo.TabletInfo) {
			defer destinationWaitGroup.Done()
			vscw.wr.Logger().Infof("Reloading schema on tablet %v", ti.Alias)
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			if err := vscw.wr.TabletManagerClient().ReloadSchema(ctx, ti); err != nil {
				processError("ReloadSchema failed on tablet %v: %v", ti.Alias, err)
			}
			cancel()
		}(vscw.reloadTablets[tabletAlias])
	}
	destinationWaitGroup.Wait()
	return firstError
}
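Both copy implementations guard an abort channel and a firstError variable with a mutex. A compact hedged sketch of the same idea (not the vtworker code) built on sync.Once:

type firstErrorRecorder struct {
	once  sync.Once
	abort chan struct{} // closed when the first error is recorded
	err   error
}

func newFirstErrorRecorder() *firstErrorRecorder {
	return &firstErrorRecorder{abort: make(chan struct{})}
}

// Record keeps only the first error and closes abort so workers can stop.
func (f *firstErrorRecorder) Record(err error) {
	f.once.Do(func() {
		f.err = err
		close(f.abort)
	})
}

// Abort returns the channel workers should select on.
func (f *firstErrorRecorder) Abort() <-chan struct{} { return f.abort }

// Err returns the recorded error, or nil if nothing has failed yet.
func (f *firstErrorRecorder) Err() error {
	select {
	case <-f.abort:
		return f.err
	default:
		return nil
	}
}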
Example #17
// copy phase:
//	- copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (scw *SplitCloneWorker) copy() error {
	scw.setState(stateSCCopy)

	// get source schema from the first shard
	// TODO(alainjobart): for now, we assume the schema is compatible
	// on all source shards. Furthermore, we estimate the number of rows
	// in each source shard for each table to be about the same
	// (rowCount is used to estimate an ETA)
	sourceSchemaDefinition, err := scw.wr.GetSchema(scw.sourceAliases[0], nil, scw.excludeTables, true)
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", scw.sourceAliases[0], err)
	}
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter in tablet %v", scw.sourceAliases[0])
	}
	scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	scw.mu.Lock()
	scw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		scw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount * uint64(len(scw.sourceAliases)),
		}
	}
	scw.startTime = time.Now()
	scw.mu.Unlock()

	// Find the column index for the sharding columns in all the databases, and count rows
	columnIndexes := make([]int, len(sourceSchemaDefinition.TableDefinitions))
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TABLE_BASE_TABLE {
			// find the column to split on
			columnIndexes[tableIndex] = -1
			for i, name := range td.Columns {
				if name == scw.keyspaceInfo.ShardingColumnName {
					columnIndexes[tableIndex] = i
					break
				}
			}
			if columnIndexes[tableIndex] == -1 {
				return fmt.Errorf("table %v doesn't have a column named '%v'", td.Name, scw.keyspaceInfo.ShardingColumnName)
			}

			scw.tableStatus[tableIndex].mu.Lock()
			scw.tableStatus[tableIndex].rowCount = td.RowCount
			scw.tableStatus[tableIndex].mu.Unlock()
		} else {
			scw.tableStatus[tableIndex].mu.Lock()
			scw.tableStatus[tableIndex].isView = true
			scw.tableStatus[tableIndex].mu.Unlock()
		}
	}

	// In parallel, setup the channels to send SQL data chunks to for each destination tablet:
	//
	// mu protects the abort channel for closing, and firstError
	mu := sync.Mutex{}
	abort := make(chan struct{})
	var firstError error

	processError := func(format string, args ...interface{}) {
		scw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if abort != nil {
			close(abort)
			abort = nil
			firstError = fmt.Errorf(format, args...)
		}
		mu.Unlock()
	}

	// since we're writing only to masters, we need to enable bin logs so that replication happens
	disableBinLogs := false

	insertChannels := make([][]chan string, len(scw.destinationShards))
	destinationWaitGroup := sync.WaitGroup{}
	for shardIndex := range scw.destinationShards {
		insertChannels[shardIndex] = make([]chan string, len(scw.destinationAliases[shardIndex]))
		for i, tabletAlias := range scw.destinationAliases[shardIndex] {
			// we create one channel per destination tablet.  It
			// is sized to have a buffer of a maximum of
			// destinationWriterCount * 2 items, to hopefully
			// always have data. We then have
			// destinationWriterCount go routines reading from it.
			insertChannels[shardIndex][i] = make(chan string, scw.destinationWriterCount*2)

			go func(ti *topo.TabletInfo, insertChannel chan string) {
				for j := 0; j < scw.destinationWriterCount; j++ {
					destinationWaitGroup.Add(1)
					go func() {
						defer destinationWaitGroup.Done()
						if err := executeFetchLoop(scw.wr, ti, insertChannel, abort, disableBinLogs); err != nil {
							processError("executeFetchLoop failed: %v", err)
						}
					}()
				}
			}(scw.destinationTablets[shardIndex][tabletAlias], insertChannels[shardIndex][i])
		}
	}

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	for shardIndex := range scw.sourceShards {
		sema := sync2.NewSemaphore(scw.sourceReaderCount, 0)
		for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
			if td.Type == myproto.TABLE_VIEW {
				continue
			}

			rowSplitter := NewRowSplitter(scw.destinationShards, scw.keyspaceInfo.ShardingColumnType, columnIndexes[tableIndex])

			chunks, err := findChunks(scw.wr, scw.sourceTablets[shardIndex], td, scw.minTableSizeForSplit, scw.sourceReaderCount)
			if err != nil {
				return err
			}
			scw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

			for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
				sourceWaitGroup.Add(1)
				go func(td *myproto.TableDefinition, tableIndex, chunkIndex int) {
					defer sourceWaitGroup.Done()

					sema.Acquire()
					defer sema.Release()

					scw.tableStatus[tableIndex].threadStarted()

					// build the query, and start the streaming
					selectSQL := buildSQLFromChunks(scw.wr, td, chunks, chunkIndex, scw.sourceAliases[shardIndex].String())
					qrr, err := NewQueryResultReaderForTablet(scw.wr.TopoServer(), scw.sourceAliases[shardIndex], selectSQL)
					if err != nil {
						processError("NewQueryResultReaderForTablet failed: %v", err)
						return
					}
					defer qrr.Close()

					// process the data
					if err := scw.processData(td, tableIndex, qrr, rowSplitter, insertChannels, scw.destinationPackCount, abort); err != nil {
						processError("processData failed: %v", err)
					}
					scw.tableStatus[tableIndex].threadDone()
				}(td, tableIndex, chunkIndex)
			}
		}
	}
	sourceWaitGroup.Wait()

	for shardIndex := range scw.destinationShards {
		for _, c := range insertChannels[shardIndex] {
			close(c)
		}
	}
	destinationWaitGroup.Wait()
	if firstError != nil {
		return firstError
	}

	// then create and populate the blp_checkpoint table
	if scw.strategy.PopulateBlpCheckpoint {
		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if scw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BLP_FLAG_DONT_START
		}

		// get the current position from the sources
		for shardIndex := range scw.sourceShards {
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			status, err := scw.wr.TabletManagerClient().SlaveStatus(ctx, scw.sourceTablets[shardIndex])
			cancel()
			if err != nil {
				return err
			}

			queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))
		}

		for shardIndex := range scw.destinationShards {
			for _, tabletAlias := range scw.destinationAliases[shardIndex] {
				destinationWaitGroup.Add(1)
				go func(ti *topo.TabletInfo) {
					defer destinationWaitGroup.Done()
					scw.wr.Logger().Infof("Making and populating blp_checkpoint table on tablet %v", ti.Alias)
					if err := runSqlCommands(scw.wr, ti, queries, abort, disableBinLogs); err != nil {
						processError("blp_checkpoint queries failed on tablet %v: %v", ti.Alias, err)
					}
				}(scw.destinationTablets[shardIndex][tabletAlias])
			}
		}
		destinationWaitGroup.Wait()
		if firstError != nil {
			return firstError
		}
	}

	// Now we're done with data copy, update the shard's source info.
	// TODO(alainjobart) this is a superset, some shards may not
	// overlap, have to deal with this better (for N -> M splits
	// where both N>1 and M>1)
	if scw.strategy.SkipSetSourceShards {
		scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.")
	} else {
		for _, si := range scw.destinationShards {
			scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", si.Keyspace(), si.ShardName())
			if err := scw.wr.SetSourceShards(si.Keyspace(), si.ShardName(), scw.sourceAliases, nil); err != nil {
				return fmt.Errorf("Failed to set source shards: %v", err)
			}
		}
	}

	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for shardIndex := range scw.destinationShards {
		for _, tabletAlias := range scw.reloadAliases[shardIndex] {
			destinationWaitGroup.Add(1)
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				scw.wr.Logger().Infof("Reloading schema on tablet %v", ti.Alias)
				ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
				if err := scw.wr.TabletManagerClient().ReloadSchema(ctx, ti); err != nil {
					processError("ReloadSchema failed on tablet %v: %v", ti.Alias, err)
				}
				cancel()
			}(scw.reloadTablets[shardIndex][tabletAlias])
		}
	}
	destinationWaitGroup.Wait()
	return firstError
}
Example #18
// ResetActionTimeout should be used before every action on a wrangler
// object that is going to be re-used:
// - vtctl will not call this, as it does one action.
// - vtctld will not call this, as it creates a new Wrangler every time.
// However, some actions may need to run a cleanup phase even though the
// original Context has expired or been cancelled. The Wrangler cleaner
// module is one of these, as is the vt worker in some corner cases.
func (wr *Wrangler) ResetActionTimeout(actionTimeout time.Duration) {
	wr.ctx, wr.cancel = context.WithTimeout(context.Background(), actionTimeout)
	wr.deadline = time.Now().Add(actionTimeout)
}
Example #19
// ForkWithTimeout returns a ForkContext that applies a timeout of d to the
// supplied context.
func ForkWithTimeout(d time.Duration) ForkContext {
	return func(c context.Context) (context.Context, context.CancelFunc) {
		return context.WithTimeout(c, d)
	}
}
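A short usage sketch for the helper above; the enclosing function is hypothetical (assumes context, fmt and time imports):

func exampleFork() {
	fork := ForkWithTimeout(2 * time.Second)
	ctx, cancel := fork(context.Background())
	defer cancel()

	<-ctx.Done()           // just wait the timeout out for the example
	fmt.Println(ctx.Err()) // context deadline exceeded
}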
Example #20
// findChunks returns an array of chunks to use for splitting up a table
// into multiple data chunks. It only works for tables with a primary key
// (and the first column of the primary key is a numeric type).
// The array will always look like:
// "", "value1", "value2", ""
// A non-split tablet will just return:
// "", ""
func findChunks(wr *wrangler.Wrangler, ti *topo.TabletInfo, td *myproto.TableDefinition, minTableSizeForSplit uint64, sourceReaderCount int) ([]string, error) {
	result := []string{"", ""}

	// eliminate a few cases we don't split tables for
	if len(td.PrimaryKeyColumns) == 0 {
		// no primary key, what can we do?
		return result, nil
	}
	if td.DataLength < minTableSizeForSplit {
		// table is too small to split up
		return result, nil
	}

	// get the min and max of the leading column of the primary key
	query := fmt.Sprintf("SELECT MIN(%v), MAX(%v) FROM %v.%v", td.PrimaryKeyColumns[0], td.PrimaryKeyColumns[0], ti.DbName(), td.Name)
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	qr, err := wr.TabletManagerClient().ExecuteFetch(ctx, ti, query, 1, true, false)
	cancel()
	if err != nil {
		wr.Logger().Infof("Not splitting table %v into multiple chunks: %v", td.Name, err)
		return result, nil
	}
	if len(qr.Rows) != 1 {
		wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot get min and max", td.Name)
		return result, nil
	}
	if qr.Rows[0][0].IsNull() || qr.Rows[0][1].IsNull() {
		wr.Logger().Infof("Not splitting table %v into multiple chunks, min or max is NULL: %v %v", td.Name, qr.Rows[0][0], qr.Rows[0][1])
		return result, nil
	}
	switch qr.Fields[0].Type {
	case mproto.VT_TINY, mproto.VT_SHORT, mproto.VT_LONG, mproto.VT_LONGLONG, mproto.VT_INT24:
		minNumeric := sqltypes.MakeNumeric(qr.Rows[0][0].Raw())
		maxNumeric := sqltypes.MakeNumeric(qr.Rows[0][1].Raw())
		if qr.Rows[0][0].Raw()[0] == '-' {
			// signed values, use int64
			min, err := minNumeric.ParseInt64()
			if err != nil {
				wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, minNumeric, err)
				return result, nil
			}
			max, err := maxNumeric.ParseInt64()
			if err != nil {
				wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, maxNumeric, err)
				return result, nil
			}
			interval := (max - min) / int64(sourceReaderCount)
			if interval == 0 {
				wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min)
				return result, nil
			}

			result = make([]string, sourceReaderCount+1)
			result[0] = ""
			result[sourceReaderCount] = ""
			for i := int64(1); i < int64(sourceReaderCount); i++ {
				result[i] = fmt.Sprintf("%v", min+interval*i)
			}
			return result, nil
		}

		// unsigned values, use uint64
		min, err := minNumeric.ParseUint64()
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, minNumeric, err)
			return result, nil
		}
		max, err := maxNumeric.ParseUint64()
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, maxNumeric, err)
			return result, nil
		}
		interval := (max - min) / uint64(sourceReaderCount)
		if interval == 0 {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min)
			return result, nil
		}

		result = make([]string, sourceReaderCount+1)
		result[0] = ""
		result[sourceReaderCount] = ""
		for i := uint64(1); i < uint64(sourceReaderCount); i++ {
			result[i] = fmt.Sprintf("%v", min+interval*i)
		}
		return result, nil

	case mproto.VT_FLOAT, mproto.VT_DOUBLE:
		min, err := strconv.ParseFloat(qr.Rows[0][0].String(), 64)
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert min: %v %v", td.Name, qr.Rows[0][0], err)
			return result, nil
		}
		max, err := strconv.ParseFloat(qr.Rows[0][1].String(), 64)
		if err != nil {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, cannot convert max: %v %v", td.Name, qr.Rows[0][1].String(), err)
			return result, nil
		}
		interval := (max - min) / float64(sourceReaderCount)
		if interval == 0 {
			wr.Logger().Infof("Not splitting table %v into multiple chunks, interval=0: %v %v", td.Name, max, min)
			return result, nil
		}

		result = make([]string, sourceReaderCount+1)
		result[0] = ""
		result[sourceReaderCount] = ""
		for i := 1; i < sourceReaderCount; i++ {
			result[i] = fmt.Sprintf("%v", min+interval*float64(i))
		}
		return result, nil
	}

	wr.Logger().Infof("Not splitting table %v into multiple chunks, primary key not numeric", td.Name)
	return result, nil
}
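The numeric branches above all compute the same thing: sourceReaderCount-1 evenly spaced interior boundaries between min and max, with empty strings at both ends. The arithmetic of the signed-integer branch can be isolated into a small helper; this is a sketch for illustration, not a refactor of the code above (assumes an fmt import):

// splitInt64Range returns sourceReaderCount+1 boundaries: "" at both ends and
// evenly spaced interior values, or just {"", ""} when the range is too small.
func splitInt64Range(min, max int64, sourceReaderCount int) []string {
	interval := (max - min) / int64(sourceReaderCount)
	if interval == 0 {
		return []string{"", ""}
	}
	result := make([]string, sourceReaderCount+1)
	for i := int64(1); i < int64(sourceReaderCount); i++ {
		result[i] = fmt.Sprintf("%v", min+interval*i)
	}
	return result
}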
Example #21
func (vsdw *VerticalSplitDiffWorker) synchronizeReplication() error {
	vsdw.setState(stateVSDSynchronizeReplication)

	masterInfo, err := vsdw.wr.TopoServer().GetTablet(vsdw.shardInfo.MasterAlias)
	if err != nil {
		return fmt.Errorf("synchronizeReplication: cannot get Tablet record for master %v: %v", vsdw.shardInfo.MasterAlias, err)
	}

	// 1 - stop the master binlog replication, get its current position
	vsdw.wr.Logger().Infof("Stopping master binlog replication on %v", vsdw.shardInfo.MasterAlias)
	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
	blpPositionList, err := vsdw.wr.TabletManagerClient().StopBlp(ctx, masterInfo)
	cancel()
	if err != nil {
		return fmt.Errorf("StopBlp on master %v failed: %v", vsdw.shardInfo.MasterAlias, err)
	}
	wrangler.RecordStartBlpAction(vsdw.cleaner, masterInfo, 30*time.Second)

	// 2 - stop the source 'checker' at a binlog position
	//     higher than the destination master
	stopPositionList := blproto.BlpPositionList{
		Entries: make([]blproto.BlpPosition, 1),
	}
	ss := vsdw.shardInfo.SourceShards[0]
	// find where we should be stopping
	pos, err := blpPositionList.FindBlpPositionById(ss.Uid)
	if err != nil {
		return fmt.Errorf("no binlog position on the master for Uid %v", ss.Uid)
	}

	// stop replication
	vsdw.wr.Logger().Infof("Stopping slave %v at a minimum of %v", vsdw.sourceAlias, pos.Position)
	sourceTablet, err := vsdw.wr.TopoServer().GetTablet(vsdw.sourceAlias)
	if err != nil {
		return err
	}
	stoppedAt, err := vsdw.wr.TabletManagerClient().StopSlaveMinimum(context.TODO(), sourceTablet, pos.Position, 30*time.Second)
	if err != nil {
		return fmt.Errorf("cannot stop slave %v at right binlog position %v: %v", vsdw.sourceAlias, pos.Position, err)
	}
	stopPositionList.Entries[0].Uid = ss.Uid
	stopPositionList.Entries[0].Position = stoppedAt.Position

	// change the cleaner actions from ChangeSlaveType(rdonly)
	// to StartSlave() + ChangeSlaveType(spare)
	wrangler.RecordStartSlaveAction(vsdw.cleaner, sourceTablet, 30*time.Second)
	action, err := wrangler.FindChangeSlaveTypeActionByTarget(vsdw.cleaner, vsdw.sourceAlias)
	if err != nil {
		return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", vsdw.sourceAlias, err)
	}
	action.TabletType = topo.TYPE_SPARE

	// 3 - ask the master of the destination shard to resume filtered
	//     replication up to the new list of positions
	vsdw.wr.Logger().Infof("Restarting master %v until it catches up to %v", vsdw.shardInfo.MasterAlias, stopPositionList)
	masterPos, err := vsdw.wr.TabletManagerClient().RunBlpUntil(context.TODO(), masterInfo, &stopPositionList, 30*time.Second)
	if err != nil {
		return fmt.Errorf("RunBlpUntil on %v until %v failed: %v", vsdw.shardInfo.MasterAlias, stopPositionList, err)
	}

	// 4 - wait until the destination checker is equal or passed
	//     that master binlog position, and stop its replication.
	vsdw.wr.Logger().Infof("Waiting for destination checker %v to catch up to %v", vsdw.destinationAlias, masterPos)
	destinationTablet, err := vsdw.wr.TopoServer().GetTablet(vsdw.destinationAlias)
	if err != nil {
		return err
	}
	_, err = vsdw.wr.TabletManagerClient().StopSlaveMinimum(context.TODO(), destinationTablet, masterPos, 30*time.Second)
	if err != nil {
		return fmt.Errorf("StopSlaveMinimum on %v at %v failed: %v", vsdw.destinationAlias, masterPos, err)
	}
	wrangler.RecordStartSlaveAction(vsdw.cleaner, destinationTablet, 30*time.Second)
	action, err = wrangler.FindChangeSlaveTypeActionByTarget(vsdw.cleaner, vsdw.destinationAlias)
	if err != nil {
		return fmt.Errorf("cannot find ChangeSlaveType action for %v: %v", vsdw.destinationAlias, err)
	}
	action.TabletType = topo.TYPE_SPARE

	// 5 - restart filtered replication on destination master
	vsdw.wr.Logger().Infof("Restarting filtered replication on master %v", vsdw.shardInfo.MasterAlias)
	ctx, cancel = context.WithTimeout(context.TODO(), 30*time.Second)
	err = vsdw.wr.TabletManagerClient().StartBlp(ctx, masterInfo)
	if err := vsdw.cleaner.RemoveActionByName(wrangler.StartBlpActionName, vsdw.shardInfo.MasterAlias.String()); err != nil {
		vsdw.wr.Logger().Warningf("Cannot find cleaning action %v/%v: %v", wrangler.StartBlpActionName, vsdw.shardInfo.MasterAlias.String(), err)
	}
	cancel()
	if err != nil {
		return fmt.Errorf("StartBlp on %v failed: %v", vsdw.shardInfo.MasterAlias, err)
	}

	return nil
}
Example #22
// CleanUp is part of CleanerAction interface.
func (sba StartBlpAction) CleanUp(wr *Wrangler) error {
	ctx, cancel := context.WithTimeout(context.TODO(), sba.WaitTime)
	defer cancel()
	return wr.TabletManagerClient().StartBlp(ctx, sba.TabletInfo)
}