func (sdw *SplitDiffWorker) diff(ctx context.Context) error { sdw.SetState(WorkerStateDiff) sdw.wr.Logger().Infof("Gathering schema information...") sdw.sourceSchemaDefinitions = make([]*myproto.SchemaDefinition, len(sdw.sourceAliases)) wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema( shortCtx, sdw.destinationAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias) wg.Done() }() for i, sourceAlias := range sdw.sourceAliases { wg.Add(1) go func(i int, sourceAlias topo.TabletAlias) { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema( shortCtx, sourceAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias) wg.Done() }(i, sourceAlias) } wg.Wait() if rec.HasErrors() { return rec.Error() } // TODO(alainjobart) Checking against each source may be // overkill, if all sources have the same schema? sdw.wr.Logger().Infof("Diffing the schema...") rec = concurrency.AllErrorRecorder{} for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions { sourceName := fmt.Sprintf("source[%v]", i) myproto.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec) } if rec.HasErrors() { sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error()) } else { sdw.wr.Logger().Infof("Schema match, good.") } // run the diffs, 8 at a time sdw.wr.Logger().Infof("Running the diffs...") sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *myproto.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) if len(sdw.sourceAliases) != 1 { err := fmt.Errorf("Don't support more than one source for table yet: %v", tableDefinition.Name) rec.RecordError(err) sdw.wr.Logger().Errorf(err.Error()) return } overlap, err := key.KeyRangesOverlap3(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange) if err != nil { newErr := fmt.Errorf("Source shard doesn't overlap with destination????: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } sourceQueryResultReader, err := TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, key.ProtoToKeyspaceIdType(sdw.keyspaceInfo.ShardingColumnType)) if err != nil { newErr := fmt.Errorf("TableScanByKeyRange(source) failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } defer sourceQueryResultReader.Close() destinationQueryResultReader, err := TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, nil, key.ProtoToKeyspaceIdType(sdw.keyspaceInfo.ShardingColumnType)) if err != nil { newErr := fmt.Errorf("TableScanByKeyRange(destination) failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } defer destinationQueryResultReader.Close() differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { newErr := fmt.Errorf("NewRowDiffer() failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } report, err := differ.Go(sdw.wr.Logger()) if err != nil { newErr := fmt.Errorf("Differ.Go failed: %v", err.Error()) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) } else { if report.HasDifferences() { err := fmt.Errorf("Table %v has differences: %v", tableDefinition.Name, report.String()) rec.RecordError(err) sdw.wr.Logger().Warningf(err.Error()) } else { sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return rec.Error() }
// Iteration is a single iteration for the player: get the current status, // try to play, and plays until interrupted, or until an error occurs. func (bpc *BinlogPlayerController) Iteration() (err error) { defer func() { if x := recover(); x != nil { log.Errorf("%v: caught panic: %v", bpc, x) err = fmt.Errorf("panic: %v", x) } }() // Apply any special settings necessary for playback of binlogs. // We do it on every iteration to be sure, in case MySQL was restarted. if err := bpc.mysqld.EnableBinlogPlayback(); err != nil { // We failed to apply the required settings, so we shouldn't keep going. return err } // create the db connection, connect it vtClient := binlogplayer.NewDbClient(bpc.dbConfig) if err := vtClient.Connect(); err != nil { return fmt.Errorf("can't connect to database: %v", err) } defer vtClient.Close() // Read the start position startPosition, flags, err := binlogplayer.ReadStartPosition(vtClient, bpc.sourceShard.Uid) if err != nil { return fmt.Errorf("can't read startPosition: %v", err) } // if we shouldn't start, we just error out and try again later if strings.Index(flags, binlogplayer.BlpFlagDontStart) != -1 { return fmt.Errorf("not starting because flag '%v' is set", binlogplayer.BlpFlagDontStart) } // Find the server list for the source shard in our cell addrs, _, err := bpc.ts.GetEndPoints(bpc.ctx, bpc.cell, bpc.sourceShard.Keyspace, bpc.sourceShard.Shard, topo.TYPE_REPLICA) if err != nil { // If this calls fails because the context was canceled, // we need to return nil. select { case <-bpc.ctx.Done(): if bpc.ctx.Err() == context.Canceled { return nil } default: } return fmt.Errorf("can't find any source tablet for %v %v %v: %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA, err) } if len(addrs.Entries) == 0 { return fmt.Errorf("empty source tablet list for %v %v %v", bpc.cell, bpc.sourceShard.String(), topo.TYPE_REPLICA) } newServerIndex := rand.Intn(len(addrs.Entries)) endPoint := addrs.Entries[newServerIndex] // save our current server bpc.playerMutex.Lock() bpc.sourceTablet = topo.TabletAlias{ Cell: bpc.cell, Uid: addrs.Entries[newServerIndex].Uid, } bpc.lastError = nil bpc.playerMutex.Unlock() // check which kind of replication we're doing, tables or keyrange if len(bpc.sourceShard.Tables) > 0 { // tables, first resolve wildcards tables, err := mysqlctl.ResolveTables(bpc.mysqld, bpc.dbName, bpc.sourceShard.Tables) if err != nil { return fmt.Errorf("failed to resolve table names: %v", err) } // tables, just get them player := binlogplayer.NewBinlogPlayerTables(vtClient, endPoint, tables, startPosition, bpc.stopPosition, bpc.binlogPlayerStats) return player.ApplyBinlogEvents(bpc.ctx) } // the data we have to replicate is the intersection of the // source keyrange and our keyrange overlap, err := key.KeyRangesOverlap3(bpc.sourceShard.KeyRange, bpc.keyRange) if err != nil { return fmt.Errorf("Source shard %v doesn't overlap destination shard %v", bpc.sourceShard.KeyRange, bpc.keyRange) } player := binlogplayer.NewBinlogPlayerKeyRange(vtClient, endPoint, bpc.keyspaceIDType, overlap, startPosition, bpc.stopPosition, bpc.binlogPlayerStats) return player.ApplyBinlogEvents(bpc.ctx) }