// helper method to asynchronously diff a schema func (wr *Wrangler) diffSchema(masterSchema *myproto.SchemaDefinition, masterTabletAlias, alias topo.TabletAlias, excludeTables []string, includeViews bool, wg *sync.WaitGroup, er concurrency.ErrorRecorder) { defer wg.Done() log.Infof("Gathering schema for %v", alias) slaveSchema, err := wr.GetSchema(alias, nil, excludeTables, includeViews) if err != nil { er.RecordError(err) return } log.Infof("Diffing schema for %v", alias) myproto.DiffSchema(masterTabletAlias.String(), masterSchema, alias.String(), slaveSchema, er) }
func (vsdw *VerticalSplitDiffWorker) diff() error { vsdw.setState(stateVSDDiff) vsdw.wr.Logger().Infof("Gathering schema information...") wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error vsdw.destinationSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.destinationAlias, nil, nil, false) rec.RecordError(err) vsdw.wr.Logger().Infof("Got schema from destination %v", vsdw.destinationAlias) wg.Done() }() wg.Add(1) go func() { var err error vsdw.sourceSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.sourceAlias, nil, nil, false) rec.RecordError(err) vsdw.wr.Logger().Infof("Got schema from source %v", vsdw.sourceAlias) wg.Done() }() wg.Wait() if rec.HasErrors() { return rec.Error() } // Build a list of regexp to exclude tables from source schema tableRegexps := make([]*regexp.Regexp, len(vsdw.shardInfo.SourceShards[0].Tables)) for i, table := range vsdw.shardInfo.SourceShards[0].Tables { var err error tableRegexps[i], err = regexp.Compile(table) if err != nil { return fmt.Errorf("cannot compile regexp %v for table: %v", table, err) } } // Remove the tables we don't need from the source schema newSourceTableDefinitions := make([]*myproto.TableDefinition, 0, len(vsdw.destinationSchemaDefinition.TableDefinitions)) for _, tableDefinition := range vsdw.sourceSchemaDefinition.TableDefinitions { found := false for _, tableRegexp := range tableRegexps { if tableRegexp.MatchString(tableDefinition.Name) { found = true break } } if !found { vsdw.wr.Logger().Infof("Removing table %v from source schema", tableDefinition.Name) continue } newSourceTableDefinitions = append(newSourceTableDefinitions, tableDefinition) } vsdw.sourceSchemaDefinition.TableDefinitions = newSourceTableDefinitions // Check the schema vsdw.wr.Logger().Infof("Diffing the schema...") rec = concurrency.AllErrorRecorder{} myproto.DiffSchema("destination", vsdw.destinationSchemaDefinition, "source", vsdw.sourceSchemaDefinition, &rec) if rec.HasErrors() { vsdw.wr.Logger().Warningf("Different schemas: %v", rec.Error()) } else { vsdw.wr.Logger().Infof("Schema match, good.") } // run the diffs, 8 at a time vsdw.wr.Logger().Infof("Running the diffs...") sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range vsdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *myproto.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() vsdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) sourceQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.sourceAlias, tableDefinition) if err != nil { vsdw.wr.Logger().Errorf("TableScan(source) failed: %v", err) return } defer sourceQueryResultReader.Close() destinationQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.destinationAlias, tableDefinition) if err != nil { vsdw.wr.Logger().Errorf("TableScan(destination) failed: %v", err) return } defer destinationQueryResultReader.Close() differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { vsdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err) return } report, err := differ.Go(vsdw.wr.Logger()) if err != nil { vsdw.wr.Logger().Errorf("Differ.Go failed: %v", err) } else { if report.HasDifferences() { vsdw.wr.Logger().Errorf("Table %v has differences: %v", tableDefinition.Name, report.String()) } else { vsdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return nil }
func (sdw *SplitDiffWorker) diff() error { sdw.setState(stateSDDiff) sdw.wr.Logger().Infof("Gathering schema information...") sdw.sourceSchemaDefinitions = make([]*myproto.SchemaDefinition, len(sdw.sourceAliases)) wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema(sdw.destinationAlias, nil, nil, false) rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias) wg.Done() }() for i, sourceAlias := range sdw.sourceAliases { wg.Add(1) go func(i int, sourceAlias topo.TabletAlias) { var err error sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema(sourceAlias, nil, nil, false) rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias) wg.Done() }(i, sourceAlias) } wg.Wait() if rec.HasErrors() { return rec.Error() } // TODO(alainjobart) Checking against each source may be // overkill, if all sources have the same schema? sdw.wr.Logger().Infof("Diffing the schema...") rec = concurrency.AllErrorRecorder{} for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions { sourceName := fmt.Sprintf("source[%v]", i) myproto.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec) } if rec.HasErrors() { sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error()) } else { sdw.wr.Logger().Infof("Schema match, good.") } // run the diffs, 8 at a time sdw.wr.Logger().Infof("Running the diffs...") sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *myproto.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) if len(sdw.sourceAliases) != 1 { sdw.wr.Logger().Errorf("Don't support more than one source for table yet: %v", tableDefinition.Name) return } overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange) if err != nil { sdw.wr.Logger().Errorf("Source shard doesn't overlap with destination????: %v", err) return } sourceQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, sdw.keyspaceInfo.ShardingColumnType) if err != nil { sdw.wr.Logger().Errorf("TableScanByKeyRange(source) failed: %v", err) return } defer sourceQueryResultReader.Close() destinationQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, key.KeyRange{}, sdw.keyspaceInfo.ShardingColumnType) if err != nil { sdw.wr.Logger().Errorf("TableScanByKeyRange(destination) failed: %v", err) return } defer destinationQueryResultReader.Close() differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { sdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err) return } report, err := differ.Go(sdw.wr.Logger()) if err != nil { sdw.wr.Logger().Errorf("Differ.Go failed: %v", err.Error()) } else { if report.HasDifferences() { sdw.wr.Logger().Warningf("Table %v has differences: %v", tableDefinition.Name, report.String()) } else { sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return nil }