func init() {
	// The zookeeper C module logs quite a bit of useful information,
	// but much of it does not come back in the error API. To aid
	// debugging, enable the log to stderr for warnings.
	//zookeeper.SetLogLevel(zookeeper.LOG_WARN)

	maxConcurrency := 64
	x := os.Getenv("ZK_CLIENT_MAX_CONCURRENCY")
	if x != "" {
		var err error
		maxConcurrency, err = strconv.Atoi(x)
		if err != nil {
			log.Infof("invalid ZK_CLIENT_MAX_CONCURRENCY: %v", err)
		}
	}

	sem = sync2.NewSemaphore(maxConcurrency, 0)
}
// NewResourceConstraint creates a ResourceConstraint with
// max concurrency.
func NewResourceConstraint(max int) *ResourceConstraint {
	return &ResourceConstraint{semaphore: sync2.NewSemaphore(max, 0)}
}
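// Illustrative sketch (not part of the package): both snippets above cap
// concurrency with a counting semaphore that callers bracket with
// Acquire/Release. The channel-backed stand-in below only mimics the
// sync2.NewSemaphore / Acquire / Release calling pattern seen in this code;
// the real sync2 implementation differs, so treat this as a model of the
// pattern, not as the actual library.
package main

import (
	"fmt"
	"sync"
)

// semaphore is a minimal stand-in for sync2.Semaphore.
type semaphore chan struct{}

func newSemaphore(max int) semaphore { return make(semaphore, max) }
func (s semaphore) Acquire()         { s <- struct{}{} }
func (s semaphore) Release()         { <-s }

func main() {
	// At most 2 calls in flight, as if ZK_CLIENT_MAX_CONCURRENCY=2.
	sem := newSemaphore(2)
	wg := sync.WaitGroup{}
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			sem.Acquire()
			defer sem.Release()
			fmt.Println("request", i) // placeholder for a gated zookeeper call
		}(i)
	}
	wg.Wait()
}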
// diff phase: gathers the schemas from the source and destination tablets,
// diffs them, then diffs the content of every table, 8 tables at a time.
func (vsdw *VerticalSplitDiffWorker) diff() error {
	vsdw.setState(stateVSDDiff)

	vsdw.wr.Logger().Infof("Gathering schema information...")
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	wg.Add(1)
	go func() {
		var err error
		vsdw.destinationSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.destinationAlias, nil, nil, false)
		rec.RecordError(err)
		vsdw.wr.Logger().Infof("Got schema from destination %v", vsdw.destinationAlias)
		wg.Done()
	}()
	wg.Add(1)
	go func() {
		var err error
		vsdw.sourceSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.sourceAlias, nil, nil, false)
		rec.RecordError(err)
		vsdw.wr.Logger().Infof("Got schema from source %v", vsdw.sourceAlias)
		wg.Done()
	}()
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// Build a list of regexps from the source shard's table list; only
	// tables matching one of them are kept in the source schema.
	tableRegexps := make([]*regexp.Regexp, len(vsdw.shardInfo.SourceShards[0].Tables))
	for i, table := range vsdw.shardInfo.SourceShards[0].Tables {
		var err error
		tableRegexps[i], err = regexp.Compile(table)
		if err != nil {
			return fmt.Errorf("cannot compile regexp %v for table: %v", table, err)
		}
	}

	// Remove the tables we don't need from the source schema
	newSourceTableDefinitions := make([]*myproto.TableDefinition, 0, len(vsdw.destinationSchemaDefinition.TableDefinitions))
	for _, tableDefinition := range vsdw.sourceSchemaDefinition.TableDefinitions {
		found := false
		for _, tableRegexp := range tableRegexps {
			if tableRegexp.MatchString(tableDefinition.Name) {
				found = true
				break
			}
		}
		if !found {
			vsdw.wr.Logger().Infof("Removing table %v from source schema", tableDefinition.Name)
			continue
		}
		newSourceTableDefinitions = append(newSourceTableDefinitions, tableDefinition)
	}
	vsdw.sourceSchemaDefinition.TableDefinitions = newSourceTableDefinitions

	// Check the schema
	vsdw.wr.Logger().Infof("Diffing the schema...")
	rec = concurrency.AllErrorRecorder{}
	myproto.DiffSchema("destination", vsdw.destinationSchemaDefinition, "source", vsdw.sourceSchemaDefinition, &rec)
	if rec.HasErrors() {
		vsdw.wr.Logger().Warningf("Different schemas: %v", rec.Error())
	} else {
		vsdw.wr.Logger().Infof("Schema match, good.")
	}

	// run the diffs, 8 at a time
	vsdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range vsdw.destinationSchemaDefinition.TableDefinitions {
		wg.Add(1)
		go func(tableDefinition *myproto.TableDefinition) {
			defer wg.Done()
			sem.Acquire()
			defer sem.Release()

			vsdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			sourceQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.sourceAlias, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("TableScan(source) failed: %v", err)
				return
			}
			defer sourceQueryResultReader.Close()

			destinationQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.destinationAlias, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("TableScan(destination) failed: %v", err)
				return
			}
			defer destinationQueryResultReader.Close()

			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err)
				return
			}

			report, err := differ.Go(vsdw.wr.Logger())
			if err != nil {
				vsdw.wr.Logger().Errorf("Differ.Go failed: %v", err)
			} else {
				if report.HasDifferences() {
					vsdw.wr.Logger().Errorf("Table %v has differences: %v", tableDefinition.Name, report.String())
				} else {
					vsdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)
				}
			}
		}(tableDefinition)
	}
	wg.Wait()

	return nil
}
// diff phase: gathers the schemas from the destination and all source tablets,
// diffs them, then diffs the content of every table, 8 tables at a time.
func (sdw *SplitDiffWorker) diff() error {
	sdw.setState(stateSDDiff)

	sdw.wr.Logger().Infof("Gathering schema information...")
	sdw.sourceSchemaDefinitions = make([]*myproto.SchemaDefinition, len(sdw.sourceAliases))
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	wg.Add(1)
	go func() {
		var err error
		sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema(sdw.destinationAlias, nil, nil, false)
		rec.RecordError(err)
		sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias)
		wg.Done()
	}()
	for i, sourceAlias := range sdw.sourceAliases {
		wg.Add(1)
		go func(i int, sourceAlias topo.TabletAlias) {
			var err error
			sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema(sourceAlias, nil, nil, false)
			rec.RecordError(err)
			sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias)
			wg.Done()
		}(i, sourceAlias)
	}
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// TODO(alainjobart) Checking against each source may be
	// overkill, if all sources have the same schema?
	sdw.wr.Logger().Infof("Diffing the schema...")
	rec = concurrency.AllErrorRecorder{}
	for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions {
		sourceName := fmt.Sprintf("source[%v]", i)
		myproto.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec)
	}
	if rec.HasErrors() {
		sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error())
	} else {
		sdw.wr.Logger().Infof("Schema match, good.")
	}

	// run the diffs, 8 at a time
	sdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions {
		wg.Add(1)
		go func(tableDefinition *myproto.TableDefinition) {
			defer wg.Done()
			sem.Acquire()
			defer sem.Release()

			sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			if len(sdw.sourceAliases) != 1 {
				sdw.wr.Logger().Errorf("more than one source tablet is not supported yet, skipping table %v", tableDefinition.Name)
				return
			}

			overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange)
			if err != nil {
				sdw.wr.Logger().Errorf("Source shard doesn't overlap with destination: %v", err)
				return
			}

			sourceQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, sdw.keyspaceInfo.ShardingColumnType)
			if err != nil {
				sdw.wr.Logger().Errorf("TableScanByKeyRange(source) failed: %v", err)
				return
			}
			defer sourceQueryResultReader.Close()

			destinationQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, key.KeyRange{}, sdw.keyspaceInfo.ShardingColumnType)
			if err != nil {
				sdw.wr.Logger().Errorf("TableScanByKeyRange(destination) failed: %v", err)
				return
			}
			defer destinationQueryResultReader.Close()

			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				sdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err)
				return
			}

			report, err := differ.Go(sdw.wr.Logger())
			if err != nil {
				sdw.wr.Logger().Errorf("Differ.Go failed: %v", err)
			} else {
				if report.HasDifferences() {
					sdw.wr.Logger().Warningf("Table %v has differences: %v", tableDefinition.Name, report.String())
				} else {
					sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)
				}
			}
		}(tableDefinition)
	}
	wg.Wait()

	return nil
}
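// Illustrative sketch (not part of the package): both diff() methods fan out
// GetSchema calls with a sync.WaitGroup and collect failures in a
// concurrency.AllErrorRecorder, then check HasErrors()/Error() once after
// Wait(). The recorder and fetchSchema below are minimal stand-ins written
// for this sketch only; the real concurrency package and wrangler API have
// more functionality.
package main

import (
	"errors"
	"fmt"
	"strings"
	"sync"
)

// allErrorRecorder is a tiny, mutex-protected error collector.
type allErrorRecorder struct {
	mu   sync.Mutex
	errs []error
}

func (r *allErrorRecorder) RecordError(err error) {
	if err == nil {
		return
	}
	r.mu.Lock()
	r.errs = append(r.errs, err)
	r.mu.Unlock()
}

// HasErrors and Error are only called after the WaitGroup is done,
// so they don't need the lock in this sketch.
func (r *allErrorRecorder) HasErrors() bool { return len(r.errs) > 0 }

func (r *allErrorRecorder) Error() error {
	msgs := make([]string, len(r.errs))
	for i, err := range r.errs {
		msgs[i] = err.Error()
	}
	return errors.New(strings.Join(msgs, "; "))
}

// fetchSchema stands in for wr.GetSchema on one tablet.
func fetchSchema(source string) (string, error) {
	if source == "bad" {
		return "", fmt.Errorf("cannot reach %v", source)
	}
	return "schema-of-" + source, nil
}

func main() {
	sources := []string{"source[0]", "source[1]", "bad"}
	schemas := make([]string, len(sources))
	wg := sync.WaitGroup{}
	rec := allErrorRecorder{}
	for i, source := range sources {
		wg.Add(1)
		go func(i int, source string) {
			defer wg.Done()
			var err error
			schemas[i], err = fetchSchema(source)
			rec.RecordError(err)
		}(i, source)
	}
	wg.Wait()
	if rec.HasErrors() {
		fmt.Println("diff aborted:", rec.Error())
		return
	}
	fmt.Println("got schemas:", schemas)
}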
// copy phase:
// - copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (scw *SplitCloneWorker) copy() error {
	scw.setState(stateSCCopy)

	// get source schema from the first shard
	// TODO(alainjobart): for now, we assume the schema is compatible
	// on all source shards. Furthermore, we estimate the number of rows
	// in each source shard for each table to be about the same
	// (rowCount is used to estimate an ETA)
	sourceSchemaDefinition, err := scw.wr.GetSchema(scw.sourceAliases[0], nil, scw.excludeTables, true)
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", scw.sourceAliases[0], err)
	}
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter in tablet %v", scw.sourceAliases[0])
	}
	scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	scw.mu.Lock()
	scw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		scw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount * uint64(len(scw.sourceAliases)),
		}
	}
	scw.startTime = time.Now()
	scw.mu.Unlock()

	// Find the column index for the sharding column in each table, and count rows
	columnIndexes := make([]int, len(sourceSchemaDefinition.TableDefinitions))
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TABLE_BASE_TABLE {
			// find the column to split on
			columnIndexes[tableIndex] = -1
			for i, name := range td.Columns {
				if name == scw.keyspaceInfo.ShardingColumnName {
					columnIndexes[tableIndex] = i
					break
				}
			}
			if columnIndexes[tableIndex] == -1 {
				return fmt.Errorf("table %v doesn't have a column named '%v'", td.Name, scw.keyspaceInfo.ShardingColumnName)
			}

			scw.tableStatus[tableIndex].mu.Lock()
			scw.tableStatus[tableIndex].rowCount = td.RowCount
			scw.tableStatus[tableIndex].mu.Unlock()
		} else {
			scw.tableStatus[tableIndex].mu.Lock()
			scw.tableStatus[tableIndex].isView = true
			scw.tableStatus[tableIndex].mu.Unlock()
		}
	}

	// In parallel, set up the channels used to send SQL data chunks to
	// each destination tablet.
	//
	// mu protects the abort channel for closing, and firstError
	mu := sync.Mutex{}
	abort := make(chan struct{})
	var firstError error

	processError := func(format string, args ...interface{}) {
		scw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if abort != nil {
			close(abort)
			abort = nil
			firstError = fmt.Errorf(format, args...)
		}
		mu.Unlock()
	}

	// since we're writing only to masters, we need to keep binlogs
	// enabled so that replication happens
	disableBinLogs := false

	insertChannels := make([][]chan string, len(scw.destinationShards))
	destinationWaitGroup := sync.WaitGroup{}
	for shardIndex := range scw.destinationShards {
		insertChannels[shardIndex] = make([]chan string, len(scw.destinationAliases[shardIndex]))
		for i, tabletAlias := range scw.destinationAliases[shardIndex] {
			// we create one channel per destination tablet. It
			// is sized to have a buffer of a maximum of
			// destinationWriterCount * 2 items, to hopefully
			// always have data. We then have
			// destinationWriterCount goroutines reading from it.
			insertChannels[shardIndex][i] = make(chan string, scw.destinationWriterCount*2)

			go func(ti *topo.TabletInfo, insertChannel chan string) {
				for j := 0; j < scw.destinationWriterCount; j++ {
					destinationWaitGroup.Add(1)
					go func() {
						defer destinationWaitGroup.Done()
						if err := executeFetchLoop(scw.wr, ti, insertChannel, abort, disableBinLogs); err != nil {
							processError("executeFetchLoop failed: %v", err)
						}
					}()
				}
			}(scw.destinationTablets[shardIndex][tabletAlias], insertChannels[shardIndex][i])
		}
	}

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	for shardIndex := range scw.sourceShards {
		sema := sync2.NewSemaphore(scw.sourceReaderCount, 0)
		for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
			if td.Type == myproto.TABLE_VIEW {
				continue
			}

			rowSplitter := NewRowSplitter(scw.destinationShards, scw.keyspaceInfo.ShardingColumnType, columnIndexes[tableIndex])

			chunks, err := findChunks(scw.wr, scw.sourceTablets[shardIndex], td, scw.minTableSizeForSplit, scw.sourceReaderCount)
			if err != nil {
				return err
			}
			scw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

			for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
				sourceWaitGroup.Add(1)
				// pass shardIndex explicitly so the goroutine doesn't
				// capture the outer loop variable
				go func(td *myproto.TableDefinition, shardIndex, tableIndex, chunkIndex int) {
					defer sourceWaitGroup.Done()

					sema.Acquire()
					defer sema.Release()

					scw.tableStatus[tableIndex].threadStarted()

					// build the query, and start the streaming
					selectSQL := buildSQLFromChunks(scw.wr, td, chunks, chunkIndex, scw.sourceAliases[shardIndex].String())
					qrr, err := NewQueryResultReaderForTablet(scw.wr.TopoServer(), scw.sourceAliases[shardIndex], selectSQL)
					if err != nil {
						processError("NewQueryResultReaderForTablet failed: %v", err)
						return
					}
					defer qrr.Close()

					// process the data
					if err := scw.processData(td, tableIndex, qrr, rowSplitter, insertChannels, scw.destinationPackCount, abort); err != nil {
						processError("processData failed: %v", err)
					}
					scw.tableStatus[tableIndex].threadDone()
				}(td, shardIndex, tableIndex, chunkIndex)
			}
		}
	}
	sourceWaitGroup.Wait()

	for shardIndex := range scw.destinationShards {
		for _, c := range insertChannels[shardIndex] {
			close(c)
		}
	}
	destinationWaitGroup.Wait()
	if firstError != nil {
		return firstError
	}

	// then create and populate the blp_checkpoint table
	if scw.strategy.PopulateBlpCheckpoint {
		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if scw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BLP_FLAG_DONT_START
		}

		// get the current position from the sources
		for shardIndex := range scw.sourceShards {
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			status, err := scw.wr.TabletManagerClient().SlaveStatus(ctx, scw.sourceTablets[shardIndex])
			if err != nil {
				return err
			}
			cancel()

			queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))
		}

		for shardIndex := range scw.destinationShards {
			for _, tabletAlias := range scw.destinationAliases[shardIndex] {
				destinationWaitGroup.Add(1)
				go func(ti *topo.TabletInfo) {
					defer destinationWaitGroup.Done()
					scw.wr.Logger().Infof("Making and populating blp_checkpoint table on tablet %v", ti.Alias)
					if err := runSqlCommands(scw.wr, ti, queries, abort, disableBinLogs); err != nil {
						processError("blp_checkpoint queries failed on tablet %v: %v", ti.Alias, err)
					}
				}(scw.destinationTablets[shardIndex][tabletAlias])
			}
		}
		destinationWaitGroup.Wait()
		if firstError != nil {
			return firstError
		}
	}

	// Now we're done with data copy, update the shard's source info.
	// TODO(alainjobart) this is a superset, some shards may not
	// overlap, have to deal with this better (for N -> M splits
	// where both N>1 and M>1)
	if scw.strategy.SkipSetSourceShards {
		scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.")
	} else {
		for _, si := range scw.destinationShards {
			scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", si.Keyspace(), si.ShardName())
			if err := scw.wr.SetSourceShards(si.Keyspace(), si.ShardName(), scw.sourceAliases, nil); err != nil {
				return fmt.Errorf("failed to set source shards: %v", err)
			}
		}
	}

	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for shardIndex := range scw.destinationShards {
		for _, tabletAlias := range scw.reloadAliases[shardIndex] {
			destinationWaitGroup.Add(1)
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				scw.wr.Logger().Infof("Reloading schema on tablet %v", ti.Alias)
				ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
				if err := scw.wr.TabletManagerClient().ReloadSchema(ctx, ti); err != nil {
					processError("ReloadSchema failed on tablet %v: %v", ti.Alias, err)
				}
				cancel()
			}(scw.reloadTablets[shardIndex][tabletAlias])
		}
	}
	destinationWaitGroup.Wait()
	return firstError
}
// copy phase:
// - copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (vscw *VerticalSplitCloneWorker) copy() error {
	vscw.setState(stateVSCCopy)

	// get source schema
	sourceSchemaDefinition, err := vscw.wr.GetSchema(vscw.sourceAlias, vscw.tables, nil, true)
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", vscw.sourceAlias, err)
	}
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter")
	}
	vscw.wr.Logger().Infof("Source tablet has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	vscw.mu.Lock()
	vscw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		vscw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount,
		}
	}
	vscw.startTime = time.Now()
	vscw.mu.Unlock()

	// Count rows, and mark views
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		vscw.tableStatus[i].mu.Lock()
		if td.Type == myproto.TABLE_BASE_TABLE {
			vscw.tableStatus[i].rowCount = td.RowCount
		} else {
			vscw.tableStatus[i].isView = true
		}
		vscw.tableStatus[i].mu.Unlock()
	}

	// In parallel, set up the channels used to send SQL data chunks to
	// each destination tablet.
	//
	// mu protects the abort channel for closing, and firstError
	mu := sync.Mutex{}
	abort := make(chan struct{})
	var firstError error

	processError := func(format string, args ...interface{}) {
		vscw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if abort != nil {
			close(abort)
			abort = nil
			firstError = fmt.Errorf(format, args...)
		}
		mu.Unlock()
	}

	// since we're writing only to masters, we need to keep binlogs
	// enabled so that replication happens
	disableBinLogs := false

	insertChannels := make([]chan string, len(vscw.destinationAliases))
	destinationWaitGroup := sync.WaitGroup{}
	for i, tabletAlias := range vscw.destinationAliases {
		// we create one channel per destination tablet. It
		// is sized to have a buffer of a maximum of
		// destinationWriterCount * 2 items, to hopefully
		// always have data. We then have
		// destinationWriterCount goroutines reading from it.
		insertChannels[i] = make(chan string, vscw.destinationWriterCount*2)

		go func(ti *topo.TabletInfo, insertChannel chan string) {
			for j := 0; j < vscw.destinationWriterCount; j++ {
				destinationWaitGroup.Add(1)
				go func() {
					defer destinationWaitGroup.Done()
					if err := executeFetchLoop(vscw.wr, ti, insertChannel, abort, disableBinLogs); err != nil {
						processError("executeFetchLoop failed: %v", err)
					}
				}()
			}
		}(vscw.destinationTablets[tabletAlias], insertChannels[i])
	}

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	sema := sync2.NewSemaphore(vscw.sourceReaderCount, 0)
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TABLE_VIEW {
			continue
		}

		chunks, err := findChunks(vscw.wr, vscw.sourceTablet, td, vscw.minTableSizeForSplit, vscw.sourceReaderCount)
		if err != nil {
			return err
		}
		vscw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

		for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
			sourceWaitGroup.Add(1)
			go func(td *myproto.TableDefinition, tableIndex, chunkIndex int) {
				defer sourceWaitGroup.Done()

				sema.Acquire()
				defer sema.Release()

				vscw.tableStatus[tableIndex].threadStarted()

				// build the query, and start the streaming
				selectSQL := buildSQLFromChunks(vscw.wr, td, chunks, chunkIndex, vscw.sourceAlias.String())
				qrr, err := NewQueryResultReaderForTablet(vscw.wr.TopoServer(), vscw.sourceAlias, selectSQL)
				if err != nil {
					processError("NewQueryResultReaderForTablet failed: %v", err)
					return
				}
				defer qrr.Close()

				// process the data
				if err := vscw.processData(td, tableIndex, qrr, insertChannels, vscw.destinationPackCount, abort); err != nil {
					processError("processData failed: %v", err)
				}
				vscw.tableStatus[tableIndex].threadDone()
			}(td, tableIndex, chunkIndex)
		}
	}
	sourceWaitGroup.Wait()

	for _, c := range insertChannels {
		close(c)
	}
	destinationWaitGroup.Wait()
	if firstError != nil {
		return firstError
	}

	// then create and populate the blp_checkpoint table
	if vscw.strategy.PopulateBlpCheckpoint {
		// get the current position from the source
		ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
		status, err := vscw.wr.TabletManagerClient().SlaveStatus(ctx, vscw.sourceTablet)
		if err != nil {
			return err
		}
		cancel()

		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if vscw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BLP_FLAG_DONT_START
		}
		queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))

		for _, tabletAlias := range vscw.destinationAliases {
			destinationWaitGroup.Add(1)
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				vscw.wr.Logger().Infof("Making and populating blp_checkpoint table on tablet %v", ti.Alias)
				if err := runSqlCommands(vscw.wr, ti, queries, abort, disableBinLogs); err != nil {
					processError("blp_checkpoint queries failed on tablet %v: %v", ti.Alias, err)
				}
			}(vscw.destinationTablets[tabletAlias])
		}
		destinationWaitGroup.Wait()
		if firstError != nil {
			return firstError
		}
	}

	// Now we're done with data copy, update the shard's source info.
	if vscw.strategy.SkipSetSourceShards {
		vscw.wr.Logger().Infof("Skipping setting SourceShard on destination shard.")
	} else {
		vscw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", vscw.destinationKeyspace, vscw.destinationShard)
		if err := vscw.wr.SetSourceShards(vscw.destinationKeyspace, vscw.destinationShard, []topo.TabletAlias{vscw.sourceAlias}, vscw.tables); err != nil {
			return fmt.Errorf("failed to set source shards: %v", err)
		}
	}

	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for _, tabletAlias := range vscw.reloadAliases {
		destinationWaitGroup.Add(1)
		go func(ti *topo.TabletInfo) {
			defer destinationWaitGroup.Done()
			vscw.wr.Logger().Infof("Reloading schema on tablet %v", ti.Alias)
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			if err := vscw.wr.TabletManagerClient().ReloadSchema(ctx, ti); err != nil {
				processError("ReloadSchema failed on tablet %v: %v", ti.Alias, err)
			}
			cancel()
		}(vscw.reloadTablets[tabletAlias])
	}
	destinationWaitGroup.Wait()
	return firstError
}
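// Illustrative sketch (not part of the package): the data flow both copy()
// methods use for the insert path. Each destination tablet gets one buffered
// channel (destinationWriterCount*2 deep) and destinationWriterCount consumer
// goroutines; producers send SQL statements into every channel, then close the
// channels and wait for the consumers to drain them. The destination names and
// the printed "execute" step are placeholders for the real tablets and
// executeFetchLoop.
package main

import (
	"fmt"
	"sync"
)

func main() {
	const destinationWriterCount = 3
	destinations := []string{"dest-0", "dest-1"}

	insertChannels := make([]chan string, len(destinations))
	destinationWaitGroup := sync.WaitGroup{}
	for i, dest := range destinations {
		insertChannels[i] = make(chan string, destinationWriterCount*2)
		for j := 0; j < destinationWriterCount; j++ {
			destinationWaitGroup.Add(1)
			go func(dest string, insertChannel chan string) {
				defer destinationWaitGroup.Done()
				// drain until the channel is closed, like executeFetchLoop
				for sql := range insertChannel {
					fmt.Printf("%v executes: %v\n", dest, sql)
				}
			}(dest, insertChannels[i])
		}
	}

	// Producers: send the same statements to every destination channel.
	for chunk := 0; chunk < 4; chunk++ {
		sql := fmt.Sprintf("INSERT ... /* chunk %v */", chunk)
		for _, c := range insertChannels {
			c <- sql
		}
	}

	// Close the channels so the writer goroutines exit, then wait for them.
	for _, c := range insertChannels {
		close(c)
	}
	destinationWaitGroup.Wait()
}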