// ProtoToSrvKeyspace turns a proto into a SrvKeyspace
func ProtoToSrvKeyspace(s *pb.SrvKeyspace) *SrvKeyspace {
	result := &SrvKeyspace{
		Partitions:         make(map[TabletType]*KeyspacePartition),
		ShardingColumnName: s.ShardingColumnName,
		ShardingColumnType: key.ProtoToKeyspaceIdType(s.ShardingColumnType),
		SplitShardCount:    s.SplitShardCount,
	}
	for _, p := range s.Partitions {
		tt := ProtoToTabletType(p.ServedType)
		partition := &KeyspacePartition{}
		for _, sr := range p.ShardReferences {
			partition.ShardReferences = append(partition.ShardReferences, ShardReference{
				Name:     sr.Name,
				KeyRange: key.ProtoToKeyRange(sr.KeyRange),
			})
		}
		result.Partitions[tt] = partition
	}
	if len(s.ServedFrom) > 0 {
		result.ServedFrom = make(map[TabletType]string)
		for _, sf := range s.ServedFrom {
			tt := ProtoToTabletType(sf.TabletType)
			result.ServedFrom[tt] = sf.Keyspace
		}
	}
	return result
}
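// Illustrative usage sketch (not part of the original source): converting a
// proto SrvKeyspace received from the topology service into the topo struct.
// The variable names and field values below are hypothetical.
//
//	srvKeyspacePB := &pb.SrvKeyspace{ShardingColumnName: "keyspace_id"}
//	srvKeyspace := ProtoToSrvKeyspace(srvKeyspacePB)
//	// srvKeyspace.ShardingColumnName == "keyspace_id"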
// StreamKeyRange is part of the pbs.UpdateStreamServer interface
func (server *UpdateStream) StreamKeyRange(req *pb.StreamKeyRangeRequest, stream pbs.UpdateStream_StreamKeyRangeServer) (err error) {
	defer server.updateStream.HandlePanic(&err)
	return server.updateStream.StreamKeyRange(&proto.KeyRangeRequest{
		Position:       myproto.ProtoToReplicationPosition(req.Position),
		KeyspaceIdType: key.ProtoToKeyspaceIdType(req.KeyspaceIdType),
		KeyRange:       key.ProtoToKeyRange(req.KeyRange),
		Charset:        mproto.ProtoToCharset(req.Charset),
	}, func(reply *proto.BinlogTransaction) error {
		return stream.Send(&pb.StreamKeyRangeResponse{
			BinlogTransaction: proto.BinlogTransactionToProto(reply),
		})
	})
}
// findCellsForRebuild will find all the cells in the given keyspace
// and create an entry in the map for them
func (wr *Wrangler) findCellsForRebuild(ki *topo.KeyspaceInfo, shardMap map[string]*topo.ShardInfo, cells []string, srvKeyspaceMap map[string]*topo.SrvKeyspace) {
	for _, si := range shardMap {
		for _, cell := range si.Cells {
			if !topo.InCellList(cell, cells) {
				continue
			}
			if _, ok := srvKeyspaceMap[cell]; !ok {
				srvKeyspaceMap[cell] = &topo.SrvKeyspace{
					ShardingColumnName: ki.ShardingColumnName,
					ShardingColumnType: key.ProtoToKeyspaceIdType(ki.ShardingColumnType),
					ServedFrom:         ki.ComputeCellServedFrom(cell),
					SplitShardCount:    ki.SplitShardCount,
				}
			}
		}
	}
}
func (sdw *SplitDiffWorker) diff(ctx context.Context) error {
	sdw.SetState(WorkerStateDiff)

	sdw.wr.Logger().Infof("Gathering schema information...")
	sdw.sourceSchemaDefinitions = make([]*myproto.SchemaDefinition, len(sdw.sourceAliases))
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	wg.Add(1)
	go func() {
		var err error
		shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
		sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema(
			shortCtx, sdw.destinationAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */)
		cancel()
		rec.RecordError(err)
		sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias)
		wg.Done()
	}()
	for i, sourceAlias := range sdw.sourceAliases {
		wg.Add(1)
		go func(i int, sourceAlias pb.TabletAlias) {
			var err error
			shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
			sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema(
				shortCtx, &sourceAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */)
			cancel()
			rec.RecordError(err)
			sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias)
			wg.Done()
		}(i, *sourceAlias)
	}
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// TODO(alainjobart) Checking against each source may be
	// overkill, if all sources have the same schema?
	sdw.wr.Logger().Infof("Diffing the schema...")
	rec = concurrency.AllErrorRecorder{}
	for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions {
		sourceName := fmt.Sprintf("source[%v]", i)
		myproto.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec)
	}
	if rec.HasErrors() {
		sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error())
	} else {
		sdw.wr.Logger().Infof("Schema match, good.")
	}

	// run the diffs, 8 at a time
	sdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions {
		wg.Add(1)
		go func(tableDefinition *myproto.TableDefinition) {
			defer wg.Done()
			sem.Acquire()
			defer sem.Release()

			sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			if len(sdw.sourceAliases) != 1 {
				err := fmt.Errorf("Don't support more than one source for table yet: %v", tableDefinition.Name)
				rec.RecordError(err)
				sdw.wr.Logger().Errorf(err.Error())
				return
			}
			overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange)
			if err != nil {
				newErr := fmt.Errorf("Source shard doesn't overlap with destination????: %v", err)
				rec.RecordError(newErr)
				sdw.wr.Logger().Errorf(newErr.Error())
				return
			}
			sourceQueryResultReader, err := TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, key.ProtoToKeyspaceIdType(sdw.keyspaceInfo.ShardingColumnType))
			if err != nil {
				newErr := fmt.Errorf("TableScanByKeyRange(source) failed: %v", err)
				rec.RecordError(newErr)
				sdw.wr.Logger().Errorf(newErr.Error())
				return
			}
			defer sourceQueryResultReader.Close()
			destinationQueryResultReader, err := TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, nil, key.ProtoToKeyspaceIdType(sdw.keyspaceInfo.ShardingColumnType))
			if err != nil {
				newErr := fmt.Errorf("TableScanByKeyRange(destination) failed: %v", err)
				rec.RecordError(newErr)
				sdw.wr.Logger().Errorf(newErr.Error())
				return
			}
			defer destinationQueryResultReader.Close()
			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				newErr := fmt.Errorf("NewRowDiffer() failed: %v", err)
				rec.RecordError(newErr)
				sdw.wr.Logger().Errorf(newErr.Error())
				return
			}

			report, err := differ.Go(sdw.wr.Logger())
			if err != nil {
				newErr := fmt.Errorf("Differ.Go failed: %v", err.Error())
				rec.RecordError(newErr)
				sdw.wr.Logger().Errorf(newErr.Error())
			} else {
				if report.HasDifferences() {
					err := fmt.Errorf("Table %v has differences: %v", tableDefinition.Name, report.String())
					rec.RecordError(err)
					sdw.wr.Logger().Warningf(err.Error())
				} else {
					sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)
				}
			}
		}(tableDefinition)
	}
	wg.Wait()

	return rec.Error()
}
// copy phase:
// - copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
func (scw *SplitCloneWorker) copy(ctx context.Context) error {
	scw.setState(WorkerStateCopy)

	// get source schema from the first shard
	// TODO(alainjobart): for now, we assume the schema is compatible
	// on all source shards. Furthermore, we estimate the number of rows
	// in each source shard for each table to be about the same
	// (rowCount is used to estimate an ETA)
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	sourceSchemaDefinition, err := scw.wr.GetSchema(shortCtx, scw.sourceAliases[0], nil, scw.excludeTables, true)
	cancel()
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", topoproto.TabletAliasString(scw.sourceAliases[0]), err)
	}
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter in tablet %v", topoproto.TabletAliasString(scw.sourceAliases[0]))
	}
	scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	scw.Mu.Lock()
	scw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		scw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount * uint64(len(scw.sourceAliases)),
		}
	}
	scw.startTime = time.Now()
	scw.Mu.Unlock()

	// Find the column index for the sharding columns in all the databases, and count rows
	columnIndexes := make([]int, len(sourceSchemaDefinition.TableDefinitions))
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TableBaseTable {
			// find the column to split on
			columnIndexes[tableIndex] = -1
			for i, name := range td.Columns {
				if name == scw.keyspaceInfo.ShardingColumnName {
					columnIndexes[tableIndex] = i
					break
				}
			}
			if columnIndexes[tableIndex] == -1 {
				return fmt.Errorf("table %v doesn't have a column named '%v'", td.Name, scw.keyspaceInfo.ShardingColumnName)
			}

			scw.tableStatus[tableIndex].mu.Lock()
			scw.tableStatus[tableIndex].rowCount = td.RowCount
			scw.tableStatus[tableIndex].mu.Unlock()
		} else {
			scw.tableStatus[tableIndex].mu.Lock()
			scw.tableStatus[tableIndex].isView = true
			scw.tableStatus[tableIndex].mu.Unlock()
		}
	}

	// In parallel, set up the channels used to send SQL data chunks to each destination tablet.
	//
	// mu protects the context for cancelation, and firstError
	mu := sync.Mutex{}
	var firstError error

	ctx, cancelCopy := context.WithCancel(ctx)
	processError := func(format string, args ...interface{}) {
		scw.wr.Logger().Errorf(format, args...)
		mu.Lock()
		if firstError == nil {
			firstError = fmt.Errorf(format, args...)
			cancelCopy()
		}
		mu.Unlock()
	}

	insertChannels := make([]chan string, len(scw.destinationShards))
	destinationWaitGroup := sync.WaitGroup{}
	for shardIndex, si := range scw.destinationShards {
		// we create one channel per destination tablet. It
		// is sized to have a buffer of a maximum of
		// destinationWriterCount * 2 items, to hopefully
		// always have data. We then have
		// destinationWriterCount go routines reading from it.
		insertChannels[shardIndex] = make(chan string, scw.destinationWriterCount*2)

		go func(shardName string, insertChannel chan string) {
			for j := 0; j < scw.destinationWriterCount; j++ {
				destinationWaitGroup.Add(1)
				go func() {
					defer destinationWaitGroup.Done()
					if err := executeFetchLoop(ctx, scw.wr, scw, shardName, insertChannel); err != nil {
						processError("executeFetchLoop failed: %v", err)
					}
				}()
			}
		}(si.ShardName(), insertChannels[shardIndex])
	}

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	for shardIndex := range scw.sourceShards {
		sema := sync2.NewSemaphore(scw.sourceReaderCount, 0)
		for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
			if td.Type == myproto.TableView {
				continue
			}

			rowSplitter := NewRowSplitter(scw.destinationShards, key.ProtoToKeyspaceIdType(scw.keyspaceInfo.ShardingColumnType), columnIndexes[tableIndex])

			chunks, err := FindChunks(ctx, scw.wr, scw.sourceTablets[shardIndex], td, scw.minTableSizeForSplit, scw.sourceReaderCount)
			if err != nil {
				return err
			}
			scw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

			for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
				sourceWaitGroup.Add(1)
				go func(td *myproto.TableDefinition, tableIndex, chunkIndex int) {
					defer sourceWaitGroup.Done()

					sema.Acquire()
					defer sema.Release()

					scw.tableStatus[tableIndex].threadStarted()

					// build the query, and start the streaming
					selectSQL := buildSQLFromChunks(scw.wr, td, chunks, chunkIndex, scw.sourceAliases[shardIndex].String())
					qrr, err := NewQueryResultReaderForTablet(ctx, scw.wr.TopoServer(), scw.sourceAliases[shardIndex], selectSQL)
					if err != nil {
						processError("NewQueryResultReaderForTablet failed: %v", err)
						return
					}
					defer qrr.Close()

					// process the data
					if err := scw.processData(td, tableIndex, qrr, rowSplitter, insertChannels, scw.destinationPackCount, ctx.Done()); err != nil {
						processError("processData failed: %v", err)
					}
					scw.tableStatus[tableIndex].threadDone()
				}(td, tableIndex, chunkIndex)
			}
		}
	}
	sourceWaitGroup.Wait()

	for shardIndex := range scw.destinationShards {
		close(insertChannels[shardIndex])
	}
	destinationWaitGroup.Wait()
	if firstError != nil {
		return firstError
	}

	// then create and populate the blp_checkpoint table
	if scw.strategy.PopulateBlpCheckpoint {
		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if scw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BlpFlagDontStart
		}

		// get the current position from the sources
		for shardIndex := range scw.sourceShards {
			shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
			status, err := scw.wr.TabletManagerClient().SlaveStatus(shortCtx, scw.sourceTablets[shardIndex])
			cancel()
			if err != nil {
				return err
			}
			queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))
		}

		for _, si := range scw.destinationShards {
			destinationWaitGroup.Add(1)
			go func(shardName string) {
				defer destinationWaitGroup.Done()
				scw.wr.Logger().Infof("Making and populating blp_checkpoint table")
				if err := runSQLCommands(ctx, scw.wr, scw, shardName, queries); err != nil {
					processError("blp_checkpoint queries failed: %v", err)
				}
			}(si.ShardName())
		}
		destinationWaitGroup.Wait()
		if firstError != nil {
			return firstError
		}
	}

	// Now we're done with data copy, update the shard's source info.
	// TODO(alainjobart) this is a superset, some shards may not
	// overlap, have to deal with this better (for N -> M splits
	// where both N>1 and M>1)
	if scw.strategy.SkipSetSourceShards {
		scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.")
	} else {
		for _, si := range scw.destinationShards {
			scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", si.Keyspace(), si.ShardName())
			shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
			err := scw.wr.SetSourceShards(shortCtx, si.Keyspace(), si.ShardName(), scw.sourceAliases, nil)
			cancel()
			if err != nil {
				return fmt.Errorf("failed to set source shards: %v", err)
			}
		}
	}

	err = scw.findReloadTargets(ctx)
	if err != nil {
		return fmt.Errorf("failed before reloading schema on destination tablets: %v", err)
	}
	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for shardIndex := range scw.destinationShards {
		for _, tabletAlias := range scw.reloadAliases[shardIndex] {
			destinationWaitGroup.Add(1)
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				scw.wr.Logger().Infof("Reloading schema on tablet %v", ti.AliasString())
				shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
				err := scw.wr.TabletManagerClient().ReloadSchema(shortCtx, ti)
				cancel()
				if err != nil {
					processError("ReloadSchema failed on tablet %v: %v", ti.AliasString(), err)
				}
			}(scw.reloadTablets[shardIndex][*tabletAlias])
		}
	}
	destinationWaitGroup.Wait()
	return firstError
}
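// Illustrative sketch (not part of the original source): the copy phase above
// records only the first error reported by any worker goroutine and cancels
// the shared context so that all other readers and writers wind down early.
// The same pattern in isolation, with hypothetical names:
//
//	ctx, cancel := context.WithCancel(ctx)
//	mu := sync.Mutex{}
//	var firstError error
//	processError := func(format string, args ...interface{}) {
//		mu.Lock()
//		if firstError == nil {
//			firstError = fmt.Errorf(format, args...)
//			cancel()
//		}
//		mu.Unlock()
//	}
//	// worker goroutines call processError(...) on failure and watch ctx.Done()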
// ApplyBinlogEvents makes an RPC request to BinlogServer
// and processes the events. It will return nil if the provided context
// was canceled, or if we reached the stopping point.
// It will return io.EOF if the server stops sending us updates.
// It may return any other error it encounters.
func (blp *BinlogPlayer) ApplyBinlogEvents(ctx context.Context) error {
	if len(blp.tables) > 0 {
		log.Infof("BinlogPlayer client %v for tables %v starting @ '%v', server: %v",
			blp.blpPos.Uid,
			blp.tables,
			blp.blpPos.Position,
			blp.endPoint,
		)
	} else {
		log.Infof("BinlogPlayer client %v for keyrange '%v-%v' starting @ '%v', server: %v",
			blp.blpPos.Uid,
			hex.EncodeToString(blp.keyRange.Start),
			hex.EncodeToString(blp.keyRange.End),
			blp.blpPos.Position,
			blp.endPoint,
		)
	}
	if !blp.stopPosition.IsZero() {
		// We need to stop at some point. Sanity check the point.
		switch {
		case blp.blpPos.Position.Equal(blp.stopPosition):
			log.Infof("Not starting BinlogPlayer, we're already at the desired position %v", blp.stopPosition)
			return nil
		case blp.blpPos.Position.AtLeast(blp.stopPosition):
			return fmt.Errorf("starting point %v greater than stopping point %v", blp.blpPos.Position, blp.stopPosition)
		default:
			log.Infof("Will stop player when reaching %v", blp.stopPosition)
		}
	}

	clientFactory, ok := clientFactories[*binlogPlayerProtocol]
	if !ok {
		return fmt.Errorf("no binlog player client factory named %v", *binlogPlayerProtocol)
	}
	blplClient := clientFactory()
	err := blplClient.Dial(blp.endPoint, *binlogPlayerConnTimeout)
	if err != nil {
		log.Errorf("Error dialing binlog server: %v", err)
		return fmt.Errorf("error dialing binlog server: %v", err)
	}
	defer blplClient.Close()

	// Get the current charset of our connection, so we can ask the stream server
	// to check that they match. The streamer will also only send per-statement
	// charset data if that statement's charset is different from what we specify.
	if dbClient, ok := blp.dbClient.(*DBClient); ok {
		blp.defaultCharset, err = dbClient.dbConn.GetCharset()
		if err != nil {
			return fmt.Errorf("can't get charset to request binlog stream: %v", err)
		}
		log.Infof("original charset: %v", blp.defaultCharset)
		blp.currentCharset = blp.defaultCharset
		// Restore original charset when we're done.
		defer func() {
			log.Infof("restoring original charset %v", blp.defaultCharset)
			if csErr := dbClient.dbConn.SetCharset(blp.defaultCharset); csErr != nil {
				log.Errorf("can't restore original charset %v: %v", blp.defaultCharset, csErr)
			}
		}()
	}

	var responseChan chan *proto.BinlogTransaction
	var errFunc ErrFunc
	if len(blp.tables) > 0 {
		responseChan, errFunc, err = blplClient.StreamTables(ctx, myproto.EncodeReplicationPosition(blp.blpPos.Position), blp.tables, &blp.defaultCharset)
	} else {
		responseChan, errFunc, err = blplClient.StreamKeyRange(ctx, myproto.EncodeReplicationPosition(blp.blpPos.Position), key.ProtoToKeyspaceIdType(blp.keyspaceIdType), blp.keyRange, &blp.defaultCharset)
	}
	if err != nil {
		log.Errorf("Error sending streaming query to binlog server: %v", err)
		return fmt.Errorf("error sending streaming query to binlog server: %v", err)
	}

	for response := range responseChan {
		for {
			ok, err = blp.processTransaction(response)
			if err != nil {
				return fmt.Errorf("Error in processing binlog event %v", err)
			}
			if ok {
				if !blp.stopPosition.IsZero() {
					if blp.blpPos.Position.AtLeast(blp.stopPosition) {
						log.Infof("Reached stopping position, done playing logs")
						return nil
					}
				}
				break
			}
			log.Infof("Retrying txn")
			time.Sleep(1 * time.Second)
		}
	}
	switch err := errFunc(); err {
	case nil:
		return io.EOF
	case context.Canceled:
		return nil
	default:
		// if the context is canceled, we return nil (some RPC
		// implementations will remap the context error to their own
		// errors)
		select {
		case <-ctx.Done():
			if ctx.Err() == context.Canceled {
				return nil
			}
		default:
		}
		return fmt.Errorf("Error received from ServeBinlog %v", err)
	}
}
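// Illustrative sketch (not part of the original source): a caller driving the
// player based on the return values documented above. nil means the context
// was canceled or the stop position was reached; io.EOF means the server
// stopped sending updates, so the caller can reconnect and resume. The
// surrounding loop and retryDelay are hypothetical.
//
//	for {
//		switch err := blp.ApplyBinlogEvents(ctx); {
//		case err == nil:
//			return // canceled, or reached the stopping point
//		case err == io.EOF:
//			continue // stream ended, re-establish it and resume
//		default:
//			log.Errorf("ApplyBinlogEvents failed: %v, retrying", err)
//			time.Sleep(retryDelay)
//		}
//	}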