예제 #1
파일: zkconn.go 프로젝트: henryanand/vitess
// init sizes the semaphore sem from a default concurrency of 64, optionally
// overridden by the integer parsed from x. A value that fails to parse is
// logged under the name ZK_CLIENT_MAX_CONCURRENCY.
//
// NOTE(review): x is not declared anywhere in the visible lines and the
// closing braces are missing, so this listing looks truncated. Presumably x
// holds the ZK_CLIENT_MAX_CONCURRENCY environment variable — confirm against
// the full file.
func init() {
	// The zookeeper C module logs quite a bit of useful information,
	// but much of it does not come back in the error API. To aid
	// debugging, enable the log to stderr for warnings.
	// NOTE(review): the call that enables this logging is not visible in
	// this excerpt.

	// Default limit, kept unless x is non-empty.
	maxConcurrency := 64
	if x != "" {
		var err error
		// strconv.Atoi returns 0 together with the error on failure, so
		// after a bad value maxConcurrency no longer holds the default.
		// NOTE(review): confirm the full source restores a usable value
		// before the semaphore is created.
		maxConcurrency, err = strconv.Atoi(x)
		if err != nil {
			log.Infof("invalid ZK_CLIENT_MAX_CONCURRENCY: %v", err)

	// NOTE(review): the meaning of the second argument (0) is defined by
	// sync2 and is not visible here.
	sem = sync2.NewSemaphore(maxConcurrency, 0)
예제 #2
// NewResourceConstraint returns a new ResourceConstraint whose semaphore
// field is created with sync2.NewSemaphore(max, 0); max is the concurrency
// limit handed to that semaphore.
//
// NOTE(review): the meaning of the second argument (0) is defined by sync2
// and is not visible here — confirm before changing it. The closing brace of
// this function is also missing from the listing.
func NewResourceConstraint(max int) *ResourceConstraint {
	return &ResourceConstraint{semaphore: sync2.NewSemaphore(max, 0)}
예제 #3
// diff compares the destination and source schemas held by the worker and
// then launches a per-table data comparison.
//
// Visible steps:
//   - fetch both schemas through wr.GetSchema in separate goroutines;
//   - compile shardInfo.SourceShards[0].Tables into regexps and rebuild the
//     source table list from them;
//   - run myproto.DiffSchema and log whether the schemas match;
//   - start one goroutine per destination table that scans both sides with
//     TableScan and compares them via NewRowDiffer / differ.Go.
//
// In the visible lines, schema and row differences are only logged and the
// function ends with return nil.
//
// NOTE(review): this listing looks truncated — closing braces, the handling
// of the err values returned by GetSchema, and any wg.Add / wg.Wait or
// semaphore acquire calls are not visible. Confirm against the full file
// before relying on the control flow described here.
func (vsdw *VerticalSplitDiffWorker) diff() error {

	vsdw.wr.Logger().Infof("Gathering schema information...")
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	// Destination schema fetch.
	// NOTE(review): err is assigned but nothing visible records it in rec.
	go func() {
		var err error
		vsdw.destinationSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.destinationAlias, nil, nil, false)
		vsdw.wr.Logger().Infof("Got schema from destination %v", vsdw.destinationAlias)
	// Source schema fetch, same shape as above.
	go func() {
		var err error
		vsdw.sourceSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.sourceAlias, nil, nil, false)
		vsdw.wr.Logger().Infof("Got schema from source %v", vsdw.sourceAlias)
	// NOTE(review): no wait on the two goroutines is visible before rec is
	// inspected here.
	if rec.HasErrors() {
		return rec.Error()

	// Build a list of regexp to exclude tables from source schema
	tableRegexps := make([]*regexp.Regexp, len(vsdw.shardInfo.SourceShards[0].Tables))
	for i, table := range vsdw.shardInfo.SourceShards[0].Tables {
		var err error
		tableRegexps[i], err = regexp.Compile(table)
		if err != nil {
			return fmt.Errorf("cannot compile regexp %v for table: %v", table, err)

	// Remove the tables we don't need from the source schema
	// The capacity below comes from the destination table count, while the
	// loop iterates over the source tables.
	newSourceTableDefinitions := make([]*myproto.TableDefinition, 0, len(vsdw.destinationSchemaDefinition.TableDefinitions))
	for _, tableDefinition := range vsdw.sourceSchemaDefinition.TableDefinitions {
		found := false
		for _, tableRegexp := range tableRegexps {
			if tableRegexp.MatchString(tableDefinition.Name) {
				found = true
		// NOTE(review): with the braces missing it is unclear whether the
		// append below is skipped for unmatched tables; the log message
		// suggests they are dropped — confirm.
		if !found {
			vsdw.wr.Logger().Infof("Removing table %v from source schema", tableDefinition.Name)
		newSourceTableDefinitions = append(newSourceTableDefinitions, tableDefinition)
	vsdw.sourceSchemaDefinition.TableDefinitions = newSourceTableDefinitions

	// Check the schema
	vsdw.wr.Logger().Infof("Diffing the schema...")
	// rec is replaced with a fresh recorder so that only DiffSchema findings
	// are inspected below.
	rec = concurrency.AllErrorRecorder{}
	myproto.DiffSchema("destination", vsdw.destinationSchemaDefinition, "source", vsdw.sourceSchemaDefinition, &rec)
	if rec.HasErrors() {
		vsdw.wr.Logger().Warningf("Different schemas: %v", rec.Error())
	} else {
		vsdw.wr.Logger().Infof("Schema match, good.")

	// run the diffs, 8 at a time
	vsdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range vsdw.destinationSchemaDefinition.TableDefinitions {
		// tableDefinition is declared as a parameter so each goroutine works
		// on its own table.
		go func(tableDefinition *myproto.TableDefinition) {
			// NOTE(review): the wg.Add and semaphore acquire that these
			// defers pair with are not visible in this excerpt.
			defer wg.Done()
			defer sem.Release()

			vsdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			sourceQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.sourceAlias, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("TableScan(source) failed: %v", err)
			// NOTE(review): no return is visible after the Errorf above; if
			// execution continued on error, this Close would run on a reader
			// from a failed scan — confirm the full source returns first.
			defer sourceQueryResultReader.Close()

			destinationQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.destinationAlias, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("TableScan(destination) failed: %v", err)
			// NOTE(review): same concern as for the source reader above.
			defer destinationQueryResultReader.Close()

			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err)

			// The report is only logged: Errorf when the table differs,
			// Infof with row count and qps when it matches.
			report, err := differ.Go(vsdw.wr.Logger())
			if err != nil {
				vsdw.wr.Logger().Errorf("Differ.Go failed: %v", err)
			} else {
				if report.HasDifferences() {
					vsdw.wr.Logger().Errorf("Table %v has differences: %v", tableDefinition.Name, report.String())
				} else {
					vsdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)

	return nil
예제 #4
// diff compares the destination schema with the schema of every source
// alias and then launches a per-table data comparison.
//
// Visible steps:
//   - fetch the destination schema and one schema per entry of
//     sdw.sourceAliases through wr.GetSchema, each in its own goroutine;
//   - run myproto.DiffSchema between the destination and each source and
//     log whether the schemas match;
//   - start one goroutine per destination table that scans the first source
//     (restricted to the overlap computed by key.KeyRangesOverlap) and the
//     destination with TableScanByKeyRange, then compares them via
//     NewRowDiffer / differ.Go.
//
// In the visible lines, schema and row differences are only logged and the
// function ends with return nil.
//
// NOTE(review): this listing looks truncated — closing braces, the handling
// of the err values returned by GetSchema, and any wg.Add / wg.Wait or
// semaphore acquire calls are not visible. Confirm against the full file
// before relying on the control flow described here.
func (sdw *SplitDiffWorker) diff() error {

	sdw.wr.Logger().Infof("Gathering schema information...")
	// One slot per source alias; each slot is filled by its own goroutine.
	sdw.sourceSchemaDefinitions = make([]*myproto.SchemaDefinition, len(sdw.sourceAliases))
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	// NOTE(review): err is assigned but nothing visible records it in rec.
	go func() {
		var err error
		sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema(sdw.destinationAlias, nil, nil, false)
		sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias)
	for i, sourceAlias := range sdw.sourceAliases {
		// i and sourceAlias are passed as arguments so each goroutine writes
		// to its own slot.
		go func(i int, sourceAlias topo.TabletAlias) {
			var err error
			sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema(sourceAlias, nil, nil, false)
			sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias)
		}(i, sourceAlias)
	// NOTE(review): no wait on the goroutines above is visible before rec is
	// inspected here.
	if rec.HasErrors() {
		return rec.Error()

	// TODO(alainjobart) Checking against each source may be
	// overkill, if all sources have the same schema?
	sdw.wr.Logger().Infof("Diffing the schema...")
	// rec is replaced with a fresh recorder so that only DiffSchema findings
	// are inspected below.
	rec = concurrency.AllErrorRecorder{}
	for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions {
		sourceName := fmt.Sprintf("source[%v]", i)
		myproto.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec)
	if rec.HasErrors() {
		sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error())
	} else {
		sdw.wr.Logger().Infof("Schema match, good.")

	// run the diffs, 8 at a time
	sdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions {
		go func(tableDefinition *myproto.TableDefinition) {
			// NOTE(review): the wg.Add and semaphore acquire that these
			// defers pair with are not visible in this excerpt.
			defer wg.Done()
			defer sem.Release()

			sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			// Only a single source is handled; the code below always reads
			// sdw.sourceAliases[0] and SourceShards[0].
			// NOTE(review): no return is visible after this Errorf —
			// confirm the full source stops here.
			if len(sdw.sourceAliases) != 1 {
				sdw.wr.Logger().Errorf("Don't support more than one source for table yet: %v", tableDefinition.Name)

			// The source scan is restricted to the overlap between this
			// shard's key range and the first source shard's key range.
			overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange)
			if err != nil {
				sdw.wr.Logger().Errorf("Source shard doesn't overlap with destination????: %v", err)
			sourceQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, sdw.keyspaceInfo.ShardingColumnType)
			if err != nil {
				sdw.wr.Logger().Errorf("TableScanByKeyRange(source) failed: %v", err)
			// NOTE(review): no return is visible after the Errorf above; if
			// execution continued on error, this Close would run on a reader
			// from a failed scan — confirm the full source returns first.
			defer sourceQueryResultReader.Close()

			// The destination is scanned with an empty key.KeyRange{}.
			destinationQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, key.KeyRange{}, sdw.keyspaceInfo.ShardingColumnType)
			if err != nil {
				sdw.wr.Logger().Errorf("TableScanByKeyRange(destination) failed: %v", err)
			// NOTE(review): same concern as for the source reader above.
			defer destinationQueryResultReader.Close()

			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				sdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err)

			// The report is only logged: Warningf when the table differs,
			// Infof with row count and qps when it matches.
			report, err := differ.Go(sdw.wr.Logger())
			if err != nil {
				sdw.wr.Logger().Errorf("Differ.Go failed: %v", err.Error())
			} else {
				if report.HasDifferences() {
					sdw.wr.Logger().Warningf("Table %v has differences: %v", tableDefinition.Name, report.String())
				} else {
					sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)

	return nil
예제 #5
// copy phase:
//	- copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
//
// Visible steps: read the schema from the first source alias, locate the
// sharding column in every base table, start destinationWriterCount writer
// goroutines per destination tablet (each running executeFetchLoop on that
// tablet's insert channel), start one reader goroutine per table chunk per
// source shard (each feeding processData), optionally create and populate
// the blp_checkpoint table, optionally call SetSourceShards, and finally ask
// every tablet in reloadAliases to reload its schema. The first error
// reported through processError is what the function returns.
//
// NOTE(review): this listing looks truncated — closing braces and any
// WaitGroup Add/Wait, mutex Lock/Unlock, semaphore acquire, channel close or
// cancel() calls are not visible. Confirm against the full file before
// relying on the control flow described here.
func (scw *SplitCloneWorker) copy() error {

	// get source schema from the first shard
	// TODO(alainjobart): for now, we assume the schema is compatible
	// on all source shards. Furthermore, we estimate the number of rows
	// in each source shard for each table to be about the same
	// (rowCount is used to estimate an ETA)
	sourceSchemaDefinition, err := scw.wr.GetSchema(scw.sourceAliases[0], nil, scw.excludeTables, true)
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", scw.sourceAliases[0], err)
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter in tablet %v", scw.sourceAliases[0])
	scw.wr.Logger().Infof("Source tablet 0 has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	// One status entry per table; rowCount is estimated as the first
	// shard's count multiplied by the number of source aliases.
	scw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		scw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount * uint64(len(scw.sourceAliases)),
	scw.startTime = time.Now()

	// Find the column index for the sharding columns in all the databases, and count rows
	columnIndexes := make([]int, len(sourceSchemaDefinition.TableDefinitions))
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TABLE_BASE_TABLE {
			// find the column to split on
			// -1 marks "not found"; it is replaced when a column name
			// equals the keyspace's ShardingColumnName.
			columnIndexes[tableIndex] = -1
			for i, name := range td.Columns {
				if name == scw.keyspaceInfo.ShardingColumnName {
					columnIndexes[tableIndex] = i
			if columnIndexes[tableIndex] == -1 {
				return fmt.Errorf("table %v doesn't have a column named '%v'", td.Name, scw.keyspaceInfo.ShardingColumnName)

			// NOTE(review): this overwrites the estimate computed above
			// (td.RowCount * len(scw.sourceAliases)) with the single-shard
			// count — confirm that is intended.
			scw.tableStatus[tableIndex].rowCount = td.RowCount
		} else {
			scw.tableStatus[tableIndex].isView = true

	// In parallel, setup the channels to send SQL data chunks to for each destination tablet:
	// mu protects the abort channel for closing, and firstError
	mu := sync.Mutex{}
	abort := make(chan struct{})
	var firstError error

	// processError logs the error and, while abort is still non-nil, clears
	// abort and stores the formatted error in firstError, so only the first
	// reported error is kept.
	// NOTE(review): no mu.Lock/Unlock or close(abort) is visible here even
	// though the comment above says mu guards both; processError is called
	// from several goroutines below, so confirm the full source takes the
	// lock and closes the channel.
	processError := func(format string, args ...interface{}) {
		scw.wr.Logger().Errorf(format, args...)
		if abort != nil {
			abort = nil
			firstError = fmt.Errorf(format, args...)

	// since we're writing only to masters, we need to enable bin logs so that replication happens
	disableBinLogs := false

	insertChannels := make([][]chan string, len(scw.destinationShards))
	destinationWaitGroup := sync.WaitGroup{}
	for shardIndex, _ := range scw.destinationShards {
		insertChannels[shardIndex] = make([]chan string, len(scw.destinationAliases[shardIndex]))
		for i, tabletAlias := range scw.destinationAliases[shardIndex] {
			// we create one channel per destination tablet.  It
			// is sized to have a buffer of a maximum of
			// destinationWriterCount * 2 items, to hopefully
			// always have data. We then have
			// destinationWriterCount go routines reading from it.
			insertChannels[shardIndex][i] = make(chan string, scw.destinationWriterCount*2)

			// The tablet info and channel are passed as arguments, so the
			// writer goroutines do not depend on the loop variables.
			go func(ti *topo.TabletInfo, insertChannel chan string) {
				for j := 0; j < scw.destinationWriterCount; j++ {
					go func() {
						// NOTE(review): the matching
						// destinationWaitGroup.Add is not visible.
						defer destinationWaitGroup.Done()
						if err := executeFetchLoop(scw.wr, ti, insertChannel, abort, disableBinLogs); err != nil {
							processError("executeFetchLoop failed: %v", err)
			}(scw.destinationTablets[shardIndex][tabletAlias], insertChannels[shardIndex][i])

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	for shardIndex, _ := range scw.sourceShards {
		// A separate semaphore of size sourceReaderCount is created for
		// each source shard.
		sema := sync2.NewSemaphore(scw.sourceReaderCount, 0)
		for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
			// NOTE(review): the body of this check is not visible;
			// presumably views are skipped — confirm.
			if td.Type == myproto.TABLE_VIEW {

			rowSplitter := NewRowSplitter(scw.destinationShards, scw.keyspaceInfo.ShardingColumnType, columnIndexes[tableIndex])

			chunks, err := findChunks(scw.wr, scw.sourceTablets[shardIndex], td, scw.minTableSizeForSplit, scw.sourceReaderCount)
			// NOTE(review): returning here happens after writer goroutines
			// (and possibly reader goroutines for earlier tables) have been
			// started; no wait or abort is visible on this path.
			if err != nil {
				return err
			// len(chunks)-1 reader goroutines are started for this table,
			// matching the thread count recorded here.
			scw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

			for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
				go func(td *myproto.TableDefinition, tableIndex, chunkIndex int) {
					defer sourceWaitGroup.Done()

					defer sema.Release()


					// build the query, and start the streaming
					// NOTE(review): shardIndex is read from the enclosing
					// loop rather than passed in like td, tableIndex and
					// chunkIndex. Before Go 1.22 the loop variable is shared
					// across iterations, so a goroutine that runs late may
					// see a different shard — confirm and pass it
					// explicitly.
					selectSQL := buildSQLFromChunks(scw.wr, td, chunks, chunkIndex, scw.sourceAliases[shardIndex].String())
					qrr, err := NewQueryResultReaderForTablet(scw.wr.TopoServer(), scw.sourceAliases[shardIndex], selectSQL)
					if err != nil {
						processError("NewQueryResultReaderForTablet failed: %v", err)
					// NOTE(review): no return is visible after the error
					// above; confirm qrr is valid before this Close is
					// deferred.
					defer qrr.Close()

					// process the data
					if err := scw.processData(td, tableIndex, qrr, rowSplitter, insertChannels, scw.destinationPackCount, abort); err != nil {
						processError("processData failed: %v", err)
				}(td, tableIndex, chunkIndex)

	// NOTE(review): the body of this loop is not visible; presumably each
	// insert channel is closed once the readers are done — confirm.
	for shardIndex, _ := range scw.destinationShards {
		for _, c := range insertChannels[shardIndex] {
	if firstError != nil {
		return firstError

	// then create and populate the blp_checkpoint table
	if scw.strategy.PopulateBlpCheckpoint {
		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if scw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BLP_FLAG_DONT_START

		// get the current position from the sources
		for shardIndex, _ := range scw.sourceShards {
			// NOTE(review): no call to cancel is visible; confirm it is
			// invoked so the timeout's resources are released.
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			status, err := scw.wr.TabletManagerClient().SlaveStatus(ctx, scw.sourceTablets[shardIndex])
			if err != nil {
				return err

			// NOTE(review): the first argument is the constant 0 for every
			// source shard — confirm whether it should vary with
			// shardIndex.
			queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))

		// Run the collected queries on every destination tablet, one
		// goroutine per tablet.
		for shardIndex, _ := range scw.destinationShards {
			for _, tabletAlias := range scw.destinationAliases[shardIndex] {
				go func(ti *topo.TabletInfo) {
					defer destinationWaitGroup.Done()
					scw.wr.Logger().Infof("Making and populating blp_checkpoint table on tablet %v", ti.Alias)
					if err := runSqlCommands(scw.wr, ti, queries, abort, disableBinLogs); err != nil {
						processError("blp_checkpoint queries failed on tablet %v: %v", ti.Alias, err)
		if firstError != nil {
			return firstError

	// Now we're done with data copy, update the shard's source info.
	// TODO(alainjobart) this is a superset, some shards may not
	// overlap, have to deal with this better (for N -> M splits
	// where both N>1 and M>1)
	if scw.strategy.SkipSetSourceShards {
		scw.wr.Logger().Infof("Skipping setting SourceShard on destination shards.")
	} else {
		for _, si := range scw.destinationShards {
			scw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", si.Keyspace(), si.ShardName())
			if err := scw.wr.SetSourceShards(si.Keyspace(), si.ShardName(), scw.sourceAliases, nil); err != nil {
				return fmt.Errorf("Failed to set source shards: %v", err)

	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for shardIndex, _ := range scw.destinationShards {
		for _, tabletAlias := range scw.reloadAliases[shardIndex] {
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				scw.wr.Logger().Infof("Reloading schema on tablet %v", ti.Alias)
				// NOTE(review): no call to cancel is visible; confirm it is
				// invoked so the timeout's resources are released.
				ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
				if err := scw.wr.TabletManagerClient().ReloadSchema(ctx, ti); err != nil {
					processError("ReloadSchema failed on tablet %v: %v", ti.Alias, err)
	return firstError
예제 #6
// copy phase:
//	- copy the data from source tablets to destination masters (with replication on)
// Assumes that the schema has already been created on each destination tablet
// (probably from vtctl's CopySchemaShard)
//
// Visible steps: read the schema of vscw.tables from the source alias, start
// destinationWriterCount writer goroutines per destination tablet (each
// running executeFetchLoop on that tablet's insert channel), start one
// reader goroutine per table chunk (each feeding processData), optionally
// create and populate the blp_checkpoint table, optionally call
// SetSourceShards, and finally ask every tablet in reloadAliases to reload
// its schema. The first error reported through processError is what the
// function returns.
//
// NOTE(review): this listing looks truncated — closing braces and any
// WaitGroup Add/Wait, mutex Lock/Unlock, semaphore acquire, channel close or
// cancel() calls are not visible. Confirm against the full file before
// relying on the control flow described here.
func (vscw *VerticalSplitCloneWorker) copy() error {

	// get source schema
	sourceSchemaDefinition, err := vscw.wr.GetSchema(vscw.sourceAlias, vscw.tables, nil, true)
	if err != nil {
		return fmt.Errorf("cannot get schema from source %v: %v", vscw.sourceAlias, err)
	if len(sourceSchemaDefinition.TableDefinitions) == 0 {
		return fmt.Errorf("no tables matching the table filter")
	vscw.wr.Logger().Infof("Source tablet has %v tables to copy", len(sourceSchemaDefinition.TableDefinitions))
	// One status entry per table, seeded with the table's name and row
	// count.
	vscw.tableStatus = make([]*tableStatus, len(sourceSchemaDefinition.TableDefinitions))
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		vscw.tableStatus[i] = &tableStatus{
			name:     td.Name,
			rowCount: td.RowCount,
	vscw.startTime = time.Now()

	// Count rows
	// For base tables this stores the same td.RowCount already set when the
	// entry was created above; other table types are flagged as views.
	for i, td := range sourceSchemaDefinition.TableDefinitions {
		if td.Type == myproto.TABLE_BASE_TABLE {
			vscw.tableStatus[i].rowCount = td.RowCount
		} else {
			vscw.tableStatus[i].isView = true

	// In parallel, setup the channels to send SQL data chunks to for each destination tablet.
	// mu protects the abort channel for closing, and firstError
	mu := sync.Mutex{}
	abort := make(chan struct{})
	var firstError error

	// processError logs the error and, while abort is still non-nil, clears
	// abort and stores the formatted error in firstError, so only the first
	// reported error is kept.
	// NOTE(review): no mu.Lock/Unlock or close(abort) is visible here even
	// though the comment above says mu guards both; processError is called
	// from several goroutines below, so confirm the full source takes the
	// lock and closes the channel.
	processError := func(format string, args ...interface{}) {
		vscw.wr.Logger().Errorf(format, args...)
		if abort != nil {
			abort = nil
			firstError = fmt.Errorf(format, args...)

	// since we're writing only to masters, we need to enable bin logs so that replication happens
	disableBinLogs := false

	insertChannels := make([]chan string, len(vscw.destinationAliases))
	destinationWaitGroup := sync.WaitGroup{}
	for i, tabletAlias := range vscw.destinationAliases {
		// we create one channel per destination tablet.  It
		// is sized to have a buffer of a maximum of
		// destinationWriterCount * 2 items, to hopefully
		// always have data. We then have
		// destinationWriterCount go routines reading from it.
		insertChannels[i] = make(chan string, vscw.destinationWriterCount*2)

		// The tablet info and channel are passed as arguments, so the writer
		// goroutines do not depend on the loop variables.
		go func(ti *topo.TabletInfo, insertChannel chan string) {
			for j := 0; j < vscw.destinationWriterCount; j++ {
				go func() {
					// NOTE(review): the matching destinationWaitGroup.Add
					// is not visible.
					defer destinationWaitGroup.Done()

					if err := executeFetchLoop(vscw.wr, ti, insertChannel, abort, disableBinLogs); err != nil {
						processError("executeFetchLoop failed: %v", err)
		}(vscw.destinationTablets[tabletAlias], insertChannels[i])

	// Now for each table, read data chunks and send them to all
	// insertChannels
	sourceWaitGroup := sync.WaitGroup{}
	sema := sync2.NewSemaphore(vscw.sourceReaderCount, 0)
	for tableIndex, td := range sourceSchemaDefinition.TableDefinitions {
		// NOTE(review): the body of this check is not visible; presumably
		// views are skipped — confirm.
		if td.Type == myproto.TABLE_VIEW {

		chunks, err := findChunks(vscw.wr, vscw.sourceTablet, td, vscw.minTableSizeForSplit, vscw.sourceReaderCount)
		// NOTE(review): returning here happens after writer goroutines (and
		// possibly reader goroutines for earlier tables) have been started;
		// no wait or abort is visible on this path.
		if err != nil {
			return err
		// len(chunks)-1 reader goroutines are started for this table,
		// matching the thread count recorded here.
		vscw.tableStatus[tableIndex].setThreadCount(len(chunks) - 1)

		for chunkIndex := 0; chunkIndex < len(chunks)-1; chunkIndex++ {
			go func(td *myproto.TableDefinition, tableIndex, chunkIndex int) {
				defer sourceWaitGroup.Done()

				defer sema.Release()


				// build the query, and start the streaming
				selectSQL := buildSQLFromChunks(vscw.wr, td, chunks, chunkIndex, vscw.sourceAlias.String())
				qrr, err := NewQueryResultReaderForTablet(vscw.wr.TopoServer(), vscw.sourceAlias, selectSQL)
				if err != nil {
					processError("NewQueryResultReaderForTablet failed: %v", err)
				// NOTE(review): no return is visible after the error above;
				// confirm qrr is valid before this Close is deferred.
				defer qrr.Close()

				// process the data
				// NOTE(review): the message below says "QueryResultReader
				// failed" although the failing call is processData.
				if err := vscw.processData(td, tableIndex, qrr, insertChannels, vscw.destinationPackCount, abort); err != nil {
					processError("QueryResultReader failed: %v", err)
			}(td, tableIndex, chunkIndex)

	// NOTE(review): the body of this loop is not visible; presumably each
	// insert channel is closed once the readers are done — confirm.
	for _, c := range insertChannels {
	if firstError != nil {
		return firstError

	// then create and populate the blp_checkpoint table
	if vscw.strategy.PopulateBlpCheckpoint {
		// get the current position from the source
		// NOTE(review): no call to cancel is visible; confirm it is invoked
		// so the timeout's resources are released.
		ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
		status, err := vscw.wr.TabletManagerClient().SlaveStatus(ctx, vscw.sourceTablet)
		if err != nil {
			return err

		queries := make([]string, 0, 4)
		queries = append(queries, binlogplayer.CreateBlpCheckpoint()...)
		flags := ""
		if vscw.strategy.DontStartBinlogPlayer {
			flags = binlogplayer.BLP_FLAG_DONT_START
		queries = append(queries, binlogplayer.PopulateBlpCheckpoint(0, status.Position, time.Now().Unix(), flags))
		// Run the collected queries on every destination tablet, one
		// goroutine per tablet.
		for _, tabletAlias := range vscw.destinationAliases {
			go func(ti *topo.TabletInfo) {
				defer destinationWaitGroup.Done()
				vscw.wr.Logger().Infof("Making and populating blp_checkpoint table on tablet %v", ti.Alias)
				if err := runSqlCommands(vscw.wr, ti, queries, abort, disableBinLogs); err != nil {
					processError("blp_checkpoint queries failed on tablet %v: %v", ti.Alias, err)
		if firstError != nil {
			return firstError

	// Now we're done with data copy, update the shard's source info.
	if vscw.strategy.SkipSetSourceShards {
		vscw.wr.Logger().Infof("Skipping setting SourceShard on destination shard.")
	} else {
		vscw.wr.Logger().Infof("Setting SourceShard on shard %v/%v", vscw.destinationKeyspace, vscw.destinationShard)
		if err := vscw.wr.SetSourceShards(vscw.destinationKeyspace, vscw.destinationShard, []topo.TabletAlias{vscw.sourceAlias}, vscw.tables); err != nil {
			return fmt.Errorf("Failed to set source shards: %v", err)

	// And force a schema reload on all destination tablets.
	// The master tablet will end up starting filtered replication
	// at this point.
	for _, tabletAlias := range vscw.reloadAliases {
		go func(ti *topo.TabletInfo) {
			defer destinationWaitGroup.Done()
			vscw.wr.Logger().Infof("Reloading schema on tablet %v", ti.Alias)
			// NOTE(review): no call to cancel is visible; confirm it is
			// invoked so the timeout's resources are released.
			ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
			if err := vscw.wr.TabletManagerClient().ReloadSchema(ctx, ti); err != nil {
				processError("ReloadSchema failed on tablet %v: %v", ti.Alias, err)
	return firstError