Beispiel #1
0
// CleanUp will run the recorded actions.
// If an action on a target fails, it will not run the next action on
// the same target.
// We return the aggregate errors for all cleanups.
// TODO(alainjobart) Actions should run concurrently on a per target
// basis. They are then serialized on each target.
func (cleaner *Cleaner) CleanUp(wr *Wrangler) error {
	actionMap := make(map[string]*cleanUpHelper)
	rec := concurrency.AllErrorRecorder{}
	cleaner.mu.Lock()
	for i := len(cleaner.actions) - 1; i >= 0; i-- {
		actionReference := cleaner.actions[i]
		helper, ok := actionMap[actionReference.target]
		if !ok {
			helper = &cleanUpHelper{
				err: nil,
			}
			actionMap[actionReference.target] = helper
		}
		if helper.err != nil {
			wr.Logger().Warningf("previous action failed on target %v, no running %v", actionReference.target, actionReference.name)
			continue
		}
		err := actionReference.action.CleanUp(wr)
		if err != nil {
			helper.err = err
			rec.RecordError(err)
			wr.Logger().Errorf("action %v failed on %v: %v", actionReference.name, actionReference.target, err)
		} else {
			wr.Logger().Infof("action %v successful on %v", actionReference.name, actionReference.target)
		}
	}
	cleaner.mu.Unlock()
	return rec.Error()
}
Beispiel #2
0
func DiffSchemaToArray(leftName string, left *SchemaDefinition, rightName string, right *SchemaDefinition) (result []string) {
	er := concurrency.AllErrorRecorder{}
	DiffSchema(leftName, left, rightName, right, &er)
	if er.HasErrors() {
		return er.ErrorStrings()
	} else {
		return nil
	}
}
Beispiel #3
0
func DiffPermissionsToArray(leftName string, left *Permissions, rightName string, right *Permissions) (result []string) {
	er := concurrency.AllErrorRecorder{}
	DiffPermissions(leftName, left, rightName, right, &er)
	if er.HasErrors() {
		return er.ErrorStrings()
	} else {
		return nil
	}
}
Beispiel #4
0
// CopyKeyspaces will create the keyspaces in the destination topo
func CopyKeyspaces(fromTS, toTS topo.Server) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("GetKeyspaces: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()

			k, err := fromTS.GetKeyspace(keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetKeyspace(%v): %v", keyspace, err))
				return
			}

			if err := toTS.CreateKeyspace(keyspace, k.Keyspace); err != nil {
				if err == topo.ErrNodeExists {
					log.Warningf("keyspace %v already exists", keyspace)
				} else {
					rec.RecordError(fmt.Errorf("CreateKeyspace(%v): %v", keyspace, err))
				}
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyKeyspaces failed: %v", rec.Error())
	}
}
Beispiel #5
0
// shardsWithSources returns all the shards that have SourceShards set
// with no Tables list.
func shardsWithSources(wr *wrangler.Wrangler) ([]map[string]string, error) {
	keyspaces, err := wr.TopoServer().GetKeyspaces()
	if err != nil {
		return nil, err
	}

	wg := sync.WaitGroup{}
	mu := sync.Mutex{} // protects result
	result := make([]map[string]string, 0, len(keyspaces))
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := wr.TopoServer().GetShardNames(keyspace)
			if err != nil {
				rec.RecordError(err)
				return
			}
			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()
					si, err := wr.TopoServer().GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(err)
						return
					}

					if len(si.SourceShards) > 0 && len(si.SourceShards[0].Tables) == 0 {
						mu.Lock()
						result = append(result, map[string]string{
							"Keyspace": keyspace,
							"Shard":    shard,
						})
						mu.Unlock()
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()

	if rec.HasErrors() {
		return nil, rec.Error()
	}
	if len(result) == 0 {
		return nil, fmt.Errorf("There are no shards with SourceShards")
	}
	return result, nil
}
Beispiel #6
0
func (wr *Wrangler) getMastersPosition(shards []*topo.ShardInfo) (map[*topo.ShardInfo]myproto.ReplicationPosition, error) {
	mu := sync.Mutex{}
	result := make(map[*topo.ShardInfo]myproto.ReplicationPosition)

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, si := range shards {
		wg.Add(1)
		go func(si *topo.ShardInfo) {
			defer wg.Done()
			wr.Logger().Infof("Gathering master position for %v", si.MasterAlias)
			ti, err := wr.ts.GetTablet(si.MasterAlias)
			if err != nil {
				rec.RecordError(err)
				return
			}

			pos, err := wr.tmc.MasterPosition(wr.ctx, ti)
			if err != nil {
				rec.RecordError(err)
				return
			}

			wr.Logger().Infof("Got master position for %v", si.MasterAlias)
			mu.Lock()
			result[si] = pos
			mu.Unlock()
		}(si)
	}
	wg.Wait()
	return result, rec.Error()
}
Beispiel #7
0
// RunUntil will run all the players until they reach the given position.
// Holds the map lock during that exercise, shouldn't take long at all.
func (blm *BinlogPlayerMap) RunUntil(blpPositionList *blproto.BlpPositionList, waitTimeout time.Duration) error {
	// lock and check state
	blm.mu.Lock()
	defer blm.mu.Unlock()
	if blm.state != BPM_STATE_STOPPED {
		return fmt.Errorf("RunUntil: player not stopped: %v", blm.state)
	}
	log.Infof("Starting map of binlog players until position")

	// find the exact stop position for all players, to be sure
	// we're not doing anything wrong
	posMap := make(map[uint32]myproto.ReplicationPosition)
	for _, bpc := range blm.players {
		blpPos, err := blpPositionList.FindBlpPositionById(bpc.sourceShard.Uid)
		if err != nil {
			return fmt.Errorf("No binlog position passed in for player Uid %v", bpc.sourceShard.Uid)
		}
		posMap[bpc.sourceShard.Uid] = blpPos.Position
	}

	// start all the players giving them where to stop
	for _, bpc := range blm.players {
		if err := bpc.StartUntil(posMap[bpc.sourceShard.Uid]); err != nil {
			return err
		}
	}

	// wait for all players to be stopped, or timeout
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, bpc := range blm.players {
		wg.Add(1)
		go func(bpc *BinlogPlayerController) {
			if err := bpc.WaitForStop(waitTimeout); err != nil {
				rec.RecordError(err)
			}
			wg.Done()
		}(bpc)
	}
	wg.Wait()

	return rec.Error()
}
Beispiel #8
0
func (wr *Wrangler) ValidateVersionKeyspace(keyspace string) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	}
	sort.Strings(shards)
	if len(shards) == 1 {
		return wr.ValidateVersionShard(keyspace, shards[0])
	}

	// find the reference version using the first shard's master
	si, err := wr.ts.GetShard(keyspace, shards[0])
	if err != nil {
		return err
	}
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	}
	referenceAlias := si.MasterAlias
	log.Infof("Gathering version for reference master %v", referenceAlias)
	referenceVersion, err := wr.GetVersion(referenceAlias)
	if err != nil {
		return err
	}

	// then diff with all tablets but master 0
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, shard := range shards {
		aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}

		for _, alias := range aliases {
			if alias == si.MasterAlias {
				continue
			}

			wg.Add(1)
			go wr.diffVersion(referenceVersion, referenceAlias, alias, &wg, &er)
		}
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	}
	return nil
}
Beispiel #9
0
func (wr *Wrangler) ValidateVersionShard(keyspace, shard string) error {
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	// get version from the master, or error
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shard)
	}
	log.Infof("Gathering version for master %v", si.MasterAlias)
	masterVersion, err := wr.GetVersion(si.MasterAlias)
	if err != nil {
		return err
	}

	// read all the aliases in the shard, that is all tablets that are
	// replicating from the master
	aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// then diff with all slaves
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		if alias == si.MasterAlias {
			continue
		}

		wg.Add(1)
		go wr.diffVersion(masterVersion, si.MasterAlias, alias, &wg, &er)
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	}
	return nil
}
Beispiel #10
0
// Run runs aggregates health statuses from all the reporters. If any
// errors occur during the reporting, they will be logged, but only
// the first error will be returned.
// It may return an empty map if no health condition is detected. Note
// it will not return nil, but an empty map.
func (ag *Aggregator) Run(tabletType topo.TabletType, shouldQueryServiceBeRunning bool) (map[string]string, error) {
	var (
		wg  sync.WaitGroup
		rec concurrency.AllErrorRecorder
	)

	results := make(chan map[string]string, len(ag.reporters))
	ag.mu.Lock()
	for name, rep := range ag.reporters {
		wg.Add(1)
		go func(name string, rep Reporter) {
			defer wg.Done()
			status, err := rep.Report(tabletType, shouldQueryServiceBeRunning)
			if err != nil {
				rec.RecordError(fmt.Errorf("%v: %v", name, err))
				return
			}
			results <- status
		}(name, rep)
	}
	ag.mu.Unlock()
	wg.Wait()
	close(results)
	if err := rec.Error(); err != nil {
		return nil, err
	}

	// merge and return the results
	result := make(map[string]string)
	for part := range results {
		for k, v := range part {
			if _, ok := result[k]; ok {
				return nil, fmt.Errorf("duplicate key: %v", k)
			}
			result[k] = v
		}
	}
	return result, nil
}
Beispiel #11
0
func keyspacesWithOverlappingShards(wr *wrangler.Wrangler) ([]map[string]string, error) {
	keyspaces, err := wr.TopoServer().GetKeyspaces()
	if err != nil {
		return nil, err
	}

	wg := sync.WaitGroup{}
	mu := sync.Mutex{} // protects result
	result := make([]map[string]string, 0, len(keyspaces))
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			osList, err := topotools.FindOverlappingShards(wr.TopoServer(), keyspace)
			if err != nil {
				rec.RecordError(err)
				return
			}
			mu.Lock()
			for _, os := range osList {
				result = append(result, map[string]string{
					"Keyspace": os.Left[0].Keyspace(),
					"Shard":    os.Left[0].ShardName(),
				})
			}
			mu.Unlock()
		}(keyspace)
	}
	wg.Wait()

	if rec.HasErrors() {
		return nil, rec.Error()
	}
	if len(result) == 0 {
		return nil, fmt.Errorf("There are no keyspaces with overlapping shards")
	}
	return result, nil
}
Beispiel #12
0
func (wr *Wrangler) waitForFilteredReplication(sourcePositions map[*topo.ShardInfo]myproto.ReplicationPosition, destinationShards []*topo.ShardInfo) error {
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, si := range destinationShards {
		wg.Add(1)
		go func(si *topo.ShardInfo) {
			defer wg.Done()
			for _, sourceShard := range si.SourceShards {
				// we're waiting on this guy
				blpPosition := blproto.BlpPosition{
					Uid: sourceShard.Uid,
				}

				// find the position it should be at
				for s, pos := range sourcePositions {
					if s.Keyspace() == sourceShard.Keyspace && s.ShardName() == sourceShard.Shard {
						blpPosition.Position = pos
					}
				}

				// and wait for it
				wr.Logger().Infof("Waiting for %v to catch up", si.MasterAlias)
				tablet, err := wr.ts.GetTablet(si.MasterAlias)
				if err != nil {
					rec.RecordError(err)
					return
				}

				if err := wr.tmc.WaitBlpPosition(context.TODO(), tablet, blpPosition, wr.ActionTimeout()); err != nil {
					rec.RecordError(err)
				} else {
					wr.Logger().Infof("%v caught up", si.MasterAlias)
				}
			}
		}(si)
	}
	wg.Wait()
	return rec.Error()
}
// keyspacesWithServedFrom returns all the keyspaces that have ServedFrom set
// to one value.
func keyspacesWithServedFrom(wr *wrangler.Wrangler) ([]string, error) {
	keyspaces, err := wr.TopoServer().GetKeyspaces()
	if err != nil {
		return nil, err
	}

	wg := sync.WaitGroup{}
	mu := sync.Mutex{} // protects result
	result := make([]string, 0, len(keyspaces))
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			ki, err := wr.TopoServer().GetKeyspace(keyspace)
			if err != nil {
				rec.RecordError(err)
				return
			}
			if len(ki.ServedFromMap) > 0 {
				mu.Lock()
				result = append(result, keyspace)
				mu.Unlock()
			}
		}(keyspace)
	}
	wg.Wait()

	if rec.HasErrors() {
		return nil, rec.Error()
	}
	if len(result) == 0 {
		return nil, fmt.Errorf("There are no keyspaces with ServedFrom")
	}
	return result, nil
}
Beispiel #14
0
// refreshMasters will just RPC-ping all the masters with RefreshState
func (wr *Wrangler) refreshMasters(shards []*topo.ShardInfo) error {
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, si := range shards {
		wg.Add(1)
		go func(si *topo.ShardInfo) {
			defer wg.Done()
			wr.Logger().Infof("RefreshState master %v", si.MasterAlias)
			ti, err := wr.ts.GetTablet(si.MasterAlias)
			if err != nil {
				rec.RecordError(err)
				return
			}

			if err := wr.tmc.RefreshState(wr.ctx, ti); err != nil {
				rec.RecordError(err)
			} else {
				wr.Logger().Infof("%v responded", si.MasterAlias)
			}
		}(si)
	}
	wg.Wait()
	return rec.Error()
}
Beispiel #15
0
// DbServingGraph returns the ServingGraph for the given cell.
func DbServingGraph(ts topo.Server, cell string) (servingGraph *ServingGraph) {
	servingGraph = &ServingGraph{
		Cell:      cell,
		Keyspaces: make(map[string]*KeyspaceNodes),
	}
	rec := concurrency.AllErrorRecorder{}

	keyspaces, err := ts.GetSrvKeyspaceNames(cell)
	if err != nil {
		servingGraph.Errors = append(servingGraph.Errors, fmt.Sprintf("GetSrvKeyspaceNames failed: %v", err))
		return
	}
	wg := sync.WaitGroup{}
	servingTypes := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY}
	for _, keyspace := range keyspaces {
		kn := newKeyspaceNodes()
		servingGraph.Keyspaces[keyspace] = kn
		wg.Add(1)
		go func(keyspace string, kn *KeyspaceNodes) {
			defer wg.Done()

			ks, err := ts.GetSrvKeyspace(cell, keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetSrvKeyspace(%v, %v) failed: %v", cell, keyspace, err))
				return
			}
			kn.ServedFrom = ks.ServedFrom

			displayedShards := make(map[string]bool)
			for _, partitionTabletType := range servingTypes {
				kp, ok := ks.Partitions[partitionTabletType]
				if !ok {
					continue
				}
				for _, srvShard := range kp.Shards {
					shard := srvShard.ShardName()
					if displayedShards[shard] {
						continue
					}
					displayedShards[shard] = true

					sn := &ShardNodes{
						Name:        shard,
						TabletNodes: make(TabletNodesByType),
						ServedTypes: srvShard.ServedTypes,
					}
					kn.ShardNodes = append(kn.ShardNodes, sn)
					wg.Add(1)
					go func(shard string, sn *ShardNodes) {
						defer wg.Done()
						tabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetSrvTabletTypesPerShard(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
							return
						}
						for _, tabletType := range tabletTypes {
							endPoints, err := ts.GetEndPoints(cell, keyspace, shard, tabletType)
							if err != nil {
								rec.RecordError(fmt.Errorf("GetEndPoints(%v, %v, %v, %v) failed: %v", cell, keyspace, shard, tabletType, err))
								continue
							}
							for _, endPoint := range endPoints.Entries {
								sn.TabletNodes[tabletType] = append(sn.TabletNodes[tabletType], newTabletNodeFromEndPoint(endPoint, cell))
							}
						}
					}(shard, sn)
				}
			}
		}(keyspace, kn)
	}
	wg.Wait()
	servingGraph.Errors = rec.ErrorStrings()
	return
}
Beispiel #16
0
// InitializeConnections pre-initializes all ShardConn which create underlying connections.
// It also populates topology cache by accessing it.
// It is not necessary to call this function before serving queries,
// but it would reduce connection overhead when serving.
func (stc *ScatterConn) InitializeConnections(ctx context.Context) error {
	ksNames, err := stc.toposerv.GetSrvKeyspaceNames(ctx, stc.cell)
	if err != nil {
		return err
	}
	var wg sync.WaitGroup
	var errRecorder concurrency.AllErrorRecorder
	for _, ksName := range ksNames {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			// get SrvKeyspace for cell/keyspace
			ks, err := stc.toposerv.GetSrvKeyspace(ctx, stc.cell, keyspace)
			if err != nil {
				errRecorder.RecordError(err)
				return
			}
			// work on all shards of all serving tablet types
			for _, tabletType := range ks.TabletTypes {
				ksPartition, ok := ks.Partitions[tabletType]
				if !ok {
					errRecorder.RecordError(fmt.Errorf("%v.%v is not in SrvKeyspace.Partitions", keyspace, string(tabletType)))
					continue
				}
				for _, shard := range ksPartition.Shards {
					wg.Add(1)
					go func(shardName string, tabletType topo.TabletType) {
						defer wg.Done()
						shardConn := stc.getConnection(ctx, keyspace, shardName, tabletType)
						err = shardConn.Dial(ctx)
						if err != nil {
							errRecorder.RecordError(err)
							return
						}
					}(shard.ShardName(), tabletType)
				}
			}
		}(ksName)
	}
	wg.Wait()
	if errRecorder.HasErrors() {
		return errRecorder.Error()
	}
	return nil
}
Beispiel #17
0
// ChangeType changes the type of the tablet and possibly also updates
// the health informaton for it. Make this external, since these
// transitions need to be forced from time to time.
//
// - if health is nil, we don't touch the Tablet's Health record.
// - if health is an empty map, we clear the Tablet's Health record.
// - if health has values, we overwrite the Tablet's Health record.
func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, health map[string]string, runHooks bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) {
		return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias)
	}

	if runHooks {
		// Only run the preflight_serving_type hook when
		// transitioning from non-serving to serving.
		if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) {
			if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil {
				return err
			}
		}
	}

	tablet.Type = newType
	if newType == topo.TYPE_IDLE {
		if tablet.Parent.IsZero() {
			si, err := ts.GetShard(tablet.Keyspace, tablet.Shard)
			if err != nil {
				return err
			}
			rec := concurrency.AllErrorRecorder{}
			wg := sync.WaitGroup{}
			for _, cell := range si.Cells {
				wg.Add(1)
				go func(cell string) {
					defer wg.Done()
					sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard)
					if err != nil {
						log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell)
						return
					}
					for _, rl := range sri.ReplicationLinks {
						if rl.Parent == tabletAlias {
							rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell))
						}
					}
				}(cell)
			}
			wg.Wait()
			if rec.HasErrors() {
				return rec.Error()
			}
		}
		tablet.Parent = topo.TabletAlias{}
		tablet.Keyspace = ""
		tablet.Shard = ""
		tablet.KeyRange = key.KeyRange{}
		tablet.Health = health
	}
	if health != nil {
		if len(health) == 0 {
			tablet.Health = nil
		} else {
			tablet.Health = health
		}
	}
	return topo.UpdateTablet(context.TODO(), ts, tablet)
}
Beispiel #18
0
// CopyTablets will create the tablets in the destination topo
func CopyTablets(fromTS, toTS topo.Server) {
	cells, err := fromTS.GetKnownCells()
	if err != nil {
		log.Fatalf("fromTS.GetKnownCells: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range cells {
		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			tabletAliases, err := fromTS.GetTabletsByCell(cell)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetTabletsByCell(%v): %v", cell, err))
			} else {
				for _, tabletAlias := range tabletAliases {
					wg.Add(1)
					go func(tabletAlias topo.TabletAlias) {
						defer wg.Done()

						// read the source tablet
						ti, err := fromTS.GetTablet(tabletAlias)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetTablet(%v): %v", tabletAlias, err))
							return
						}

						// try to create the destination
						err = toTS.CreateTablet(ti.Tablet)
						if err == topo.ErrNodeExists {
							// update the destination tablet
							log.Warningf("tablet %v already exists, updating it", tabletAlias)
							err = toTS.UpdateTabletFields(ti.Alias, func(t *topo.Tablet) error {
								*t = *ti.Tablet
								return nil
							})
						}
						if err != nil {
							rec.RecordError(fmt.Errorf("CreateTablet(%v): %v", tabletAlias, err))
							return
						}
					}(tabletAlias)
				}
			}
		}(cell)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyTablets failed: %v", rec.Error())
	}
}
Beispiel #19
0
// Update shard file with new master, replicas, etc.
//
// Re-read from TopologyServer to make sure we are using the side
// effects of all actions.
//
// This function locks individual SvrShard paths, so it doesn't need a lock
// on the shard.
func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) (*topo.ShardInfo, error) {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topotools.RebuildShard")
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(keyspace, shard)
	if err != nil {
		return nil, err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		// start with the master if it's in the current cell
		tabletsAsMap := make(map[topo.TabletAlias]bool)
		if shardInfo.MasterAlias.Cell == cell {
			tabletsAsMap[shardInfo.MasterAlias] = true
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()

			// Lock the SrvShard so we don't race with other rebuilds of the same
			// shard in the same cell (e.g. from our peer tablets).
			actionNode := actionnode.RebuildSrvShard()
			lockPath, err := actionNode.LockSrvShard(ctx, ts, cell, keyspace, shard, timeout, interrupted)
			if err != nil {
				rec.RecordError(err)
				return
			}

			// read the ShardReplication object to find tablets
			sri, err := ts.GetShardReplication(cell, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
				return
			}

			// add all relevant tablets to the map
			for _, rl := range sri.ReplicationLinks {
				tabletsAsMap[rl.TabletAlias] = true
				if rl.Parent.Cell == cell {
					tabletsAsMap[rl.Parent] = true
				}
			}

			// convert the map to a list
			aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap))
			for a := range tabletsAsMap {
				aliases = append(aliases, a)
			}

			// read all the Tablet records
			tablets, err := topo.GetTabletMap(ctx, ts, aliases)
			switch err {
			case nil:
				// keep going, we're good
			case topo.ErrPartialResult:
				log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell)
			default:
				rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err))
				return
			}

			// write the data we need to
			rebuildErr := rebuildCellSrvShard(ctx, log, ts, shardInfo, cell, tablets)

			// and unlock
			if err := actionNode.UnlockSrvShard(ctx, ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil {
				rec.RecordError(err)
			}
		}(cell)
	}
	wg.Wait()

	return shardInfo, rec.Error()
}
Beispiel #20
0
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell)

	// Get all existing db types so they can be removed if nothing
	// had been edited.
	existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
	}

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: tabletType
	//   value: EndPoints (list of server records)
	locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints)
	for _, tablet := range tablets {
		if !tablet.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if tablet.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias)
			}
			continue
		}

		// Check IsInServingGraph, we don't want to add tablets that
		// are not serving
		if !tablet.IsInServingGraph() {
			continue
		}

		// Check the Keyspace and Shard for the tablet are right
		if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard)
		}

		// Add the tablet to the list
		addrs, ok := locationAddrsMap[tablet.Type]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[tablet.Type] = addrs
		}
		entry, err := tablet.Tablet.EndPoint()
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here:
	// - writing all the tabletTypes records
	// - removing the unused records
	// - writing SrvShard
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the EndPoints nodes everywhere we want them
	for tabletType, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(tabletType topo.TabletType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType)
			span := trace.NewSpanFromContext(ctx)
			span.StartClient("TopoServer.UpdateEndPoints")
			span.Annotate("tablet_type", string(tabletType))
			if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err))
			}
			span.Finish()
			wg.Done()
		}(tabletType, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingTabletTypes - locationAddrsMap
	for _, tabletType := range existingTabletTypes {
		if _, ok := locationAddrsMap[tabletType]; !ok {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				log.Infof("removing stale db type from serving graph: %v", tabletType)
				span := trace.NewSpanFromContext(ctx)
				span.StartClient("TopoServer.DeleteEndPoints")
				span.Annotate("tablet_type", string(tabletType))
				if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err)
				}
				span.Finish()
				wg.Done()
			}(tabletType)
		}
	}

	// Update srvShard object
	wg.Add(1)
	go func() {
		log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName())
		srvShard := &topo.SrvShard{
			Name:        shardInfo.ShardName(),
			KeyRange:    shardInfo.KeyRange,
			ServedTypes: shardInfo.GetServedTypesPerCell(cell),
			MasterCell:  shardInfo.MasterAlias.Cell,
			TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)),
		}
		for tabletType := range locationAddrsMap {
			srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType)
		}

		span := trace.NewSpanFromContext(ctx)
		span.StartClient("TopoServer.UpdateSrvShard")
		span.Annotate("keyspace", shardInfo.Keyspace())
		span.Annotate("shard", shardInfo.ShardName())
		span.Annotate("cell", cell)
		if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil {
			rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err))
		}
		span.Finish()
		wg.Done()
	}()

	wg.Wait()
	return rec.Error()
}
Beispiel #21
0
// CopyShards will create the shards in the destination topo
func CopyShards(fromTS, toTS topo.Server, deleteKeyspaceShards bool) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := fromTS.GetShardNames(keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err))
				return
			}

			if deleteKeyspaceShards {
				if err := toTS.DeleteKeyspaceShards(keyspace); err != nil {
					rec.RecordError(fmt.Errorf("DeleteKeyspaceShards(%v): %v", keyspace, err))
					return
				}
			}

			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()
					if err := topo.CreateShard(toTS, keyspace, shard); err != nil {
						if err == topo.ErrNodeExists {
							log.Warningf("shard %v/%v already exists", keyspace, shard)
						} else {
							rec.RecordError(fmt.Errorf("CreateShard(%v, %v): %v", keyspace, shard, err))
							return
						}
					}

					si, err := fromTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					toSi, err := toTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("toTS.GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					if _, err := toTS.UpdateShard(si, toSi.Version()); err != nil {
						rec.RecordError(fmt.Errorf("UpdateShard(%v, %v): %v", keyspace, shard, err))
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyShards failed: %v", rec.Error())
	}
}
func (vsdw *VerticalSplitDiffWorker) diff() error {
	vsdw.setState(stateVSDDiff)

	vsdw.wr.Logger().Infof("Gathering schema information...")
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	wg.Add(1)
	go func() {
		var err error
		vsdw.destinationSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.destinationAlias, nil, nil, false)
		rec.RecordError(err)
		vsdw.wr.Logger().Infof("Got schema from destination %v", vsdw.destinationAlias)
		wg.Done()
	}()
	wg.Add(1)
	go func() {
		var err error
		vsdw.sourceSchemaDefinition, err = vsdw.wr.GetSchema(vsdw.sourceAlias, nil, nil, false)
		rec.RecordError(err)
		vsdw.wr.Logger().Infof("Got schema from source %v", vsdw.sourceAlias)
		wg.Done()
	}()
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// Build a list of regexp to exclude tables from source schema
	tableRegexps := make([]*regexp.Regexp, len(vsdw.shardInfo.SourceShards[0].Tables))
	for i, table := range vsdw.shardInfo.SourceShards[0].Tables {
		var err error
		tableRegexps[i], err = regexp.Compile(table)
		if err != nil {
			return fmt.Errorf("cannot compile regexp %v for table: %v", table, err)
		}
	}

	// Remove the tables we don't need from the source schema
	newSourceTableDefinitions := make([]*myproto.TableDefinition, 0, len(vsdw.destinationSchemaDefinition.TableDefinitions))
	for _, tableDefinition := range vsdw.sourceSchemaDefinition.TableDefinitions {
		found := false
		for _, tableRegexp := range tableRegexps {
			if tableRegexp.MatchString(tableDefinition.Name) {
				found = true
				break
			}
		}
		if !found {
			vsdw.wr.Logger().Infof("Removing table %v from source schema", tableDefinition.Name)
			continue
		}
		newSourceTableDefinitions = append(newSourceTableDefinitions, tableDefinition)
	}
	vsdw.sourceSchemaDefinition.TableDefinitions = newSourceTableDefinitions

	// Check the schema
	vsdw.wr.Logger().Infof("Diffing the schema...")
	rec = concurrency.AllErrorRecorder{}
	myproto.DiffSchema("destination", vsdw.destinationSchemaDefinition, "source", vsdw.sourceSchemaDefinition, &rec)
	if rec.HasErrors() {
		vsdw.wr.Logger().Warningf("Different schemas: %v", rec.Error())
	} else {
		vsdw.wr.Logger().Infof("Schema match, good.")
	}

	// run the diffs, 8 at a time
	vsdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range vsdw.destinationSchemaDefinition.TableDefinitions {
		wg.Add(1)
		go func(tableDefinition *myproto.TableDefinition) {
			defer wg.Done()
			sem.Acquire()
			defer sem.Release()

			vsdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			sourceQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.sourceAlias, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("TableScan(source) failed: %v", err)
				return
			}
			defer sourceQueryResultReader.Close()

			destinationQueryResultReader, err := TableScan(vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.destinationAlias, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("TableScan(destination) failed: %v", err)
				return
			}
			defer destinationQueryResultReader.Close()

			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				vsdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err)
				return
			}

			report, err := differ.Go(vsdw.wr.Logger())
			if err != nil {
				vsdw.wr.Logger().Errorf("Differ.Go failed: %v", err)
			} else {
				if report.HasDifferences() {
					vsdw.wr.Logger().Errorf("Table %v has differences: %v", tableDefinition.Name, report.String())
				} else {
					vsdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)
				}
			}
		}(tableDefinition)
	}
	wg.Wait()

	return nil
}
Beispiel #23
0
// MigrateServedTypes is used during horizontal splits to migrate a
// served type from a list of shards to another.
func (wr *Wrangler) MigrateServedTypes(keyspace, shard string, cells []string, servedType topo.TabletType, reverse, skipReFreshState bool) error {
	if servedType == topo.TYPE_MASTER {
		// we cannot migrate a master back, since when master migration
		// is done, the source shards are dead
		if reverse {
			return fmt.Errorf("Cannot migrate master back to %v/%v", keyspace, shard)
		}
		// we cannot skip refresh state for a master
		if skipReFreshState {
			return fmt.Errorf("Cannot skip refresh state for master migration on %v/%v", keyspace, shard)
		}
	}

	// find overlapping shards in this keyspace
	wr.Logger().Infof("Finding the overlapping shards in keyspace %v", keyspace)
	osList, err := topotools.FindOverlappingShards(wr.ts, keyspace)
	if err != nil {
		return fmt.Errorf("FindOverlappingShards failed: %v", err)
	}

	// find our shard in there
	os := topotools.OverlappingShardsForShard(osList, shard)
	if os == nil {
		return fmt.Errorf("Shard %v is not involved in any overlapping shards", shard)
	}

	// find which list is which: the sources have no source
	// shards, the destination have source shards. We check the
	// first entry in the lists, then just check they're
	// consistent
	var sourceShards []*topo.ShardInfo
	var destinationShards []*topo.ShardInfo
	if len(os.Left[0].SourceShards) == 0 {
		sourceShards = os.Left
		destinationShards = os.Right
	} else {
		sourceShards = os.Right
		destinationShards = os.Left
	}

	// Verify the sources has the type we're migrating (or not if reverse)
	for _, si := range sourceShards {
		if err := si.CheckServedTypesMigration(servedType, cells, !reverse); err != nil {
			return err
		}
	}

	// Verify the destinations do not have the type we're
	// migrating (or do if reverse)
	for _, si := range destinationShards {
		if err := si.CheckServedTypesMigration(servedType, cells, reverse); err != nil {
			return err
		}
	}

	// lock the shards: sources, then destinations
	// (note they're all ordered by shard name)
	actionNode := actionnode.MigrateServedTypes(servedType)
	sourceLockPath := make([]string, len(sourceShards))
	for i, si := range sourceShards {
		sourceLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode)
		if err != nil {
			wr.Logger().Errorf("Failed to lock source shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName())
			return err
		}
	}
	destinationLockPath := make([]string, len(destinationShards))
	for i, si := range destinationShards {
		destinationLockPath[i], err = wr.lockShard(si.Keyspace(), si.ShardName(), actionNode)
		if err != nil {
			wr.Logger().Errorf("Failed to lock destination shard %v/%v, may need to unlock other shards manually", si.Keyspace(), si.ShardName())
			return err
		}
	}

	// record the action error and all unlock errors
	rec := concurrency.AllErrorRecorder{}

	// execute the migration
	rec.RecordError(wr.migrateServedTypes(keyspace, sourceShards, destinationShards, cells, servedType, reverse))

	// unlock the shards, we're done
	for i := len(destinationShards) - 1; i >= 0; i-- {
		rec.RecordError(wr.unlockShard(destinationShards[i].Keyspace(), destinationShards[i].ShardName(), actionNode, destinationLockPath[i], nil))
	}
	for i := len(sourceShards) - 1; i >= 0; i-- {
		rec.RecordError(wr.unlockShard(sourceShards[i].Keyspace(), sourceShards[i].ShardName(), actionNode, sourceLockPath[i], nil))
	}

	// rebuild the keyspace serving graph if there was no error
	if !rec.HasErrors() {
		rec.RecordError(wr.RebuildKeyspaceGraph(keyspace, nil))
	}

	// Send a refresh to the source tablets we just disabled, iff:
	// - we're not migrating a master
	// - it is not a reverse migration
	// - we don't have any errors
	// - we're not told to skip the refresh
	if servedType != topo.TYPE_MASTER && !reverse && !rec.HasErrors() && !skipReFreshState {
		for _, si := range sourceShards {
			rec.RecordError(wr.RefreshTablesByShard(si, servedType, cells))
		}
	}

	return rec.Error()
}
Beispiel #24
0
// MigrateServedFrom is used during vertical splits to migrate a
// served type from a keyspace to another.
func (wr *Wrangler) MigrateServedFrom(keyspace, shard string, servedType topo.TabletType, cells []string, reverse bool) error {
	// read the destination keyspace, check it
	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		return err
	}
	if len(ki.ServedFromMap) == 0 {
		return fmt.Errorf("Destination keyspace %v is not a vertical split target", keyspace)
	}

	// read the destination shard, check it
	si, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}
	if len(si.SourceShards) != 1 || len(si.SourceShards[0].Tables) == 0 {
		return fmt.Errorf("Destination shard %v/%v is not a vertical split target", keyspace, shard)
	}

	// check the migration is valid before locking (will also be checked
	// after locking to be sure)
	if err := ki.CheckServedFromMigration(servedType, cells, si.SourceShards[0].Keyspace, !reverse); err != nil {
		return err
	}

	// lock the keyspace and shards
	actionNode := actionnode.MigrateServedFrom(servedType)
	keyspaceLockPath, err := wr.lockKeyspace(keyspace, actionNode)
	if err != nil {
		wr.Logger().Errorf("Failed to lock destination keyspace %v", keyspace)
		return err
	}
	destinationShardLockPath, err := wr.lockShard(keyspace, shard, actionNode)
	if err != nil {
		wr.Logger().Errorf("Failed to lock destination shard %v/%v", keyspace, shard)
		wr.unlockKeyspace(keyspace, actionNode, keyspaceLockPath, nil)
		return err
	}
	sourceKeyspace := si.SourceShards[0].Keyspace
	sourceShard := si.SourceShards[0].Shard
	sourceShardLockPath, err := wr.lockShard(sourceKeyspace, sourceShard, actionNode)
	if err != nil {
		wr.Logger().Errorf("Failed to lock source shard %v/%v", sourceKeyspace, sourceShard)
		wr.unlockShard(keyspace, shard, actionNode, destinationShardLockPath, nil)
		wr.unlockKeyspace(keyspace, actionNode, keyspaceLockPath, nil)
		return err
	}

	// record the action error and all unlock errors
	rec := concurrency.AllErrorRecorder{}

	// execute the migration
	rec.RecordError(wr.migrateServedFrom(ki, si, servedType, cells, reverse))

	rec.RecordError(wr.unlockShard(sourceKeyspace, sourceShard, actionNode, sourceShardLockPath, nil))
	rec.RecordError(wr.unlockShard(keyspace, shard, actionNode, destinationShardLockPath, nil))
	rec.RecordError(wr.unlockKeyspace(keyspace, actionNode, keyspaceLockPath, nil))

	// rebuild the keyspace serving graph if there was no error
	if rec.Error() == nil {
		rec.RecordError(wr.RebuildKeyspaceGraph(keyspace, cells))
	}

	return rec.Error()
}
Beispiel #25
0
// ValidateSchemaShard will diff the schema from all the tablets in
// the keyspace.
func (wr *Wrangler) ValidateSchemaKeyspace(keyspace string, excludeTables []string, includeViews bool) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	}
	sort.Strings(shards)
	if len(shards) == 1 {
		return wr.ValidateSchemaShard(keyspace, shards[0], excludeTables, includeViews)
	}

	// find the reference schema using the first shard's master
	si, err := wr.ts.GetShard(keyspace, shards[0])
	if err != nil {
		return err
	}
	if si.MasterAlias.Uid == topo.NO_TABLET {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	}
	referenceAlias := si.MasterAlias
	log.Infof("Gathering schema for reference master %v", referenceAlias)
	referenceSchema, err := wr.GetSchema(referenceAlias, nil, excludeTables, includeViews)
	if err != nil {
		return err
	}

	// then diff with all other tablets everywhere
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// first diff the slaves in the reference shard 0
	aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shards[0])
	if err != nil {
		return err
	}

	for _, alias := range aliases {
		if alias == si.MasterAlias {
			continue
		}

		wg.Add(1)
		go wr.diffSchema(referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
	}

	// then diffs all tablets in the other shards
	for _, shard := range shards[1:] {
		si, err := wr.ts.GetShard(keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}

		if si.MasterAlias.Uid == topo.NO_TABLET {
			er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard))
			continue
		}

		aliases, err := topo.FindAllTabletAliasesInShard(context.TODO(), wr.ts, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}

		for _, alias := range aliases {
			wg.Add(1)
			go wr.diffSchema(referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
		}
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Schema diffs:\n%v", er.Error().Error())
	}
	return nil
}
Beispiel #26
0
func (sdw *SplitDiffWorker) diff() error {
	sdw.setState(stateSDDiff)

	sdw.wr.Logger().Infof("Gathering schema information...")
	sdw.sourceSchemaDefinitions = make([]*myproto.SchemaDefinition, len(sdw.sourceAliases))
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	wg.Add(1)
	go func() {
		var err error
		sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema(sdw.destinationAlias, nil, nil, false)
		rec.RecordError(err)
		sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias)
		wg.Done()
	}()
	for i, sourceAlias := range sdw.sourceAliases {
		wg.Add(1)
		go func(i int, sourceAlias topo.TabletAlias) {
			var err error
			sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema(sourceAlias, nil, nil, false)
			rec.RecordError(err)
			sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias)
			wg.Done()
		}(i, sourceAlias)
	}
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// TODO(alainjobart) Checking against each source may be
	// overkill, if all sources have the same schema?
	sdw.wr.Logger().Infof("Diffing the schema...")
	rec = concurrency.AllErrorRecorder{}
	for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions {
		sourceName := fmt.Sprintf("source[%v]", i)
		myproto.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec)
	}
	if rec.HasErrors() {
		sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error())
	} else {
		sdw.wr.Logger().Infof("Schema match, good.")
	}

	// run the diffs, 8 at a time
	sdw.wr.Logger().Infof("Running the diffs...")
	sem := sync2.NewSemaphore(8, 0)
	for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions {
		wg.Add(1)
		go func(tableDefinition *myproto.TableDefinition) {
			defer wg.Done()
			sem.Acquire()
			defer sem.Release()

			sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name)
			if len(sdw.sourceAliases) != 1 {
				sdw.wr.Logger().Errorf("Don't support more than one source for table yet: %v", tableDefinition.Name)
				return
			}

			overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange)
			if err != nil {
				sdw.wr.Logger().Errorf("Source shard doesn't overlap with destination????: %v", err)
				return
			}
			sourceQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, sdw.keyspaceInfo.ShardingColumnType)
			if err != nil {
				sdw.wr.Logger().Errorf("TableScanByKeyRange(source) failed: %v", err)
				return
			}
			defer sourceQueryResultReader.Close()

			destinationQueryResultReader, err := TableScanByKeyRange(sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, key.KeyRange{}, sdw.keyspaceInfo.ShardingColumnType)
			if err != nil {
				sdw.wr.Logger().Errorf("TableScanByKeyRange(destination) failed: %v", err)
				return
			}
			defer destinationQueryResultReader.Close()

			differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition)
			if err != nil {
				sdw.wr.Logger().Errorf("NewRowDiffer() failed: %v", err)
				return
			}

			report, err := differ.Go(sdw.wr.Logger())
			if err != nil {
				sdw.wr.Logger().Errorf("Differ.Go failed: %v", err.Error())
			} else {
				if report.HasDifferences() {
					sdw.wr.Logger().Warningf("Table %v has differences: %v", tableDefinition.Name, report.String())
				} else {
					sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS)
				}
			}
		}(tableDefinition)
	}
	wg.Wait()

	return nil
}
Beispiel #27
0
// CopyShardReplications will create the ShardReplication objects in
// the destination topo
func CopyShardReplications(fromTS, toTS topo.Server) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := fromTS.GetShardNames(keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err))
				return
			}

			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()

					// read the source shard to get the cells
					si, err := fromTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					for _, cell := range si.Cells {
						sri, err := fromTS.GetShardReplication(cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v): %v", cell, keyspace, shard, err))
							continue
						}

						if err := toTS.UpdateShardReplicationFields(cell, keyspace, shard, func(oldSR *topo.ShardReplication) error {
							*oldSR = *sri.ShardReplication
							return nil
						}); err != nil {
							rec.RecordError(fmt.Errorf("UpdateShardReplicationFields(%v, %v, %v): %v", cell, keyspace, shard, err))
						}
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyShards failed: %v", rec.Error())
	}
}
Beispiel #28
0
// FindAllTabletAliasesInShard uses the replication graph to find all the
// tablet aliases in the given shard.
// It can return ErrPartialResult if some cells were not fetched,
// in which case the result only contains the cells that were fetched.
func FindAllTabletAliasesInShardByCell(ctx context.Context, ts Server, keyspace, shard string, cells []string) ([]TabletAlias, error) {
	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topo.FindAllTabletAliasesInShardbyCell")
	span.Annotate("keyspace", keyspace)
	span.Annotate("shard", shard)
	span.Annotate("num_cells", len(cells))
	defer span.Finish()

	// read the shard information to find the cells
	si, err := ts.GetShard(keyspace, shard)
	if err != nil {
		return nil, err
	}

	resultAsMap := make(map[TabletAlias]bool)
	if !si.MasterAlias.IsZero() {
		if InCellList(si.MasterAlias.Cell, cells) {
			resultAsMap[si.MasterAlias] = true
		}
	}

	// read the replication graph in each cell and add all found tablets
	wg := sync.WaitGroup{}
	mutex := sync.Mutex{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range si.Cells {
		if !InCellList(cell, cells) {
			continue
		}
		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			sri, err := ts.GetShardReplication(cell, keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
				return
			}

			mutex.Lock()
			for _, rl := range sri.ReplicationLinks {
				resultAsMap[rl.TabletAlias] = true
				if !rl.Parent.IsZero() && InCellList(rl.Parent.Cell, cells) {
					resultAsMap[rl.Parent] = true
				}
			}
			mutex.Unlock()
		}(cell)
	}
	wg.Wait()
	err = nil
	if rec.HasErrors() {
		log.Warningf("FindAllTabletAliasesInShard(%v,%v): got partial result: %v", keyspace, shard, rec.Error())
		err = ErrPartialResult
	}

	result := make([]TabletAlias, 0, len(resultAsMap))
	for a := range resultAsMap {
		result = append(result, a)
	}
	return result, err
}