Example #1
0
// Clone will do all the necessary actions to copy all the data from a
// source to a set of destinations.
func (wr *Wrangler) Clone(srcTabletAlias topo.TabletAlias, dstTabletAliases []topo.TabletAlias, forceMasterSnapshot bool, snapshotConcurrency, fetchConcurrency, fetchRetryCount int, serverMode bool) error {
	// make sure the destination can be restored into (otherwise
	// there is no point in taking the snapshot in the first place),
	// and reserve it.
	reserved := make([]topo.TabletAlias, 0, len(dstTabletAliases))
	for _, dstTabletAlias := range dstTabletAliases {
		err := wr.ReserveForRestore(srcTabletAlias, dstTabletAlias)
		if err != nil {
			wr.UnreserveForRestoreMulti(reserved)
			return err
		}
		reserved = append(reserved, dstTabletAlias)
		wr.Logger().Infof("Successfully reserved %v for restore", dstTabletAlias)
	}

	// take the snapshot, or put the server in SnapshotSource mode
	// srcFilePath, parentAlias, slaveStartRequired, readWrite
	sr, originalType, err := wr.Snapshot(srcTabletAlias, forceMasterSnapshot, snapshotConcurrency, serverMode)
	if err != nil {
		// The snapshot failed so un-reserve the destinations and return
		wr.UnreserveForRestoreMulti(reserved)
		return err
	}

	// try to restore the snapshot
	// In serverMode, and in the case where we're replicating from
	// the master, we can't wait for replication, as the master is down.
	wg := sync.WaitGroup{}
	rec := concurrency.FirstErrorRecorder{}
	for _, dstTabletAlias := range dstTabletAliases {
		wg.Add(1)
		go func(dstTabletAlias topo.TabletAlias) {
			e := wr.Restore(srcTabletAlias, sr.ManifestPath, dstTabletAlias, sr.ParentAlias, fetchConcurrency, fetchRetryCount, true, serverMode && originalType == topo.TYPE_MASTER)
			rec.RecordError(e)
			wg.Done()
		}(dstTabletAlias)
	}
	wg.Wait()
	err = rec.Error()

	// in any case, fix the server
	if serverMode {
		resetErr := wr.SnapshotSourceEnd(srcTabletAlias, sr.SlaveStartRequired, sr.ReadOnly, originalType)
		if resetErr != nil {
			if err == nil {
				// If there is no other error, this matters.
				err = resetErr
			} else {
				// In the context of a larger failure, just log a note to cleanup.
				wr.Logger().Errorf("Failed to reset snapshot source: %v - vtctl SnapshotSourceEnd is required", resetErr)
			}
		}
	}

	return err
}
Example #2
0
// FindAllShardsInKeyspace reads and returns all the existing shards in
// a keyspace. It doesn't take any lock.
func FindAllShardsInKeyspace(ts Server, keyspace string) (map[string]*ShardInfo, error) {
	shards, err := ts.GetShardNames(keyspace)
	if err != nil {
		return nil, err
	}

	result := make(map[string]*ShardInfo, len(shards))
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	rec := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			defer wg.Done()
			si, err := ts.GetShard(keyspace, shard)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShard(%v,%v) failed: %v", keyspace, shard, err))
				return
			}
			mu.Lock()
			result[shard] = si
			mu.Unlock()
		}(shard)
	}
	wg.Wait()
	if rec.HasErrors() {
		return nil, rec.Error()
	}
	return result, nil
}
Example #3
0
// This function should only be used with an action lock on the keyspace
// - otherwise the consistency of the serving graph data can't be
// guaranteed.
//
// Take data from the global keyspace and rebuild the local serving
// copies in each cell.
func (wr *Wrangler) rebuildKeyspace(keyspace string, cells []string) error {
	wr.logger.Infof("rebuildKeyspace %v", keyspace)

	ki, err := wr.ts.GetKeyspace(keyspace)
	if err != nil {
		return err
	}

	shards, err := wr.ts.GetShardNames(keyspace)
	if err != nil {
		return err
	}

	// Rebuild all shards in parallel, save the shards
	shardCache := make(map[string]*topo.ShardInfo)
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	rec := concurrency.FirstErrorRecorder{}
	for _, shard := range shards {
		wg.Add(1)
		go func(shard string) {
			if shardInfo, err := wr.RebuildShardGraph(keyspace, shard, cells); err != nil {
				rec.RecordError(fmt.Errorf("RebuildShardGraph failed: %v/%v %v", keyspace, shard, err))
			} else {
				mu.Lock()
				shardCache[shard] = shardInfo
				mu.Unlock()
			}
			wg.Done()
		}(shard)
	}
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}

	// Build the list of cells to work on: we get the union
	// of all the Cells of all the Shards, limited to the provided cells.
	//
	// srvKeyspaceMap is a map:
	//   key: cell
	//   value: topo.SrvKeyspace object being built
	srvKeyspaceMap := make(map[string]*topo.SrvKeyspace)
	wr.findCellsForRebuild(ki, shardCache, cells, srvKeyspaceMap)

	// Then we add the cells from the keyspaces we might be 'ServedFrom'.
	for _, ksf := range ki.ServedFromMap {
		servedFromShards, err := topo.FindAllShardsInKeyspace(wr.ts, ksf.Keyspace)
		if err != nil {
			return err
		}
		wr.findCellsForRebuild(ki, servedFromShards, cells, srvKeyspaceMap)
	}

	// for each entry in the srvKeyspaceMap map, we do the following:
	// - read the SrvShard structures for each shard / cell
	// - if not present, build an empty one from global Shard
	// - compute the union of the db types (replica, master, ...)
	// - sort the shards in the list by range
	// - check the ranges are compatible (no hole, covers everything)
	for cell, srvKeyspace := range srvKeyspaceMap {
		keyspaceDbTypes := make(map[topo.TabletType]bool)
		srvKeyspace.Partitions = make(map[topo.TabletType]*topo.KeyspacePartition)
		for shard, si := range shardCache {
			srvShard, err := wr.ts.GetSrvShard(cell, keyspace, shard)
			switch err {
			case nil:
				// we keep going
			case topo.ErrNoNode:
				wr.logger.Infof("Cell %v for %v/%v has no SvrShard, using Shard data with no TabletTypes instead", cell, keyspace, shard)
				srvShard = &topo.SrvShard{
					Name:        si.ShardName(),
					KeyRange:    si.KeyRange,
					ServedTypes: si.GetServedTypesPerCell(cell),
					MasterCell:  si.MasterAlias.Cell,
				}
			default:
				return err
			}
			for _, tabletType := range srvShard.TabletTypes {
				keyspaceDbTypes[tabletType] = true
			}

			// for each type this shard is supposed to serve,
			// add it to srvKeyspace.Partitions
			for _, tabletType := range srvShard.ServedTypes {
				if _, ok := srvKeyspace.Partitions[tabletType]; !ok {
					srvKeyspace.Partitions[tabletType] = &topo.KeyspacePartition{
						Shards: make([]topo.SrvShard, 0)}
				}
				srvKeyspace.Partitions[tabletType].Shards = append(srvKeyspace.Partitions[tabletType].Shards, *srvShard)
			}
		}

		srvKeyspace.TabletTypes = make([]topo.TabletType, 0, len(keyspaceDbTypes))
		for dbType := range keyspaceDbTypes {
			srvKeyspace.TabletTypes = append(srvKeyspace.TabletTypes, dbType)
		}

		if err := wr.checkPartitions(cell, srvKeyspace); err != nil {
			return err
		}
	}

	// and then finally save the keyspace objects
	for cell, srvKeyspace := range srvKeyspaceMap {
		wr.logger.Infof("updating keyspace serving graph in cell %v for %v", cell, keyspace)
		if err := wr.ts.UpdateSrvKeyspace(cell, keyspace, srvKeyspace); err != nil {
			return fmt.Errorf("writing serving data failed: %v", err)
		}
	}
	return nil
}