Exemple #1
0
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell)

	// Get all existing db types so they can be removed if nothing
	// had been edited.
	existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
	}

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: tabletType
	//   value: EndPoints (list of server records)
	locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints)
	for _, tablet := range tablets {
		if !tablet.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if tablet.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias)
			}
			continue
		}

		// Check IsInServingGraph, we don't want to add tablets that
		// are not serving
		if !tablet.IsInServingGraph() {
			continue
		}

		// Check the Keyspace and Shard for the tablet are right
		if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard)
		}

		// Add the tablet to the list
		addrs, ok := locationAddrsMap[tablet.Type]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[tablet.Type] = addrs
		}
		entry, err := tablet.Tablet.EndPoint()
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here:
	// - writing all the tabletTypes records
	// - removing the unused records
	// - writing SrvShard
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the EndPoints nodes everywhere we want them
	for tabletType, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(tabletType topo.TabletType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType)
			span := trace.NewSpanFromContext(ctx)
			span.StartClient("TopoServer.UpdateEndPoints")
			span.Annotate("tablet_type", string(tabletType))
			if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err))
			}
			span.Finish()
			wg.Done()
		}(tabletType, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingTabletTypes - locationAddrsMap
	for _, tabletType := range existingTabletTypes {
		if _, ok := locationAddrsMap[tabletType]; !ok {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				log.Infof("removing stale db type from serving graph: %v", tabletType)
				span := trace.NewSpanFromContext(ctx)
				span.StartClient("TopoServer.DeleteEndPoints")
				span.Annotate("tablet_type", string(tabletType))
				if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err)
				}
				span.Finish()
				wg.Done()
			}(tabletType)
		}
	}

	// Update srvShard object
	wg.Add(1)
	go func() {
		log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName())
		srvShard := &topo.SrvShard{
			Name:        shardInfo.ShardName(),
			KeyRange:    shardInfo.KeyRange,
			ServedTypes: shardInfo.GetServedTypesPerCell(cell),
			MasterCell:  shardInfo.MasterAlias.Cell,
			TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)),
		}
		for tabletType := range locationAddrsMap {
			srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType)
		}

		span := trace.NewSpanFromContext(ctx)
		span.StartClient("TopoServer.UpdateSrvShard")
		span.Annotate("keyspace", shardInfo.Keyspace())
		span.Annotate("shard", shardInfo.ShardName())
		span.Annotate("cell", cell)
		if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil {
			rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err))
		}
		span.Finish()
		wg.Done()
	}()

	wg.Wait()
	return rec.Error()
}
Exemple #2
0
func CheckServingGraph(t *testing.T, ts topo.Server) {
	cell := getLocalCell(t, ts)

	// test individual cell/keyspace/shard/type entries
	if _, err := ts.GetSrvTabletTypesPerShard(cell, "test_keyspace", "-10"); err != topo.ErrNoNode {
		t.Errorf("GetSrvTabletTypesPerShard(invalid): %v", err)
	}
	if _, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != topo.ErrNoNode {
		t.Errorf("GetEndPoints(invalid): %v", err)
	}

	endPoints := topo.EndPoints{
		Entries: []topo.EndPoint{
			topo.EndPoint{
				Uid:          1,
				Host:         "host1",
				NamedPortMap: map[string]int{"vt": 1234, "mysql": 1235, "vts": 1236},
			},
		},
	}

	if err := ts.UpdateEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER, &endPoints); err != nil {
		t.Errorf("UpdateEndPoints(master): %v", err)
	}
	if types, err := ts.GetSrvTabletTypesPerShard(cell, "test_keyspace", "-10"); err != nil || len(types) != 1 || types[0] != topo.TYPE_MASTER {
		t.Errorf("GetSrvTabletTypesPerShard(1): %v %v", err, types)
	}

	addrs, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER)
	if err != nil {
		t.Errorf("GetEndPoints: %v", err)
	}
	if len(addrs.Entries) != 1 || addrs.Entries[0].Uid != 1 {
		t.Errorf("GetEndPoints(1): %v", addrs)
	}
	if pm := addrs.Entries[0].NamedPortMap; pm["vt"] != 1234 || pm["mysql"] != 1235 || pm["vts"] != 1236 {
		t.Errorf("GetSrcTabletType(1).NamedPortmap: want %v, got %v", endPoints.Entries[0].NamedPortMap, pm)
	}

	if err := ts.UpdateTabletEndpoint(cell, "test_keyspace", "-10", topo.TYPE_REPLICA, &topo.EndPoint{Uid: 2, Host: "host2"}); err != nil {
		t.Errorf("UpdateTabletEndpoint(invalid): %v", err)
	}
	if err := ts.UpdateTabletEndpoint(cell, "test_keyspace", "-10", topo.TYPE_MASTER, &topo.EndPoint{Uid: 1, Host: "host2"}); err != nil {
		t.Errorf("UpdateTabletEndpoint(master): %v", err)
	}
	if addrs, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != nil || len(addrs.Entries) != 1 || addrs.Entries[0].Uid != 1 {
		t.Errorf("GetEndPoints(2): %v %v", err, addrs)
	}
	if err := ts.UpdateTabletEndpoint(cell, "test_keyspace", "-10", topo.TYPE_MASTER, &topo.EndPoint{Uid: 3, Host: "host3"}); err != nil {
		t.Errorf("UpdateTabletEndpoint(master): %v", err)
	}
	if addrs, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != nil || len(addrs.Entries) != 2 {
		t.Errorf("GetEndPoints(2): %v %v", err, addrs)
	}

	if err := ts.DeleteEndPoints(cell, "test_keyspace", "-10", topo.TYPE_REPLICA); err != topo.ErrNoNode {
		t.Errorf("DeleteEndPoints(unknown): %v", err)
	}
	if err := ts.DeleteEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != nil {
		t.Errorf("DeleteEndPoints(master): %v", err)
	}

	// test cell/keyspace/shard entries (SrvShard)
	srvShard := topo.SrvShard{
		ServedTypes: []topo.TabletType{topo.TYPE_MASTER},
		TabletTypes: []topo.TabletType{topo.TYPE_REPLICA, topo.TYPE_RDONLY},
	}
	if err := ts.UpdateSrvShard(cell, "test_keyspace", "-10", &srvShard); err != nil {
		t.Errorf("UpdateSrvShard(1): %v", err)
	}
	if _, err := ts.GetSrvShard(cell, "test_keyspace", "666"); err != topo.ErrNoNode {
		t.Errorf("GetSrvShard(invalid): %v", err)
	}
	if s, err := ts.GetSrvShard(cell, "test_keyspace", "-10"); err != nil ||
		len(s.ServedTypes) != 1 ||
		s.ServedTypes[0] != topo.TYPE_MASTER ||
		len(s.TabletTypes) != 2 ||
		s.TabletTypes[0] != topo.TYPE_REPLICA ||
		s.TabletTypes[1] != topo.TYPE_RDONLY {
		t.Errorf("GetSrvShard(valid): %v", err)
	}

	// test cell/keyspace entries (SrvKeyspace)
	srvKeyspace := topo.SrvKeyspace{
		Partitions: map[topo.TabletType]*topo.KeyspacePartition{
			topo.TYPE_MASTER: &topo.KeyspacePartition{
				Shards: []topo.SrvShard{
					topo.SrvShard{
						ServedTypes: []topo.TabletType{topo.TYPE_MASTER},
					},
				},
			},
		},
		TabletTypes:        []topo.TabletType{topo.TYPE_MASTER},
		ShardingColumnName: "video_id",
		ShardingColumnType: key.KIT_UINT64,
		ServedFrom: map[topo.TabletType]string{
			topo.TYPE_REPLICA: "other_keyspace",
		},
	}
	if err := ts.UpdateSrvKeyspace(cell, "test_keyspace", &srvKeyspace); err != nil {
		t.Errorf("UpdateSrvKeyspace(1): %v", err)
	}
	if _, err := ts.GetSrvKeyspace(cell, "test_keyspace666"); err != topo.ErrNoNode {
		t.Errorf("GetSrvKeyspace(invalid): %v", err)
	}
	if k, err := ts.GetSrvKeyspace(cell, "test_keyspace"); err != nil ||
		len(k.TabletTypes) != 1 ||
		k.TabletTypes[0] != topo.TYPE_MASTER ||
		len(k.Partitions) != 1 ||
		len(k.Partitions[topo.TYPE_MASTER].Shards) != 1 ||
		len(k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes) != 1 ||
		k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes[0] != topo.TYPE_MASTER ||
		k.ShardingColumnName != "video_id" ||
		k.ShardingColumnType != key.KIT_UINT64 ||
		k.ServedFrom[topo.TYPE_REPLICA] != "other_keyspace" {
		t.Errorf("GetSrvKeyspace(valid): %v %v", err, k)
	}
	if k, err := ts.GetSrvKeyspaceNames(cell); err != nil || len(k) != 1 || k[0] != "test_keyspace" {
		t.Errorf("GetSrvKeyspaceNames(): %v", err)
	}

	// check that updating a SrvKeyspace out of the blue works
	if err := ts.UpdateSrvKeyspace(cell, "unknown_keyspace_so_far", &srvKeyspace); err != nil {
		t.Errorf("UpdateSrvKeyspace(2): %v", err)
	}
	if k, err := ts.GetSrvKeyspace(cell, "unknown_keyspace_so_far"); err != nil ||
		len(k.TabletTypes) != 1 ||
		k.TabletTypes[0] != topo.TYPE_MASTER ||
		len(k.Partitions) != 1 ||
		len(k.Partitions[topo.TYPE_MASTER].Shards) != 1 ||
		len(k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes) != 1 ||
		k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes[0] != topo.TYPE_MASTER ||
		k.ShardingColumnName != "video_id" ||
		k.ShardingColumnType != key.KIT_UINT64 ||
		k.ServedFrom[topo.TYPE_REPLICA] != "other_keyspace" {
		t.Errorf("GetSrvKeyspace(out of the blue): %v %v", err, *k)
	}
}
Exemple #3
0
// DbServingGraph returns the ServingGraph for the given cell.
func DbServingGraph(ts topo.Server, cell string) (servingGraph *ServingGraph) {
	servingGraph = &ServingGraph{
		Cell:      cell,
		Keyspaces: make(map[string]*KeyspaceNodes),
	}
	rec := concurrency.AllErrorRecorder{}

	keyspaces, err := ts.GetSrvKeyspaceNames(cell)
	if err != nil {
		servingGraph.Errors = append(servingGraph.Errors, fmt.Sprintf("GetSrvKeyspaceNames failed: %v", err))
		return
	}
	wg := sync.WaitGroup{}
	servingTypes := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY}
	for _, keyspace := range keyspaces {
		kn := newKeyspaceNodes()
		servingGraph.Keyspaces[keyspace] = kn
		wg.Add(1)
		go func(keyspace string, kn *KeyspaceNodes) {
			defer wg.Done()

			ks, err := ts.GetSrvKeyspace(cell, keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetSrvKeyspace(%v, %v) failed: %v", cell, keyspace, err))
				return
			}
			kn.ServedFrom = ks.ServedFrom

			displayedShards := make(map[string]bool)
			for _, partitionTabletType := range servingTypes {
				kp, ok := ks.Partitions[partitionTabletType]
				if !ok {
					continue
				}
				for _, srvShard := range kp.Shards {
					shard := srvShard.ShardName()
					if displayedShards[shard] {
						continue
					}
					displayedShards[shard] = true

					sn := &ShardNodes{
						Name:        shard,
						TabletNodes: make(TabletNodesByType),
						ServedTypes: srvShard.ServedTypes,
					}
					kn.ShardNodes = append(kn.ShardNodes, sn)
					wg.Add(1)
					go func(shard string, sn *ShardNodes) {
						defer wg.Done()
						tabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetSrvTabletTypesPerShard(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
							return
						}
						for _, tabletType := range tabletTypes {
							endPoints, err := ts.GetEndPoints(cell, keyspace, shard, tabletType)
							if err != nil {
								rec.RecordError(fmt.Errorf("GetEndPoints(%v, %v, %v, %v) failed: %v", cell, keyspace, shard, tabletType, err))
								continue
							}
							for _, endPoint := range endPoints.Entries {
								sn.TabletNodes[tabletType] = append(sn.TabletNodes[tabletType], newTabletNodeFromEndPoint(endPoint, cell))
							}
						}
					}(shard, sn)
				}
			}
		}(keyspace, kn)
	}
	wg.Wait()
	servingGraph.Errors = rec.ErrorStrings()
	return
}