// rebuildCellSrvShard computes and writes the serving graph data to a // single cell func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error { log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell) // Get all existing db types so they can be removed if nothing // had been edited. existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName()) if err != nil { if err != topo.ErrNoNode { return err } } // Update db type addresses in the serving graph // // locationAddrsMap is a map: // key: tabletType // value: EndPoints (list of server records) locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints) for _, tablet := range tablets { if !tablet.IsInReplicationGraph() { // only valid case is a scrapped master in the // catastrophic reparent case if tablet.Parent.Uid != topo.NO_TABLET { log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias) } continue } // Check IsInServingGraph, we don't want to add tablets that // are not serving if !tablet.IsInServingGraph() { continue } // Check the Keyspace and Shard for the tablet are right if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() { return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard) } // Add the tablet to the list addrs, ok := locationAddrsMap[tablet.Type] if !ok { addrs = topo.NewEndPoints() locationAddrsMap[tablet.Type] = addrs } entry, err := tablet.Tablet.EndPoint() if err != nil { log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err) continue } addrs.Entries = append(addrs.Entries, *entry) } // we're gonna parallelize a lot here: // - writing all the tabletTypes records // - removing the unused records // - writing SrvShard rec := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} // write all the EndPoints nodes everywhere we want them for tabletType, addrs := range locationAddrsMap { wg.Add(1) go func(tabletType topo.TabletType, addrs *topo.EndPoints) { log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType) span := trace.NewSpanFromContext(ctx) span.StartClient("TopoServer.UpdateEndPoints") span.Annotate("tablet_type", string(tabletType)) if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil { rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err)) } span.Finish() wg.Done() }(tabletType, addrs) } // Delete any pre-existing paths that were not updated by this process. // That's the existingTabletTypes - locationAddrsMap for _, tabletType := range existingTabletTypes { if _, ok := locationAddrsMap[tabletType]; !ok { wg.Add(1) go func(tabletType topo.TabletType) { log.Infof("removing stale db type from serving graph: %v", tabletType) span := trace.NewSpanFromContext(ctx) span.StartClient("TopoServer.DeleteEndPoints") span.Annotate("tablet_type", string(tabletType)) if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil { log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err) } span.Finish() wg.Done() }(tabletType) } } // Update srvShard object wg.Add(1) go func() { log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName()) srvShard := &topo.SrvShard{ Name: shardInfo.ShardName(), KeyRange: shardInfo.KeyRange, ServedTypes: shardInfo.GetServedTypesPerCell(cell), MasterCell: shardInfo.MasterAlias.Cell, TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)), } for tabletType := range locationAddrsMap { srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType) } span := trace.NewSpanFromContext(ctx) span.StartClient("TopoServer.UpdateSrvShard") span.Annotate("keyspace", shardInfo.Keyspace()) span.Annotate("shard", shardInfo.ShardName()) span.Annotate("cell", cell) if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil { rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err)) } span.Finish() wg.Done() }() wg.Wait() return rec.Error() }
func CheckServingGraph(t *testing.T, ts topo.Server) { cell := getLocalCell(t, ts) // test individual cell/keyspace/shard/type entries if _, err := ts.GetSrvTabletTypesPerShard(cell, "test_keyspace", "-10"); err != topo.ErrNoNode { t.Errorf("GetSrvTabletTypesPerShard(invalid): %v", err) } if _, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != topo.ErrNoNode { t.Errorf("GetEndPoints(invalid): %v", err) } endPoints := topo.EndPoints{ Entries: []topo.EndPoint{ topo.EndPoint{ Uid: 1, Host: "host1", NamedPortMap: map[string]int{"vt": 1234, "mysql": 1235, "vts": 1236}, }, }, } if err := ts.UpdateEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER, &endPoints); err != nil { t.Errorf("UpdateEndPoints(master): %v", err) } if types, err := ts.GetSrvTabletTypesPerShard(cell, "test_keyspace", "-10"); err != nil || len(types) != 1 || types[0] != topo.TYPE_MASTER { t.Errorf("GetSrvTabletTypesPerShard(1): %v %v", err, types) } addrs, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER) if err != nil { t.Errorf("GetEndPoints: %v", err) } if len(addrs.Entries) != 1 || addrs.Entries[0].Uid != 1 { t.Errorf("GetEndPoints(1): %v", addrs) } if pm := addrs.Entries[0].NamedPortMap; pm["vt"] != 1234 || pm["mysql"] != 1235 || pm["vts"] != 1236 { t.Errorf("GetSrcTabletType(1).NamedPortmap: want %v, got %v", endPoints.Entries[0].NamedPortMap, pm) } if err := ts.UpdateTabletEndpoint(cell, "test_keyspace", "-10", topo.TYPE_REPLICA, &topo.EndPoint{Uid: 2, Host: "host2"}); err != nil { t.Errorf("UpdateTabletEndpoint(invalid): %v", err) } if err := ts.UpdateTabletEndpoint(cell, "test_keyspace", "-10", topo.TYPE_MASTER, &topo.EndPoint{Uid: 1, Host: "host2"}); err != nil { t.Errorf("UpdateTabletEndpoint(master): %v", err) } if addrs, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != nil || len(addrs.Entries) != 1 || addrs.Entries[0].Uid != 1 { t.Errorf("GetEndPoints(2): %v %v", err, addrs) } if err := ts.UpdateTabletEndpoint(cell, "test_keyspace", "-10", topo.TYPE_MASTER, &topo.EndPoint{Uid: 3, Host: "host3"}); err != nil { t.Errorf("UpdateTabletEndpoint(master): %v", err) } if addrs, err := ts.GetEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != nil || len(addrs.Entries) != 2 { t.Errorf("GetEndPoints(2): %v %v", err, addrs) } if err := ts.DeleteEndPoints(cell, "test_keyspace", "-10", topo.TYPE_REPLICA); err != topo.ErrNoNode { t.Errorf("DeleteEndPoints(unknown): %v", err) } if err := ts.DeleteEndPoints(cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != nil { t.Errorf("DeleteEndPoints(master): %v", err) } // test cell/keyspace/shard entries (SrvShard) srvShard := topo.SrvShard{ ServedTypes: []topo.TabletType{topo.TYPE_MASTER}, TabletTypes: []topo.TabletType{topo.TYPE_REPLICA, topo.TYPE_RDONLY}, } if err := ts.UpdateSrvShard(cell, "test_keyspace", "-10", &srvShard); err != nil { t.Errorf("UpdateSrvShard(1): %v", err) } if _, err := ts.GetSrvShard(cell, "test_keyspace", "666"); err != topo.ErrNoNode { t.Errorf("GetSrvShard(invalid): %v", err) } if s, err := ts.GetSrvShard(cell, "test_keyspace", "-10"); err != nil || len(s.ServedTypes) != 1 || s.ServedTypes[0] != topo.TYPE_MASTER || len(s.TabletTypes) != 2 || s.TabletTypes[0] != topo.TYPE_REPLICA || s.TabletTypes[1] != topo.TYPE_RDONLY { t.Errorf("GetSrvShard(valid): %v", err) } // test cell/keyspace entries (SrvKeyspace) srvKeyspace := topo.SrvKeyspace{ Partitions: map[topo.TabletType]*topo.KeyspacePartition{ topo.TYPE_MASTER: &topo.KeyspacePartition{ Shards: []topo.SrvShard{ topo.SrvShard{ ServedTypes: []topo.TabletType{topo.TYPE_MASTER}, }, }, }, }, TabletTypes: []topo.TabletType{topo.TYPE_MASTER}, ShardingColumnName: "video_id", ShardingColumnType: key.KIT_UINT64, ServedFrom: map[topo.TabletType]string{ topo.TYPE_REPLICA: "other_keyspace", }, } if err := ts.UpdateSrvKeyspace(cell, "test_keyspace", &srvKeyspace); err != nil { t.Errorf("UpdateSrvKeyspace(1): %v", err) } if _, err := ts.GetSrvKeyspace(cell, "test_keyspace666"); err != topo.ErrNoNode { t.Errorf("GetSrvKeyspace(invalid): %v", err) } if k, err := ts.GetSrvKeyspace(cell, "test_keyspace"); err != nil || len(k.TabletTypes) != 1 || k.TabletTypes[0] != topo.TYPE_MASTER || len(k.Partitions) != 1 || len(k.Partitions[topo.TYPE_MASTER].Shards) != 1 || len(k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes) != 1 || k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes[0] != topo.TYPE_MASTER || k.ShardingColumnName != "video_id" || k.ShardingColumnType != key.KIT_UINT64 || k.ServedFrom[topo.TYPE_REPLICA] != "other_keyspace" { t.Errorf("GetSrvKeyspace(valid): %v %v", err, k) } if k, err := ts.GetSrvKeyspaceNames(cell); err != nil || len(k) != 1 || k[0] != "test_keyspace" { t.Errorf("GetSrvKeyspaceNames(): %v", err) } // check that updating a SrvKeyspace out of the blue works if err := ts.UpdateSrvKeyspace(cell, "unknown_keyspace_so_far", &srvKeyspace); err != nil { t.Errorf("UpdateSrvKeyspace(2): %v", err) } if k, err := ts.GetSrvKeyspace(cell, "unknown_keyspace_so_far"); err != nil || len(k.TabletTypes) != 1 || k.TabletTypes[0] != topo.TYPE_MASTER || len(k.Partitions) != 1 || len(k.Partitions[topo.TYPE_MASTER].Shards) != 1 || len(k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes) != 1 || k.Partitions[topo.TYPE_MASTER].Shards[0].ServedTypes[0] != topo.TYPE_MASTER || k.ShardingColumnName != "video_id" || k.ShardingColumnType != key.KIT_UINT64 || k.ServedFrom[topo.TYPE_REPLICA] != "other_keyspace" { t.Errorf("GetSrvKeyspace(out of the blue): %v %v", err, *k) } }
// DbServingGraph returns the ServingGraph for the given cell. func DbServingGraph(ts topo.Server, cell string) (servingGraph *ServingGraph) { servingGraph = &ServingGraph{ Cell: cell, Keyspaces: make(map[string]*KeyspaceNodes), } rec := concurrency.AllErrorRecorder{} keyspaces, err := ts.GetSrvKeyspaceNames(cell) if err != nil { servingGraph.Errors = append(servingGraph.Errors, fmt.Sprintf("GetSrvKeyspaceNames failed: %v", err)) return } wg := sync.WaitGroup{} servingTypes := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY} for _, keyspace := range keyspaces { kn := newKeyspaceNodes() servingGraph.Keyspaces[keyspace] = kn wg.Add(1) go func(keyspace string, kn *KeyspaceNodes) { defer wg.Done() ks, err := ts.GetSrvKeyspace(cell, keyspace) if err != nil { rec.RecordError(fmt.Errorf("GetSrvKeyspace(%v, %v) failed: %v", cell, keyspace, err)) return } kn.ServedFrom = ks.ServedFrom displayedShards := make(map[string]bool) for _, partitionTabletType := range servingTypes { kp, ok := ks.Partitions[partitionTabletType] if !ok { continue } for _, srvShard := range kp.Shards { shard := srvShard.ShardName() if displayedShards[shard] { continue } displayedShards[shard] = true sn := &ShardNodes{ Name: shard, TabletNodes: make(TabletNodesByType), ServedTypes: srvShard.ServedTypes, } kn.ShardNodes = append(kn.ShardNodes, sn) wg.Add(1) go func(shard string, sn *ShardNodes) { defer wg.Done() tabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetSrvTabletTypesPerShard(%v, %v, %v) failed: %v", cell, keyspace, shard, err)) return } for _, tabletType := range tabletTypes { endPoints, err := ts.GetEndPoints(cell, keyspace, shard, tabletType) if err != nil { rec.RecordError(fmt.Errorf("GetEndPoints(%v, %v, %v, %v) failed: %v", cell, keyspace, shard, tabletType, err)) continue } for _, endPoint := range endPoints.Entries { sn.TabletNodes[tabletType] = append(sn.TabletNodes[tabletType], newTabletNodeFromEndPoint(endPoint, cell)) } } }(shard, sn) } } }(keyspace, kn) } wg.Wait() servingGraph.Errors = rec.ErrorStrings() return }