func (zkts *Server) updateTabletEndpoint(oldValue string, oldStat zk.Stat, addr *topodatapb.EndPoint) (newValue string, err error) {
	if oldStat == nil {
		// The incoming object doesn't exist - we haven't been placed in the serving
		// graph yet, so don't update. Assume the next process that rebuilds the graph
		// will get the updated tablet location.
		return "", errSkipUpdate
	}

	var addrs *topodatapb.EndPoints
	if oldValue != "" {
		addrs = &topodatapb.EndPoints{}
		if err := json.Unmarshal([]byte(oldValue), addrs); err != nil {
			return "", fmt.Errorf("EndPoints unmarshal failed: %v %v", oldValue, err)
		}

		foundTablet := false
		for i, entry := range addrs.Entries {
			if entry.Uid == addr.Uid {
				foundTablet = true
				if !topo.EndPointEquality(entry, addr) {
					addrs.Entries[i] = addr
				}
				break
			}
		}

		if !foundTablet {
			addrs.Entries = append(addrs.Entries, addr)
		}
	} else {
		addrs = topo.NewEndPoints()
		addrs.Entries = append(addrs.Entries, addr)
	}

	data, err := json.MarshalIndent(addrs, "", " ")
	if err != nil {
		return "", err
	}
	return string(data), nil
}
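The function above is a read-modify-write step: it unmarshals the JSON-encoded endpoint list, merges one endpoint in keyed by Uid (replacing a changed entry, appending a new one), and re-serializes. A minimal, self-contained sketch of that merge logic, with local stand-in types rather than the real topodata protos:

package main

import (
	"encoding/json"
	"fmt"
)

// Stand-in types for the topodata EndPoint/EndPoints protos.
type EndPoint struct {
	Uid  uint32 `json:"uid"`
	Host string `json:"host"`
}

type EndPoints struct {
	Entries []*EndPoint `json:"entries"`
}

// mergeEndPoint replaces the entry with a matching Uid, or appends it.
func mergeEndPoint(addrs *EndPoints, addr *EndPoint) {
	for i, entry := range addrs.Entries {
		if entry.Uid == addr.Uid {
			addrs.Entries[i] = addr
			return
		}
	}
	addrs.Entries = append(addrs.Entries, addr)
}

func main() {
	oldValue := `{"entries":[{"uid":1,"host":"a"},{"uid":2,"host":"b"}]}`
	addrs := &EndPoints{}
	if err := json.Unmarshal([]byte(oldValue), addrs); err != nil {
		panic(err)
	}
	mergeEndPoint(addrs, &EndPoint{Uid: 2, Host: "b-new"})
	data, _ := json.MarshalIndent(addrs, "", " ")
	fmt.Println(string(data)) // uid 2 now points at b-new
}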
// Write serving graph data to the cells
func (wr *Wrangler) rebuildShardSrvGraph(shardInfo *topo.ShardInfo, tablets []*topo.TabletInfo, cells []string) error {
	log.Infof("rebuildShardSrvGraph %v/%v", shardInfo.Keyspace(), shardInfo.ShardName())

	// Get all existing db types so they can be removed if nothing
	// has been edited. This applies to all cells, which can't
	// be determined until you walk through all the tablets.
	//
	// existingDbTypeLocations is a map:
	//   key: {cell,keyspace,shard,tabletType}
	//   value: true
	existingDbTypeLocations := make(map[cellKeyspaceShardType]bool)

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: {cell,keyspace,shard,tabletType}
	//   value: topo.EndPoints (list of server records)
	locationAddrsMap := make(map[cellKeyspaceShardType]*topo.EndPoints)

	// we keep track of the existingDbTypeLocations we've already looked at
	knownShardLocations := make(map[cellKeyspaceShard]bool)

	for _, tablet := range tablets {
		// only look at tablets in the cells we want to rebuild
		if !topo.InCellList(tablet.Tablet.Alias.Cell, cells) {
			continue
		}

		// this is {cell,keyspace,shard}
		// we'll get the children to find the existing types
		shardLocation := cellKeyspaceShard{tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard}

		// only need to do this once per cell
		if !knownShardLocations[shardLocation] {
			log.Infof("Getting tablet types on cell %v for %v/%v", tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard)
			tabletTypes, err := wr.ts.GetSrvTabletTypesPerShard(tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard)
			if err != nil {
				if err != topo.ErrNoNode {
					return err
				}
			} else {
				for _, tabletType := range tabletTypes {
					existingDbTypeLocations[cellKeyspaceShardType{tablet.Tablet.Alias.Cell, tablet.Tablet.Keyspace, tablet.Shard, tabletType}] = true
				}
			}
			knownShardLocations[shardLocation] = true
		}

		// Check IsInServingGraph after we have populated
		// existingDbTypeLocations so we properly prune data
		// if the definition of serving type changes.
		if !tablet.IsInServingGraph() {
			continue
		}

		location := cellKeyspaceShardType{tablet.Tablet.Alias.Cell, tablet.Keyspace, tablet.Shard, tablet.Type}
		addrs, ok := locationAddrsMap[location]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[location] = addrs
		}

		entry, err := tabletmanager.EndPointForTablet(tablet.Tablet)
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the {cell,keyspace,shard,type}
	// nodes everywhere we want them
	for location, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(location cellKeyspaceShardType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", location.cell, location.keyspace, location.shard, location.tabletType)
			if err := wr.ts.UpdateEndPoints(location.cell, location.keyspace, location.shard, location.tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", location.cell, location.keyspace, location.shard, location.tabletType, err))
			}
			wg.Done()
		}(location, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingDbTypeLocations - locationAddrsMap
	for dbTypeLocation := range existingDbTypeLocations {
		if _, ok := locationAddrsMap[dbTypeLocation]; !ok {
			cell := dbTypeLocation.cell
			if !topo.InCellList(cell, cells) {
				continue
			}

			wg.Add(1)
			go func(dbTypeLocation cellKeyspaceShardType) {
				log.Infof("removing stale db type from serving graph: %v", dbTypeLocation)
				if err := wr.ts.DeleteSrvTabletType(dbTypeLocation.cell, dbTypeLocation.keyspace, dbTypeLocation.shard, dbTypeLocation.tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", dbTypeLocation, err)
				}
				wg.Done()
			}(dbTypeLocation)
		}
	}

	// wait until we're done with the background stuff to do the rest
	// FIXME(alainjobart) this wouldn't be necessary if UpdateSrvShard
	// below was creating the zookeeper nodes recursively.
	wg.Wait()
	if err := rec.Error(); err != nil {
		return err
	}

	// Update per-shard information per cell-specific serving path.
	//
	// srvShardByPath is a map:
	//   key: {cell,keyspace,shard}
	//   value: topo.SrvShard
	// this will create all the SrvShard objects
	srvShardByPath := make(map[cellKeyspaceShard]*topo.SrvShard)
	for location := range locationAddrsMap {
		// location will be {cell,keyspace,shard,type}
		srvShardPath := cellKeyspaceShard{location.cell, location.keyspace, location.shard}
		srvShard, ok := srvShardByPath[srvShardPath]
		if !ok {
			srvShard = &topo.SrvShard{
				KeyRange:    shardInfo.KeyRange,
				ServedTypes: shardInfo.ServedTypes,
				TabletTypes: make([]topo.TabletType, 0, 2),
			}
			srvShardByPath[srvShardPath] = srvShard
		}
		foundType := false
		for _, t := range srvShard.TabletTypes {
			if t == location.tabletType {
				foundType = true
			}
		}
		if !foundType {
			srvShard.TabletTypes = append(srvShard.TabletTypes, location.tabletType)
		}
	}

	// Save the shard entries
	for cks, srvShard := range srvShardByPath {
		wg.Add(1)
		go func(cks cellKeyspaceShard, srvShard *topo.SrvShard) {
			log.Infof("updating shard serving graph in cell %v for %v/%v", cks.cell, cks.keyspace, cks.shard)
			if err := wr.ts.UpdateSrvShard(cks.cell, cks.keyspace, cks.shard, srvShard); err != nil {
				rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cks.cell, cks.keyspace, cks.shard, err))
			}
			wg.Done()
		}(cks, srvShard)
	}
	wg.Wait()
	return rec.Error()
}
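The maps above are keyed by the cellKeyspaceShard and cellKeyspaceShardType structs, whose definitions are not in this excerpt. Go allows any comparable struct as a map key, and the field names are visible from the usage (location.cell, location.tabletType, etc.), so they presumably look roughly like the following sketch; the real definitions live elsewhere in the package and may differ:

// Assumed shapes, inferred from how the maps above are keyed;
// the real definitions are elsewhere in the package.
type cellKeyspaceShard struct {
	cell     string
	keyspace string
	shard    string
}

type cellKeyspaceShardType struct {
	cell       string
	keyspace   string
	shard      string
	tabletType topo.TabletType
}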
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Read existing EndPoints node versions, so we know if any
		// changes sneak in after we read the tablets.
		versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName())
		if err != nil {
			return err
		}

		// Get all tablets in this cell/shard.
		tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell})
		if err != nil {
			if err != topo.ErrPartialResult {
				return err
			}
			log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell)
		}

		// Build up the serving graph from scratch.
		serving := make(map[pb.TabletType]*pb.EndPoints)
		for _, tablet := range tablets {
			// Only add serving types.
			if !tablet.IsInServingGraph() {
				continue
			}

			// Check the Keyspace and Shard for the tablet are right.
			if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() {
				return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard)
			}

			// Add the tablet to the list.
			endpoints, ok := serving[tablet.Type]
			if !ok {
				endpoints = topo.NewEndPoints()
				serving[tablet.Type] = endpoints
			}
			entry, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
				continue
			}
			endpoints.Entries = append(endpoints.Entries, entry)
		}

		wg := sync.WaitGroup{}
		fatalErrs := concurrency.AllErrorRecorder{}
		retryErrs := concurrency.AllErrorRecorder{}

		// Write nodes that should exist.
		for tabletType, endpoints := range serving {
			wg.Add(1)
			go func(tabletType pb.TabletType, endpoints *pb.EndPoints) {
				defer wg.Done()
				log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType)

				version, ok := versions[tabletType]
				if !ok {
					// This type didn't exist when we first checked.
					// Try to create, but only if it still doesn't exist.
					if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil {
						log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNodeExists:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
					return
				}

				// Update only if the version matches.
				if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil {
					log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
					switch err {
					case topo.ErrBadVersion, topo.ErrNoNode:
						retryErrs.RecordError(err)
					default:
						fatalErrs.RecordError(err)
					}
				}
			}(tabletType, endpoints)
		}

		// Delete nodes that shouldn't exist.
		for tabletType, version := range versions {
			if _, ok := serving[tabletType]; !ok {
				wg.Add(1)
				go func(tabletType pb.TabletType, version int64) {
					defer wg.Done()
					log.Infof("removing stale db type from serving graph: %v", tabletType)
					if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil {
						log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNoNode:
							// Someone else deleted it, which is fine.
						case topo.ErrBadVersion:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
				}(tabletType, version)
			}
		}

		// Update srvShard object
		wg.Add(1)
		go func() {
			defer wg.Done()
			log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName())
			if err := UpdateSrvShard(ctx, ts, cell, si); err != nil {
				fatalErrs.RecordError(err)
				log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err)
			}
		}()

		wg.Wait()

		// If there are any fatal errors, give up.
		if fatalErrs.HasErrors() {
			return fatalErrs.Error()
		}

		// If there are any retry errors, try again.
		if retryErrs.HasErrors() {
			continue
		}

		// Otherwise, success!
		return nil
	}
}
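The loop above is an optimistic-concurrency pattern: record node versions up front, apply creates/updates/deletes conditioned on those versions, and restart the whole rebuild whenever a version check fails (ErrBadVersion, ErrNodeExists, ErrNoNode). A stripped-down, self-contained sketch of the same compare-and-swap-with-retry idea, using a hypothetical in-memory store rather than the real topo.Server API:

package main

import (
	"errors"
	"fmt"
)

var errBadVersion = errors.New("bad version")

// versionedStore is a hypothetical key/value store with
// compare-and-swap semantics, standing in for the topo server.
type versionedStore struct {
	data     map[string]string
	versions map[string]int64
}

// update writes value only if the caller still holds the latest version.
func (s *versionedStore) update(key, value string, version int64) error {
	if s.versions[key] != version {
		return errBadVersion
	}
	s.data[key] = value
	s.versions[key]++
	return nil
}

func main() {
	s := &versionedStore{
		data:     map[string]string{"replica": "old"},
		versions: map[string]int64{"replica": 3},
	}
	for {
		// Read the current version, then write conditioned on it;
		// on errBadVersion, someone else won the race, so retry.
		v := s.versions["replica"]
		if err := s.update("replica", "new", v); err == errBadVersion {
			continue
		} else if err != nil {
			panic(err)
		}
		break
	}
	fmt.Println(s.data["replica"], s.versions["replica"]) // new 4
}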
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(log logutil.Logger, ts topo.Server, shardInfo *topo.ShardInfo, cell string, tablets map[topo.TabletAlias]*topo.TabletInfo) error {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", shardInfo.Keyspace(), shardInfo.ShardName(), cell)

	// Get all existing db types so they can be removed if nothing
	// had been edited.
	existingTabletTypes, err := ts.GetSrvTabletTypesPerShard(cell, shardInfo.Keyspace(), shardInfo.ShardName())
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
	}

	// Update db type addresses in the serving graph
	//
	// locationAddrsMap is a map:
	//   key: tabletType
	//   value: EndPoints (list of server records)
	locationAddrsMap := make(map[topo.TabletType]*topo.EndPoints)
	for _, tablet := range tablets {
		if !tablet.IsInReplicationGraph() {
			// only valid case is a scrapped master in the
			// catastrophic reparent case
			if tablet.Parent.Uid != topo.NO_TABLET {
				log.Warningf("Tablet %v should not be in the replication graph, please investigate (it is being ignored in the rebuild)", tablet.Alias)
			}
			continue
		}

		// Check IsInServingGraph, we don't want to add tablets that
		// are not serving
		if !tablet.IsInServingGraph() {
			continue
		}

		// Check the Keyspace and Shard for the tablet are right
		if tablet.Keyspace != shardInfo.Keyspace() || tablet.Shard != shardInfo.ShardName() {
			return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, shardInfo.Keyspace(), shardInfo.ShardName(), tablet.Keyspace, tablet.Shard)
		}

		// Add the tablet to the list
		addrs, ok := locationAddrsMap[tablet.Type]
		if !ok {
			addrs = topo.NewEndPoints()
			locationAddrsMap[tablet.Type] = addrs
		}
		entry, err := tablet.Tablet.EndPoint()
		if err != nil {
			log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
			continue
		}
		addrs.Entries = append(addrs.Entries, *entry)
	}

	// we're gonna parallelize a lot here:
	// - writing all the tabletTypes records
	// - removing the unused records
	// - writing SrvShard
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// write all the EndPoints nodes everywhere we want them
	for tabletType, addrs := range locationAddrsMap {
		wg.Add(1)
		go func(tabletType topo.TabletType, addrs *topo.EndPoints) {
			log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType)
			if err := ts.UpdateEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, addrs); err != nil {
				rec.RecordError(fmt.Errorf("writing endpoints for cell %v shard %v/%v tabletType %v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType, err))
			}
			wg.Done()
		}(tabletType, addrs)
	}

	// Delete any pre-existing paths that were not updated by this process.
	// That's the existingTabletTypes - locationAddrsMap
	for _, tabletType := range existingTabletTypes {
		if _, ok := locationAddrsMap[tabletType]; !ok {
			wg.Add(1)
			go func(tabletType topo.TabletType) {
				log.Infof("removing stale db type from serving graph: %v", tabletType)
				if err := ts.DeleteEndPoints(cell, shardInfo.Keyspace(), shardInfo.ShardName(), tabletType); err != nil {
					log.Warningf("unable to remove stale db type %v from serving graph: %v", tabletType, err)
				}
				wg.Done()
			}(tabletType)
		}
	}

	// Update srvShard object
	wg.Add(1)
	go func() {
		log.Infof("updating shard serving graph in cell %v for %v/%v", cell, shardInfo.Keyspace(), shardInfo.ShardName())
		srvShard := &topo.SrvShard{
			Name:        shardInfo.ShardName(),
			KeyRange:    shardInfo.KeyRange,
			ServedTypes: shardInfo.ServedTypes,
			MasterCell:  shardInfo.MasterAlias.Cell,
			TabletTypes: make([]topo.TabletType, 0, len(locationAddrsMap)),
		}
		for tabletType := range locationAddrsMap {
			srvShard.TabletTypes = append(srvShard.TabletTypes, tabletType)
		}
		if err := ts.UpdateSrvShard(cell, shardInfo.Keyspace(), shardInfo.ShardName(), srvShard); err != nil {
			rec.RecordError(fmt.Errorf("writing serving data in cell %v for %v/%v failed: %v", cell, shardInfo.Keyspace(), shardInfo.ShardName(), err))
		}
		wg.Done()
	}()

	wg.Wait()
	return rec.Error()
}
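All of these rebuild variants share one concurrency idiom: fan the topo writes out as goroutines under a sync.WaitGroup, and collect failures in a thread-safe recorder (concurrency.AllErrorRecorder) instead of aborting on the first error, so every independent write still gets attempted. A minimal, self-contained sketch of that pattern with a homegrown recorder, not the actual Vitess concurrency package:

package main

import (
	"fmt"
	"sync"
)

// errorRecorder collects errors from concurrent goroutines;
// a stand-in for concurrency.AllErrorRecorder.
type errorRecorder struct {
	mu   sync.Mutex
	errs []error
}

func (r *errorRecorder) RecordError(err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.errs = append(r.errs, err)
}

func (r *errorRecorder) Error() error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if len(r.errs) == 0 {
		return nil
	}
	return fmt.Errorf("%d errors: %v", len(r.errs), r.errs)
}

func main() {
	rec := &errorRecorder{}
	wg := sync.WaitGroup{}
	for _, cell := range []string{"cell1", "cell2", "cell3"} {
		wg.Add(1)
		go func(cell string) {
			defer wg.Done()
			if cell == "cell2" { // simulate one failing write
				rec.RecordError(fmt.Errorf("writing %v failed", cell))
			}
		}(cell)
	}
	// Wait for every write before reporting the combined outcome.
	wg.Wait()
	fmt.Println(rec.Error())
}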