// queryFailover runs an algorithm to determine which DCs to try and then calls // them to try to locate alternative services. func queryFailover(q queryServer, query *structs.PreparedQuery, limit int, options structs.QueryOptions, reply *structs.PreparedQueryExecuteResponse) error { // Pull the list of other DCs. This is sorted by RTT in case the user // has selected that. nearest, err := q.GetOtherDatacentersByDistance() if err != nil { return err } // This will help us filter unknown DCs supplied by the user. known := make(map[string]struct{}) for _, dc := range nearest { known[dc] = struct{}{} } // Build a candidate list of DCs to try, starting with the nearest N // from RTTs. var dcs []string index := make(map[string]struct{}) if query.Service.Failover.NearestN > 0 { for i, dc := range nearest { if !(i < query.Service.Failover.NearestN) { break } dcs = append(dcs, dc) index[dc] = struct{}{} } } // Then add any DCs explicitly listed that weren't selected above. for _, dc := range query.Service.Failover.Datacenters { // This will prevent a log of other log spammage if we do not // attempt to talk to datacenters we don't know about. if _, ok := known[dc]; !ok { q.GetLogger().Printf("[DEBUG] consul.prepared_query: Skipping unknown datacenter '%s' in prepared query", dc) continue } // This will make sure we don't re-try something that fails // from the NearestN list. if _, ok := index[dc]; !ok { dcs = append(dcs, dc) } } // Now try the selected DCs in priority order. failovers := 0 for _, dc := range dcs { // This keeps track of how many iterations we actually run. failovers++ // Be super paranoid and set the nodes slice to nil since it's // the same slice we used before. We know there's nothing in // there, but the underlying msgpack library has a policy of // updating the slice when it's non-nil, and that feels dirty. // Let's just set it to nil so there's no way to communicate // through this slice across successive RPC calls. reply.Nodes = nil // Note that we pass along the limit since it can be applied // remotely to save bandwidth. We also pass along the consistency // mode information we were given, so that applies to the remote // query as well. remote := &structs.PreparedQueryExecuteRemoteRequest{ Datacenter: dc, Query: *query, Limit: limit, QueryOptions: options, } if err := q.ForwardDC("PreparedQuery.ExecuteRemote", dc, remote, reply); err != nil { q.GetLogger().Printf("[WARN] consul.prepared_query: Failed querying for service '%s' in datacenter '%s': %s", query.Service.Service, dc, err) continue } // We can stop if we found some nodes. if len(reply.Nodes) > 0 { break } } // Set this at the end because the response from the remote doesn't have // this information. reply.Failovers = failovers return nil }