// getUnitByHash retrieves from the Registry the Unit associated with the given Hash func (r *EtcdRegistry) getUnitByHash(hash unit.Hash) *unit.UnitFile { key := r.hashedUnitPath(hash) opts := &etcd.GetOptions{ Recursive: true, } resp, err := r.kAPI.Get(r.ctx(), key, opts) if err != nil { if isEtcdError(err, etcd.ErrorCodeKeyNotFound) { err = nil } return nil } var um unitModel if err := unmarshal(resp.Node.Value, &um); err != nil { log.Errorf("error unmarshaling Unit(%s): %v", hash, err) return nil } u, err := unit.NewUnitFile(um.Raw) if err != nil { log.Errorf("error parsing Unit(%s): %v", hash, err) return nil } return u }
// getAllUnitsHashMap retrieves from the Registry all Units and returns a map of hash to UnitFile func (r *EtcdRegistry) getAllUnitsHashMap() (map[string]*unit.UnitFile, error) { key := r.prefixed(unitPrefix) opts := &etcd.GetOptions{ Recursive: true, Quorum: true, } hashToUnit := map[string]*unit.UnitFile{} resp, err := r.kAPI.Get(r.ctx(), key, opts) if err != nil { return nil, err } for _, node := range resp.Node.Nodes { parts := strings.Split(node.Key, "/") if len(parts) == 0 { log.Errorf("key '%v' doesn't have enough parts", node.Key) continue } stringHash := parts[len(parts)-1] hash, err := unit.HashFromHexString(stringHash) if err != nil { log.Errorf("failed to get Hash for key '%v' with stringHash '%v': %v", node.Key, stringHash, err) continue } unit := r.unitFromEtcdNode(hash, node) if unit == nil { continue } hashToUnit[stringHash] = unit } return hashToUnit, nil }
// getUnitByHash retrieves from the Registry the Unit associated with the given Hash func (r *EtcdRegistry) getUnitByHash(hash unit.Hash) *unit.UnitFile { req := etcd.Get{ Key: r.hashedUnitPath(hash), Recursive: true, } resp, err := r.etcd.Do(&req) if err != nil { if isKeyNotFound(err) { err = nil } return nil } var um unitModel if err := unmarshal(resp.Node.Value, &um); err != nil { log.Errorf("error unmarshaling Unit(%s): %v", hash, err) return nil } u, err := unit.NewUnitFile(um.Raw) if err != nil { log.Errorf("error parsing Unit(%s): %v", hash, err) return nil } return u }
// Supervise monitors the life of the Server and coordinates its shutdown. // A shutdown occurs when the monitor returns, either because a health check // fails or a user triggers a shutdown. If the shutdown is due to a health // check failure, the Server is restarted. Supervise will block shutdown until // all components have finished shutting down or a timeout occurs; if this // happens, the Server will not automatically be restarted. func (s *Server) Supervise() { sd, err := s.mon.Monitor(s.hrt, s.killc) if sd { log.Infof("Server monitor triggered: told to shut down") } else { log.Errorf("Server monitor triggered: %v", err) } close(s.stopc) done := make(chan struct{}) go func() { s.wg.Wait() close(done) }() select { case <-done: case <-time.After(shutdownTimeout): log.Errorf("Timed out waiting for server to shut down. Panicking the server without cleanup.") panic("Failed server shutdown. Panic") } if !sd { log.Infof("Restarting server") s.SetRestartServer(true) s.Run() s.SetRestartServer(false) } }
// newPublisher returns a publishFunc that publishes a single UnitState // by the given name to the provided Registry, with the given TTL func newPublisher(reg registry.Registry, ttl time.Duration) publishFunc { return func(name string, us *unit.UnitState) { if us == nil { log.V(1).Infof("Destroying UnitState(%s) in Registry", name) err := reg.RemoveUnitState(name) if err != nil { log.Errorf("Failed to destroy UnitState(%s) in Registry: %v", name, err) } } else { // Sanity check - don't want to publish incomplete UnitStates // TODO(jonboulle): consider teasing apart a separate UnitState-like struct // so we can rely on a UnitState always being fully hydrated? // See https://github.com/coreos/fleet/issues/720 //if len(us.UnitHash) == 0 { // log.Errorf("Refusing to push UnitState(%s), no UnitHash: %#v", name, us) if len(us.MachineID) == 0 { log.Errorf("Refusing to push UnitState(%s), no MachineID: %#v", name, us) } else { log.V(1).Infof("Pushing UnitState(%s) to Registry: %#v", name, us) reg.SaveUnitState(name, us, ttl) } } } }
// SaveUnitState persists the given UnitState to the Registry func (r *EtcdRegistry) SaveUnitState(jobName string, unitState *unit.UnitState, ttl time.Duration) { usm := unitStateToModel(unitState) if usm == nil { log.Errorf("Unable to save nil UnitState model") return } json, err := marshal(usm) if err != nil { log.Errorf("Error marshalling UnitState: %v", err) return } legacyKey := r.legacyUnitStatePath(jobName) req := etcd.Set{ Key: legacyKey, Value: json, TTL: ttl, } r.etcd.Do(&req) newKey := r.unitStatePath(unitState.MachineID, jobName) req = etcd.Set{ Key: newKey, Value: json, TTL: ttl, } r.etcd.Do(&req) }
// IsGrpcLeader checks if the current leader has gRPC capabilities enabled or error // if there is not a elected leader yet. func (e *Engine) IsGrpcLeader() (bool, error) { leader, err := e.lManager.GetLease(engineLeaseName) if err != nil { log.Errorf("Unable to determine current lease: %v", err) return false, err } // It can happen that the leader is not yet stored in etcd and nor error (line 122 pkg/lease/etcd.go) if leader == nil { return false, errors.New("Unable to get the current leader") } leaderState, err := e.getMachineState(leader.MachineID()) if err != nil { log.Errorf("Unable to determine current lease: %v", err) return false, err } if leaderState.Capabilities != nil && leaderState.Capabilities.Has(machine.CapGRPC) { return true, nil } log.Info("Engine leader has no gRPC capabilities enabled!") return false, nil }
// desiredAgentState builds an *AgentState object that represents what the // provided Agent should currently be doing. func desiredAgentState(a *Agent, reg registry.Registry) (*AgentState, error) { units, err := reg.Units() if err != nil { log.Errorf("Failed fetching Units from Registry: %v", err) return nil, err } sUnits, err := reg.Schedule() if err != nil { log.Errorf("Failed fetching schedule from Registry: %v", err) return nil, err } // fetch full machine state from registry instead of // using the local version to allow for dynamic metadata ms, err := reg.MachineState(a.Machine.State().ID) if err != nil { log.Errorf("Failed fetching machine state from Registry: %v", err) return nil, err } as := AgentState{ MState: &ms, Units: make(map[string]*job.Unit), } sUnitMap := make(map[string]*job.ScheduledUnit) for _, sUnit := range sUnits { sUnit := sUnit sUnitMap[sUnit.Name] = &sUnit } for _, u := range units { u := u md := u.RequiredTargetMetadata() if u.IsGlobal() { if !machine.HasMetadata(&ms, md) { log.Debugf("Agent unable to run global unit %s: missing required metadata", u.Name) continue } } if !u.IsGlobal() { sUnit, ok := sUnitMap[u.Name] if !ok || sUnit.TargetMachineID == "" || sUnit.TargetMachineID != ms.ID { continue } } if cExists, _ := as.HasConflict(u.Name, u.Conflicts()); cExists { continue } as.Units[u.Name] = &u } return &as, nil }
// statesByMUSKey returns a map of all UnitStates stored in the registry indexed by MUSKey func (r *EtcdRegistry) statesByMUSKey() (map[MUSKey]*unit.UnitState, error) { mus := make(map[MUSKey]*unit.UnitState) // For backwards compatibility, first retrieve any states stored in the // old format req := etcd.Get{ Key: path.Join(r.keyPrefix, statePrefix), Recursive: true, } res, err := r.etcd.Do(&req) if err != nil && !isKeyNotFound(err) { return nil, err } if res != nil { for _, node := range res.Node.Nodes { _, name := path.Split(node.Key) var usm unitStateModel if err := unmarshal(node.Value, &usm); err != nil { log.Errorf("Error unmarshalling UnitState(%s): %v", name, err) continue } us := modelToUnitState(&usm, name) if us != nil { key := MUSKey{name, us.MachineID} mus[key] = us } } } // Now retrieve states stored in the new format and overlay them req = etcd.Get{ Key: path.Join(r.keyPrefix, statesPrefix), Recursive: true, } res, err = r.etcd.Do(&req) if err != nil && !isKeyNotFound(err) { return nil, err } if res != nil { for _, dir := range res.Node.Nodes { _, name := path.Split(dir.Key) for _, node := range dir.Nodes { _, machID := path.Split(node.Key) var usm unitStateModel if err := unmarshal(node.Value, &usm); err != nil { log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, err) continue } us := modelToUnitState(&usm, name) if us != nil { key := MUSKey{name, machID} mus[key] = us } } } } return mus, nil }
// Units lists all Units stored in the Registry, ordered by name. This includes both global and non-global units. func (r *EtcdRegistry) Units() ([]job.Unit, error) { key := r.prefixed(jobPrefix) opts := &etcd.GetOptions{ Sort: true, Recursive: true, } res, err := r.kAPI.Get(r.ctx(), key, opts) if err != nil { if isEtcdError(err, etcd.ErrorCodeKeyNotFound) { err = nil } return nil, err } // Fetch all units by hash recursively to avoid sending N requests to Etcd. hashToUnit, err := r.getAllUnitsHashMap() if err != nil { log.Errorf("failed fetching all Units from etcd: %v", err) return nil, err } unitHashLookupFunc := func(hash unit.Hash) *unit.UnitFile { stringHash := hash.String() unit, ok := hashToUnit[stringHash] if !ok { log.Errorf("did not find Unit %v in list of all units", stringHash) return nil } return unit } uMap := make(map[string]*job.Unit) for _, dir := range res.Node.Nodes { u, err := r.dirToUnit(dir, unitHashLookupFunc) if err != nil { log.Errorf("Failed to parse Unit from etcd: %v", err) continue } if u == nil { continue } uMap[u.Name] = u } var sortable sort.StringSlice for name, _ := range uMap { sortable = append(sortable, name) } sortable.Sort() units := make([]job.Unit, 0, len(sortable)) for _, name := range sortable { units = append(units, *uMap[name]) } return units, nil }
func rpcAcquireLeadership(reg registry.Registry, lManager lease.Manager, machID string, ver int, ttl time.Duration) lease.Lease { existing, err := lManager.GetLease(engineLeaseName) if err != nil { log.Errorf("Unable to determine current lease: %v", err) return nil } var l lease.Lease if (existing == nil && reg.UseEtcdRegistry()) || (existing == nil && !reg.IsRegistryReady()) { l, err = lManager.AcquireLease(engineLeaseName, machID, ver, ttl) if err != nil { log.Errorf("Engine leadership acquisition failed: %v", err) return nil } else if l == nil { log.Infof("Unable to acquire engine leadership") return nil } log.Infof("Engine leadership acquired") return l } if existing != nil && existing.Version() >= ver { log.Debugf("Lease already held by Machine(%s) operating at acceptable version %d", existing.MachineID(), existing.Version()) return existing } // TODO(hector): Here we could add a possible SLA to determine when the leader // is too busy. In such a case, we can trigger a new leader election if (existing != nil && reg.UseEtcdRegistry()) || (existing != nil && !reg.IsRegistryReady()) { rem := existing.TimeRemaining() l, err = lManager.StealLease(engineLeaseName, machID, ver, ttl+rem, existing.Index()) if err != nil { log.Errorf("Engine leadership steal failed: %v", err) return nil } else if l == nil { log.Infof("Unable to steal engine leadership") return nil } log.Infof("Stole engine leadership from Machine(%s)", existing.MachineID()) if rem > 0 { log.Infof("Waiting %v for previous lease to expire before continuing reconciliation", rem) <-time.After(rem) } return l } log.Infof("Engine leader is BUSY!") return existing }
func (r *RegistryMux) rpcDialerNoEngine(_ string, timeout time.Duration) (net.Conn, error) { ticker := time.Tick(dialRegistryReconnectTimeout) // Timeout re-defined to call etcd every 5secs to get the leader timeout = 5 * time.Second check := time.After(timeout) for { select { case <-check: log.Errorf("Unable to connect to engine %s\n", r.currentEngine.PublicIP) // Get the new engine leader of the cluster out of etcd lease, err := r.leaseManager.GetLease(engineLeaderKeyPath) // Key found if err == nil && lease != nil { var err error machines, err := r.etcdRegistry.Machines() if err != nil { log.Errorf("Unable to get the machines of the cluster %v\n", err) return nil, errors.New("Unable to get the machines of the cluster") } for _, s := range machines { // Update the currentEngine with the new one... otherwise wait until // there is one if s.ID == lease.MachineID() { // New leader has not gRPC capabilities enabled. if !s.Capabilities.Has(machine.CapGRPC) { log.Error("New leader engine has not gRPC enabled!") return nil, errors.New("New leader engine has not gRPC enabled!") } r.currentEngine = s log.Infof("Found a new engine to connect to: %s\n", r.currentEngine.PublicIP) // Restore initial check configuration timeout = 5 * time.Second check = time.After(timeout) } } } else { timeout = 2 * time.Second log.Errorf("Unable to get the leader engine, retrying in %v...", timeout) check = time.After(timeout) } case <-ticker: addr := fmt.Sprintf("%s:%d", r.currentEngine.PublicIP, rpcServerPort) conn, err := net.Dial("tcp", addr) if err == nil { log.Infof("Connected to engine on %s\n", r.currentEngine.PublicIP) return conn, nil } log.Errorf("Retry to connect to new engine: %+v", err) } } }
func (r *EtcdRegistry) unitFromEtcdNode(hash unit.Hash, etcdNode *etcd.Node) *unit.UnitFile { var um unitModel if err := unmarshal(etcdNode.Value, &um); err != nil { log.Errorf("error unmarshaling Unit(%s): %v", hash, err) return nil } u, err := unit.NewUnitFile(um.Raw) if err != nil { log.Errorf("error parsing Unit(%s): %v", hash, err) return nil } return u }
// statesByMUSKey returns a map of all UnitStates stored in the registry indexed by MUSKey func (r *EtcdRegistry) statesByMUSKey() (map[MUSKey]*unit.UnitState, error) { mus := make(map[MUSKey]*unit.UnitState) key := r.prefixed(statesPrefix) opts := &etcd.GetOptions{ Recursive: true, } res, err := r.kAPI.Get(r.ctx(), key, opts) if err != nil && !isEtcdError(err, etcd.ErrorCodeKeyNotFound) { return nil, err } if res != nil { for _, dir := range res.Node.Nodes { _, name := path.Split(dir.Key) for _, node := range dir.Nodes { _, machID := path.Split(node.Key) var usm unitStateModel if err := unmarshal(node.Value, &usm); err != nil { log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, err) continue } us := modelToUnitState(&usm, name) if us != nil { key := MUSKey{name, machID} mus[key] = us } } } } return mus, nil }
func (mr *machinesResource) ServeHTTP(rw http.ResponseWriter, req *http.Request) { if req.Method != "GET" { sendError(rw, http.StatusBadRequest, fmt.Errorf("only HTTP GET supported against this resource")) return } token, err := findNextPageToken(req.URL) if err != nil { sendError(rw, http.StatusBadRequest, err) return } if token == nil { def := DefaultPageToken() token = &def } page, err := getMachinePage(mr.cAPI, *token) if err != nil { log.Errorf("Failed fetching page of Machines: %v", err) sendError(rw, http.StatusInternalServerError, nil) return } sendResponse(rw, http.StatusOK, page) }
// statesByMUSKey returns a map of all UnitStates stored in the registry indexed by MUSKey func (r *EtcdRegistry) statesByMUSKey() (map[MUSKey]*unit.UnitState, error) { mus := make(map[MUSKey]*unit.UnitState) req := etcd.Get{ Key: path.Join(r.keyPrefix, statesPrefix), Recursive: true, } res, err := r.etcd.Do(&req) if err != nil && !etcd.IsKeyNotFound(err) { return nil, err } if res != nil { for _, dir := range res.Node.Nodes { _, name := path.Split(dir.Key) for _, node := range dir.Nodes { _, machID := path.Split(node.Key) var usm unitStateModel if err := unmarshal(node.Value, &usm); err != nil { log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, err) continue } us := modelToUnitState(&usm, name) if us != nil { key := MUSKey{name, machID} mus[key] = us } } } } return mus, nil }
func watch(kAPI etcd.KeysAPI, key string, stop chan struct{}) (res *etcd.Response) { for res == nil { select { case <-stop: log.Debugf("Gracefully closing etcd watch loop: key=%s", key) return default: opts := &etcd.WatcherOptions{ AfterIndex: 0, Recursive: true, } watcher := kAPI.Watcher(key, opts) log.Debugf("Creating etcd watcher: %s", key) var err error res, err = watcher.Next(context.Background()) if err != nil { log.Errorf("etcd watcher %v returned error: %v", key, err) } } // Let's not slam the etcd server in the event that we know // an unexpected error occurred. time.Sleep(time.Second) } return }
func watch(client etcd.Client, key string, stop chan struct{}) (res *etcd.Result) { for res == nil { select { case <-stop: log.V(1).Infof("Gracefully closing etcd watch loop: key=%s", key) return default: req := &etcd.Watch{ Key: key, WaitIndex: 0, Recursive: true, } log.V(1).Infof("Creating etcd watcher: %v", req) var err error res, err = client.Wait(req, stop) if err != nil { log.Errorf("etcd watcher %v returned error: %v", req, err) } } // Let's not slam the etcd server in the event that we know // an unexpected error occurred. time.Sleep(time.Second) } return }
// stateByMUSKey returns a single UnitState stored in the registry indexed by MUSKey // that matches with the given unit name func (r *EtcdRegistry) stateByMUSKey(uName string) (*unit.UnitState, error) { key := r.prefixed(statesPrefix) opts := &etcd.GetOptions{ Recursive: true, } res, err := r.kAPI.Get(context.Background(), key, opts) if err != nil && !isEtcdError(err, etcd.ErrorCodeKeyNotFound) { return nil, err } if res == nil { return nil, nil } for _, dir := range res.Node.Nodes { _, name := path.Split(dir.Key) if name != uName { continue } for _, node := range dir.Nodes { _, machID := path.Split(node.Key) var usm unitStateModel if err := unmarshal(node.Value, &usm); err != nil { log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, err) continue } us := modelToUnitState(&usm, name) if us != nil { return us, nil } } } return nil, nil }
// sendResponse attempts to marshal an arbitrary thing to JSON then write // it to the http.ResponseWriter func sendResponse(rw http.ResponseWriter, code int, resp interface{}) { enc, err := json.Marshal(resp) if err != nil { log.Errorf("Failed JSON-encoding HTTP response: %v", err) rw.WriteHeader(http.StatusInternalServerError) return } rw.Header().Set("Content-Type", "application/json") rw.WriteHeader(code) _, err = rw.Write(enc) if err != nil { log.Errorf("Failed sending HTTP response body: %v", err) } }
// Purge attempts to unload all Units that have been loaded locally func (ar *AgentReconciler) Purge(a *Agent) { for { cAgentState, err := a.units() if err != nil { log.Errorf("Unable to determine agent's current state: %v", err) return } if len(cAgentState) == 0 { return } for name, _ := range cAgentState { t := task{ typ: taskTypeUnloadUnit, reason: taskReasonPurgingAgent, } u := &job.Unit{ Name: name, } tc := newTaskChain(u, t) ar.launchTaskChain(tc, a) } time.Sleep(time.Second) } }
func (e *Engine) rpcLeadership(leaseTTL time.Duration, machID string) lease.Lease { var previousEngine string if e.lease != nil { previousEngine = e.lease.MachineID() } var l lease.Lease if isLeader(e.lease, machID) { l = rpcRenewLeadership(e.lManager, e.lease, engineVersion, leaseTTL) } else { l = rpcAcquireLeadership(e.registry, e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l if e.lease != nil && previousEngine != e.lease.MachineID() { engineState, err := e.getMachineState(e.lease.MachineID()) if err != nil { log.Errorf("Failed to get machine state for machine %s %v", e.lease.MachineID(), err) } if engineState != nil { log.Infof("Updating engine state... engineState: %v previous: %s lease: %v", engineState, previousEngine, e.lease) go e.updateEngineState(*engineState) } } return e.lease }
// Purge attempts to unload all Jobs that have been loaded locally func (ar *AgentReconciler) Purge(a *Agent) { for { cAgentState, err := currentAgentState(a) if err != nil { log.Errorf("Unable to determine agent's current state: %v", err) return } if len(cAgentState.Jobs) == 0 { return } for _, cJob := range cAgentState.Jobs { cJob := cJob t := task{ typ: taskTypeUnloadJob, reason: taskReasonPurgingAgent, } tc := newTaskChain(cJob, t) ar.launchTaskChain(tc, a) } time.Sleep(time.Second) } }
// stopUnit asks systemd to stop the named unit using the "replace" mode and
// logs the outcome: an error on failure, otherwise the status returned by
// systemd.
func (m *systemdUnitManager) stopUnit(name string) {
	stat, err := m.systemd.StopUnit(name, "replace")
	if err != nil {
		log.Errorf("Failed to stop systemd unit %s: %v", name, err)
		return
	}
	log.Infof("Stopped systemd unit %s(%s)", name, stat)
}
func (r *Reconciler) Reconcile(e *Engine, stop chan struct{}) { log.V(1).Infof("Polling Registry for actionable work") clust, err := e.clusterState() if err != nil { log.Errorf("Failed getting current cluster state: %v", err) return } for t := range r.calculateClusterTasks(clust, stop) { err = doTask(t, e) if err != nil { log.Errorf("Failed resolving task: task=%s err=%v", t, err) } } }
// Purge attempts to unload all Units that have been loaded locally func (ar *AgentReconciler) Purge(a *Agent) { for { cAgentState, err := a.units() if err != nil { log.Errorf("Unable to determine agent's current state: %v", err) return } if len(cAgentState) == 0 { return } var tasks []task for name, _ := range cAgentState { tasks = append(tasks, task{ typ: taskTypeUnloadUnit, reason: taskReasonPurgingAgent, unit: &job.Unit{ Name: name, }, }) } ar.launchTasks(tasks, a) time.Sleep(time.Second) } }
// Reconcile drives the local Agent's state towards the desired state // stored in the Registry. func (ar *AgentReconciler) Reconcile(a *Agent) { dAgentState, err := desiredAgentState(a, ar.reg) if err != nil { log.Errorf("Unable to determine agent's desired state: %v", err) return } cAgentState, err := a.units() if err != nil { log.Errorf("Unable to determine agent's current state: %v", err) return } tasks := ar.calculateTasksForUnits(dAgentState, cAgentState) ar.launchTasks(tasks, a) }
func (sr *stateResource) list(rw http.ResponseWriter, req *http.Request) { token, err := findNextPageToken(req.URL, sr.tokenLimit) if err != nil { sendError(rw, http.StatusBadRequest, err) return } if token == nil { def := DefaultPageToken(sr.tokenLimit) token = &def } var machineID, unitName string for _, val := range req.URL.Query()["machineID"] { machineID = val break } for _, val := range req.URL.Query()["unitName"] { unitName = val break } page, err := getUnitStatePage(sr.cAPI, machineID, unitName, *token) if err != nil { log.Errorf("Failed fetching page of UnitStates: %v", err) sendError(rw, http.StatusInternalServerError, nil) return } sendResponse(rw, http.StatusOK, &page) }
// Units lists all Units stored in the Registry, ordered by name. This includes both global and non-global units. func (r *EtcdRegistry) Units() ([]job.Unit, error) { key := r.prefixed(jobPrefix) opts := &etcd.GetOptions{ // We need Job Units to be sorted Sort: true, Recursive: true, } res, err := r.kAPI.Get(context.Background(), key, opts) if err != nil { if isEtcdError(err, etcd.ErrorCodeKeyNotFound) { err = nil } return nil, err } // Fetch all units by hash recursively to avoid sending N requests to Etcd. hashToUnit, err := r.getAllUnitsHashMap() if err != nil { log.Errorf("failed fetching all Units from etcd: %v", err) return nil, err } unitHashLookupFunc := func(hash unit.Hash) *unit.UnitFile { stringHash := hash.String() unit, ok := hashToUnit[stringHash] if !ok { log.Errorf("did not find Unit %v in list of all units", stringHash) return nil } return unit } units := make([]job.Unit, 0) for _, dir := range res.Node.Nodes { u, err := r.dirToUnit(dir, unitHashLookupFunc) if err != nil { log.Errorf("Failed to parse Unit from etcd: %v", err) continue } if u == nil { continue } units = append(units, *u) } return units, nil }
func acquireLeadership(lManager lease.Manager, machID string, ver int, ttl time.Duration) lease.Lease { existing, err := lManager.GetLease(engineLeaseName) if err != nil { log.Errorf("Unable to determine current lease: %v", err) return nil } var l lease.Lease if existing == nil { l, err = lManager.AcquireLease(engineLeaseName, machID, ver, ttl) if err != nil { log.Errorf("Engine leadership acquisition failed: %v", err) return nil } else if l == nil { log.Debugf("Unable to acquire engine leadership") return nil } log.Infof("Engine leadership acquired") metrics.ReportEngineLeader() return l } if existing.Version() >= ver { log.Debugf("Lease already held by Machine(%s) operating at acceptable version %d", existing.MachineID(), existing.Version()) return existing } rem := existing.TimeRemaining() l, err = lManager.StealLease(engineLeaseName, machID, ver, ttl+rem, existing.Index()) if err != nil { log.Errorf("Engine leadership steal failed: %v", err) return nil } else if l == nil { log.Debugf("Unable to steal engine leadership") return nil } log.Infof("Stole engine leadership from Machine(%s)", existing.MachineID()) metrics.ReportEngineLeader() if rem > 0 { log.Infof("Waiting %v for previous lease to expire before continuing reconciliation", rem) <-time.After(rem) } return l }