// Updates the channel blocking for recovery func (e *EtcdBackend) handleRecovery(node *client.Node) { m := cluster.DeserializeMachine(node.Value) chn, ok := e.nodeListeners[m.Id] if ok { chn <- m } }
func (e *EtcdBackend) loadMachines() { result, err := e.kapi.Get(context.Background(), e.backendConfig.MachinePrefix+"/status/", nil) if err != nil { // TODO we probably need to try again, what happens if this is never successful log.Warning.Printf("Could not load cluster machine list") return } nodes := result.Node.Nodes for _, node := range nodes { //log.Info.Printf("Node: %s = %s ", node.Key, node.Value) m := cluster.DeserializeMachine(node.Value) if m.Id != e.machine.Id { e.cluster.AddMachine(m) } } }
func (e *EtcdBackend) handleFailure(node *client.Node) { e.Status <- "Waiting to recover" m := cluster.DeserializeMachine(node.Value) if m.Id == e.machine.Id { log.Error.Printf("Our key expired but we are still alive! %s", m.Id) return } check := e.cluster.GetMachine(m.Id) if check == nil { log.Error.Printf("Received expiration of a machine we weren't aware of %s", m.Id) return } // Create a listener for this machine to be notified // if the machine returns // Do we need a buffered channel for this use case? log.Info.Printf("Handling machine failure of %v", m) listener := make(chan *cluster.Machine, 8) e.nodeListeners[m.Id] = listener select { case <-listener: // The machine appears to have recovered, which is great news // for us because we don't have to do any work e.Status <- "Not Recovering" delete(e.nodeListeners, m.Id) return case sig := <-e.Signal: e.Signal <- sig log.Trace.Printf("Recieved shutdown: aborting recovery") e.Status <- "Not Recovering" return case <-time.After(e.backendConfig.FailoverUnit * time.Duration(e.backendConfig.FailoverTimeout)): // The machine has expired, so we will start the recovery // process. // We'll start by fighting against every machine // in the cluster to obtain a lock, once we do // we'll grab all the machines and identify the one // with the least deployments. // Then we'll go through all of the downed machine's // Deployments, and start forwarding them to the // machine with the least, failing over to the next // least if the previouys machine already has said // deployment until all the deployments have been // reassigned. e.AttemptRecovery(m) return } }
func (e *EtcdBackend) handleNewNode(node *client.Node) { m := cluster.DeserializeMachine(node.Value) e.cluster.AddMachine(m) }