Example #1
0
// handleRecovery notifies a pending failure handler that a machine is back.
//
// The machine is deserialized from node.Value. If a listener channel is
// registered under its Id in e.nodeListeners, the machine is offered to that
// channel; otherwise the event is ignored.
//
// The send is non-blocking: the waiter in handleFailure receives at most one
// value from its listener, so when the channel's buffer is already full (or
// nobody is receiving any more) the extra notification carries no information
// and is dropped instead of stalling the caller.
func (e *EtcdBackend) handleRecovery(node *client.Node) {
	m := cluster.DeserializeMachine(node.Value)
	chn, ok := e.nodeListeners[m.Id]
	if !ok {
		return
	}
	select {
	case chn <- m:
	default:
		// Listener is saturated; a recovery notification is already queued.
	}
}
Example #2
0
// loadMachines populates e.cluster from the machine status entries in the
// backend.
//
// It reads the key MachinePrefix+"/status/" through e.kapi, deserializes the
// value of every child node into a machine, and adds each machine whose Id
// differs from our own (e.machine.Id) to e.cluster. If the read fails, a
// warning that includes the underlying error is logged and nothing is added;
// there is currently no retry.
func (e *EtcdBackend) loadMachines() {
	statusKey := e.backendConfig.MachinePrefix + "/status/"
	result, err := e.kapi.Get(context.Background(), statusKey, nil)
	if err != nil {
		// TODO we probably need to try again, what happens if this is never successful
		log.Warning.Printf("Could not load cluster machine list: %v", err)
		return
	}
	for _, node := range result.Node.Nodes {
		//log.Info.Printf("Node: %s = %s ", node.Key, node.Value)
		m := cluster.DeserializeMachine(node.Value)
		if m.Id == e.machine.Id {
			// Our own entry; only the other machines are tracked here.
			continue
		}
		e.cluster.AddMachine(m)
	}
}
Example #3
0
// handleFailure reacts to the expiration of a machine's status node.
//
// It first publishes "Waiting to recover" on e.Status and deserializes the
// machine from node.Value. The event is logged and ignored when the expired
// key is our own (m.Id == e.machine.Id) or when e.cluster has no record of the
// machine. Otherwise a listener channel is registered in e.nodeListeners under
// the machine's Id and the call blocks until one of the following happens:
//
//   - a value arrives on the listener (handleRecovery sends there): the
//     machine came back, "Not Recovering" is published and nothing else is
//     done;
//   - a value arrives on e.Signal: it is logged as a shutdown, the value is
//     sent back on e.Signal, and "Not Recovering" is published;
//   - FailoverUnit * FailoverTimeout elapses: e.AttemptRecovery(m) is called.
//
// The listener is removed from e.nodeListeners on every exit path, so a later
// recovery event for the same machine never targets a channel nobody reads.
//
// NOTE(review): the two early-return paths leave the last published status at
// "Waiting to recover" — confirm whether that is intended.
func (e *EtcdBackend) handleFailure(node *client.Node) {
	e.Status <- "Waiting to recover"
	m := cluster.DeserializeMachine(node.Value)
	if m.Id == e.machine.Id {
		log.Error.Printf("Our key expired but we are still alive! %s", m.Id)
		return
	}
	check := e.cluster.GetMachine(m.Id)
	if check == nil {
		log.Error.Printf("Received expiration of a machine we weren't aware of %s", m.Id)
		return
	}

	// Create a listener for this machine to be notified
	// if the machine returns.
	// Do we need a buffered channel for this use case?
	log.Info.Printf("Handling machine failure of %v", m)
	listener := make(chan *cluster.Machine, 8)
	e.nodeListeners[m.Id] = listener
	defer func() {
		// Only unregister our own channel: a newer invocation for the same
		// machine may have replaced the entry in the meantime.
		if e.nodeListeners[m.Id] == listener {
			delete(e.nodeListeners, m.Id)
		}
	}()

	// Use a stoppable timer so it is released as soon as we leave, rather
	// than lingering until the failover timeout fires.
	timeout := time.NewTimer(e.backendConfig.FailoverUnit * time.Duration(e.backendConfig.FailoverTimeout))
	defer timeout.Stop()

	select {
	case <-listener:
		// The machine appears to have recovered, which is great news
		// for us because we don't have to do any work
		e.Status <- "Not Recovering"
		return
	case sig := <-e.Signal:
		// Put the value back on the channel; presumably so that other
		// receivers of e.Signal observe it as well.
		e.Signal <- sig
		log.Trace.Printf("Recieved shutdown: aborting recovery")
		e.Status <- "Not Recovering"
		return
	case <-timeout.C:
		// The machine has expired, so we will start the recovery
		// process.
		// We'll start by fighting against every machine
		// in the cluster to obtain a lock, once we do
		// we'll grab all the machines and identify the one
		// with the least deployments.
		// Then we'll go through all of the downed machine's
		// Deployments, and start forwarding them to the
		// machine with the least, failing over to the next
		// least if the previous machine already has said
		// deployment until all the deployments have been
		// reassigned.
		e.AttemptRecovery(m)
		return
	}
}
Example #4
0
// handleNewNode registers a newly reported machine with the cluster.
//
// The machine is deserialized from node.Value and handed to
// e.cluster.AddMachine without any further checks.
//
// NOTE(review): unlike loadMachines, this does not compare the machine's Id
// with e.machine.Id — confirm whether our own node can reach this handler.
func (e *EtcdBackend) handleNewNode(node *client.Node) {
	e.cluster.AddMachine(cluster.DeserializeMachine(node.Value))
}