// nodeRefresher is in charge of keeping the node entry in etcd alive. If it's
// unable to communicate with etcd it must shut down the coordinator.
//
// watch retries on errors and taskmgr calls Lost(task) on tasks it can't
// refresh, so it's up to nodeRefresher to cause the coordinator to close if
// it's unable to communicate with etcd.
func (ec *EtcdCoordinator) nodeRefresher() {
    ttl := ec.conf.NodeTTL >> 1 // have some leeway before ttl expires
    if ttl < 1 {
        metafora.Warnf("%s Dangerously low NodeTTL: %d", ec.name, ec.conf.NodeTTL)
        ttl = 1
    }

    // Create a local etcd client since it's not threadsafe, but don't bother
    // checking for errors at this point.
    client, _ := newEtcdClient(ec.conf.Hosts)

    for {
        // Deadline for refreshes to finish by or the coordinator closes.
        deadline := time.Now().Add(time.Duration(ec.conf.NodeTTL) * time.Second)
        select {
        case <-ec.stop:
            return
        case <-time.After(time.Duration(ttl) * time.Second):
            if err := ec.refreshBy(client, deadline); err != nil {
                // We're in a bad state; shut everything down
                metafora.Errorf("Unable to refresh node key before deadline %s. Last error: %v", deadline, err)
                ec.Close()
            }
        }
    }
}
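// Illustrative timing (a sketch, not part of the coordinator): with
// NodeTTL=30 the refresher wakes at 30>>1 = 15s, and the deadline is a full
// NodeTTL out, so refreshBy gets roughly the remaining 15s to retry before
// the node key can expire and the coordinator must close.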
func (e *etcdClusterState) NodeTaskCount() (map[string]int, error) {
    state := map[string]int{}

    // First initialize state with nodes as keys
    resp, err := e.client.Get(e.nodePath, unsorted, recursive)
    if err != nil {
        return nil, err
    }
    if resp == nil || resp.Node == nil {
        metafora.Warnf("balancer received empty response from GET %s", e.nodePath)
        return state, nil
    }
    for _, node := range resp.Node.Nodes {
        state[path.Base(node.Key)] = 0
    }

    // Then count how many tasks each node has
    resp, err = e.client.Get(e.taskPath, unsorted, recursive)
    if err != nil {
        return nil, err
    }

    // No current tasks
    if resp == nil || resp.Node == nil {
        return state, nil
    }

    // Walk all claimed work and count claims per node; tasks which have no
    // claims are ignored.
    for _, task := range resp.Node.Nodes {
        for _, claim := range task.Nodes {
            if path.Base(claim.Key) == OwnerMarker {
                val := ownerValue{}
                if err := json.Unmarshal([]byte(claim.Value), &val); err == nil {
                    // Only count claims owned by nodes discovered above; other
                    // nodes may be shutting down and should not be counted.
                    if _, ok := state[val.Node]; ok {
                        state[val.Node]++
                    }
                }
            }
        }
    }
    return state, nil
}
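// A minimal sketch (not part of the balancer) of the owner document parsed
// above. It assumes claims are stored as JSON with a lower-case "node" field
// matching the ownerValue struct used by the coordinator.
func exampleOwnerValue() {
    raw := []byte(`{"node":"node1"}`)
    val := ownerValue{}
    if err := json.Unmarshal(raw, &val); err == nil {
        fmt.Println(val.Node) // "node1" - the key incremented in state
    }
}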
func newManager(ctx metafora.CoordinatorContext, client client, path, nodeID string, ttl uint64) *taskManager {
    if ttl == 0 {
        panic("refresher: TTL must be > 0")
    }

    // Refresh more often than strictly necessary to be safe.
    interval := time.Duration((ttl>>1)+(ttl>>2)) * time.Second
    if ttl == 1 {
        interval = 750 * time.Millisecond
        metafora.Warnf("Dangerously low TTL: %d; consider raising.", ttl)
    }

    return &taskManager{
        ctx:      ctx,
        client:   client,
        tasks:    make(map[string]taskStates),
        path:     path,
        node:     nodeID,
        ttl:      ttl,
        interval: interval,
    }
}
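// Worked example of the interval math above (illustrative): (ttl>>1)+(ttl>>2)
// is roughly 3/4 of the TTL, so each claim is refreshed with about a quarter
// of its TTL to spare:
//
//	ttl=4  -> 2+1  = 3s interval  (1s slack)
//	ttl=8  -> 4+2  = 6s interval  (2s slack)
//	ttl=30 -> 15+7 = 22s interval (8s slack)
//	ttl=1  -> special-cased to 750ms above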
// refreshBy retries refreshing the node key until the deadline is reached.
func (ec *EtcdCoordinator) refreshBy(c *etcd.Client, deadline time.Time) (err error) {
    for time.Now().Before(deadline) {
        // Make sure we shouldn't exit
        select {
        case <-ec.stop:
            return err
        default:
        }

        _, err = c.UpdateDir(ec.nodePath, ec.conf.NodeTTL)
        if err == nil {
            // It worked!
            return nil
        }
        metafora.Warnf("Unexpected error updating node key: %v", err)
        transport.CloseIdleConnections() // paranoia; get fresh connections on errors
        time.Sleep(500 * time.Millisecond) // rate limit retries a bit
    }
    // Didn't get a successful response before the deadline; exit with the last error.
    return err
}
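// Back-of-the-envelope retry budget (illustrative): nodeRefresher calls
// refreshBy about NodeTTL/2 seconds before the deadline, so with the 500ms
// sleep above a NodeTTL of 30s allows roughly 15s / 500ms = 30 UpdateDir
// attempts before the coordinator gives up and closes.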
// exec executes non-terminal states.
func (s *stateMachine) exec(state *State) *Message {
    switch state.Code {
    case Runnable:
        // Runnable passes control to the stateful handler
        return run(s.h, s.task, s.cmds)
    case Paused:
        // Paused until a message arrives
        return <-s.cmds
    case Sleeping:
        // Sleeping until the specified time (or a message)
        if state.Until == nil {
            metafora.Warnf("task=%q told to sleep without a time. Resuming.", s.task.ID())
            return RunMessage()
        }
        dur := state.Until.Sub(time.Now())
        metafora.Infof("task=%q sleeping for %s", s.task.ID(), dur)
        timer := time.NewTimer(dur)
        select {
        case <-timer.C:
            return RunMessage()
        case msg := <-s.cmds:
            timer.Stop()
            // Checkpoint & Release are special cases that shouldn't affect
            // sleep time, so maintain it across the state transition.
            if msg.Code == Checkpoint || msg.Code == Release {
                msg.Until = state.Until
            }
            return msg
        }
    case Fault:
        // Special case where we potentially trim the current state to keep
        // errors from growing without bound.
        var msg *Message
        msg, state.Errors = s.errHandler(s.task, state.Errors)
        return msg
    default:
        panic("invalid state: " + state.String())
    }
}
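// A minimal sketch (hypothetical helper, not part of the package) of the
// Sleeping special case above: Checkpoint and Release messages arriving
// mid-sleep carry the wake-up time forward, so the task resumes sleeping
// instead of waking early.
func exampleSleepPreservesUntil() {
    until := time.Now().Add(5 * time.Minute)
    state := &State{Code: Sleeping, Until: &until}
    msg := &Message{Code: Checkpoint}

    // Mirrors the branch in exec above:
    if msg.Code == Checkpoint || msg.Code == Release {
        msg.Until = state.Until
    }
    fmt.Println(msg.Until.Equal(until)) // true - the sleep deadline survives
}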
// Run the state machine enabled handler. Loads the initial state and passes
// control to the internal stateful handler, passing commands from the command
// listener into the handler's commands chan.
func (s *stateMachine) Run() (done bool) {
    // Multiplex external (Stop) messages and internal ones
    s.cmds = make(chan *Message)
    go func() {
        for {
            select {
            case m := <-s.cl.Receive():
                if !m.Valid() {
                    metafora.Warnf("Ignoring invalid command: %q", m)
                    continue
                }
                select {
                case s.cmds <- m:
                case <-s.stopped:
                    return
                }
            case <-s.stopped:
                return
            }
        }
    }()

    // Stop the command listener and internal message multiplexer when Run exits
    defer func() {
        s.cl.Stop()
        s.stop()
    }()

    tid := s.task.ID()

    // Load the initial state
    state, err := s.ss.Load(s.task)
    if err != nil {
        // A failure to load the state for a task is *fatal* - the task will be
        // unscheduled and requires operator intervention to reschedule.
        metafora.Errorf("task=%q could not load initial state. Marking done! Error: %v", tid, err)
        return true
    }
    if state == nil {
        // Note to StateStore implementors: This should not happen! Either state
        // or err must be non-nil. This code simply prevents a nil pointer panic.
        metafora.Errorf("statestore %T returned nil state and err for task=%q - unscheduling", s.ss, tid)
        return true
    }
    if state.Code.Terminal() {
        metafora.Warnf("task=%q in terminal state %s - exiting.", tid, state.Code)
        return true
    }

    s.setState(state) // for introspection/debugging

    // Main Statemachine Loop
    done = false
    for {
        // Enter State
        metafora.Debugf("task=%q in state %s", tid, state.Code)
        msg := s.exec(state)

        // Apply Message
        newstate, ok := apply(state, msg)
        if !ok {
            metafora.Warnf("task=%q Invalid state transition=%q returned by task. Old state=%q", tid, msg.Code, state.Code)
            msg = ErrorMessage(fmt.Errorf("invalid state transition %s -> %s", state.Code, msg.Code))
            if newstate, ok = apply(state, msg); !ok {
                metafora.Errorf("task=%q Unable to transition to error state! Exiting with state=%q", tid, state.Code)
                return state.Code.Terminal()
            }
        }

        metafora.Infof("task=%q transitioning %s --> %s --> %s", tid, state, msg, newstate)

        // Save state
        if err := s.ss.Store(s.task, newstate); err != nil {
            metafora.Errorf("task=%q Unable to persist state=%q. Unscheduling.", tid, newstate.Code)
            return true
        }

        // Set next state and loop if non-terminal
        state = newstate

        // Expose the state for introspection
        s.setState(state)

        // Exit and unschedule task on terminal state.
        if state.Code.Terminal() {
            return true
        }

        // Release messages indicate the task should exit but not unschedule.
        if msg.Code == Release {
            return false
        }

        // Alternatively Stop() may have been called but the handler may not
        // have returned the Release message. Always exit if we've been told
        // to Stop() even if the handler has returned a different Message.
        select {
        case <-s.stopped:
            return false
        default:
        }
    }
}
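// The loop above, as a sketch of its exit conditions:
//
//	exec(state) -> msg -> apply -> Store -> loop
//	  terminal newstate  -> return true  (task unscheduled)
//	  Release message    -> return false (task may be rescheduled elsewhere)
//	  s.stopped closed   -> return false (shutdown, not unscheduled)
//	  Store error        -> return true  (cannot safely continue)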
// add starts refreshing a given key+value pair for a task asynchronously.
func (m *taskManager) add(task metafora.Task) bool {
    tid := task.ID()

    // Attempt to claim the node
    key, value := m.ownerNode(tid)
    resp, err := m.client.Create(key, value, m.ttl)
    if err != nil {
        etcdErr, ok := err.(*etcd.EtcdError)
        if !ok || etcdErr.ErrorCode != EcodeNodeExist {
            metafora.Errorf("Claim of %s failed with an unexpected error: %v", key, err)
        } else {
            metafora.Debugf("Claim of %s failed, already claimed", key)
        }
        return false
    }

    index := resp.Node.CreatedIndex

    // lytics/metafora#124 - the successful create above may have resurrected a
    // deleted (done) task. Compare the CreatedIndex of the directory with the
    // CreatedIndex of the claim key; if they're equal this claim resurrected a
    // done task and should clean up.
    resp, err = m.client.Get(m.taskPath(tid), unsorted, notrecursive)
    if err != nil {
        // Erroring here is BAD as we may have resurrected a done task, and
        // because of this failure there's no way to tell. The claim will
        // eventually time out and the task will get reclaimed.
        metafora.Errorf("Error retrieving task path %q after claiming %q: %v", m.taskPath(tid), tid, err)
        return false
    }
    if resp.Node.CreatedIndex == index {
        metafora.Debugf("Task %s resurrected due to claim/done race. Re-deleting.", tid)
        if _, err = m.client.Delete(m.taskPath(tid), recursive); err != nil {
            // This is as bad as it gets. We *know* we resurrected a task, but
            // we failed to re-delete it.
            metafora.Errorf("Task %s was resurrected and could not be removed! %s should be manually removed. Error: %v", tid, m.taskPath(tid), err)
        }

        // Regardless of whether or not the delete succeeded, never treat
        // resurrected tasks as claimed.
        return false
    }

    // Claim successful, start the refresher
    metafora.Debugf("Claim successful: %s", key)
    done := make(chan struct{})
    release := make(chan struct{})
    finished := make(chan struct{})
    m.taskL.Lock()
    m.tasks[tid] = taskStates{done: done, release: release, finished: finished}
    m.taskL.Unlock()

    metafora.Debugf("Starting claim refresher for task %s", tid)
    go func() {
        defer func() {
            m.taskL.Lock()
            delete(m.tasks, tid)
            m.taskL.Unlock()
            close(finished)
        }()

        for {
            select {
            case <-time.After(m.interval):
                // Try to refresh the claim node (0 index means compare by value)
                if _, err := m.client.CompareAndSwap(key, value, m.ttl, value, 0); err != nil {
                    metafora.Errorf("Error trying to update task %s ttl: %v", tid, err)
                    m.ctx.Lost(task)
                    // On errors, don't even try to Delete as we're in a bad state
                    return
                }
            case <-done:
                metafora.Debugf("Deleting directory for task %s as it's done.", tid)
                const recursive = true
                if _, err := m.client.Delete(m.taskPath(tid), recursive); err != nil {
                    metafora.Errorf("Error deleting task %s while stopping: %v", tid, err)
                }
                return
            case <-release:
                metafora.Debugf("Deleting claim for task %s as it's released.", tid)
                // Not done, releasing; just delete the claim node
                if _, err := m.client.CompareAndDelete(key, value, 0); err != nil {
                    metafora.Warnf("Error releasing task %s while stopping: %v", tid, err)
                }
                return
            }
        }
    }()
    return true
}
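// The claim/done race guarded against above (lytics/metafora#124) as a
// timeline sketch; key names are illustrative:
//
//	node A: task t finishes -> Delete /tasks/t (recursive)
//	node B: Create /tasks/t/owner              -- implicitly recreates /tasks/t
//	node B: Get /tasks/t -> directory CreatedIndex == claim CreatedIndex
//	        => our claim created the directory, so t was already done:
//	           re-delete it and report the claim as failed.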
func main() {
    mlvl := metafora.LogLevelInfo
    hostname, _ := os.Hostname()

    peers := flag.String("etcd", "http://127.0.0.1:2379", "comma delimited etcd peer list")
    namespace := flag.String("namespace", "koalemos", "metafora namespace")
    name := flag.String("name", hostname, "node name or empty for automatic")
    loglvl := flag.String("log", mlvl.String(), "set log level: [debug], info, warn, error")
    flag.Parse()

    hosts := strings.Split(*peers, ",")
    etcdc := etcd.NewClient(hosts)

    switch strings.ToLower(*loglvl) {
    case "debug":
        mlvl = metafora.LogLevelDebug
    case "info":
        mlvl = metafora.LogLevelInfo
    case "warn":
        mlvl = metafora.LogLevelWarn
    case "error":
        mlvl = metafora.LogLevelError
    default:
        metafora.Warnf("Invalid log level %q - using %s", *loglvl, mlvl)
    }
    metafora.SetLogLevel(mlvl)

    conf := m_etcd.NewConfig(*name, *namespace, hosts)

    // Replace the default NewTask func with one that returns a *koalemos.Task
    conf.NewTaskFunc = func(id, value string) metafora.Task {
        t := koalemos.NewTask(id)
        if value == "" {
            return t
        }
        if err := json.Unmarshal([]byte(value), t); err != nil {
            metafora.Errorf("Unable to unmarshal task %s: %v", t.ID(), err)
            return nil
        }
        return t
    }

    hfunc := makeHandlerFunc(etcdc)
    ec, err := m_etcd.NewEtcdCoordinator(conf)
    if err != nil {
        metafora.Errorf("Error creating etcd coordinator: %v", err)
        os.Exit(2)
    }
    bal := m_etcd.NewFairBalancer(conf)
    c, err := metafora.NewConsumer(ec, hfunc, bal)
    if err != nil {
        metafora.Errorf("Error creating consumer: %v", err)
        os.Exit(2)
    }

    metafora.Infof(
        "Starting koalemosd with etcd=%s; namespace=%s; name=%s; loglvl=%s",
        *peers, conf.Namespace, conf.Name, mlvl)
    consumerRunning := make(chan struct{})
    go func() {
        defer close(consumerRunning)
        c.Run()
    }()

    sigC := make(chan os.Signal, 1)
    // os.Kill (SIGKILL) cannot be trapped, so only notify on catchable signals.
    signal.Notify(sigC, os.Interrupt, syscall.SIGTERM)
    select {
    case s := <-sigC:
        metafora.Infof("Received signal %s, shutting down", s)
    case <-consumerRunning:
        metafora.Warn("Consumer exited. Shutting down.")
    }
    c.Shutdown()
    metafora.Info("Shutdown")
}
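// Example invocation (hypothetical endpoints and node name), using the flags
// defined above:
//
//	koalemosd -etcd http://10.0.0.5:2379,http://10.0.0.6:2379 \
//	          -namespace koalemos -name worker1 -log debug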