func (c *cmdrListener) sendMsg(resp *etcd.Response) (index uint64, ok bool) {
	// Delete/Expire events shouldn't be processed
	if releaseActions[resp.Action] {
		return resp.Node.ModifiedIndex + 1, true
	}

	// Remove command so it's not processed twice
	cadresp, err := c.cli.CompareAndDelete(resp.Node.Key, resp.Node.Value, 0)
	if err != nil {
		if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeCompareFailed {
			metafora.Infof("Received successive commands; attempting to retrieve the latest: %v", err)
			return resp.Node.ModifiedIndex + 1, true
		}
		metafora.Errorf("Error deleting command %s: %s - sending error to stateful handler: %v", c.path, resp.Node.Value, err)
		c.sendErr(err)
		return 0, false
	}

	msg := &statemachine.Message{}
	if err := json.Unmarshal([]byte(resp.Node.Value), msg); err != nil {
		metafora.Errorf("Error unmarshalling command from %s - sending error to stateful handler: %v", c.path, err)
		c.sendErr(err)
		return 0, false
	}

	select {
	case c.commands <- msg:
		return cadresp.Node.ModifiedIndex + 1, true
	case <-c.stop:
		return 0, false
	}
}
func (ec *EtcdCoordinator) parseTask(resp *etcd.Response) metafora.Task {
	// Sanity check / test path invariant
	if !strings.HasPrefix(resp.Node.Key, ec.taskPath) {
		metafora.Errorf("%s received task from outside task path: %s", ec.name, resp.Node.Key)
		return nil
	}

	key := strings.Trim(resp.Node.Key, "/") // strip leading and trailing /s
	parts := strings.Split(key, "/")

	// Pick up new tasks
	if newActions[resp.Action] && len(parts) == 3 && resp.Node.Dir {
		// Make sure it's not already claimed before returning it
		for _, n := range resp.Node.Nodes {
			if strings.HasSuffix(n.Key, OwnerMarker) {
				metafora.Debugf("%s ignoring task as it's already claimed: %s", ec.name, parts[2])
				return nil
			}
		}

		metafora.Debugf("%s received new task: %s", ec.name, parts[2])

		props := ""
		for _, n := range resp.Node.Nodes {
			if strings.HasSuffix(n.Key, "/"+PropsKey) {
				props = n.Value
				break
			}
		}
		return ec.newTask(parts[2], props)
	}

	if newActions[resp.Action] && len(parts) == 4 && parts[3] == PropsKey {
		metafora.Debugf("%s received task with properties: %s", ec.name, parts[2])
		return ec.newTask(parts[2], resp.Node.Value)
	}

	// If a claim key is removed, try to claim the task
	if releaseActions[resp.Action] && len(parts) == 4 && parts[3] == OwnerMarker {
		metafora.Debugf("%s received released task: %s", ec.name, parts[2])

		// Sadly we need to fail parsing this task if there's an error getting the
		// props file as trying to claim a task without properly knowing its
		// properties could cause major issues.
		parts[3] = PropsKey
		propsnode, err := ec.client.Get(path.Join(parts...), unsorted, notrecursive)
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeKeyNotFound {
				// No props file
				return ec.newTask(parts[2], "")
			}
			metafora.Errorf("%s error getting properties while handling %s", ec.name, parts[2])
			return nil
		}
		return ec.newTask(parts[2], propsnode.Node.Value)
	}

	// Ignore any other key events (_metafora keys, task deletion, etc.)
	return nil
}
// watch will return either an etcd Response or an error. Two errors returned
// by this method should be treated specially:
//
//   1. etcd.ErrWatchStoppedByUser - the coordinator has closed, exit
//      accordingly
//
//   2. restartWatchError - the specified index is too old, try again with a
//      newer index
func (ec *EtcdCoordinator) watch(c *etcd.Client, path string, index uint64, stop chan bool) (*etcd.Response, error) {
	const recursive = true
	for {
		// Start the blocking watch after the last response's index.
		rawResp, err := protectedRawWatch(c, path, index+1, recursive, nil, stop)
		if err != nil {
			if err == etcd.ErrWatchStoppedByUser {
				// This isn't actually an error, the stop chan was closed. Time to stop!
				return nil, err
			}

			// This is probably a canceled request panic.
			// Wait a little bit, then continue as normal.
			// Can be removed after Go 1.5 is released.
			if ispanic(err) {
				time.Sleep(250 * time.Millisecond)
				continue
			}

			// Other RawWatch errors should be retried forever. If the node refresher
			// also fails to communicate with etcd it will close the coordinator,
			// closing ec.stop in the process, which will cause this function to exit
			// with ErrWatchStoppedByUser.
			metafora.Errorf("%s Retrying after unexpected watch error: %v", path, err)
			transport.CloseIdleConnections() // paranoia; let's get fresh connections on errors
			continue
		}

		if len(rawResp.Body) == 0 {
			// This is a bug in Go's HTTP + go-etcd + etcd which causes the
			// connection to time out periodically and need to be restarted *after*
			// closing idle connections.
			transport.CloseIdleConnections()
			continue
		}

		resp, err := rawResp.Unmarshal()
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok {
				if ee.ErrorCode == EcodeExpiredIndex {
					metafora.Debugf("%s Too many events have happened since index was updated. Restarting watch.", ec.taskPath)
					// We need to retrieve all existing tasks to update our index
					// without potentially missing some events.
					return nil, restartWatchError
				}
			}
			metafora.Errorf("%s Unexpected error unmarshalling etcd response: %+v", ec.taskPath, err)
			return nil, err
		}
		return resp, nil
	}
}
func run(f StatefulHandler, task metafora.Task, cmd <-chan *Message) (m *Message) {
	defer func() {
		if r := recover(); r != nil {
			stackBuf := make([]byte, 6000)
			stackBufLen := runtime.Stack(stackBuf, false)
			stackTraceStr := string(stackBuf[0:stackBufLen])
			metafora.Errorf("task=%q Run method panic()d! Applying Error message. Panic: %v\nStack: %s", task.ID(), r, stackTraceStr)
			m = &Message{Code: Error, Err: fmt.Errorf("panic: %v\nstack: %s", r, stackTraceStr)}
		}
	}()

	// Defensive code to give handlers a *copy* of the command chan. That way if
	// a handler keeps receiving on the command chan in a goroutine past the
	// handler's lifetime it doesn't intercept commands intended for the
	// statemachine.
	internalcmd := make(chan *Message)
	stopped := make(chan struct{})
	go func() {
		for {
			select {
			case c := <-cmd:
				internalcmd <- c
			case <-stopped:
				return
			}
		}
	}()
	defer close(stopped)

	return f(task, internalcmd)
}
// Close stops the coordinator and causes blocking Watch and Command methods to
// return zero values. It does not release tasks.
func (ec *EtcdCoordinator) Close() {
	// Gracefully handle multiple close calls mostly to ease testing. This block
	// isn't threadsafe, so you shouldn't try to call Close() concurrently.
	select {
	case <-ec.stop:
		return
	default:
	}
	close(ec.stop)

	// Finally remove the node entry
	const recursive = true
	_, err := ec.client.Delete(ec.nodePath, recursive)
	if err != nil {
		if eerr, ok := err.(*etcd.EtcdError); ok {
			if eerr.ErrorCode == EcodeKeyNotFound {
				// The node's TTL was up before we were able to delete it or there was
				// another problem that's already being handled.
				// The first is unlikely, the latter is already being handled, so
				// there's nothing to do here.
				return
			}
		}
		// All other errors are unexpected
		metafora.Errorf("Error deleting node path %s: %v", ec.nodePath, err)
	}
}
// nodeRefresher is in charge of keeping the node entry in etcd alive. If it's
// unable to communicate with etcd it must shut down the coordinator.
//
// watch retries on errors and taskmgr calls Lost(task) on tasks it can't
// refresh, so it's up to nodeRefresher to cause the coordinator to close if
// it's unable to communicate with etcd.
func (ec *EtcdCoordinator) nodeRefresher() {
	ttl := ec.conf.NodeTTL >> 1 // have some leeway before ttl expires
	if ttl < 1 {
		metafora.Warnf("%s Dangerously low NodeTTL: %d", ec.name, ec.conf.NodeTTL)
		ttl = 1
	}

	// Create a local etcd client since it's not threadsafe, but don't bother
	// checking for errors at this point.
	client, _ := newEtcdClient(ec.conf.Hosts)

	for {
		// Deadline for refreshes to finish by or the coordinator closes.
		deadline := time.Now().Add(time.Duration(ec.conf.NodeTTL) * time.Second)
		select {
		case <-ec.stop:
			return
		case <-time.After(time.Duration(ttl) * time.Second):
			if err := ec.refreshBy(client, deadline); err != nil {
				// We're in a bad state; shut everything down
				metafora.Errorf("Unable to refresh node key before deadline %s. Last error: %v", deadline, err)
				ec.Close()
			}
		}
	}
}
// NewEtcdCoordinator creates a new Metafora Coordinator implementation using
// etcd as the broker. If no node ID is specified, a unique one will be
// generated.
//
// Coordinator methods will be called by the core Metafora Consumer. Calling
// Init, Close, etc. from your own code will lead to undefined behavior.
func NewEtcdCoordinator(conf *Config) (*EtcdCoordinator, error) {
	client, err := newEtcdClient(conf.Hosts)
	if err != nil {
		return nil, err
	}

	ec := &EtcdCoordinator{
		client: client,
		conf:   conf,
		name:   conf.String(),

		commandPath: path.Join(conf.Namespace, NodesPath, conf.Name, CommandsPath),
		nodePath:    path.Join(conf.Namespace, NodesPath, conf.Name),
		taskPath:    path.Join(conf.Namespace, TasksPath),

		stop: make(chan bool),
	}

	// Protect callers of task functions from panics.
	ec.newTask = func(id, value string) metafora.Task {
		defer func() {
			if p := recover(); p != nil {
				metafora.Errorf("%s panic when creating task: %v", ec.name, p)
			}
		}()
		return conf.NewTaskFunc(id, value)
	}

	return ec, nil
}
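// exampleStartConsumer is a minimal usage sketch, not part of the original
// source: it shows wiring a coordinator into the core Consumer, which is the
// only supported way to drive it (per the doc comment above). The node name,
// namespace, and handler factory here are placeholder assumptions; a fuller,
// real example is the koalemosd main() later in this listing.
func exampleStartConsumer(hosts []string, hfunc metafora.HandlerFunc) error {
	conf := m_etcd.NewConfig("node-1", "example-namespace", hosts)
	coord, err := m_etcd.NewEtcdCoordinator(conf)
	if err != nil {
		return err
	}
	consumer, err := metafora.NewConsumer(coord, hfunc, m_etcd.NewFairBalancer(conf))
	if err != nil {
		return err
	}
	consumer.Run() // blocks until Shutdown() is called
	return nil
}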
func (ec *EtcdCoordinator) parseCommand(c *etcd.Client, resp *etcd.Response) metafora.Command {
	if strings.HasSuffix(resp.Node.Key, MetadataKey) {
		// Skip metadata marker
		return nil
	}

	const recurse = false
	if _, err := c.Delete(resp.Node.Key, recurse); err != nil {
		metafora.Errorf("Error deleting handled command %s: %v", resp.Node.Key, err)
	}

	cmd, err := metafora.UnmarshalCommand([]byte(resp.Node.Value))
	if err != nil {
		metafora.Errorf("Invalid command %s: %v", resp.Node.Key, err)
		return nil
	}
	return cmd
}
// Command blocks until a command for this node is received from the broker
// by the coordinator.
func (ec *EtcdCoordinator) Command() (metafora.Command, error) {
	if ec.closed() {
		// already closed, don't restart watch
		return nil, nil
	}

	client, err := newEtcdClient(ec.conf.Hosts)
	if err != nil {
		return nil, err
	}

startWatch:
	for {
		// Get existing commands
		resp, err := client.Get(ec.commandPath, sorted, recursive)
		if err != nil {
			metafora.Errorf("%s Error getting the existing commands: %v", ec.commandPath, err)
			return nil, err
		}

		// Start watching at the index the Get retrieved since we've retrieved all
		// commands up to that point.
		index := resp.EtcdIndex

		// Act like existing keys are newly created
		for _, node := range resp.Node.Nodes {
			if cmd := ec.parseCommand(client, &etcd.Response{Action: "create", Node: node}); cmd != nil {
				return cmd, nil
			}
		}

		for {
			resp, err := ec.watch(client, ec.commandPath, index, ec.stop)
			if err != nil {
				if err == restartWatchError {
					continue startWatch
				}
				if err == etcd.ErrWatchStoppedByUser {
					return nil, nil
				}
				// Any other error from watch is fatal for this call; returning it
				// also avoids dereferencing a nil response below.
				return nil, err
			}

			if cmd := ec.parseCommand(client, resp); cmd != nil {
				return cmd, nil
			}

			index = resp.Node.ModifiedIndex
		}
	}
}
// SubmitCommand creates a new command for a particular node ID. The command is
// given a random name and added to that node's directory in etcd.
func (mc *mclient) SubmitCommand(node string, command metafora.Command) error {
	cmdPath := mc.cmdPath(node)
	body, err := command.Marshal()
	if err != nil {
		// This is either a bug in metafora or someone implemented their own
		// command incorrectly.
		return err
	}

	if _, err := mc.etcd.AddChild(cmdPath, string(body), foreverTTL); err != nil {
		metafora.Errorf("Error submitting command: %s to node: %s", command, node)
		return err
	}

	metafora.Debugf("Submitted command: %s to node: %s", string(body), node)
	return nil
}
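// exampleSubmitBalance is a hedged usage sketch, not part of the original
// source: it shows a caller asking a specific node to rebalance via
// SubmitCommand. It assumes you already hold a metafora.Client backed by this
// package and that a metafora.CommandBalance() constructor is available in
// your metafora version; substitute whichever command constructor you use.
func exampleSubmitBalance(mc metafora.Client, node string) {
	if err := mc.SubmitCommand(node, metafora.CommandBalance()); err != nil {
		metafora.Errorf("Error submitting balance command to node %s: %v", node, err)
	}
}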
// Run the state machine enabled handler. Loads the initial state and passes
// control to the internal stateful handler, passing commands from the command
// listener into the handler's commands chan.
func (s *stateMachine) Run() (done bool) {
	// Multiplex external (Stop) messages and internal ones
	s.cmds = make(chan *Message)
	go func() {
		for {
			select {
			case m := <-s.cl.Receive():
				if !m.Valid() {
					metafora.Warnf("Ignoring invalid command: %q", m)
					continue
				}
				select {
				case s.cmds <- m:
				case <-s.stopped:
					return
				}
			case <-s.stopped:
				return
			}
		}
	}()

	// Stop the command listener and internal message multiplexer when Run exits
	defer func() {
		s.cl.Stop()
		s.stop()
	}()

	tid := s.task.ID()

	// Load the initial state
	state, err := s.ss.Load(s.task)
	if err != nil {
		// A failure to load the state for a task is *fatal* - the task will be
		// unscheduled and requires operator intervention to reschedule.
		metafora.Errorf("task=%q could not load initial state. Marking done! Error: %v", tid, err)
		return true
	}
	if state == nil {
		// Note to StateStore implementors: This should not happen! Either state or
		// err must be non-nil. This code is simply to prevent a nil pointer panic.
		metafora.Errorf("statestore %T returned nil state and err for task=%q - unscheduling", s.ss, tid)
		return true
	}
	if state.Code.Terminal() {
		metafora.Warnf("task=%q in terminal state %s - exiting.", tid, state.Code)
		return true
	}

	s.setState(state) // for introspection/debugging

	// Main Statemachine Loop
	done = false
	for {
		// Enter State
		metafora.Debugf("task=%q in state %s", tid, state.Code)
		msg := s.exec(state)

		// Apply Message
		newstate, ok := apply(state, msg)
		if !ok {
			metafora.Warnf("task=%q Invalid state transition=%q returned by task. Old state=%q", tid, msg.Code, state.Code)
			msg = ErrorMessage(fmt.Errorf("invalid transition %q from state %q", msg.Code, state.Code))
			if newstate, ok = apply(state, msg); !ok {
				metafora.Errorf("task=%q Unable to transition to error state! Exiting with state=%q", tid, state.Code)
				return state.Code.Terminal()
			}
		}

		metafora.Infof("task=%q transitioning %s --> %s --> %s", tid, state, msg, newstate)

		// Save state
		if err := s.ss.Store(s.task, newstate); err != nil {
			metafora.Errorf("task=%q Unable to persist state=%q. Unscheduling.", tid, newstate.Code)
			return true
		}

		// Set next state and loop if non-terminal
		state = newstate

		// Expose the state for introspection
		s.setState(state)

		// Exit and unschedule task on terminal state.
		if state.Code.Terminal() {
			return true
		}

		// Release messages indicate the task should exit but not unschedule.
		if msg.Code == Release {
			return false
		}

		// Alternatively Stop() may have been called but the handler may not have
		// returned the Release message. Always exit if we've been told to Stop()
		// even if the handler has returned a different Message.
		select {
		case <-s.stopped:
			return false
		default:
		}
	}
}
// add starts refreshing a given key+value pair for a task asynchronously.
func (m *taskManager) add(task metafora.Task) bool {
	tid := task.ID()

	// Attempt to claim the node
	key, value := m.ownerNode(tid)
	resp, err := m.client.Create(key, value, m.ttl)
	if err != nil {
		etcdErr, ok := err.(*etcd.EtcdError)
		if !ok || etcdErr.ErrorCode != EcodeNodeExist {
			metafora.Errorf("Claim of %s failed with an unexpected error: %v", key, err)
		} else {
			metafora.Debugf("Claim of %s failed, already claimed", key)
		}
		return false
	}

	index := resp.Node.CreatedIndex

	// lytics/metafora#124 - the successful create above may have resurrected a
	// deleted (done) task. Compare the CreatedIndex of the directory with the
	// CreatedIndex of the claim key; if they're equal, this claim resurrected a
	// done task and should clean up.
	resp, err = m.client.Get(m.taskPath(tid), unsorted, notrecursive)
	if err != nil {
		// Erroring here is BAD as we may have resurrected a done task, and because
		// of this failure there's no way to tell. The claim will eventually
		// timeout and the task will get reclaimed.
		metafora.Errorf("Error retrieving task path %q after claiming %q: %v", m.taskPath(tid), tid, err)
		return false
	}
	if resp.Node.CreatedIndex == index {
		metafora.Debugf("Task %s resurrected due to claim/done race. Re-deleting.", tid)
		if _, err = m.client.Delete(m.taskPath(tid), recursive); err != nil {
			// This is as bad as it gets. We *know* we resurrected a task, but we
			// failed to re-delete it.
			metafora.Errorf("Task %s was resurrected and could not be removed! %s should be manually removed. Error: %v", tid, m.taskPath(tid), err)
		}

		// Regardless of whether or not the delete succeeded, never treat
		// resurrected tasks as claimed.
		return false
	}

	// Claim successful, start the refresher
	metafora.Debugf("Claim successful: %s", key)
	done := make(chan struct{})
	release := make(chan struct{})
	finished := make(chan struct{})
	m.taskL.Lock()
	m.tasks[tid] = taskStates{done: done, release: release, finished: finished}
	m.taskL.Unlock()

	metafora.Debugf("Starting claim refresher for task %s", tid)
	go func() {
		defer func() {
			m.taskL.Lock()
			delete(m.tasks, tid)
			m.taskL.Unlock()
			close(finished)
		}()

		for {
			select {
			case <-time.After(m.interval):
				// Try to refresh the claim node (0 index means compare by value)
				if _, err := m.client.CompareAndSwap(key, value, m.ttl, value, 0); err != nil {
					metafora.Errorf("Error trying to update task %s ttl: %v", tid, err)
					m.ctx.Lost(task)
					// On errors, don't even try to Delete as we're in a bad state
					return
				}
			case <-done:
				metafora.Debugf("Deleting directory for task %s as it's done.", tid)
				const recursive = true
				if _, err := m.client.Delete(m.taskPath(tid), recursive); err != nil {
					metafora.Errorf("Error deleting task %s while stopping: %v", tid, err)
				}
				return
			case <-release:
				metafora.Debugf("Deleting claim for task %s as it's released.", tid)
				// Not done, releasing; just delete the claim node
				if _, err := m.client.CompareAndDelete(key, value, 0); err != nil {
					metafora.Warnf("Error releasing task %s while stopping: %v", tid, err)
				}
				return
			}
		}
	}()
	return true
}
func main() {
	mlvl := metafora.LogLevelInfo
	hostname, _ := os.Hostname()

	peers := flag.String("etcd", "http://127.0.0.1:2379", "comma delimited etcd peer list")
	namespace := flag.String("namespace", "koalemos", "metafora namespace")
	name := flag.String("name", hostname, "node name or empty for automatic")
	loglvl := flag.String("log", mlvl.String(), "set log level: [debug], info, warn, error")
	flag.Parse()

	hosts := strings.Split(*peers, ",")
	etcdc := etcd.NewClient(hosts)

	switch strings.ToLower(*loglvl) {
	case "debug":
		mlvl = metafora.LogLevelDebug
	case "info":
		mlvl = metafora.LogLevelInfo
	case "warn":
		mlvl = metafora.LogLevelWarn
	case "error":
		mlvl = metafora.LogLevelError
	default:
		metafora.Warnf("Invalid log level %q - using %s", *loglvl, mlvl)
	}
	metafora.SetLogLevel(mlvl)

	conf := m_etcd.NewConfig(*name, *namespace, hosts)

	// Replace NewTask func with one that returns a *koalemos.Task
	conf.NewTaskFunc = func(id, value string) metafora.Task {
		t := koalemos.NewTask(id)
		if value == "" {
			return t
		}
		if err := json.Unmarshal([]byte(value), t); err != nil {
			metafora.Errorf("Unable to unmarshal task %s: %v", t.ID(), err)
			return nil
		}
		return t
	}

	hfunc := makeHandlerFunc(etcdc)
	ec, err := m_etcd.NewEtcdCoordinator(conf)
	if err != nil {
		metafora.Errorf("Error creating etcd coordinator: %v", err)
		os.Exit(2)
	}

	bal := m_etcd.NewFairBalancer(conf)
	c, err := metafora.NewConsumer(ec, hfunc, bal)
	if err != nil {
		metafora.Errorf("Error creating consumer: %v", err)
		os.Exit(2)
	}

	metafora.Infof(
		"Starting koalemosd with etcd=%s; namespace=%s; name=%s; loglvl=%s",
		*peers, conf.Namespace, conf.Name, mlvl)
	consumerRunning := make(chan struct{})
	go func() {
		defer close(consumerRunning)
		c.Run()
	}()

	sigC := make(chan os.Signal, 1)
	signal.Notify(sigC, os.Interrupt, os.Kill, syscall.SIGTERM)
	select {
	case s := <-sigC:
		metafora.Infof("Received signal %s, shutting down", s)
	case <-consumerRunning:
		metafora.Warn("Consumer exited. Shutting down.")
	}

	c.Shutdown()
	metafora.Info("Shutdown")
}
func (c *cmdrListener) watcher() {
	var index uint64
	var ok bool

startWatch:
	resp, err := c.cli.Get(c.path, notrecursive, unsorted)
	if err != nil {
		if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeKeyNotFound {
			// No command found; this is normal. Grab the index and skip to watching.
			index = ee.Index
			goto watchLoop
		}
		metafora.Errorf("Error GETting %s - sending error to stateful handler: %v", c.path, err)
		c.sendErr(err)
		return
	}

	if index, ok = c.sendMsg(resp); !ok {
		return
	}

watchLoop:
	for {
		rr, err := protectedRawWatch(c.cli, c.path, index, notrecursive, nil, c.stop)
		if err != nil {
			if err == etcd.ErrWatchStoppedByUser {
				return
			}
			// This is probably a canceled request panic.
			// Wait a little bit, then continue as normal.
			// Can be removed after Go 1.5 is released.
			if ispanic(err) {
				continue
			}

			metafora.Errorf("Error watching %s - sending error to stateful handler: %v", c.path, err)
			c.sendErr(err)
			return
		}

		if len(rr.Body) == 0 {
			// This is a bug in Go's HTTP + go-etcd + etcd which causes the
			// connection to time out periodically and need to be restarted *after*
			// closing idle connections.
			transport.CloseIdleConnections()
			continue watchLoop
		}

		resp, err := rr.Unmarshal()
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok {
				if ee.ErrorCode == EcodeExpiredIndex {
					goto startWatch
				}
			}
			metafora.Errorf("Error watching %s - sending error to stateful handler: %v", c.path, err)
			c.sendErr(err)
			return
		}

		metafora.Debugf("Received command via %s -- sending to statemachine", c.path)
		if index, ok = c.sendMsg(resp); !ok {
			return
		}
	}
}
// Watch streams tasks from etcd watches or GETs until Close is called or etcd
// is unreachable (in which case an error is returned).
func (ec *EtcdCoordinator) Watch(out chan<- metafora.Task) error {
	var index uint64

	client, err := newEtcdClient(ec.conf.Hosts)
	if err != nil {
		return err
	}

startWatch:
	for {
		// Make sure we haven't been told to exit
		select {
		case <-ec.stop:
			return nil
		default:
		}

		// Get existing tasks
		resp, err := client.Get(ec.taskPath, unsorted, recursive)
		if err != nil {
			metafora.Errorf("%s Error getting the existing tasks: %v", ec.taskPath, err)
			return err
		}

		// Start watching at the index the Get retrieved since we've retrieved all
		// tasks up to that point.
		index = resp.EtcdIndex

		// Act like existing keys are newly created
		for _, node := range resp.Node.Nodes {
			if task := ec.parseTask(&etcd.Response{Action: "create", Node: node}); task != nil {
				select {
				case out <- task:
				case <-ec.stop:
					return nil
				}
			}
		}

		// Start blocking watch
		for {
			resp, err := ec.watch(client, ec.taskPath, index, ec.stop)
			if err != nil {
				if err == restartWatchError {
					continue startWatch
				}
				if err == etcd.ErrWatchStoppedByUser {
					return nil
				}
				return err
			}

			// Found a claimable task! Return it if it's not Ignored.
			if task := ec.parseTask(resp); task != nil {
				select {
				case out <- task:
				case <-ec.stop:
					return nil
				}
			}

			// Start the next watch from the latest index seen
			index = resp.Node.ModifiedIndex
		}
	}
}