func (ec *EtcdCoordinator) parseTask(resp *etcd.Response) metafora.Task {
	// Sanity check / test path invariant
	if !strings.HasPrefix(resp.Node.Key, ec.taskPath) {
		metafora.Errorf("%s received task from outside task path: %s", ec.name, resp.Node.Key)
		return nil
	}

	key := strings.Trim(resp.Node.Key, "/") // strip leading and trailing /s
	parts := strings.Split(key, "/")

	// Pickup new tasks
	if newActions[resp.Action] && len(parts) == 3 && resp.Node.Dir {
		// Make sure it's not already claimed before returning it
		for _, n := range resp.Node.Nodes {
			if strings.HasSuffix(n.Key, OwnerMarker) {
				metafora.Debugf("%s ignoring task as it's already claimed: %s", ec.name, parts[2])
				return nil
			}
		}

		metafora.Debugf("%s received new task: %s", ec.name, parts[2])
		props := ""
		for _, n := range resp.Node.Nodes {
			if strings.HasSuffix(n.Key, "/"+PropsKey) {
				props = n.Value
				break
			}
		}
		return ec.newTask(parts[2], props)
	}

	if newActions[resp.Action] && len(parts) == 4 && parts[3] == PropsKey {
		metafora.Debugf("%s received task with properties: %s", ec.name, parts[2])
		return ec.newTask(parts[2], resp.Node.Value)
	}

	// If a claim key is removed, try to claim the task
	if releaseActions[resp.Action] && len(parts) == 4 && parts[3] == OwnerMarker {
		metafora.Debugf("%s received released task: %s", ec.name, parts[2])

		// Sadly we need to fail parsing this task if there's an error getting the
		// props file as trying to claim a task without properly knowing its
		// properties could cause major issues.
		parts[3] = PropsKey
		propsnode, err := ec.client.Get(path.Join(parts...), unsorted, notrecursive)
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeKeyNotFound {
				// No props file
				return ec.newTask(parts[2], "")
			}
			metafora.Errorf("%s error getting properties while handling %s", ec.name, parts[2])
			return nil
		}

		return ec.newTask(parts[2], propsnode.Node.Value)
	}

	// Ignore any other key events (_metafora keys, task deletion, etc.)
	return nil
}
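// The key layouts parseTask distinguishes are implied by the len(parts)
// checks above. A hedged illustration of the structure (segment names other
// than PropsKey/OwnerMarker are placeholders):
//
//	<taskPath>/<taskID>               -> len(parts) == 3: new task directory
//	<taskPath>/<taskID>/<PropsKey>    -> len(parts) == 4: task properties
//	<taskPath>/<taskID>/<OwnerMarker> -> len(parts) == 4: claim marker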
// remove tells a single task's refresher to stop and blocks until the task is
// handled.
func (m *taskManager) remove(taskID string, done bool) {
	m.taskL.Lock()
	states, ok := m.tasks[taskID]
	if !ok {
		m.taskL.Unlock()
		metafora.Debugf("Cannot remove task %s from refresher: not present.", taskID)
		return
	}

	select {
	case <-states.release:
		// already stopping
	case <-states.done:
		// already stopping
	default:
		if done {
			close(states.done)
		} else {
			close(states.release)
		}
	}
	m.taskL.Unlock()

	// Block until task is released/deleted to prevent races on shutdown where
	// the process could exit before all tasks are released.
	<-states.finished
}
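// A hedged usage note: the done flag selects which channel remove closes, and
// the claim refresher started in add (below) reacts accordingly -- done
// deletes the whole task directory, while release only deletes the claim key
// so another node can pick the task up.
//
//	m.remove(tid, true)  // task finished: delete its directory
//	m.remove(tid, false) // task released: delete only the claim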
func (ec *EtcdCoordinator) upsertDir(path string, ttl uint64) {
	// Hidden etcd key that isn't visible to ls commands on the directory; you
	// have to know about it to find it. It records some info about when the
	// cluster's schema was set up.
	pathMarker := path + "/" + MetadataKey

	_, err := ec.client.Get(path, unsorted, notrecursive)
	if err == nil {
		return
	}

	etcdErr, ok := err.(*etcd.EtcdError)
	if ok && etcdErr.ErrorCode == EcodeKeyNotFound {
		_, err := ec.client.CreateDir(path, ttl)
		if err != nil {
			metafora.Debugf("Error trying to create directory. path:[%s] error:[ %v ]", path, err)
		}
		host, _ := os.Hostname()

		metadata := struct {
			Host        string `json:"host"`
			CreatedTime string `json:"created"`
			ownerValue
		}{
			Host:        host,
			CreatedTime: time.Now().String(),
			ownerValue:  ownerValue{Node: ec.conf.Name},
		}
		metadataB, _ := json.Marshal(metadata)
		metadataStr := string(metadataB)
		ec.client.Create(pathMarker, metadataStr, ttl)
	}
}
// Init is called once by the consumer to provide a CoordinatorContext to
// Coordinator implementations.
func (ec *EtcdCoordinator) Init(cordCtx metafora.CoordinatorContext) error {
	metafora.Debugf("Initializing coordinator with namespace: %s and etcd cluster: %s",
		ec.conf.Namespace, strings.Join(ec.client.GetCluster(), ", "))

	ec.cordCtx = cordCtx

	ec.upsertDir(ec.conf.Namespace, foreverTTL)
	ec.upsertDir(ec.taskPath, foreverTTL)
	if _, err := ec.client.CreateDir(ec.nodePath, ec.conf.NodeTTL); err != nil {
		return err
	}

	// Create etcd client for task manager
	tmc, err := newEtcdClient(ec.conf.Hosts)
	if err != nil {
		return err
	}

	// Start goroutine to heartbeat node key in etcd
	go ec.nodeRefresher()

	ec.upsertDir(ec.commandPath, foreverTTL)

	ec.taskManager = newManager(cordCtx, tmc, ec.taskPath, ec.conf.Name, ec.conf.ClaimTTL)
	return nil
}
// SubmitTask creates a new task in etcd
func (mc *mclient) SubmitTask(task metafora.Task) error {
	fullpath := path.Join(mc.tskPath(task.ID()), PropsKey)
	buf, err := json.Marshal(task)
	if err != nil {
		return err
	}
	if _, err := mc.etcd.Create(fullpath, string(buf), foreverTTL); err != nil {
		return err
	}
	metafora.Debugf("task %s submitted: %s", task.ID(), fullpath)
	return nil
}
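// exampleSubmitTask is a hedged usage sketch and not part of the original
// source. It assumes NewClient and DefaultTaskFunc behave as they do in the
// tests further down; the namespace and etcd host are hypothetical
// placeholders.
func exampleSubmitTask() {
	cli := NewClient("/metafora-example", []string{"http://127.0.0.1:4001"})
	if err := cli.SubmitTask(DefaultTaskFunc("task-1", "")); err != nil {
		metafora.Errorf("submitting task-1 failed: %v", err)
	}
}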
// watch will return either an etcd Response or an error. Two errors returned
// by this method should be treated specially:
//
//   1. etcd.ErrWatchStoppedByUser - the coordinator has closed, exit
//      accordingly
//
//   2. restartWatchError - the specified index is too old, try again with a
//      newer index
func (ec *EtcdCoordinator) watch(c *etcd.Client, path string, index uint64, stop chan bool) (*etcd.Response, error) {
	const recursive = true
	for {
		// Start the blocking watch after the last response's index.
		rawResp, err := protectedRawWatch(c, path, index+1, recursive, nil, stop)
		if err != nil {
			if err == etcd.ErrWatchStoppedByUser {
				// This isn't actually an error, the stop chan was closed. Time to stop!
				return nil, err
			}

			// This is probably a canceled request panic
			// Wait a little bit, then continue as normal
			// Can be removed after Go 1.5 is released
			if ispanic(err) {
				time.Sleep(250 * time.Millisecond)
				continue
			}

			// Other RawWatch errors should be retried forever. If the node refresher
			// also fails to communicate with etcd it will close the coordinator,
			// closing ec.stop in the process which will cause this function to
			// return ErrWatchStoppedByUser.
			metafora.Errorf("%s Retrying after unexpected watch error: %v", path, err)
			transport.CloseIdleConnections() // paranoia; let's get fresh connections on errors.
			continue
		}

		if len(rawResp.Body) == 0 {
			// This is a bug in Go's HTTP + go-etcd + etcd which causes the
			// connection to time out periodically and need to be restarted *after*
			// closing idle connections.
			transport.CloseIdleConnections()
			continue
		}

		resp, err := rawResp.Unmarshal()
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok {
				if ee.ErrorCode == EcodeExpiredIndex {
					metafora.Debugf("%s Too many events have happened since index was updated. Restarting watch.", ec.taskPath)
					// We need to retrieve all existing tasks to update our index
					// without potentially missing some events.
					return nil, restartWatchError
				}
			}
			metafora.Errorf("%s Unexpected error unmarshalling etcd response: %+v", ec.taskPath, err)
			return nil, err
		}
		return resp, nil
	}
}
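// A hedged sketch of the caller's side of the error contract documented on
// watch. The surrounding loop and index-refresh step are elided since they
// depend on the coordinator's task-listing code, which is not shown here.
//
//	resp, err := ec.watch(ec.client, ec.taskPath, index, stop)
//	switch err {
//	case nil:
//		// handle resp and advance index past it
//	case etcd.ErrWatchStoppedByUser:
//		return // the coordinator closed
//	case restartWatchError:
//		// the index is too old: re-list existing tasks to obtain a fresh
//		// index, then restart the watch
//	}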
// SubmitCommand creates a new command for a particular nodeId. The command
// has a random name and is added to that nodeId's directory in etcd.
func (mc *mclient) SubmitCommand(node string, command metafora.Command) error {
	cmdPath := mc.cmdPath(node)
	body, err := command.Marshal()
	if err != nil {
		// This is either a bug in metafora or someone implemented their own
		// command incorrectly.
		return err
	}
	if _, err := mc.etcd.AddChild(cmdPath, string(body), foreverTTL); err != nil {
		metafora.Errorf("Error submitting command: %s to node: %s", command, node)
		return err
	}
	metafora.Debugf("Submitted command: %s to node: %s", string(body), node)
	return nil
}
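// exampleSubmitCommand is a hedged usage sketch and not part of the original
// source. It mirrors how the tests below submit a balance command; the
// namespace, etcd host, and node name are hypothetical placeholders.
func exampleSubmitCommand() {
	cli := NewClient("/metafora-example", []string{"http://127.0.0.1:4001"})
	if err := cli.SubmitCommand("node1", metafora.CommandBalance()); err != nil {
		metafora.Errorf("balance command for node1 failed: %v", err)
	}
}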
// apply a message to cause a state transition. Returns false if the state
// transition is invalid.
func apply(cur *State, m *Message) (*State, bool) {
	//XXX Is a linear scan of all rules really the best option here?
	for _, trans := range Rules {
		if trans.Event == m.Code && trans.From == cur.Code {
			metafora.Debugf("Transitioned %s", trans)
			if m.Err != nil {
				// Append errors from message
				cur.Errors = append(cur.Errors, Err{Time: time.Now(), Err: m.Err.Error()})
			}

			// New State + Message's Until + Combined Errors
			return &State{Code: trans.To, Until: m.Until, Errors: cur.Errors}, true
		}
	}
	return cur, false
}
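// A hedged usage sketch: each entry in Rules pairs a message code (Event)
// with the state it may arrive in (From) and the state it leads to (To), and
// apply returns the first matching transition. The variables below are
// hypothetical; Run (further down) shows the real call site.
//
//	if next, ok := apply(current, msg); ok {
//		// persist next and keep running
//	} else {
//		// invalid transition: fall back to an error message, as Run does
//	}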
// Run the state machine enabled handler. Loads the initial state and passes
// control to the internal stateful handler, passing commands from the command
// listener into the handler's commands chan.
func (s *stateMachine) Run() (done bool) {
	// Multiplex external (Stop) messages and internal ones
	s.cmds = make(chan *Message)
	go func() {
		for {
			select {
			case m := <-s.cl.Receive():
				if !m.Valid() {
					metafora.Warnf("Ignoring invalid command: %q", m)
					continue
				}
				select {
				case s.cmds <- m:
				case <-s.stopped:
					return
				}
			case <-s.stopped:
				return
			}
		}
	}()

	// Stop the command listener and internal message multiplexer when Run exits
	defer func() {
		s.cl.Stop()
		s.stop()
	}()

	tid := s.task.ID()

	// Load the initial state
	state, err := s.ss.Load(s.task)
	if err != nil {
		// A failure to load the state for a task is *fatal* - the task will be
		// unscheduled and requires operator intervention to reschedule.
		metafora.Errorf("task=%q could not load initial state. Marking done! Error: %v", tid, err)
		return true
	}
	if state == nil {
		// Note to StateStore implementors: This should not happen! Either state or
		// err must be non-nil. This code is simply to prevent a nil pointer panic.
		metafora.Errorf("statestore %T returned nil state and err for task=%q - unscheduling", s.ss, tid)
		return true
	}
	if state.Code.Terminal() {
		metafora.Warnf("task=%q in terminal state %s - exiting.", tid, state.Code)
		return true
	}

	s.setState(state) // for introspection/debugging

	// Main Statemachine Loop
	done = false
	for {
		// Enter State
		metafora.Debugf("task=%q in state %s", tid, state.Code)
		msg := s.exec(state)

		// Apply Message
		newstate, ok := apply(state, msg)
		if !ok {
			metafora.Warnf("task=%q Invalid state transition=%q returned by task. Old state=%q", tid, msg.Code, state.Code)
			msg = ErrorMessage(fmt.Errorf("invalid state transition: %s from %s", msg.Code, state.Code))
			if newstate, ok = apply(state, msg); !ok {
				metafora.Errorf("task=%q Unable to transition to error state! Exiting with state=%q", tid, state.Code)
				return state.Code.Terminal()
			}
		}
		metafora.Infof("task=%q transitioning %s --> %s --> %s", tid, state, msg, newstate)

		// Save state
		if err := s.ss.Store(s.task, newstate); err != nil {
			metafora.Errorf("task=%q Unable to persist state=%q. Unscheduling.", tid, newstate.Code)
			return true
		}

		// Set next state and loop if non-terminal
		state = newstate

		// Expose the state for introspection
		s.setState(state)

		// Exit and unschedule task on terminal state.
		if state.Code.Terminal() {
			return true
		}

		// Release messages indicate the task should exit but not unschedule.
		if msg.Code == Release {
			return false
		}

		// Alternatively Stop() may have been called but the handler may not have
		// returned the Release message. Always exit if we've been told to Stop()
		// even if the handler has returned a different Message.
		select {
		case <-s.stopped:
			return false
		default:
		}
	}
}
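// memStateStore is a hedged, minimal in-memory StateStore sketch and not part
// of the original source. The Load/Store signatures are assumed to match the
// s.ss calls made in Run above, and Runnable is assumed to be the initial
// state code. Run treats a nil state with a nil error as a StateStore bug, so
// Load returns a fresh state for unknown tasks instead of nil. Requires the
// "sync" package.
type memStateStore struct {
	mu     sync.Mutex
	states map[string]*State
}

func newMemStateStore() *memStateStore {
	return &memStateStore{states: map[string]*State{}}
}

func (m *memStateStore) Load(task metafora.Task) (*State, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if s, ok := m.states[task.ID()]; ok {
		return s, nil
	}
	// Unknown task: start it in the assumed initial state rather than
	// returning nil (which Run treats as a StateStore bug).
	return &State{Code: Runnable}, nil
}

func (m *memStateStore) Store(task metafora.Task, s *State) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.states[task.ID()] = s
	return nil
}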
// add starts refreshing a given key+value pair for a task asynchronously.
func (m *taskManager) add(task metafora.Task) bool {
	tid := task.ID()

	// Attempt to claim the node
	key, value := m.ownerNode(tid)
	resp, err := m.client.Create(key, value, m.ttl)
	if err != nil {
		etcdErr, ok := err.(*etcd.EtcdError)
		if !ok || etcdErr.ErrorCode != EcodeNodeExist {
			metafora.Errorf("Claim of %s failed with an unexpected error: %v", key, err)
		} else {
			metafora.Debugf("Claim of %s failed, already claimed", key)
		}
		return false
	}

	index := resp.Node.CreatedIndex

	// lytics/metafora#124 - the successful create above may have resurrected a
	// deleted (done) task. Compare the CreatedIndex of the directory with the
	// CreatedIndex of the claim key: if they're equal this claim resurrected a
	// done task and should clean up.
	resp, err = m.client.Get(m.taskPath(tid), unsorted, notrecursive)
	if err != nil {
		// Erroring here is BAD as we may have resurrected a done task, and because
		// of this failure there's no way to tell. The claim will eventually
		// time out and the task will get reclaimed.
		metafora.Errorf("Error retrieving task path %q after claiming %q: %v", m.taskPath(tid), tid, err)
		return false
	}
	if resp.Node.CreatedIndex == index {
		metafora.Debugf("Task %s resurrected due to claim/done race. Re-deleting.", tid)
		if _, err = m.client.Delete(m.taskPath(tid), recursive); err != nil {
			// This is as bad as it gets. We *know* we resurrected a task, but we
			// failed to re-delete it.
			metafora.Errorf("Task %s was resurrected and could not be removed! %s should be manually removed. Error: %v",
				tid, m.taskPath(tid), err)
		}

		// Regardless of whether or not the delete succeeded, never treat
		// resurrected tasks as claimed.
		return false
	}

	// Claim successful, start the refresher
	metafora.Debugf("Claim successful: %s", key)
	done := make(chan struct{})
	release := make(chan struct{})
	finished := make(chan struct{})
	m.taskL.Lock()
	m.tasks[tid] = taskStates{done: done, release: release, finished: finished}
	m.taskL.Unlock()

	metafora.Debugf("Starting claim refresher for task %s", tid)
	go func() {
		defer func() {
			m.taskL.Lock()
			delete(m.tasks, tid)
			m.taskL.Unlock()
			close(finished)
		}()

		for {
			select {
			case <-time.After(m.interval):
				// Try to refresh the claim node (0 index means compare by value)
				if _, err := m.client.CompareAndSwap(key, value, m.ttl, value, 0); err != nil {
					metafora.Errorf("Error trying to update task %s ttl: %v", tid, err)
					m.ctx.Lost(task)
					// On errors, don't even try to Delete as we're in a bad state
					return
				}
			case <-done:
				metafora.Debugf("Deleting directory for task %s as it's done.", tid)
				const recursive = true
				if _, err := m.client.Delete(m.taskPath(tid), recursive); err != nil {
					metafora.Errorf("Error deleting task %s while stopping: %v", tid, err)
				}
				return
			case <-release:
				metafora.Debugf("Deleting claim for task %s as it's released.", tid)
				// Not done, releasing; just delete the claim node
				if _, err := m.client.CompareAndDelete(key, value, 0); err != nil {
					metafora.Warnf("Error releasing task %s while stopping: %v", tid, err)
				}
				return
			}
		}
	}()
	return true
}
// DeleteTask deletes a task from etcd.
func (mc *mclient) DeleteTask(taskId string) error {
	fullpath := mc.tskPath(taskId)
	_, err := mc.etcd.Delete(fullpath, recursive)
	metafora.Debugf("task %s deleted: %s", taskId, fullpath)
	return err
}
// Fair balancer shouldn't consider a shutting-down node
// See https://github.com/lytics/metafora/issues/92
func TestFairBalancerShutdown(t *testing.T) {
	coord1, conf1 := setupEtcd(t)
	conf2 := conf1.Copy()
	conf2.Name = "node2"
	coord2, _ := NewEtcdCoordinator(conf2)

	cli := NewClient(conf1.Namespace, conf1.Hosts)

	// This handler always returns immediately
	h1 := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("H1 Starting %s", task.ID())
		<-stop
		metafora.Debugf("H1 Stopping %s", task.ID())
		return false // never done
	})

	// Block forever on a single task
	stop2 := make(chan struct{})
	stopr := make(chan chan struct{}, 1)
	stopr <- stop2
	h2 := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("H2 Starting %s", task.ID())
		blockchan, ok := <-stopr
		if ok {
			<-blockchan
		}
		<-stop
		metafora.Debugf("H2 Stopping %s", task.ID())
		return false // never done
	})

	// Create two consumers
	b1 := NewFairBalancer(conf1)
	con1, err := metafora.NewConsumer(coord1, h1, b1)
	if err != nil {
		t.Fatal(err)
	}

	b2 := NewFairBalancer(conf2)
	con2, err := metafora.NewConsumer(coord2, h2, b2)
	if err != nil {
		t.Fatal(err)
	}

	// Start the first and let it claim a bunch of tasks
	go con1.Run()
	defer con1.Shutdown()
	cli.SubmitTask(DefaultTaskFunc("t1", ""))
	cli.SubmitTask(DefaultTaskFunc("t2", ""))
	cli.SubmitTask(DefaultTaskFunc("t3", ""))
	cli.SubmitTask(DefaultTaskFunc("t4", ""))
	cli.SubmitTask(DefaultTaskFunc("t5", ""))
	cli.SubmitTask(DefaultTaskFunc("t6", ""))

	time.Sleep(500 * time.Millisecond)

	if len(con1.Tasks()) != 6 {
		t.Fatalf("con1 should have claimed 6 tasks: %d", len(con1.Tasks()))
	}

	// Start the second consumer and force the 1st to rebalance
	go con2.Run()
	close(stopr)

	// Wait for node to startup and register
	time.Sleep(500 * time.Millisecond)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks := con1.Tasks()
	c2Tasks := con2.Tasks()
	if len(c1Tasks) != 4 || len(c2Tasks) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks), len(c2Tasks))
	}

	// Make sure that balancing the other node does nothing
	cli.SubmitCommand("node2", metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks2 := con1.Tasks()
	c2Tasks2 := con2.Tasks()
	if len(c1Tasks2) != 4 || len(c2Tasks2) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks2), len(c2Tasks2))
	}
	for i := 0; i < 4; i++ {
		if c1Tasks[i] != c1Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c1Tasks[i], c1Tasks2[i])
		}
	}
	for i := 0; i < 2; i++ {
		if c2Tasks[i] != c2Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c2Tasks[i], c2Tasks2[i])
		}
	}

	// Second consumer should block on a single task forever
	// Rebalancing the first node should then cause it to pickup all but
	// one task
	c2stop := make(chan struct{})
	go func() {
		con2.Shutdown()
		close(c2stop)
	}()

	time.Sleep(500 * time.Millisecond)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks3 := con1.Tasks()
	c2Tasks3 := con2.Tasks()
	if len(c1Tasks3) != 5 || len(c2Tasks3) != 1 {
		t.Fatalf("Expected consumers to have 5|1 tasks: %d|%d", len(c1Tasks3), len(c2Tasks3))
	}

	// Now stop blocking task, rebalance and make sure the first node picked up the remaining
	close(stop2)
	time.Sleep(500 * time.Millisecond)

	// Consumer 2 should stop now
	<-c2stop

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())
	time.Sleep(2 * time.Second)

	// con2 is out of the picture. con1 has all the tasks.
	c1Tasks4 := con1.Tasks()
	c2Tasks4 := con2.Tasks()
	if len(c1Tasks4) != 6 || len(c2Tasks4) != 0 {
		t.Fatalf("Expected consumers to have 6|0 tasks: %d|%d", len(c1Tasks4), len(c2Tasks4))
	}
}
func TestFairBalancer(t *testing.T) {
	t.Parallel()
	coord1, conf1 := setupEtcd(t)
	conf2 := conf1.Copy()
	conf2.Name = "coord2"
	coord2, _ := NewEtcdCoordinator(conf2)

	cli := NewClient(conf1.Namespace, conf1.Hosts)

	h := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("Starting %s", task.ID())
		<-stop
		metafora.Debugf("Stopping %s", task.ID())
		return false // never done
	})

	// Create two consumers
	b1 := NewFairBalancer(conf1)
	con1, err := metafora.NewConsumer(coord1, h, b1)
	if err != nil {
		t.Fatal(err)
	}

	b2 := NewFairBalancer(conf2)
	con2, err := metafora.NewConsumer(coord2, h, b2)
	if err != nil {
		t.Fatal(err)
	}

	// Start the first and let it claim a bunch of tasks
	go con1.Run()
	defer con1.Shutdown()
	cli.SubmitTask(DefaultTaskFunc("t1", ""))
	cli.SubmitTask(DefaultTaskFunc("t2", ""))
	cli.SubmitTask(DefaultTaskFunc("t3", ""))
	cli.SubmitTask(DefaultTaskFunc("t4", ""))
	cli.SubmitTask(DefaultTaskFunc("t5", ""))
	cli.SubmitTask(DefaultTaskFunc("t6", ""))

	time.Sleep(1 * time.Second)

	if len(con1.Tasks()) != 6 {
		t.Fatalf("con1 should have claimed 6 tasks: %d", len(con1.Tasks()))
	}

	// Start the second consumer and force the 1st to rebalance
	go con2.Run()
	defer con2.Shutdown()

	// Wait for node to startup and register
	time.Sleep(1 * time.Second)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks := con1.Tasks()
	c2Tasks := con2.Tasks()
	if len(c1Tasks) != 4 || len(c2Tasks) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks), len(c2Tasks))
	}

	// Finally make sure that balancing the other node does nothing
	cli.SubmitCommand(conf2.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks2 := con1.Tasks()
	c2Tasks2 := con2.Tasks()
	if len(c1Tasks2) != 4 || len(c2Tasks2) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks2), len(c2Tasks2))
	}
	for i := 0; i < 4; i++ {
		if c1Tasks[i] != c1Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c1Tasks[i], c1Tasks2[i])
		}
	}
	for i := 0; i < 2; i++ {
		if c2Tasks[i] != c2Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c2Tasks[i], c2Tasks2[i])
		}
	}
}
func (c *cmdrListener) watcher() {
	var index uint64
	var ok bool

startWatch:
	resp, err := c.cli.Get(c.path, notrecursive, unsorted)
	if err != nil {
		if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeKeyNotFound {
			// No command found; this is normal. Grab index and skip to watching
			index = ee.Index
			goto watchLoop
		}
		metafora.Errorf("Error GETting %s - sending error to stateful handler: %v", c.path, err)
		c.sendErr(err)
		return
	}

	if index, ok = c.sendMsg(resp); !ok {
		return
	}

watchLoop:
	for {
		rr, err := protectedRawWatch(c.cli, c.path, index, notrecursive, nil, c.stop)
		if err != nil {
			if err == etcd.ErrWatchStoppedByUser {
				return
			}
			// This is probably a canceled request panic
			// Wait a little bit, then continue as normal
			// Can be removed after Go 1.5 is released
			if ispanic(err) {
				continue
			}

			metafora.Errorf("Error watching %s - sending error to stateful handler: %v", c.path, err)
			c.sendErr(err)
			return
		}

		if len(rr.Body) == 0 {
			// This is a bug in Go's HTTP + go-etcd + etcd which causes the
			// connection to time out periodically and need to be restarted *after*
			// closing idle connections.
			transport.CloseIdleConnections()
			continue watchLoop
		}

		resp, err := rr.Unmarshal()
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok {
				if ee.ErrorCode == EcodeExpiredIndex {
					goto startWatch
				}
			}
			metafora.Errorf("Error watching %s - sending error to stateful handler: %v", c.path, err)
			c.sendErr(err)
			return
		}

		metafora.Debugf("Received command via %s -- sending to statemachine", c.path)
		if index, ok = c.sendMsg(resp); !ok {
			return
		}
	}
}