Beispiel #1
func (ec *EtcdCoordinator) parseTask(resp *etcd.Response) metafora.Task {
	// Sanity check / test path invariant
	if !strings.HasPrefix(resp.Node.Key, ec.taskPath) {
		metafora.Errorf("%s received task from outside task path: %s",, resp.Node.Key)
		return nil

	key := strings.Trim(resp.Node.Key, "/") // strip leading and trailing /s
	parts := strings.Split(key, "/")

	// Pickup new tasks
	if newActions[resp.Action] && len(parts) == 3 && resp.Node.Dir {
		// Make sure it's not already claimed before returning it
		for _, n := range resp.Node.Nodes {
			if strings.HasSuffix(n.Key, OwnerMarker) {
				metafora.Debugf("%s ignoring task as it's already claimed: %s",, parts[2])
				return nil
		metafora.Debugf("%s received new task: %s",, parts[2])
		props := ""
		for _, n := range resp.Node.Nodes {
			if strings.HasSuffix(n.Key, "/"+PropsKey) {
				props = n.Value
		return ec.newTask(parts[2], props)

	if newActions[resp.Action] && len(parts) == 4 && parts[3] == PropsKey {
		metafora.Debugf("%s received task with properties: %s",, parts[2])
		return ec.newTask(parts[2], resp.Node.Value)

	// If a claim key is removed, try to claim the task
	if releaseActions[resp.Action] && len(parts) == 4 && parts[3] == OwnerMarker {
		metafora.Debugf("%s received released task: %s",, parts[2])

		// Sadly we need to fail parsing this task if there's an error getting the
		// props file as trying to claim a task without properly knowing its
		// properties could cause major issues.
		parts[3] = PropsKey
		propsnode, err := ec.client.Get(path.Join(parts...), unsorted, notrecursive)
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeKeyNotFound {
				// No props file
				return ec.newTask(parts[2], "")

			metafora.Errorf("%s error getting properties while handling %s",, parts[2])
			return nil
		return ec.newTask(parts[2], propsnode.Node.Value)

	// Ignore any other key events (_metafora keys, task deletion, etc.)
	return nil
Beispiel #2
// remove tells a single task's refresher to stop and blocks until the task is
// handled.
func (m *taskManager) remove(taskID string, done bool) {
	states, ok := m.tasks[taskID]
	if !ok {
		metafora.Debugf("Cannot remove task %s from refresher: not present.", taskID)

	select {
	case <-states.release:
		// already stopping
	case <-states.done:
		// already stopping
		if done {
		} else {

	// Block until task is released/deleted to prevent races on shutdown where
	// the process could exit before all tasks are released.
Beispiel #3
func (ec *EtcdCoordinator) upsertDir(path string, ttl uint64) {
	//hidden etcd key that isn't visible to ls commands on the directory,
	//  you have to know about it to find it :).  I'm using it to add some
	//  info about when the cluster's schema was setup.
	pathMarker := path + "/" + MetadataKey

	_, err := ec.client.Get(path, unsorted, notrecursive)
	if err == nil {

	etcdErr, ok := err.(*etcd.EtcdError)
	if ok && etcdErr.ErrorCode == EcodeKeyNotFound {
		_, err := ec.client.CreateDir(path, ttl)
		if err != nil {
			metafora.Debugf("Error trying to create directory. path:[%s] error:[ %v ]", path, err)
		host, _ := os.Hostname()

		metadata := struct {
			Host        string `json:"host"`
			CreatedTime string `json:"created"`
			Host:        host,
			CreatedTime: time.Now().String(),
			ownerValue:  ownerValue{Node: ec.conf.Name},
		metadataB, _ := json.Marshal(metadata)
		metadataStr := string(metadataB)
		ec.client.Create(pathMarker, metadataStr, ttl)
Beispiel #4
// Init is called once by the consumer to provide a Logger to Coordinator
// implementations.
func (ec *EtcdCoordinator) Init(cordCtx metafora.CoordinatorContext) error {
	metafora.Debugf("Initializing coordinator with namespace: %s and etcd cluster: %s",
		ec.conf.Namespace, strings.Join(ec.client.GetCluster(), ", "))

	ec.cordCtx = cordCtx

	ec.upsertDir(ec.conf.Namespace, foreverTTL)
	ec.upsertDir(ec.taskPath, foreverTTL)
	if _, err := ec.client.CreateDir(ec.nodePath, ec.conf.NodeTTL); err != nil {
		return err

	// Create etcd client for task manager
	tmc, err := newEtcdClient(ec.conf.Hosts)
	if err != nil {
		return err

	// Start goroutine to heartbeat node key in etcd
	go ec.nodeRefresher()
	ec.upsertDir(ec.commandPath, foreverTTL)

	ec.taskManager = newManager(cordCtx, tmc, ec.taskPath, ec.conf.Name, ec.conf.ClaimTTL)
	return nil
Beispiel #5
// SubmitTask creates a new task in etcd
func (mc *mclient) SubmitTask(task metafora.Task) error {
	fullpath := path.Join(mc.tskPath(task.ID()), PropsKey)
	buf, err := json.Marshal(task)
	if err != nil {
		return err
	if _, err := mc.etcd.Create(fullpath, string(buf), foreverTTL); err != nil {
		return err
	metafora.Debugf("task %s submitted: %s", task.ID(), fullpath)
	return nil
Beispiel #6
// watch will return either an etcd Response or an error. Two errors returned
// by this method should be treated specially:
//   1. etcd.ErrWatchStoppedByUser - the coordinator has closed, exit
//                                   accordingly
//   2. restartWatchError - the specified index is too old, try again with a
//                          newer index
func (ec *EtcdCoordinator) watch(c *etcd.Client, path string, index uint64, stop chan bool) (*etcd.Response, error) {
	const recursive = true
	for {
		// Start the blocking watch after the last response's index.
		rawResp, err := protectedRawWatch(c, path, index+1, recursive, nil, stop)
		if err != nil {
			if err == etcd.ErrWatchStoppedByUser {
				// This isn't actually an error, the stop chan was closed. Time to stop!
				return nil, err

			// This is probably a canceled request panic
			// Wait a little bit, then continue as normal
			// Can be removed after Go 1.5 is released
			if ispanic(err) {
				time.Sleep(250 * time.Millisecond)

			// Other RawWatch errors should be retried forever. If the node refresher
			// also fails to communicate with etcd it will close the coordinator,
			// closing ec.stop in the process which will cause this function to with
			// ErrWatchStoppedByUser.
			metafora.Errorf("%s Retrying after unexpected watch error: %v", path, err)
			transport.CloseIdleConnections() // paranoia; let's get fresh connections on errors.

		if len(rawResp.Body) == 0 {
			// This is a bug in Go's HTTP + go-etcd + etcd which causes the
			// connection to timeout perdiocally and need to be restarted *after*
			// closing idle connections.

		resp, err := rawResp.Unmarshal()
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok {
				if ee.ErrorCode == EcodeExpiredIndex {
					metafora.Debugf("%s Too many events have happened since index was updated. Restarting watch.", ec.taskPath)
					// We need to retrieve all existing tasks to update our index
					// without potentially missing some events.
					return nil, restartWatchError
			metafora.Errorf("%s Unexpected error unmarshalling etcd response: %+v", ec.taskPath, err)
			return nil, err
		return resp, nil
Beispiel #7
// SubmitCommand creates a new command for a particular nodeId, the
// command has a random name and is added to the particular nodeId
// directory in etcd.
func (mc *mclient) SubmitCommand(node string, command metafora.Command) error {
	cmdPath := mc.cmdPath(node)
	body, err := command.Marshal()
	if err != nil {
		// This is either a bug in metafora or someone implemented their own
		// command incorrectly.
		return err
	if _, err := mc.etcd.AddChild(cmdPath, string(body), foreverTTL); err != nil {
		metafora.Errorf("Error submitting command: %s to node: %s", command, node)
		return err
	metafora.Debugf("Submitted command: %s to node: %s", string(body), node)
	return nil
Beispiel #8
// apply a message to cause a state transition. Returns false if the state
// transition is invalid.
func apply(cur *State, m *Message) (*State, bool) {
	//XXX Is a linear scan of all rules really the best option here?
	for _, trans := range Rules {
		if trans.Event == m.Code && trans.From == cur.Code {
			metafora.Debugf("Transitioned %s", trans)
			if m.Err != nil {
				// Append errors from message
				cur.Errors = append(cur.Errors, Err{Time: time.Now(), Err: m.Err.Error()})

			// New State + Message's Until + Combined Errors
			return &State{Code: trans.To, Until: m.Until, Errors: cur.Errors}, true
	return cur, false
Beispiel #9
// Run the state machine enabled handler. Loads the initial state and passes
// control to the internal stateful handler passing commands from the command
// listener into the handler's commands chan.
//
// The result reports whether the task should be unscheduled: true is returned
// when the initial state cannot be loaded, when a state is terminal, or when
// the new state cannot be persisted; false is returned on a Release message
// or once the state machine has been stopped.
//
// NOTE(review): several statements in this excerpt appear truncated (they are
// marked below) and block-closing lines are missing; restore them from the
// upstream source before building.
func (s *stateMachine) Run() (done bool) {
	// Multiplex external (Stop) messages and internal ones
	s.cmds = make(chan *Message)
	go func() {
		for {
			select {
			// NOTE(review): the receive expression on the next line is cut off.
			case m := <
				if !m.Valid() {
					metafora.Warnf("Ignoring invalid command: %q", m)
				// Forward the command unless the state machine is stopped first.
				select {
				case s.cmds <- m:
				case <-s.stopped:
			case <-s.stopped:

	// Stop the command listener and internal message multiplexer when Run exits
	// NOTE(review): the body of this deferred function is not visible here.
	defer func() {

	tid := s.task.ID()

	// Load the initial state
	// NOTE(review): the right-hand side of this assignment is cut off.
	state, err :=
	if err != nil {
		// A failure to load the state for a task is *fatal* - the task will be
		// unscheduled and requires operator intervention to reschedule.
		metafora.Errorf("task=%q could not load initial state. Marking done! Error: %v", tid, err)
		return true
	if state == nil {
		// Note to StateStore implementors: This should not happen! Either state or
		// err must be non-nil. This code is simply to prevent a nil pointer panic.
		// NOTE(review): the format string has two verbs but no arguments are
		// visible on this line — confirm against upstream.
		metafora.Errorf("statestore %T returned nil state and err for task=%q - unscheduling")
		return true
	if state.Code.Terminal() {
		metafora.Warnf("task=%q in terminal state %s - exiting.", tid, state.Code)
		return true

	s.setState(state) // for introspection/debugging

	// Main Statemachine Loop
	done = false
	for {
		// Enter State
		metafora.Debugf("task=%q in state %s", tid, state.Code)
		msg := s.exec(state)

		// Apply Message
		newstate, ok := apply(state, msg)
		if !ok {
			metafora.Warnf("task=%q Invalid state transition=%q returned by task. Old state=%q", tid, msg.Code, state.Code)
			// NOTE(review): err is the variable from the initial state load and
			// looks to be nil on this path — confirm ErrorMessage(err) is intended.
			msg = ErrorMessage(err)
			if newstate, ok = apply(state, msg); !ok {
				metafora.Errorf("task=%q Unable to transition to error state! Exiting with state=%q", tid, state.Code)
				return state.Code.Terminal()

		metafora.Infof("task=%q transitioning %s --> %s --> %s", tid, state, msg, newstate)

		// Save state
		// NOTE(review): the call that persists newstate is cut off on the next line.
		if err :=, newstate); err != nil {
			metafora.Errorf("task=%q Unable to persist state=%q. Unscheduling.", tid, newstate.Code)
			return true

		// Set next state and loop if non-terminal
		state = newstate

		// Expose the state for introspection
		// NOTE(review): no statement follows this comment in this excerpt.

		// Exit and unschedule task on terminal state.
		if state.Code.Terminal() {
			return true

		// Release messages indicate the task should exit but not unschedule.
		if msg.Code == Release {
			return false

		// Alternatively Stop() may have been called but the handler may not have
		// returned the Release message. Always exit if we've been told to Stop()
		// even if the handler has returned a different Message.
		select {
		case <-s.stopped:
			return false
Beispiel #10
// add starts refreshing a given key+value pair for a task asynchronously.
func (m *taskManager) add(task metafora.Task) bool {
	tid := task.ID()
	// Attempt to claim the node
	key, value := m.ownerNode(tid)
	resp, err := m.client.Create(key, value, m.ttl)
	if err != nil {
		etcdErr, ok := err.(*etcd.EtcdError)
		if !ok || etcdErr.ErrorCode != EcodeNodeExist {
			metafora.Errorf("Claim of %s failed with an unexpected error: %v", key, err)
		} else {
			metafora.Debugf("Claim of %s failed, already claimed", key)
		return false

	index := resp.Node.CreatedIndex

	// lytics/metafora#124 - the successful create above may have resurrected a
	// deleted (done) task. Compare the CreatedIndex of the directory with the
	// CreatedIndex of the claim key, if they're equal this claim ressurected a
	// done task and should cleanup.
	resp, err = m.client.Get(m.taskPath(tid), unsorted, notrecursive)
	if err != nil {
		// Erroring here is BAD as we may have resurrected a done task, and because
		// of this failure there's no way to tell. The claim will eventually
		// timeout and the task will get reclaimed.
		metafora.Errorf("Error retrieving task path %q after claiming %q: %v", m.taskPath(tid), tid, err)
		return false

	if resp.Node.CreatedIndex == index {
		metafora.Debugf("Task %s resurrected due to claim/done race. Re-deleting.", tid)
		if _, err = m.client.Delete(m.taskPath(tid), recursive); err != nil {
			// This is as bad as it gets. We *know* we resurrected a task, but we
			// failed to re-delete it.
			metafora.Errorf("Task %s was resurrected and could not be removed! %s should be manually removed. Error: %v",
				tid, m.taskPath(tid), err)

		// Regardless of whether or not the delete succeeded, never treat
		// resurrected tasks as claimed.
		return false

	// Claim successful, start the refresher
	metafora.Debugf("Claim successful: %s", key)
	done := make(chan struct{})
	release := make(chan struct{})
	finished := make(chan struct{})
	m.tasks[tid] = taskStates{done: done, release: release, finished: finished}

	metafora.Debugf("Starting claim refresher for task %s", tid)
	go func() {
		defer func() {
			delete(m.tasks, tid)

		for {
			select {
			case <-time.After(m.interval):
				// Try to refresh the claim node (0 index means compare by value)
				if _, err := m.client.CompareAndSwap(key, value, m.ttl, value, 0); err != nil {
					metafora.Errorf("Error trying to update task %s ttl: %v", tid, err)
					// On errors, don't even try to Delete as we're in a bad state
			case <-done:
				metafora.Debugf("Deleting directory for task %s as it's done.", tid)
				const recursive = true
				if _, err := m.client.Delete(m.taskPath(tid), recursive); err != nil {
					metafora.Errorf("Error deleting task %s while stopping: %v", tid, err)
			case <-release:
				metafora.Debugf("Deleting claim for task %s as it's released.", tid)
				// Not done, releasing; just delete the claim node
				if _, err := m.client.CompareAndDelete(key, value, 0); err != nil {
					metafora.Warnf("Error releasing task %s while stopping: %v", tid, err)
	return true
Beispiel #11
// Delete a task
func (mc *mclient) DeleteTask(taskId string) error {
	fullpath := mc.tskPath(taskId)
	_, err := mc.etcd.Delete(fullpath, recursive)
	metafora.Debugf("task %s deleted: %s", taskId, fullpath)
	return err
Beispiel #12
// TestFairBalancerShutdown checks that the fair balancer shouldn't consider a
// shutting-down node.
//
// Two consumers share six tasks. The test first expects a 4|2 split after a
// balance command, then a 5|1 split, and finally 6|0 with every task on the
// first consumer.
//
// NOTE(review): the reference that followed "See" is missing from this
// excerpt, and many statement bodies below (error checks, handler bodies,
// goroutine bodies, block-closing lines) appear truncated — compare with the
// upstream test before relying on it.
// See
func TestFairBalancerShutdown(t *testing.T) {
	coord1, conf1 := setupEtcd(t)
	conf2 := conf1.Copy()
	conf2.Name = "node2"
	coord2, _ := NewEtcdCoordinator(conf2)

	cli := NewClient(conf1.Namespace, conf1.Hosts)

	// This handler always returns immediately
	h1 := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("H1 Starting %s", task.ID())
		metafora.Debugf("H1 Stopping %s", task.ID())
		return false // never done

	// Block forever on a single task
	// stopr is a one-slot channel preloaded with stop2, so only the first
	// receive from it yields a channel.
	stop2 := make(chan struct{})
	stopr := make(chan chan struct{}, 1)
	stopr <- stop2
	h2 := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("H2 Starting %s", task.ID())
		blockchan, ok := <-stopr
		// NOTE(review): the body of this branch is not visible in this excerpt.
		if ok {
		metafora.Debugf("H2 Stopping %s", task.ID())
		return false // never done

	// Create two consumers
	b1 := NewFairBalancer(conf1)
	con1, err := metafora.NewConsumer(coord1, h1, b1)
	if err != nil {

	b2 := NewFairBalancer(conf2)
	con2, err := metafora.NewConsumer(coord2, h2, b2)
	if err != nil {

	// Start the first and let it claim a bunch of tasks
	go con1.Run()
	defer con1.Shutdown()
	cli.SubmitTask(DefaultTaskFunc("t1", ""))
	cli.SubmitTask(DefaultTaskFunc("t2", ""))
	cli.SubmitTask(DefaultTaskFunc("t3", ""))
	cli.SubmitTask(DefaultTaskFunc("t4", ""))
	cli.SubmitTask(DefaultTaskFunc("t5", ""))
	cli.SubmitTask(DefaultTaskFunc("t6", ""))

	// Give the first consumer time to claim the submitted tasks.
	time.Sleep(500 * time.Millisecond)

	if len(con1.Tasks()) != 6 {
		t.Fatalf("con1 should have claimed 6 tasks: %d", len(con1.Tasks()))

	// Start the second consumer and force the 1st to rebalance
	go con2.Run()


	// Wait for node to startup and register
	time.Sleep(500 * time.Millisecond)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks := con1.Tasks()
	c2Tasks := con2.Tasks()
	if len(c1Tasks) != 4 || len(c2Tasks) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks), len(c2Tasks))

	// Make sure that balancing the other node does nothing
	cli.SubmitCommand("node2", metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	// The task lists must be unchanged, element by element, after the no-op
	// balance.
	c1Tasks2 := con1.Tasks()
	c2Tasks2 := con2.Tasks()
	if len(c1Tasks2) != 4 || len(c2Tasks2) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks2), len(c2Tasks2))
	for i := 0; i < 4; i++ {
		if c1Tasks[i] != c1Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c1Tasks[i], c1Tasks2[i])
	for i := 0; i < 2; i++ {
		if c2Tasks[i] != c2Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c2Tasks[i], c2Tasks2[i])

	// Second consumer should block on a single task forever
	// Rebalancing the first node should then cause it to pickup all but
	// one task
	c2stop := make(chan struct{})
	// NOTE(review): the body of this goroutine is not visible in this excerpt.
	go func() {

	time.Sleep(500 * time.Millisecond)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks3 := con1.Tasks()
	c2Tasks3 := con2.Tasks()
	if len(c1Tasks3) != 5 || len(c2Tasks3) != 1 {
		t.Fatalf("Expected consumers to have 5|1 tasks: %d|%d", len(c1Tasks3), len(c2Tasks3))

	// Now stop blocking task, rebalance and make sure the first node picked up the remaining
	// NOTE(review): the statement that unblocks the task is not visible here.

	time.Sleep(500 * time.Millisecond)
	// Consumer 2 should stop now

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	// con2 is out of the picture. con1 has all the tasks.
	c1Tasks4 := con1.Tasks()
	c2Tasks4 := con2.Tasks()
	if len(c1Tasks4) != 6 || len(c2Tasks4) != 0 {
		t.Fatalf("Expected consumers to have 6|0 tasks: %d|%d", len(c1Tasks4), len(c2Tasks4))
Beispiel #13
// TestFairBalancer starts one consumer, lets it claim six tasks, then starts
// a second consumer and sends the first a balance command. It expects a 4|2
// split of the tasks, and expects that split to stay identical after a
// further balance command.
//
// NOTE(review): several statement bodies below (error checks, the handler
// body, block-closing lines) appear truncated in this excerpt — compare with
// the upstream test before relying on it.
func TestFairBalancer(t *testing.T) {
	coord1, conf1 := setupEtcd(t)
	conf2 := conf1.Copy()
	conf2.Name = "coord2"
	coord2, _ := NewEtcdCoordinator(conf2)

	cli := NewClient(conf1.Namespace, conf1.Hosts)

	// Both consumers share this handler, which never reports a task as done.
	h := metafora.SimpleHandler(func(task metafora.Task, stop <-chan bool) bool {
		metafora.Debugf("Starting %s", task.ID())
		metafora.Debugf("Stopping %s", task.ID())
		return false // never done

	// Create two consumers
	b1 := NewFairBalancer(conf1)
	con1, err := metafora.NewConsumer(coord1, h, b1)
	if err != nil {

	b2 := NewFairBalancer(conf2)
	con2, err := metafora.NewConsumer(coord2, h, b2)
	if err != nil {

	// Start the first and let it claim a bunch of tasks
	go con1.Run()
	defer con1.Shutdown()
	cli.SubmitTask(DefaultTaskFunc("t1", ""))
	cli.SubmitTask(DefaultTaskFunc("t2", ""))
	cli.SubmitTask(DefaultTaskFunc("t3", ""))
	cli.SubmitTask(DefaultTaskFunc("t4", ""))
	cli.SubmitTask(DefaultTaskFunc("t5", ""))
	cli.SubmitTask(DefaultTaskFunc("t6", ""))

	// Give the first consumer time to claim the submitted tasks.
	time.Sleep(1 * time.Second)

	if len(con1.Tasks()) != 6 {
		t.Fatalf("con1 should have claimed 6 tasks: %d", len(con1.Tasks()))

	// Start the second consumer and force the 1st to rebalance
	go con2.Run()
	defer con2.Shutdown()

	// Wait for node to startup and register
	time.Sleep(1 * time.Second)

	cli.SubmitCommand(conf1.Name, metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	c1Tasks := con1.Tasks()
	c2Tasks := con2.Tasks()
	if len(c1Tasks) != 4 || len(c2Tasks) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks), len(c2Tasks))

	// Finally make sure that balancing the other node does nothing
	// NOTE(review): the command targets "node2" while conf2.Name is "coord2"
	// in this test — confirm the intended node name.
	cli.SubmitCommand("node2", metafora.CommandBalance())

	time.Sleep(2 * time.Second)

	// The task lists must be unchanged, element by element, after the second
	// balance command.
	c1Tasks2 := con1.Tasks()
	c2Tasks2 := con2.Tasks()
	if len(c1Tasks2) != 4 || len(c2Tasks2) != 2 {
		t.Fatalf("expected consumers to have 4|2 tasks: %d|%d", len(c1Tasks2), len(c2Tasks2))
	for i := 0; i < 4; i++ {
		if c1Tasks[i] != c1Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c1Tasks[i], c1Tasks2[i])
	for i := 0; i < 2; i++ {
		if c2Tasks[i] != c2Tasks2[i] {
			t.Errorf("task mismatch: %s != %s", c2Tasks[i], c2Tasks2[i])
Beispiel #14
// watcher fetches the key at c.path, hands the response to c.sendMsg, and then
// watches c.path via protectedRawWatch, handing every unmarshalled response to
// c.sendMsg as well. The index returned by c.sendMsg is used as the index for
// the next watch.
//
// NOTE(review): the watchLoop and startWatch labels referenced below, the
// bodies of several branches and the block-closing lines are not visible in
// this excerpt — restore them from the upstream source before building.
func (c *cmdrListener) watcher() {
	var index uint64
	var ok bool
	// NOTE(review): other Get calls in this file pass (unsorted, notrecursive)
	// in that order; the order is swapped here — confirm both are the same
	// value or fix the order.
	resp, err := c.cli.Get(c.path, notrecursive, unsorted)
	if err != nil {
		if ee, ok := err.(*etcd.EtcdError); ok && ee.ErrorCode == EcodeKeyNotFound {
			// No command found; this is normal. Grab index and skip to watching
			index = ee.Index
			goto watchLoop
		metafora.Errorf("Error GETting %s - sending error to stateful handler: %v", c.path, err)

	// A command already exists: forward it and continue from the index that
	// c.sendMsg returns.
	if index, ok = c.sendMsg(resp); !ok {

	for {
		rr, err := protectedRawWatch(c.cli, c.path, index, notrecursive, nil, c.stop)
		if err != nil {
			if err == etcd.ErrWatchStoppedByUser {
			// This is probably a canceled request panic
			// Wait a little bit, then continue as normal
			// Can be removed after Go 1.5 is released
			if ispanic(err) {
			metafora.Errorf("Error watching %s - sending error to stateful handler: %v", c.path, err)

		if len(rr.Body) == 0 {
			// This is a bug in Go's HTTP + go-etcd + etcd which causes the
			// connection to timeout periodically and need to be restarted *after*
			// closing idle connections.
			continue watchLoop

		resp, err := rr.Unmarshal()
		if err != nil {
			if ee, ok := err.(*etcd.EtcdError); ok {
				// The watch index has expired; jump back to the startWatch label.
				if ee.ErrorCode == EcodeExpiredIndex {
					goto startWatch
			metafora.Errorf("Error watching %s - sending error to stateful handler: %v", c.path, err)

		metafora.Debugf("Received command via %s -- sending to statemachine", c.path)
		if index, ok = c.sendMsg(resp); !ok {