Exemplo n.º 1
0
// XXX pulled this out of donut, maybe i should make a zk util lib?

// watchZKChildren mirrors the child nodes of the ZooKeeper node at path into
// children and invokes onChange(children) after every refresh.
//
// children is used as a set: only the keys matter, so callers should test
// membership with Contains(). The initial children are stored with a nil
// value; entries written by a later refresh carry the value 1.
//
// The returned channel stops the watcher: send a single byte on it. The
// channel is buffered (capacity 1), so that send does not block even if the
// watcher goroutine has already exited because of an error. The watcher never
// closes the channel itself, since the caller is the sender.
//
// An error is returned only when the initial ChildrenW call fails. Errors
// from later re-registrations are logged and terminate the watcher goroutine.
func watchZKChildren(zk *zookeeper.Conn, path string, children *donut.SafeMap, onChange func(*donut.SafeMap)) (chan byte, error) {
	initial, _, watch, err := zk.ChildrenW(path)
	if err != nil {
		return nil, err
	}

	// Seed the set with the children that exist right now.
	m := children.RangeLock()
	for _, node := range initial {
		m[node] = nil
	}
	children.RangeUnlock()

	kill := make(chan byte, 1)
	// path is passed as an argument so a '%' in it cannot corrupt the format.
	log.Printf("watching %s len is %d", path, children.Len())

	go func() {
		for {
			select {
			case <-kill:
				log.Printf("got kill")
				return
			case event := <-watch:
				// NOTE(review): if the zookeeper library closes watch after
				// delivering a non-Ok event, this continue would spin on the
				// closed channel; confirm against the library's documentation.
				if !event.Ok() {
					continue
				}

				// The watch has fired, so register a new one and fetch the
				// current child list in the same call.
				nodes, _, next, err := zk.ChildrenW(path)
				if err != nil {
					log.Printf("Error in watchZkChildren: %v", err)
					// XXX I should really provide some way for the client to find out about this error...
					return
				}
				watch = next

				live := make(map[string]struct{}, len(nodes))
				for _, node := range nodes {
					live[node] = struct{}{}
				}

				m := children.RangeLock()
				// Drop every entry that is no longer a child of path.
				for k := range m {
					if _, ok := live[k]; !ok {
						delete(m, k)
					}
				}
				// Record every current child.
				for _, node := range nodes {
					m[node] = 1
				}
				children.RangeUnlock()

				onChange(children)
			}
		}
	}()

	log.Printf("watcher setup on %s", path)
	return kill, nil
}
Exemplo n.º 2
0
// onWorkersChange is called with the current set of workers whenever that
// set changes.
//
// While the coordinator is still in SetupState and exactly
// c.config.InitialWorkers workers are present, it:
//  1. moves the coordinator to PrepareState,
//  2. assigns partition ids by sorting the worker ids and using each
//     worker's index in that order (recording this node's own index in
//     c.graph.partitionId),
//  3. caches every worker's info and opens an RPC client to it,
//  4. moves to LoadState and starts c.createLoadWork in a new goroutine.
//
// A worker whose info lacks a string "host"/"port", or that cannot be dialed,
// is logged and gets no entry in c.rpcClients; the state transitions still
// proceed. Changes that arrive after SetupState are currently ignored.
func (c *Coordinator) onWorkersChange(m *donut.SafeMap) {
	log.Println("workers updated")

	if atomic.LoadInt32(&c.state) > SetupState {
		// TODO: not implemented yet:
		// invalidate current step
		// update partition mapping
		// roll back to last checkpoint
		return
	}
	if m.Len() != c.config.InitialWorkers {
		return
	}

	// go into prepare state
	if !atomic.CompareAndSwapInt32(&c.state, SetupState, PrepareState) {
		log.Println("Could not properly move from SetupState to PrepareState")
		return
	}
	log.Printf("InitialWorkers met, preparing node for work")

	// everyone is here, create the partition mapping
	lm := m.RangeLock()
	var workers []string
	for k := range lm {
		workers = append(workers, k)
	}
	m.RangeUnlock()

	// Sorting gives a deterministic id -> worker assignment for this set.
	sort.Strings(workers)
	for i, w := range workers {
		c.partitions[i] = w
		if w == c.config.NodeId {
			c.graph.partitionId = i
		}
	}

	// set up connections to all the other nodes
	c.cachedWorkerInfo = make(map[string]map[string]interface{})
	c.rpcClients = make(map[string]*rpc.Client)
	for _, w := range workers {
		// pull down worker info for all of the existing workers
		c.cachedWorkerInfo[w] = c.workerInfo(w)
		info := c.cachedWorkerInfo[w]

		// Checked assertions: missing or non-string values must not panic.
		host, hostOK := info["host"].(string)
		port, portOK := info["port"].(string)
		if !hostOK || !portOK {
			log.Printf("worker %s: missing or non-string host/port in worker info: %v", w, info)
			continue
		}

		addr := net.JoinHostPort(host, port)
		client, err := rpc.DialHTTP("tcp", addr)
		if err != nil {
			// Do not store a nil client; surface the failure instead.
			log.Printf("worker %s: could not dial %s: %v", w, addr, err)
			continue
		}
		c.rpcClients[w] = client
	}

	// go into loadstate
	if !atomic.CompareAndSwapInt32(&c.state, PrepareState, LoadState) {
		log.Println("Could not properly move from PrepareState to LoadState")
		return
	}
	go c.createLoadWork()
}