Ejemplo n.º 1
2
// Close the release channel when you want to clean up nicely.
func CreatePidNode(zconn zookeeper.Conn, zkPath string, contents string, done chan struct{}) error {
	// On the first try, assume the cluster is up and running, that will
	// help hunt down any config issues present at startup
	if _, err := zconn.Create(zkPath, []byte(contents), zookeeper.FlagEphemeral, zookeeper.WorldACL(PERM_FILE)); err != nil {
		if ZkErrorEqual(err, zookeeper.ErrNodeExists) {
			err = zconn.Delete(zkPath, -1)
		}
		if err != nil {
			return fmt.Errorf("zkutil: failed deleting pid node: %v: %v", zkPath, err)
		}
		_, err = zconn.Create(zkPath, []byte(contents), zookeeper.FlagEphemeral, zookeeper.WorldACL(PERM_FILE))
		if err != nil {
			return fmt.Errorf("zkutil: failed creating pid node: %v: %v", zkPath, err)
		}
	}

	go func() {
		for {
			_, _, watch, err := zconn.GetW(zkPath)
			if err != nil {
				if ZkErrorEqual(err, zookeeper.ErrNoNode) {
					_, err = zconn.Create(zkPath, []byte(contents), zookeeper.FlagEphemeral, zookeeper.WorldACL(zookeeper.PermAll))
					if err != nil {
						log.Warningf("failed recreating pid node: %v: %v", zkPath, err)
					} else {
						log.Infof("recreated pid node: %v", zkPath)
						continue
					}
				} else {
					log.Warningf("failed reading pid node: %v", err)
				}
			} else {
				select {
				case event := <-watch:
					if ZkEventOk(event) && event.Type == zookeeper.EventNodeDeleted {
						// Most likely another process has started up. However,
						// there is a chance that an ephemeral node is deleted by
						// the session expiring, yet that same session gets a watch
						// notification. This seems like buggy behavior, but rather
						// than race too hard on the node, just wait a bit and see
						// if the situation resolves itself.
						log.Warningf("pid deleted: %v", zkPath)
					} else {
						log.Infof("pid node event: %v", event)
					}
					// break here and wait for a bit before attempting
				case <-done:
					log.Infof("pid watcher stopped on done: %v", zkPath)
					return
				}
			}
			select {
			// No one likes a thundering herd, least of all zookeeper.
			case <-time.After(5*time.Second + time.Duration(rand.Int63n(55e9))):
			case <-done:
				log.Infof("pid watcher stopped on done: %v", zkPath)
				return
			}
		}
	}()

	return nil
}
Ejemplo n.º 2
0
func DeleteRecursive(zconn zookeeper.Conn, zkPath string, version int) error {
	// version: -1 delete any version of the node at path - only applies to the top node
	err := zconn.Delete(zkPath, int32(version))
	if err == nil {
		return nil
	}
	if !ZkErrorEqual(err, zookeeper.ErrNotEmpty) {
		return err
	}
	// Remove the ability for other nodes to get created while we are trying to delete.
	// Otherwise, you can enter a race condition, or get starved out from deleting.
	_, err = zconn.SetACL(zkPath, zookeeper.WorldACL(zookeeper.PermAdmin|zookeeper.PermDelete|zookeeper.PermRead), int32(version))
	if err != nil {
		return err
	}
	children, _, err := zconn.Children(zkPath)
	if err != nil {
		return err
	}
	for _, child := range children {
		err := DeleteRecursive(zconn, path.Join(zkPath, child), -1)
		if err != nil && !ZkErrorEqual(err, zookeeper.ErrNoNode) {
			return fmt.Errorf("zkutil: recursive delete failed: %v", err)
		}
	}

	err = zconn.Delete(zkPath, int32(version))
	if err != nil && !ZkErrorEqual(err, zookeeper.ErrNotEmpty) {
		err = fmt.Errorf("zkutil: nodes getting recreated underneath delete (app race condition): %v", zkPath)
	}
	return err
}
Ejemplo n.º 3
0
func recursiveDelete(c *zk.Conn, path string) error {
	children, _, err := c.Children(path)
	if err != nil && err != zk.ErrNoNode {
		logError("err finding children of %s", path)
		return err
	}
	for _, child := range children {
		err := recursiveDelete(c, path+"/"+child)
		if err != nil && err != zk.ErrNoNode {
			logError("err deleting %s", child)
			return err
		}
	}

	// get version
	_, stat, err := c.Get(path)
	if err != nil && err != zk.ErrNoNode {
		logError("err getting version of %s", path)
		return err
	}

	if err := c.Delete(path, stat.Version); err != nil && err != zk.ErrNoNode {
		return err
	}
	return nil
}
Ejemplo n.º 4
0
func DeleteNodebyData(path string, conn *zk.Conn, data []byte) error {

	children, _, err := conn.Children(path)
	if err != nil {
		glog.Warning("Could not list children")
		return err
	}
	for _, child := range children {
		child_path := path + "/" + child
		child_data, _, err := conn.Get(child_path)
		if err != nil {
			glog.Warning("Could not get data for %s", child_path)
			continue
		}
		if bytes.Compare(data, child_data) == 0 {
			for i := 0; i < 5; i++ {
				_, stats, _ := conn.Get(child_path)
				err = conn.Delete(child_path, stats.Version)
				if err == nil || err == zk.ErrNoNode {
					return nil
				}
			}
			glog.Error("Could not delete matched node %s", child_path)
		}
	}
	return nil
}
Ejemplo n.º 5
0
// notify every Comet node to migrate
func notifyMigrate(conn *zk.Conn, migrateLockPath, znode, key string, update bool, nodeWeightMap map[string]int) (err error) {
	// try lock
	if _, err = conn.Create(migrateLockPath, []byte("1"), zk.FlagEphemeral, zk.WorldACL(zk.PermAll)); err != nil {
		log.Error("conn.Create(\"/gopush-migrate-lock\", \"1\", zk.FlagEphemeral) error(%v)", err)
		return
	}
	// call comet migrate rpc
	wg := &sync.WaitGroup{}
	wg.Add(len(cometNodeInfoMap))
	for node, nodeInfo := range cometNodeInfoMap {
		go func(n string, info *CometNodeInfo) {
			if info.Rpc == nil {
				log.Error("notify migrate failed, no rpc found, node:%s", n)
				wg.Done()
				return
			}
			r := info.Rpc.Get()
			if r == nil {
				log.Error("notify migrate failed, no rpc found, node:%s", n)
				wg.Done()
				return
			}
			reply := 0
			args := &CometMigrateArgs{Nodes: nodeWeightMap}
			if err = r.Call(CometServiceMigrate, args, &reply); err != nil {
				log.Error("rpc.Call(\"%s\") error(%v)", CometServiceMigrate, err)
				wg.Done()
				return
			}
			log.Debug("notify node:%s migrate succeed", n)
			wg.Done()
		}(node, nodeInfo)
	}
	wg.Wait()
	// update znode info
	if update {
		var data []byte
		data, err = json.Marshal(cometNodeInfoMap[key])
		if err != nil {
			log.Error("json.Marshal() node:%s error(%v)", key, err)
			return
		}
		if _, err = conn.Set(znode, data, -1); err != nil {
			log.Error("conn.Set(\"%s\",\"%s\",\"-1\") error(%v)", znode, string(data), err)
			return
		}
	}

	// release lock
	if err = conn.Delete(migrateLockPath, -1); err != nil {
		log.Error("conn.Delete(\"%s\") error(%v)", migrateLockPath, err)
	}
	return
}
Ejemplo n.º 6
0
func zk_unreginster(path string, c *zk.Conn, exit chan os.Signal) {
	for sig := range exit {
		log.Warnf("zk_unreginster |  received ctrl+c(%v)\n", sig)
		err := c.Delete(path, -1)
		log.Infof("zk_unreginster | path :%+v", path)
		if err != nil {
			log.Warnf("zk_unreginster | Delete returned: %+v", err)
		}
		os.Exit(0)
	}

}
Ejemplo n.º 7
0
func bench(conn *zk.Conn, value []byte, ratio float64) {
	choice := rand.Float64()
	atomic.AddInt32(&reqCnt, 1)
	if choice >= ratio {
		// write
		conn.Delete("/_test_load_obliterator", 0)
		conn.Create("/_test_load_obliterator", value, 1, zk.WorldACL(zk.PermAll))
	} else {
		// read
		conn.Get("/_test_load_obliterator")
	}
}
Ejemplo n.º 8
0
func deleteIfExists(path string, conn *zk.Conn) error {
	_, stat, err := conn.Get(path)
	if err != nil && err != zk.ErrNoNode {
		return err
	}
	if err != zk.ErrNoNode {
		if err := conn.Delete(path, stat.Version); err != nil {
			return err
		}
	}
	return nil
}
Ejemplo n.º 9
0
func zkDelete(conn *zk.Conn, zpath string) error {
	if zpath != "" {
		_, stat, _ := conn.Get(zpath)
		err := conn.Delete(zpath, stat.Version)
		if err != nil {
			return err
		}

		index := strings.LastIndex(zpath, "/")
		if index != -1 {
			return zkDelete(conn, zpath[:index])
		} else {
			return nil
		}
	} else {
		return nil
	}
}
Ejemplo n.º 10
0
func zkDeleteChildren(conn *zk.Conn, path string) {
	children, _, _ := conn.Children(path)

	// Leaf
	if len(children) == 0 {
		fmt.Println("Deleting ", path)
		err := conn.Delete(path, -1)
		if err != nil {
			log.Panic(err)
		}
		return
	}

	// Branches
	for _, name := range children {
		zkDeleteChildren(conn, path+"/"+name)
	}

	return
}
Ejemplo n.º 11
0
func (zk *ZookeeperClusterRunner) deleteRecursively(client *zkClient.Conn, key string) {
	if key != "/zookeeper" {
		children, _, err := client.Children(key)
		Ω(err).ShouldNot(HaveOccured(), "Failed to fetch children for '%s'", key)

		for _, child := range children {
			childPath := key + "/" + child
			if key == "/" {
				childPath = key + child
			}
			zk.deleteRecursively(client, childPath)
		}

		if key != "/" {
			err = client.Delete(key, -1)
		}

		Ω(err).ShouldNot(HaveOccured(), "Failed to delete key '%s'", key)
	}

	return
}
Ejemplo n.º 12
0
func markTerminated(conn *zk.Conn, hss *zzk.HostServiceState) {
	ssPath := zzk.ServiceStatePath(hss.ServiceId, hss.ServiceStateId)
	_, stats, err := conn.Get(ssPath)
	if err != nil {
		glog.V(0).Infof("Unable to get service state %s for delete because: %v", ssPath, err)
		return
	}
	err = conn.Delete(ssPath, stats.Version)
	if err != nil {
		glog.V(0).Infof("Unable to delete service state %s because: %v", ssPath, err)
		return
	}

	hssPath := zzk.HostServiceStatePath(hss.HostId, hss.ServiceStateId)
	_, stats, err = conn.Get(hssPath)
	if err != nil {
		glog.V(0).Infof("Unable to get host service state %s for delete becaus: %v", hssPath, err)
		return
	}
	err = conn.Delete(hssPath, stats.Version)
	if err != nil {
		glog.V(0).Infof("Unable to delete host service state %s", hssPath)
	}
}
Ejemplo n.º 13
0
func DeleteNode(conn *zk.Conn, fpath string) error {
	log.Debug("delete znode %s", fpath)
	err := conn.Delete(fpath, -1)
	return err
}
Ejemplo n.º 14
0
						Key:   "/restaurant/menu/dinner",
						Value: []byte{},
						TTL:   0,
						Dir:   true,
					}))
				})

				It("deletes the expired entries", func() {
					_, _, err := client.Get("/restauraunt/menu/breakfast")
					Ω(err).Should(HaveOccured())
				})
			})

			Context("when the node is empty", func() {
				BeforeEach(func() {
					err := client.Delete("/restaurant/menu/dinner/first_course", -1)
					Ω(err).ShouldNot(HaveOccured())
				})

				It("should return an empty list without erroring", func() {
					nodes, err := adapter.List("/restaurant/menu/dinner")
					Ω(nodes).Should(HaveLen(0))
					Ω(err).ShouldNot(HaveOccured())
				})
			})
		})

		Context("when the node exists, but is not a directory", func() {
			It("should return an error", func() {
				nodes, err := adapter.List("/restaurant/menu/breakfast")
				Ω(nodes).Should(BeEmpty())
Ejemplo n.º 15
0
func Delete(conn *zk.Conn, zkConf conf.Zookeeper, appId string) error {
	path := concatPath(zkConf.Path, appId)
	return conn.Delete(path, -1)
}
Ejemplo n.º 16
0
func (a *HostAgent) processServiceState(conn *zk.Conn, shutdown <-chan int, done chan<- stateResult, ssId string) {
	procFinished := make(chan int, 1)
	var attached bool

	for {
		var hss zzk.HostServiceState
		hssStats, zkEvent, err := zzk.LoadHostServiceStateW(conn, a.hostId, ssId, &hss)
		if err != nil {
			errS := fmt.Sprintf("Unable to load host service state %s: %v", ssId, err)
			glog.Error(errS)
			done <- stateResult{ssId, errors.New(errS)}
			return
		}
		if len(hss.ServiceStateId) == 0 || len(hss.ServiceId) == 0 {
			errS := fmt.Sprintf("Service for %s is invalid", zzk.HostServiceStatePath(a.hostId, ssId))
			glog.Error(errS)
			done <- stateResult{ssId, errors.New(errS)}
			return
		}

		var ss dao.ServiceState
		ssStats, err := zzk.LoadServiceState(conn, hss.ServiceId, hss.ServiceStateId, &ss)
		if err != nil {
			errS := fmt.Sprintf("Host service state unable to load service state %s", ssId)
			glog.Error(errS)
			// This goroutine is watching a node for a service state that does not
			// exist or could not be loaded. We should *probably* delete this node.
			hssPath := zzk.HostServiceStatePath(a.hostId, ssId)
			err = conn.Delete(hssPath, hssStats.Version)
			if err != nil {
				glog.Warningf("Unable to delete host service state %s", hssPath)
			}
			done <- stateResult{ssId, errors.New(errS)}
			return
		}

		var service dao.Service
		_, err = zzk.LoadService(conn, ss.ServiceId, &service)
		if err != nil {
			errS := fmt.Sprintf("Host service state unable to load service %s", ss.ServiceId)
			glog.Errorf(errS)
			done <- stateResult{ssId, errors.New(errS)}
			return
		}

		glog.V(1).Infof("Processing %s, desired state: %d", service.Name, hss.DesiredState)

		switch {

		case hss.DesiredState == dao.SVC_STOP:
			// This node is marked for death
			glog.V(1).Infof("Service %s was marked for death, quitting", service.Name)
			if attached {
				err = a.terminateAttached(conn, procFinished, &ss)
			} else {
				err = a.terminateInstance(conn, &ss)
			}
			done <- stateResult{ssId, err}
			return

		case attached:
			// Something uninteresting happened. Why are we here?
			glog.V(1).Infof("Service %s is attached in a child goroutine", service.Name)

		case hss.DesiredState == dao.SVC_RUN &&
			ss.Started.Year() <= 1 || ss.Terminated.Year() > 2:
			// Should run, and either not started or process died
			glog.V(1).Infof("Service %s does not appear to be running; starting", service.Name)
			attached, err = a.startService(conn, procFinished, ssStats, &service, &ss)

		case ss.Started.Year() > 1 && ss.Terminated.Year() <= 1:
			// Service superficially seems to be running. We need to attach
			glog.V(1).Infof("Service %s appears to be running; attaching", service.Name)
			attached, err = a.attachToService(conn, procFinished, &ss, &hss)

		default:
			glog.V(0).Infof("Unhandled service %s", service.Name)
		}

		if !attached || err != nil {
			errS := fmt.Sprintf("Service state %s unable to start or attach to process", ssId)
			glog.V(1).Info(errS)
			a.terminateInstance(conn, &ss)
			done <- stateResult{ssId, errors.New(errS)}
			return
		}

		glog.V(3).Infoln("Successfully processed state for %s", service.Name)

		select {

		case <-shutdown:
			glog.V(0).Info("Agent goroutine will stop watching ", ssId)
			err = a.terminateAttached(conn, procFinished, &ss)
			if err != nil {
				glog.Errorf("Error terminating %s: %v", service.Name, err)
			}
			done <- stateResult{ssId, err}
			return

		case <-procFinished:
			glog.V(1).Infof("Process finished %s", ssId)
			attached = false
			continue

		case evt := <-zkEvent:
			if evt.Type == zk.EventNodeDeleted {
				glog.V(0).Info("Host service state deleted: ", ssId)
				err = a.terminateAttached(conn, procFinished, &ss)
				if err != nil {
					glog.Errorf("Error terminating %s: %v", service.Name, err)
				}
				done <- stateResult{ssId, err}
				return
			}

			glog.V(1).Infof("Host service state %s received event %v", ssId, evt)
			continue
		}
	}
}
Ejemplo n.º 17
0
func (z *Zookeeper) deleteNode(conn *zk.Conn, keyPath string) error {
	log.Debugf("Deleting zk://%s", keyPath)
	return conn.Delete(keyPath, -1)
}