Example #1
func setupDumpStackTrap(root string) {
	// Windows does not support signals the way *nix systems do, so instead
	// of trapping SIGUSR1 to dump stacks, we wait on a Win32 event to be
	// signaled. The event is ACL'd to builtin administrators and local system.
	ev := "Global\\docker-daemon-" + fmt.Sprint(os.Getpid())
	sd, err := winio.SddlToSecurityDescriptor("D:P(A;;GA;;;BA)(A;;GA;;;SY)")
	if err != nil {
		logrus.Errorf("failed to get security descriptor for debug stackdump event %s: %s", ev, err.Error())
		return
	}
	var sa syscall.SecurityAttributes
	sa.Length = uint32(unsafe.Sizeof(sa))
	sa.InheritHandle = 1
	sa.SecurityDescriptor = uintptr(unsafe.Pointer(&sd[0]))
	h, err := system.CreateEvent(&sa, false, false, ev)
	if h == 0 || err != nil {
		logrus.Errorf("failed to create debug stackdump event %s: %s", ev, err.Error())
		return
	}
	go func() {
		logrus.Debugf("Stackdump - waiting signal at %s", ev)
		for {
			syscall.WaitForSingleObject(h, syscall.INFINITE)
			signal.DumpStacks(root)
		}
	}()
}
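The event created above is ACL'd so that only builtin administrators and local system may signal it. As a rough illustration, a separate process could trigger the stack dump along the lines of the sketch below, assuming the golang.org/x/sys/windows package; the PID is a placeholder and the caller must hold rights that satisfy the ACL.

package main

import (
	"fmt"

	"golang.org/x/sys/windows"
)

// EVENT_MODIFY_STATE access right, defined locally to keep the sketch
// self-contained.
const eventModifyState = 0x0002

func main() {
	// Hypothetical daemon PID; in practice it would come from a process listing.
	pid := 1234
	name, err := windows.UTF16PtrFromString("Global\\docker-daemon-" + fmt.Sprint(pid))
	if err != nil {
		return
	}
	h, err := windows.OpenEvent(eventModifyState, false, name)
	if err != nil {
		return
	}
	defer windows.CloseHandle(h)
	// Signaling the event wakes the WaitForSingleObject loop in the daemon,
	// which then writes the goroutine stack dump.
	windows.SetEvent(h)
}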
Example #2
// Cleanup stops the active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	defer c.mu.Unlock()
	state := c.currentNodeState()
	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}
	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}
	c.nr = nil
}
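The helpers used above (managerStats, isLastManager, removingManagerCausesLossOfQuorum) are defined elsewhere in the daemon and are not shown in this example. A minimal sketch of the arithmetic they imply, assuming Raft quorum means a strict majority of managers; the real helpers may apply a slightly different policy.

package main

import "fmt"

// Illustrative sketches only. reachable and unreachable count the cluster's
// managers before this node is removed, with this node counted as reachable.
func isLastManager(reachable, unreachable int) bool {
	return reachable == 1 && unreachable == 0
}

func removingManagerCausesLossOfQuorum(reachable, unreachable int) bool {
	remaining := reachable - 1           // reachable managers after removal
	total := reachable + unreachable - 1 // managers left in the cluster
	return remaining < total/2+1         // no longer a strict Raft majority
}

func main() {
	// 3 reachable, 1 unreachable: leaving drops the cluster to 2 of 3
	// reachable managers, which is still a majority.
	fmt.Println(removingManagerCausesLossOfQuorum(3, 1)) // false
	// 2 reachable, 1 unreachable: leaving drops it to 1 of 2.
	fmt.Println(removingManagerCausesLossOfQuorum(2, 1)) // true
}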
Example #3
func setupDumpStackTrap() {
	c := make(chan os.Signal, 1)
	signal.Notify(c, syscall.SIGUSR1)
	go func() {
		for range c {
			psignal.DumpStacks()
		}
	}()
}
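On *nix systems the dump is triggered by delivering SIGUSR1 to the daemon process, for example from another Go program; the PID below is a placeholder.

package main

import "syscall"

func main() {
	// Hypothetical daemon PID; in practice it comes from a pidfile or a
	// process listing.
	pid := 1234
	// Delivering SIGUSR1 wakes the signal.Notify channel in the example
	// above and triggers the goroutine stack dump.
	syscall.Kill(pid, syscall.SIGUSR1)
}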
Example #4
// Leave shuts down Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.Lock()
	node := c.node
	if node == nil {
		c.Unlock()
		return ErrNoSwarm
	}

	if node.Manager() != nil && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if c.isActiveManager() {
			active, reachable, unreachable, err := c.managerStats()
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						c.Unlock()
						return fmt.Errorf(msg)
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		c.Unlock()
		return fmt.Errorf(msg)
	}
	if err := c.stopNode(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		c.Unlock()
		return err
	}
	c.Unlock()
	if nodeID := node.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}
	c.configEvent <- struct{}{}
	// todo: cleanup optional?
	if err := c.clearState(); err != nil {
		return err
	}
	return nil
}
Example #5
// Copied over from docker/daemon/debugtrap_windows.go
func setupDumpStackTrap() {
	go func() {
		sa := syscall.SecurityAttributes{
			Length: 0,
		}
		ev := "Global\\docker-daemon-" + fmt.Sprint(os.Getpid())
		if h, _ := system.CreateEvent(&sa, false, false, ev); h != 0 {
			logrus.Debugf("Stackdump - waiting signal at %s", ev)
			for {
				syscall.WaitForSingleObject(h, syscall.INFINITE)
				signal.DumpStacks()
			}
		}
	}()
}
Example #6
func setupDumpStackTrap() {
	// Windows does not support signals the way *nix systems do, so instead
	// of trapping SIGUSR1 to dump stacks, we wait on a Win32 event to be
	// signaled.
	go func() {
		sa := syscall.SecurityAttributes{
			Length: 0,
		}
		ev := "Global\\docker-daemon-" + fmt.Sprint(os.Getpid())
		if h, _ := system.CreateEvent(&sa, false, false, ev); h != 0 {
			logrus.Debugf("Stackdump - waiting signal at %s", ev)
			for {
				syscall.WaitForSingleObject(h, syscall.INFINITE)
				psignal.DumpStacks()
			}
		}
	}()
}
Example #7
func (d *Daemon) setupDumpStackTrap(root string) {
	c := make(chan os.Signal, 1)
	signal.Notify(c, syscall.SIGUSR1)
	go func() {
		for range c {
			path, err := stackdump.DumpStacks(root)
			if err != nil {
				logrus.WithError(err).Error("failed to write goroutines dump")
			} else {
				logrus.Infof("goroutine stacks written to %s", path)
			}
			path, err = d.dumpDaemon(root)
			if err != nil {
				logrus.WithError(err).Error("failed to write daemon datastructure dump")
			} else {
				logrus.Infof("daemon datastructure dump written to %s", path)
			}
		}
	}()
}
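This variant relies on a stackdump.DumpStacks helper that is not shown. A minimal sketch of what such a helper might do, assuming it writes the stacks of all goroutines to a timestamped file under the given directory and returns the path; the daemon's real helper may differ in file naming and format.

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"time"
)

// dumpStacksSketch captures the stacks of all goroutines and writes them to
// a timestamped file under dir, returning the file path. Illustrative only.
func dumpStacksSketch(dir string) (string, error) {
	buf := make([]byte, 1<<20)
	for {
		// runtime.Stack with all=true formats the stacks of every goroutine.
		n := runtime.Stack(buf, true)
		if n < len(buf) {
			buf = buf[:n]
			break
		}
		// Buffer was too small; grow and retry.
		buf = make([]byte, 2*len(buf))
	}
	path := filepath.Join(dir, fmt.Sprintf("goroutine-stacks-%d.log", time.Now().Unix()))
	if err := os.WriteFile(path, buf, 0o644); err != nil {
		return "", err
	}
	return path, nil
}

func main() {
	if path, err := dumpStacksSketch(os.TempDir()); err == nil {
		fmt.Println("stacks written to", path)
	}
}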
Example #8
// Leave shuts down Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errNoSwarm
	}

	state := c.currentNodeState()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// leaving a locked swarm without --force is not allowed
		c.mu.Unlock()
		return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.")
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						c.mu.Unlock()
						return errors.New(msg)
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		c.mu.Unlock()
		return errors.New(msg)
	}
	// release readers in here
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		c.mu.Unlock()
		return err
	}
	c.nr = nil
	c.mu.Unlock()
	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	c.configEvent <- struct{}{}
	// todo: cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.SetClusterProvider(nil)
	return nil
}