Example #1
// Snapshot performs a writable snapshot on the subvolume
func (c *RsyncConn) Snapshot(label string) (err error) {
	c.Lock()
	defer c.Unlock()
	dest := c.SnapshotPath(label)
	if exists, err := volume.IsDir(dest); exists || err != nil {
		if exists {
			return fmt.Errorf("snapshot %s already exists", label)
		}
		return err
	}

	exe, err := exec.LookPath("rsync")
	if err != nil {
		return err
	}
	argv := []string{"-a", c.Path() + "/", dest + "/"}
	glog.Infof("Performing snapshot rsync command: %s %s", exe, argv)

	var output []byte
	for i := 0; i < 3; i++ {
		rsync := exec.Command(exe, argv...)
		done := make(chan interface{})
		go func() {
			defer close(done)
			output, err = rsync.CombinedOutput()
		}()

		select {
		case <-time.After(c.timeout):
			glog.V(2).Infof("Received signal to kill rsync")
			rsync.Process.Kill()
			<-done
		case <-done:
		}
		if err == nil {
			return nil
		}
		// rsync exit status 24 means "partial transfer due to vanished
		// source files", the only failure worth retrying.
		if exitStatus, ok := utils.GetExitStatus(err); !ok || exitStatus != 24 {
			glog.Errorf("Could not perform rsync: %s", string(output))
			return err
		}
		glog.Infof("trying snapshot again: %s", label)
	}
	if exitStatus, _ := utils.GetExitStatus(err); exitStatus == 24 {
		glog.Warningf("snapshot completed with errors: Partial transfer due to vanished source files")
		return nil
	}
	glog.Errorf("Could not perform rsync: %s", string(output))
	return err
}
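All of these examples lean on utils.GetExitStatus to decide whether a child process produced a usable exit code. Its source is not shown here; as a point of reference, this is a minimal sketch of what such a helper presumably looks like on Unix (assuming os/exec and syscall imports; the real implementation may differ), given that the examples treat a nil error as exit status 0 with ok == true:

// GetExitStatus extracts the exit status from the error returned by
// exec.Cmd.Run/Wait. ok is false when no status is available, e.g. when
// the command never started. (Sketch only; not the verbatim helper.)
func GetExitStatus(err error) (int, bool) {
	if err == nil {
		return 0, true // a nil error means the command exited 0
	}
	if exitErr, ok := err.(*exec.ExitError); ok {
		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
			return status.ExitStatus(), true
		}
	}
	return 0, false
}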
Example #2
// Exec starts the process in a docker container and wires its stdio to the
// returned instance's byte channels; the outcome arrives on p.Result.
func (e *Executor) Exec(cfg *ProcessConfig) (p *ProcessInstance) {
	p = &ProcessInstance{
		Stdin:  make(chan byte, 1024),
		Stdout: make(chan byte, 1024),
		Stderr: make(chan byte, 1024),
		Result: make(chan Result, 2),
	}

	cmd, err := StartDocker(cfg, e.port)
	if err != nil {
		p.Result <- Result{0, err.Error(), ABNORMAL}
		return
	}

	cmd.Stdin = ShellReader{p.Stdin}
	cmd.Stdout = ShellWriter{p.Stdout}
	cmd.Stderr = ShellWriter{p.Stderr}

	go func() {
		defer p.Close()
		err := cmd.Run()
		// An error carrying no exit status (e.g. the command never
		// started) means the process terminated abnormally.
		if exitcode, ok := utils.GetExitStatus(err); !ok {
			p.Result <- Result{exitcode, err.Error(), ABNORMAL}
		} else if exitcode == 0 {
			p.Result <- Result{exitcode, "", NORMAL}
		} else {
			p.Result <- Result{exitcode, err.Error(), NORMAL}
		}
	}()

	return
}
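ShellReader and ShellWriter are not shown in the example; judging by how they are assigned to cmd.Stdin/Stdout/Stderr, they are presumably thin adapters between the instance's byte channels and io.Reader/io.Writer. A minimal sketch under that assumption (requires the io import; the real types may differ, e.g. in buffering):

// ShellReader drains a byte channel as an io.Reader; a closed channel
// reads as EOF. (Sketch only.)
type ShellReader struct {
	data chan byte
}

func (r ShellReader) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}
	b, ok := <-r.data
	if !ok {
		return 0, io.EOF
	}
	p[0] = b
	return 1, nil
}

// ShellWriter forwards written bytes onto a byte channel. (Sketch only.)
type ShellWriter struct {
	data chan byte
}

func (w ShellWriter) Write(p []byte) (int, error) {
	for _, b := range p {
		w.data <- b
	}
	return len(p), nil
}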
Example #3
// Mount mounts the remote NFSv4 path at localPath, giving up after timeout.
func (d *NFSDriver) Mount(remotePath, localPath string, timeout time.Duration) error {
	glog.Infof("Mounting %s -> %s", remotePath, localPath)
	cmd := commandFactory("mount.nfs4", "-o", "intr", remotePath, localPath)
	// errC is buffered so the goroutine below can always deliver its result
	// and exit, even after the select has returned on the timeout branch.
	errC := make(chan error, 1)
	go func() {
		output, err := cmd.CombinedOutput()
		glog.V(1).Infof("Mounting %s -> %s: %s (%s)", remotePath, localPath, string(output), err)
		// Per mount(8), exit code 32 means mount failure.
		if exitCode, ok := utils.GetExitStatus(err); exitCode == 32 || !ok {
			errC <- fmt.Errorf("%s (%s)", string(output), err)
		} else {
			errC <- nil
		}
	}()

	select {
	case <-time.After(timeout):
		err := fmt.Errorf("timeout waiting for nfs mount")
		if execCmd, ok := cmd.(*exec.Cmd); ok {
			execCmd.Process.Kill()
		}
		return err
	case err := <-errC:
		return err
	}
}
Example #4
// isNFSMountStale reports whether the NFS mount at mountpoint appears stale.
func isNFSMountStale(mountpoint string) bool {
	if err := exec.Command("/bin/bash", "-c", fmt.Sprintf("read -t1 < <(stat -t '%s' 2>&-)", mountpoint)).Run(); err != nil {
		if status, iscode := utils.GetExitStatus(err); iscode && status == 142 {
			// 142 is 128+SIGALRM: read -t1 timed out because stat is
			// hanging, so the server is temporarily unreachable rather
			// than the mount being stale; wait for NFS to come back.
			glog.Infof("Distributed storage temporarily unavailable. Waiting for it to return.")
			return false
		}
		glog.Errorf("Mount point %s check had error (%s); considering stale", mountpoint, err)
		return true
	}
	return false
}
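Bash reports a command killed by a signal as 128 plus the signal number, so status 142 is 128 + SIGALRM (14), which is how read -t1 signals its timeout. For comparison, here is a hypothetical version of the same probe written directly in Go (probeMount is a made-up name; assumes os, time, and fmt imports). It also suggests why the example shells out to bash instead: a goroutine blocked in os.Stat on a hung mount cannot be abandoned, whereas a stuck child process can be:

// probeMount stats the mountpoint with a one-second deadline. The stat
// goroutine lingers until the hung NFS call finally returns, which is
// the drawback the bash subprocess in the example avoids. (Sketch only.)
func probeMount(mountpoint string) error {
	done := make(chan error, 1) // buffered so the goroutine never blocks on send
	go func() {
		_, err := os.Stat(mountpoint)
		done <- err
	}()
	select {
	case err := <-done:
		return err
	case <-time.After(time.Second):
		return fmt.Errorf("stat of %s timed out; NFS temporarily unavailable", mountpoint)
	}
}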
Example #5
// bindMountImp performs a bind mount of src to dst.
func bindMountImp(src, dst string) error {

	if mounted, err := isBindMounted(dst); err != nil || mounted {
		return err
	}
	runMountCommand := func(options ...string) error {
		cmd, args := mntArgs(src, dst, "", options...)
		mount := exec.Command(cmd, args...)
		return mount.Run()
	}
	returnErr := runMountCommand("bind")
	if returnErr != nil {
		// If the mount fails, it could be due to a stale NFS handle, signalled
		// by bit 32 in the exit code (mount(8) exit codes are ORed bit flags,
		// hence the bitwise AND below). A stale handle can occur if, e.g., the
		// source directory has been deleted and restored (a common occurrence
		// in the dev workflow). Try again with the remount option.
		if exitcode, ok := utils.GetExitStatus(returnErr); ok && (exitcode&32) != 0 {
			returnErr = runMountCommand("bind", "remount")
		}
	}
	return returnErr
}
Example #6
// RunShell runs a predefined service shell command via the service definition
func (a *api) RunShell(config ShellConfig) error {
	client, err := a.connectDAO()
	if err != nil {
		return err
	}

	svc, err := a.GetService(config.ServiceID)
	if err != nil {
		return err
	}

	getSvc := func(svcID string) (service.Service, error) {
		s := service.Service{}
		err := client.GetService(svcID, &s)
		return s, err
	}

	findChild := func(svcID, childName string) (service.Service, error) {
		s := service.Service{}
		err := client.FindChildService(dao.FindChildRequest{svcID, childName}, &s)
		return s, err
	}

	if err := svc.EvaluateRunsTemplate(getSvc, findChild); err != nil {
		return fmt.Errorf("error evaluating service %s Runs %+v: %s", svc.ID, svc.Runs, err)
	}
	command, ok := svc.Runs[config.Command]
	if !ok {
		return fmt.Errorf("command not found for service")
	}
	mounts, err := buildMounts(config.ServicedEndpoint, config.ServiceID, config.Mounts)
	if err != nil {
		return err
	}

	quotedArgs := utils.ShellQuoteArgs(config.Args)
	command = strings.Join([]string{command, quotedArgs}, " ")

	asUser := "******"
	if config.Username != "" && config.Username != "root" {
		asUser = fmt.Sprintf("su - %s -c ", config.Username)
	}

	cfg := shell.ProcessConfig{
		ServiceID:   config.ServiceID,
		IsTTY:       config.IsTTY,
		SaveAs:      config.SaveAs,
		Mount:       mounts,
		Command:     asUser + utils.ShellQuoteArg(command),
		LogToStderr: config.LogToStderr,
	}

	cfg.LogStash.Enable = config.LogStash.Enable
	cfg.LogStash.SettleTime, err = time.ParseDuration(config.LogStash.SettleTime)
	if err != nil {
		return err
	}

	cfg.LogStash.IdleFlushTime, err = time.ParseDuration(config.LogStash.IdleFlushTime)
	if err != nil {
		return err
	}

	// TODO: change me to use sockets
	cmd, err := shell.StartDocker(&cfg, options.Endpoint)
	if err != nil {
		return fmt.Errorf("failed to connect to service: %s", err)
	}

	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	err = cmd.Run()
	if _, ok := utils.GetExitStatus(err); !ok {
		glog.Fatalf("abnormal termination from shell command: %s", err)
	}

	dockercli, err := a.connectDocker()
	if err != nil {
		glog.Fatalf("unable to connect to the docker service: %s", err)
	}
	exitcode, err := dockercli.WaitContainer(config.SaveAs)
	if err != nil {
		glog.Fatalf("failure waiting for container: %s", err)
	}
	container, err := dockercli.InspectContainer(config.SaveAs)
	if err != nil {
		glog.Fatalf("cannot acquire information about container: %s (%s)", config.SaveAs, err)
	}
	glog.V(2).Infof("Container ID: %s", container.ID)

	switch exitcode {
	case 0:
		// Commit the container
		label := ""
		glog.V(0).Infof("Committing container")
		if err := client.Commit(container.ID, &label); err != nil {
			glog.Fatalf("Error committing container: %s (%s)", container.ID, err)
		}
		var layers = 0
		if err := client.ImageLayerCount(container.Image, &layers); err != nil {
			glog.Errorf("Counting layers for image %s", svc.ImageID)
		}
		if layers > layer.WARN_LAYER_COUNT {
			glog.Warningf("Image '%s' number of layers (%d) approaching maximum (%d).  Please squash image layers.",
				svc.ImageID, layers, layer.MAX_LAYER_COUNT)
		}
	default:
		// Delete the container

		if err := dockercli.StopContainer(container.ID, 10); err != nil {
			glog.Fatalf("failed to stop container: %s (%s)", container.ID, err)
		} else if err := dockercli.RemoveContainer(dockerclient.RemoveContainerOptions{ID: container.ID}); err != nil {
			glog.Fatalf("failed to remove container: %s (%s)", container.ID, err)
		}
		return fmt.Errorf("Command returned non-zero exit code %d.  Container not commited.", exitcode)
	}

	return nil
}
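utils.ShellQuoteArg and utils.ShellQuoteArgs are used above to build the su -c command line safely; their source is not shown. A minimal sketch of the assumed behavior, single-quoting each argument and escaping embedded quotes (hypothetical stand-ins, assuming the strings import; the real helpers may differ):

// shellQuoteArg wraps arg in single quotes, escaping any embedded single
// quote as '\''. (Hypothetical stand-in for utils.ShellQuoteArg.)
func shellQuoteArg(arg string) string {
	return "'" + strings.Replace(arg, "'", `'\''`, -1) + "'"
}

// shellQuoteArgs quotes each argument and joins them with spaces.
// (Hypothetical stand-in for utils.ShellQuoteArgs.)
func shellQuoteArgs(args []string) string {
	quoted := make([]string, len(args))
	for i, arg := range args {
		quoted[i] = shellQuoteArg(arg)
	}
	return strings.Join(quoted, " ")
}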
Example #7
// Run executes the controller's main loop and blocks until the service exits
// according to its restart policy or Close() is called.
func (c *Controller) Run() (err error) {
	defer c.shutdown()
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc,
		syscall.SIGINT,
		syscall.SIGTERM,
		syscall.SIGQUIT)

	env := os.Environ()
	env = append(env, "CONTROLPLANE=1")
	env = append(env, fmt.Sprintf("CONTROLPLANE_CONSUMER_URL=http://localhost%s/api/metrics/store", c.options.Metric.Address))
	env = append(env, fmt.Sprintf("CONTROLPLANE_HOST_ID=%s", c.hostID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_TENANT_ID=%s", c.tenantID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_INSTANCE_ID=%s", c.options.Service.InstanceID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_SERVICED_ID=%s", c.options.Service.ID))

	if err := writeEnvFile(env); err != nil {
		return err
	}

	args := []string{"-c", "exec " + strings.Join(c.options.Service.Command, " ")}

	startService := func() (*subprocess.Instance, chan error) {
		service, serviceExited, _ := subprocess.New(time.Second*10, env, "/bin/sh", args...)
		return service, serviceExited
	}

	sendSignal := func(service *subprocess.Instance, sig os.Signal) bool {
		switch {
		case c.PIDFile != "":
			c.forwardSignal(sig)
		case service != nil:
			service.Notify(sig)
		default:
			return false
		}
		return true
	}

	rpcDead, err := c.rpcHealthCheck()
	if err != nil {
		glog.Error("Could not setup RPC ping check: %s", err)
		return err
	}

	storageDead, err := c.storageHealthCheck()
	if err != nil {
		glog.Errorf("Could not set up storage check: %s", err)
		return err
	}

	prereqsPassed := make(chan bool)
	var startAfter <-chan time.Time
	var exitAfter <-chan time.Time
	var service *subprocess.Instance
	serviceExited := make(chan error, 1)
	c.watchRemotePorts()
	if err := c.handleControlCenterImports(rpcDead); err != nil {
		glog.Error("Could not setup Control Center specific imports: ", err)
		return err
	}
	go c.checkPrereqs(prereqsPassed, rpcDead)
	go c.reapZombies(rpcDead)
	healthExit := make(chan struct{})
	// shutdownService may close healthExit before the deferred close runs,
	// and closing an already-closed channel panics, so guard the close.
	healthExitClosed := false
	closeHealthExit := func() {
		if !healthExitClosed {
			healthExitClosed = true
			close(healthExit)
		}
	}
	defer closeHealthExit()
	c.kickOffHealthChecks(healthExit)
	doRegisterEndpoints := true
	exited := false

	shutdownService := func(service *subprocess.Instance, sig os.Signal) {
		c.options.Service.Autorestart = false
		if sendSignal(service, sig) {
			// Receives from nil channels block forever, so nilling these
			// disables their cases in the select loop below.
			sigc = nil
			prereqsPassed = nil
			startAfter = nil
			rpcDead = nil
			exitAfter = time.After(time.Second * 30)
			closeHealthExit()
		} else {
			c.exitStatus = 1
			exited = true
		}
	}

	var reregister <-chan struct{}

	for !exited {
		select {
		case sig := <-sigc:
			glog.Infof("Notifying subprocess of signal %v", sig)
			shutdownService(service, sig)

		case <-exitAfter:
			glog.Infof("Killing unresponsive subprocess")
			sendSignal(service, syscall.SIGKILL)
			c.exitStatus = 1
			exited = true

		case <-prereqsPassed:
			startAfter = time.After(time.Millisecond * 1)
			prereqsPassed = nil

		case exitError := <-serviceExited:
			if !c.options.Service.Autorestart {
				exitStatus, _ := utils.GetExitStatus(exitError)
				if c.options.Logforwarder.Enabled {
					time.Sleep(c.options.Logforwarder.SettleTime)
				}
				glog.Infof("Service Exited with status:%d due to %+v", exitStatus, exitError)
				//set loop to end
				exited = true
				//exit with exit code, defer so that other cleanup can happen
				c.exitStatus = exitStatus

			} else {
				glog.Infof("Restarting service process in 10 seconds.")
				service = nil
				startAfter = time.After(time.Second * 10)
			}

		case <-startAfter:
			glog.Infof("Starting service process.")
			service, serviceExited = startService()
			if doRegisterEndpoints {
				reregister = registerExportedEndpoints(c, rpcDead)
				doRegisterEndpoints = false
			}
			startAfter = nil
		case <-reregister:
			reregister = registerExportedEndpoints(c, rpcDead)
		case <-rpcDead:
			glog.Infof("RPC Server has gone away, cleaning up")
			shutdownService(service, syscall.SIGTERM)
		case <-storageDead:
			glog.Infof("Distributed storage for service %s has gone away; shutting down", c.options.Service.ID)
			shutdownService(service, syscall.SIGTERM)
		}
	}
	// Signal to health check registry that this instance is giving up the ghost.
	client, err := node.NewLBClient(c.options.ServicedEndpoint)
	if err != nil {
		glog.Errorf("Could not create a client to endpoint: %s, %s", c.options.ServicedEndpoint, err)
		return nil
	}
	defer client.Close()
	c.Close()
	var unused int
	client.LogHealthCheck(domain.HealthCheckResult{c.options.Service.ID, c.options.Service.InstanceID, "__instance_shutdown", time.Now().String(), "passed"}, &unused)
	return nil
}