// Snapshot performs a writable snapshot on the subvolume
func (c *RsyncConn) Snapshot(label string) (err error) {
	c.Lock()
	defer c.Unlock()
	dest := c.SnapshotPath(label)
	if exists, err := volume.IsDir(dest); exists || err != nil {
		if exists {
			return fmt.Errorf("snapshot %s already exists", label)
		}
		return err
	}
	exe, err := exec.LookPath("rsync")
	if err != nil {
		return err
	}
	argv := []string{"-a", c.Path() + "/", dest + "/"}
	glog.Infof("Performing snapshot rsync command: %s %s", exe, argv)

	var output []byte
	for i := 0; i < 3; i++ {
		rsync := exec.Command(exe, argv...)
		done := make(chan interface{})
		go func() {
			defer close(done)
			output, err = rsync.CombinedOutput()
		}()

		select {
		case <-time.After(c.timeout):
			glog.V(2).Infof("Received signal to kill rsync")
			rsync.Process.Kill()
			<-done
		case <-done:
		}
		if err == nil {
			return nil
		}
		if exitStatus, ok := utils.GetExitStatus(err); !ok || exitStatus != 24 {
			glog.Errorf("Could not perform rsync: %s", string(output))
			return err
		}
		glog.Infof("trying snapshot again: %s", label)
	}
	if exitStatus, _ := utils.GetExitStatus(err); exitStatus == 24 {
		glog.Warningf("snapshot completed with errors: Partial transfer due to vanished source files")
		return nil
	}
	glog.Errorf("Could not perform rsync: %s", string(output))
	return err
}
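// The retry loop above hinges on utils.GetExitStatus distinguishing a normal
// non-zero exit (rsync's 24, "partial transfer due to vanished source files")
// from an abnormal failure where no exit code is available. A minimal sketch
// of such a helper, assuming it just unwraps *exec.ExitError (requires
// "os/exec" and "syscall"; this is an illustration, not the actual utils
// implementation):
func getExitStatusSketch(err error) (int, bool) {
	if err == nil {
		return 0, true // clean exit
	}
	if exitErr, ok := err.(*exec.ExitError); ok {
		if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok {
			return ws.ExitStatus(), true // the process ran and returned a code
		}
	}
	return 0, false // the command never ran, or no status is available
}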
func (e *Executor) Exec(cfg *ProcessConfig) (p *ProcessInstance) {
	p = &ProcessInstance{
		Stdin:  make(chan byte, 1024),
		Stdout: make(chan byte, 1024),
		Stderr: make(chan byte, 1024),
		Result: make(chan Result, 2),
	}

	cmd, err := StartDocker(cfg, e.port)
	if err != nil {
		p.Result <- Result{0, err.Error(), ABNORMAL}
		return
	}

	cmd.Stdin = ShellReader{p.Stdin}
	cmd.Stdout = ShellWriter{p.Stdout}
	cmd.Stderr = ShellWriter{p.Stderr}

	go func() {
		defer p.Close()
		err := cmd.Run()
		if exitcode, ok := utils.GetExitStatus(err); !ok {
			p.Result <- Result{exitcode, err.Error(), ABNORMAL}
		} else if exitcode == 0 {
			p.Result <- Result{exitcode, "", NORMAL}
		} else {
			p.Result <- Result{exitcode, err.Error(), NORMAL}
		}
	}()

	return
}
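// ShellReader and ShellWriter adapt the instance's byte channels to the
// io.Reader/io.Writer values expected by cmd.Stdin, cmd.Stdout, and
// cmd.Stderr. A minimal sketch of such adapters (assumes "io"; the bodies are
// assumptions for illustration, not the shell package's actual code):
type shellWriterSketch struct{ out chan<- byte }

func (w shellWriterSketch) Write(p []byte) (int, error) {
	for _, b := range p {
		w.out <- b // forward each byte to whoever consumes the channel
	}
	return len(p), nil
}

type shellReaderSketch struct{ in <-chan byte }

func (r shellReaderSketch) Read(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}
	b, ok := <-r.in
	if !ok {
		return 0, io.EOF // channel closed: report end of input
	}
	p[0] = b
	return 1, nil // one byte at a time keeps the sketch simple
}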
func (d *NFSDriver) Mount(remotePath, localPath string, timeout time.Duration) error {
	glog.Infof("Mounting %s -> %s", remotePath, localPath)
	cmd := commandFactory("mount.nfs4", "-o", "intr", remotePath, localPath)
	errC := make(chan error, 1)
	go func() {
		output, err := cmd.CombinedOutput()
		glog.V(1).Infof("Mounting %s -> %s: %s (%s)", remotePath, localPath, string(output), err)
		if exitCode, ok := utils.GetExitStatus(err); exitCode == 32 || !ok {
			errC <- fmt.Errorf("%s (%s)", string(output), err)
		} else {
			errC <- nil
		}
	}()

	select {
	case <-time.After(timeout):
		err := fmt.Errorf("timeout waiting for nfs mount")
		if execCmd, ok := cmd.(*exec.Cmd); ok {
			execCmd.Process.Kill()
		}
		return err
	case err := <-errC:
		return err
	}
}
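// commandFactory is a package-level seam: in production it returns a real
// *exec.Cmd, while tests can substitute a fake, which is why the timeout path
// above only kills the process after a successful type assertion. A hedged
// sketch of what such a seam could look like (the interface name and factory
// body are assumptions for illustration):
type commandRunnerSketch interface {
	CombinedOutput() ([]byte, error)
}

var commandFactorySketch = func(name string, args ...string) commandRunnerSketch {
	return exec.Command(name, args...)
}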
func isNFSMountStale(mountpoint string) bool {
	if err := exec.Command("/bin/bash", "-c", fmt.Sprintf("read -t1 < <(stat -t '%s' 2>&-)", mountpoint)).Run(); err != nil {
		if status, iscode := utils.GetExitStatus(err); iscode && status == 142 {
			// EREMDEV; wait for NFS to come back
			glog.Infof("Distributed storage temporarily unavailable. Waiting for it to return.")
			return false
		}
		glog.Errorf("Mount point %s check had error (%s); considering stale", mountpoint, err)
		return true
	}
	return false
}
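// A hedged usage sketch: a watchdog could poll the mountpoint and force a
// remount when the check reports a stale handle. The lazy unmount and the
// 30-second timeout here are placeholders, not the driver's actual recovery
// path.
func remountIfStaleSketch(d *NFSDriver, remotePath, localPath string) error {
	if !isNFSMountStale(localPath) {
		return nil
	}
	// lazily force-unmount the stale mountpoint, then mount it again
	if out, err := exec.Command("umount", "-lf", localPath).CombinedOutput(); err != nil {
		return fmt.Errorf("umount failed: %s (%s)", string(out), err)
	}
	return d.Mount(remotePath, localPath, 30*time.Second)
}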
// bindMountImp performs a bind mount of src to dst.
func bindMountImp(src, dst string) error {
	if mounted, err := isBindMounted(dst); err != nil || mounted {
		return err
	}
	runMountCommand := func(options ...string) error {
		cmd, args := mntArgs(src, dst, "", options...)
		mount := exec.Command(cmd, args...)
		return mount.Run()
	}
	returnErr := runMountCommand("bind")
	if returnErr != nil {
		// If the mount fails, it could be due to a stale NFS handle, signalled
		// by a return code of 32. A stale handle can occur if, e.g., the source
		// directory has been deleted and restored (a common occurrence in the
		// dev workflow). Try again with the remount option.
		if exitcode, ok := utils.GetExitStatus(returnErr); ok && (exitcode&32) != 0 {
			returnErr = runMountCommand("bind", "remount")
		}
	}
	return returnErr
}
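// isBindMounted and mntArgs are helpers assumed by bindMountImp. A minimal
// sketch of the mountpoint check, scanning /proc/self/mounts for dst as a
// mount target (assumes "io/ioutil" and "strings"; one possible
// implementation, not necessarily how the real helper works):
func isBindMountedSketch(dst string) (bool, error) {
	data, err := ioutil.ReadFile("/proc/self/mounts")
	if err != nil {
		return false, err
	}
	for _, line := range strings.Split(string(data), "\n") {
		// /proc/self/mounts format: device mountpoint fstype options dump pass
		fields := strings.Fields(line)
		if len(fields) >= 2 && fields[1] == dst {
			return true, nil
		}
	}
	return false, nil
}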
// RunShell runs a predefined service shell command via the service definition
func (a *api) RunShell(config ShellConfig) error {
	client, err := a.connectDAO()
	if err != nil {
		return err
	}

	svc, err := a.GetService(config.ServiceID)
	if err != nil {
		return err
	}

	getSvc := func(svcID string) (service.Service, error) {
		s := service.Service{}
		err := client.GetService(svcID, &s)
		return s, err
	}
	findChild := func(svcID, childName string) (service.Service, error) {
		s := service.Service{}
		err := client.FindChildService(dao.FindChildRequest{svcID, childName}, &s)
		return s, err
	}

	if err := svc.EvaluateRunsTemplate(getSvc, findChild); err != nil {
		return fmt.Errorf("error evaluating service:%s Runs:%+v error:%s", svc.ID, svc.Runs, err)
	}
	command, ok := svc.Runs[config.Command]
	if !ok {
		return fmt.Errorf("command not found for service")
	}

	mounts, err := buildMounts(config.ServicedEndpoint, config.ServiceID, config.Mounts)
	if err != nil {
		return err
	}

	quotedArgs := utils.ShellQuoteArgs(config.Args)
	command = strings.Join([]string{command, quotedArgs}, " ")

	asUser := "******"
	if config.Username != "" && config.Username != "root" {
		asUser = fmt.Sprintf("su - %s -c ", config.Username)
	}

	cfg := shell.ProcessConfig{
		ServiceID:   config.ServiceID,
		IsTTY:       config.IsTTY,
		SaveAs:      config.SaveAs,
		Mount:       mounts,
		Command:     asUser + utils.ShellQuoteArg(command),
		LogToStderr: config.LogToStderr,
	}
	cfg.LogStash.Enable = config.LogStash.Enable
	cfg.LogStash.SettleTime, err = time.ParseDuration(config.LogStash.SettleTime)
	if err != nil {
		return err
	}
	cfg.LogStash.IdleFlushTime, err = time.ParseDuration(config.LogStash.IdleFlushTime)
	if err != nil {
		return err
	}

	// TODO: change me to use sockets
	cmd, err := shell.StartDocker(&cfg, options.Endpoint)
	if err != nil {
		return fmt.Errorf("failed to connect to service: %s", err)
	}
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	err = cmd.Run()
	if _, ok := utils.GetExitStatus(err); !ok {
		glog.Fatalf("abnormal termination from shell command: %s", err)
	}

	dockercli, err := a.connectDocker()
	if err != nil {
		glog.Fatalf("unable to connect to the docker service: %s", err)
	}
	exitcode, err := dockercli.WaitContainer(config.SaveAs)
	if err != nil {
		glog.Fatalf("failure waiting for container: %s", err)
	}
	container, err := dockercli.InspectContainer(config.SaveAs)
	if err != nil {
		glog.Fatalf("cannot acquire information about container: %s (%s)", config.SaveAs, err)
	}
	glog.V(2).Infof("Container ID: %s", container.ID)

	switch exitcode {
	case 0:
		// Commit the container
		label := ""
		glog.V(0).Infof("Committing container")
		if err := client.Commit(container.ID, &label); err != nil {
			glog.Fatalf("Error committing container: %s (%s)", container.ID, err)
		}
		var layers = 0
		if err := client.ImageLayerCount(container.Image, &layers); err != nil {
			glog.Errorf("Counting layers for image %s", svc.ImageID)
		}
		if layers > layer.WARN_LAYER_COUNT {
			glog.Warningf("Image '%s' number of layers (%d) approaching maximum (%d). Please squash image layers.",
				svc.ImageID, layers, layer.MAX_LAYER_COUNT)
		}
	default:
		// Delete the container
		if err := dockercli.StopContainer(container.ID, 10); err != nil {
			glog.Fatalf("failed to stop container: %s (%s)", container.ID, err)
		} else if err := dockercli.RemoveContainer(dockerclient.RemoveContainerOptions{ID: container.ID}); err != nil {
			glog.Fatalf("failed to remove container: %s (%s)", container.ID, err)
		}
		return fmt.Errorf("Command returned non-zero exit code %d. Container not committed.", exitcode)
	}

	return nil
}
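// A hedged usage sketch: populating ShellConfig the way RunShell expects.
// All field values below are illustrative placeholders; note that
// LogStash.SettleTime and LogStash.IdleFlushTime must be parseable by
// time.ParseDuration or RunShell returns early.
func runShellUsageSketch(a *api) error {
	cfg := ShellConfig{
		ServiceID:        "deadbeef",        // hypothetical service ID
		Command:          "upgrade",         // must exist as a key in svc.Runs
		Args:             []string{"--dry-run"},
		Username:         "zenoss",          // anything other than "" or "root" runs via su
		SaveAs:           "upgrade-attempt", // container name used for wait/inspect/commit
		IsTTY:            false,
		ServicedEndpoint: "localhost:4979",  // hypothetical endpoint
	}
	cfg.LogStash.Enable = false
	cfg.LogStash.SettleTime = "5s"
	cfg.LogStash.IdleFlushTime = "1s"
	return a.RunShell(cfg)
}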
// Run executes the controller's main loop and blocks until the service exits
// according to its restart policy or Close() is called.
func (c *Controller) Run() (err error) {
	defer c.shutdown()
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	env := os.Environ()
	env = append(env, "CONTROLPLANE=1")
	env = append(env, fmt.Sprintf("CONTROLPLANE_CONSUMER_URL=http://localhost%s/api/metrics/store", c.options.Metric.Address))
	env = append(env, fmt.Sprintf("CONTROLPLANE_HOST_ID=%s", c.hostID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_TENANT_ID=%s", c.tenantID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_INSTANCE_ID=%s", c.options.Service.InstanceID))
	env = append(env, fmt.Sprintf("CONTROLPLANE_SERVICED_ID=%s", c.options.Service.ID))

	if err := writeEnvFile(env); err != nil {
		return err
	}

	args := []string{"-c", "exec " + strings.Join(c.options.Service.Command, " ")}

	startService := func() (*subprocess.Instance, chan error) {
		service, serviceExited, _ := subprocess.New(time.Second*10, env, "/bin/sh", args...)
		return service, serviceExited
	}

	sendSignal := func(service *subprocess.Instance, sig os.Signal) bool {
		switch {
		case c.PIDFile != "":
			c.forwardSignal(sig)
		case service != nil:
			service.Notify(sig)
		default:
			return false
		}
		return true
	}

	rpcDead, err := c.rpcHealthCheck()
	if err != nil {
		glog.Errorf("Could not setup RPC ping check: %s", err)
		return err
	}
	storageDead, err := c.storageHealthCheck()
	if err != nil {
		glog.Errorf("Could not set up storage check: %s", err)
		return err
	}

	prereqsPassed := make(chan bool)
	var startAfter <-chan time.Time
	var exitAfter <-chan time.Time
	var service *subprocess.Instance = nil
	serviceExited := make(chan error, 1)
	c.watchRemotePorts()

	if err := c.handleControlCenterImports(rpcDead); err != nil {
		glog.Error("Could not setup Control Center specific imports: ", err)
		return err
	}

	go c.checkPrereqs(prereqsPassed, rpcDead)
	go c.reapZombies(rpcDead)

	healthExit := make(chan struct{})
	defer close(healthExit)
	c.kickOffHealthChecks(healthExit)

	doRegisterEndpoints := true
	exited := false

	var shutdownService = func(service *subprocess.Instance, sig os.Signal) {
		c.options.Service.Autorestart = false
		if sendSignal(service, sig) {
			sigc = nil
			prereqsPassed = nil
			startAfter = nil
			rpcDead = nil
			exitAfter = time.After(time.Second * 30)
			close(healthExit)
		} else {
			c.exitStatus = 1
			exited = true
		}
	}

	var reregister <-chan struct{}

	for !exited {
		select {
		case sig := <-sigc:
			glog.Infof("Notifying subprocess of signal %v", sig)
			shutdownService(service, sig)

		case <-exitAfter:
			glog.Infof("Killing unresponsive subprocess")
			sendSignal(service, syscall.SIGKILL)
			c.exitStatus = 1
			exited = true

		case <-prereqsPassed:
			startAfter = time.After(time.Millisecond * 1)
			prereqsPassed = nil

		case exitError := <-serviceExited:
			if !c.options.Service.Autorestart {
				exitStatus, _ := utils.GetExitStatus(exitError)
				if c.options.Logforwarder.Enabled {
					time.Sleep(c.options.Logforwarder.SettleTime)
				}
				glog.Infof("Service Exited with status:%d due to %+v", exitStatus, exitError)
				// set loop to end
				exited = true
				// record the exit code; the deferred cleanup can still run before the process exits
				c.exitStatus = exitStatus
			} else {
				glog.Infof("Restarting service process in 10 seconds.")
				service = nil
				startAfter = time.After(time.Second * 10)
			}

		case <-startAfter:
			glog.Infof("Starting service process.")
			service, serviceExited = startService()
			if doRegisterEndpoints {
				reregister = registerExportedEndpoints(c, rpcDead)
				doRegisterEndpoints = false
			}
			startAfter = nil

		case <-reregister:
			reregister = registerExportedEndpoints(c, rpcDead)

		case <-rpcDead:
			glog.Infof("RPC Server has gone away, cleaning up")
			shutdownService(service, syscall.SIGTERM)

		case <-storageDead:
			glog.Infof("Distributed storage for service %s has gone away; shutting down", c.options.Service.ID)
			shutdownService(service, syscall.SIGTERM)
		}
	}

	// Signal to health check registry that this instance is giving up the ghost.
	client, err := node.NewLBClient(c.options.ServicedEndpoint)
	if err != nil {
		glog.Errorf("Could not create a client to endpoint: %s, %s", c.options.ServicedEndpoint, err)
		return nil
	}
	defer client.Close()
	c.Close()
	var unused int
	client.LogHealthCheck(domain.HealthCheckResult{c.options.Service.ID, c.options.Service.InstanceID, "__instance_shutdown", time.Now().String(), "passed"}, &unused)
	return nil
}
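// rpcDead and storageDead are "closed means dead" channels that the select
// loop above treats as shutdown triggers. A minimal sketch of how such a
// health check could be wired up, assuming a caller-supplied ping function
// (illustrative only; the controller's real rpcHealthCheck and
// storageHealthCheck are not shown here):
func healthCheckSketch(ping func() error, interval time.Duration) <-chan struct{} {
	dead := make(chan struct{})
	go func() {
		for {
			if err := ping(); err != nil {
				glog.Errorf("health check failed, signaling shutdown: %s", err)
				close(dead) // a closed channel makes every subsequent receive fire
				return
			}
			time.Sleep(interval)
		}
	}()
	return dead
}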