//fetch input data func moveInputData(work *core.Workunit) (size int64, err error) { for _, io := range work.Inputs { inputname := io.FileName dataUrl, uerr := io.DataUrl() if uerr != nil { return 0, uerr } if work.Rank > 0 { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", dataUrl, work.IndexType(), work.Part()) } inputFilePath := path.Join(work.Path(), inputname) logger.Debug(2, "mover: fetching input from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+" url="+dataUrl) // this gets file from any downloadable url, not just shock if datamoved, _, err := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress, false); err != nil { return size, err } else { size += datamoved } logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } return }
//fetch prerequisite data (e.g. reference dbs) func movePreData(workunit *core.Workunit) (size int64, err error) { for name, io := range workunit.Predata { predata_directory := path.Join(conf.DATA_PATH, "predata") err = os.MkdirAll(predata_directory, 755) if err != nil { return 0, errors.New("error creating predata_directory: " + err.Error()) } file_path := path.Join(predata_directory, name) if !isFileExisting(file_path) { size, err = shock.FetchFile(file_path, io.Url, workunit.Info.DataToken, io.Uncompress) if err != nil { return 0, errors.New("error in fetchFile:" + err.Error()) } } use_symlink := false linkname := path.Join(workunit.Path(), name) if workunit.Cmd.Dockerimage != "" || strings.HasPrefix(workunit.Cmd.Name, "app:") { // TODO need more save way to detect use of docker use_symlink = false // TODO mechanism if use_symlink { file_path = path.Join(conf.DOCKER_WORKUNIT_PREDATA_DIR, name) // some tasks want to write in predata dir, thus need symlink logger.Debug(1, "dangling symlink:"+linkname+" -> "+file_path) // creation of dangling symlinks is not possible with with os.Symlink, thus use system ln link_out, err := exec.Command("ln", "-s", file_path, linkname).CombinedOutput() logger.Debug(1, fmt.Sprintf("ln returned %s", link_out)) if err != nil { return 0, errors.New("error creating predata file symlink (dangling version): " + err.Error()) } } else { // some programs do not accept symlinks (e.g. emirge), need to copy the file into the work directory // linkname refers to target file now. logger.Debug(1, "copy predata:"+file_path+" -> "+linkname) _, err := shock.CopyFile(file_path, linkname) if err != nil { return 0, fmt.Errorf("error copying file from %s to % s: ", file_path, linkname, err.Error()) } } } else { //linkname := path.Join(workunit.Path(), name) logger.Debug(1, "symlink:"+linkname+" -> "+file_path) err = os.Symlink(file_path, linkname) if err != nil { return 0, errors.New("error creating predata file symlink: " + err.Error()) } } } return }
//fetch prerequisite data (e.g. reference dbs) func movePreData(workunit *core.Workunit) (err error) { for name, io := range workunit.Predata { file_path := fmt.Sprintf("%s/%s", conf.DATA_PATH, name) if !isFileExisting(file_path) { if err = fetchFile(file_path, io.Url, ""); err != nil { return } } //make a link in work dir to predata in conf.DATA_PATH linkname := fmt.Sprintf("%s/%s", workunit.Path(), name) fmt.Printf(linkname + " -> " + file_path + "\n") os.Symlink(file_path, linkname) } return }
//parse workunit, fetch input data, compose command arguments func ParseWorkunitArgs(work *core.Workunit) (err error) { args := []string{} argstr := work.Cmd.Args if argstr == "" { return } argList := parse_arg_string(argstr) for _, arg := range argList { match, err := regexp.Match(`\$\{\w+\}`, []byte(arg)) if err == nil && match { //replace environment variable with its value reg := regexp.MustCompile(`\$\{\w+\}`) vabs := reg.FindAll([]byte(arg), -1) parsedArg := arg for _, vab := range vabs { vb := bytes.TrimPrefix(vab, []byte("${")) vb = bytes.TrimSuffix(vb, []byte("}")) envvalue := os.Getenv(string(vb)) fmt.Printf("%s=%s\n", vb, envvalue) parsedArg = strings.Replace(parsedArg, string(vab), envvalue, 1) } args = append(args, parsedArg) continue } if strings.Contains(arg, "@") { //parse input/output to accessible local file segs := strings.Split(arg, "@") if len(segs) > 2 { return errors.New("invalid format in command args, multiple @ within one arg") } inputname := segs[1] if work.Inputs.Has(inputname) { inputFilePath := path.Join(work.Path(), inputname) parsedArg := fmt.Sprintf("%s%s", segs[0], inputFilePath) args = append(args, parsedArg) } continue } //no @ or $, append directly args = append(args, arg) } work.Cmd.ParsedArgs = args work.State = core.WORK_STAT_PREPARED return nil }
//parse workunit, fetch input data, compose command arguments func ParseWorkunitArgs(work *core.Workunit) (args []string, err error) { argstr := work.Cmd.Args if argstr == "" { return } argList := strings.Fields(argstr) inputsMap := work.Inputs for _, arg := range argList { if strings.Contains(arg, "@") { //parse input/output to accessible local file segs := strings.Split(arg, "@") if len(segs) > 2 { return []string{}, errors.New("invalid format in command args, multiple @ within one arg") } inputname := segs[1] if inputsMap.Has(inputname) { io := inputsMap[inputname] var dataUrl string if work.Rank == 0 { dataUrl = io.DataUrl() } else { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", io.DataUrl(), work.IndexType(), work.Part()) } inputFilePath := fmt.Sprintf("%s/%s", work.Path(), inputname) logger.Debug(2, "mover: fetching input from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+" url="+dataUrl) if err := fetchFile(inputFilePath, dataUrl, work.Info.DataToken); err != nil { //get file from Shock return []string{}, err } logger.Event(event.FILE_READY, "workid="+work.Id+" url="+dataUrl) parsedArg := fmt.Sprintf("%s%s", segs[0], inputFilePath) args = append(args, parsedArg) } } else { //no @, has nothing to do with input/output, append directly args = append(args, arg) } } return args, nil }
//fetch input data func MoveInputData(work *core.Workunit) (size int64, err error) { for _, io := range work.Inputs { inputname := io.FileName // skip if NoFile == true if !io.NoFile { // is file ! dataUrl, uerr := io.DataUrl() if uerr != nil { return 0, uerr } inputFilePath := fmt.Sprintf("%s/%s", work.Path(), inputname) if work.Rank == 0 { if conf.CACHE_ENABLED && io.Node != "" { if file_path, err := StatCacheFilePath(io.Node); err == nil { //make a link in work dir from cached file linkname := fmt.Sprintf("%s/%s", work.Path(), inputname) fmt.Printf("input found in cache, making link: " + file_path + " -> " + linkname + "\n") err = os.Symlink(file_path, linkname) if err == nil { logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } return 0, err } } } else { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", dataUrl, work.IndexType(), work.Part()) } logger.Debug(2, "mover: fetching input file from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+";url="+dataUrl) // download file if datamoved, _, err := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress, false); err != nil { return size, errors.New("shock.FetchFile returned: " + err.Error()) } else { size += datamoved } logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } // download node attributes if requested if io.AttrFile != "" { // get node node, err := shock.ShockGet(io.Host, io.Node, work.Info.DataToken) if err != nil { //return size, err return size, errors.New("shock.ShockGet (node attributes) returned: " + err.Error()) } logger.Debug(2, "mover: fetching input attributes from node:"+node.Id) logger.Event(event.ATTR_IN, "workid="+work.Id+";node="+node.Id) // print node attributes attrFilePath := fmt.Sprintf("%s/%s", work.Path(), io.AttrFile) attr_json, _ := json.Marshal(node.Attributes) if err := ioutil.WriteFile(attrFilePath, attr_json, 0644); err != nil { return size, err } logger.Event(event.ATTR_READY, "workid="+work.Id+";path="+attrFilePath) } } return }
//fetch input data func moveInputData(work *core.Workunit) (size int64, err error) { for inputname, io := range work.Inputs { var dataUrl string if work.Rank == 0 { dataUrl = io.DataUrl() } else { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", io.DataUrl(), work.IndexType(), work.Part()) } inputFilePath := path.Join(work.Path(), inputname) logger.Debug(2, "mover: fetching input from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+" url="+dataUrl) if datamoved, err := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress); err != nil { return size, err } else { size += datamoved } logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } return }
func runPreWorkExecutionScript(work *core.Workunit) (err error) { // conf.PreWorkScript is a string // conf.PreWorkScriptArgs is a string array args := conf.PRE_WORK_SCRIPT_ARGS commandName := conf.PRE_WORK_SCRIPT if commandName == "" { return nil } cmd := exec.Command(commandName, args...) msg := fmt.Sprintf("worker: start pre-work cmd=%s, args=%v", commandName, args) fmt.Println(msg) logger.Debug(1, msg) logger.Event(event.PRE_WORK_START, "workid="+work.Id, "pre-work cmd="+commandName, fmt.Sprintf("args=%v", args)) var stdout, stderr io.ReadCloser if conf.PRINT_APP_MSG { stdout, err = cmd.StdoutPipe() if err != nil { return } stderr, err = cmd.StderrPipe() if err != nil { return } } stdoutFilePath := fmt.Sprintf("%s/%s", work.Path(), conf.STDOUT_FILENAME) stderrFilePath := fmt.Sprintf("%s/%s", work.Path(), conf.STDERR_FILENAME) outfile, err := os.Create(stdoutFilePath) defer outfile.Close() errfile, err := os.Create(stderrFilePath) defer errfile.Close() out_writer := bufio.NewWriter(outfile) defer out_writer.Flush() err_writer := bufio.NewWriter(errfile) defer err_writer.Flush() if conf.PRINT_APP_MSG { go io.Copy(out_writer, stdout) go io.Copy(err_writer, stderr) } if err := cmd.Start(); err != nil { msg := fmt.Sprintf(fmt.Sprintf("start pre-work cmd=%s, err=%s", commandName, err.Error())) fmt.Println(msg) logger.Debug(1, msg) return errors.New(msg) } done := make(chan error) go func() { done <- cmd.Wait() }() select { case <-chankill: if err := cmd.Process.Kill(); err != nil { fmt.Println("failed to kill" + err.Error()) } <-done // allow goroutine to exit fmt.Println("process killed") return errors.New("process killed") case err := <-done: if err != nil { return errors.New(fmt.Sprintf("wait on pre-work cmd=%s, err=%s", commandName, err.Error())) } } logger.Event(event.PRE_WORK_END, "workid="+work.Id) return }
func RunWorkunitDirect(work *core.Workunit) (pstats *core.WorkPerf, err error) { pstats = new(core.WorkPerf) args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return nil, err } commandName := work.Cmd.Name if commandName == "" { return nil, errors.New(fmt.Sprintf("error: command name is empty")) } cmd := exec.Command(commandName, args...) msg := fmt.Sprintf("worker: start cmd=%s, args=%v", commandName, args) fmt.Println(msg) logger.Debug(1, msg) logger.Event(event.WORK_START, "workid="+work.Id, "cmd="+commandName, fmt.Sprintf("args=%v", args)) var stdout, stderr io.ReadCloser if conf.PRINT_APP_MSG { stdout, err = cmd.StdoutPipe() if err != nil { return nil, err } stderr, err = cmd.StderrPipe() if err != nil { return nil, err } } stdoutFilePath := fmt.Sprintf("%s/%s", work.Path(), conf.STDOUT_FILENAME) stderrFilePath := fmt.Sprintf("%s/%s", work.Path(), conf.STDERR_FILENAME) outfile, err := os.Create(stdoutFilePath) defer outfile.Close() errfile, err := os.Create(stderrFilePath) defer errfile.Close() out_writer := bufio.NewWriter(outfile) defer out_writer.Flush() err_writer := bufio.NewWriter(errfile) defer err_writer.Flush() if conf.PRINT_APP_MSG { go io.Copy(out_writer, stdout) go io.Copy(err_writer, stderr) } if err := cmd.Start(); err != nil { return nil, errors.New(fmt.Sprintf("start_cmd=%s, err=%s", commandName, err.Error())) } var MaxMem uint64 = 0 done := make(chan error) memcheck_done := make(chan bool) go func() { done <- cmd.Wait() memcheck_done <- true }() mem_check_interval_here := conf.MEM_CHECK_INTERVAL if mem_check_interval_here == 0 { mem_check_interval_here = 10 * time.Second } go func() { mstats := new(runtime.MemStats) runtime.ReadMemStats(mstats) MaxMem = mstats.Alloc time.Sleep(2 * time.Second) for { select { default: mstats := new(runtime.MemStats) runtime.ReadMemStats(mstats) if mstats.Alloc > MaxMem { MaxMem = mstats.Alloc } time.Sleep(mem_check_interval_here) case <-memcheck_done: return } } }() select { case <-chankill: if err := cmd.Process.Kill(); err != nil { fmt.Println("failed to kill" + err.Error()) } <-done // allow goroutine to exit fmt.Println("process killed") return nil, errors.New("process killed") case err := <-done: if err != nil { return nil, errors.New(fmt.Sprintf("wait_cmd=%s, err=%s", commandName, err.Error())) } } logger.Event(event.WORK_END, "workid="+work.Id) pstats.MaxMemUsage = int64(MaxMem) return }
func RunWorkunitDocker(work *core.Workunit) (pstats *core.WorkPerf, err error) { pstats = new(core.WorkPerf) pstats.MaxMemUsage = -1 pstats.MaxMemoryTotalRss = -1 pstats.MaxMemoryTotalSwap = -1 args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return nil, err } docker_preparation_start := time.Now().Unix() commandName := work.Cmd.Name use_wrapper_script := false wrapper_script_filename := "awe_workunit_wrapper.sh" wrapper_script_filename_host := path.Join(work.Path(), wrapper_script_filename) wrapper_script_filename_docker := path.Join(conf.DOCKER_WORK_DIR, wrapper_script_filename) if len(work.Cmd.Cmd_script) > 0 { use_wrapper_script = true // create wrapper script //conf.DOCKER_WORK_DIR var wrapper_content_string = "#!/bin/bash\n" + strings.Join(work.Cmd.Cmd_script, "\n") + "\n" logger.Debug(1, fmt.Sprintf("write wrapper script: %s\n%s", wrapper_script_filename_host, strings.Join(work.Cmd.Cmd_script, ", "))) var wrapper_content_bytes = []byte(wrapper_content_string) err = ioutil.WriteFile(wrapper_script_filename_host, wrapper_content_bytes, 0755) // not executable: 0644 if err != nil { return nil, errors.New(fmt.Sprintf("error writing wrapper script, err=%s", err.Error())) } } //cmd := exec.Command(commandName, args...) container_name := "AWE_workunit" Dockerimage := work.Cmd.Dockerimage if work.App != nil && work.App.Name != "" { Dockerimage = work.App.AppDef.Dockerimage } if Dockerimage == "" { return nil, errors.New(fmt.Sprintf("Error Dockerimage string empty")) } logger.Debug(1, fmt.Sprintf("Dockerimage: %s", Dockerimage)) use_docker_api := true if conf.DOCKER_BINARY != "API" { use_docker_api = false } var client *docker.Client = nil if use_docker_api { logger.Debug(1, fmt.Sprintf("Using docker API...")) client, err = docker.NewClient(conf.DOCKER_SOCKET) if err != nil { return nil, errors.New(fmt.Sprintf("error creating docker client", err.Error())) } } else { logger.Debug(1, fmt.Sprintf("Using docker docker binary...")) } //imgs, _ := client.ListImages(false) //for _, img := range imgs { // spew.Dump(img) //} // delete any old AWE_container err = RemoveOldAWEContainers(client, container_name) if err != nil { return nil, err } //var node *core.ShockNode = nil // find image in repo (e.g. extract docker image id) node, dockerimage_download_url, err := findDockerImageInShock(Dockerimage, work.Info.DataToken) if err != nil { return nil, errors.New(fmt.Sprintf("Error getting docker url, err=%s", err.Error())) } // TODO attr_json, _ := json.Marshal(node.Attributes) might be better node_attr_map, ok := node.Attributes.(map[string]interface{}) if !ok { return nil, errors.New(fmt.Sprintf("(1) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } dockerimage_id, ok := node_attr_map["id"].(string) if !ok { return nil, errors.New(fmt.Sprintf("(2) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } if dockerimage_id == "" { return nil, errors.New(fmt.Sprintf("Id of Dockerimage=%s not found", Dockerimage)) } logger.Debug(1, fmt.Sprintf("using dockerimage id %s instead of name %s ", dockerimage_id, Dockerimage)) // *** find/inspect image image, err := InspectImage(client, dockerimage_id) if err != nil { logger.Debug(1, fmt.Sprintf("docker image %s is not yet in local repository", Dockerimage)) image_retrieval := "load" // TODO only load is guaraneed to work switch { case image_retrieval == "load": { // for images that have been saved err = dockerLoadImage(client, dockerimage_download_url, work.Info.DataToken) } case image_retrieval == "import": { // for containers that have been exported err = dockerImportImage(client, Dockerimage, work.Info.DataToken) } case image_retrieval == "build": { // to create image from Dockerfile err = dockerBuildImage(client, Dockerimage) } } if err != nil { return nil, errors.New(fmt.Sprintf("Docker image was not correctly imported or built, err=%s", err.Error())) } // example urls // find image : http://shock.metagenomics.anl.gov/node/?query&docker=1&tag=wgerlach/bowtie2:2.2.0 // view node: http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f // download http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f?download if node != nil { } // last test if dockerimage_id != "" { image, err = InspectImage(client, dockerimage_id) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s , %s) was not correctly imported or built, err=%s", Dockerimage, dockerimage_id, err.Error())) } } else { image, err = InspectImage(client, Dockerimage) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s) was not correctly imported or built, err=%s", Dockerimage, err.Error())) } } } else { logger.Debug(1, fmt.Sprintf("docker image %s is already in local repository", Dockerimage)) } if dockerimage_id != image.ID { return nil, errors.New(fmt.Sprintf("error: dockerimage_id != image.ID, %s != %s (%s)", dockerimage_id, image.ID, Dockerimage)) } // tag image to make debugging easier if Dockerimage != "" { Dockerimage_array := strings.Split(Dockerimage, ":") // TODO split by colon is risky tag_opts := docker.TagImageOptions{Repo: Dockerimage_array[0], Tag: Dockerimage_array[1]} err = TagImage(client, dockerimage_id, tag_opts) if err != nil { logger.Error(fmt.Sprintf("warning: tagging of image %s with %s failed, err:", dockerimage_id, Dockerimage, err.Error())) } } // collect environment var docker_environment []string docker_environment_string := "" // this is only for the debug output for key, val := range work.Cmd.Environ.Public { env_pair := key + "=" + val docker_environment = append(docker_environment, env_pair) docker_environment_string += " --env=" + env_pair } if work.Cmd.HasPrivateEnv { private_envs, err := FetchPrivateEnvByWorkId(work.Id) if err != nil { return nil, err } for key, val := range private_envs { env_pair := key + "=" + val docker_environment = append(docker_environment, env_pair) docker_environment_string += " -e " + env_pair } } pipe_output := fmt.Sprintf(" 2> %s 1> %s", conf.STDERR_FILENAME, conf.STDOUT_FILENAME) bash_command := "" if use_wrapper_script { //bash_command = fmt.Sprint("/bin/bash", " ", wrapper_script_filename_docker, " ", pipe_output) // bash for wrapper script bash_command = fmt.Sprint(wrapper_script_filename_docker, " ", pipe_output) } else { bash_command = fmt.Sprint(commandName, " ", strings.Join(args, " "), " ", pipe_output) } logger.Debug(1, fmt.Sprint("bash_command: ", bash_command)) // example: "/bin/bash", "-c", "bowtie2 -h 2> awe_stderr.txt 1> awe_stdout.txt" container_cmd := []string{"/bin/bash", "-c", bash_command} // TODO remove bash if possible, but is needed for piping //var empty_struct struct{} bindstr_workdir := work.Path() + "/:" + conf.DOCKER_WORK_DIR logger.Debug(1, "bindstr_workdir: "+bindstr_workdir) var bindarray = []string{} // only mount predata if it is actually used //fake_predata := "" bindstr_predata := "" volume_str := "" if len(work.Predata) > 0 { predata_directory := path.Join(conf.DATA_PATH, "predata") bindstr_predata = predata_directory + "/:" + conf.DOCKER_WORKUNIT_PREDATA_DIR + ":ro" bindarray = []string{bindstr_workdir, bindstr_predata} //old version volume_str = "--volume=" + bindstr_workdir + " --volume=" + bindstr_predata } else { bindarray = []string{bindstr_workdir} volume_str = "--volume=" + bindstr_workdir } logger.Debug(1, "volume_str: "+volume_str) // version for docker command line docker_commandline_create := []string{ // "-t" would be required if I want to attach to the container later, check again documentation if needed "--name=" + container_name, "--workdir=" + conf.DOCKER_WORK_DIR, volume_str, // for workdir and optionally predata } if docker_environment_string != "" { docker_commandline_create = append(docker_commandline_create, docker_environment_string) } // version for docker API config := docker.Config{Image: dockerimage_id, WorkingDir: conf.DOCKER_WORK_DIR, AttachStdout: true, AttachStderr: true, AttachStdin: false, Cmd: container_cmd, //Volumes: map[string]struct{}{conf.DOCKER_WORK_DIR: struct{}{}}, // old version Volumes: map[string]struct{}{bindstr_workdir: struct{}{}}, Env: docker_environment, } if len(work.Predata) > 0 { config.Volumes[bindstr_predata] = struct{}{} } docker_commandline_create = append(docker_commandline_create, dockerimage_id) // docker_commandline_create = append(docker_commandline_create, container_cmd...) // argument to the "docker create" command opts := docker.CreateContainerOptions{Name: container_name, Config: &config} // note: docker binary mounts on creation, while docker API mounts on start of container container_id := "" // *** create container logger.Debug(1, fmt.Sprintf("creating docker container from image %s (%s)", Dockerimage, dockerimage_id)) if client != nil { container_obj, err := client.CreateContainer(opts) if err == nil { container_id = container_obj.ID } else { return nil, errors.New(fmt.Sprintf("error creating container, err=%s", err.Error())) } } else { container_id, err = CreateContainer(docker_commandline_create) if err != nil { return nil, errors.New(fmt.Sprintf("error creating container, err=%s", err.Error())) } } if container_id == "" { return nil, errors.New(fmt.Sprintf("error creating container, container_id is empty")) } logger.Debug(1, fmt.Sprintf("created docker container with ID: %s", container_id)) // *** start container fake_docker_cmd := "sudo docker run -t -i --name test " + volume_str + " " + docker_environment_string + " --workdir=" + conf.DOCKER_WORK_DIR + " " + dockerimage_id + " " + strings.Join(container_cmd, " ") logger.Debug(1, "fake_docker_cmd ("+Dockerimage+"): "+fake_docker_cmd) logger.Debug(1, "starting docker container...") docker_preparation_end := time.Now().Unix() pstats.DockerPrep = docker_preparation_end - docker_preparation_start logger.Debug(1, fmt.Sprintf("DockerPrep time in seconds: %d", pstats.DockerPrep)) if client != nil { err = client.StartContainer(container_id, &docker.HostConfig{Binds: bindarray}) // weired, seems to be needed //err = client.StartContainer(container_id, &docker.HostConfig{}) } else { err = StartContainer(container_id, volume_str) } if err != nil { return nil, errors.New(fmt.Sprintf("error starting container, id=%s, err=%s", container_id, err.Error())) } defer func(container_id string) { // *** clean up // ** kill container var err_kill error if client != nil { err_kill = client.KillContainer(docker.KillContainerOptions{ID: container_id}) } else { err_kill = KillContainer(container_id) } if err_kill != nil { logger.Error(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err_kill.Error())) } // *** remove Container var err error if client != nil { opts_remove := docker.RemoveContainerOptions{ID: container_id} err = client.RemoveContainer(opts_remove) } else { err = RemoveContainer(container_id) } if err != nil { logger.Error(fmt.Sprintf("error removing container id=%s, err=%s", container_id, err.Error())) } else { logger.Debug(1, "(deferred func) removed docker container") } }(container_id) if client != nil { cont, err := client.InspectContainer(container_id) if err != nil { logger.Error(fmt.Sprintf("error inspecting container=%s, err=%s", container_id, err.Error())) } inspect_filename := path.Join(work.Path(), "container_inspect.json") b_inspect, _ := json.MarshalIndent(cont, "", " ") err = ioutil.WriteFile(inspect_filename, b_inspect, 0666) if err != nil { logger.Error(fmt.Sprintf("error writing inspect file for container=%s, err=%s", container_id, err.Error())) } else { logger.Debug(1, fmt.Sprintf("wrote %s for container %s", inspect_filename, container_id)) } } var status int = 0 // wait for container to finish done := make(chan error) go func() { var errwait error if client != nil { status, errwait = client.WaitContainer(container_id) } else { status, errwait = WaitContainer(container_id) } done <- errwait // inform main function if conf.MEM_CHECK_INTERVAL != 0 { done <- errwait // inform memory checker } }() var MaxMem int64 = -1 var max_memory_total_rss int64 = -1 var max_memory_total_swap int64 = -1 memory_stat_filename := "" if conf.MEM_CHECK_INTERVAL != 0 { // documentation: https://docs.docker.com/articles/runmetrics/ // e.g. ubuntu: /sys/fs/cgroup/memory/docker/[ID]/memory.stat // coreos: /sys/fs/cgroup/memory/system.slice/docker-[ID].scope/memory.stat memory_stat_filename = strings.Replace(conf.CGROUP_MEMORY_DOCKER_DIR, "[ID]", container_id, -1) if _, err := os.Stat(memory_stat_filename); os.IsNotExist(err) { logger.Error("warning: memory measurement requested, but no memory.stat found: %s" + memory_stat_filename) memory_stat_filename = "" } } if conf.MEM_CHECK_INTERVAL != 0 && memory_stat_filename != "" { go func() { // memory checker for { select { case err_mem := <-done: if err_mem != nil { logger.Error("channel done returned error: " + err_mem.Error()) } return default: } var memory_total_rss int64 = -1 var memory_total_swap int64 = -1 memory_stat_file, err_mem := os.Open(memory_stat_filename) if err_mem != nil { logger.Error("warning: error opening memory_stat_file file:" + err_mem.Error()) time.Sleep(conf.MEM_CHECK_INTERVAL) continue } // Closes the file when we leave the scope of the current function, // this makes sure we never forget to close the file if the // function can exit in multiple places. memory_stat_file_scanner := bufio.NewScanner(memory_stat_file) memory_total_rss_read := false memory_total_swap_read := false // scanner.Scan() advances to the next token returning false if an error was encountered for memory_stat_file_scanner.Scan() { line := memory_stat_file_scanner.Text() if strings.HasPrefix(line, "total_rss ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_rss, err = strconv.ParseInt(strings.TrimPrefix(line, "total_rss "), 10, 64) if err != nil { memory_total_rss = -1 } memory_total_rss_read = true } else if strings.HasPrefix(line, "total_swap ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_swap, err = strconv.ParseInt(strings.TrimPrefix(line, "total_swap "), 10, 64) if err != nil { memory_total_swap = -1 } memory_total_swap_read = true } else { continue } if memory_total_rss_read && memory_total_swap_read { // we found all information we need, leave the loop break } } // When finished scanning if any error other than io.EOF occured // it will be returned by scanner.Err(). if err := memory_stat_file_scanner.Err(); err != nil { logger.Error(fmt.Sprintf("warning: could no read memory usage from cgroups=%s", memory_stat_file_scanner.Err())) //err = nil } else { // RSS maxium if memory_total_rss >= 0 && memory_total_rss > max_memory_total_rss { max_memory_total_rss = memory_total_rss } // SWAP maximum if memory_total_swap >= 0 && memory_total_swap > max_memory_total_swap { max_memory_total_swap = memory_total_swap } // RSS+SWAP maximum if memory_total_rss >= 0 && memory_total_swap >= 0 { memory_combined := memory_total_rss + memory_total_swap if memory_combined > MaxMem { MaxMem = memory_combined } } logger.Debug(1, fmt.Sprintf("memory: rss=%d, swap=%d, max_rss=%d max_swap=%d max_combined=%d", memory_total_rss, memory_total_swap, max_memory_total_rss, max_memory_total_swap, MaxMem)) } memory_stat_file.Close() // defer does not work in for loop ! //time.Sleep(5 * time.Second) time.Sleep(conf.MEM_CHECK_INTERVAL) } }() } else { logger.Debug(1, "memory checking disabled") } select { case <-chankill: logger.Debug(1, fmt.Sprint("chankill, try to kill conatiner %s... ", container_id)) if client != nil { err = client.KillContainer(docker.KillContainerOptions{ID: container_id}) } else { err = KillContainer(container_id) } if err != nil { return nil, errors.New(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err.Error())) } <-done // allow goroutine to exit return nil, errors.New("process killed as requested from chankill") case err = <-done: logger.Debug(1, fmt.Sprint("(1)docker wait returned with status ", status)) if err != nil { return nil, errors.New(fmt.Sprintf("dockerWait=%s, err=%s", commandName, err.Error())) } } logger.Debug(1, fmt.Sprint("(2)docker wait returned with status ", status)) if status != 0 { logger.Debug(1, fmt.Sprint("WaitContainer returned non-zero status ", status)) return nil, errors.New(fmt.Sprintf("error WaitContainer returned non-zero status=%d", status)) } logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) pstats.MaxMemUsage = MaxMem pstats.MaxMemoryTotalRss = max_memory_total_rss pstats.MaxMemoryTotalSwap = max_memory_total_swap logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) return }
//fetch prerequisite data (e.g. reference dbs) func movePreData(workunit *core.Workunit) (size int64, err error) { for _, io := range workunit.Predata { name := io.FileName predata_directory := path.Join(conf.DATA_PATH, "predata") err = os.MkdirAll(predata_directory, 755) if err != nil { return 0, errors.New("error creating predata_directory: " + err.Error()) } file_path := path.Join(predata_directory, name) dataUrl, uerr := io.DataUrl() if uerr != nil { return 0, uerr } // get shock and local md5sums isShockPredata := true node_md5 := "" if io.Node == "-" { isShockPredata = false } else { node, err := shock.ShockGet(io.Host, io.Node, workunit.Info.DataToken) if err != nil { return 0, errors.New("error in ShockGet: " + err.Error()) } // rename file to be md5sum node_md5 = node.File.Checksum["md5"] file_path = path.Join(predata_directory, node_md5) } // file does not exist or its md5sum is wrong if !isFileExisting(file_path) { logger.Debug(2, "mover: fetching predata from url: "+dataUrl) logger.Event(event.PRE_IN, "workid="+workunit.Id+" url="+dataUrl) var md5sum string file_path_part := file_path + ".part" // temporary name // this gets file from any downloadable url, not just shock size, md5sum, err = shock.FetchFile(file_path_part, dataUrl, workunit.Info.DataToken, io.Uncompress, isShockPredata) if err != nil { return 0, errors.New("error in fetchFile: " + err.Error()) } os.Rename(file_path_part, file_path) if err != nil { return 0, errors.New("error renaming after download of preData: " + err.Error()) } if isShockPredata { if node_md5 != md5sum { return 0, errors.New("error downloaded file md5 does not mach shock md5, node: " + io.Node) } else { logger.Debug(2, "mover: predata "+name+" has md5sum "+md5sum) } } } else { logger.Debug(2, "mover: predata already exists: "+name) } // timstamp for last access - future caching accessfile, err := os.Create(file_path + ".access") if err != nil { return 0, errors.New("error creating predata access file: " + err.Error()) } defer accessfile.Close() accessfile.WriteString(time.Now().String()) // determine if running with docker wants_docker := false if workunit.Cmd.Dockerimage != "" || workunit.App != nil { // TODO need more save way to detect use of docker wants_docker = true } if wants_docker && conf.USE_DOCKER == "no" { return 0, errors.New("error: use of docker images has been disabled by administrator") } if wants_docker == false && conf.USE_DOCKER == "only" { return 0, errors.New("error: use of docker images is enforced by administrator") } // copy or create symlink in work dir linkname := path.Join(workunit.Path(), name) if conf.NO_SYMLINK { // some programs do not accept symlinks (e.g. emirge), need to copy the file into the work directory logger.Debug(1, "copy predata: "+file_path+" -> "+linkname) _, err := shock.CopyFile(file_path, linkname) if err != nil { return 0, fmt.Errorf("error copying file from %s to % s: ", file_path, linkname, err.Error()) } } else { if wants_docker { // new filepath for predata dir in container var docker_file_path string if isShockPredata { docker_file_path = path.Join(conf.DOCKER_WORKUNIT_PREDATA_DIR, node_md5) } else { docker_file_path = path.Join(conf.DOCKER_WORKUNIT_PREDATA_DIR, name) } logger.Debug(1, "creating dangling symlink: "+linkname+" -> "+docker_file_path) // dangling link will give error, we ignore that here _ = os.Symlink(docker_file_path, linkname) } else { logger.Debug(1, "symlink:"+linkname+" -> "+file_path) err = os.Symlink(file_path, linkname) if err != nil { return 0, errors.New("error creating predata file symlink: " + err.Error()) } } } logger.Event(event.PRE_READY, "workid="+workunit.Id+";url="+dataUrl) } return }
//parse workunit, fetch input data, compose command arguments func ParseWorkunitArgs(work *core.Workunit) (err error) { args := []string{} argstr := work.Cmd.Args if argstr == "" { return } workpath := work.Path() if len(work.Cmd.Dockerimage) > 0 { workpath = conf.DOCKER_WORK_DIR } // use better file name replacement technique var virtual_cmd_script = []string{argstr} replace_filepath_with_full_filepath(work.Inputs, workpath, virtual_cmd_script) argstr = virtual_cmd_script[0] argList := parse_arg_string(argstr) for _, arg := range argList { match, err := regexp.Match(`\$\{\w+\}`, []byte(arg)) if err == nil && match { //replace environment variable with its value reg := regexp.MustCompile(`\$\{\w+\}`) vabs := reg.FindAll([]byte(arg), -1) parsedArg := arg for _, vab := range vabs { vb := bytes.TrimPrefix(vab, []byte("${")) vb = bytes.TrimSuffix(vb, []byte("}")) envvalue := os.Getenv(string(vb)) fmt.Printf("%s=%s\n", vb, envvalue) parsedArg = strings.Replace(parsedArg, string(vab), envvalue, 1) } args = append(args, parsedArg) continue } // this might be deprecated by replace_filepath_with_full_filepath if strings.Contains(arg, "@") { //parse input/output to accessible local file segs := strings.Split(arg, "@") if len(segs) > 2 { return errors.New("invalid format in command args, multiple @ within one arg") } inputname := segs[1] for _, io := range work.Inputs { if io.FileName == inputname { inputFilePath := path.Join(workpath, inputname) parsedArg := fmt.Sprintf("%s%s", segs[0], inputFilePath) args = append(args, parsedArg) } } continue } //no @ or $, append directly args = append(args, arg) } work.Cmd.ParsedArgs = args work.State = core.WORK_STAT_PREPARED return nil }
func UploadOutputData(work *core.Workunit) (size int64, err error) { for _, io := range work.Outputs { name := io.FileName var local_filepath string //local file name generated by the cmd var file_path string //file name to be uploaded to shock if io.Directory != "" { local_filepath = fmt.Sprintf("%s/%s/%s", work.Path(), io.Directory, name) //if specified, rename the local file name to the specified shock node file name //otherwise use the local name as shock file name file_path = local_filepath if io.ShockFilename != "" { file_path = fmt.Sprintf("%s/%s/%s", work.Path(), io.Directory, io.ShockFilename) os.Rename(local_filepath, file_path) } } else { local_filepath = fmt.Sprintf("%s/%s", work.Path(), name) file_path = local_filepath if io.ShockFilename != "" { file_path = fmt.Sprintf("%s/%s", work.Path(), io.ShockFilename) os.Rename(local_filepath, file_path) } } if (io.Type == "copy") || (io.Type == "update") || io.NoFile { file_path = "" } else if fi, err := os.Stat(file_path); err != nil { //skip this output if missing file and optional if io.Optional { continue } else { return size, errors.New(fmt.Sprintf("output %s not generated for workunit %s", name, work.Id)) } } else { if io.Nonzero && fi.Size() == 0 { return size, errors.New(fmt.Sprintf("workunit %s generated zero-sized output %s while non-zero-sized file required", work.Id, name)) } size += fi.Size() } logger.Debug(1, "deliverer: push output to shock, filename="+name) logger.Event(event.FILE_OUT, "workid="+work.Id, "filename="+name, fmt.Sprintf("url=%s/node/%s", io.Host, io.Node)) //upload attribute file to shock IF attribute file is specified in outputs AND it is found in local directory. var attrfile_path string = "" if io.AttrFile != "" { attrfile_path = fmt.Sprintf("%s/%s", work.Path(), io.AttrFile) if fi, err := os.Stat(attrfile_path); err != nil || fi.Size() == 0 { attrfile_path = "" } } //set io.FormOptions["parent_node"] if not present and io.FormOptions["parent_name"] exists if parent_name, ok := io.FormOptions["parent_name"]; ok { for _, in_io := range work.Inputs { if in_io.FileName == parent_name { io.FormOptions["parent_node"] = in_io.Node } } } logger.Debug(1, "UploadOutputData, core.PutFileToShock: "+file_path) if err := core.PutFileToShock(file_path, io.Host, io.Node, work.Rank, work.Info.DataToken, attrfile_path, io.Type, io.FormOptions, io.NodeAttr); err != nil { time.Sleep(3 * time.Second) //wait for 3 seconds and try again if err := core.PutFileToShock(file_path, io.Host, io.Node, work.Rank, work.Info.DataToken, attrfile_path, io.Type, io.FormOptions, io.NodeAttr); err != nil { fmt.Errorf("push file error\n") logger.Error("op=pushfile,err=" + err.Error()) return size, err } } logger.Event(event.FILE_DONE, "workid="+work.Id, "filename="+name, fmt.Sprintf("url=%s/node/%s", io.Host, io.Node)) if io.ShockIndex != "" { if err := core.ShockPutIndex(io.Host, io.Node, io.ShockIndex, work.Info.DataToken); err != nil { logger.Error("warning: fail to create index on shock for shock node: " + io.Node) } } if conf.CACHE_ENABLED { //move output files to cache cacheDir := getCacheDir(io.Node) if err := os.MkdirAll(cacheDir, 0777); err != nil { logger.Error("cache os.MkdirAll():" + err.Error()) } cacheFilePath := getCacheFilePath(io.Node) //use the same naming mechanism used by shock server //fmt.Printf("moving file from %s to %s\n", file_path, cacheFilePath) if err := os.Rename(file_path, cacheFilePath); err != nil { logger.Error("cache os.Rename():" + err.Error()) } } } return }
func RunWorkunitDocker(work *core.Workunit) (pstats *core.WorkPerf, err error) { pstats = new(core.WorkPerf) pstats.MaxMemUsage = -1 pstats.MaxMemoryTotalRss = -1 pstats.MaxMemoryTotalSwap = -1 args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return nil, err } commandName := work.Cmd.Name use_wrapper_script := false wrapper_script_filename := "awe_workunit_wrapper.sh" wrapper_script_filename_host := path.Join(work.Path(), wrapper_script_filename) wrapper_script_filename_docker := path.Join(conf.DOCKER_WORK_DIR, wrapper_script_filename) if strings.HasPrefix(commandName, "app:") { if len(work.Cmd.ParsedArgs) > 0 { use_wrapper_script = true // create wrapper script //conf.DOCKER_WORK_DIR var wrapper_content_string = "#!/bin/bash\n" + strings.Join(work.Cmd.Cmd_script, "\n") + "\n" logger.Debug(1, fmt.Sprintf("write wrapper script: %s\n%s", wrapper_script_filename_host, strings.Join(work.Cmd.Cmd_script, ", "))) var wrapper_content_bytes = []byte(wrapper_content_string) err = ioutil.WriteFile(wrapper_script_filename_host, wrapper_content_bytes, 0644) if err != nil { return nil, errors.New(fmt.Sprintf("error writing wrapper script, err=%s", err.Error())) } } } //cmd := exec.Command(commandName, args...) container_name := "AWE_workunit" Dockerimage := work.Cmd.Dockerimage logger.Debug(1, fmt.Sprintf("Dockerimage: %s", Dockerimage)) endpoint := "unix:///var/run/docker.sock" client, err := docker.NewClient(endpoint) if err != nil { return nil, errors.New(fmt.Sprintf("error creating docker client", err.Error())) } //imgs, _ := client.ListImages(false) //for _, img := range imgs { // spew.Dump(img) //} // delete any old AWE_container err = RemoveOldAWEContainers(client, container_name) if err != nil { return nil, err } //var node *core.ShockNode = nil // find image in repo (e.g. extract docker image id) node, dockerimage_download_url, err := findDockerImageInShock(Dockerimage) if err != nil { return nil, errors.New(fmt.Sprintf("Error getting docker url, err=%s", err.Error())) } // TODO attr_json, _ := json.Marshal(node.Attributes) might be better node_attr_map, ok := node.Attributes.(map[string]interface{}) if !ok { return nil, errors.New(fmt.Sprintf("(1) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } dockerimage_id, ok := node_attr_map["id"].(string) if !ok { return nil, errors.New(fmt.Sprintf("(2) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } if dockerimage_id == "" { return nil, errors.New(fmt.Sprintf("Id of Dockerimage=%s not found", Dockerimage)) } logger.Debug(1, fmt.Sprintf("using dockerimage id %s instead of name %s ", dockerimage_id, Dockerimage)) // *** find/inspect image image, err := client.InspectImage(dockerimage_id) if err != nil { logger.Debug(1, fmt.Sprintf("docker image %s is not yet in local repository", Dockerimage)) image_retrieval := "load" // TODO only load is guaraneed to work switch { case image_retrieval == "load": { // for images that have been saved err = dockerLoadImage(client, dockerimage_download_url) } case image_retrieval == "import": { // for containers that have been exported err = dockerImportImage(client, Dockerimage) } case image_retrieval == "build": { // to create image from Dockerfile err = dockerBuildImage(client, Dockerimage) } } if err != nil { return nil, errors.New(fmt.Sprintf("Docker image was not correctly imported or built, err=%s", err.Error())) } // example urls // find image : http://shock.metagenomics.anl.gov/node/?query&docker=1&tag=wgerlach/bowtie2:2.2.0 // view node: http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f // download http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f?download if node != nil { } // last test if dockerimage_id != "" { image, err = client.InspectImage(dockerimage_id) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s , %s) was not correctly imported or built, err=%s", Dockerimage, dockerimage_id, err.Error())) } } else { image, err = client.InspectImage(Dockerimage) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s) was not correctly imported or built, err=%s", Dockerimage, err.Error())) } } } else { logger.Debug(1, fmt.Sprintf("docker image %s is already in local repository", Dockerimage)) } if dockerimage_id != image.ID { return nil, errors.New(fmt.Sprintf("error: dockerimage_id != image.ID, %s != %s (%s)", dockerimage_id, image.ID, Dockerimage)) } // tag image to make debugging easier if Dockerimage != "" { Dockerimage_array := strings.Split(Dockerimage, ":") // TODO split by colon is risky tag_opts := docker.TagImageOptions{Repo: Dockerimage_array[0], Tag: Dockerimage_array[1]} err = client.TagImage(dockerimage_id, tag_opts) if err != nil { logger.Error(fmt.Sprintf("warning: tagging of image %s with %s failed, err:", dockerimage_id, Dockerimage, err.Error())) } } pipe_output := fmt.Sprintf(" 2> %s 1> %s", conf.STDERR_FILENAME, conf.STDOUT_FILENAME) bash_command := "" if use_wrapper_script { bash_command = fmt.Sprint("/bin/bash", " ", wrapper_script_filename_docker, " ", pipe_output) } else { bash_command = fmt.Sprint(commandName, " ", strings.Join(args, " "), " ", pipe_output) } logger.Debug(1, fmt.Sprint("bash_command: ", bash_command)) // example: "/bin/bash", "-c", "bowtie2 -h 2> awe_stderr.txt 1> awe_stdout.txt" container_cmd := []string{"/bin/bash", "-c", bash_command} // TODO remove bash if possible, but is needed for piping config := docker.Config{Image: dockerimage_id, WorkingDir: conf.DOCKER_WORK_DIR, AttachStdout: true, AttachStderr: true, AttachStdin: false, Cmd: container_cmd, Volumes: map[string]struct{}{conf.DOCKER_WORK_DIR: {}}} opts := docker.CreateContainerOptions{Name: container_name, Config: &config} // *** create container (or find container ?) logger.Debug(1, fmt.Sprintf("creating docker container from image %s (%s)", Dockerimage, dockerimage_id)) container_incomplete, err := client.CreateContainer(opts) if err != nil { return nil, errors.New(fmt.Sprintf("error creating container, err=%s", err.Error())) } container_id := container_incomplete.ID logger.Debug(1, fmt.Sprintf("created docker container with ID: %s", container_id)) // *** inspect the new container if false { container, err := client.InspectContainer(container_id) if err != nil { return nil, errors.New(fmt.Sprintf("error inspecting container, err=%s", err.Error())) } //spew.Dump(container) //spew.Dump(container.Config) fmt.Println("name: ", container.Name) } // *** start container var bindarray = []string{} bindstr_workdir := work.Path() + "/:" + conf.DOCKER_WORK_DIR logger.Debug(1, "bindstr_workdir: "+bindstr_workdir) // only mount predata if it is actually used fake_predata := "" if len(work.Predata) > 0 { predata_directory := path.Join(conf.DATA_PATH, "predata") bindstr_predata := predata_directory + "/:" + "/db:ro" // TODO put in config logger.Debug(1, "bindstr_predata: "+bindstr_predata) fake_predata = " -v " + bindstr_predata bindarray = []string{bindstr_workdir, bindstr_predata} } else { bindarray = []string{bindstr_workdir} } fake_docker_cmd := "sudo docker run -t -i --name test -v " + bindstr_workdir + fake_predata + " --workdir=" + conf.DOCKER_WORK_DIR + " " + dockerimage_id + " " + strings.Join(container_cmd, " ") logger.Debug(1, "fake_docker_cmd ("+Dockerimage+"): "+fake_docker_cmd) logger.Debug(1, "starting docker container...") err = client.StartContainer(container_id, &docker.HostConfig{Binds: bindarray}) if err != nil { return nil, errors.New(fmt.Sprintf("error starting container, id=%s, err=%s", container_id, err.Error())) } defer func(container_id string) { // *** clean up // ** kill container err_kill := client.KillContainer(docker.KillContainerOptions{ID: container_id}) if err_kill != nil { logger.Error(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err_kill.Error())) } // *** remove Container opts_remove := docker.RemoveContainerOptions{ID: container_id} if err := client.RemoveContainer(opts_remove); err != nil { logger.Error(fmt.Sprintf("error removing container id=%s, err=%s", container_id, err.Error())) } else { logger.Debug(1, "(deferred func) removed docker container") } }(container_id) var status int = 0 // wait for container to finish done := make(chan error) go func() { var errwait error status, errwait = client.WaitContainer(container_id) done <- errwait // inform main function done <- errwait // inform memory checker }() var MaxMem int64 = -1 var max_memory_total_rss int64 = -1 var max_memory_total_swap int64 = -1 // e.g. /sys/fs/cgroup/memory/docker/<id>/memory.stat memory_stat_filename := path.Join(conf.CGROUP_MEMORY_DOCKER_DIR, container_id, "/memory.stat") go func() { // memory checker for { select { case err_mem := <-done: if err_mem != nil { logger.Error("channel done returned error: " + err_mem.Error()) } return default: } var memory_total_rss int64 = -1 var memory_total_swap int64 = -1 memory_stat_file, err_mem := os.Open(memory_stat_filename) if err_mem != nil { logger.Error("warning: error opening memory_stat_file file:" + err_mem.Error()) time.Sleep(conf.MEM_CHECK_INTERVAL) continue } // Closes the file when we leave the scope of the current function, // this makes sure we never forget to close the file if the // function can exit in multiple places. memory_stat_file_scanner := bufio.NewScanner(memory_stat_file) memory_total_rss_read := false memory_total_swap_read := false // scanner.Scan() advances to the next token returning false if an error was encountered for memory_stat_file_scanner.Scan() { line := memory_stat_file_scanner.Text() if strings.HasPrefix(line, "total_rss ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_rss, err = strconv.ParseInt(strings.TrimPrefix(line, "total_rss "), 10, 64) if err != nil { memory_total_rss = -1 } memory_total_rss_read = true } else if strings.HasPrefix(line, "total_swap ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_swap, err = strconv.ParseInt(strings.TrimPrefix(line, "total_swap "), 10, 64) if err != nil { memory_total_swap = -1 } memory_total_swap_read = true } else { continue } if memory_total_rss_read && memory_total_swap_read { // we found all information we need, leave the loop break } } // When finished scanning if any error other than io.EOF occured // it will be returned by scanner.Err(). if err := memory_stat_file_scanner.Err(); err != nil { logger.Error(fmt.Sprintf("warning: could no read memory usage from cgroups=%s", memory_stat_file_scanner.Err())) //err = nil } else { // RSS maxium if memory_total_rss >= 0 && memory_total_rss > max_memory_total_rss { max_memory_total_rss = memory_total_rss } // SWAP maximum if memory_total_swap >= 0 && memory_total_swap > max_memory_total_swap { max_memory_total_swap = memory_total_swap } // RSS+SWAP maximum if memory_total_rss >= 0 && memory_total_swap >= 0 { memory_combined := memory_total_rss + memory_total_swap if memory_combined > MaxMem { MaxMem = memory_combined } } logger.Debug(1, fmt.Sprintf("memory: rss=%d, swap=%d, max_rss=%d max_swap=%d max_combined=%d", memory_total_rss, memory_total_swap, max_memory_total_rss, max_memory_total_swap, MaxMem)) } memory_stat_file.Close() // defer does not work in for loop ! //time.Sleep(5 * time.Second) time.Sleep(conf.MEM_CHECK_INTERVAL) } }() select { case <-chankill: logger.Debug(1, fmt.Sprint("chankill, try to kill conatiner %s... ", container_id)) if err := client.KillContainer(docker.KillContainerOptions{ID: container_id}); err != nil { return nil, errors.New(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err.Error())) } <-done // allow goroutine to exit return nil, errors.New("process killed as requested from chankill") case err = <-done: if err != nil { return nil, errors.New(fmt.Sprintf("wait_cmd=%s, err=%s", commandName, err.Error())) } logger.Debug(1, fmt.Sprint("(1)docker command returned with status ", status)) } logger.Debug(1, fmt.Sprint("(2)docker command returned with status ", status)) if status != 0 { logger.Debug(1, fmt.Sprint("WaitContainer returned non-zero status ", status)) return nil, errors.New(fmt.Sprintf("error WaitContainer returned non-zero status=%d", status)) } logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) pstats.MaxMemUsage = MaxMem pstats.MaxMemoryTotalRss = max_memory_total_rss pstats.MaxMemoryTotalSwap = max_memory_total_swap logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) return }