func RunWorkunit(work *core.Workunit) (err error) { args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return err } commandName := work.Cmd.Name cmd := exec.Command(commandName, args...) msg := fmt.Sprintf("worker: start cmd=%s, args=%v", commandName, args) fmt.Println(msg) logger.Debug(1, msg) logger.Event(event.WORK_START, "workid="+work.Id, "cmd="+commandName, fmt.Sprintf("args=%v", args)) var stdout, stderr io.ReadCloser if conf.PRINT_APP_MSG { stdout, err = cmd.StdoutPipe() if err != nil { return err } stderr, err = cmd.StderrPipe() if err != nil { return err } } if err := cmd.Start(); err != nil { return errors.New(fmt.Sprintf("start_cmd=%s, err=%s", commandName, err.Error())) } if conf.PRINT_APP_MSG { go io.Copy(os.Stdout, stdout) go io.Copy(os.Stderr, stderr) } done := make(chan error) go func() { done <- cmd.Wait() }() select { case <-chankill: if err := cmd.Process.Kill(); err != nil { fmt.Println("failed to kill" + err.Error()) } <-done // allow goroutine to exit fmt.Println("process killed") return errors.New("process killed") case err := <-done: if err != nil { return errors.New(fmt.Sprintf("wait_cmd=%s, err=%s", commandName, err.Error())) } } logger.Event(event.WORK_END, "workid="+work.Id) return }
func RunWorkunitDirect(work *core.Workunit) (pstats *core.WorkPerf, err error) { pstats = new(core.WorkPerf) args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return nil, err } commandName := work.Cmd.Name if commandName == "" { return nil, errors.New(fmt.Sprintf("error: command name is empty")) } cmd := exec.Command(commandName, args...) msg := fmt.Sprintf("worker: start cmd=%s, args=%v", commandName, args) fmt.Println(msg) logger.Debug(1, msg) logger.Event(event.WORK_START, "workid="+work.Id, "cmd="+commandName, fmt.Sprintf("args=%v", args)) var stdout, stderr io.ReadCloser if conf.PRINT_APP_MSG { stdout, err = cmd.StdoutPipe() if err != nil { return nil, err } stderr, err = cmd.StderrPipe() if err != nil { return nil, err } } stdoutFilePath := fmt.Sprintf("%s/%s", work.Path(), conf.STDOUT_FILENAME) stderrFilePath := fmt.Sprintf("%s/%s", work.Path(), conf.STDERR_FILENAME) outfile, err := os.Create(stdoutFilePath) defer outfile.Close() errfile, err := os.Create(stderrFilePath) defer errfile.Close() out_writer := bufio.NewWriter(outfile) defer out_writer.Flush() err_writer := bufio.NewWriter(errfile) defer err_writer.Flush() if conf.PRINT_APP_MSG { go io.Copy(out_writer, stdout) go io.Copy(err_writer, stderr) } if err := cmd.Start(); err != nil { return nil, errors.New(fmt.Sprintf("start_cmd=%s, err=%s", commandName, err.Error())) } var MaxMem uint64 = 0 done := make(chan error) memcheck_done := make(chan bool) go func() { done <- cmd.Wait() memcheck_done <- true }() mem_check_interval_here := conf.MEM_CHECK_INTERVAL if mem_check_interval_here == 0 { mem_check_interval_here = 10 * time.Second } go func() { mstats := new(runtime.MemStats) runtime.ReadMemStats(mstats) MaxMem = mstats.Alloc time.Sleep(2 * time.Second) for { select { default: mstats := new(runtime.MemStats) runtime.ReadMemStats(mstats) if mstats.Alloc > MaxMem { MaxMem = mstats.Alloc } time.Sleep(mem_check_interval_here) case <-memcheck_done: return } } }() select { case <-chankill: if err := cmd.Process.Kill(); err != nil { fmt.Println("failed to kill" + err.Error()) } <-done // allow goroutine to exit fmt.Println("process killed") return nil, errors.New("process killed") case err := <-done: if err != nil { return nil, errors.New(fmt.Sprintf("wait_cmd=%s, err=%s", commandName, err.Error())) } } logger.Event(event.WORK_END, "workid="+work.Id) pstats.MaxMemUsage = int64(MaxMem) return }
func RunWorkunitDocker(work *core.Workunit) (pstats *core.WorkPerf, err error) { pstats = new(core.WorkPerf) pstats.MaxMemUsage = -1 pstats.MaxMemoryTotalRss = -1 pstats.MaxMemoryTotalSwap = -1 args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return nil, err } docker_preparation_start := time.Now().Unix() commandName := work.Cmd.Name use_wrapper_script := false wrapper_script_filename := "awe_workunit_wrapper.sh" wrapper_script_filename_host := path.Join(work.Path(), wrapper_script_filename) wrapper_script_filename_docker := path.Join(conf.DOCKER_WORK_DIR, wrapper_script_filename) if len(work.Cmd.Cmd_script) > 0 { use_wrapper_script = true // create wrapper script //conf.DOCKER_WORK_DIR var wrapper_content_string = "#!/bin/bash\n" + strings.Join(work.Cmd.Cmd_script, "\n") + "\n" logger.Debug(1, fmt.Sprintf("write wrapper script: %s\n%s", wrapper_script_filename_host, strings.Join(work.Cmd.Cmd_script, ", "))) var wrapper_content_bytes = []byte(wrapper_content_string) err = ioutil.WriteFile(wrapper_script_filename_host, wrapper_content_bytes, 0755) // not executable: 0644 if err != nil { return nil, errors.New(fmt.Sprintf("error writing wrapper script, err=%s", err.Error())) } } //cmd := exec.Command(commandName, args...) container_name := "AWE_workunit" Dockerimage := work.Cmd.Dockerimage if work.App != nil && work.App.Name != "" { Dockerimage = work.App.AppDef.Dockerimage } if Dockerimage == "" { return nil, errors.New(fmt.Sprintf("Error Dockerimage string empty")) } logger.Debug(1, fmt.Sprintf("Dockerimage: %s", Dockerimage)) use_docker_api := true if conf.DOCKER_BINARY != "API" { use_docker_api = false } var client *docker.Client = nil if use_docker_api { logger.Debug(1, fmt.Sprintf("Using docker API...")) client, err = docker.NewClient(conf.DOCKER_SOCKET) if err != nil { return nil, errors.New(fmt.Sprintf("error creating docker client", err.Error())) } } else { logger.Debug(1, fmt.Sprintf("Using docker docker binary...")) } //imgs, _ := client.ListImages(false) //for _, img := range imgs { // spew.Dump(img) //} // delete any old AWE_container err = RemoveOldAWEContainers(client, container_name) if err != nil { return nil, err } //var node *core.ShockNode = nil // find image in repo (e.g. extract docker image id) node, dockerimage_download_url, err := findDockerImageInShock(Dockerimage, work.Info.DataToken) if err != nil { return nil, errors.New(fmt.Sprintf("Error getting docker url, err=%s", err.Error())) } // TODO attr_json, _ := json.Marshal(node.Attributes) might be better node_attr_map, ok := node.Attributes.(map[string]interface{}) if !ok { return nil, errors.New(fmt.Sprintf("(1) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } dockerimage_id, ok := node_attr_map["id"].(string) if !ok { return nil, errors.New(fmt.Sprintf("(2) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } if dockerimage_id == "" { return nil, errors.New(fmt.Sprintf("Id of Dockerimage=%s not found", Dockerimage)) } logger.Debug(1, fmt.Sprintf("using dockerimage id %s instead of name %s ", dockerimage_id, Dockerimage)) // *** find/inspect image image, err := InspectImage(client, dockerimage_id) if err != nil { logger.Debug(1, fmt.Sprintf("docker image %s is not yet in local repository", Dockerimage)) image_retrieval := "load" // TODO only load is guaraneed to work switch { case image_retrieval == "load": { // for images that have been saved err = dockerLoadImage(client, dockerimage_download_url, work.Info.DataToken) } case image_retrieval == "import": { // for containers that have been exported err = dockerImportImage(client, Dockerimage, work.Info.DataToken) } case image_retrieval == "build": { // to create image from Dockerfile err = dockerBuildImage(client, Dockerimage) } } if err != nil { return nil, errors.New(fmt.Sprintf("Docker image was not correctly imported or built, err=%s", err.Error())) } // example urls // find image : http://shock.metagenomics.anl.gov/node/?query&docker=1&tag=wgerlach/bowtie2:2.2.0 // view node: http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f // download http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f?download if node != nil { } // last test if dockerimage_id != "" { image, err = InspectImage(client, dockerimage_id) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s , %s) was not correctly imported or built, err=%s", Dockerimage, dockerimage_id, err.Error())) } } else { image, err = InspectImage(client, Dockerimage) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s) was not correctly imported or built, err=%s", Dockerimage, err.Error())) } } } else { logger.Debug(1, fmt.Sprintf("docker image %s is already in local repository", Dockerimage)) } if dockerimage_id != image.ID { return nil, errors.New(fmt.Sprintf("error: dockerimage_id != image.ID, %s != %s (%s)", dockerimage_id, image.ID, Dockerimage)) } // tag image to make debugging easier if Dockerimage != "" { Dockerimage_array := strings.Split(Dockerimage, ":") // TODO split by colon is risky tag_opts := docker.TagImageOptions{Repo: Dockerimage_array[0], Tag: Dockerimage_array[1]} err = TagImage(client, dockerimage_id, tag_opts) if err != nil { logger.Error(fmt.Sprintf("warning: tagging of image %s with %s failed, err:", dockerimage_id, Dockerimage, err.Error())) } } // collect environment var docker_environment []string docker_environment_string := "" // this is only for the debug output for key, val := range work.Cmd.Environ.Public { env_pair := key + "=" + val docker_environment = append(docker_environment, env_pair) docker_environment_string += " --env=" + env_pair } if work.Cmd.HasPrivateEnv { private_envs, err := FetchPrivateEnvByWorkId(work.Id) if err != nil { return nil, err } for key, val := range private_envs { env_pair := key + "=" + val docker_environment = append(docker_environment, env_pair) docker_environment_string += " -e " + env_pair } } pipe_output := fmt.Sprintf(" 2> %s 1> %s", conf.STDERR_FILENAME, conf.STDOUT_FILENAME) bash_command := "" if use_wrapper_script { //bash_command = fmt.Sprint("/bin/bash", " ", wrapper_script_filename_docker, " ", pipe_output) // bash for wrapper script bash_command = fmt.Sprint(wrapper_script_filename_docker, " ", pipe_output) } else { bash_command = fmt.Sprint(commandName, " ", strings.Join(args, " "), " ", pipe_output) } logger.Debug(1, fmt.Sprint("bash_command: ", bash_command)) // example: "/bin/bash", "-c", "bowtie2 -h 2> awe_stderr.txt 1> awe_stdout.txt" container_cmd := []string{"/bin/bash", "-c", bash_command} // TODO remove bash if possible, but is needed for piping //var empty_struct struct{} bindstr_workdir := work.Path() + "/:" + conf.DOCKER_WORK_DIR logger.Debug(1, "bindstr_workdir: "+bindstr_workdir) var bindarray = []string{} // only mount predata if it is actually used //fake_predata := "" bindstr_predata := "" volume_str := "" if len(work.Predata) > 0 { predata_directory := path.Join(conf.DATA_PATH, "predata") bindstr_predata = predata_directory + "/:" + conf.DOCKER_WORKUNIT_PREDATA_DIR + ":ro" bindarray = []string{bindstr_workdir, bindstr_predata} //old version volume_str = "--volume=" + bindstr_workdir + " --volume=" + bindstr_predata } else { bindarray = []string{bindstr_workdir} volume_str = "--volume=" + bindstr_workdir } logger.Debug(1, "volume_str: "+volume_str) // version for docker command line docker_commandline_create := []string{ // "-t" would be required if I want to attach to the container later, check again documentation if needed "--name=" + container_name, "--workdir=" + conf.DOCKER_WORK_DIR, volume_str, // for workdir and optionally predata } if docker_environment_string != "" { docker_commandline_create = append(docker_commandline_create, docker_environment_string) } // version for docker API config := docker.Config{Image: dockerimage_id, WorkingDir: conf.DOCKER_WORK_DIR, AttachStdout: true, AttachStderr: true, AttachStdin: false, Cmd: container_cmd, //Volumes: map[string]struct{}{conf.DOCKER_WORK_DIR: struct{}{}}, // old version Volumes: map[string]struct{}{bindstr_workdir: struct{}{}}, Env: docker_environment, } if len(work.Predata) > 0 { config.Volumes[bindstr_predata] = struct{}{} } docker_commandline_create = append(docker_commandline_create, dockerimage_id) // docker_commandline_create = append(docker_commandline_create, container_cmd...) // argument to the "docker create" command opts := docker.CreateContainerOptions{Name: container_name, Config: &config} // note: docker binary mounts on creation, while docker API mounts on start of container container_id := "" // *** create container logger.Debug(1, fmt.Sprintf("creating docker container from image %s (%s)", Dockerimage, dockerimage_id)) if client != nil { container_obj, err := client.CreateContainer(opts) if err == nil { container_id = container_obj.ID } else { return nil, errors.New(fmt.Sprintf("error creating container, err=%s", err.Error())) } } else { container_id, err = CreateContainer(docker_commandline_create) if err != nil { return nil, errors.New(fmt.Sprintf("error creating container, err=%s", err.Error())) } } if container_id == "" { return nil, errors.New(fmt.Sprintf("error creating container, container_id is empty")) } logger.Debug(1, fmt.Sprintf("created docker container with ID: %s", container_id)) // *** start container fake_docker_cmd := "sudo docker run -t -i --name test " + volume_str + " " + docker_environment_string + " --workdir=" + conf.DOCKER_WORK_DIR + " " + dockerimage_id + " " + strings.Join(container_cmd, " ") logger.Debug(1, "fake_docker_cmd ("+Dockerimage+"): "+fake_docker_cmd) logger.Debug(1, "starting docker container...") docker_preparation_end := time.Now().Unix() pstats.DockerPrep = docker_preparation_end - docker_preparation_start logger.Debug(1, fmt.Sprintf("DockerPrep time in seconds: %d", pstats.DockerPrep)) if client != nil { err = client.StartContainer(container_id, &docker.HostConfig{Binds: bindarray}) // weired, seems to be needed //err = client.StartContainer(container_id, &docker.HostConfig{}) } else { err = StartContainer(container_id, volume_str) } if err != nil { return nil, errors.New(fmt.Sprintf("error starting container, id=%s, err=%s", container_id, err.Error())) } defer func(container_id string) { // *** clean up // ** kill container var err_kill error if client != nil { err_kill = client.KillContainer(docker.KillContainerOptions{ID: container_id}) } else { err_kill = KillContainer(container_id) } if err_kill != nil { logger.Error(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err_kill.Error())) } // *** remove Container var err error if client != nil { opts_remove := docker.RemoveContainerOptions{ID: container_id} err = client.RemoveContainer(opts_remove) } else { err = RemoveContainer(container_id) } if err != nil { logger.Error(fmt.Sprintf("error removing container id=%s, err=%s", container_id, err.Error())) } else { logger.Debug(1, "(deferred func) removed docker container") } }(container_id) if client != nil { cont, err := client.InspectContainer(container_id) if err != nil { logger.Error(fmt.Sprintf("error inspecting container=%s, err=%s", container_id, err.Error())) } inspect_filename := path.Join(work.Path(), "container_inspect.json") b_inspect, _ := json.MarshalIndent(cont, "", " ") err = ioutil.WriteFile(inspect_filename, b_inspect, 0666) if err != nil { logger.Error(fmt.Sprintf("error writing inspect file for container=%s, err=%s", container_id, err.Error())) } else { logger.Debug(1, fmt.Sprintf("wrote %s for container %s", inspect_filename, container_id)) } } var status int = 0 // wait for container to finish done := make(chan error) go func() { var errwait error if client != nil { status, errwait = client.WaitContainer(container_id) } else { status, errwait = WaitContainer(container_id) } done <- errwait // inform main function if conf.MEM_CHECK_INTERVAL != 0 { done <- errwait // inform memory checker } }() var MaxMem int64 = -1 var max_memory_total_rss int64 = -1 var max_memory_total_swap int64 = -1 memory_stat_filename := "" if conf.MEM_CHECK_INTERVAL != 0 { // documentation: https://docs.docker.com/articles/runmetrics/ // e.g. ubuntu: /sys/fs/cgroup/memory/docker/[ID]/memory.stat // coreos: /sys/fs/cgroup/memory/system.slice/docker-[ID].scope/memory.stat memory_stat_filename = strings.Replace(conf.CGROUP_MEMORY_DOCKER_DIR, "[ID]", container_id, -1) if _, err := os.Stat(memory_stat_filename); os.IsNotExist(err) { logger.Error("warning: memory measurement requested, but no memory.stat found: %s" + memory_stat_filename) memory_stat_filename = "" } } if conf.MEM_CHECK_INTERVAL != 0 && memory_stat_filename != "" { go func() { // memory checker for { select { case err_mem := <-done: if err_mem != nil { logger.Error("channel done returned error: " + err_mem.Error()) } return default: } var memory_total_rss int64 = -1 var memory_total_swap int64 = -1 memory_stat_file, err_mem := os.Open(memory_stat_filename) if err_mem != nil { logger.Error("warning: error opening memory_stat_file file:" + err_mem.Error()) time.Sleep(conf.MEM_CHECK_INTERVAL) continue } // Closes the file when we leave the scope of the current function, // this makes sure we never forget to close the file if the // function can exit in multiple places. memory_stat_file_scanner := bufio.NewScanner(memory_stat_file) memory_total_rss_read := false memory_total_swap_read := false // scanner.Scan() advances to the next token returning false if an error was encountered for memory_stat_file_scanner.Scan() { line := memory_stat_file_scanner.Text() if strings.HasPrefix(line, "total_rss ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_rss, err = strconv.ParseInt(strings.TrimPrefix(line, "total_rss "), 10, 64) if err != nil { memory_total_rss = -1 } memory_total_rss_read = true } else if strings.HasPrefix(line, "total_swap ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_swap, err = strconv.ParseInt(strings.TrimPrefix(line, "total_swap "), 10, 64) if err != nil { memory_total_swap = -1 } memory_total_swap_read = true } else { continue } if memory_total_rss_read && memory_total_swap_read { // we found all information we need, leave the loop break } } // When finished scanning if any error other than io.EOF occured // it will be returned by scanner.Err(). if err := memory_stat_file_scanner.Err(); err != nil { logger.Error(fmt.Sprintf("warning: could no read memory usage from cgroups=%s", memory_stat_file_scanner.Err())) //err = nil } else { // RSS maxium if memory_total_rss >= 0 && memory_total_rss > max_memory_total_rss { max_memory_total_rss = memory_total_rss } // SWAP maximum if memory_total_swap >= 0 && memory_total_swap > max_memory_total_swap { max_memory_total_swap = memory_total_swap } // RSS+SWAP maximum if memory_total_rss >= 0 && memory_total_swap >= 0 { memory_combined := memory_total_rss + memory_total_swap if memory_combined > MaxMem { MaxMem = memory_combined } } logger.Debug(1, fmt.Sprintf("memory: rss=%d, swap=%d, max_rss=%d max_swap=%d max_combined=%d", memory_total_rss, memory_total_swap, max_memory_total_rss, max_memory_total_swap, MaxMem)) } memory_stat_file.Close() // defer does not work in for loop ! //time.Sleep(5 * time.Second) time.Sleep(conf.MEM_CHECK_INTERVAL) } }() } else { logger.Debug(1, "memory checking disabled") } select { case <-chankill: logger.Debug(1, fmt.Sprint("chankill, try to kill conatiner %s... ", container_id)) if client != nil { err = client.KillContainer(docker.KillContainerOptions{ID: container_id}) } else { err = KillContainer(container_id) } if err != nil { return nil, errors.New(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err.Error())) } <-done // allow goroutine to exit return nil, errors.New("process killed as requested from chankill") case err = <-done: logger.Debug(1, fmt.Sprint("(1)docker wait returned with status ", status)) if err != nil { return nil, errors.New(fmt.Sprintf("dockerWait=%s, err=%s", commandName, err.Error())) } } logger.Debug(1, fmt.Sprint("(2)docker wait returned with status ", status)) if status != 0 { logger.Debug(1, fmt.Sprint("WaitContainer returned non-zero status ", status)) return nil, errors.New(fmt.Sprintf("error WaitContainer returned non-zero status=%d", status)) } logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) pstats.MaxMemUsage = MaxMem pstats.MaxMemoryTotalRss = max_memory_total_rss pstats.MaxMemoryTotalSwap = max_memory_total_swap logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) return }
func RunWorkunitDocker(work *core.Workunit) (pstats *core.WorkPerf, err error) { pstats = new(core.WorkPerf) pstats.MaxMemUsage = -1 pstats.MaxMemoryTotalRss = -1 pstats.MaxMemoryTotalSwap = -1 args := work.Cmd.ParsedArgs //change cwd to the workunit's working directory if err := work.CDworkpath(); err != nil { return nil, err } commandName := work.Cmd.Name use_wrapper_script := false wrapper_script_filename := "awe_workunit_wrapper.sh" wrapper_script_filename_host := path.Join(work.Path(), wrapper_script_filename) wrapper_script_filename_docker := path.Join(conf.DOCKER_WORK_DIR, wrapper_script_filename) if strings.HasPrefix(commandName, "app:") { if len(work.Cmd.ParsedArgs) > 0 { use_wrapper_script = true // create wrapper script //conf.DOCKER_WORK_DIR var wrapper_content_string = "#!/bin/bash\n" + strings.Join(work.Cmd.Cmd_script, "\n") + "\n" logger.Debug(1, fmt.Sprintf("write wrapper script: %s\n%s", wrapper_script_filename_host, strings.Join(work.Cmd.Cmd_script, ", "))) var wrapper_content_bytes = []byte(wrapper_content_string) err = ioutil.WriteFile(wrapper_script_filename_host, wrapper_content_bytes, 0644) if err != nil { return nil, errors.New(fmt.Sprintf("error writing wrapper script, err=%s", err.Error())) } } } //cmd := exec.Command(commandName, args...) container_name := "AWE_workunit" Dockerimage := work.Cmd.Dockerimage logger.Debug(1, fmt.Sprintf("Dockerimage: %s", Dockerimage)) endpoint := "unix:///var/run/docker.sock" client, err := docker.NewClient(endpoint) if err != nil { return nil, errors.New(fmt.Sprintf("error creating docker client", err.Error())) } //imgs, _ := client.ListImages(false) //for _, img := range imgs { // spew.Dump(img) //} // delete any old AWE_container err = RemoveOldAWEContainers(client, container_name) if err != nil { return nil, err } //var node *core.ShockNode = nil // find image in repo (e.g. extract docker image id) node, dockerimage_download_url, err := findDockerImageInShock(Dockerimage) if err != nil { return nil, errors.New(fmt.Sprintf("Error getting docker url, err=%s", err.Error())) } // TODO attr_json, _ := json.Marshal(node.Attributes) might be better node_attr_map, ok := node.Attributes.(map[string]interface{}) if !ok { return nil, errors.New(fmt.Sprintf("(1) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } dockerimage_id, ok := node_attr_map["id"].(string) if !ok { return nil, errors.New(fmt.Sprintf("(2) could not type assert Shock_Dockerimage_attributes, Dockerimage=%s", Dockerimage)) } if dockerimage_id == "" { return nil, errors.New(fmt.Sprintf("Id of Dockerimage=%s not found", Dockerimage)) } logger.Debug(1, fmt.Sprintf("using dockerimage id %s instead of name %s ", dockerimage_id, Dockerimage)) // *** find/inspect image image, err := client.InspectImage(dockerimage_id) if err != nil { logger.Debug(1, fmt.Sprintf("docker image %s is not yet in local repository", Dockerimage)) image_retrieval := "load" // TODO only load is guaraneed to work switch { case image_retrieval == "load": { // for images that have been saved err = dockerLoadImage(client, dockerimage_download_url) } case image_retrieval == "import": { // for containers that have been exported err = dockerImportImage(client, Dockerimage) } case image_retrieval == "build": { // to create image from Dockerfile err = dockerBuildImage(client, Dockerimage) } } if err != nil { return nil, errors.New(fmt.Sprintf("Docker image was not correctly imported or built, err=%s", err.Error())) } // example urls // find image : http://shock.metagenomics.anl.gov/node/?query&docker=1&tag=wgerlach/bowtie2:2.2.0 // view node: http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f // download http://shock.metagenomics.anl.gov/node/ed0a6b20-c535-40d7-92e8-754bb8b6b48f?download if node != nil { } // last test if dockerimage_id != "" { image, err = client.InspectImage(dockerimage_id) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s , %s) was not correctly imported or built, err=%s", Dockerimage, dockerimage_id, err.Error())) } } else { image, err = client.InspectImage(Dockerimage) if err != nil { return nil, errors.New(fmt.Sprintf("(InspectImage) Docker image (%s) was not correctly imported or built, err=%s", Dockerimage, err.Error())) } } } else { logger.Debug(1, fmt.Sprintf("docker image %s is already in local repository", Dockerimage)) } if dockerimage_id != image.ID { return nil, errors.New(fmt.Sprintf("error: dockerimage_id != image.ID, %s != %s (%s)", dockerimage_id, image.ID, Dockerimage)) } // tag image to make debugging easier if Dockerimage != "" { Dockerimage_array := strings.Split(Dockerimage, ":") // TODO split by colon is risky tag_opts := docker.TagImageOptions{Repo: Dockerimage_array[0], Tag: Dockerimage_array[1]} err = client.TagImage(dockerimage_id, tag_opts) if err != nil { logger.Error(fmt.Sprintf("warning: tagging of image %s with %s failed, err:", dockerimage_id, Dockerimage, err.Error())) } } pipe_output := fmt.Sprintf(" 2> %s 1> %s", conf.STDERR_FILENAME, conf.STDOUT_FILENAME) bash_command := "" if use_wrapper_script { bash_command = fmt.Sprint("/bin/bash", " ", wrapper_script_filename_docker, " ", pipe_output) } else { bash_command = fmt.Sprint(commandName, " ", strings.Join(args, " "), " ", pipe_output) } logger.Debug(1, fmt.Sprint("bash_command: ", bash_command)) // example: "/bin/bash", "-c", "bowtie2 -h 2> awe_stderr.txt 1> awe_stdout.txt" container_cmd := []string{"/bin/bash", "-c", bash_command} // TODO remove bash if possible, but is needed for piping config := docker.Config{Image: dockerimage_id, WorkingDir: conf.DOCKER_WORK_DIR, AttachStdout: true, AttachStderr: true, AttachStdin: false, Cmd: container_cmd, Volumes: map[string]struct{}{conf.DOCKER_WORK_DIR: {}}} opts := docker.CreateContainerOptions{Name: container_name, Config: &config} // *** create container (or find container ?) logger.Debug(1, fmt.Sprintf("creating docker container from image %s (%s)", Dockerimage, dockerimage_id)) container_incomplete, err := client.CreateContainer(opts) if err != nil { return nil, errors.New(fmt.Sprintf("error creating container, err=%s", err.Error())) } container_id := container_incomplete.ID logger.Debug(1, fmt.Sprintf("created docker container with ID: %s", container_id)) // *** inspect the new container if false { container, err := client.InspectContainer(container_id) if err != nil { return nil, errors.New(fmt.Sprintf("error inspecting container, err=%s", err.Error())) } //spew.Dump(container) //spew.Dump(container.Config) fmt.Println("name: ", container.Name) } // *** start container var bindarray = []string{} bindstr_workdir := work.Path() + "/:" + conf.DOCKER_WORK_DIR logger.Debug(1, "bindstr_workdir: "+bindstr_workdir) // only mount predata if it is actually used fake_predata := "" if len(work.Predata) > 0 { predata_directory := path.Join(conf.DATA_PATH, "predata") bindstr_predata := predata_directory + "/:" + "/db:ro" // TODO put in config logger.Debug(1, "bindstr_predata: "+bindstr_predata) fake_predata = " -v " + bindstr_predata bindarray = []string{bindstr_workdir, bindstr_predata} } else { bindarray = []string{bindstr_workdir} } fake_docker_cmd := "sudo docker run -t -i --name test -v " + bindstr_workdir + fake_predata + " --workdir=" + conf.DOCKER_WORK_DIR + " " + dockerimage_id + " " + strings.Join(container_cmd, " ") logger.Debug(1, "fake_docker_cmd ("+Dockerimage+"): "+fake_docker_cmd) logger.Debug(1, "starting docker container...") err = client.StartContainer(container_id, &docker.HostConfig{Binds: bindarray}) if err != nil { return nil, errors.New(fmt.Sprintf("error starting container, id=%s, err=%s", container_id, err.Error())) } defer func(container_id string) { // *** clean up // ** kill container err_kill := client.KillContainer(docker.KillContainerOptions{ID: container_id}) if err_kill != nil { logger.Error(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err_kill.Error())) } // *** remove Container opts_remove := docker.RemoveContainerOptions{ID: container_id} if err := client.RemoveContainer(opts_remove); err != nil { logger.Error(fmt.Sprintf("error removing container id=%s, err=%s", container_id, err.Error())) } else { logger.Debug(1, "(deferred func) removed docker container") } }(container_id) var status int = 0 // wait for container to finish done := make(chan error) go func() { var errwait error status, errwait = client.WaitContainer(container_id) done <- errwait // inform main function done <- errwait // inform memory checker }() var MaxMem int64 = -1 var max_memory_total_rss int64 = -1 var max_memory_total_swap int64 = -1 // e.g. /sys/fs/cgroup/memory/docker/<id>/memory.stat memory_stat_filename := path.Join(conf.CGROUP_MEMORY_DOCKER_DIR, container_id, "/memory.stat") go func() { // memory checker for { select { case err_mem := <-done: if err_mem != nil { logger.Error("channel done returned error: " + err_mem.Error()) } return default: } var memory_total_rss int64 = -1 var memory_total_swap int64 = -1 memory_stat_file, err_mem := os.Open(memory_stat_filename) if err_mem != nil { logger.Error("warning: error opening memory_stat_file file:" + err_mem.Error()) time.Sleep(conf.MEM_CHECK_INTERVAL) continue } // Closes the file when we leave the scope of the current function, // this makes sure we never forget to close the file if the // function can exit in multiple places. memory_stat_file_scanner := bufio.NewScanner(memory_stat_file) memory_total_rss_read := false memory_total_swap_read := false // scanner.Scan() advances to the next token returning false if an error was encountered for memory_stat_file_scanner.Scan() { line := memory_stat_file_scanner.Text() if strings.HasPrefix(line, "total_rss ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_rss, err = strconv.ParseInt(strings.TrimPrefix(line, "total_rss "), 10, 64) if err != nil { memory_total_rss = -1 } memory_total_rss_read = true } else if strings.HasPrefix(line, "total_swap ") { // TODO what is total_rss_huge //logger.Debug(1, fmt.Sprint("inspecting container with memory line=", line)) memory_total_swap, err = strconv.ParseInt(strings.TrimPrefix(line, "total_swap "), 10, 64) if err != nil { memory_total_swap = -1 } memory_total_swap_read = true } else { continue } if memory_total_rss_read && memory_total_swap_read { // we found all information we need, leave the loop break } } // When finished scanning if any error other than io.EOF occured // it will be returned by scanner.Err(). if err := memory_stat_file_scanner.Err(); err != nil { logger.Error(fmt.Sprintf("warning: could no read memory usage from cgroups=%s", memory_stat_file_scanner.Err())) //err = nil } else { // RSS maxium if memory_total_rss >= 0 && memory_total_rss > max_memory_total_rss { max_memory_total_rss = memory_total_rss } // SWAP maximum if memory_total_swap >= 0 && memory_total_swap > max_memory_total_swap { max_memory_total_swap = memory_total_swap } // RSS+SWAP maximum if memory_total_rss >= 0 && memory_total_swap >= 0 { memory_combined := memory_total_rss + memory_total_swap if memory_combined > MaxMem { MaxMem = memory_combined } } logger.Debug(1, fmt.Sprintf("memory: rss=%d, swap=%d, max_rss=%d max_swap=%d max_combined=%d", memory_total_rss, memory_total_swap, max_memory_total_rss, max_memory_total_swap, MaxMem)) } memory_stat_file.Close() // defer does not work in for loop ! //time.Sleep(5 * time.Second) time.Sleep(conf.MEM_CHECK_INTERVAL) } }() select { case <-chankill: logger.Debug(1, fmt.Sprint("chankill, try to kill conatiner %s... ", container_id)) if err := client.KillContainer(docker.KillContainerOptions{ID: container_id}); err != nil { return nil, errors.New(fmt.Sprintf("error killing container id=%s, err=%s", container_id, err.Error())) } <-done // allow goroutine to exit return nil, errors.New("process killed as requested from chankill") case err = <-done: if err != nil { return nil, errors.New(fmt.Sprintf("wait_cmd=%s, err=%s", commandName, err.Error())) } logger.Debug(1, fmt.Sprint("(1)docker command returned with status ", status)) } logger.Debug(1, fmt.Sprint("(2)docker command returned with status ", status)) if status != 0 { logger.Debug(1, fmt.Sprint("WaitContainer returned non-zero status ", status)) return nil, errors.New(fmt.Sprintf("error WaitContainer returned non-zero status=%d", status)) } logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) pstats.MaxMemUsage = MaxMem pstats.MaxMemoryTotalRss = max_memory_total_rss pstats.MaxMemoryTotalSwap = max_memory_total_swap logger.Debug(1, fmt.Sprint("pstats.MaxMemUsage: ", pstats.MaxMemUsage)) return }