//fetch input data func MoveInputData(work *core.Workunit) (size int64, err error) { for inputname, io := range work.Inputs { // skip if NoFile == true if !io.NoFile { // is file ! var dataUrl string inputFilePath := fmt.Sprintf("%s/%s", work.Path(), inputname) if work.Rank == 0 { if conf.CACHE_ENABLED && io.Node != "" { if file_path, err := StatCacheFilePath(io.Node); err == nil { //make a link in work dir from cached file linkname := fmt.Sprintf("%s/%s", work.Path(), inputname) fmt.Printf("input found in cache, making link: " + file_path + " -> " + linkname + "\n") err = os.Symlink(file_path, linkname) if err == nil { logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } return 0, err } } dataUrl = io.DataUrl() } else { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", io.DataUrl(), work.IndexType(), work.Part()) } logger.Debug(2, "mover: fetching input file from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+";url="+dataUrl) // download file if datamoved, err := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress); err != nil { return size, err } else { size += datamoved } logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } // download node attributes if requested if io.AttrFile != "" { // get node node, err := shock.ShockGet(io.Host, io.Node, work.Info.DataToken) if err != nil { return size, err } logger.Debug(2, "mover: fetching input attributes from node:"+node.Id) logger.Event(event.ATTR_IN, "workid="+work.Id+";node="+node.Id) // print node attributes attrFilePath := fmt.Sprintf("%s/%s", work.Path(), io.AttrFile) attr_json, _ := json.Marshal(node.Attributes) if err := ioutil.WriteFile(attrFilePath, attr_json, 0644); err != nil { return size, err } logger.Event(event.ATTR_READY, "workid="+work.Id+";path="+attrFilePath) } } return }
//fetch input data func moveInputData(work *core.Workunit) (size int64, err error) { for _, io := range work.Inputs { inputname := io.FileName dataUrl, uerr := io.DataUrl() if uerr != nil { return 0, uerr } if work.Rank > 0 { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", dataUrl, work.IndexType(), work.Part()) } inputFilePath := path.Join(work.Path(), inputname) logger.Debug(2, "mover: fetching input from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+" url="+dataUrl) // this gets file from any downloadable url, not just shock if datamoved, _, err := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress, false); err != nil { return size, err } else { size += datamoved } logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } return }
//parse workunit, fetch input data, compose command arguments func ParseWorkunitArgs(work *core.Workunit) (args []string, err error) { argstr := work.Cmd.Args if argstr == "" { return } argList := strings.Fields(argstr) inputsMap := work.Inputs for _, arg := range argList { if strings.Contains(arg, "@") { //parse input/output to accessible local file segs := strings.Split(arg, "@") if len(segs) > 2 { return []string{}, errors.New("invalid format in command args, multiple @ within one arg") } inputname := segs[1] if inputsMap.Has(inputname) { io := inputsMap[inputname] var dataUrl string if work.Rank == 0 { dataUrl = io.DataUrl() } else { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", io.DataUrl(), work.IndexType(), work.Part()) } inputFilePath := fmt.Sprintf("%s/%s", work.Path(), inputname) logger.Debug(2, "mover: fetching input from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+" url="+dataUrl) if err := fetchFile(inputFilePath, dataUrl, work.Info.DataToken); err != nil { //get file from Shock return []string{}, err } logger.Event(event.FILE_READY, "workid="+work.Id+" url="+dataUrl) parsedArg := fmt.Sprintf("%s%s", segs[0], inputFilePath) args = append(args, parsedArg) } } else { //no @, has nothing to do with input/output, append directly args = append(args, arg) } } return args, nil }
//fetch input data func moveInputData(work *core.Workunit) (size int64, err error) { for inputname, io := range work.Inputs { var dataUrl string if work.Rank == 0 { dataUrl = io.DataUrl() } else { dataUrl = fmt.Sprintf("%s&index=%s&part=%s", io.DataUrl(), work.IndexType(), work.Part()) } inputFilePath := path.Join(work.Path(), inputname) logger.Debug(2, "mover: fetching input from url:"+dataUrl) logger.Event(event.FILE_IN, "workid="+work.Id+" url="+dataUrl) if datamoved, err := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress); err != nil { return size, err } else { size += datamoved } logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl) } return }
//fetch prerequisite data (e.g. reference dbs) func movePreData(workunit *core.Workunit) (size int64, err error) { for _, io := range workunit.Predata { name := io.FileName predata_directory := path.Join(conf.DATA_PATH, "predata") err = os.MkdirAll(predata_directory, 755) if err != nil { return 0, errors.New("error creating predata_directory: " + err.Error()) } file_path := path.Join(predata_directory, name) dataUrl, uerr := io.DataUrl() if uerr != nil { return 0, uerr } // get shock and local md5sums isShockPredata := true node_md5 := "" if io.Node == "-" { isShockPredata = false } else { node, err := shock.ShockGet(io.Host, io.Node, workunit.Info.DataToken) if err != nil { return 0, errors.New("error in ShockGet: " + err.Error()) } // rename file to be md5sum node_md5 = node.File.Checksum["md5"] file_path = path.Join(predata_directory, node_md5) } // file does not exist or its md5sum is wrong if !isFileExisting(file_path) { logger.Debug(2, "mover: fetching predata from url: "+dataUrl) logger.Event(event.PRE_IN, "workid="+workunit.Id+" url="+dataUrl) var md5sum string file_path_part := file_path + ".part" // temporary name // this gets file from any downloadable url, not just shock size, md5sum, err = shock.FetchFile(file_path_part, dataUrl, workunit.Info.DataToken, io.Uncompress, isShockPredata) if err != nil { return 0, errors.New("error in fetchFile: " + err.Error()) } os.Rename(file_path_part, file_path) if err != nil { return 0, errors.New("error renaming after download of preData: " + err.Error()) } if isShockPredata { if node_md5 != md5sum { return 0, errors.New("error downloaded file md5 does not mach shock md5, node: " + io.Node) } else { logger.Debug(2, "mover: predata "+name+" has md5sum "+md5sum) } } } else { logger.Debug(2, "mover: predata already exists: "+name) } // timstamp for last access - future caching accessfile, err := os.Create(file_path + ".access") if err != nil { return 0, errors.New("error creating predata access file: " + err.Error()) } defer accessfile.Close() accessfile.WriteString(time.Now().String()) // determine if running with docker wants_docker := false if workunit.Cmd.Dockerimage != "" || workunit.App != nil { // TODO need more save way to detect use of docker wants_docker = true } if wants_docker && conf.USE_DOCKER == "no" { return 0, errors.New("error: use of docker images has been disabled by administrator") } if wants_docker == false && conf.USE_DOCKER == "only" { return 0, errors.New("error: use of docker images is enforced by administrator") } // copy or create symlink in work dir linkname := path.Join(workunit.Path(), name) if conf.NO_SYMLINK { // some programs do not accept symlinks (e.g. emirge), need to copy the file into the work directory logger.Debug(1, "copy predata: "+file_path+" -> "+linkname) _, err := shock.CopyFile(file_path, linkname) if err != nil { return 0, fmt.Errorf("error copying file from %s to % s: ", file_path, linkname, err.Error()) } } else { if wants_docker { // new filepath for predata dir in container var docker_file_path string if isShockPredata { docker_file_path = path.Join(conf.DOCKER_WORKUNIT_PREDATA_DIR, node_md5) } else { docker_file_path = path.Join(conf.DOCKER_WORKUNIT_PREDATA_DIR, name) } logger.Debug(1, "creating dangling symlink: "+linkname+" -> "+docker_file_path) // dangling link will give error, we ignore that here _ = os.Symlink(docker_file_path, linkname) } else { logger.Debug(1, "symlink:"+linkname+" -> "+file_path) err = os.Symlink(file_path, linkname) if err != nil { return 0, errors.New("error creating predata file symlink: " + err.Error()) } } } logger.Event(event.PRE_READY, "workid="+workunit.Id+";url="+dataUrl) } return }