Exemple #1
0
// dockerImportImage locates the given docker image in Shock (via
// findDockerImageInShock) and imports it into the docker daemon behind client.
//
// Dockerimage must have the form "repository:tag"; the part before the first
// ":" is used as repository and the part after it as tag. datatoken is passed
// on to the Shock lookup. An error is returned when the image cannot be found,
// when the name carries no tag, or when the import itself fails.
func dockerImportImage(client *docker.Client, Dockerimage string, datatoken string) (err error) {

	_, download_url, err := findDockerImageInShock(Dockerimage, datatoken) // TODO get node
	if err != nil {
		return err
	}

	logger.Debug(1, fmt.Sprintf("docker image url=%s", download_url))

	// TODO import base image if needed

	// *** import image
	// Without a ":" the split yields a single element; report that instead of
	// panicking on the missing tag.
	Dockerimage_array := strings.Split(Dockerimage, ":")
	if len(Dockerimage_array) < 2 {
		return fmt.Errorf("docker image name %q has no tag, expected \"repository:tag\"", Dockerimage)
	}
	Dockerimage_repo, Dockerimage_tag := Dockerimage_array[0], Dockerimage_array[1]

	logger.Debug(1, "importing image...")
	var buf bytes.Buffer // receives the output stream of the import; not inspected
	opts := docker.ImportImageOptions{
		Source:       download_url,
		Repository:   Dockerimage_repo,
		Tag:          Dockerimage_tag,
		OutputStream: &buf,
	}

	if err = client.ImportImage(opts); err != nil {
		return fmt.Errorf("Error importing docker image, err=%s", err.Error())
	}

	return nil
}
Exemple #2
0
// Handle is the event loop of the server manager. It never returns; on every
// iteration it serves whichever of the manager's channels is ready:
//
//   - jsReq:    hand out the next job id on jsAck
//   - taskIn:   add the received task
//   - coReq:    pop workunits for a checkout request and answer on coAck
//   - feedback: apply a workunit status change
//   - reminder: update the workunit queue (and print the status in DEV_MODE)
func (qm *ServerMgr) Handle() {
	for {
		select {
		case <-qm.jsReq:
			nextJid := qm.getNextJid()
			qm.jsAck <- nextJid
			logger.Debug(2, fmt.Sprintf("qmgr:receive a job submission request, assigned jid=%s\n", nextJid))

		case incoming := <-qm.taskIn:
			logger.Debug(2, fmt.Sprintf("qmgr:task recived from chan taskIn, id=%s\n", incoming.Id))
			qm.addTask(incoming)

		case request := <-qm.coReq:
			logger.Debug(2, fmt.Sprintf("qmgr: workunit checkout request received, Req=%v\n", request))
			units, popErr := qm.popWorks(request)
			qm.coAck <- CoAck{workunits: units, err: popErr}

		case note := <-qm.feedback:
			logger.Debug(2, fmt.Sprintf("qmgr: workunit feedback received, workid=%s, status=%s, clientid=%s\n", note.WorkId, note.Status, note.ClientId))
			changeErr := qm.handleWorkStatusChange(note)
			if changeErr != nil {
				logger.Error("handleWorkStatusChange(): " + changeErr.Error())
			}

		case <-qm.reminder:
			logger.Debug(3, "time to update workunit queue....\n")
			qm.updateQueue()
			if conf.DEV_MODE {
				fmt.Println(qm.ShowStatus())
			}
		}
	}
}
Exemple #3
0
// locateInputs resolves the Shock node of every input of task and checks that
// the input is available.
//
// An input without a Url takes its node from the equally named output of the
// preceding task "<jobid>_<origin>", unless that task was skipped. The
// function fails when an input still has the placeholder node "-" or when
// GetFileSize reports a negative size.
func (qm *ServerMgr) locateInputs(task *Task) (err error) {
	logger.Debug(2, "trying to locate Inputs of task "+task.Id)
	// the job id is the part of the task id in front of the first "_"
	jobid := strings.Split(task.Id, "_")[0]

	for name, io := range task.Inputs {
		if io.Url == "" {
			if pred, found := qm.taskMap[fmt.Sprintf("%s_%s", jobid, io.Origin)]; found {
				// For now skipped tasks are known to have just one input
				// and one output, so only one file would need to change
				// (this may change in the future); nothing is done for
				// them here.
				//locateSkippedInput(qm, preTask, io)
				wasSkipped := pred.State == TASK_STAT_SKIPPED || pred.State == TASK_STAT_FAIL_SKIP
				if !wasSkipped {
					if produced, has := pred.Outputs[name]; has {
						io.Node = produced.Node
					}
				}
			}
		}

		io.DataUrl() // result is intentionally unused here, as before

		if io.Node == "-" {
			return fmt.Errorf("error in locate input for task %s, %s", task.Id, name)
		}
		//need time out!
		if io.GetFileSize() < 0 {
			return fmt.Errorf("task %s: input file %s not available", task.Id, name)
		}
		logger.Debug(2, fmt.Sprintf("inputs located %s, %s\n", name, io.Node))
	}
	return nil
}
Exemple #4
0
// findDockerImageInShock searches the Shock docker image repository
// (conf.SHOCK_DOCKER_IMAGE_REPOSITORY) for a node of type "dockerimage" whose
// name equals Dockerimage and returns that node together with its download
// url. Exactly one matching node is required; zero or several matches are
// reported as an error.
//
// Formerly: getDockerImageUrl(Dockerimage string) (download_url string, err error)
func findDockerImageInShock(Dockerimage string) (node *shock.ShockNode, download_url string, err error) {
	repo := shock.ShockClient{conf.SHOCK_DOCKER_IMAGE_REPOSITORY, ""}

	logger.Debug(1, fmt.Sprint("try to import docker image, Dockerimage=", Dockerimage))

	// resulting query, e.g.: type=dockerimage&name=wgerlach/bowtie2:2.2.0
	params := url.Values{
		"type": {"dockerimage"},
		"name": {Dockerimage},
	}
	hits, err := repo.Query(params)
	if err != nil {
		return nil, "", fmt.Errorf("shock node not found for image=%s, err=%s", Dockerimage, err.Error())
	}
	logger.Debug(1, fmt.Sprintf("query result: %v", hits))

	switch found := len(hits.Data); {
	case found == 0:
		return nil, "", fmt.Errorf("image %s not found in shocks docker repo", Dockerimage)
	case found > 1:
		return nil, "", fmt.Errorf("more than one image %s found in shocks docker repo", Dockerimage)
	}

	node = &hits.Data[0]
	logger.Debug(1, fmt.Sprintf("found SHOCK node for docker image: %s", node.Id))

	download_url, err = repo.Get_node_download_url(*node)
	if err != nil {
		return nil, "", fmt.Errorf("Could not create download url, err=%s", err.Error())
	}

	return node, download_url, nil
}
Exemple #5
0
// ShowWorkQueue is a debugging aid: it writes the number of workunits in the
// work queue, followed by one line per workunit id, to the debug log at
// level 1.
func (qm *CQMgr) ShowWorkQueue() {
	total := qm.workQueue.Len()
	logger.Debug(1, fmt.Sprintf("current queuing workunits (%d)", total))

	for _, workid := range qm.workQueue.List() {
		logger.Debug(1, fmt.Sprintf("workid=%s", workid))
	}
}
Exemple #6
0
// ClientHandle is the event loop for client traffic. It never returns and
// serves two channels:
//
//   - coReq:    a workunit checkout request. While the queue is suspended
//     the request is answered with e.QueueSuspend; otherwise the queue is
//     updated, work is popped for the request and, on success, the affected
//     jobs/tasks are marked in progress. The answer is sent on coAck.
//   - feedback: a workunit status notice, which is applied and followed by a
//     queue update.
func (qm *ServerMgr) ClientHandle() {
	for {
		select {
		case request := <-qm.coReq:
			logger.Debug(2, fmt.Sprintf("qmgr: workunit checkout request received, Req=%v", request))

			var reply CoAck
			if !qm.suspendQueue {
				qm.updateQueue()
				units, popErr := qm.popWorks(request)
				if popErr == nil {
					qm.UpdateJobTaskToInProgress(units)
				}
				reply = CoAck{workunits: units, err: popErr}
			} else {
				// queue is suspended, return suspend error
				reply = CoAck{workunits: nil, err: errors.New(e.QueueSuspend)}
			}
			qm.coAck <- reply

		case note := <-qm.feedback:
			logger.Debug(2, fmt.Sprintf("qmgr: workunit feedback received, workid=%s, status=%s, clientid=%s", note.WorkId, note.Status, note.ClientId))
			changeErr := qm.handleWorkStatusChange(note)
			if changeErr != nil {
				logger.Error("handleWorkStatusChange(): " + changeErr.Error())
			}
			qm.updateQueue()
		}
	}
}
Exemple #7
0
// CreateContainer runs "docker create" with the given arguments through the
// docker binary (conf.DOCKER_BINARY) and returns the id of the new container.
//
// Usage of the underlying command: docker create [OPTIONS] IMAGE [COMMAND] [ARG...]
// Note: "-t" must be among the arguments to be able to attach later; this is
// not documented in docker.
//
// The id is taken from the first line of the command's standard output. An
// error is returned when the command fails or when that line is empty.
func CreateContainer(create_args []string) (container_id string, err error) {

	// prepend "create"
	create_args = append([]string{"create"}, create_args...)

	logger.Debug(1, fmt.Sprintf("(CreateContainer) cmd: %s %s", conf.DOCKER_BINARY, strings.Join(create_args, " ")))

	stdo, _, err := RunCommand(conf.DOCKER_BINARY, create_args...)
	if err != nil {
		logger.Debug(1, fmt.Sprintf("(CreateContainer) cmd.Wait returned error: %s", err.Error()))
		return "", err
	}

	// Extract only the first line. The line is cut exactly at the newline
	// (cutting one byte earlier would drop the last character of the id and
	// panic on output that starts with a newline); a trailing "\r" or other
	// whitespace is trimmed afterwards.
	first_line := stdo
	if endofline := bytes.IndexByte(stdo, '\n'); endofline >= 0 {
		first_line = stdo[:endofline]
	}

	container_id = string(bytes.TrimSpace(first_line))
	if container_id == "" {
		return "", errors.New("docker create returned empty string")
	}

	return container_id, nil
}
Exemple #8
0
// MoveInputData makes all inputs of a workunit available in its work
// directory and returns the number of bytes that were downloaded.
//
// For every input that is a file (NoFile == false):
//   - rank 0 workunits first look into the local cache (when
//     conf.CACHE_ENABLED is set and the input has a node); a cached file is
//     symlinked into the work directory instead of being downloaded,
//   - workunits with another rank request only their part of the file by
//     appending index/part parameters to the url,
//   - everything else is downloaded with shock.FetchFile.
//
// For every input with an AttrFile, the attributes of the input's Shock node
// are written as JSON to that file in the work directory.
//
// Processing stops at the first error.
func MoveInputData(work *core.Workunit) (size int64, err error) {
	for _, io := range work.Inputs {
		inputname := io.FileName
		// skip if NoFile == true
		if !io.NoFile { // is file !
			dataUrl, uerr := io.DataUrl()
			if uerr != nil {
				return 0, uerr
			}
			inputFilePath := fmt.Sprintf("%s/%s", work.Path(), inputname)

			from_cache := false
			if work.Rank == 0 {
				if conf.CACHE_ENABLED && io.Node != "" {
					if file_path, cerr := StatCacheFilePath(io.Node); cerr == nil {
						// make a link in work dir from cached file
						fmt.Printf("input found in cache, making link: %s -> %s\n", file_path, inputFilePath)
						if lerr := os.Symlink(file_path, inputFilePath); lerr != nil {
							return 0, lerr
						}
						// Only this input is done; the remaining inputs
						// (and this input's attribute file) still have to
						// be processed, so do not return here.
						from_cache = true
					}
				}
			} else {
				dataUrl = fmt.Sprintf("%s&index=%s&part=%s", dataUrl, work.IndexType(), work.Part())
			}

			if !from_cache {
				logger.Debug(2, "mover: fetching input file from url:"+dataUrl)
				logger.Event(event.FILE_IN, "workid="+work.Id+";url="+dataUrl)

				// download file
				datamoved, _, ferr := shock.FetchFile(inputFilePath, dataUrl, work.Info.DataToken, io.Uncompress, false)
				if ferr != nil {
					return size, errors.New("shock.FetchFile returned: " + ferr.Error())
				}
				size += datamoved
			}
			logger.Event(event.FILE_READY, "workid="+work.Id+";url="+dataUrl)
		}

		// download node attributes if requested
		if io.AttrFile != "" {
			// get node
			node, gerr := shock.ShockGet(io.Host, io.Node, work.Info.DataToken)
			if gerr != nil {
				return size, errors.New("shock.ShockGet (node attributes) returned: " + gerr.Error())
			}
			logger.Debug(2, "mover: fetching input attributes from node:"+node.Id)
			logger.Event(event.ATTR_IN, "workid="+work.Id+";node="+node.Id)

			// write node attributes
			attrFilePath := fmt.Sprintf("%s/%s", work.Path(), io.AttrFile)
			attr_json, merr := json.Marshal(node.Attributes)
			if merr != nil {
				return size, merr
			}
			if werr := ioutil.WriteFile(attrFilePath, attr_json, 0644); werr != nil {
				return size, werr
			}
			logger.Event(event.ATTR_READY, "workid="+work.Id+";path="+attrFilePath)
		}
	}
	return size, nil
}
Exemple #9
0
// createOutputNode makes sure every output of task has a Shock node.
//
// Outputs of type "update" reuse an existing node: when no node is set yet
// ("" or "-"), it is looked up with locateUpdate, which requires the output
// to name an origin. For all other outputs a new node is posted with
// PostNodeWithToken. The first error aborts the loop and is returned.
func (qm *ServerMgr) createOutputNode(task *Task) (err error) {
	for _, out := range task.Outputs {
		fname := out.FileName

		if out.Type != "update" {
			// POST empty shock node for this output
			logger.Debug(2, fmt.Sprintf("posting output Shock node for file %s in task %s\n", fname, task.Id))
			posted, postErr := PostNodeWithToken(out, task.TotalWork, task.Info.DataToken)
			if postErr != nil {
				return postErr
			}
			out.Node = posted
			logger.Debug(2, fmt.Sprintf("task %s: output Shock node created, node=%s\n", task.Id, posted))
			continue
		}

		// this is an update output, it will update an existing shock node
		// and not create a new one
		if out.Node == "" || out.Node == "-" {
			if out.Origin == "" {
				return fmt.Errorf("update output %s in task %s is missing required origin", fname, task.Id)
			}
			located, locErr := qm.locateUpdate(task.Id, fname, out.Origin)
			if locErr != nil {
				return locErr
			}
			out.Node = located
		}
		logger.Debug(2, fmt.Sprintf("outout %s in task %s is an update of node %s\n", fname, task.Id, out.Node))
	}
	return nil
}
Exemple #10
0
// createIOnodes creates the input/output nodes for all tasks of job.
//
// It first indexes the tasks by the second "_"-separated field of their id
// and rejects ids that lack that field or are not unique. Afterwards
// createIOnodes_forTask is called for every task; its error, if any, is
// returned prefixed with the name of the task's app.
func (appr AppRegistry) createIOnodes(job *Job) (err error) {

	taskid2task := make(map[string]*Task)
	taskid_processed := make(map[string]bool)

	// create taskid2task
	for _, task := range job.Tasks {
		taskid_split := strings.Split(task.Id, "_")
		if len(taskid_split) < 2 {
			// indexing [1] below would panic on such an id
			return errors.New("error: task id has no \"_\" separator, id=" + task.Id)
		}
		taskid := taskid_split[1]

		if _, ok := taskid2task[taskid]; ok {
			return errors.New("error: task id not unique, id=" + taskid)
		}
		taskid2task[taskid] = task
		logger.Debug(1, fmt.Sprintf("--------adding to taskid2task map: %s", taskid))
	}

	for _, task := range job.Tasks {
		err = appr.createIOnodes_forTask(job, task, taskid2task, taskid_processed)
		if err != nil {
			return fmt.Errorf("error in task %s: %s", task.App.Name, err.Error())
		}
	}

	logger.Debug(1, "+++ +++ createIONodesm finished")
	return nil
}
Exemple #11
0
// Get_default_app_variables builds the variable table of an app from the
// input arguments of its definition (acm.Input). Every argument becomes one
// AppVariable, keyed by the argument name and carrying the converted type,
// the default value and the option/optional settings.
//
// This function is called on the server. It fails when the type of an
// argument cannot be converted by String2apptype; the variables collected so
// far are returned along with the error.
func (acm AppCommandMode) Get_default_app_variables() (app_variables AppVariables, err error) {
	app_variables = make(AppVariables)

	// *** app input arguments (app definition)
	logger.Debug(1, fmt.Sprintf("Get_default_app_variables: size of acm.Input=%d", len(acm.Input)))
	time.Sleep(15 * time.Millisecond)

	for _, arg := range acm.Input {
		// save the defaults if available
		logger.Debug(1, fmt.Sprintf("from app-definition: variable \"%s\"", arg.Name))

		varType, convErr := String2apptype(arg.Type)
		if convErr != nil {
			return app_variables, fmt.Errorf("error converting type, error=%s", convErr.Error())
		}
		logger.Debug(1, fmt.Sprintf("from app-definition: variable \"%s\" has type %s", arg.Name, apptype2string(varType)))

		logger.Debug(1, fmt.Sprintf("from app-definition: write variable:\"%s\" - default value: \"%s\"", arg.Name, arg.DefaultValue))
		entry := AppVariable{
			Key:      arg.Name,
			Var_type: varType,
			Value:    arg.DefaultValue,
			Option:   arg.Option,
			Optional: arg.Optional,
		}
		app_variables[arg.Name] = entry
	}

	return app_variables, nil
}
Exemple #12
0
// movePreData fetches the prerequisite data of a workunit (e.g. reference
// dbs) and makes it available in the work directory. It returns the number
// of bytes that were downloaded.
//
// Each predata file is stored once under <conf.DATA_PATH>/predata and is only
// downloaded when it is not present there yet. Workunits that run in docker
// (a docker image is set or the command name starts with "app:") get a copy
// of the file in their work directory; all other workunits get a symlink.
func movePreData(workunit *core.Workunit) (size int64, err error) {
	for name, io := range workunit.Predata {
		predata_directory := path.Join(conf.DATA_PATH, "predata")
		// permission bits have to be given in octal (0755 = rwxr-xr-x)
		err = os.MkdirAll(predata_directory, 0755)
		if err != nil {
			return 0, errors.New("error creating predata_directory: " + err.Error())
		}

		file_path := path.Join(predata_directory, name)
		if !isFileExisting(file_path) {
			fetched, ferr := shock.FetchFile(file_path, io.Url, workunit.Info.DataToken, io.Uncompress)
			if ferr != nil {
				return 0, errors.New("error in fetchFile:" + ferr.Error())
			}
			// add up the sizes of all downloads instead of keeping only the last one
			size += fetched
		}

		linkname := path.Join(workunit.Path(), name)
		if workunit.Cmd.Dockerimage != "" || strings.HasPrefix(workunit.Cmd.Name, "app:") { // TODO need more save way to detect use of docker

			use_symlink := false // TODO mechanism

			if use_symlink {
				file_path = path.Join(conf.DOCKER_WORKUNIT_PREDATA_DIR, name)
				// some tasks want to write in predata dir, thus need symlink
				logger.Debug(1, "dangling symlink:"+linkname+" -> "+file_path)

				// creation of dangling symlinks is not possible with os.Symlink, thus use system ln
				link_out, lerr := exec.Command("ln", "-s", file_path, linkname).CombinedOutput()
				logger.Debug(1, fmt.Sprintf("ln returned %s", link_out))

				if lerr != nil {
					return 0, errors.New("error creating predata file symlink (dangling version): " + lerr.Error())
				}
			} else {
				// some programs do not accept symlinks (e.g. emirge), need to copy the file into the work directory
				// linkname refers to target file now.
				logger.Debug(1, "copy predata:"+file_path+" -> "+linkname)

				if _, cerr := shock.CopyFile(file_path, linkname); cerr != nil {
					return 0, fmt.Errorf("error copying file from %s to %s: %s", file_path, linkname, cerr.Error())
				}
			}
		} else {
			logger.Debug(1, "symlink:"+linkname+" -> "+file_path)

			err = os.Symlink(file_path, linkname)
			if err != nil {
				return 0, errors.New("error creating predata file symlink: " + err.Error())
			}
		}

	}
	return size, nil
}
Exemple #13
0
// InspectImage returns the description of the docker image dockerimage_id.
//
// With a non-nil client the request is delegated to client.InspectImage.
// Without a client the docker binary (conf.DOCKER_BINARY) is run as
// "docker inspect <id>" and its JSON output is decoded; the command is
// expected to describe exactly one image. If the image does not exist the
// command exits with a non-zero status and a text on stderr, which is written
// to the debug log before the error is returned.
func InspectImage(client *docker.Client, dockerimage_id string) (image *docker.Image, err error) {
	logger.Debug(1, fmt.Sprintf("(InspectImage) %s:", dockerimage_id))

	if client != nil {
		return client.InspectImage(dockerimage_id)
	}

	cmd := exec.Command(conf.DOCKER_BINARY, "inspect", dockerimage_id)

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}

	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, err
	}

	if err = cmd.Start(); err != nil {
		return nil, err
	}

	var image_array []docker.Image
	err_json := json.NewDecoder(stdout).Decode(&image_array)
	if err_json != nil {
		logger.Debug(1, fmt.Sprintf("(InspectImage) err_json: %s", err_json.Error()))
	}

	// Both pipes have to be read before Wait is called: Wait closes them, so
	// reading stderr afterwards would not return the message of the command.
	stderr_bytearray, err_read := ioutil.ReadAll(stderr)

	if err = cmd.Wait(); err != nil {
		if err_read == nil {
			logger.Debug(1, fmt.Sprintf("(InspectImage) STDERR: %s", stderr_bytearray))
		}
		return nil, err
	}

	if err_json != nil {
		// the command succeeded but its output could not be decoded
		return nil, err_json
	}

	if len(image_array) != 1 {
		return nil, errors.New("error: inspect returned zero (or more than one) images")
	}

	return &image_array[0], nil
}
Exemple #14
0
// Get_request sends an HTTP GET request to the Shock server sc.Host and
// decodes the JSON body of the reply into response.
//
// resource is used as path and query as query string of the request url.
// response has to be a pointer that json.Unmarshal can decode into. An error
// is returned when sc.Host cannot be parsed, when no reply arrives within
// conf.SHOCK_TIMEOUT, when the request or reading its body fails, or when the
// body cannot be decoded. Error entries inside the decoded reply are not
// inspected here.
func (sc *ShockClient) Get_request(resource string, query url.Values, response interface{}) (err error) {

	logger.Debug(1, fmt.Sprint("string_url: ", sc.Host))

	myurl, err := url.ParseRequestURI(sc.Host)
	if err != nil {
		return err
	}

	myurl.Path = resource
	myurl.RawQuery = query.Encode()

	shockurl := myurl.String()

	logger.Debug(1, fmt.Sprint("shock request url: ", shockurl))

	if len(shockurl) < 5 {
		return errors.New("could not parse SHOCK_DOCKER_IMAGE_REPOSITORY")
	}

	// The request runs in its own goroutine so that it can be abandoned after
	// the timeout. Its outcome travels through the channel only; the
	// goroutine must not write to variables of this function, because the
	// function may already have returned when the request finishes.
	type getResult struct {
		res *http.Response
		err error
	}
	c := make(chan getResult, 1) // buffered: the sender never blocks
	go func() {
		r, geterr := httpclient.Get(shockurl, httpclient.Header{}, nil, nil)
		c <- getResult{res: r, err: geterr}
	}()

	var res *http.Response
	select {
	case got := <-c:
		res, err = got.res, got.err
	case <-time.After(conf.SHOCK_TIMEOUT):
		// make sure the body of a reply that arrives late is still closed
		go func() {
			if late := <-c; late.err == nil && late.res != nil {
				late.res.Body.Close()
			}
		}()
		return errors.New("timeout when getting node from shock, url=" + shockurl)
	}
	if err != nil {
		return err
	}
	defer res.Body.Close()

	jsonstream, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return err
	}

	return json.Unmarshal(jsonstream, response)
}
Exemple #15
0
// getMetaDataField reads one field of the instance metadata service and
// returns the body of the reply as string.
//
// The url is "<conf.OPENSTACK_METADATA_URL>/<field>". Up to three attempts
// are made; an attempt counts as failed when the request fails, when it takes
// longer than conf.INSTANCE_METADATA_TIMEOUT, or when the reply is empty. The
// error of the last attempt (or an "empty result" error) is returned when no
// attempt succeeds.
func getMetaDataField(field string) (result string, err error) {
	var url = fmt.Sprintf("%s/%s", conf.OPENSTACK_METADATA_URL, field) // TODO this is not OPENSTACK, this is EC2
	logger.Debug(1, fmt.Sprintf("url=%s", url))

	// outcome of one attempt, handed from the request goroutine to the caller
	type fetchResult struct {
		body string
		err  error
	}

	for i := 0; i < 3; i++ {
		// The channel is buffered so that a request that finishes after the
		// timeout can still deliver its outcome and end, instead of blocking
		// forever. The goroutine only uses its own variables; it must not
		// touch result/err, which later attempts and the caller keep using.
		c := make(chan fetchResult, 1)
		go func() {
			res, geterr := http.Get(url)
			if geterr != nil {
				c <- fetchResult{err: geterr} //we are ending with error
				return
			}
			defer res.Body.Close()

			bodybytes, readerr := ioutil.ReadAll(res.Body)
			if readerr != nil {
				c <- fetchResult{err: readerr} //we are ending with error
				return
			}
			c <- fetchResult{body: string(bodybytes)} //we are ending without error
		}()

		select {
		case got := <-c:
			result, err = got.body, got.err
		case <-time.After(conf.INSTANCE_METADATA_TIMEOUT): //GET timeout
			err = errors.New("timeout: " + url)
		}

		if err != nil {
			logger.Error(fmt.Sprintf("warning: (iteration=%d) %s \"%s\"", i, url, err.Error()))
			continue
		} else if result == "" {
			logger.Error(fmt.Sprintf("warning: (iteration=%d) %s empty result", i, url))
			continue
		}

		break
	}

	if err != nil {
		return "", err
	}

	if result == "" {
		return "", fmt.Errorf("metadata result empty, %s", url)
	}

	logger.Debug(1, fmt.Sprintf("Intance Metadata %s => \"%s\"", url, result))
	return result, nil
}
Exemple #16
0
// selectWorkunits picks up to count workunits out of the workunits with the
// given ids. With policy "FCFS" the candidates are sorted with byFCFS before
// the first count of them are taken; any other policy keeps the order in
// which GetSet returns them. Fewer than count workunits are returned when
// fewer candidates exist.
func (wq *WQueue) selectWorkunits(workid []string, policy string, count int) (selected []*Workunit, err error) {
	logger.Debug(3, "starting selectWorkunits\n")

	worklist := wq.GetSet(workid)
	if policy == "FCFS" {
		sort.Sort(byFCFS{worklist})
	}

	// never read past the end of the candidate list
	if count > len(worklist) {
		count = len(worklist)
	}
	for i := 0; i < count; i++ {
		selected = append(selected, worklist[i])
	}

	logger.Debug(3, "done with selectWorkunits\n")
	return selected, nil
}
Exemple #17
0
// GetAppPackage returns the app package with the given name.
//
// Packages already present in the registry are returned directly. Otherwise
// "<conf.APP_REGISTRY_URL>/<app_package>.json" is downloaded and decoded, the
// result is stored in the registry and returned. The download is attempted up
// to three times with a pause of one second between attempts; an error is
// returned when all attempts fail.
func (apr AppRegistry) GetAppPackage(app_package string) (ap *AppPackage, err error) {

	if cached, ok := apr[app_package]; ok {
		return cached, nil
	}

	package_url := conf.APP_REGISTRY_URL + "/" + app_package + ".json"

	for i := 0; i < 3; i++ {

		if i > 0 {
			time.Sleep(1000 * time.Millisecond)
		}
		logger.Debug(1, fmt.Sprintf("downloading app package \"%s\"", package_url))

		res, geterr := httpclient.GetTimeout(package_url, nil, nil, nil, 5000*time.Millisecond)
		if geterr != nil {
			logger.Error("warning: " + conf.APP_REGISTRY_URL + " " + geterr.Error())
			continue
		}

		app_package_json, readerr := ioutil.ReadAll(res.Body)
		// Close right away rather than with defer: this runs inside a retry
		// loop and the body must not stay open across attempts.
		res.Body.Close()
		if readerr != nil {
			logger.Error(fmt.Sprintf("warning, could not read app registry json: %s", readerr.Error()))
			continue
		}

		// transform json into go struct; a fresh target per attempt keeps
		// leftovers of a failed decode out of the next one
		new_app_package := new(AppPackage)
		if jsonerr := json.Unmarshal(app_package_json, new_app_package); jsonerr != nil {
			logger.Error("error unmarshaling app package " + app_package + ", error=" + jsonerr.Error())
			continue
		}

		apr[app_package] = new_app_package

		logger.Debug(1, "app package unmarshalled")
		return new_app_package, nil
	}

	return nil, errors.New("could not get app package from " + package_url)
}
Exemple #18
0
// createOutputNode posts a Shock node for every output of task (using
// PostNodeWithToken with the task's data token) and stores the id of the new
// node in the output. The first failing post aborts the loop and its error is
// returned.
func (qm *ServerMgr) createOutputNode(task *Task) (err error) {
	for fname, out := range task.Outputs {
		logger.Debug(2, fmt.Sprintf("posting output Shock node for file %s in task %s\n", fname, task.Id))

		posted, postErr := PostNodeWithToken(out, task.TotalWork, task.Info.DataToken)
		if postErr != nil {
			return postErr
		}

		out.Node = posted
		logger.Debug(2, fmt.Sprintf("task %s: output Shock node created, node=%s\n", task.Id, posted))
	}
	return nil
}
Exemple #19
0
// filterWorkByClient returns the ids of all waiting workunits that the client
// with the given id is allowed to check out. A workunit is eligible when
//
//   - it is not in the client's skip list,
//   - it has no client groups, or the client's group is one of them, and
//   - the client supports the workunit's command (or supports conf.ALL_APP).
//
// No ids are returned when the client is unknown or has no skip list.
func (qm *CQMgr) filterWorkByClient(clientid string) (ids []string) {
	client := qm.clientMap[clientid]

	// The state of the client is the same for every workunit, so it is
	// checked (and reported) once instead of once per workunit.
	if client == nil {
		fmt.Fprintf(os.Stderr, "error: client %s is nil\n", clientid)
		logger.Error(fmt.Sprintf("error: client %s is nil", clientid))
		return ids
	}
	if client.Skip_work == nil {
		fmt.Fprintf(os.Stderr, "error: Skip_work for client %s is nil\n", clientid)
		logger.Error(fmt.Sprintf("error: Skip_work for client %s is nil", clientid))
		return ids
	}

	for id := range qm.workQueue.wait {
		work, ok := qm.workQueue.workMap[id]
		if !ok {
			logger.Error(fmt.Sprintf("error: workunit %s is in wait queue but not in workMap", id))
			continue
		}

		// In case of edge case where pointer to workunit is in queue but workunit has been deleted
		// If work.Info is nil, this will cause errors in execution
		// These will be deleted by servermgr.updateQueue()
		if work == nil || work.Info == nil {
			continue
		}

		//skip works that are in the client's skip-list
		if contains(client.Skip_work, work.Id) {
			logger.Debug(2, fmt.Sprintf("2) contains(client.Skip_work, work.Id) %s", id))
			continue
		}
		//skip works that have dedicate client groups which this client doesn't belong to
		if len(work.Info.ClientGroups) > 0 {
			eligible_groups := strings.Split(work.Info.ClientGroups, ",")
			if !contains(eligible_groups, client.Group) {
				logger.Debug(2, fmt.Sprintf("3) !contains(eligible_groups, client.Group) %s", id))
				continue
			}
		}
		//append works whos apps are supported by the client
		if contains(client.Apps, work.Cmd.Name) || contains(client.Apps, conf.ALL_APP) {
			ids = append(ids, id)
		} else {
			logger.Debug(2, fmt.Sprintf("3) contains(client.Apps, work.Cmd.Name) || contains(client.Apps, conf.ALL_APP) %s", id))
		}
	}
	return ids
}
Exemple #20
0
// locateInputs resolves the Shock nodes of the inputs of task and verifies
// that inputs and predata are available.
//
// An input without a Url takes its node from the output with the same file
// name of the preceding task "<jobid>_<origin>", unless that task was
// skipped. The function fails when an input still has the placeholder node
// "-", or when GetFileSize reports a negative size for an input or predata
// entry that refers to a Shock node.
func (qm *ServerMgr) locateInputs(task *Task) (err error) {
	logger.Debug(2, "trying to locate Inputs of task "+task.Id)
	jobid, _ := GetJobIdByTaskId(task.Id)
	for _, io := range task.Inputs {
		name := io.FileName
		if io.Url == "" {
			preId := fmt.Sprintf("%s_%s", jobid, io.Origin)
			if preTask, ok := qm.getTask(preId); ok {
				if preTask.State == TASK_STAT_SKIPPED ||
					preTask.State == TASK_STAT_FAIL_SKIP {
					// For now we know that skipped tasks have
					// just one input and one output. So we know
					// that we just need to change one file (this
					// may change in the future)
					//locateSkippedInput(qm, preTask, io)
				} else {
					for _, outio := range preTask.Outputs {
						if outio.FileName == name {
							io.Node = outio.Node
						}
					}
				}
			}
		}
		logger.Debug(2, fmt.Sprintf("processing input %s, %s\n", name, io.Node))
		if io.Node == "-" {
			return fmt.Errorf("error in locate input for task %s, %s", task.Id, name)
		}
		//need time out!
		if io.Node != "" && io.GetFileSize() < 0 {
			return fmt.Errorf("task %s: input file %s not available", task.Id, name)
		}
		logger.Debug(2, fmt.Sprintf("inputs located %s, %s\n", name, io.Node))
	}
	// locate predata
	for _, io := range task.Predata {
		name := io.FileName
		logger.Debug(2, fmt.Sprintf("processing predata %s, %s\n", name, io.Node))
		// only verify predata that is a shock node
		if io.Node == "" || io.Node == "-" {
			continue
		}
		// Ask for the size once: a negative size marks a bad shock node,
		// anything else means the predata has been located.
		if io.GetFileSize() < 0 {
			return fmt.Errorf("task %s: predata file %s not available", task.Id, name)
		}
		logger.Debug(2, fmt.Sprintf("predata located %s, %s\n", name, io.Node))
	}
	return nil
}
Exemple #21
0
// WaitContainer runs "docker wait <container_id>" through the docker binary
// (conf.DOCKER_BINARY) and returns the status code that the command prints
// on the first line of its standard output. The code may be negative.
//
// An error is returned when the command fails, when it prints no complete
// first line, or when that line is not an integer.
func WaitContainer(container_id string) (status int, err error) {
	logger.Debug(1, fmt.Sprintf("(WaitContainer) container id: %s", container_id))

	stdo, stde, err := RunCommand(conf.DOCKER_BINARY, "wait", container_id)
	if err != nil {
		logger.Debug(1, fmt.Sprintf("(WaitContainer) cmd.Wait returned error: %s", err.Error()))

		logger.Debug(1, fmt.Sprintf("(WaitContainer) cmd.Wait stdout: %s", stdo))
		logger.Debug(1, fmt.Sprintf("(WaitContainer) cmd.Wait stderr: %s", stde))

		return 0, err
	}

	// extract only first line
	endofline := bytes.IndexByte(stdo, '\n')
	if endofline <= 0 {
		return 0, errors.New("docker wait returned empty string")
	}
	stdout_line := string(bytes.TrimSpace(stdo[:endofline]))

	// strconv.Atoi accepts a leading sign, so negative codes need no special handling
	status, err = strconv.Atoi(stdout_line)
	if err != nil {
		logger.Debug(1, fmt.Sprintf("(WaitContainer) could not interpret status code: \"%s\"", stdout_line))
		return 0, err
	}

	return status, nil
}
Exemple #22
0
// updateJobTask updates the job a task belongs to after the task changed to
// a new state.
//
// The job is loaded by the part of the task id in front of the first "_" and
// updated with the task. When no tasks remain, the job is finished: its
// performance data is finalized and logged, it is removed from the active
// jobs, the flagged inputs/outputs of all its tasks are deleted, the tasks
// are removed, and a JOB_DONE event is logged.
func (qm *ServerMgr) updateJobTask(task *Task) (err error) {
	jobid := strings.Split(task.Id, "_")[0]

	job, err := LoadJob(jobid)
	if err != nil {
		return err
	}

	remaining, err := job.UpdateTask(task)
	if err != nil {
		return err
	}

	logger.Debug(2, fmt.Sprintf("remaining tasks for task %s: %d", task.Id, remaining))

	if remaining != 0 {
		return nil
	}

	// job done
	qm.FinalizeJobPerf(jobid)
	qm.LogJobPerf(jobid)
	qm.removeActJob(jobid)

	// delete from shock what is flagged for deletion, then drop the tasks
	// from the task map
	for _, finished := range job.TaskList() {
		finished.DeleteOutput()
		finished.DeleteInput()
		qm.deleteTask(finished.Id)
	}

	//log event about job done (JD)
	logger.Event(event.JOB_DONE, "jobid="+job.Id+";jid="+job.Jid+";project="+job.Info.Project+";name="+job.Info.Name)
	return nil
}
Exemple #23
0
// moveInputData downloads every input of a workunit into its work directory
// and returns the number of bytes moved.
//
// Workunits with a rank above 0 request only their part of a file by
// appending index/part parameters to the data url. The download is done with
// shock.FetchFile, which gets the file from any downloadable url, not just
// shock. The first error stops the processing.
func moveInputData(work *core.Workunit) (size int64, err error) {
	for _, input := range work.Inputs {
		fname := input.FileName

		srcUrl, urlErr := input.DataUrl()
		if urlErr != nil {
			return 0, urlErr
		}
		if work.Rank > 0 {
			srcUrl = fmt.Sprintf("%s&index=%s&part=%s", srcUrl, work.IndexType(), work.Part())
		}

		target := path.Join(work.Path(), fname)

		logger.Debug(2, "mover: fetching input from url:"+srcUrl)
		logger.Event(event.FILE_IN, "workid="+work.Id+" url="+srcUrl)

		moved, _, fetchErr := shock.FetchFile(target, srcUrl, work.Info.DataToken, input.Uncompress, false)
		if fetchErr != nil {
			return size, fetchErr
		}
		size += moved

		logger.Event(event.FILE_READY, "workid="+work.Id+";url="+srcUrl)
	}
	return size, nil
}
Exemple #24
0
// TaskHandle never returns: it receives tasks from the taskIn channel, one
// at a time, and adds each of them with addTask.
func (qm *ServerMgr) TaskHandle() {
	for {
		incoming := <-qm.taskIn

		msg := fmt.Sprintf("qmgr:task recived from chan taskIn, id=%s\n", incoming.Id)
		logger.Debug(2, msg)

		qm.addTask(incoming)
	}
}
Exemple #25
0
// ClientChecker never returns: every 30 seconds it walks over all clients.
//
// A client whose Tag is set gets the tag cleared and its serve time
// (since RegTime) and idle time refreshed. A client whose Tag is still
// cleared has not set it again since the previous round and is considered
// gone: an unregister event is logged, its workunits are requeued and it is
// removed.
func (qm *CQMgr) ClientChecker() {
	for {
		time.Sleep(30 * time.Second)
		logger.Debug(3, "time to update client list....")

		for _, cl := range qm.GetAllClients() {
			if !cl.Tag {
				if !qm.HasClient(cl.Id) {
					continue
				}
				//now client must be gone as tag set to false 30 seconds ago and no heartbeat received thereafter
				logger.Event(event.CLIENT_UNREGISTER, "clientid="+cl.Id+";name="+cl.Name)
				//requeue unfinished workunits associated with the failed client
				qm.ReQueueWorkunitByClient(cl.Id)
				//delete the client from client map
				qm.RemoveClient(cl.Id)
				continue
			}

			cl.Tag = false

			served := int(time.Since(cl.RegTime).Minutes())
			cl.Serve_time = fmt.Sprintf("%dh%dm", served/60, served%60)

			if cl.Current_work_length() > 0 {
				cl.Idle_time = 0
			} else {
				cl.Idle_time += 30
			}
			qm.PutClient(cl)
		}
	}
}
Exemple #26
0
// ClientHeartBeat registers a heartbeat of the client with the given id and
// returns the messages the server has for it.
//
// The client's Tag is set and the client is stored again. The reply may
// carry two entries: "discard" lists, comma separated, the ids of the
// client's workunits that are suspended, and "stop" is set to the client id
// when the client's status is deleted.
//
// Errors: e.ClientNotFound for an unknown client, and e.ClientGroupBadName
// when cg is given and its name differs from the client's group.
func (qm *CQMgr) ClientHeartBeat(id string, cg *ClientGroup) (hbmsg HBmsg, err error) {
	hbmsg = make(map[string]string, 1)

	client, found := qm.GetClient(id)
	if !found {
		return hbmsg, errors.New(e.ClientNotFound)
	}

	// the name of the clientgroup (from auth token) has to match the group
	// of the retrieved client
	if cg != nil && client.Group != cg.Name {
		return nil, errors.New(e.ClientGroupBadName)
	}

	client.Tag = true
	qm.PutClient(client)
	logger.Debug(3, "HeartBeatFrom:"+"clientid="+id+",name="+client.Name)

	//get suspended workunit that need the client to discard
	var suspended []string
	for _, unit := range qm.workQueue.GetSet(qm.getWorkByClient(id)) {
		if unit.State == WORK_STAT_SUSPEND {
			suspended = append(suspended, unit.Id)
		}
	}
	if len(suspended) > 0 {
		hbmsg["discard"] = strings.Join(suspended, ",")
	}

	if client.Get_Status() == CLIENT_STAT_DELETED {
		hbmsg["stop"] = id
	}

	return hbmsg, nil
}
Exemple #27
0
// RecoverJob puts a job that exists in the db but is missing from the queue
// (caused by a server restart) back into the server manager.
//
// A suspended job is registered as suspended; any other recoverable job gets
// its info attached to all of its tasks, which are then enqueued. Jobs that
// are already active, cannot be loaded, or are completed or deleted are
// rejected with an error.
func (qm *ServerMgr) RecoverJob(id string) (err error) {
	if qm.isActJob(id) {
		return errors.New("job " + id + " is already active")
	}

	//Load job by id
	dbjob, err := LoadJob(id)
	if err != nil {
		return errors.New("failed to load job " + err.Error())
	}

	switch {
	case dbjob.State == JOB_STAT_SUSPEND:
		qm.putSusJob(dbjob.Id)

	case dbjob.State == JOB_STAT_COMPLETED || dbjob.State == JOB_STAT_DELETED:
		return errors.New("job is in " + dbjob.State + " state thus cannot be recovered")

	default:
		for _, t := range dbjob.Tasks {
			t.Info = dbjob.Info
		}
		qm.EnqueueTasksByJobId(dbjob.Id, dbjob.TaskList())
	}

	logger.Debug(2, fmt.Sprintf("Recovered job %s", id))
	return nil
}
Exemple #28
0
// heartbeating sends one heartbeat for the client clientid to the server
// host ("<host>/client/<clientid>?heartbeat") and returns the message the
// server answers with.
//
// When conf.CLIENT_GROUP_TOKEN is set it is sent as "CG_TOKEN" authorization
// header. An error is returned when the request fails, when the reply cannot
// be read or decoded, or when the reply itself lists errors (joined with
// ",").
func heartbeating(host string, clientid string) (msg core.HBmsg, err error) {
	response := new(HeartbeatResponse)
	targeturl := fmt.Sprintf("%s/client/%s?heartbeat", host, clientid)

	var headers httpclient.Header
	if conf.CLIENT_GROUP_TOKEN != "" {
		headers = httpclient.Header{
			"Authorization": []string{"CG_TOKEN " + conf.CLIENT_GROUP_TOKEN},
		}
	}

	res, err := httpclient.Get(targeturl, headers, nil, nil)
	// the client is the sender and the host the receiver of the heartbeat
	logger.Debug(3, fmt.Sprintf("client %s sent a heartbeat to %s", clientid, host))
	if err != nil {
		return msg, err
	}
	defer res.Body.Close()

	jsonstream, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return msg, err
	}

	if err = json.Unmarshal(jsonstream, response); err != nil {
		return msg, err
	}
	if len(response.Errs) > 0 {
		return msg, errors.New(strings.Join(response.Errs, ","))
	}
	return response.Data, nil
}
Exemple #29
0
// resetTask puts task back into its initial, pending state.
//
// The task gets the given info, its remaining work is set to the total work
// and compute time and completion date are cleared. Inputs that have an
// origin, and all outputs, lose their node ("-"), url and size; an output
// whose data url ends in shock.DATA_SUFFIX is deleted from shock first
// (a failing delete is only logged). Finally the workunit logs listed in
// conf.WORKUNIT_LOGS are deleted for the task.
func resetTask(task *Task, info *Info) {
	task.Info = info
	task.State = TASK_STAT_PENDING
	task.RemainWork = task.TotalWork
	task.ComputeTime = 0
	task.CompletedDate = time.Time{}

	// reset all inputs with an origin
	for _, in := range task.Inputs {
		if in.Origin == "" {
			continue
		}
		in.Node = "-"
		in.Url = ""
		in.Size = 0
	}

	// reset / delete all outputs
	for _, out := range task.Outputs {
		dataUrl, _ := out.DataUrl()
		// delete dataUrl if is shock node
		if dataUrl != "" && strings.HasSuffix(dataUrl, shock.DATA_SUFFIX) {
			delErr := shock.ShockDelete(out.Host, out.Node, out.DataToken)
			if delErr != nil {
				logger.Error(fmt.Sprintf("resetTask: unable to deleted node %s from shock: %s", out.Node, delErr.Error()))
			} else {
				logger.Debug(2, fmt.Sprintf("Deleted node %s from shock", out.Node))
			}
		}
		out.Node = "-"
		out.Url = ""
		out.Size = 0
	}

	// delete all workunit logs
	for _, logname := range conf.WORKUNIT_LOGS {
		deleteStdLogByTask(task.Id, logname)
	}
}
Exemple #30
0
// InitMaxJid initializes qm.nextJid from the file "<conf.DATA_PATH>/maxjid".
//
// When the file cannot be stat'ed it is created with the content "10000" and
// the next jid becomes "10001". Otherwise the file has to contain an integer
// (surrounding whitespace is ignored) and the next jid is that number plus
// one. Errors from creating, writing, reading or parsing the file are
// returned.
func (qm *ServerMgr) InitMaxJid() (err error) {
	jidfile := conf.DATA_PATH + "/maxjid"

	if _, staterr := os.Stat(jidfile); staterr != nil {
		f, createrr := os.Create(jidfile)
		if createrr != nil {
			return createrr
		}
		// Report a failed write or close: an incomplete file would make
		// every later start fail while parsing it.
		_, writeerr := f.WriteString("10000")
		if closeerr := f.Close(); writeerr == nil {
			writeerr = closeerr
		}
		if writeerr != nil {
			return writeerr
		}
		qm.nextJid = "10001"
	} else {
		buf, readerr := ioutil.ReadFile(jidfile)
		if readerr != nil {
			return readerr
		}
		bufstr := strings.TrimSpace(string(buf))

		maxjid, converr := strconv.Atoi(bufstr)
		if converr != nil {
			return converr
		}

		qm.nextJid = strconv.Itoa(maxjid + 1)
	}

	logger.Debug(2, fmt.Sprintf("qmgr:jid initialized, next jid=%s\n", qm.nextJid))
	return nil
}