func GetStringOpt(key string) string { cnode := configFile.Get(key) if cnode == nil { o.Assert("tried to get a configuration option that doesn't exist.") } sopt, ok := cnode.(*configureit.StringOption) if !ok { o.Assert("tried to get a non-string configuration option with GetStringOpt") } return strings.TrimSpace(sopt.Value) }
func GetIntOpt(key string) int { cnode := configFile.Get(key) if cnode == nil { o.Assert("tried to get a configuration option that doesn't exist.") } sopt, ok := cnode.(*configureit.IntOption) if !ok { o.Assert("tried to get a non-int configuration option with GetIntOpt") } return sopt.Value }
func handleRequest(c net.Conn, message interface{}) { o.Debug("Request Recieved. Decoding!") ptr, ok := message.(*o.TaskRequest) if !ok { o.Assert("CC stuffed up - handleRequest got something that wasn't a TaskRequest.") } task := TaskFromProto(ptr) /* search the registry for the task */ o.Debug("Request for Job.ID %d", task.Id) existing := TaskGet(task.Id) if nil != existing { if existing.MyResponse.IsFinished() { o.Debug("job%d: Resending Response", task.Id) sendResponse(c, existing.MyResponse) } } else { // check to see if we have the score // add the Job to our Registry task.MyResponse = NewTaskResponse() task.MyResponse.id = task.Id task.MyResponse.State = RESP_PENDING TaskAdd(task) o.Info("Added New Task (Job ID %d) to our local registry", task.Id) // and then push it onto the pending job list so we know it needs actioning. appendPendingTask(task) } }
func RegisterInterface(ifname string, initfunc func(*TaskRequest) ScoreInterface) { _, exists := interfaces[ifname] if exists { o.Assert("Multiple attempts to register %s interface", ifname) } interfaces[ifname] = initfunc }
func handleRequest(c net.Conn, message interface{}) { o.Debug("Request Recieved. Decoding!") ptr, ok := message.(*o.ProtoTaskRequest) if !ok { o.Assert("CC stuffed up - handleRequest got something that wasn't a ProtoTaskRequest.") } job := o.JobFromProto(ptr) /* search the registry for the job */ o.Debug("Request for Job.ID %d", job.Id) existing := o.JobGet(job.Id) if nil != existing { if existing.MyResponse.IsFinished() { o.Debug("job%d: Resending Response", job.Id) sendResponse(c, existing.MyResponse) } } else { // check to see if we have the score // add the Job to our Registry job.MyResponse = o.NewTaskResponse() job.MyResponse.Id = job.Id job.MyResponse.State = o.RESP_PENDING o.JobAdd(job) o.Info("Added New Job %d to our local registry", job.Id) // and then push it onto the pending job list so we know it needs actioning. appendPendingJob(job) } }
// Ugh. func (job *JobRequest) updateState() { if job.Results == nil { o.Assert("job.Results nil for jobid %d", job.Id) return } was_finished := job.State.Finished() switch job.Scope { case SCOPE_ONEOF: // look for a success (any success) in the responses var success bool for host, res := range job.Results { if res == nil { o.Debug("nil result for %s?", host) continue } if res.State == RESP_FINISHED { success = true break } } // update the job state based upon these findings if success { job.State = JOB_SUCCESSFUL } else { if len(job.Players) < 1 { job.State = JOB_FAILED } else { job.State = JOB_PENDING } } case SCOPE_ALLOF: var success, failed int for pidx := range job.Players { p := job.Players[pidx] resp, exists := job.Results[p] if exists { if resp.DidFail() { failed++ } else if resp.State == RESP_FINISHED { success++ } } } if (success + failed) < len(job.Players) { job.State = JOB_PENDING } else if success == len(job.Players) { job.State = JOB_SUCCESSFUL } else if failed == len(job.Players) { job.State = JOB_FAILED } else { job.State = JOB_FAILED_PARTIAL } } if !was_finished && job.State.Finished() { o.Debug("job%d: finished; setting expiry time", job.Id) regInternalMarkJobForExpiry(job) } }
func handleAck(c net.Conn, message interface{}) { o.Debug("Ack Received") ack, ok := message.(*o.Acknowledgement) if !ok { o.Assert("CC stuffed up - handleAck got something that wasn't a ProtoAcknowledgement.") } acknowledgeResponse(ack.Id) }
func GetCACertList() []string { cnode := configFile.Get("ca certificates") if cnode == nil { o.Assert("tried to get a configuration option that doesn't exist.") } plopt, _ := cnode.(*configureit.PathListOption) return plopt.Values }
func handleAck(c net.Conn, message interface{}) { ack, ok := message.(*o.ProtoAcknowledgement) if !ok { o.Assert("CC stuffed up - handleAck got something that wasn't a ProtoAcknowledgement.") } if ack.Id != nil { acknowledgeResponse(*ack.Id) } }
func regInternalExpireJob(jobid uint64) { job, exists := jobRegister[jobid] if exists { if job.State.Finished() { delete(jobRegister, jobid) } else { o.Assert("Tried to expire incomplete job.") } } }
func regInternalFindNextExpiry() { if expiryChan != nil { o.Assert("Attempted to Find Next Expiry avenue with expiry timer active.") } // if there's nothing to expire, do nothing. if expiryList.Len() == 0 { return } for expiryChan == nil && expiryList.Len() > 0 { jobif := expiryList.Remove(expiryList.Front()) req, ok := jobif.(*JobRequest) if !ok { o.Assert("item in expiryList not a *JobRequest") } if time.Now().Add(expiryLoopFudge).After(req.expirytime) { regInternalExpireJob(req.Id) } else { expiryChan = time.After(req.expirytime.Sub(time.Now())) expiryJobid = req.Id } } }
func handleResult(client *ClientInfo, message interface{}) { jr, _ := message.(*o.ProtoTaskResponse) r := ResponseFromProto(jr) // at this point in time, we only care about terminal // condition codes. a Job that isn't finished is just // prodding us back to let us know it lives. if r.IsFinished() { job := JobGet(r.id) if nil == job { o.Warn("Client %s: NAKing job%d, couldn't find job data", client.Name(), r.id) nack := o.MakeNack(r.id) client.sendNow(nack) } else { job := JobGet(r.id) if job != nil { /* if the job exists, Ack it. */ ack := o.MakeAck(r.id) client.sendNow(ack) } // now, we only accept the results if we were // expecting the results (ie: it was pending) // and expunge the task information from the // pending list so we stop bugging the client for it. task, exists := client.pendingTasks[r.id] if exists { // store the result. if !JobAddResult(client.Player, r) { o.Assert("Couldn't add result for pending task") } // next, work out if the job is a retryable failure or not var didretry bool if r.DidFail() { o.Info("Client %s: reported failure for job%d", client.Name(), r.id) if r.CanRetry() { job := JobGet(r.id) if job.Scope == SCOPE_ONEOF { // right, we're finally deep enough to work out what's going on! JobDisqualifyPlayer(r.id, client.Player) if len(job.Players) >= 1 { // still players left we can try? then go for it! CleanTask(task) DispatchTask(task) didretry = true } } } } if !didretry { // if we didn't retry, the task needs to be marked as finished. task.State = TASK_FINISHED } // update the job state. JobReviewState(r.id) delete(client.pendingTasks, r.id) } } } }
func GetSpoolDirectory() string { if spoolDirectory == "" { o.Assert("GetSpoolDirectory() called before set") } return spoolDirectory }
func doExecution(task *TaskRequest, completionChannel chan<- *TaskResponse) { // we must notify the parent when we exit. defer func(c chan<- *TaskResponse, task *TaskRequest) { c <- task.MyResponse }(completionChannel, task) // first of all, verify that the score exists at all. score, exists := Scores[task.Score] if !exists { o.Warn("job%d: request for unknown score: %s", task.Id, task.Score) task.MyResponse.State = RESP_FAILED_UNKNOWN_SCORE return } si := NewScoreInterface(task) if si == nil { o.Warn("job%d: couldn't initialise score interface", task.Id) task.MyResponse.State = RESP_FAILED_HOST_ERROR return } if !si.Prepare() { o.Warn("job%d: couldn't prepare score interface", task.Id) task.MyResponse.State = RESP_FAILED_HOST_ERROR return } defer si.Cleanup() eenv := si.SetupProcess() task.MyResponse.State = RESP_RUNNING procenv := new(os.ProcAttr) // Build the default environment. procenv.Env = peSetEnv(procenv.Env, "PATH", "/usr/bin:/usr/sbin:/bin:/sbin") procenv.Env = peSetEnv(procenv.Env, "IFS", " \t\n") pwd, err := os.Getwd() if err != nil { task.MyResponse.State = RESP_FAILED_HOST_ERROR o.Warn("job%d: couldn't resolve PWD: %s", task.Id, err) return } procenv.Env = peSetEnv(procenv.Env, "PWD", pwd) // copy in the environment overrides for k, v := range eenv.Environment { procenv.Env = peSetEnv(procenv.Env, k, v) } // attach FDs to procenv. procenv.Files = make([]*os.File, 3) // first off, attach /dev/null to stdin and stdout devNull, err := os.OpenFile(os.DevNull, os.O_RDWR|os.O_APPEND, 0666) o.MightFail(err, "couldn't open DevNull") defer devNull.Close() for i := 0; i < 2; i++ { procenv.Files[i] = devNull } // attach STDERR to to our logger via pipe. lr, lw, err := os.Pipe() o.MightFail(err, "Couldn't create pipe") defer lw.Close() // lr will be closed by the logger. procenv.Files[2] = lw // check the environment's configuration and allow it to override stdin, stdout, and FDs 3+ if nil != eenv.Files { for i := range eenv.Files { if i < 2 { procenv.Files[i] = eenv.Files[i] } else { procenv.Files = append(procenv.Files, eenv.Files[i]) } } } var args []string args = append(args, eenv.Arguments...) o.Info("job%d: executing %s...", task.Id, score.Executable) go batchLogger(task.Id, lr) proc, err := os.StartProcess(score.Executable, args, procenv) if err != nil { o.Warn("job%d: failed to start process", task.Id) task.MyResponse.State = RESP_FAILED_HOST_ERROR return } wm, err := proc.Wait() if err != nil { o.Warn("job%d: error waiting for process", task.Id) task.MyResponse.State = RESP_FAILED_UNKNOWN // Worse of all, we don't even know if we succeeded. return } ws, _ := wm.Sys().(syscall.WaitStatus) if !(ws.Signaled() || ws.Exited()) { o.Assert("Non Terminal notification received when not expected.") return } if ws.Signaled() { o.Warn("job%d: process got signalled", task.Id) task.MyResponse.State = RESP_FAILED_UNKNOWN return } if ws.Exited() { if 0 == ws.ExitStatus() { o.Warn("job%d: process exited OK", task.Id) task.MyResponse.State = RESP_FINISHED } else { o.Warn("job%d: process exited with failure", task.Id) task.MyResponse.State = RESP_FAILED } return } o.Assert("Should never get here.") }