func handleRequest(c net.Conn, message interface{}) { o.Debug("Request Recieved. Decoding!") ptr, ok := message.(*o.ProtoTaskRequest) if !ok { o.Assert("CC stuffed up - handleRequest got something that wasn't a ProtoTaskRequest.") } job := o.JobFromProto(ptr) /* search the registry for the job */ o.Debug("Request for Job.ID %d", job.Id) existing := o.JobGet(job.Id) if nil != existing { if existing.MyResponse.IsFinished() { o.Debug("job%d: Resending Response", job.Id) sendResponse(c, existing.MyResponse) } } else { // check to see if we have the score // add the Job to our Registry job.MyResponse = o.NewTaskResponse() job.MyResponse.Id = job.Id job.MyResponse.State = o.RESP_PENDING o.JobAdd(job) o.Info("Added New Job %d to our local registry", job.Id) // and then push it onto the pending job list so we know it needs actioning. appendPendingJob(job) } }
func handleRequest(c net.Conn, message interface{}) { o.Debug("Request Recieved. Decoding!") ptr, ok := message.(*o.TaskRequest) if !ok { o.Assert("CC stuffed up - handleRequest got something that wasn't a TaskRequest.") } task := TaskFromProto(ptr) /* search the registry for the task */ o.Debug("Request for Job.ID %d", task.Id) existing := TaskGet(task.Id) if nil != existing { if existing.MyResponse.IsFinished() { o.Debug("job%d: Resending Response", task.Id) sendResponse(c, existing.MyResponse) } } else { // check to see if we have the score // add the Job to our Registry task.MyResponse = NewTaskResponse() task.MyResponse.id = task.Id task.MyResponse.State = RESP_PENDING TaskAdd(task) o.Info("Added New Task (Job ID %d) to our local registry", task.Id) // and then push it onto the pending job list so we know it needs actioning. appendPendingTask(task) } }
// Ugh. func (job *JobRequest) updateState() { if job.Results == nil { o.Assert("job.Results nil for jobid %d", job.Id) return } was_finished := job.State.Finished() switch job.Scope { case SCOPE_ONEOF: // look for a success (any success) in the responses var success bool for host, res := range job.Results { if res == nil { o.Debug("nil result for %s?", host) continue } if res.State == RESP_FINISHED { success = true break } } // update the job state based upon these findings if success { job.State = JOB_SUCCESSFUL } else { if len(job.Players) < 1 { job.State = JOB_FAILED } else { job.State = JOB_PENDING } } case SCOPE_ALLOF: var success, failed int for pidx := range job.Players { p := job.Players[pidx] resp, exists := job.Results[p] if exists { if resp.DidFail() { failed++ } else if resp.State == RESP_FINISHED { success++ } } } if (success + failed) < len(job.Players) { job.State = JOB_PENDING } else if success == len(job.Players) { job.State = JOB_SUCCESSFUL } else if failed == len(job.Players) { job.State = JOB_FAILED } else { job.State = JOB_FAILED_PARTIAL } } if !was_finished && job.State.Finished() { o.Debug("job%d: finished; setting expiry time", job.Id) regInternalMarkJobForExpiry(job) } }
func handleAck(c net.Conn, message interface{}) { o.Debug("Ack Received") ack, ok := message.(*o.Acknowledgement) if !ok { o.Assert("CC stuffed up - handleAck got something that wasn't a ProtoAcknowledgement.") } acknowledgeResponse(ack.Id) }
func sendResponse(c net.Conn, resp *TaskResponse) { //FIXME: update retry time on Response o.Debug("Sending Response!") ptr := resp.Encode() p, err := o.Encode(ptr) o.MightFail(err, "Failed to encode response") _, err = p.Send(c) if err != nil { o.Warn("Transmission error: %s", err) c.Close() prequeueResponse(resp) lostConnection <- 1 } else { appendUnacknowledgedResponse(resp) } }
func manageRegistry() { for { select { case req := <-chanRegistryRequest: resp := new(registryResponse) // by default, we failed. resp.success = false // find the operation handler, exists := registryHandlers[req.operation] if exists { handler(req, resp) } if req.responseChannel != nil { req.responseChannel <- resp } case <-expiryChan: o.Debug("job%d: expiring job record", expiryJobid) regInternalExpireJob(expiryJobid) expiryChan = nil regInternalFindNextExpiry() } } }
func handleAudienceRequest(c net.Conn) { defer c.Close() r, _ := c.(io.Reader) w, _ := c.(io.Writer) dec := json.NewDecoder(r) enc := json.NewEncoder(w) outobj := new(GenericJsonRequest) err := dec.Decode(outobj) if err != nil { o.Warn("Error decoding JSON talking to audience: %s", err) return } if nil == outobj.Op { o.Warn("Malformed JSON message talking to audience. Missing Op") return } switch *(outobj.Op) { case "status": if nil == outobj.Id { o.Warn("Malformed Status message talking to audience. Missing Job ID") return } job := JobGet(*outobj.Id) jresp := new([2]interface{}) if nil != job { jresp[0] = "OK" iresp := NewJsonStatusResponse() iresp.Status = job.State resnames := JobGetResultNames(*outobj.Id) for i := range resnames { tr := JobGetResult(*outobj.Id, resnames[i]) if nil != tr { presp := NewJsonPlayerStatus() presp.Status = tr.State for k, v := range tr.Response { presp.Response[k] = v } iresp.Players[resnames[i]] = presp } } jresp[1] = iresp } else { jresp[0] = "Error" jresp[1] = nil } enc.Encode(jresp) o.Debug("Status...") case "queue": if nil == outobj.Score { o.Warn("Malformed Queue message talking to audience. Missing Score") sendQueueFailureResponse("Missing Score", enc) return } if nil == outobj.Scope { o.Warn("Malformed Queue message talking to audience. Missing Scope") sendQueueFailureResponse("Missing Scope", enc) return } if nil == outobj.Players || len(outobj.Players) < 1 { o.Warn("Malformed Queue message talking to audience. 
Missing Players") sendQueueFailureResponse("Missing Players", enc) return } for _, player := range outobj.Players { if !HostAuthorised(player) { o.Warn("Malformed Queue message - unknown player %s specified.", player) sendQueueFailureResponse("Invalid Player", enc) return } } job := NewRequest() job.Score = *outobj.Score job.Scope = *outobj.Scope job.Players = outobj.Players job.Params = outobj.Params QueueJob(job) sendQueueSuccessResponse(job, enc) default: o.Warn("Unknown operation talking to audience: \"%s\"", *(outobj.Op)) return } _ = enc }
func ProcessingLoop() { var conn net.Conn = nil var nextRetryResp *TaskResponse = nil var taskCompletionChan <-chan *TaskResponse = nil var connectDelay time.Duration var doScoreReload bool = false // kick off a new connection attempt. go connectMe(connectDelay) // and this is where we spin! for { var retryDelay time.Duration = 0 var retryChan <-chan time.Time = nil if conn != nil { for nextRetryResp == nil { nextRetryResp = getNextUnacknowledgedResponse() if nil == nextRetryResp { break } retryDelay = nextRetryResp.RetryTime.Sub(time.Now()) if retryDelay < 0 { sendResponse(conn, nextRetryResp) nextRetryResp = nil } } if nextRetryResp != nil { retryChan = time.After(retryDelay) } } if taskCompletionChan == nil { nextTask := getNextPendingTask() if nextTask != nil { taskCompletionChan = ExecuteTask(nextTask) } else { if conn != nil && !pendingTaskRequest { o.Debug("Asking for trouble") p := o.MakeReadyForTask() p.Send(conn) o.Debug("Sent Request for trouble") pendingTaskRequest = true } } } select { // Currently executing job finishes. case newresp := <-taskCompletionChan: o.Debug("job%d: Completed with State %s\n", newresp.id, newresp.State) // preemptively set a retrytime. newresp.RetryTime = time.Now() // ENOCONN - sub it in as our next retryresponse, and prepend the old one onto the queue. if nil == conn { if nil != nextRetryResp { prequeueResponse(nextRetryResp) } o.Debug("job%d: Queuing Initial Response", newresp.id) nextRetryResp = newresp } else { o.Debug("job%d: Sending Initial Response", newresp.id) sendResponse(conn, newresp) } if doScoreReload { o.Info("Performing Deferred score reload") LoadScores() doScoreReload = false } taskCompletionChan = nil // If the current unacknowledged response needs a retry, send it. case <-retryChan: sendResponse(conn, nextRetryResp) nextRetryResp = nil // New connection. Set up the receiver thread and Introduce ourselves. 
case nci := <-newConnection: if conn != nil { conn.Close() } conn = nci.conn connectDelay = nci.timeout pendingTaskRequest = false // start the reader go Reader(conn) /* Introduce ourself */ p := o.MakeIdentifyClient(LocalHostname, PlayerVersion) p.Send(conn) // Lost connection. Shut downt he connection. case <-lostConnection: o.Warn("Lost Connection to Master") conn.Close() conn = nil // restart the connection attempts go connectMe(connectDelay) // Message received from master. Decode and action. case p := <-receivedMessage: // because the message could possibly be an ACK, push the next retry response back into the queue so acknowledge can find it. if nil != nextRetryResp { prequeueResponse(nextRetryResp) nextRetryResp = nil } var upkt interface{} = nil if p.Length > 0 { var err error upkt, err = p.Decode() o.MightFail(err, "Couldn't decode packet from master") } handler, exists := dispatcher[p.Type] if exists { connectDelay = 0 handler(conn, upkt) } else { o.Fail("Unhandled Pkt Type %d", p.Type) } // Reload scores case <-reloadScores: // fortunately this is actually completely safe as // long as nobody's currently executing. // who'd have thunk it? if taskCompletionChan == nil { o.Info("Reloading scores") LoadScores() } else { o.Info("Deferring score reload (execution in progress)") doScoreReload = true } // Keepalive delay expired. Send Nop. case <-time.After(KeepaliveDelay): if conn == nil { break } o.Debug("Sending NOP") p := o.MakeNop() p.Send(conn) } } }
// handleNop handles a NOP packet. It only logs that one arrived; neither c
// nor message is used.
func handleNop(c net.Conn, message interface{}) {
	const note = "NOP Received"
	o.Debug(note)
}
func masterDispatch() { pq := list.New() tq := list.New() for { select { case player := <-playerIdle: o.Debug("Dispatch: Player") /* first, scan to see if we have anything for this player */ i := tq.Front() for { if nil == i { /* Out of items! */ /* Append this player to the waiting players queue */ pq.PushBack(player) break } t, _ := i.Value.(*TaskRequest) if t.IsTarget(player.Player) { /* Found a valid job. Send it to the player, and remove it from our pending * list */ tq.Remove(i) player.TaskQ <- t break } i = i.Next() } case player := <-playerDead: o.Debug("Dispatch: Dead Player") for i := pq.Front(); i != nil; i = i.Next() { p, _ := i.Value.(*ClientInfo) if player.Player == p.Player { pq.Remove(i) break } } case task := <-rqTask: o.Debug("Dispatch: Task") /* first, scan to see if we have valid pending player for this task */ i := pq.Front() for { if nil == i { /* Out of players! */ /* Append this task to the waiting tasks queue */ tq.PushBack(task) break } p, _ := i.Value.(*ClientInfo) if task.IsTarget(p.Player) { /* Found it. */ pq.Remove(i) p.TaskQ <- task break } i = i.Next() } case respChan := <-statusRequest: o.Debug("Status!") response := new(QueueInformation) response.waitingTasks = tq.Len() pqLen := pq.Len() response.idlePlayers = make([]string, pqLen) idx := 0 for i := pq.Front(); i != nil; i = i.Next() { player, _ := i.Value.(*ClientInfo) response.idlePlayers[idx] = player.Player idx++ } respChan <- response } } }
// handleNop handles a NOP packet from a client. It only logs the client's
// name; message is not used.
func handleNop(client *ClientInfo, message interface{}) {
	who := client.Name()
	o.Debug("Client %s: NOP received", who)
}
func handleAudienceRequest(c net.Conn) { defer c.Close() c.SetTimeout(0) r, _ := c.(io.Reader) w, _ := c.(io.Writer) dec := json.NewDecoder(r) enc := json.NewEncoder(w) outobj := new(GenericJsonRequest) err := dec.Decode(outobj) if err != nil { o.Warn("Error decoding JSON talking to audience: %s", err) return } if nil == outobj.Op { o.Warn("Malformed JSON message talking to audience. Missing Op") return } switch *(outobj.Op) { case "status": if nil == outobj.Id { o.Warn("Malformed Status message talking to audience. Missing Job ID") return } job := o.JobGet(*outobj.Id) jresp := new([2]interface{}) if nil != job { jresp[0] = "OK" iresp := NewJsonStatusResponse() switch job.State { case o.JOB_PENDING: iresp.Status = "PENDING" case o.JOB_SUCCESSFUL: iresp.Status = "OK" case o.JOB_FAILED_PARTIAL: iresp.Status = "PARTIAL_FAIL" case o.JOB_FAILED: iresp.Status = "FAIL" default: o.Fail("Blargh. %d is an unknown job state!", job.State) } resnames := o.JobGetResultNames(*outobj.Id) for i := range resnames { tr := o.JobGetResult(*outobj.Id, resnames[i]) if nil != tr { presp := NewJsonPlayerStatus() switch tr.State { case o.RESP_RUNNING: presp.Status = "PENDING" case o.RESP_FINISHED: presp.Status = "OK" case o.RESP_FAILED: presp.Status = "FAIL" case o.RESP_FAILED_UNKNOWN_SCORE: presp.Status = "UNK_SCORE" case o.RESP_FAILED_HOST_ERROR: presp.Status = "HOST_ERROR" case o.RESP_FAILED_UNKNOWN: presp.Status = "UNKNOWN_FAILURE" } for k, v := range tr.Response { presp.Response[k] = v } iresp.Players[resnames[i]] = presp } } jresp[1] = iresp } else { jresp[0] = "Error" jresp[1] = nil } enc.Encode(jresp) o.Debug("Status...") case "queue": if nil == outobj.Score { o.Warn("Malformed Queue message talking to audience. Missing Score") sendQueueFailureResponse("Missing Score", enc) return } if nil == outobj.Scope { o.Warn("Malformed Queue message talking to audience. 
Missing Scope") sendQueueFailureResponse("Missing Scope", enc) return } if nil == outobj.Players || len(outobj.Players) < 1 { o.Warn("Malformed Queue message talking to audience. Missing Players") sendQueueFailureResponse("Missing Players", enc) return } for _, player := range outobj.Players { if !HostAuthorised(player) { o.Warn("Malformed Queue message - unknown player %s specified.", player) sendQueueFailureResponse("Invalid Player", enc) return } } job := NewRequest() job.Score = *outobj.Score switch *outobj.Scope { case "one": job.Scope = o.SCOPE_ONEOF case "all": job.Scope = o.SCOPE_ALLOF default: sendQueueFailureResponse("Invalid Scope", enc) return } job.Players = outobj.Players job.Params = outobj.Params QueueJob(job) sendQueueSuccessResponse(job, enc) default: o.Warn("Unknown operation talking to audience: \"%s\"", *(outobj.Op)) return } _ = enc }
// This takes an unmarshall'd job and stuffs it back into the job state. func RestoreJobState(job *JobRequest) bool { // check the valid players list. var playersout []string = nil resultsout := make(map[string]*TaskResponse) for _, p := range job.Players { if HostAuthorised(p) { playersout = append(playersout, p) // fix the result too. resout, exists := job.Results[p] if exists && resout != nil { resout.id = job.Id resultsout[p] = resout } // remove it so we can sweep it in pass2 for // results from old hosts that matter. delete(job.Results, p) } } job.Players = playersout if len(job.Players) == 0 { // If there are no players left at this point, discard // the job as corrupt. return false } // now, do pass 2 over the remaining results. for k, v := range job.Results { if v != nil { // if the results indicate completion, we // always retain them. if v.State.Finished() { resultsout[k] = v resultsout[k].id = job.Id } } } job.Results = resultsout // now, check the task data. ONEOF jobs are allowed to // reset tasks that have never been sent. var tasksout []*TaskRequest = nil for _, t := range job.Tasks { // rebuild the return link t.job = job // finished tasks we don't care about. if t.State.Finished() { tasksout = append(tasksout, t) continue } if job.Scope == SCOPE_ONEOF { if t.Player != "" && (t.State == TASK_QUEUED || !HostAuthorised(t.Player)) { t.State = TASK_QUEUED t.Player = "" } tasksout = append(tasksout, t) continue } else { if HostAuthorised(t.Player) { tasksout = append(tasksout, t) } } } job.Tasks = tasksout if len(job.Tasks) == 0 { o.Debug("Empty tasks in deserialised job") // Tasks should never be empty. return false } // put the job back into the system. JobAdd(job) JobReviewState(job.Id) if !job.State.Finished() { // now, redispatch anything that's not actually finished. for _, t := range job.Tasks { if !t.State.Finished() { DispatchTask(t) } } } return true }