func workStealer(control chan int) { fmt.Printf("workStealer lanched, client=%s\n", core.Self.Id) defer fmt.Printf("workStealer exiting...\n") retry := 0 for { if core.Service == "proxy" { <-core.ProxyWorkChan } wu, err := CheckoutWorkunitRemote() if err != nil { if err.Error() == e.QueueEmpty || err.Error() == e.NoEligibleWorkunitFound { //normal, do nothing } else if err.Error() == e.ClientNotFound { //server may be restarted, waiting for the hearbeater goroutine to try re-register ReRegisterWithSelf(conf.SERVER_URL) } else if err.Error() == e.ClientSuspended { fmt.Printf("client suspended, waiting for repair or resume request...\n") //to-do: send out email notice that this client has problem and been suspended time.Sleep(2 * time.Minute) } else { //something is wrong, server may be down fmt.Printf("error in checking out workunits: %v\n", err) retry += 1 } if retry == 3 { os.Exit(1) } if core.Service != "proxy" { //proxy: event driven, client: timer driven time.Sleep(10 * time.Second) } continue } else { retry = 0 } logger.Debug(2, "workStealer: checked out a workunit: id="+wu.Id) //log event about work checktout (WC) logger.Event(event.WORK_CHECKOUT, "workid="+wu.Id) core.Self.Total_checkout += 1 core.Self.Current_work[wu.Id] = true workmap[wu.Id] = ID_WORKSTEALER //hand the work to the next step handler: dataMover workstat := core.NewWorkPerf(wu.Id) workstat.Checkout = time.Now().Unix() rawWork := &mediumwork{ workunit: wu, perfstat: workstat, } fromStealer <- rawWork //if worker overlap is inhibited, wait until deliverer finishes processing the workunit if conf.WORKER_OVERLAP == false && core.Service != "proxy" { chanPermit <- true } } control <- ID_WORKSTEALER //we are ending }
func workStealer(control chan int) { //fmt.Printf("workStealer launched, client=%s\n", core.Self.Id) logger.Debug(0, fmt.Sprintf("workStealer launched, client=%s\n", core.Self.Id)) defer fmt.Printf("workStealer exiting...\n") retry := 0 for { if core.Service == "proxy" { <-core.ProxyWorkChan } wu, err := CheckoutWorkunitRemote() if err != nil { if err.Error() == e.QueueEmpty || err.Error() == e.QueueSuspend || err.Error() == e.NoEligibleWorkunitFound { //normal, do nothing logger.Debug(3, fmt.Sprintf("client %s recieved status %s from server %s", core.Self.Id, err.Error(), conf.SERVER_URL)) } else if err.Error() == e.ClientNotFound { //server may be restarted, waiting for the hearbeater goroutine to try re-register ReRegisterWithSelf(conf.SERVER_URL) } else if err.Error() == e.ClientSuspended { logger.Error("client suspended, waiting for repair or resume request...") //TODO: send out email notice that this client has problem and been suspended time.Sleep(2 * time.Minute) } else if err.Error() == e.ClientDeleted { fmt.Printf("client deleted, exiting...\n") os.Exit(1) // TODO is there a better way of exiting ? E.g. in regard of the logger who wants to flush.... } else { //something is wrong, server may be down logger.Error(fmt.Sprintf("error in checking out workunit: %s, retry=%d", err.Error(), retry)) retry += 1 } //if retry == 12 { // fmt.Printf("failed to checkout workunits for 12 times, exiting...\n") // logger.Error("failed to checkout workunits for 12 times, exiting...") // os.Exit(1) // TODO fix ! //} if core.Service != "proxy" { //proxy: event driven, client: timer driven if retry <= 10 { time.Sleep(10 * time.Second) } else { time.Sleep(30 * time.Second) } } continue } else { retry = 0 } logger.Debug(1, "workStealer: checked out workunit, id="+wu.Id) //log event about work checktout (WC) logger.Event(event.WORK_CHECKOUT, "workid="+wu.Id) core.Self.Increment_total_checkout() core.Self.Current_work_add(wu.Id) workmap[wu.Id] = ID_WORKSTEALER //hand the work to the next step handler: dataMover workstat := core.NewWorkPerf(wu.Id) workstat.Checkout = time.Now().Unix() rawWork := &mediumwork{ workunit: wu, perfstat: workstat, } fromStealer <- rawWork // sends to dataMover //if worker overlap is inhibited, wait until deliverer finishes processing the workunit if conf.WORKER_OVERLAP == false && core.Service != "proxy" { chanPermit <- true } } control <- ID_WORKSTEALER //we are ending }