Exemple #1
0
// workStealer repeatedly checks out workunits via CheckoutWorkunitRemote and
// hands each one to the next pipeline stage over the fromStealer channel.
//
// In "proxy" service mode every checkout attempt first waits for a value on
// core.ProxyWorkChan; in any other mode a failed attempt is followed by a
// 10 second sleep before the next try.
//
// Unexpected checkout errors are counted; the counter is reset only by a
// successful checkout. When it reaches maxCheckoutRetries the whole process
// exits with status 1.
//
// control is intended to receive ID_WORKSTEALER when the goroutine ends (see
// the note at the bottom of the function).
func workStealer(control chan int) {
	// number of unexpected checkout errors (with no success in between)
	// after which the process gives up
	const maxCheckoutRetries = 3

	fmt.Printf("workStealer launched, client=%s\n", core.Self.Id)
	// Fires on return or panic only; os.Exit bypasses deferred calls, so the
	// retry-limit path below prints the same message itself.
	defer fmt.Printf("workStealer exiting...\n")

	retry := 0
	for {
		if core.Service == "proxy" {
			// proxy mode is event driven: block until work is announced
			<-core.ProxyWorkChan
		}

		wu, err := CheckoutWorkunitRemote()
		if err != nil {
			switch err.Error() {
			case e.QueueEmpty, e.NoEligibleWorkunitFound:
				//normal, do nothing
			case e.ClientNotFound:
				//server may have been restarted and no longer knows this client: re-register
				ReRegisterWithSelf(conf.SERVER_URL)
			case e.ClientSuspended:
				fmt.Printf("client suspended, waiting for repair or resume request...\n")
				//to-do: send out email notice that this client has problem and been suspended
				time.Sleep(2 * time.Minute)
			default:
				//something is wrong, server may be down
				fmt.Printf("error in checking out workunits: %v\n", err)
				retry++
			}

			if retry >= maxCheckoutRetries {
				// os.Exit does not run deferred functions, so announce the exit here
				fmt.Printf("workStealer exiting...\n")
				os.Exit(1)
			}
			if core.Service != "proxy" { //proxy: event driven, client: timer driven
				time.Sleep(10 * time.Second)
			}
			continue
		}
		retry = 0

		logger.Debug(2, "workStealer: checked out a workunit: id="+wu.Id)
		//log event about work checkout (WC)
		logger.Event(event.WORK_CHECKOUT, "workid="+wu.Id)
		core.Self.Total_checkout += 1
		core.Self.Current_work[wu.Id] = true
		workmap[wu.Id] = ID_WORKSTEALER

		//hand the work to the next step handler: dataMover
		workstat := core.NewWorkPerf(wu.Id)
		workstat.Checkout = time.Now().Unix()
		rawWork := &mediumwork{
			workunit: wu,
			perfstat: workstat,
		}
		fromStealer <- rawWork

		//if worker overlap is inhibited, wait until deliverer finishes processing the workunit
		if !conf.WORKER_OVERLAP && core.Service != "proxy" {
			chanPermit <- true
		}
	}
	// NOTE(review): unreachable as written -- the loop above has no break or
	// return, so the only way out of this function is os.Exit. Kept so the
	// end-of-goroutine notification is in place if an exit path is added.
	control <- ID_WORKSTEALER //we are ending
}
Exemple #2
0
// workStealer is the checkout loop of the client: it asks the server for a
// workunit via CheckoutWorkunitRemote, records the checkout, and forwards the
// unit on the fromStealer channel.
//
// In "proxy" service mode each attempt is gated on a receive from
// core.ProxyWorkChan. In every other mode a failed attempt is followed by a
// sleep of 10 seconds, or 30 seconds once more than 10 unexpected errors have
// accumulated since the last successful checkout.
//
// A "client deleted" response terminates the process with status 1.
func workStealer(control chan int) {
	logger.Debug(0, fmt.Sprintf("workStealer launched, client=%s\n", core.Self.Id))
	defer fmt.Printf("workStealer exiting...\n")

	retry := 0
	for {
		proxyMode := core.Service == "proxy"
		if proxyMode {
			// event driven: wait until the proxy signals that work is available
			<-core.ProxyWorkChan
		}

		unit, err := CheckoutWorkunitRemote()
		if err != nil {
			switch status := err.Error(); status {
			case e.QueueEmpty, e.QueueSuspend, e.NoEligibleWorkunitFound:
				// expected idle states, nothing to repair
				logger.Debug(3, fmt.Sprintf("client %s recieved status %s from server %s", core.Self.Id, status, conf.SERVER_URL))
			case e.ClientNotFound:
				// the server does not know this client (it may have been restarted): re-register
				ReRegisterWithSelf(conf.SERVER_URL)
			case e.ClientSuspended:
				logger.Error("client suspended, waiting for repair or resume request...")
				//TODO: send out email notice that this client has problem and been suspended
				time.Sleep(2 * time.Minute)
			case e.ClientDeleted:
				fmt.Printf("client deleted, exiting...\n")
				os.Exit(1) // TODO is there a better way of exiting ? E.g. in regard of the logger who wants to flush....
			default:
				// anything else is unexpected; the server may be down
				logger.Error(fmt.Sprintf("error in checking out workunit: %s, retry=%d", status, retry))
				retry++
			}

			// A hard exit after 12 failed checkouts used to sit here; it is
			// disabled, so the loop keeps retrying at a slower pace instead.
			if !proxyMode { // proxy: event driven, client: timer driven
				pause := 30 * time.Second
				if retry <= 10 {
					pause = 10 * time.Second
				}
				time.Sleep(pause)
			}
			continue
		}

		// successful checkout: the error streak is over
		retry = 0

		logger.Debug(1, "workStealer: checked out workunit, id="+unit.Id)
		// record the work checkout (WC) event
		logger.Event(event.WORK_CHECKOUT, "workid="+unit.Id)
		core.Self.Increment_total_checkout()
		core.Self.Current_work_add(unit.Id)
		workmap[unit.Id] = ID_WORKSTEALER

		// stamp the checkout time and pass the unit on to the dataMover
		perf := core.NewWorkPerf(unit.Id)
		perf.Checkout = time.Now().Unix()
		fromStealer <- &mediumwork{
			workunit: unit,
			perfstat: perf,
		}

		// with worker overlap disabled (non-proxy only), block on chanPermit
		// until the deliverer has finished processing the workunit
		if !(conf.WORKER_OVERLAP || core.Service == "proxy") {
			chanPermit <- true
		}
	}
	control <- ID_WORKSTEALER //we are ending
}