Beispiel #1
0
// EventLoop receives events from s.EventChan and handles each recognized
// event in its own goroutine. Events of any other type are ignored. The loop
// returns once s.EventChan has been closed and drained.
//
// Resources are leased to the driver and expire every X minutes unless
// renewed. The leasing protocol has two steps:
//  1. request resource
//  2. release resource
func (s *Scheduler) EventLoop() {
	// Ranging over the channel ends the loop when EventChan is closed; a bare
	// receive inside "for {}" would keep yielding nil events and spin forever.
	for event := range s.EventChan {
		switch event := event.(type) {
		default:
			// Unrecognized event types are dropped.
		case SubmitTaskGroup:
			// fmt.Printf("processing %+v\n", event)
			taskGroup := event.TaskGroup
			pickedServerChan := make(chan market.Supply, 1)
			go func() {
				defer event.WaitGroup.Done()

				tasks := taskGroup.Tasks
				if len(tasks) == 0 {
					// tasks[0] below would panic and take the whole process down.
					println("skipping task group without tasks")
					return
				}

				// wait until inputs are registered
				s.shardLocator.waitForInputDatasetShardLocations(tasks[0])
				// fmt.Printf("inputs of %s is %s\n", tasks[0].Name(), s.allInputLocations(tasks[0]))

				s.Market.AddDemand(market.Requirement(taskGroup), event.Bid, pickedServerChan)

				// Block until a supply is delivered on pickedServerChan, and
				// hand it back to the market when this goroutine finishes.
				supply := <-pickedServerChan
				allocation := supply.Object.(resource.Allocation)
				defer s.Market.ReturnSupply(supply)

				s.remoteExecuteOnLocation(event.FlowContext, taskGroup, allocation, event.WaitGroup)
			}()
		case ReleaseTaskGroupInputs:
			go func() {
				defer event.WaitGroup.Done()

				for _, taskGroup := range event.TaskGroups {
					tasks := taskGroup.Tasks
					if len(tasks) == 0 {
						// No last task, hence no output shards to delete.
						continue
					}
					// Delete every output shard of the group's last task.
					for _, ds := range tasks[len(tasks)-1].Outputs {
						shardName := s.option.ExecutableFileHash + "-" + ds.Name()
						// NOTE(review): the second result of GetShardLocation is
						// discarded; if it reports a missing shard, the delete
						// request below targets a zero-value location. Confirm.
						location, _ := s.shardLocator.GetShardLocation(shardName)
						request := NewDeleteDatasetShardRequest(shardName)
						// println("deleting", ds.Name(), "on", location.URL())
						if err := RemoteDirectExecute(location.URL(), request); err != nil {
							println("exeuction error:", err.Error())
						}
					}
				}
			}()
		}
	}
}
Beispiel #2
0
// EventLoop receives events from s.EventChan and handles each recognized
// event in its own goroutine. Events of any other type are ignored. The loop
// returns once s.EventChan has been closed and drained.
//
// Resources are leased to the driver and expire every X minutes unless
// renewed. The leasing protocol has two steps:
//  1. request resource
//  2. release resource
func (s *Scheduler) EventLoop() {
	// Ranging over the channel ends the loop when EventChan is closed; a bare
	// receive inside "for {}" would keep yielding nil events and spin forever.
	for event := range s.EventChan {
		switch event := event.(type) {
		default:
			// Unrecognized event types are dropped.
		case SubmitTaskGroup:
			// fmt.Printf("processing %+v\n", event)
			taskGroup := event.TaskGroup
			pickedServerChan := make(chan market.Supply, 1)
			go func() {
				defer event.WaitGroup.Done()

				tasks := taskGroup.Tasks
				if len(tasks) == 0 {
					// tasks[0] below would panic and take the whole process down.
					log.Printf("skipping task group %d: no tasks", taskGroup.Id)
					return
				}
				firstTask, lastTask := tasks[0], tasks[len(tasks)-1]

				// wait until inputs are registered
				s.shardLocator.waitForInputDatasetShardLocations(firstTask)
				// fmt.Printf("inputs of %s is %s\n", tasks[0].Name(), s.allInputLocations(tasks[0]))

				s.Market.AddDemand(market.Requirement(taskGroup), event.Bid, pickedServerChan)

				// Block until a supply is delivered on pickedServerChan, and
				// hand it back to the market when this goroutine finishes.
				supply := <-pickedServerChan
				allocation := supply.Object.(resource.Allocation)
				defer s.Market.ReturnSupply(supply)

				s.setupInputChannels(event.FlowContext, firstTask, allocation.Location, event.WaitGroup)

				// Record the allocated location for every output shard of the
				// last task before the output channels are set up.
				for _, shard := range lastTask.Outputs {
					s.shardLocator.SetShardLocation(s.option.ExecutableFileHash+"-"+shard.Name(), allocation.Location)
				}
				s.setupOutputChannels(lastTask.Outputs, event.WaitGroup)

				// fmt.Printf("allocated %s on %v\n", tasks[0].Name(), allocation.Location)
				// create request
				args := []string{
					"-glow.flow.id",
					strconv.Itoa(event.FlowContext.Id),
					"-glow.taskGroup.id",
					strconv.Itoa(taskGroup.Id),
					"-glow.task.name",
					firstTask.Name(),
					"-glow.agent.port",
					strconv.Itoa(allocation.Location.Port),
					"-glow.taskGroup.inputs",
					s.shardLocator.allInputLocations(firstTask),
					"-glow.exe.hash",
					s.shardLocator.executableFileHash,
					"-glow.channel.bufferSize",
					strconv.Itoa(event.FlowContext.ChannelBufferSize),
				}
				// Forward this process's own command-line arguments as well.
				args = append(args, os.Args[1:]...)

				request := NewStartRequest(
					"./"+filepath.Base(os.Args[0]),
					// filepath.Join(".", filepath.Base(os.Args[0])),
					s.option.Module,
					args,
					allocation.Allocated,
					os.Environ(),
					int32(s.option.DriverPort),
				)

				// fmt.Printf("starting on %s: %v\n", allocation.Allocated, request)
				if err := RemoteDirectExecute(allocation.Location.URL(), request); err != nil {
					log.Printf("exeuction error %v: %v", err, request)
				}
			}()
		case ReleaseTaskGroupInputs:
			go func() {
				defer event.WaitGroup.Done()

				for _, taskGroup := range event.TaskGroups {
					tasks := taskGroup.Tasks
					if len(tasks) == 0 {
						// No last task, hence no output shards to delete.
						continue
					}
					// Delete every output shard of the group's last task.
					for _, ds := range tasks[len(tasks)-1].Outputs {
						shardName := s.option.ExecutableFileHash + "-" + ds.Name()
						// NOTE(review): the second result of GetShardLocation is
						// discarded; if it reports a missing shard, the delete
						// request below targets a zero-value location. Confirm.
						location, _ := s.shardLocator.GetShardLocation(shardName)
						request := NewDeleteDatasetShardRequest(shardName)
						// println("deleting", ds.Name(), "on", location.URL())
						if err := RemoteDirectExecute(location.URL(), request); err != nil {
							println("exeuction error:", err.Error())
						}
					}
				}
			}()
		}
	}
}