Beispiel #1
0
// Source builds a dataset out of the entries that List reports for the given
// HDFS folder: the listed locations are turned into a slice dataset on f,
// partitioned into shard partitions, and then mapped through TextFile.
//
// If listing fails the error is logged with log.Fatalf, which terminates the
// program, so Source never returns in that case.
//
// This is intended mostly as an example; copy it and adapt it as needed.
func Source(f *flow.FlowContext, hdfsLocation string, shard int) *flow.Dataset {
	files, listErr := List(hdfsLocation)
	if listErr != nil {
		log.Fatalf("Can not list files under %s:%v", hdfsLocation, listErr)
	}

	// Spell the pipeline out one stage at a time.
	listed := f.Slice(files)
	sharded := listed.Partition(shard)
	return sharded.Map(TextFile)
}
Beispiel #2
0
// Run executes the task group selected by tr.option.TaskGroupId within the
// flow context fc, and blocks until all of its tasks have finished.
//
// The call is a no-op when fc.Id does not match tr.option.ContextId. It also
// returns early, after logging a message, when the selected task group
// contains no tasks. The method has no return value.
func (tr *TaskRunner) Run(fc *flow.FlowContext) {
	if tr.option.ContextId != fc.Id {
		// This flow context is not the one this runner was configured for.
		return
	}
	fc.ChannelBufferSize = tr.option.ChannelBufferSize

	// Plan the flow and keep only the tasks of the requested group.
	groups := plan.GroupTasks(fc)
	tr.Tasks = groups[tr.option.TaskGroupId].Tasks
	tr.FlowContext = fc

	if len(tr.Tasks) == 0 {
		log.Println("How can the task group has no tasks!")
		return
	}

	// debug: println("taskGroup", tr.Tasks[0].Name(), "starts")

	// Wire up the input and output channels of the tasks. The wait group is
	// handed over as well, so whatever connectInputsAndOutputs registers on
	// it is also awaited below.
	var wg sync.WaitGroup
	tr.connectInputsAndOutputs(&wg)

	// runOne executes a single task and signals completion on wg.
	runOne := func(t *flow.Task) {
		defer wg.Done()
		t.RunTask()
	}

	// Run every task of the group locally, each in its own goroutine.
	for _, t := range tr.Tasks {
		// debug: println("run task", t.Name())
		wg.Add(1)
		go runOne(t)
	}

	// Block until everything registered on wg has completed.
	wg.Wait()
	// debug: println("taskGroup", tr.Tasks[0].Name(), "finishes")
}
Beispiel #3
0
// Run executes the task group selected by tr.option.TaskGroupId within the
// flow context fc, and blocks until all of its tasks have finished.
//
// The call is a no-op when fc.Id does not match tr.option.ContextId. The
// method has no return value. While running it records the start and stop
// times in tr.executorStatus, starts tr.reportLocalExecutorStatus in a
// background goroutine, and calls tr.reportLocalExecutorStatusOnce once all
// tasks are done.
func (tr *TaskRunner) Run(fc *flow.FlowContext) {
	if tr.option.ContextId != fc.Id {
		// This flow context is not the one this runner was configured for.
		return
	}
	fc.ChannelBufferSize = tr.option.ChannelBufferSize

	// Plan the flow and keep only the tasks of the requested group; the
	// first result of GroupTasks is not needed here.
	_, groups := plan.GroupTasks(fc)
	tr.Tasks = groups[tr.option.TaskGroupId].Tasks
	tr.FlowContext = fc

	tr.executorStatus.StartTime = time.Now()

	// Status reporting runs alongside the tasks.
	// NOTE(review): how this goroutine terminates is not visible here.
	go tr.reportLocalExecutorStatus()

	// debug: println("taskGroup", tr.Tasks[0].Name(), "starts")

	// Wire up the input and output channels of the tasks. The wait group is
	// handed over as well, so whatever connectInputsAndOutputs registers on
	// it is also awaited below.
	var wg sync.WaitGroup
	tr.connectInputsAndOutputs(&wg)

	// runOne executes a single task and signals completion on wg.
	runOne := func(t *flow.Task) {
		defer wg.Done()
		t.RunTask()
	}

	// Run every task of the group locally, each in its own goroutine.
	for _, t := range tr.Tasks {
		// debug: println("run task", t.Name())
		wg.Add(1)
		go runOne(t)
	}

	// Block until everything registered on wg has completed.
	wg.Wait()
	// debug: println("taskGroup", tr.Tasks[0].Name(), "finishes", tr.option.RequestId)
	tr.executorStatus.StopTime = time.Now()

	// Send one last status report now that the stop time is recorded.
	tr.reportLocalExecutorStatusOnce()
}