// Source lists files under a hdfs folder, and process all files // This is provided more as an example. You can copy the code and customize // any way you want. func Source(f *flow.FlowContext, hdfsLocation string, shard int) *flow.Dataset { locations, err := List(hdfsLocation) if err != nil { log.Fatalf("Can not list files under %s:%v", hdfsLocation, err) } return f.Slice(locations).Partition(shard).Map(TextFile) }
// if this should not run, return false func (tr *TaskRunner) Run(fc *flow.FlowContext) { if fc.Id != tr.option.ContextId { return } fc.ChannelBufferSize = tr.option.ChannelBufferSize taskGroups := plan.GroupTasks(fc) tr.Tasks = taskGroups[tr.option.TaskGroupId].Tasks tr.FlowContext = fc if len(tr.Tasks) == 0 { log.Println("How can the task group has no tasks!") return } // println("taskGroup", tr.Tasks[0].Name(), "starts") // 4. setup task input and output channels var wg sync.WaitGroup tr.connectInputsAndOutputs(&wg) // 6. starts to run the task locally for _, task := range tr.Tasks { // println("run task", task.Name()) wg.Add(1) go func(task *flow.Task) { defer wg.Done() task.RunTask() }(task) } // 7. need to close connected output channels wg.Wait() // println("taskGroup", tr.Tasks[0].Name(), "finishes") }
// if this should not run, return false func (tr *TaskRunner) Run(fc *flow.FlowContext) { if fc.Id != tr.option.ContextId { return } fc.ChannelBufferSize = tr.option.ChannelBufferSize _, taskGroups := plan.GroupTasks(fc) tr.Tasks = taskGroups[tr.option.TaskGroupId].Tasks tr.FlowContext = fc tr.executorStatus.StartTime = time.Now() go tr.reportLocalExecutorStatus() // println("taskGroup", tr.Tasks[0].Name(), "starts") // 4. setup task input and output channels var wg sync.WaitGroup tr.connectInputsAndOutputs(&wg) // 6. starts to run the task locally for _, task := range tr.Tasks { // println("run task", task.Name()) wg.Add(1) go func(task *flow.Task) { defer wg.Done() task.RunTask() }(task) } // 7. need to close connected output channels wg.Wait() // println("taskGroup", tr.Tasks[0].Name(), "finishes", tr.option.RequestId) tr.executorStatus.StopTime = time.Now() tr.reportLocalExecutorStatusOnce() }