// 每个worker向chan写入的次数: // chOutProgress: N // chOutMap: 1 for each parsed line // chOutWorker: 1 func (this *Worker) run(chOutProgress chan<- int, chOutMap chan<- mr.KeyValue, chOutWorker chan<- Worker) { defer T.Un(T.Trace("")) this.StartAt = time.Now() var input *stream.Stream if this.manager.option.filemode { input = stream.NewStream(stream.PLAIN_FILE, this.filename) } else { input = stream.NewStream(stream.EXEC_PIPE, LZOP_CMD, LZOP_OPTION, this.filename) } input.Open() defer input.Close() this.Printf("%s worker[%d] opened %s, start to Map...\n", this.kind, this.seq, this.BaseFilename()) for { line, err := input.Reader().ReadString(EOL) if err != nil { if err != io.EOF { log.Fatal(err) } break } this.RawLines++ if chOutProgress != nil && this.RawLines%PROGRESS_LINES_STEP == 0 { // report progress chOutProgress <- PROGRESS_LINES_STEP } if !this.self.IsLineValid(line) { continue } this.ValidLines++ // run map for this line // for pipe stream flush to work, we can't strip EOL this.self.Map(line, chOutMap) } this.EndAt = time.Now() chOutWorker <- *this this.Printf("%s worker[%d] %s done, parsed: %d/%d, duration: %v\n", this.kind, this.seq, this.BaseFilename(), this.ValidLines, this.RawLines, this.Duration()) }
func (this *Worker) initExternalMapper() *stream.Stream { defer T.Un(T.Trace("")) mapper := this.manager.option.mapper if mapper != "" { stream := stream.NewStream(stream.EXEC_PIPE, mapper) if err := stream.Open(); err != nil { this.Fatal(err) } this.Printf("external mapper stream opened: %s\n", mapper) this.mapReader = stream.Reader() this.mapWriter = stream.Writer() return stream } return nil }