Пример #1
0
// run reads this worker's input line by line, hands every valid line to Map,
// and finally publishes a copy of the worker on chOutWorker.
//
// The input is a plain file when the manager's filemode option is set;
// otherwise it is an EXEC_PIPE stream built from LZOP_CMD and LZOP_OPTION.
//
// Number of writes each worker makes to the channels:
// chOutProgress: N (one per PROGRESS_LINES_STEP raw lines; skipped when the channel is nil)
// chOutMap: 1 for each parsed line (chOutMap is handed to Map, which does the sending)
// chOutWorker: 1
func (this *Worker) run(chOutProgress chan<- int, chOutMap chan<- mr.KeyValue, chOutWorker chan<- Worker) {
	defer T.Un(T.Trace(""))

	this.StartAt = time.Now()

	var input *stream.Stream
	if this.manager.option.filemode {
		input = stream.NewStream(stream.PLAIN_FILE, this.filename)
	} else {
		input = stream.NewStream(stream.EXEC_PIPE, LZOP_CMD, LZOP_OPTION, this.filename)
	}
	// Open reports failure through its error result; abort here with the real
	// cause instead of letting the read loop fail on a stream that never opened.
	if err := input.Open(); err != nil {
		log.Fatal(err)
	}
	defer input.Close()

	this.Printf("%s worker[%d] opened %s, start to Map...\n", this.kind, this.seq, this.BaseFilename())

	for {
		line, err := input.Reader().ReadString(EOL)
		if err != nil {
			// anything other than end-of-input is fatal
			if err != io.EOF {
				log.Fatal(err)
			}

			// NOTE(review): if Reader() is a bufio.Reader, a final line that
			// lacks EOL is returned together with io.EOF and is dropped here.
			// Confirm that is intended (Map apparently needs the EOL).
			break
		}

		this.RawLines++
		if chOutProgress != nil && this.RawLines%PROGRESS_LINES_STEP == 0 {
			// report progress
			chOutProgress <- PROGRESS_LINES_STEP
		}

		if !this.self.IsLineValid(line) {
			continue
		}

		this.ValidLines++

		// run map for this line
		// for pipe stream flush to work, we can't strip EOL
		this.self.Map(line, chOutMap)
	}
	this.EndAt = time.Now()

	// the worker is sent by value, so the receiver gets a snapshot taken here
	chOutWorker <- *this
	this.Printf("%s worker[%d] %s done, parsed: %d/%d, duration: %v\n", this.kind, this.seq, this.BaseFilename(),
		this.ValidLines, this.RawLines, this.Duration())
}
Пример #2
0
// initExternalMapper opens an EXEC_PIPE stream for the mapper named in the
// manager options and stores that stream's reader and writer on the worker
// as mapReader and mapWriter.
//
// It returns the stream, or nil when the mapper option is empty.
func (this *Worker) initExternalMapper() *stream.Stream {
	defer T.Un(T.Trace(""))

	cmd := this.manager.option.mapper
	if cmd == "" {
		// no external mapper configured
		return nil
	}

	pipe := stream.NewStream(stream.EXEC_PIPE, cmd)
	err := pipe.Open()
	if err != nil {
		this.Fatal(err)
	}

	this.Printf("external mapper stream opened: %s\n", cmd)

	this.mapReader, this.mapWriter = pipe.Reader(), pipe.Writer()
	return pipe
}