示例#1
0
// Mapper is an example map step. It logs the map_input_file environment
// variable, then iterates over the records produced by
// gomrjob.JsonInputProtocol(r). For each record it bumps the
// "Map Lines Read" counter and tries to read s.KeyField as a string:
// on success it sends a (key, 1) pair to the output channel obtained from
// gomrjob.JsonInternalOutputProtocol(w); on failure it bumps the
// "Missing Key" counter and skips the record.
//
// The output channel is closed and wg is waited on before returning.
// The returned error is always nil.
func (s *JsonEntryCounter) Mapper(r io.Reader, w io.Writer) error {
	log.Printf("map_input_file %s", os.Getenv("map_input_file"))
	wg, out := gomrjob.JsonInternalOutputProtocol(w)

	for record := range gomrjob.JsonInputProtocol(r) {
		gomrjob.Counter("example_mr", "Map Lines Read", 1)

		key, err := record.Get(s.KeyField).String()
		if err != nil {
			// The key field could not be read as a string; count it and move on.
			gomrjob.Counter("example_mr", "Missing Key", 1)
			continue
		}
		out <- gomrjob.KeyValue{key, 1}
	}

	// No more pairs will be sent; wait on wg before returning
	// (presumably until the output side has finished writing — confirm).
	close(out)
	wg.Wait()
	return nil
}
示例#2
0
// Mapper is an example map step that pre-aggregates counts in memory before
// emitting them. It logs the map_input_file environment variable, then
// iterates over the records produced by gomrjob.JsonInputProtocol(r). For
// each record it bumps the "Map Lines Read" counter and tries to read
// s.KeyField as a string: on success the key is incremented by 1 in an
// lru counter; on failure the "Missing Key" counter is bumped instead.
//
// After the input is exhausted the counter is flushed, the output channel is
// closed, and wg is waited on. The returned error is always nil.
func (s *JsonEntryCounter) Mapper(r io.Reader, w io.Writer) error {
	log.Printf("map_input_file %s", os.Getenv("map_input_file"))
	wg, out := gomrjob.JsonInternalOutputProtocol(w)

	// emit forwards one (key, count) pair from the counter to the output channel.
	emit := func(k interface{}, v int64) {
		out <- gomrjob.KeyValue{k, v}
	}

	// Counting in memory and emitting only what the counter hands to emit
	// keeps Mapper output small, which makes sorting and reducing faster.
	// NOTE(review): the second argument (1) is presumably the counter's
	// capacity — confirm against the lru package; 1 looks small.
	counter := lru.NewLRUCounter(emit, 1)

	for record := range gomrjob.JsonInputProtocol(r) {
		gomrjob.Counter("example_mr", "Map Lines Read", 1)

		key, err := record.Get(s.KeyField).String()
		if err != nil {
			// The key field could not be read as a string; count it and move on.
			gomrjob.Counter("example_mr", "Missing Key", 1)
			continue
		}
		counter.Incr(key, 1)
	}

	// Flush must run before close(out): the emit callback sends on out.
	counter.Flush()
	close(out)
	wg.Wait()
	return nil
}
示例#3
0
// Reducer is an example reduce step. For every entry produced by
// gomrjob.RawJsonInternalInputProtocol(r) it sums the values received from
// the entry's Values that can be read as int64 and sends a (Key, sum) pair
// to the output channel obtained from gomrjob.RawJsonInternalOutputProtocol(w).
//
// A value that cannot be read as int64 is not added to the sum; it bumps the
// "non-int value" counter and the error is logged. An entry whose values are
// all rejected is still emitted, with a sum of 0.
//
// The output channel is closed and wg is waited on before returning.
// The returned error is always nil.
func (s *JsonEntryCounter) Reducer(r io.Reader, w io.Writer) error {
	wg, out := gomrjob.RawJsonInternalOutputProtocol(w)

	for group := range gomrjob.RawJsonInternalInputProtocol(r) {
		var total int64
		for raw := range group.Values {
			n, err := raw.Int64()
			if err != nil {
				gomrjob.Counter("example_mr", "non-int value", 1)
				log.Printf("non-int value %s", err)
				continue
			}
			total += n
		}
		out <- gomrjob.KeyValue{group.Key, total}
	}

	close(out)
	wg.Wait()
	return nil
}