// An example Map function. It consumes json data and yields a value for each line func (s *JsonEntryCounter) Mapper(r io.Reader, w io.Writer) error { log.Printf("map_input_file %s", os.Getenv("map_input_file")) wg, out := gomrjob.JsonInternalOutputProtocol(w) for data := range gomrjob.JsonInputProtocol(r) { gomrjob.Counter("example_mr", "Map Lines Read", 1) key, err := data.Get(s.KeyField).String() if err != nil { gomrjob.Counter("example_mr", "Missing Key", 1) } else { out <- gomrjob.KeyValue{key, 1} } } close(out) wg.Wait() return nil }
// An example Map function. It consumes json data and yields a value for each line func (s *JsonEntryCounter) Mapper(r io.Reader, w io.Writer) error { log.Printf("map_input_file %s", os.Getenv("map_input_file")) wg, out := gomrjob.JsonInternalOutputProtocol(w) // for efficient counting, use an in-memory counter that flushes the least recently used item // less Mapper output makes for faster sorting and reducing. counter := lru.NewLRUCounter(func(k interface{}, v int64) { out <- gomrjob.KeyValue{k, v} }, 1) for data := range gomrjob.JsonInputProtocol(r) { gomrjob.Counter("example_mr", "Map Lines Read", 1) key, err := data.Get(s.KeyField).String() if err != nil { gomrjob.Counter("example_mr", "Missing Key", 1) } else { counter.Incr(key, 1) } } counter.Flush() close(out) wg.Wait() return nil }
func (s *JsonEntryCounter) Reducer(r io.Reader, w io.Writer) error { wg, out := gomrjob.RawJsonInternalOutputProtocol(w) for kv := range gomrjob.RawJsonInternalInputProtocol(r) { var i int64 for v := range kv.Values { vv, err := v.Int64() if err != nil { gomrjob.Counter("example_mr", "non-int value", 1) log.Printf("non-int value %s", err) } else { i += vv } } out <- gomrjob.KeyValue{kv.Key, i} } close(out) wg.Wait() return nil }