//Run once for map outputs for particular key. //Don't panic, most of the low level things will be moved to library... func MyReduce(inputs []string, partition int, job *gomr.Job) (string, error) { f, err := ioutil.TempFile("", "") fname := f.Name() if err != nil { return "", err } //Download and merge each input into local file for _, input := range inputs { rd, err := job.FetchInputS3(input) if err != nil { return "", err } _, err = io.Copy(f, rd) if err != nil { return "", err } rd.Close() } f.Close() //Sort local file by word ... using the sort unix command.. We need lines for each word bunched together cmd := exec.Command("sort", fname) sorted, err := ioutil.TempFile("", "") cmd.Stdout = sorted err = cmd.Run() if err != nil { return "", err } sortedfname := sorted.Name() sorted.Close() log.Println("sorted", sortedfname) //The merged file is no longer needed cause we now use the sorted file. os.Remove(fname) //Create file for final output output, err := ioutil.TempFile("", "") if err != nil { return "", err } f, err = os.Open(sortedfname) if err != nil { return "", err } scanner := bufio.NewScanner(f) scanner.Split(bufio.ScanLines) word := "" count := 0 for scanner.Scan() { line := scanner.Text() splitted := strings.Split(line, "\t") if splitted[0] != word { //New word detected, yield previous word if count > 0 { fmt.Fprintf(output, "%s\t%d\n", word, count) } word = splitted[0] count, err = strconv.Atoi(splitted[1]) if err != nil { return "", err } } else { count++ } } //Yield last word if count > 0 { fmt.Fprintf(output, "%s\t%d\n", word, count) } f.Close() os.Remove(sortedfname) outname := output.Name() output.Close() //Upload output to S3 and return the key return job.UploadResultS3(outname) }