Example #1
0
// MyMap is the map step of a word count. It runs once for each user-provided
// input.
//
// It fetches input as a URL, splits the response body into whitespace-separated
// words, and writes one "<word>\t1\n" record per word occurrence into a
// temporary file chosen by hash(word, job.Partitions). Each partition file is
// then handed to job.UploadMapS3.
//
// It returns a map from partition index to the path returned by
// job.UploadMapS3. On error the map holds only the partitions uploaded so far
// and should not be relied upon.
//
// Don't panic, most of the low level things will be moved to library...
func MyMap(input string, job *gomr.Job) (map[int]string, error) {
	outputs := make(map[int]string)
	var err error
	log.Println("Rinning map on ", input)

	// Create one TempFile for each partition.
	tmpfiles := make([]*os.File, job.Partitions)
	// Whatever path we leave by, release the descriptors and delete the
	// temporary files. Close on an already-closed file only returns an error,
	// which is deliberately ignored here, as is a failed Remove (best effort).
	// NOTE(review): this assumes UploadMapS3 is done with the local file by the
	// time it returns - confirm against its implementation.
	defer func() {
		for _, f := range tmpfiles {
			if f == nil {
				continue
			}
			_ = f.Close()
			_ = os.Remove(f.Name())
		}
	}()

	// Buffer the writes: one record per word would otherwise be one write
	// syscall per word.
	writers := make([]*bufio.Writer, len(tmpfiles))
	for i := range tmpfiles {
		tmpfiles[i], err = ioutil.TempFile("", "")
		if err != nil {
			return outputs, err
		}
		writers[i] = bufio.NewWriter(tmpfiles[i])
	}

	// Fetch the input url.
	resp, err := http.Get(input)
	if err != nil {
		return outputs, err
	}
	defer resp.Body.Close()
	// http.Get reports no error for non-2xx responses; without this check an
	// error page would be counted as if it were the input document.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return outputs, fmt.Errorf("fetching %q: unexpected HTTP status %s", input, resp.Status)
	}

	scanner := bufio.NewScanner(resp.Body)
	scanner.Split(bufio.ScanWords)
	// Map each instance of a word with 1 and write it to the partition file
	// selected by hash (defined elsewhere; FNV according to the original note).
	for scanner.Scan() {
		word := scanner.Text()
		//TODO: Maybe make everything lowercase... and check if its really a "word"
		partition := hash(word, job.Partitions)
		if _, err := fmt.Fprintf(writers[partition], "%s\t1\n", word); err != nil {
			return outputs, err
		}
	}
	// Scan returns false on both EOF and failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		return outputs, err
	}

	// Flush and close each TempFile, then upload it to S3.
	for i, f := range tmpfiles {
		if err := writers[i].Flush(); err != nil {
			return outputs, err
		}
		// A failed Close can mean buffered data never reached the disk, so do
		// not upload a possibly truncated file.
		if err := f.Close(); err != nil {
			return outputs, err
		}
		newpath, err := job.UploadMapS3(f.Name(), i)
		if err != nil {
			return outputs, err
		}
		outputs[i] = newpath
	}

	// Return the list of S3 files.
	return outputs, nil
}