func NormalizeTitle(title string) string {
	// Romanize first, then lower-case once. (The original code lower-cased
	// before romanizing and then overwrote that result, leaving dead stores.)
	normalizedTitle := RomanizeHepburn(title)
	normalizedTitle = strings.ToLower(normalizedTitle)
	normalizedTitle = RemoveTrailingApostrophe(normalizedTitle)
	// Strip diacritics: decompose (NFD), drop nonspacing marks, recompose (NFC).
	normalizedTitle, _, _ = transform.String(transform.Chain(
		norm.NFD,
		transform.RemoveFunc(func(r rune) bool {
			return unicode.Is(unicode.Mn, r)
		}),
		norm.NFC), normalizedTitle)
	// Drop parenthesized numbers such as "(1998)".
	normalizedTitle = regexp.MustCompile(`\(\d+\)`).ReplaceAllString(normalizedTitle, " ")
	// Replace everything that is not a letter, digit, or '.' with a space.
	normalizedTitle = strings.Map(func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '.' {
			return ' '
		}
		return r
	}, normalizedTitle)
	// Collapse runs of whitespace and trim.
	normalizedTitle = regexp.MustCompile(`\s+`).ReplaceAllString(normalizedTitle, " ")
	return strings.TrimSpace(normalizedTitle)
}
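// A quick usage sketch for NormalizeTitle. RomanizeHepburn and
// RemoveTrailingApostrophe are this package's own helpers; the expected
// output below assumes they pass plain ASCII input through unchanged.
func ExampleNormalizeTitle() {
	fmt.Println(NormalizeTitle("Cowboy Bebop (1998): The Movie!"))
	// Output: cowboy bebop the movie
}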
func init() {
	words = make(map[string]int)
	count = 0

	amz = "./amazon_cells_labelled.txt"
	ylp = "./yelp_labelled.txt"
	imd = "./imdb_labelled.txt"

	// Keep letters, spaces, tabs, and the digits '0' and '1' (the sentiment
	// labels used by the datasets above); remove every other rune.
	sanitize = transform.RemoveFunc(func(r rune) bool {
		switch {
		case r >= 'A' && r <= 'Z':
			return false
		case r >= 'a' && r <= 'z':
			return false
		case r >= '0' && r <= '1':
			return false
		case r == ' ':
			return false
		case r == '\t':
			return false
		default:
			return true
		}
	})

	rand.Seed(42)
}
func main() {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	r := transform.NewReader(os.Stdin, t)
	if _, err := io.Copy(os.Stdout, r); err != nil {
		log.Fatal(err)
	}
}
func removeNlChars(str string) string {
	// Remove control characters (< 32) and non-ASCII runes (>= 127)
	// after NFKD decomposition.
	isOk := func(r rune) bool {
		return r < 32 || r >= 127
	}
	t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
	str, _, _ = transform.String(t, str)
	return str
}
func TestLettersShouldPass1(t *testing.T) {
	s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
	sanitized := []rune(s)
	for i := range sanitized {
		assert.False(t, OnlyLetters(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
	}
}
// NewNaiveBayes returns a NaiveBayes model with the given number of
// classes instantiated, ready to learn off the given data stream. The
// sanitization function is set to the given function; it must be usable
// with transform.RemoveFunc (i.e. have the signature func(rune) bool).
func NewNaiveBayes(stream <-chan base.TextDatapoint, classes uint8, sanitize func(rune) bool) *NaiveBayes {
	return &NaiveBayes{
		Words:         make(map[string]Word),
		Count:         make([]uint64, classes),
		Probabilities: make([]float64, classes),

		sanitize: transform.RemoveFunc(sanitize),
		stream:   stream,
	}
}
// RestoreWithFuncs takes raw JSON data of a model and restores a model
// from it. The tokenizer and sanitizer passed in will be assigned to the
// restored model.
func (b *NaiveBayes) RestoreWithFuncs(data io.Reader, sanitizer func(rune) bool, tokenizer Tokenizer) error {
	if b == nil {
		return errors.New("cannot restore a model to a nil pointer")
	}
	err := json.NewDecoder(data).Decode(b)
	if err != nil {
		return err
	}
	b.sanitize = transform.RemoveFunc(sanitizer)
	b.Tokenizer = tokenizer
	return nil
}
// stripCtlAndExtFromUnicode sanitizes input by removing control codes and
// extended characters. From:
// http://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Go
func stripCtlAndExtFromUnicode(str string) string {
	isOk := func(r rune) bool {
		return r < 32 || r >= 127
	}
	// The isOk filter is such that there is no need to chain to norm.NFC.
	t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
	// This Transformer could also trivially be applied as an io.Reader
	// or io.Writer filter to automatically do such filtering when reading
	// or writing data anywhere.
	str, _, _ = transform.String(t, str)
	return str
}
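// A quick sanity check of the function above: NFKD splits "é" into "e"
// plus a combining accent (code point >= 127), which isOk then removes,
// along with the tab control character (< 32).
func ExampleStripCtlAndExt() {
	fmt.Println(stripCtlAndExtFromUnicode("héllo\tworld"))
	// Output: helloworld
}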
// NewNaiveBayes returns a NaiveBayes model with the given number of
// classes instantiated, ready to learn off the given data stream. The
// sanitization function is set to the given function; it must be usable
// with transform.RemoveFunc (i.e. have the signature func(rune) bool).
func NewNaiveBayes(stream <-chan base.TextDatapoint, classes uint8, sanitize func(rune) bool) *NaiveBayes {
	return &NaiveBayes{
		Words:         concurrentMap{sync.RWMutex{}, make(map[string]Word)},
		Count:         make([]uint64, classes),
		Probabilities: make([]float64, classes),

		sanitize:  transform.RemoveFunc(sanitize),
		stream:    stream,
		Tokenizer: &SimpleTokenizer{SplitOn: " "},

		Output: os.Stdout,
	}
}
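// A hedged usage sketch for the constructor above, assuming the goml-style
// base.TextDatapoint (X string, Y uint8) and the model's OnlineLearn(chan
// error) method, which closes the error channel once the stream is drained.
// The sanitizer keeps letters, digits, and spaces; RemoveFunc drops every
// rune for which the function returns true.
func ExampleNewNaiveBayes() {
	stream := make(chan base.TextDatapoint, 100)
	errs := make(chan error)
	model := NewNaiveBayes(stream, 2, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ' '
	})
	go model.OnlineLearn(errs)
	stream <- base.TextDatapoint{X: "great product, works well", Y: 1}
	close(stream)
	for err := range errs {
		fmt.Println(err)
	}
}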
func TestAsciiLetters(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodeletters"},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiLetters), test.input)
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}
// GetCompatibleString removes all the special characters from the string
// name to create a new string compatible with different file names.
func GetCompatibleString(name string) string {
	// Replace all the & signs with the text "and".
	name = strings.Replace(name, "&", "and", -1)
	// Change all the characters to ASCII.
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	result, _, _ := transform.String(t, name)
	// Replace all the spaces with underscores.
	s, _ := regexp.Compile(`\s+`)
	result = s.ReplaceAllString(result, "_")
	// Remove all the non-alphanumeric characters.
	r, _ := regexp.Compile(`\W`)
	result = r.ReplaceAllString(result, "")
	return result
}
func ExampleRemoveFunc() {
	input := []byte(`tschüß; до свидания`)

	b := make([]byte, len(input))

	t := transform.RemoveFunc(unicode.IsSpace)
	n, _, _ := t.Transform(b, input, true)
	fmt.Println(string(b[:n]))

	t = transform.RemoveFunc(func(r rune) bool {
		return !unicode.Is(unicode.Latin, r)
	})
	n, _, _ = t.Transform(b, input, true)
	fmt.Println(string(b[:n]))

	n, _, _ = t.Transform(b, norm.NFD.Bytes(input), true)
	fmt.Println(string(b[:n]))

	// Output:
	// tschüß;досвидания
	// tschüß
	// tschuß
}
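// transform.RemoveFunc is deprecated in current golang.org/x/text; the
// supported replacement is runes.Remove from golang.org/x/text/runes.
// A minimal sketch of the first transformation above, rewritten that way:
func removeSpaces(s string) string {
	t := runes.Remove(runes.Predicate(unicode.IsSpace))
	out, _, _ := transform.String(t, s)
	return out // "tschüß; до свидания" -> "tschüß;досвидания"
}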
func normalize(name, src string) (string, error) {
	if name == "" {
		name = baseWithoutExt(src)
	}
	t := transform.Chain(norm.NFD, transform.RemoveFunc(remove), norm.NFC)
	name = strings.TrimSpace(name)
	name, _, err := transform.String(t, name)
	if err != nil {
		return "", err
	}
	name = strings.ToLower(name)
	name = strings.Replace(name, " ", "_", -1)
	return name, nil
}
// normalize does unicode normalization.
func normalize(in []byte) ([]byte, error) {
	// We need a new transformer for each input as it cannot be reused.
	filter := func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks (to be removed)
	}
	transformer := transform.Chain(norm.NFD, transform.RemoveFunc(filter), norm.NFC)
	out, _, err := transform.Bytes(transformer, in)
	out = bytes.Map(func(r rune) rune {
		if unicode.IsPunct(r) { // Replace punctuation with spaces.
			return ' '
		}
		return unicode.ToLower(r) // Convert to lower case.
	}, out)
	return out, err
}
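// A quick check of what the normalizer above produces: accents are
// stripped, punctuation becomes spaces, and everything is lower-cased.
func ExampleNormalize() {
	out, _ := normalize([]byte("Héllo, World!"))
	fmt.Printf("%q\n", out)
	// Output: "hello  world " (the comma and the "!" each became a space)
}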
func replace(path string) {
	copy := []string{}
	r := `(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)|(nbsp;)|((?:\s)\s)|(png)|(jpeg)|(jpg)|(mpg)|(\\u0026)|(\n)|(\v)|(\r)|(\0)|(\t)|(n°) |(à)|(wbe)|(_)`
	regex, err := regexp.Compile(r)
	if err != nil {
		return // there was a problem with the regular expression
	}
	c, _ := readLines(path)
	for _, v := range c {
		reg := regex.ReplaceAllString(v, " ")
		slug := utils.GenerateSlug(reg)
		regex1, _ := regexp.Compile(`((\-){1,})|(\b\w{1}\b)`)
		reg = regex1.ReplaceAllString(slug, " ")
		t := stripchars(reg, `?,.!/©*@#~()$+"'&}]|:;[{²`)
		s := strings.TrimSpace(t)
		// Strip diacritics: decompose, drop nonspacing marks, recompose.
		normalize := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
		normStr1, _, _ := transform.String(normalize, s)
		if len(v) > 0 {
			copy = append(copy, normStr1)
		}
	}
	j := strings.Replace(strings.Join(copy, " "), " ", ",", -1)
	regex2, err := regexp.Compile(`((\,){2,})`)
	j1 := regex2.ReplaceAllString(j, ",")
	j2 := strings.Split(j1, ",")
	cleaned := []string{}
	for _, value := range j2 {
		if !stringInSlice(value, cleaned) {
			cleaned = append(cleaned, value)
		}
	}
	createCsv(path, filenameCsv, strings.Join(cleaned, ","))
}
// UnicodeSanitize sanitizes a string to be used in Hugo URLs, allowing only
// a predefined set of special Unicode characters.
// If the RemovePathAccents configuration flag is enabled, Unicode accents
// are also removed.
func UnicodeSanitize(s string) string {
	source := []rune(s)
	target := make([]rune, 0, len(source))
	for _, r := range source {
		if unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r) ||
			r == '%' || r == '.' || r == '/' || r == '\\' || r == '_' ||
			r == '-' || r == '#' || r == '+' {
			target = append(target, r)
		}
	}
	var result string
	if viper.GetBool("RemovePathAccents") {
		// Remove accents - see https://blog.golang.org/normalization
		t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
		result, _, _ = transform.String(t, string(target))
	} else {
		result = string(target)
	}
	return result
}
func cleanSalary(input string) string {
	cleaner := transform.Chain(norm.NFD,
		transform.RemoveFunc(func(r rune) bool {
			return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
		}),
		norm.NFC)
	output, _, _ := transform.String(cleaner, input)
	output = strings.ToLower(output)

	m := reSalarySep.FindStringSubmatchIndex(output)
	if m != nil {
		output = output[:m[0]+1] + " - " + output[m[1]-1:]
	}

	for {
		m := reSalarySplit.FindStringSubmatchIndex(output)
		if m == nil {
			break
		}
		_, e1 := m[2], m[3]
		s2, _ := m[4], m[5]
		output = output[:e1] + output[s2:]
	}
	return output
}
func cleanName(name string) string {
	name = strings.Replace(name, "ß", "ss", -1)
	name = strings.Replace(name, "Σ", "e", -1)
	name = strings.Replace(name, "æ", "a", -1)
	name = strings.Replace(name, "&", "and", -1)
	name = strings.Replace(name, "$", "s", -1)
	for _, c := range removeChars {
		name = strings.Replace(name, c, "", -1)
	}
	for _, c := range spaceChars {
		name = strings.Replace(name, c, " ", -1)
	}
	name = badChanRegex.ReplaceAllString(name, "")
	name = strings.Join(strings.Fields(name), " ")
	// Strip diacritics; fall back to the untransformed name on error.
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	unicodeCleanedName, _, err := transform.String(t, name)
	if err == nil {
		name = unicodeCleanedName
	}
	return strings.Trim(name, ` "`)
}
func Bytes(b []byte) ([]byte, error) {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	res, _, err := transform.Bytes(t, b)
	return res, err
}
func init() {
	stripT = transform.Chain(
		norm.NFD,
		transform.RemoveFunc(isMn),
		norm.NFC)
}
type PathSlice []Path

// Swap implements sort.Interface (and index.Swapper).
func (p PathSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Less implements sort.Interface.
func (p PathSlice) Less(i, j int) bool { return p[i].Encode() < p[j].Encode() }

// Len implements sort.Interface.
func (p PathSlice) Len() int { return len(p) }

func isMn(r rune) bool {
	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}

var transformer = transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)

func removeNonAlphaNumeric(s string) string {
	in := []rune(s)
	res := make([]rune, len(in))
	i := 0
	for _, x := range s {
		if x == '-' {
			res[i] = ' '
			i++
			continue
		}
		if unicode.IsLetter(x) || unicode.IsDigit(x) || unicode.IsSpace(x) {
			res[i] = unicode.ToLower(x)
			i++
		}
	}
	return string(res[:i])
}
func TestLettersShouldPass2(t *testing.T) {
	s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "0876543212 3456789)(*&^ %$@!@#$%^& *(*&^%$#@#$%")
	sanitized := []rune(s)
	assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
}
var mappedToNothing = transform.RemoveFunc(func(r rune) bool {
	// TODO: replace by a unicode.RangeTable
	switch r {
	case '\u00AD', '\u034F', '\u1806', '\u180B', '\u180C', '\u180D',
		'\u200B', '\u200C', '\u200D', '\u2060',
		'\uFE00', '\uFE01', '\uFE02', '\uFE03', '\uFE04', '\uFE05',
		'\uFE06', '\uFE07', '\uFE08', '\uFE09', '\uFE0A', '\uFE0B',
		'\uFE0C', '\uFE0D', '\uFE0E', '\uFE0F', '\uFEFF':
		return true
	default:
		return false
	}
})
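// A sketch of the unicode.RangeTable suggested by the TODO above, paired
// with runes.Remove (the non-deprecated equivalent of RemoveFunc). Ranges
// must be sorted and non-overlapping; consecutive code points collapse
// into single entries.
var mappedToNothingTable = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x00AD, Hi: 0x00AD, Stride: 1}, // soft hyphen
		{Lo: 0x034F, Hi: 0x034F, Stride: 1}, // combining grapheme joiner
		{Lo: 0x1806, Hi: 0x1806, Stride: 1}, // Mongolian todo soft hyphen
		{Lo: 0x180B, Hi: 0x180D, Stride: 1}, // Mongolian free variation selectors
		{Lo: 0x200B, Hi: 0x200D, Stride: 1}, // zero-width space, ZWNJ, ZWJ
		{Lo: 0x2060, Hi: 0x2060, Stride: 1}, // word joiner
		{Lo: 0xFE00, Hi: 0xFE0F, Stride: 1}, // variation selectors 1-16
		{Lo: 0xFEFF, Hi: 0xFEFF, Stride: 1}, // zero-width no-break space (BOM)
	},
	LatinOffset: 1, // one entry at or below 0xFF
}

var mappedToNothingViaTable = runes.Remove(runes.In(mappedToNothingTable))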
func TestWordsAndNumbersShouldPass2(t *testing.T) {
	s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), ")(*&^%$@!@#$%^&*(*&^%$#@#$%")
	sanitized := []rune(s)
	assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
}
func removeDiacritics(s string) string {
	// Decompose first (norm.NFD) so that precomposed characters such as
	// "é" are split into a base letter plus a combining mark; without this
	// step, RemoveFunc never sees a standalone Mn rune to remove.
	t := transform.Chain(norm.NFD, transform.RemoveFunc(func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
	}), norm.NFC)
	output, _, _ := transform.String(t, s)
	return output
}
func normalizer() transform.Transformer {
	isMn := func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
	}
	return transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
}
// NeuterAccents transforms characters with accents into plain forms.
func NeuterAccents(s string) string {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	result, _, _ := transform.String(t, s)
	return result
}
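// A minimal check of NeuterAccents, assuming isMn is the usual
// nonspacing-mark predicate defined elsewhere in the package.
func ExampleNeuterAccents() {
	fmt.Println(NeuterAccents("Crème brûlée"))
	// Output: Creme brulee
}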
func NewReader(r io.Reader) io.Reader {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	return transform.NewReader(r, t)
}
// UpdateSanitize updates the NaiveBayes model's
// text sanitization transformation function.
func (b *NaiveBayes) UpdateSanitize(sanitize func(rune) bool) {
	b.sanitize = transform.RemoveFunc(sanitize)
}
func main() {
	flag.Parse()
	getHostConfig()

	timeout = 1000

	fmt.Println("Feeds")
	feeds = append(feeds, Feed{index: 0, url: "http://careers.stackoverflow.com/jobs/feed?location=san+francisco%2c+ca&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 1, url: "http://careers.stackoverflow.com/jobs/feed?location=new+york+city%2c+ny&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 2, url: "http://careers.stackoverflow.com/jobs/feed?location=los+angeles%2c+ca&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 3, url: "http://careers.stackoverflow.com/jobs/feed?location=boston%2c+ma&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 4, url: "http://careers.stackoverflow.com/jobs/feed?location=seattle%2cwa&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 5, url: "http://careers.stackoverflow.com/jobs/feed?location=austin%2ctx&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 6, url: "http://careers.stackoverflow.com/jobs/feed?location=chicago%2cil&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})

	mutex = &sync.Mutex{}
	skillMap = make(map[string]int, 200)
	loadSkillMapFile(skillMap)

	fmt.Println("GetRSS")
	getRSS2()

	saveSkillMapFile(skillMap)
	if conf.hbaseZkURL != "" {
		saveSkillsMapHBase(skillMap)
	}

	for i := 0; i < len(guidList); i++ {
		fmt.Println(guidList[i])
	}

	// Map random wait times onto each URL and build its S3 object name.
	fw.Slice(guidList).Map(func(sURL string) URLTuple {
		fmt.Printf("Map1: %v\n", sURL)
		fName := "jobs_sof/" + strings.Replace(strings.TrimPrefix(sURL, "http://careers.stackoverflow.com/jobs/"), "/", "_", -1)
		ms := rand.Intn(3000)
		return URLTuple{sURL, fName, ms}
		// Filter already-acquired URLs.
	}).Filter(func(uTuple URLTuple) bool {
		// Is the file already stored in S3?
		svcS3 := s3.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
		params := &s3.HeadObjectInput{
			Bucket: aws.String("opps"),          // Required
			Key:    aws.String(uTuple.s3Name),   // Required
		}
		hobj, _ := svcS3.HeadObject(params)
		fmt.Printf("Filter: %s => %v\n", uTuple.s3Name, hobj.ContentLength == nil)
		return hobj.ContentLength == nil
		// Fetch the URLs that survived the filter.
	}).Map(func(uTuple URLTuple) statusTuple {
		fmt.Printf("Map3: %v\n", uTuple)
		// Random sleep before fetching, to spread out requests.
		time.Sleep(time.Duration(uTuple.msWait) * time.Millisecond)
		resp, err := http.Get(uTuple.gURL)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		failed := 0
		passed := 0
		if resp.StatusCode == 200 {
			passed = 1
		} else {
			failed = 1
		}

		// Parse the page and store the extracted fields in S3.
		if passed == 1 {
			body, _ := ioutil.ReadAll(resp.Body)
			reader := strings.NewReader(string(body))
			root, err := html.Parse(reader)
			if err != nil {
				fmt.Printf("%+v\n", err)
			}
			var b bytes.Buffer
			html.Render(&b, root)
			fixedHtml := b.String()

			isOk := func(r rune) bool {
				return r < 32 || r >= 127
			}
			// The isOk filter is such that there is no need to chain to norm.NFC.
			t2 := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
			// This Transformer could also trivially be applied as an io.Reader
			// or io.Writer filter to automatically do such filtering when
			// reading or writing data anywhere.
			fixedUnicodeNFKD, _, _ := transform.String(t2, fixedHtml)

			reader = strings.NewReader(fixedUnicodeNFKD)
			xmlroot, xmlerr := xmlpath.ParseHTML(reader)
			if xmlerr != nil {
				log.Fatal(xmlerr)
			}

			path := xmlpath.MustCompile(`/html/head/title`)
			title := ""
			if t, ok := path.String(xmlroot); ok {
				title = t
			}
			fmt.Printf("**** Title: %s\n", title)

			var iter *xmlpath.Iter
			var list *xmlpath.Path

			// Location - needs Trim.
			path = xmlpath.MustCompile(`//*[@id="hed"]/ul[1]/li/text()`)
			location := ""
			if loc, ok := path.String(xmlroot); ok {
				location = strings.Trim(loc, " \n")
			}

			// Base skills - loop until the iterator is exhausted.
			var skills []string
			list = xmlpath.MustCompile(`//*[@id="hed"]/div[2]/p/a`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				skills = append(skills, iter.Node().String())
			}

			// Description paragraphs and bullet points.
			var desc []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/p`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				desc = append(desc, iter.Node().String())
			}
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/ul/li`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				desc = append(desc, iter.Node().String())
			}

			// Skills and requirements, from three alternative layouts.
			var sSNR []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/p`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				sSNR = append(sSNR, iter.Node().String())
			}
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/ul/li/text()`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				sSNR = append(sSNR, iter.Node().String())
			}
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/ul/li/ul/li/text()`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				sSNR = append(sSNR, iter.Node().String())
			}

			// About the company.
			var about []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[4]/p`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				about = append(about, iter.Node().String())
			}

			var sep string
			baseAbout := "ABOUT: "
			sep = ""
			for i := 0; i < len(about); i++ {
				baseAbout += sep + about[i]
				sep = "\n"
			}
			baseSkills := "BASESKILLS: "
			sep = ""
			for i := 0; i < len(skills); i++ {
				baseSkills += sep + skills[i]
				sep = " "
			}
			baseReqs := "REQUIREMENTS: "
			sep = ""
			for i := 0; i < len(sSNR); i++ {
				baseReqs += sep + sSNR[i]
				sep = "\n"
			}
			baseDesc := "DESCRIPTION: "
			sep = ""
			for i := 0; i < len(desc); i++ {
				baseDesc += sep + desc[i]
				sep = "\n"
			}

			storage := uTuple.gURL + "\n\n" +
				"DATE: " + time.Now().Format(time.RFC850) + "\n\n" +
				"TITLE: " + html.UnescapeString(title) + "\n\n" +
				"LOCATION: " + html.UnescapeString(location) + "\n\n" +
				html.UnescapeString(baseSkills) + "\n\n" +
				html.UnescapeString(baseAbout) + "\n\n" +
				html.UnescapeString(baseDesc) + "\n\n" +
				html.UnescapeString(baseReqs) + "\n"
			fmt.Printf("Storing (len = %d):\n***\n%s\n***\n", len(storage), storage)

			svcS3 := s3.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
			bucket := "opps"
			key := uTuple.s3Name
			_, err = svcS3.PutObject(&s3.PutObjectInput{
				Body:   strings.NewReader(storage),
				Bucket: &bucket,
				Key:    &key,
			})
			if err != nil {
				fmt.Printf("Failed to upload data to %s/%s, %s\n", bucket, key, err)
				failed = 1
				passed = 0
			}
		}
		return statusTuple{passed, failed}
		// Count passed/failed URLs.
	}).Reduce(func(x statusTuple, y statusTuple) statusTuple {
		fmt.Printf("Red1: x = %v, y = %v\n", x, y)
		return statusTuple{x.pass + y.pass, x.fail + y.fail}
	}).Map(func(x statusTuple) {
		fmt.Printf("Map4 Result: passed = %d, failed = %d\n", x.pass, x.fail)
	}).Run()
}