// NewTransformer creates a new transform.Transformer that performs the PRECIS
// preparation and enforcement steps on the given UTF-8 encoded bytes.
func (p Profile) NewTransformer() *Transformer {
	var ts []transform.Transformer

	if p.options.allowwidechars {
		ts = append(ts, width.Fold)
	}

	ts = append(ts, checker{p: p})

	if p.options.width != nil {
		ts = append(ts, width.Fold)
	}

	for _, f := range p.options.additional {
		ts = append(ts, f())
	}

	if p.options.cases {
		ts = append(ts, transform.Chain(
			cases.Upper(language.Und),
			cases.Lower(language.Und),
		))
	}

	ts = append(ts, p.options.norm)

	// TODO: Apply directionality rule (blocking on the Bidi package)
	// TODO: Add the disallow empty rule with a dummy transformer?

	return &Transformer{transform.Chain(ts...)}
}
func NormalizeTitle(title string) string {
	// Romanize first, then lowercase once; the original code lowercased the
	// title and immediately discarded that result by re-romanizing from the
	// raw input.
	normalizedTitle := RomanizeHepburn(title)
	normalizedTitle = strings.ToLower(normalizedTitle)
	normalizedTitle = RemoveTrailingApostrophe(normalizedTitle)
	// Strip combining marks (accents): NFD -> remove Mn -> NFC.
	normalizedTitle, _, _ = transform.String(transform.Chain(
		norm.NFD,
		transform.RemoveFunc(func(r rune) bool {
			return unicode.Is(unicode.Mn, r)
		}),
		norm.NFC), normalizedTitle)
	// Drop parenthesized numbers such as "(2009)".
	normalizedTitle = regexp.MustCompile(`\(\d+\)`).ReplaceAllString(normalizedTitle, " ")
	// Replace everything except letters, digits and dots with spaces.
	normalizedTitle = strings.Map(func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '.' {
			return ' '
		}
		return r
	}, normalizedTitle)
	// Collapse runs of whitespace and trim.
	normalizedTitle = regexp.MustCompile(`\s+`).ReplaceAllString(normalizedTitle, " ")
	normalizedTitle = strings.TrimSpace(normalizedTitle)
	return normalizedTitle
}
// NewReader returns a reader that decodes from the given encoding to UTF-8.
//
// If enc is nil, only a UTF-8-enforcing replacement reader
// (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables)
// is used.
func NewReader(r io.Reader, enc encoding.Encoding) io.Reader {
	if enc == nil || enc == encoding.Replacement {
		return transform.NewReader(r, encoding.Replacement.NewEncoder())
	}
	return transform.NewReader(r, transform.Chain(enc.NewDecoder(), encoding.Replacement.NewEncoder()))
}
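// A hedged usage sketch for NewReader above, not part of the original source:
// charmap.Windows1252 (golang.org/x/text/encoding/charmap) stands in for any
// non-UTF-8 source encoding, and the helper name is hypothetical.
func readWindows1252(data []byte) (string, error) {
	r := NewReader(bytes.NewReader(data), charmap.Windows1252)
	out, err := ioutil.ReadAll(r)
	return string(out), err
}

// For example, readWindows1252([]byte{0xe9, 0x74, 0xe9}) should yield "été",
// since 0xE9 is 'é' in Windows-1252.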
// NewTransformer creates a new transform.Transformer that performs the PRECIS
// preparation and enforcement steps on the given UTF-8 encoded bytes.
func (p *Profile) NewTransformer() *Transformer {
	var ts []transform.Transformer

	// These transforms are applied in the order defined in
	// https://tools.ietf.org/html/rfc7564#section-7

	if p.options.foldWidth {
		ts = append(ts, width.Fold)
	}

	for _, f := range p.options.additional {
		ts = append(ts, f())
	}

	if p.options.cases != nil {
		ts = append(ts, p.options.cases)
	}

	ts = append(ts, p.options.norm)

	if p.options.bidiRule {
		ts = append(ts, bidirule.New())
	}

	ts = append(ts, &checker{p: p, allowed: p.Allowed()})

	// TODO: Add the disallow empty rule with a dummy transformer?

	return &Transformer{transform.Chain(ts...)}
}
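// A small demonstration of the resulting Transformer from outside the
// package, assuming the released x/text API where predefined profiles such
// as precis.UsernameCaseMapped are available; the profile choice and the
// helper name are illustrative only.
func demoUsernameProfile() {
	t := precis.UsernameCaseMapped.NewTransformer()
	if s, _, err := transform.String(t, "TestUser"); err == nil {
		fmt.Println(s) // expected: "testuser" (width-folded, lowercased, NFC)
	}
}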
func main() {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	r := transform.NewReader(os.Stdin, t)
	if _, err := io.Copy(os.Stdout, r); err != nil {
		log.Fatal(err)
	}
}
// removeNlChars strips control characters and non-ASCII runes after NFKD
// decomposition. Note that isOk selects the runes to *remove*.
func removeNlChars(str string) string {
	isOk := func(r rune) bool {
		return r < 32 || r >= 127
	}
	t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
	str, _, _ = transform.String(t, str)
	return str
}
func ExampleRemove() {
	t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
	s, _, _ := transform.String(t, "résumé")
	fmt.Println(s)

	// Output:
	// resume
}
// decodeTransfer decodes base64, quoted-printable or plain text.
func decodeTransfer(r io.Reader, label string) io.Reader {
	switch strings.ToLower(label) {
	case "base64":
		return base64.NewDecoder(base64.StdEncoding, transform.NewReader(r, nonASCIITransformer{}))
	case "quoted-printable":
		return quotedprintable.NewReader(transform.NewReader(r, transform.Chain(nonASCIITransformer{}, newlineAppendTransformer{})))
	case "", "7bit", "8bit", "binary":
		return r
	default:
		return failReader{fmt.Errorf("unsupported transfer encoding: %v", label)}
	}
}
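// A small usage sketch for decodeTransfer above; the helper types it chains
// (nonASCIITransformer, newlineAppendTransformer, failReader) are defined
// elsewhere in the original source, so this hypothetical wrapper only
// exercises the public behavior:
func decodeBase64Body(encoded string) (string, error) {
	r := decodeTransfer(strings.NewReader(encoded), "base64")
	b, err := ioutil.ReadAll(r)
	return string(b), err
}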
// stripCtlAndExtFromUnicode sanitizes input by stripping control codes and
// extended characters.
// From: http://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Go
func stripCtlAndExtFromUnicode(str string) string {
	isOk := func(r rune) bool {
		return r < 32 || r >= 127
	}
	// The isOk filter is such that there is no need to chain to norm.NFC.
	t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
	// This Transformer could also trivially be applied as an io.Reader
	// or io.Writer filter to automatically do such filtering when reading
	// or writing data anywhere.
	str, _, _ = transform.String(t, str)
	return str
}
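// transform.RemoveFunc is deprecated in current x/text releases; here is a
// sketch of the same filter written against the x/text/runes package,
// intended to behave the same for well-formed UTF-8 input:
func stripCtlAndExtWithRunes(str string) string {
	notOk := runes.Predicate(func(r rune) bool {
		return r < 32 || r >= 127
	})
	t := transform.Chain(norm.NFKD, runes.Remove(notOk))
	str, _, _ = transform.String(t, str)
	return str
}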
// GetCompatibleString removes all the special characters
// from the string name to create a new string compatible
// with different file names.
func GetCompatibleString(name string) string {
	// Replace all the & signs with "and" text.
	name = strings.Replace(name, "&", "and", -1)
	// Change all the characters to ASCII.
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	result, _, _ := transform.String(t, name)
	// Replace all the spaces with underscores.
	result = regexp.MustCompile(`\s+`).ReplaceAllString(result, "_")
	// Remove all the non-alphanumeric characters.
	result = regexp.MustCompile(`\W`).ReplaceAllString(result, "")
	return result
}
func normalize(name, src string) (string, error) {
	if name == "" {
		name = baseWithoutExt(src)
	}
	t := transform.Chain(norm.NFD, transform.RemoveFunc(remove), norm.NFC)
	name = strings.TrimSpace(name)
	name, _, err := transform.String(t, name)
	if err != nil {
		return "", err
	}
	name = strings.ToLower(name)
	name = strings.Replace(name, " ", "_", -1)
	return name, nil
}
// normalize does unicode normalization.
func normalize(in []byte) ([]byte, error) {
	// We need a new transformer for each input as it cannot be reused.
	filter := func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks (to be removed)
	}
	transformer := transform.Chain(norm.NFD, transform.RemoveFunc(filter), norm.NFC)
	out, _, err := transform.Bytes(transformer, in)
	out = bytes.Map(func(r rune) rune {
		if unicode.IsPunct(r) { // Replace punctuation with spaces.
			return ' '
		}
		return unicode.ToLower(r) // Convert to lower case.
	}, out)
	return out, err
}
func ExampleUTF8Validator() {
	for i := 0; i < 2; i++ {
		// Declare the interface type explicitly so the chained transformer
		// can be assigned back on the second pass (NewEncoder returns a
		// concrete *encoding.Encoder).
		var transformer transform.Transformer
		transformer = charmap.Windows1252.NewEncoder()
		if i == 1 {
			transformer = transform.Chain(encoding.UTF8Validator, transformer)
		}
		dst := make([]byte, 256)
		src := []byte("abc\xffxyz") // src is invalid UTF-8.
		nDst, nSrc, err := transformer.Transform(dst, src, true)
		fmt.Printf("i=%d: produced %q, consumed %q, error %v\n",
			i, dst[:nDst], src[:nSrc], err)
	}

	// Output:
	// i=0: produced "abc\x1axyz", consumed "abc\xffxyz", error <nil>
	// i=1: produced "abc", consumed "abc", error encoding: invalid UTF-8
}
// scanContent scans the content of a document for phrases,
// and updates tally.
func (conf *config) scanContent(content []byte, contentType, cs string, tally map[rule]int) {
	if strings.Contains(contentType, "javascript") {
		conf.scanJSContent(content, tally)
		return
	}

	transformers := make([]transform.Transformer, 0, 3)
	if cs != "utf-8" {
		e, _ := charset.Lookup(cs)
		if e != nil { // Lookup returns nil for an unrecognized charset.
			transformers = append(transformers, e.NewDecoder())
		}
	}

	if strings.Contains(contentType, "html") {
		transformers = append(transformers, entityDecoder{})
	}
	transformers = append(transformers, new(wordTransformer))

	ps := newPhraseScanner(conf.ContentPhraseList, func(s string) {
		tally[rule{t: contentPhrase, content: s}]++
	})
	ps.scanByte(' ')

	var t transform.Transformer
	if len(transformers) == 1 {
		t = transformers[0]
	} else {
		t = transform.Chain(transformers...)
	}
	r := transform.NewReader(bytes.NewReader(content), t)

	buf := make([]byte, 4096)
	for {
		n, err := r.Read(buf)
		for _, c := range buf[:n] {
			ps.scanByte(c)
		}
		if err != nil {
			if err != io.EOF {
				log.Println("Error decoding page content:", err)
			}
			break
		}
	}

	ps.scanByte(' ')
}
func ExampleUTF8Validator() {
	for i := 0; i < 2; i++ {
		var transformer transform.Transformer
		transformer = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()
		if i == 1 {
			transformer = transform.Chain(encoding.UTF8Validator, transformer)
		}
		dst := make([]byte, 256)
		src := []byte("abc\xffxyz") // src is invalid UTF-8.
		nDst, nSrc, err := transformer.Transform(dst, src, true)
		fmt.Printf("i=%d: produced %q, consumed %q, error %v\n",
			i, dst[:nDst], src[:nSrc], err)
	}

	// Output:
	// i=0: produced "\x00a\x00b\x00c\xff\xfd\x00x\x00y\x00z", consumed "abc\xffxyz", error <nil>
	// i=1: produced "\x00a\x00b\x00c", consumed "abc", error encoding: invalid UTF-8
}
func replace(path string) {
	kept := []string{} // renamed from "copy", which shadowed the builtin
	r := `(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)|(nbsp;)|((?:\s)\s)|(png)|(jpeg)|(jpg)|(mpg)|(\\u0026)|(\n)|(\v)|(\r)|(\0)|(\t)|(n°) |(à)|(wbe)|(_)`
	regex, err := regexp.Compile(r)
	if err != nil {
		return // there was a problem with the regular expression.
	}
	c, _ := readLines(path)
	for _, v := range c {
		reg := regex.ReplaceAllString(v, " ")
		slug := utils.GenerateSlug(reg)
		regex1, _ := regexp.Compile(`((\-){1,})|(\b\w{1}\b)`)
		reg = regex1.ReplaceAllString(slug, " ")
		t := stripchars(reg, `?,.!/©*@#~()$+"'&}]|:;[{²`)
		s := strings.TrimSpace(t)
		normalize := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
		normStr1, _, _ := transform.String(normalize, s)
		if len(v) > 0 {
			kept = append(kept, normStr1)
		}
	}
	j := strings.Replace(strings.Join(kept, " "), " ", ",", -1)
	regex2, err := regexp.Compile(`((\,){2,})`)
	j1 := regex2.ReplaceAllString(j, ",")
	j2 := strings.Split(j1, ",")
	cleaned := []string{}
	for _, value := range j2 {
		if !stringInSlice(value, cleaned) {
			cleaned = append(cleaned, value)
		}
	}
	createCsv(path, filenameCsv, strings.Join(cleaned, ","))
}
// UnicodeSanitize sanitizes a string to be used in Hugo URLs, allowing only
// a predefined set of special Unicode characters.
// If the RemovePathAccents configuration flag is enabled, Unicode accents
// are also removed.
func UnicodeSanitize(s string) string {
	source := []rune(s)
	target := make([]rune, 0, len(source))

	for _, r := range source {
		if unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r) ||
			r == '%' || r == '.' || r == '/' || r == '\\' || r == '_' ||
			r == '-' || r == '#' || r == '+' {
			target = append(target, r)
		}
	}

	var result string
	if viper.GetBool("RemovePathAccents") {
		// Remove accents - see https://blog.golang.org/normalization
		t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
		result, _, _ = transform.String(t, string(target))
	} else {
		result = string(target)
	}

	return result
}
func cleanSalary(input string) string {
	cleaner := transform.Chain(norm.NFD,
		transform.RemoveFunc(func(r rune) bool {
			return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
		}),
		norm.NFC)
	output, _, _ := transform.String(cleaner, input)
	output = strings.ToLower(output)

	m := reSalarySep.FindStringSubmatchIndex(output)
	if m != nil {
		output = output[:m[0]+1] + " - " + output[m[1]-1:]
	}

	for {
		m := reSalarySplit.FindStringSubmatchIndex(output)
		if m == nil {
			break
		}
		e1 := m[3] // end of the first capture group
		s2 := m[4] // start of the second capture group
		output = output[:e1] + output[s2:]
	}
	return output
}
func cleanName(name string) string {
	name = strings.Replace(name, "ß", "ss", -1)
	name = strings.Replace(name, "Σ", "e", -1)
	name = strings.Replace(name, "æ", "a", -1)
	name = strings.Replace(name, "&", "and", -1)
	name = strings.Replace(name, "$", "s", -1)
	for _, c := range removeChars {
		name = strings.Replace(name, c, "", -1)
	}
	for _, c := range spaceChars {
		name = strings.Replace(name, c, " ", -1)
	}
	name = badChanRegex.ReplaceAllString(name, "")
	name = strings.Join(strings.Fields(name), " ")
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	unicodeCleanedName, _, err := transform.String(t, name)
	if err == nil {
		name = unicodeCleanedName
	}
	return strings.Trim(name, ` "`)
}
	case '\u200C':
	case '\u200D':
	case '\u2060':
	case '\uFE00':
	case '\uFE01':
	case '\uFE02':
	case '\uFE03':
	case '\uFE04':
	case '\uFE05':
	case '\uFE06':
	case '\uFE07':
	case '\uFE08':
	case '\uFE09':
	case '\uFE0A':
	case '\uFE0B':
	case '\uFE0C':
	case '\uFE0D':
	case '\uFE0E':
	case '\uFE0F':
	case '\uFEFF':
	default:
		return false
	}
	return true
})

// Stringprep implements the Stringprep Profile for User Names and Passwords
// (RFC 4013) as a transform.Transformer.
var Stringprep = transform.Chain(nonASCIISpaceTransformer, mappedToNothing, norm.NFKC)
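// A hedged usage sketch for the Stringprep chain above: transform.String
// runs the full RFC 4013 preparation over a string. The exact behavior of
// nonASCIISpaceTransformer and mappedToNothing is defined elsewhere in the
// original source; per RFC 4013, a non-ASCII space such as U+00A0 should map
// to a plain ASCII space, so saslPrep("user\u00A0name") should yield
// "user name". The wrapper name is hypothetical.
func saslPrep(s string) (string, error) {
	out, _, err := transform.String(Stringprep, s)
	return out, err
}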
func main() {
	flag.Parse()
	// panic("Just Quit")

	getHostConfig()

	// runtime.GOMAXPROCS(2)
	timeout = 1000
	fmt.Println("Feeds")
	// http://careers.stackoverflow.com/jobs/feed?searchTerm=big+data&location=san+francisco&range=100&distanceUnits=Miles
	// feeds = append(feeds, Feed{index: 0, url: "http://careers.stackoverflow.com/jobs/feed?searchTerm=big+data&location=san+francisco&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 0, url: "http://careers.stackoverflow.com/jobs/feed?location=san+francisco%2c+ca&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 1, url: "http://careers.stackoverflow.com/jobs/feed?location=new+york+city%2c+ny&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 2, url: "http://careers.stackoverflow.com/jobs/feed?location=los+angeles%2c+ca&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 3, url: "http://careers.stackoverflow.com/jobs/feed?location=boston%2c+ma&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 4, url: "http://careers.stackoverflow.com/jobs/feed?location=seattle%2cwa&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 5, url: "http://careers.stackoverflow.com/jobs/feed?location=austin%2ctx&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 6, url: "http://careers.stackoverflow.com/jobs/feed?location=chicago%2cil&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})

	mutex = &sync.Mutex{}

	skillMap = make(map[string]int, 200)
	loadSkillMapFile(skillMap)

	fmt.Println("GetRSS")
	getRSS2()

	saveSkillMapFile(skillMap)
	if conf.hbaseZkURL != "" {
		saveSkillsMapHBase(skillMap)
	}

	for i := 0; i < len(guidList); i++ {
		fmt.Println(guidList[i])
	}

	// guidList := make([]string, 4)
	// guidList[0] = "http://careers.stackoverflow.com/jobs/103310/senior-software-engineer-american-society-of-clinical"
	// guidList[1] = "http://careers.stackoverflow.com/jobs/94152/senior-software-engineer-platform-flixster"
	// guidList[2] = "http://careers.stackoverflow.com/jobs/103328/senior-full-stack-engineer-data-science-adroll"
	// guidList[3] = "http://careers.stackoverflow.com/jobs/104086/enterprise-architect-new-relic"
	// fmt.Printf("%v\n", s)

	// Map random wait times & make S3 names.
	fw.Slice(guidList).Map(func(sURL string) URLTuple {
		fmt.Printf("Map1: %v\n", sURL)
		fName := "jobs_sof/" + strings.Replace(strings.TrimPrefix(sURL, "http://careers.stackoverflow.com/jobs/"), "/", "_", -1)
		ms := rand.Intn(3000)
		return URLTuple{sURL, fName, ms}
		// Filter already-acquired URLs.
	}).Filter(func(uTuple URLTuple) bool {
		// Is the file already stored in S3?
		// fmt.Printf("Filter:%s, %v\n", uTuple.s3Name, uTuple)
		svcS3 := s3.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
		params := &s3.HeadObjectInput{
			Bucket: aws.String("opps"),        // Required
			Key:    aws.String(uTuple.s3Name), // Required
		}
		hobj, _ := svcS3.HeadObject(params)
		fmt.Printf("Filter: %s => %v\n", uTuple.s3Name, hobj.ContentLength == nil)
		return hobj.ContentLength == nil
		// Get the URLs.
	}).Map(func(uTuple URLTuple) statusTuple {
		fmt.Printf("Map3: %v\n", uTuple)
		// Random sleep.
		time.Sleep(time.Duration(uTuple.msWait) * time.Millisecond)
		// Get the URL.
		resp, err := http.Get(uTuple.gURL)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()
		// fmt.Println("Body:", resp.Body)
		// fmt.Println("Proto:", resp.Proto)
		// fmt.Printf("response Status = <%s> / Length = %d\n", resp.Status, resp.ContentLength)
		// fmt.Println("response Headers:", resp.Header)
		// fmt.Printf("response %+v:\n", resp)
		failed := 0
		passed := 0
		if resp.StatusCode == 200 {
			passed = 1
		} else {
			failed = 1
		}

		// Store in S3.
		if passed == 1 {
			body, _ := ioutil.ReadAll(resp.Body)
			reader := strings.NewReader(string(body))
			root, err := html.Parse(reader)
			if err != nil {
				fmt.Printf("%+v\n", err)
			}
			var b bytes.Buffer
			html.Render(&b, root)
			fixedHtml := b.String()

			isOk := func(r rune) bool {
				return r < 32 || r >= 127
			}
			// The isOk filter is such that there is no need to chain to norm.NFC.
			t2 := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
			// This Transformer could also trivially be applied as an io.Reader
			// or io.Writer filter to automatically do such filtering when reading
			// or writing data anywhere.
			fixedUnicodeNFKD, _, _ := transform.String(t2, fixedHtml)
			// fmt.Println("\n\n\n" + fixedUnicodeNFKD)

			reader = strings.NewReader(fixedUnicodeNFKD)
			xmlroot, xmlerr := xmlpath.ParseHTML(reader)
			if xmlerr != nil {
				log.Fatal(xmlerr)
			}
			// fmt.Printf("xml root = %+v\n------\n", xmlroot)

			pstr := `/html/head/title`
			path := xmlpath.MustCompile(pstr)
			var ok bool
			title := ""
			if title, ok = path.String(xmlroot); ok {
				// fmt.Printf("%s: %s\n", pstr, title)
			}
			fmt.Printf("**** Title: %s\n", title)

			var iter *xmlpath.Iter
			var list *xmlpath.Path
			var cnt int

			// Location - needs Trim.
			pstr = `//*[@id="hed"]/ul[1]/li/text()`
			path = xmlpath.MustCompile(pstr)
			location := ""
			if location, ok = path.String(xmlroot); ok {
				location = strings.Trim(location, " \n")
			}

			// Base skills - loop until the iterator is exhausted.
			var skills []string
			list = xmlpath.MustCompile(`//*[@id="hed"]/div[2]/p/a`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				ele := iter.Node().String()
				skills = append(skills, ele)
			}

			var desc []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/p`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				ele := iter.Node().String()
				desc = append(desc, ele)
			}
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/ul/li`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				ele := iter.Node().String()
				desc = append(desc, ele)
			}

			var sSNR []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/p`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				sSNR = append(sSNR, ele)
				cnt++
			}
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/ul/li/text()`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				sSNR = append(sSNR, ele)
				cnt++
			}
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/ul/li/ul/li/text()`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				sSNR = append(sSNR, ele)
				cnt++
			}

			// About company.
			// pstr = `//*[@id="jobdetailpage"]/div[2]/div[1]/div[4]/p/text()`
			// path = xmlpath.MustCompile(pstr)
			// about := ""
			// if about, ok = path.String(xmlroot); ok {
			//	fmt.Printf("About: %s - %s\n", pstr, about)
			// }
			var about []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[4]/p`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				about = append(about, ele)
				cnt++
			}

			var sep string
			baseAbout := "ABOUT: "
			sep = ""
			for i := 0; i < len(about); i++ {
				baseAbout += sep + about[i]
				sep = "\n"
			}
			baseSkills := "BASESKILLS: "
			sep = ""
			// fmt.Printf("base skills = %+v\n", skills)
			for i := 0; i < len(skills); i++ {
				baseSkills += sep + skills[i]
				sep = " "
			}
			baseReqs := "REQUIREMENTS: "
			sep = ""
			for i := 0; i < len(sSNR); i++ {
				baseReqs += sep + sSNR[i]
				sep = "\n"
			}
			baseDesc := "DESCRIPTION: "
			sep = ""
			for i := 0; i < len(desc); i++ {
				baseDesc += sep + desc[i]
				sep = "\n"
			}

			storage := uTuple.gURL + "\n\n" +
				"DATE: " + time.Now().Format(time.RFC850) + "\n\n" +
				"TITLE: " + html.UnescapeString(title) + "\n\n" +
				"LOCATION: " + html.UnescapeString(location) + "\n\n" +
				html.UnescapeString(baseSkills) + "\n\n" +
				html.UnescapeString(baseAbout) + "\n\n" +
				html.UnescapeString(baseDesc) + "\n\n" + // no second slash
				html.UnescapeString(baseReqs) + "\n"
			fmt.Printf("Storing (len = %d):\n***\n%s\n***\n", len(storage), storage)

			svcS3 := s3.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
			bucket := "opps"
			key := uTuple.s3Name
			_, err = svcS3.PutObject(&s3.PutObjectInput{
				Body:   strings.NewReader(storage),
				Bucket: &bucket,
				Key:    &key,
			})
			if err != nil {
				fmt.Printf("Failed to upload data to %s/%s, %s\n", bucket, key, err)
				failed = 1
				passed = 0
			}
		}
		return statusTuple{passed, failed}
		// Count URLs.
	}).Reduce(func(x statusTuple, y statusTuple) statusTuple {
		fmt.Printf("Red1: x= %v, y = %v\n", x, y)
		return statusTuple{x.pass + y.pass, x.fail + y.fail}
	}).Map(func(x statusTuple) {
		fmt.Printf("Map4 Result: passed = %d, failed = %d\n", x.pass, x.fail)
	}).Run()
}
// NeuterAccents transforms characters with accents into plain forms.
func NeuterAccents(s string) string {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	result, _, _ := transform.String(t, s)
	return result
}
func NewReader(r io.Reader) io.Reader {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	return transform.NewReader(r, t)
}
func Bytes(b []byte) ([]byte, error) {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	res, _, err := transform.Bytes(t, b)
	return res, err
}
func normalizer() transform.Transformer {
	isMn := func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
	}
	return transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
}
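// Usage sketch for normalizer() above: a chained transformer carries internal
// state, so creating a fresh chain per use (or calling Reset between uses) is
// the safe pattern. The wrapper name and the fall-back-to-input choice on
// error are assumptions for the demo.
func normalizeString(s string) string {
	out, _, err := transform.String(normalizer(), s)
	if err != nil {
		return s // malformed input; return it unchanged
	}
	return out
}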
type PathSlice []Path

// Swap implements sort.Interface (and index.Swapper).
func (p PathSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Less implements sort.Interface.
func (p PathSlice) Less(i, j int) bool { return p[i].Encode() < p[j].Encode() }

// Len implements sort.Interface.
func (p PathSlice) Len() int { return len(p) }

func isMn(r rune) bool {
	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}

var transformer = transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)

func removeNonAlphaNumeric(s string) string {
	in := []rune(s)
	res := make([]rune, len(in))
	i := 0
	for _, x := range s {
		if x == '-' {
			res[i] = ' '
			i++
			continue
		}
		if unicode.IsLetter(x) || unicode.IsDigit(x) || unicode.IsSpace(x) {
			res[i] = unicode.ToLower(x)
			i++
		}
	}
	// The original snippet was truncated here; returning the collected runes
	// is the natural completion.
	return string(res[:i])
}
// NewWriter returns a writer which encodes from UTF-8 to the given encoding.
//
// If enc is nil, then only a UTF-8-enforcing replacement writer
// (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables)
// is used.
func NewWriter(w io.Writer, enc encoding.Encoding) io.WriteCloser {
	if enc == nil || enc == encoding.Replacement {
		return transform.NewWriter(w, encoding.Replacement.NewEncoder())
	}
	// Sanitize to well-formed UTF-8 first, mirroring NewReader; the original
	// chained only enc.NewEncoder(), which left the UTF-8 enforcement the doc
	// comment promises out of the chain.
	return transform.NewWriter(w, transform.Chain(encoding.Replacement.NewEncoder(), enc.NewEncoder()))
}
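// Companion sketch for NewWriter: the Close call flushes any bytes still
// buffered by the transform chain, so it must not be skipped.
// charmap.Windows1252 and the wrapper name are illustrative assumptions.
func writeWindows1252(w io.Writer, s string) error {
	wc := NewWriter(w, charmap.Windows1252)
	if _, err := io.WriteString(wc, s); err != nil {
		wc.Close()
		return err
	}
	return wc.Close()
}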
func init() {
	stripT = transform.Chain(
		norm.NFD,
		transform.RemoveFunc(isMn),
		norm.NFC)
}