func find_keywords(dict darts.Darts, line string) map[string]int { arr := []rune(strings.ToUpper(line)) result := make(map[string]int) for i := 0; i < len(arr); i++ { offset := i c := arr[offset] if unicode.IsSpace(c) || unicode.IsPunct(c) { continue } for pos := 2; offset+pos < len(arr); pos++ { c := arr[offset+pos-1] if unicode.IsPunct(c) { break } // log.Info(string(arr[offset : offset+pos])) exist, results := dict.CommonPrefixSearch(arr[offset:offset+pos], 0) if len(results) > 0 { key := string(arr[offset : offset+pos]) result[key] = result[key] + 1 offset = offset + pos - 1 } else if !exist { break } } } return result }
// owp copies words from src to dst one byte at a time, writing every second
// word with its bytes in reverse order. Words are separated by a single
// punctuation byte and the text ends at the first '.', which is also written.
//
// Reading stops quietly when src is exhausted or fails before a '.' is seen;
// any bytes of a partially read reversed word are still written. Errors from
// dst.Write are ignored (best effort).
func owp(dst io.Writer, src io.Reader) {
	var cell [1]byte
	// byteIn returns the next byte of src, or ok == false when none is left.
	byteIn := func() (b byte, ok bool) {
		if _, err := io.ReadFull(src, cell[:]); err != nil {
			return 0, false
		}
		return cell[0], true
	}
	byteOut := func(b byte) { dst.Write([]byte{b}) }

	// odd consumes one word and returns the punctuation byte that ended it.
	// Each letter is queued with defer on purpose: deferred calls run in
	// last-in-first-out order when odd returns, which writes the word
	// reversed before the caller writes the punctuation.
	odd := func() (byte, bool) {
		for {
			b, ok := byteIn()
			if !ok {
				return 0, false
			}
			if unicode.IsPunct(rune(b)) {
				return b, true
			}
			defer byteOut(b)
		}
	}

	for {
		// Even word: echo bytes as they arrive.
		for {
			b, ok := byteIn()
			if !ok {
				return
			}
			byteOut(b)
			if b == '.' {
				return
			}
			if unicode.IsPunct(rune(b)) {
				break
			}
		}
		// Odd word: reversed, followed by its terminating punctuation.
		b, ok := odd()
		if !ok {
			return
		}
		byteOut(b)
		if b == '.' {
			return
		}
	}
}
// owp copies words from src to dst one byte at a time, writing every second
// word with its bytes in reverse order. Words are separated by a single
// punctuation byte and the text ends at the first '.', which is also written.
//
// Reading stops quietly when src is exhausted or fails before a '.' is seen;
// any bytes of a partially read reversed word are still written. Errors from
// dst.Write are ignored (best effort).
func owp(dst io.Writer, src io.Reader) {
	var cell [1]byte
	// byteIn returns the next byte of src, or ok == false when none is left.
	byteIn := func() (b byte, ok bool) {
		if _, err := io.ReadFull(src, cell[:]); err != nil {
			return 0, false
		}
		return cell[0], true
	}
	byteOut := func(b byte) { dst.Write([]byte{b}) }

	// odd reads one word recursively: each level holds one letter and writes
	// it only after the deeper levels have written theirs, which reverses the
	// word. It returns the punctuation byte that ended the word.
	var odd func() (byte, bool)
	odd = func() (byte, bool) {
		s, ok := byteIn()
		if !ok {
			return 0, false
		}
		if unicode.IsPunct(rune(s)) {
			return s, true
		}
		end, ok := odd()
		byteOut(s)
		return end, ok
	}

	for {
		// Even word: echo bytes as they arrive.
		for {
			b, ok := byteIn()
			if !ok {
				return
			}
			byteOut(b)
			if b == '.' {
				return
			}
			if unicode.IsPunct(rune(b)) {
				break
			}
		}
		// Odd word: reversed, followed by its terminating punctuation.
		b, ok := odd()
		if !ok {
			return
		}
		byteOut(b)
		if b == '.' {
			return
		}
	}
}
func DeEscapeProse(p md.Prose) md.Prose { result := make(md.Prose, 0, len(p)) var buf []byte runs: for i := 0; i < len(p); i++ { if buf == nil { buf = p[i].Bytes } for j := 0; ; { k := bytes.IndexByte(buf[j:], '\\') if k == -1 { result = append(result, md.Run{ Line: p[i].Line, Bytes: buf, }) buf = nil continue runs } j += k r, _ := utf8.DecodeRune(buf[j+1:]) if unicode.IsPunct(r) || unicode.IsSymbol(r) { result = append(result, md.Run{ Line: p[i].Line, Bytes: buf[:j], }) buf = buf[j+1:] i-- continue runs } j++ } } return result }
// Escape turns name into a string that is safe to use as a filename.
// HTML entities are unescaped first. Letters and numbers are kept, every
// run of whitespace and punctuation collapses into a single '-', and all
// other runes are dropped. A leading or trailing '-' is removed. An error
// is returned when nothing is left.
func Escape(name string) (escaped string, err error) {
	var b strings.Builder
	lastDash := false
	for _, r := range html.UnescapeString(name) {
		switch {
		case unicode.IsLetter(r), unicode.IsNumber(r):
			b.WriteRune(r)
			lastDash = false
		case unicode.IsSpace(r), unicode.IsPunct(r):
			// Dropped runes do not reset lastDash, so separators on both
			// sides of a dropped rune still collapse into one dash.
			if !lastDash {
				b.WriteByte('-')
				lastDash = true
			}
		}
	}
	escaped = strings.Trim(b.String(), "-")
	if escaped == "" {
		err = errors.New("couldn't escape title")
	}
	return escaped, err
}
// splitText splits t into words. Words are separated by any run of Unicode
// punctuation or whitespace, except that the apostrophe is treated as part of
// a word so that forms like "boy's" stay intact. It returns nil when t
// contains no words.
func splitText(t string) (ws []string) {
	start := 0 // byte offset just past the most recent separator
	inWord := false
	for i, r := range t {
		if r != '\'' && (unicode.IsPunct(r) || unicode.IsSpace(r)) {
			if inWord {
				ws = append(ws, t[start:i])
				inWord = false
			}
			// Separators may be multi-byte (e.g. an em dash), so step over
			// the whole rune rather than a single byte.
			start = i + utf8.RuneLen(r)
			continue
		}
		inWord = true
	}
	if inWord {
		ws = append(ws, t[start:])
	}
	return ws
}
// isSearchWordRune reports whether r may appear in an unquoted predicate
// argument or unquoted literal. Every Unicode letter, digit and punctuation
// rune qualifies, except ':' (used for predicate marking) and '(' and ')'
// (used for predicate grouping).
func isSearchWordRune(r rune) bool {
	if r == ':' || r == '(' || r == ')' {
		return false
	}
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r), unicode.IsPunct(r):
		return true
	}
	return false
}
// CharCount reads runes from in until EOF and returns how many fell into each
// Unicode character class. The map keys are "invalid", "control", "letter",
// "mark", "number", "punct", "space" and "symbol". A rune is counted under
// the first class in that order that matches, and runes matching none are
// not counted. On any read error other than EOF the function prints a
// message to standard error and exits the process with status 1.
func CharCount(in *bufio.Reader) map[string]int {
	// Ordered: the first matching predicate wins.
	classes := []struct {
		label string
		match func(rune) bool
	}{
		{"control", unicode.IsControl},
		{"letter", unicode.IsLetter},
		{"mark", unicode.IsMark},
		{"number", unicode.IsNumber},
		{"punct", unicode.IsPunct},
		{"space", unicode.IsSpace},
		{"symbol", unicode.IsSymbol},
	}

	counts := make(map[string]int)
	for {
		r, size, err := in.ReadRune()
		if err != nil {
			if err == io.EOF {
				return counts
			}
			fmt.Fprintf(os.Stderr, "charcount: %v\n", err)
			os.Exit(1)
		}
		// A replacement char decoded from a single byte marks bad UTF-8.
		if r == unicode.ReplacementChar && size == 1 {
			counts["invalid"]++
			continue
		}
		for _, c := range classes {
			if c.match(r) {
				counts[c.label]++
				break
			}
		}
	}
}
// MorseKeys translates an input string into a series of keys. func MorseKeys(in string) ([]key, error) { afterWord := false afterChar := false result := []key{} for _, c := range in { if unicode.IsSpace(c) { afterWord = true continue } morse, ok := runeToKeys[c] if !ok { return nil, fmt.Errorf("can't translate %c to morse", c) } if unicode.IsPunct(c) && afterChar { result = append(result, punctGap...) } else if afterWord { result = append(result, wordGap...) } else if afterChar { result = append(result, charGap...) } result = append(result, morse...) afterChar = true afterWord = false } return result, nil }
// main reads UTF-8 text from standard input and prints four reports: the
// count of each distinct rune, the count of runes per UTF-8 encoded length,
// the number of invalid UTF-8 bytes (only when non-zero), and the count of
// runes per category (Letter, Digit, Symbol, Punct, Space), followed by the
// number of runes in none of those categories (only when non-zero).
func main() {
	var (
		runeCounts    = make(map[rune]int)   // occurrences of each rune
		lenCounts     [utf8.UTFMax + 1]int   // indexed by encoded length in bytes
		badBytes      int                    // invalid UTF-8 bytes seen
		byCategory    = make(map[string]int) // occurrences per category name
		uncategorized int                    // runes matching no category
	)

	// categoryOf names the first matching category, or "" when none matches.
	categoryOf := func(r rune) string {
		if unicode.IsLetter(r) {
			return "Letter"
		}
		if unicode.IsDigit(r) {
			return "Digit"
		}
		if unicode.IsSymbol(r) {
			return "Symbol"
		}
		if unicode.IsPunct(r) {
			return "Punct"
		}
		if unicode.IsSpace(r) {
			return "Space"
		}
		return ""
	}

	stdin := bufio.NewReader(os.Stdin)
	for {
		r, size, err := stdin.ReadRune()
		if err != nil {
			if err == io.EOF {
				break
			}
			fmt.Fprintf(os.Stderr, "charcount: %v\n", err)
			os.Exit(1)
		}
		if r == unicode.ReplacementChar && size == 1 {
			badBytes++
			continue
		}
		runeCounts[r]++
		lenCounts[size]++
		if name := categoryOf(r); name != "" {
			byCategory[name]++
		} else {
			uncategorized++
		}
	}

	fmt.Print("rune\tcount\n")
	for r, n := range runeCounts {
		fmt.Printf("%q\t%d\n", r, n)
	}
	fmt.Print("\nlen\tcount\n")
	for size := 1; size < len(lenCounts); size++ {
		fmt.Printf("%d\t%d\n", size, lenCounts[size])
	}
	if badBytes > 0 {
		fmt.Printf("\n%d invalid UTF-8 characters\n", badBytes)
	}
	fmt.Print("\ncat\tcount\n")
	for name, n := range byCategory {
		fmt.Printf("%s\t%d\n", name, n)
	}
	if uncategorized > 0 {
		fmt.Printf("\n%d characters of unknown category\n", uncategorized)
	}
}
func TestRune_IsIndependent(t *testing.T) { numbers := make([]rune, 0) letters := make([]rune, 0) marks := make([]rune, 0) symbols := make([]rune, 0) puncts := make([]rune, 0) others := make([]rune, 0) for _, r := range unicode.Myanmar.R16 { for c := r.Lo; c <= r.Hi; c++ { switch mr := rune(c); true { case unicode.IsLetter(mr): letters = append(letters, mr) case unicode.IsNumber(mr): numbers = append(numbers, mr) case unicode.IsMark(mr): marks = append(marks, mr) case unicode.IsPunct(mr): puncts = append(puncts, mr) case unicode.IsSymbol(mr): symbols = append(symbols, mr) default: others = append(others, mr) } } } independents := string(letters) + string(numbers) + string(puncts) + " \t\r\n" for _, consonant := range independents { if ok, _ := Rune(consonant).IsIndependent(); !ok { t.Errorf("[%U] expected result is true, but it returns false", consonant) } } }
// CharType returns the name of the first Unicode class that r belongs to,
// testing in this order: "letter", "space", "punct", "number", "symbol",
// "mark", "digit", "print", "control", "graphic". It returns "invalid" when
// none matches. Because "number" is tested before "digit" and every digit is
// also a number, decimal digits are reported as "number".
func CharType(r rune) string {
	// Ordered: the first matching predicate decides the result.
	classes := [...]struct {
		name string
		is   func(rune) bool
	}{
		{"letter", unicode.IsLetter},
		{"space", unicode.IsSpace},
		{"punct", unicode.IsPunct},
		{"number", unicode.IsNumber},
		{"symbol", unicode.IsSymbol},
		{"mark", unicode.IsMark},
		{"digit", unicode.IsDigit},
		{"print", unicode.IsPrint},
		{"control", unicode.IsControl},
		{"graphic", unicode.IsGraphic},
	}
	for _, c := range classes {
		if c.is(r) {
			return c.name
		}
	}
	return "invalid"
}
// Stat calculates statistics for all runes read from r. func (m *Main) Stat(r io.RuneReader) (Stats, error) { var stats Stats for { // Read next character. ch, sz, err := r.ReadRune() if err == io.EOF { break } else if err != nil { return stats, err } // Calculate stats. stats.TotalN++ if unicode.IsControl(ch) { stats.ControlN++ } if unicode.IsDigit(ch) { stats.DigitN++ } if unicode.IsGraphic(ch) { stats.GraphicN++ } if unicode.IsLetter(ch) { stats.LetterN++ } if unicode.IsLower(ch) { stats.LowerN++ } if unicode.IsMark(ch) { stats.MarkN++ } if unicode.IsNumber(ch) { stats.NumberN++ } if unicode.IsPrint(ch) { stats.PrintN++ } if unicode.IsPunct(ch) { stats.PunctN++ } if unicode.IsSpace(ch) { stats.SpaceN++ } if unicode.IsSymbol(ch) { stats.SymbolN++ } if unicode.IsTitle(ch) { stats.TitleN++ } if unicode.IsUpper(ch) { stats.UpperN++ } if sz > 1 { stats.MultiByteN++ } } return stats, nil }
func incrementCount(r rune, counts map[int]int) { switch { case unicode.IsControl(r): counts[isControl]++ case unicode.IsNumber(r): counts[isNumber]++ case unicode.IsDigit(r): counts[isDigit]++ case unicode.IsLetter(r): counts[isLetter]++ case unicode.IsMark(r): counts[isMark]++ case unicode.IsPunct(r): counts[isPunct]++ case unicode.IsSpace(r): counts[isSpace]++ case unicode.IsSymbol(r): counts[isSymbol]++ case unicode.IsPrint(r): counts[isPrint]++ case unicode.IsGraphic(r): counts[isGraphic]++ } }
// test_password reports whether pass satisfies the Windows AD style
// complexity rule described here: at least 7 characters, containing
// characters from at least three of these five categories:
//
//   - digits
//   - lowercase letters
//   - uppercase letters
//   - non-alphanumeric characters (Unicode punctuation and symbols)
//   - letters that are neither uppercase nor lowercase
//
// The length is counted in characters (runes), not bytes. Characters outside
// all five categories, such as spaces and control characters, count towards
// the length but not towards any category.
func test_password(pass string) bool {
	var length int
	var digit, lower, upper, special, otherLetter int
	for _, c := range pass {
		length++
		switch {
		case unicode.IsDigit(c):
			digit = 1
		case unicode.IsLower(c):
			lower = 1
		case unicode.IsUpper(c):
			upper = 1
		case unicode.IsLetter(c):
			// Cased letters were handled above; what remains is caseless.
			otherLetter = 1
		case unicode.IsPunct(c), unicode.IsSymbol(c):
			// '$', '+', '=', '^' and similar are symbols, not punctuation,
			// in Unicode terms; both kinds are non-alphanumeric.
			special = 1
		}
	}
	if length < 7 {
		return false
	}
	return digit+lower+upper+special+otherLetter >= 3
}
// isLetter reports whether ch is a Unicode letter, punctuation or symbol
// rune. Parentheses and the single and double quote characters are always
// rejected.
func isLetter(ch rune) bool {
	switch ch {
	case '(', ')', '\'', '"':
		return false
	}
	if unicode.IsLetter(ch) {
		return true
	}
	if unicode.IsPunct(ch) {
		return true
	}
	return unicode.IsSymbol(ch)
}
// synopsis extracts the first sentence from s. All runs of whitespace are // replaced by a single space. func synopsis(s string) string { parts := strings.SplitN(s, "\n\n", 2) s = parts[0] var buf []byte const ( other = iota period space ) last := space Loop: for i := 0; i < len(s); i++ { b := s[i] switch b { case ' ', '\t', '\r', '\n': switch last { case period: break Loop case other: buf = append(buf, ' ') last = space } case '.': last = period buf = append(buf, b) default: last = other buf = append(buf, b) } } // Ensure that synopsis fits an App Engine datastore text property. const m = 400 if len(buf) > m { buf = buf[:m] if i := bytes.LastIndex(buf, []byte{' '}); i >= 0 { buf = buf[:i] } buf = append(buf, " ..."...) } s = string(buf) r, n := utf8.DecodeRuneInString(s) if n < 0 || unicode.IsPunct(r) || unicode.IsSymbol(r) { // ignore Markdown headings, editor settings, Go build constraints, and * in poorly formatted block comments. s = "" } else { for _, prefix := range badSynopsisPrefixes { if strings.HasPrefix(s, prefix) { s = "" break } } } return s }
/* * Password rules: * at least 7 letters * at least 1 number * at least 1 upper case * at least 1 special character */ func ValidatePassword(value, local string) error { fmt.Println("Validate password", value) if len(value) < 7 { return errors.New(i18n.Translate(local, i18nSec, "text03")) } var num, lower, upper, spec bool for _, r := range value { switch { case unicode.IsDigit(r): num = true case unicode.IsUpper(r): upper = true case unicode.IsLower(r): lower = true case unicode.IsSymbol(r), unicode.IsPunct(r): spec = true } } if num && lower && upper && spec { return nil } return errors.New(i18n.Translate(local, i18nSec, "text03")) }
// AllPunctOrSpace reports whether every rune in s is Unicode punctuation or
// whitespace. It returns true for the empty string.
func AllPunctOrSpace(s string) bool {
	for _, r := range s {
		if unicode.IsPunct(r) || unicode.IsSpace(r) {
			continue
		}
		return false
	}
	return true
}
// sanitize returns s with every Unicode punctuation rune removed. All other
// runes are kept in order. Bytes that are not valid UTF-8 come out as the
// Unicode replacement character.
func sanitize(s string) string {
	var kept bytes.Buffer
	for _, r := range s {
		if unicode.IsPunct(r) {
			continue
		}
		kept.WriteRune(r)
	}
	return kept.String()
}
// IsNumber reports whether s consists solely of Unicode number and
// punctuation runes. The check is deliberately loose: it accepts strings
// such as "1,234.5" and "-5", but also the empty string and strings made of
// punctuation only.
func IsNumber(s string) bool {
	for _, r := range s {
		if !unicode.IsNumber(r) && !unicode.IsPunct(r) {
			return false
		}
	}
	return true
}
// startPar starts a two-stage paragraph formatter that writes to out and
// returns a parFmt built from the input channel rc and the completion channel
// ec.
//
// Stage one receives strings from rc. The string "\n" is forwarded as is;
// any other string is split into whitespace-separated words which are
// forwarded one by one. When rc is closed, stage one closes the word channel.
//
// Stage two writes indent0 once, then the words. Before a word that would
// make the line exceed max (counting one extra column for a separator) it
// writes a newline followed by indent. A "\n" word ends the current line and
// resets the position to zero without writing any indent. A single space is
// written between two words unless the new word's first byte is punctuation
// or the previous word ends with one of "([{". When the word channel is
// closed, a final newline is written if the current line has content, and ec
// is closed.
//
// Positions are measured in bytes (len of each word), not in runes.
func startPar(out io.Writer, indent0, indent string, max int) *parFmt {
	rc := make(chan string)
	ec := make(chan bool, 1)
	wc := make(chan string) // words, or "\n" for a forced line break
	pf := &parFmt{rc, ec}

	// Stage one: split incoming text into words.
	go func() {
		for s := range rc {
			if s == "\n" {
				wc <- s
				continue
			}
			words := strings.Fields(strings.TrimSpace(s))
			for _, w := range words {
				wc <- w
			}
		}
		close(wc)
	}()

	// Stage two: lay the words out on lines no wider than max.
	go func() {
		// pos is the current column, seeded with the width of indent0.
		pos, _ := fmt.Fprintf(out, "%s", indent0)
		firstword := true
		// Placeholder so that lastword is never empty when it is indexed.
		lastword := "x"
		for w := range wc {
			if len(w) == 0 {
				continue
			}
			if w == "\n" {
				fmt.Fprintf(out, "\n")
				firstword = true
				pos = 0
				continue
			}
			// Wrap before the word if it (plus a separator) would not fit.
			if pos+len(w)+1 > max {
				fmt.Fprintf(out, "\n")
				pos, _ = fmt.Fprintf(out, "%s", indent)
				firstword = true
			}
			// Only the first byte of w and the last byte of lastword are
			// inspected here, not full runes.
			if !firstword && len(w)>0 && !unicode.IsPunct(rune(w[0])) {
				lastr := rune(lastword[len(lastword)-1])
				if !strings.ContainsRune("([{", lastr) {
					fmt.Fprintf(out, " ")
					pos++
				}
			}
			fmt.Fprintf(out, "%s", w)
			pos += len(w)
			firstword = false
			lastword = w
		}
		// Terminate the last line if anything was written on it.
		if !firstword {
			fmt.Fprintf(out, "\n")
		}
		close(ec)
	}()
	return pf
}
// Sanitize maps punctuation, mark and symbol runes to a space and returns
// every other rune unchanged.
func Sanitize(r rune) rune {
	if unicode.IsPunct(r) || unicode.IsMark(r) || unicode.IsSymbol(r) {
		return ' '
	}
	return r
}
// main reads UTF-8 text from standard input and prints the number of runes
// per Unicode category (Letter, Mark, Number, Punct, Symbol, Space, Other),
// the number of runes per UTF-8 encoded length, and, when non-zero, the
// number of invalid UTF-8 bytes.
func main() {
	// classify names the first matching category; "Other" is the fallback.
	classify := func(r rune) string {
		if unicode.IsLetter(r) {
			return "Letter"
		}
		if unicode.IsMark(r) {
			return "Mark"
		}
		if unicode.IsNumber(r) {
			return "Number"
		}
		if unicode.IsPunct(r) {
			return "Punct"
		}
		if unicode.IsSymbol(r) {
			return "Symbol"
		}
		if unicode.IsSpace(r) {
			return "Space"
		}
		return "Other"
	}

	byCategory := make(map[string]int)
	var byLength [utf8.UTFMax + 1]int // indexed by encoded length in bytes
	badBytes := 0

	stdin := bufio.NewReader(os.Stdin)
	for {
		r, size, err := stdin.ReadRune()
		if err != nil {
			if err == io.EOF {
				break
			}
			fmt.Fprintf(os.Stderr, "charcount: %v\n", err)
			os.Exit(1)
		}
		if r == unicode.ReplacementChar && size == 1 {
			badBytes++
			continue
		}
		byLength[size]++
		byCategory[classify(r)]++
	}

	fmt.Print("rune\tcount\n")
	for name, n := range byCategory {
		fmt.Printf("%s\t%d\n", name, n)
	}
	fmt.Print("\nlen\tcount\n")
	for size := 1; size < len(byLength); size++ {
		fmt.Printf("%d\t%d\n", size, byLength[size])
	}
	if badBytes > 0 {
		fmt.Printf("\n%d invalid UTF-8 characters\n", badBytes)
	}
}
// main reads UTF-8 text from standard input and prints the count of each
// distinct rune, the count of runes per UTF-8 encoded length, the count of
// runes per category (letter, digit, control, mark, punct, symbol) and the
// number of invalid UTF-8 bytes. Runes outside the listed categories are
// counted per rune and per length but not per category.
func main() {
	// Ordered: the first matching predicate decides the category.
	categories := []struct {
		name string
		is   func(rune) bool
	}{
		{"letter", unicode.IsLetter},
		{"digit", unicode.IsDigit},
		{"control", unicode.IsControl},
		{"mark", unicode.IsMark},
		{"punct", unicode.IsPunct},
		{"symbol", unicode.IsSymbol},
	}

	var (
		perRune     = make(map[rune]int)
		perLength   [utf8.UTFMax]int // index i holds runes encoded in i+1 bytes
		badBytes    int
		perCategory = make(map[string]int)
	)

	// In a terminal, use CTRL+Z at line start to signal EOF with ENTER.
	stdin := bufio.NewReader(os.Stdin)
	for {
		r, size, err := stdin.ReadRune()
		if err != nil {
			if err == io.EOF {
				break
			}
			fmt.Fprintf(os.Stderr, "charcount: %v\n", err)
			os.Exit(1)
		}
		if r == unicode.ReplacementChar && size == 1 {
			badBytes++
			continue
		}
		for _, c := range categories {
			if c.is(r) {
				perCategory[c.name]++
				break
			}
		}
		perRune[r]++
		perLength[size-1]++
	}

	fmt.Print("rune\tcount\n")
	for r, n := range perRune {
		fmt.Printf("%q\t%d\n", r, n)
	}
	fmt.Print("\nlen\tcount\n")
	for i := 0; i < len(perLength); i++ {
		fmt.Printf("%d\t%d\n", i+1, perLength[i])
	}
	fmt.Print("\ncat\tcount\n")
	for name, n := range perCategory {
		fmt.Printf("%v\t%d\n", name, n)
	}
	fmt.Printf("\n%d invalid UTF-8 characters\n", badBytes)
}
// main reads UTF-8 text from standard input and prints the number of runes
// per Unicode character type (control, letter, mark, number, punct, space,
// symbol; type names are printed quoted), the number of runes per UTF-8
// encoded length, and, when non-zero, the number of invalid UTF-8 bytes.
// Runes of none of the listed types are only counted by length.
func main() {
	stdin := bufio.NewReader(os.Stdin)

	perType := make(map[string]int)    // counts of Unicode character types
	var perLength [utf8.UTFMax + 1]int // indexed by encoded length in bytes
	badBytes := 0                      // invalid UTF-8 bytes seen

	for {
		r, size, err := stdin.ReadRune()
		if err != nil {
			if err == io.EOF {
				break
			}
			fmt.Fprintf(os.Stderr, "charcount: %v\n", err)
			os.Exit(1)
		}
		if r == unicode.ReplacementChar && size == 1 {
			badBytes++
			continue
		}

		// The first matching type wins.
		if unicode.IsControl(r) {
			perType["control"]++
		} else if unicode.IsLetter(r) {
			perType["letter"]++
		} else if unicode.IsMark(r) {
			perType["mark"]++
		} else if unicode.IsNumber(r) {
			perType["number"]++
		} else if unicode.IsPunct(r) {
			perType["punct"]++
		} else if unicode.IsSpace(r) {
			perType["space"]++
		} else if unicode.IsSymbol(r) {
			perType["symbol"]++
		}
		perLength[size]++
	}

	fmt.Print("rune\tcount\n")
	for name, n := range perType {
		fmt.Printf("%q\t%d\n", name, n)
	}
	fmt.Print("\nlen\tcount\n")
	for size := 1; size < len(perLength); size++ {
		fmt.Printf("%d\t%d\n", size, perLength[size])
	}
	if badBytes > 0 {
		fmt.Printf("\n%d invalid UTF-8 characters\n", badBytes)
	}
}
// atTerminator reports whether the input is at valid termination character to // appear after an identifier. func (l *Scanner) atTerminator() bool { r := l.peek() if r == eof || isSpace(r) || isEndOfLine(r) || unicode.IsPunct(r) || unicode.IsSymbol(r) { return true } // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will // succeed but should fail) but only in extremely rare cases caused by willfully // bad choice of delimiter. if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { return true } return false }
// delayPercent returns the display delay for word as a percentage, starting
// at 100 and growing by 10 for every point the word scores:
//
//   - 2 points when the word ends in a punctuation mark
//   - 2 points when the word starts with a punctuation mark
//   - 1 point when it has more than 8 letters or numbers
//   - 1 further point when it has more than 12 letters or numbers
//
// An empty word scores nothing and yields 100.
func delayPercent(word string) int {
	rword := []rune(word)
	if len(rword) == 0 {
		// Nothing to inspect; indexing below would panic.
		return 100
	}

	// Count only letters and numbers towards the word length.
	alnum := 0
	for _, r := range rword {
		if unicode.IsLetter(r) || unicode.IsNumber(r) {
			alnum++
		}
	}

	wordScore := 0
	if unicode.IsPunct(rword[len(rword)-1]) {
		wordScore += 2
	}
	if unicode.IsPunct(rword[0]) {
		wordScore += 2
	}
	if alnum > 8 {
		wordScore++
	}
	if alnum > 12 {
		wordScore++
	}
	return 100 + 10*wordScore
}
func getIdent(r rune) int { i, ok := Ident[r] switch { case ok: return i case unicode.IsNumber(r): return CONST case unicode.IsLetter(r) || unicode.IsPunct(r) || unicode.IsSymbol(r): return RESERVED case unicode.IsSpace(r): return SPACE } return ZERO }
// normalize does unicode normalization. func normalize(in []byte) ([]byte, error) { // We need a new transformer for each input as it cannot be reused. filter := func(r rune) bool { return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks (to be removed) } transformer := transform.Chain(norm.NFD, transform.RemoveFunc(filter), norm.NFC) out, _, err := transform.Bytes(transformer, in) out = bytes.Map(func(r rune) rune { if unicode.IsPunct(r) { // Replace punctuations with spaces. return ' ' } return unicode.ToLower(r) // Convert to lower case. }, out) return out, err }