// IndexFold performs a case-insensitive search for sub inside s, starting at
// byte position offset.
//
// It returns the byte offsets (start, end) of the first match, such that every
// rune of s[start:end], lower-cased, equals the corresponding rune of the
// lower-cased sub. An empty sub yields (0, 0) regardless of offset. When there
// is no match the result is (-1, -1). A negative offset is treated as 0.
func IndexFold(s, sub string, offset int) (int, int) {
	sub = strings.ToLower(sub)
	if len(sub) == 0 {
		return 0, 0
	}
	if offset < 0 {
		offset = 0
	}
	first, _ := utf8.DecodeRuneInString(sub)

	// The scan is bounded by len(s) only, not by len(sub): the two case
	// variants of a rune may have different UTF-8 widths (e.g. U+023A is two
	// bytes, its lower case U+2C65 is three), so a match in s can be shorter
	// in bytes than the lowered pattern.
	for start := offset; start < len(s); {
		c, size := utf8.DecodeRuneInString(s[start:])
		if unicode.ToLower(c) == first {
			end, j := start, 0
			// Stop as soon as s is exhausted so that the RuneError returned
			// for an empty tail can never be mistaken for a literal U+FFFD
			// in the pattern.
			for j < len(sub) && end < len(s) {
				sr, sw := utf8.DecodeRuneInString(s[end:])
				pr, pw := utf8.DecodeRuneInString(sub[j:])
				if unicode.ToLower(sr) != pr {
					break
				}
				end += sw
				j += pw
			}
			if j == len(sub) {
				return start, end
			}
		}
		start += size
	}
	return -1, -1
}
func (m *minificationText) processText(in string) string { var buffer bytes.Buffer var rRaw, r rune var size int prevIsSeparator := false prevRune := ' ' isFirst := true for len(in) > 0 { rRaw, size = utf8.DecodeRuneInString(in) r = unicode.ToLower(rRaw) isSeparator := !unicode.Is(notSeparatorRT, r) // digits if isSeparator && !prevIsSeparator { rRaw, _ = utf8.DecodeRuneInString(in[size:]) isSeparator = !m.isDigit(prevRune, r, rRaw) } if !isSeparator && prevIsSeparator && !isFirst { _ = buffer.WriteByte(' ') } if !isSeparator { _, _ = buffer.WriteRune(r) isFirst = false } prevIsSeparator = isSeparator prevRune = r in = in[size:] } return buffer.String() }
// check if the input is a valid symbol. // a symbol is anything that starts with a letter or _ // and don't have any whitespace between // this-is-a-valid-symbol // this!is_another?crazy_symbol // THIS func symbol(in string) (string, string, bool) { orig := in sym, sz := utf8.DecodeRuneInString(in) if isSpace(sym) || isDigit(sym) { // a sym MUST START with something different // from a digit or space return "", orig, false } in = in[sz:] // okay, go ahread and read everything until you find // a space for len(in) > 0 { r, w := utf8.DecodeRuneInString(in) if isSpace(r) { break } else { // not a whitespace // move the sz counter by w bytes // and use the tail of input in = in[w:] sz += w } } // the first space found is kept intact return orig[0:sz], in, true }
func defaultFileName(lang string, pkg *types.Package) string { switch lang { case "java": if pkg == nil { return "Universe.java" } firstRune, size := utf8.DecodeRuneInString(pkg.Name()) className := string(unicode.ToUpper(firstRune)) + pkg.Name()[size:] return className + ".java" case "go": if pkg == nil { return "go_universe.go" } return "go_" + pkg.Name() + ".go" case "objc": if pkg == nil { return "GoUniverse.m" } firstRune, size := utf8.DecodeRuneInString(pkg.Name()) className := string(unicode.ToUpper(firstRune)) + pkg.Name()[size:] return "Go" + className + ".m" } errorf("unknown target language: %q", lang) os.Exit(exitStatus) return "" }
// peekNextRune decodes the next rune of the lexer input and returns it with
// its width in bytes. When at least five units are left only a five-unit
// window obtained from peek is decoded; otherwise the whole remainder is.
func (l *Lexer) peekNextRune() (rune, int) {
	var window string
	if l.left() >= 5 {
		window = l.peek(5)
	} else {
		window = l.remainder()
	}
	return utf8.DecodeRuneInString(window)
}
// 替换文字、参数为要替换的文字内容 func (self *StringFilter) Replace(txt string) string { if len(txt) < 1 { return txt } node := self.root key := []rune(txt) var chars []rune = nil slen := len(key) for i := 0; i < slen; i++ { var match bool var endPos int if _, exists := node.children[key[i]]; exists { node = node.children[key[i]] if node.end { // 单个单词匹配 c, _ := utf8.DecodeRuneInString("*") if chars == nil { chars = key } chars[i] = c } for j := i + 1; j < slen; j++ { if _, exists := node.children[key[j]]; !exists { break } node = node.children[key[j]] if !node.end { continue } match = true endPos = j if len(node.children) > 0 { continue } } if match { if chars == nil { chars = key } for t := i; t <= endPos; t++ { // 从敏感词开始到结束依次替换为* c, _ := utf8.DecodeRuneInString("*") chars[t] = c } } node = self.root } } if chars == nil { return txt } else { return string(chars) } }
func AreOneEditAway(input1, input2 string) bool { len1 := utf8.RuneCountInString(input1) len2 := utf8.RuneCountInString(input2) if len1 != len2 && len1-1 != len2 && len2-1 != len1 { return false } if len1 == len2 { // must be one replacement var width1, width2 int var r1, r2 rune diffSeen := false for i, j := 0, 0; i < len1 || j < len2; i, j = i+width1, j+width2 { r1, width1 = utf8.DecodeRuneInString(input1[i:]) r2, width2 = utf8.DecodeRuneInString(input2[j:]) if r1 != r2 { if diffSeen { return false } else { diffSeen = true } } } return true } else if len1-1 == len2 { // input1 must be a removal from input2 return oneRemovalAway(input2, input1) } else { //if len2-1 == len1 { // input2 must be a removal from input1 return oneRemovalAway(input1, input2) } }
// matchIgnoreCase1 matches the two-letter word "az" at the start of s,
// ignoring ASCII case. It returns the byte offset just past the match (2) or
// -1 when s does not start with it.
func matchIgnoreCase1(s string) (end int) {
	end = -1

	first, w1 := utf8.DecodeRuneInString(s)
	if w1 == 0 || (first != 'A' && first != 'a') {
		return
	}

	second, w2 := utf8.DecodeRuneInString(s[w1:])
	if w2 == 0 {
		return
	}
	if second == 'Z' || second == 'z' {
		end = w1 + w2
	}
	return
}
// Evaluate an attribute value template func evalAVT(input string, node xml.Node, context *ExecutionContext) (out string) { var start, pos int var inSQlit, inDQlit bool for pos < len(input) { r, width := utf8.DecodeRuneInString(input[pos:]) pos += width if r == '\'' { inSQlit = !inSQlit } if r == '"' { inDQlit = !inDQlit } if r == '{' { // if we're not the last character if pos < len(input) { // check for doubled opening brace peek, w := utf8.DecodeRuneInString(input[pos:]) if peek == '{' { out = out + input[start:pos] pos += w start = pos continue } } out = out + input[start:pos-width] start = pos } if r == '}' { if inSQlit || inDQlit { continue } // if we're not the last character if pos < len(input) { // check for doubled closing brace peek, w := utf8.DecodeRuneInString(input[pos:]) if peek == '}' { out = out + input[start:pos] pos += w start = pos continue } } expr := input[start : pos-width] ret, _ := context.EvalXPath(node, expr) switch val := ret.(type) { case []xml.Node: for _, n := range val { out = out + n.Content() } case float64: out = out + fmt.Sprintf("%v", val) case string: out = out + val } start = pos } } out = out + input[start:pos] return }
// matchLazy6 matches one or more 'a' runes followed by a 'b' at the start of
// s. It returns the byte offset just past the 'b', or -1 when s does not
// start with such a sequence.
func matchLazy6(s string) (end int) {
	end = -1

	// The match must begin with an 'a'.
	r, w := utf8.DecodeRuneInString(s)
	if w == 0 || r != 'a' {
		return
	}

	for pos := w; ; {
		r, w = utf8.DecodeRuneInString(s[pos:])
		if w == 0 {
			return
		}
		pos += w
		switch r {
		case 'b':
			// The first 'b' ends the match.
			end = pos
			return
		case 'a':
			continue
		default:
			return
		}
	}
}
func (n NeologdNormalizer) EliminateSpace(s string) string { var ( b bytes.Buffer prev rune ) for p := 0; p < len(s); { c, w := utf8.DecodeRuneInString(s[p:]) p += w if !unicode.IsSpace(c) { b.WriteRune(c) prev = c continue } for p < len(s) { c0, w0 := utf8.DecodeRuneInString(s[p:]) p += w0 if !unicode.IsSpace(c0) { if unicode.In(prev, unicode.Latin, latinSymbols) && unicode.In(c0, unicode.Latin, latinSymbols) { b.WriteRune(' ') } b.WriteRune(c0) prev = c0 break } } } return b.String() }
func (this *JCConv) Init() { // Hiragana hira_txt := "が ぎ ぐ げ ご ざ じ ず ぜ ぞ だ ぢ づ で ど ば び ぶ べ ぼ ぱ ぴ ぷ ぺ ぽ " + "あ い う え お か き く け こ さ し す せ そ た ち つ て と " + "な に ぬ ね の は ひ ふ へ ほ ま み む め も や ゆ よ ら り る れ ろ " + "わ を ん ぁ ぃ ぅ ぇ ぉ ゃ ゅ ょ っ" hira_arr := strings.Split(hira_txt, " ") this.hira = map[rune]bool{} for _, ch := range hira_arr { r, _ := utf8.DecodeRuneInString(ch) this.hira[r] = true } // Katakana kata_txt := "ガ ギ グ ゲ ゴ ザ ジ ズ ゼ ゾ ダ ヂ ヅ デ ド バ ビ ブ ベ ボ パ ピ プ ペ ポ " + "ア イ ウ エ オ カ キ ク ケ コ サ シ ス セ ソ タ チ ツ テ ト " + "ナ ニ ヌ ネ ノ ハ ヒ フ ヘ ホ マ ミ ム メ モ ヤ ユ ヨ ラ リ ル レ ロ " + "ワ ヲ ン ァ ィ ゥ ェ ォ ャ ュ ョ ッ" kata_arr := strings.Split(kata_txt, " ") this.kata = map[rune]bool{} for _, ch := range kata_arr { r, _ := utf8.DecodeRuneInString(ch) this.kata[r] = true } // Conversion map this.kata2hira = map[rune]rune{} for i, kata := range kata_arr { kata_r, _ := utf8.DecodeRuneInString(kata) hira_r, _ := utf8.DecodeRuneInString(hira_arr[i]) this.kata2hira[kata_r] = hira_r } }
func (b *Buffer) TranslationRegex(m Message) Message { translation := make(map[rune]rune) str1, str2 := splitRegex(m.content) for len(str1) > 0 && len(str2) > 0 { r1, size1 := utf8.DecodeRuneInString(str1) r2, size2 := utf8.DecodeRuneInString(str2) translation[r1] = r2 str1 = str1[size1:] str2 = str2[size2:] } if len(str1) != len(str2) { return Message{m.nick, "Translations have different lengths"} } pos, msg := b.TranslationFindMatch(translation) if pos == -1 { return Message{m.nick, "No match found."} } output := make([]rune, 0, len(msg.content)) for len(msg.content) > 0 { r, size := utf8.DecodeRuneInString(msg.content) if t, ok := translation[r]; ok { output = append(output, t) } else { output = append(output, r) } msg.content = msg.content[size:] } b.messages = append(b.messages[:pos], b.messages[pos+1:]...) b.prepend(Message{msg.nick, string(output)}) return Message{msg.nick, string(output)} }
// main annotates the text in ./source.txt with readings taken from
// ./japanese.txt and writes the result to ./output.txt.
//
// ./japanese.txt is scanned for "word(reading)" pairs: the word is whatever
// follows the last space before each "(" and the reading is the text up to
// the next ")". ./source.txt is then walked rune by rune; a two-rune
// dictionary hit is preferred over a one-rune hit. The output consists of the
// annotated text, a CRLF, and the space-separated list of readings.
func main() {
	dict := map[string]string{}
	if data, err := ioutil.ReadFile("./japanese.txt"); err != nil {
		// The dictionary is optional: keep going with an empty one.
		fmt.Fprintln(os.Stderr, "cannot read dictionary:", err)
	} else {
		rest := string(data)
		for {
			open := strings.Index(rest, "(")
			if open == -1 {
				break
			}
			head := rest[:open]
			rest = rest[open+1:]
			key := head[strings.LastIndex(head, " ")+1:]

			closing := strings.Index(rest, ")")
			if closing == -1 {
				// Unterminated "(": there is no reading to record, and
				// slicing with -1 would panic.
				break
			}
			dict[key] = rest[:closing]
		}
	}

	source, err := ioutil.ReadFile("./source.txt")
	if err != nil {
		fmt.Fprintln(os.Stderr, "cannot read source:", err)
		return
	}

	// Compiled once; it is only needed for runes that are neither in the
	// dictionary nor whitespace.
	alnum := regexp.MustCompile("[a-zA-Z0-9]")

	rest := strings.TrimSpace(string(source))
	var output, announce string
	for len(rest) > 0 {
		_, length1 := utf8.DecodeRuneInString(rest)
		_, width2 := utf8.DecodeRuneInString(rest[length1:])
		length2 := length1 + width2
		fmt.Println("length1:", length1)
		fmt.Println("length2:", length2)

		one, two := rest[:length1], rest[:length2]
		switch {
		case dict[two] != "":
			output += two + dict[two]
			announce += " " + dict[two]
			rest = rest[length2:]
		case dict[one] != "":
			output += one + dict[one]
			announce += " " + dict[one]
			rest = rest[length1:]
		case one == "\n":
			announce += "\r\n"
			rest = rest[length1:]
		case one == " ":
			announce += " "
			output += " "
			rest = rest[length1:]
		default:
			output += one
			announce += alnum.FindString(one)
			rest = rest[length1:]
		}
	}

	fmt.Println(dict)
	output = strings.Join([]string{output, announce}, "\r\n")

	// The third argument of WriteFile is the permission used when the file is
	// created. os.ModeAppend is a mode flag with no permission bits set, so
	// it produced an inaccessible (mode 000) file.
	if err := ioutil.WriteFile("./output.txt", []byte(output), 0644); err != nil {
		fmt.Fprintln(os.Stderr, "cannot write output:", err)
		os.Exit(1)
	}
}
// matchPlus matches one or more 'a' runes at the start of s. It returns the
// byte offset just past the last matched 'a', or -1 when s does not start
// with an 'a'.
func matchPlus(s string) (end int) {
	end = -1
	for pos := 0; ; {
		r, w := utf8.DecodeRuneInString(s[pos:])
		if w == 0 || r != 'a' {
			return
		}
		pos += w
		end = pos
	}
}
// SubTree returns the node wich key points to or nil if there is no such key. func (r *Radix) SubTree(key string) *Radix { if len(key) < 1 { return nil } // look up the child starting with the same letter as key // if there is no child with the same starting letter, return false firstRune, _ := utf8.DecodeRuneInString(key) r, ok := r.children[firstRune] if !ok { return nil } posInKey := 0 for r.key != key[posInKey:] { // commonPrefix is now the longest common substring of key and child.key [e.g. only "ab" from "abab" is contained in "abba"] commonPrefix, prefixLength := longestCommonPrefix(key[posInKey:], r.key) posInKey = posInKey + prefixLength // if child.key is not completely contained in key, abort [e.g. trying to find "ab" in "abc"] if r.key != commonPrefix { return nil } // if there is no child starting with the leftover key, abort firstRune, _ := utf8.DecodeRuneInString(key[posInKey:]) r, ok = r.children[firstRune] if !ok { return nil } } return r }
// commonSubString returns the text that all matches share directly after the
// prefix word. Every match must be at least len(word) bytes long.
//
// Matches with nothing after the prefix are ignored, and a match stops
// constraining the result once it has been consumed completely, so the result
// may extend beyond the end of a shorter match.
func commonSubString(word string, matches []string) (common string) {
	var tails []string
	for _, m := range matches {
		if tail := m[len(word):]; len(tail) != 0 {
			tails = append(tails, tail)
		}
	}

	for len(tails) > 0 {
		lead, _ := utf8.DecodeRuneInString(tails[0])
		next := make([]string, 0, len(tails))
		for _, tail := range tails {
			r, width := utf8.DecodeRuneInString(tail)
			if r != lead {
				// The candidates diverge here.
				return common
			}
			if width < len(tail) {
				next = append(next, tail[width:])
			}
		}
		tails = next
		common += string(lead)
	}
	return common
}
func ParseLink(baseURI Link, format string, link Link) ([]string, bool) { link = ShortenLink(baseURI, link) var r []string for _, v := range format { if len(link) == 0 { return nil, false } c, size := utf8.DecodeRuneInString(string(link)) if v == '$' { x := "" for c != '/' && len(link) > 0 { x += string(c) link = link[size:] c, size = utf8.DecodeRuneInString(string(link)) } r = append(r, x) } else { if c != v { return nil, false } link = link[size:] } } if len(link) > 0 { return nil, false } return r, true }
// varParseString decodes a quoted string literal. s must include its
// surrounding quote characters, which are stripped without being inspected.
//
// Supported escapes are \a \b \f \n \r \t, \uXXXX and \UXXXXXXXX (hexadecimal
// code points) and a backslash followed by a newline, which produces nothing.
// Any other escaped rune stands for itself. Invalid UTF-8 and truncated or
// malformed unicode escapes are reported as errors.
func varParseString(s string) (string, error) {
	// Quotes are guaranteed to be there.
	body := s[1 : len(s)-1]
	out := new(bytes.Buffer)

	// next removes and returns the first rune of body.
	next := func() (rune, error) {
		r, n := utf8.DecodeRuneInString(body)
		if r == utf8.RuneError && n == 1 {
			return 0, errors.New("invalid UTF-8")
		}
		body = body[n:]
		return r, nil
	}
	// codePoint consumes the given number of hex digits from body and writes
	// the rune they denote.
	codePoint := func(digits int) error {
		if len(body) < digits {
			return errors.New("short unicode escape")
		}
		v, err := strconv.ParseUint(body[:digits], 16, 32)
		if err != nil {
			return err
		}
		out.WriteRune(rune(v))
		body = body[digits:]
		return nil
	}

	for len(body) != 0 {
		r, err := next()
		if err != nil {
			return "", err
		}
		if r != '\\' {
			out.WriteRune(r)
			continue
		}

		esc, err := next()
		if err != nil {
			return "", err
		}
		switch esc {
		case 'a':
			out.WriteRune(0x7)
		case 'b':
			out.WriteRune(0x8)
		case 'f':
			out.WriteRune(0xc)
		case 'n':
			out.WriteRune('\n')
		case 'r':
			out.WriteRune('\r')
		case 't':
			out.WriteRune('\t')
		case '\n':
			// Escaped line break: contributes nothing to the result.
		case 'u':
			if err := codePoint(4); err != nil {
				return "", err
			}
		case 'U':
			if err := codePoint(8); err != nil {
				return "", err
			}
		default:
			out.WriteRune(esc)
		}
	}
	return out.String(), nil
}
// open read a file and return it as *Text. // If the file not exist, it will return *Text with one empty line. func open(f string) (*Text, error) { ex, err := exists(f) if err != nil { return nil, err } if !ex { return &Text{lines: []Line{Line{data: ""}}}, nil } file, err := os.Open(f) if err != nil { return nil, err } defer file.Close() lines := make([]Line, 0) // tor use tab(shown as 4 space) for indentation as default. // But when parse an exsit file, follow the file's rule. tabToSpace := false tabWidth := 4 findIndentLine := false scanner := bufio.NewScanner(file) for scanner.Scan() { t := scanner.Text() if !findIndentLine { r, _ := utf8.DecodeRuneInString(t) if r == ' ' || r == '\t' { findIndentLine = true if r == ' ' { tabToSpace = true // calculate tab width tabWidth = 0 remain := t for len(remain) != 0 { r, rlen := utf8.DecodeRuneInString(remain) remain = remain[rlen:] if r != ' ' { break } tabWidth++ } } } } lines = append(lines, Line{t}) } if err := scanner.Err(); err != nil { return nil, err } // if file created with `touch` cmd, scanner could not scan anything, // which cause no line in text that makes program panic. if len(lines) == 0 { lines = append(lines, Line{""}) } return &Text{lines, tabToSpace, tabWidth, false}, nil }
func parseArray(baseString string) (value JSONArray, length int, err *JSONError) { currentRune, runeLength := utf8.DecodeRuneInString(baseString) baseLength := len(baseString) values := []JSONNode{} var whitespaceLen int if currentRune != '[' { return JSONArray{&values}, 0, &JSONError{"Array does not start with '['", JSONErrorMalformedString} } currentLength := runeLength needsComma, needsValue := false, false for currentLength < baseLength { whitespaceLen, err = getDistanceToNextNonWhitespace(baseString[currentLength:]) if err != nil { return JSONArray{new([]JSONNode)}, 0, err } currentLength += whitespaceLen currentRune, runeLength := utf8.DecodeRuneInString(baseString[currentLength:]) if currentRune == ']' { if needsValue { return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array ends with comma, no value", JSONErrorMalformedString} } else { return JSONArray{&values}, currentLength + runeLength, nil } } else if currentRune == ',' { if needsValue { return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array has two commas in a row", JSONErrorMalformedString} } else { needsComma = false needsValue = true } currentLength += runeLength } else { if needsComma { return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array has two values not separated by comma", JSONErrorMalformedString} } value, valueLength, err := parseValue(baseString[currentLength:]) if err != nil { return JSONArray{new([]JSONNode)}, 0, err } needsComma = true needsValue = false values = append(values, value) currentLength += valueLength } } return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array does not end before end of string", JSONErrorMalformedString} }
func populateIODialect() (inDialect *FileDialect, outDialect *FileDialect) { // Convert delimiter type from string to rune. default is TAB. inComma := '\t' outComma := '\t' if len(*cliInDelimiter) > 0 { comma, size := utf8.DecodeRuneInString(*cliInDelimiter) if size == utf8.RuneError { log.Warn("input delimiter option is invalid, but continue running.") } else { inComma = comma } } if len(*cliOutDelimiter) > 0 { comma, size := utf8.DecodeRuneInString(*cliOutDelimiter) if size == utf8.RuneError { log.Warn("output delimiter option is invalid, but continue running.") } else { outComma = comma } } // Check encoding options. default is "utf8". inEncoding := "utf8" outEncoding := "utf8" if len(*cliInEncoding) > 0 { if *cliInEncoding == "sjis" { inEncoding = *cliInEncoding } else { log.Warn("unknown input encoding: ", *cliInEncoding) } } if len(*cliOutEncoding) > 0 { if *cliOutEncoding == "sjis" { outEncoding = *cliOutEncoding } else { log.Warn("unknown output encoding: ", *cliOutEncoding) } } inDialect = &FileDialect{ Encoding: inEncoding, Comma: inComma, Comment: '#', FieldsPerRecord: -1, HasHeader: !*cliNoHeader, SheetNumber: *cliSheet, } if *cliStrict { inDialect.FieldsPerRecord = 0 } outDialect = &FileDialect{ Encoding: outEncoding, Comma: outComma, HasHeader: !*cliOutNoHeader, HasMetadata: *cliOutMeta, } return }
func ValidateAddress(addy string, poolAddy string) bool { if len(addy) != len(poolAddy) { return false } prefix, _ := utf8.DecodeRuneInString(addy) poolPrefix, _ := utf8.DecodeRuneInString(poolAddy) if prefix != poolPrefix { return false } return cnutil.ValidateAddress(addy) }
func (s *BigramScoringFunc) Init(mapping *kbdlayout.KeyboardMapping) { file, err := os.Open("bigrams.txt") if err != nil { log.Fatal(err) } scanner := bufio.NewScanner(file) s.bigrams = make([][]uint64, len(mapping.ID2Rune)) for i := 0; i < len(mapping.ID2Rune); i++ { s.bigrams[i] = make([]uint64, len(mapping.ID2Rune)) } for scanner.Scan() { line := scanner.Text() // read unicode letter letter1, size := utf8.DecodeRuneInString(line) // remove it from the line line = line[size:] letter2, size := utf8.DecodeRuneInString(line) // remove it and space from line line = line[size+1:] // make letters lowercase letter1 = unicode.ToLower(letter1) letter2 = unicode.ToLower(letter2) characterId1, ok := mapping.Rune2ID[letter1] if !ok { // there is no need for this letter, as there is no mapping for it continue } characterId2, ok := mapping.Rune2ID[letter2] if !ok { // there is no need for this letter, as there is no mapping for it continue } // parse count from the line count, err := strconv.ParseUint(line, 10, 64) if err != nil { log.Fatal(err) } s.bigrams[characterId1][characterId2] = count } prepareWeights() qwerty := kbdlayout.NewLayout(kbdlayout.Qwerty, mapping) s.qwertyScore = s.CalculateScore(&qwerty) }
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t string) bool {
	for len(s) > 0 && len(t) > 0 {
		// Take the first rune of each string; single bytes below RuneSelf
		// need no decoding.
		sr, sn := rune(s[0]), 1
		if sr >= utf8.RuneSelf {
			sr, sn = utf8.DecodeRuneInString(s)
		}
		tr, tn := rune(t[0]), 1
		if tr >= utf8.RuneSelf {
			tr, tn = utf8.DecodeRuneInString(t)
		}
		s, t = s[sn:], t[tn:]

		if sr == tr {
			continue
		}

		// Order the pair so that lo < hi.
		lo, hi := sr, tr
		if hi < lo {
			lo, hi = hi, lo
		}

		// ASCII shortcut: lo is upper case, so hi has to be its lower-case
		// form.
		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// General case: SimpleFold(x) yields the next equivalent rune > x or
		// wraps around to smaller values. Walk lo's orbit until it reaches
		// or passes hi, or wraps.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// One string is exhausted; they are equal only if both are.
	return s == t
}
func NewCSVReader(filepath string, info *Schema) <-chan []string { input := make(chan []string, BulkSize) csvinfo := info.Misc.(*CSVMeta) go func() { var ( file io.ReadCloser err error ) if filepath == "-" { file = os.Stdin } else { file, err = os.Open(filepath) if err != nil { log.Fatalln(err) } defer func() { file.Close() }() } // setup the complicated CSV parser r := csv.NewReader(file) r.Comma, _ = utf8.DecodeRuneInString(csvinfo.Comma) r.Comment, _ = utf8.DecodeRuneInString(csvinfo.Comment) r.FieldsPerRecord = csvinfo.Fields r.LazyQuotes = csvinfo.Quotes r.TrimLeadingSpace = csvinfo.Trim conv := NewDateConv(info.Columns) // skip through spcified lines for idx := int64(0); idx < csvinfo.SkipLines; idx++ { r.Read() } defer func() { recover() }() // we prepare for bad channel disrupt // continue to actual work for { record, err := r.Read() if err == io.EOF { close(input) return } else if err != nil { log.Println(err) continue } conv.Convert(record) input <- record } }() return input }
/** Inserts a word into the trie. This function is fastest if the words are inserted in alphabetical order. */ func (t *Trie) Insert(word string) { commonPrefixWidth := 0 commonRuneCount := 0 minRuneCount := utf8.RuneCountInString(word) if minRuneCount > utf8.RuneCountInString(t.previousWord) { minRuneCount = utf8.RuneCountInString(t.previousWord) } for ; commonRuneCount < minRuneCount; commonRuneCount++ { runeValue1, width1 := utf8.DecodeRuneInString(word[commonPrefixWidth:]) runeValue2, _ := utf8.DecodeRuneInString(t.previousWord[commonPrefixWidth:]) if runeValue1 != runeValue2 { break } commonPrefixWidth += width1 } t.cache = t.cache[:commonRuneCount+1] node := t.cache[commonRuneCount] for i, w := commonPrefixWidth, 0; i < len(word); i += w { // fix the bug if words not inserted in alphabetical order isLetterExist := false runeValue, width := utf8.DecodeRuneInString(word[i:]) w = width for _, cld := range node.children { if cld.letter == string(runeValue) { t.cache = append(t.cache, cld) node = cld isLetterExist = true break } } if isLetterExist { continue } next := &TrieNode{ letter: string(runeValue), final: false, } t.nodeCount++ node.children = append(node.children, next) t.cache = append(t.cache, next) node = next } node.final = true t.previousWord = word }
// lintCapAndPunct reports whether s starts with a capital letter and whether
// it ends with punctuation ('.', ':' or '!').
//
// A string whose first two runes are both upper case is not reported as
// capitalized, because it looks like it starts with an initialism.
func lintCapAndPunct(s string) (isCap, isPunct bool) {
	head, headLen := utf8.DecodeRuneInString(s)
	tail, _ := utf8.DecodeLastRuneInString(s)

	switch tail {
	case '.', ':', '!':
		isPunct = true
	}

	if !unicode.IsUpper(head) {
		return false, isPunct
	}
	if len(s) > headLen {
		// Don't flag strings starting with something that looks like an
		// initialism.
		next, _ := utf8.DecodeRuneInString(s[headLen:])
		if unicode.IsUpper(next) {
			return false, isPunct
		}
	}
	return true, isPunct
}
func (u *User) DisplayName() string { if u.LastName == nil { return u.FirstName } lastName := *u.LastName r, _ := utf8.DecodeRuneInString(u.FirstName) s, _ := utf8.DecodeRuneInString(lastName) if hanOrHangul(r) && hanOrHangul(s) { return lastName + u.FirstName } else { return u.FirstName + " " + lastName } }
func parseChar(t Token, errors *ParserErrorList) *ast.Char { switch { case t.Value == "\\newline": return &ast.Char{Value: '\n'} case len(t.Value) == 2: _, leadingSlashWidth := utf8.DecodeRuneInString(t.Value) r, _ := utf8.DecodeRuneInString(t.Value[leadingSlashWidth:]) return &ast.Char{Value: r} } errors.Add(t.Loc, fmt.Sprintf("Invalid character literal: %v", t.Value)) return &ast.Char{} }