Esempio n. 1
0
// IndexFold finds the first case-insensitive occurrence of sub in s, starting
// the search at byte offset.
//
// Both sides are compared rune by rune after lower-casing with
// unicode.ToLower. It returns the byte index in s where the match starts and
// the byte index just past its end, or (-1, -1) when sub does not occur. An
// empty sub yields (0, 0) regardless of offset, and a negative offset is
// treated as 0.
//
// The end index is reported separately because case mapping can change the
// encoded width of a rune, so the matched span of s is not necessarily
// len(sub) bytes long.
func IndexFold(s, sub string, offset int) (int, int) {
	sub = strings.ToLower(sub)
	if len(sub) == 0 {
		return 0, 0
	}
	if offset < 0 {
		offset = 0
	}
	first, _ := utf8.DecodeRuneInString(sub)
	// Every remaining position of s is a candidate: the byte length of the
	// lower-cased pattern says nothing about how many bytes of s it matches.
	for i := offset; i < len(s); {
		c, size := utf8.DecodeRuneInString(s[i:])
		if unicode.ToLower(c) == first {
			end, j := i, 0
			// Stop as soon as s is exhausted so that an empty decode is never
			// compared against the pattern.
			for j < len(sub) && end < len(s) {
				sr, sw := utf8.DecodeRuneInString(s[end:])
				pr, pw := utf8.DecodeRuneInString(sub[j:])
				if unicode.ToLower(sr) != pr {
					break
				}
				end += sw
				j += pw
			}
			if j == len(sub) {
				return i, end
			}
		}
		i += size
	}
	return -1, -1
}
Esempio n. 2
0
// processText lower-cases in and strips its separator runes, i.e. the runes
// that are not in notSeparatorRT.
//
// Runs of separators between kept runes collapse to a single space and no
// space is written before the first kept rune. A separator that directly
// follows a kept rune is kept after all when m.isDigit, called with the
// previous rune, the current rune and the raw following rune, reports true.
func (m *minificationText) processText(in string) string {
	var out bytes.Buffer
	var (
		lastRune   = ' '
		lastWasSep = false
		wroteAny   = false
	)
	for rest := in; len(rest) > 0; {
		raw, width := utf8.DecodeRuneInString(rest)
		cur := unicode.ToLower(raw)
		sep := !unicode.Is(notSeparatorRT, cur)

		// A separator right after a kept rune may still belong to a number.
		if sep && !lastWasSep {
			following, _ := utf8.DecodeRuneInString(rest[width:])
			sep = !m.isDigit(lastRune, cur, following)
		}

		if !sep {
			if lastWasSep && wroteAny {
				_ = out.WriteByte(' ')
			}
			_, _ = out.WriteRune(cur)
			wroteAny = true
		}

		lastWasSep = sep
		lastRune = cur
		rest = rest[width:]
	}
	return out.String()
}
Esempio n. 3
0
// symbol reports whether in begins with a symbol and, if so, splits it off.
//
// A symbol is a non-empty run of runes that contains no whitespace (as judged
// by isSpace) and whose first rune is neither whitespace nor a digit (as
// judged by isDigit), for example:
//
//	this-is-a-valid-symbol
//	this!is_another?crazy_symbol
//	THIS
//
// On success it returns the symbol, the unconsumed remainder of in (the first
// whitespace rune is left in the remainder) and true. When in is empty or
// does not start a symbol it returns "", in, false.
func symbol(in string) (string, string, bool) {
	if len(in) == 0 {
		// Decoding an empty string yields (utf8.RuneError, 0), which would
		// otherwise be accepted as the start of an empty symbol.
		return "", in, false
	}
	first, end := utf8.DecodeRuneInString(in)
	if isSpace(first) || isDigit(first) {
		// A symbol must start with something other than a digit or space.
		return "", in, false
	}
	// Extend the symbol up to, but not including, the first whitespace rune.
	for end < len(in) {
		r, w := utf8.DecodeRuneInString(in[end:])
		if isSpace(r) {
			break
		}
		end += w
	}
	return in[:end], in[end:], true
}
Esempio n. 4
0
// defaultFileName returns the output file name used for pkg when generating
// bindings for lang ("java", "go" or "objc"). A nil pkg selects the fixed
// "universe" file name of that language. For any other lang it reports the
// problem through errorf and exits the process with exitStatus.
func defaultFileName(lang string, pkg *types.Package) string {
	// titled returns the package name with its first rune upper-cased.
	// It must only be called when pkg is non-nil.
	titled := func() string {
		name := pkg.Name()
		head, width := utf8.DecodeRuneInString(name)
		return string(unicode.ToUpper(head)) + name[width:]
	}

	if lang == "java" {
		if pkg == nil {
			return "Universe.java"
		}
		return titled() + ".java"
	}
	if lang == "go" {
		if pkg == nil {
			return "go_universe.go"
		}
		return "go_" + pkg.Name() + ".go"
	}
	if lang == "objc" {
		if pkg == nil {
			return "GoUniverse.m"
		}
		return "Go" + titled() + ".m"
	}

	errorf("unknown target language: %q", lang)
	os.Exit(exitStatus)
	return ""
}
Esempio n. 5
0
// peekNextRune decodes the rune at the lexer's current position and returns
// it together with its encoded width. It decodes from l.peek(5) when at least
// five units are left according to l.left(), and from l.remainder() otherwise.
func (l *Lexer) peekNextRune() (rune, int) {
	if l.left() < 5 {
		return utf8.DecodeRuneInString(l.remainder())
	}
	return utf8.DecodeRuneInString(l.peek(5))
}
Esempio n. 6
0
// Replace masks the filtered words found in txt and returns the result; txt
// is the text to be filtered.
//
// From every rune position the trie rooted at self.root is walked as far as
// the text allows. The span from the start position to the last node marked
// as a word end is overwritten with '*'. When nothing matches, txt is
// returned unchanged.
func (self *StringFilter) Replace(txt string) string {
	if len(txt) < 1 {
		return txt
	}

	mask, _ := utf8.DecodeRuneInString("*")
	key := []rune(txt)
	// chars stays nil until the first hit. It then shares storage with key,
	// so runes that were already masked are what later lookups see.
	var chars []rune

	for i := range key {
		node, found := self.root.children[key[i]]
		if !found {
			continue
		}

		if node.end { // a single rune is a complete word
			if chars == nil {
				chars = key
			}
			chars[i] = mask
		}

		// Follow the trie as far as possible, remembering the last word end.
		endPos := -1
		for j := i + 1; j < len(key); j++ {
			next, ok := node.children[key[j]]
			if !ok {
				break
			}
			node = next
			if node.end {
				endPos = j
			}
		}

		if endPos >= 0 {
			if chars == nil {
				chars = key
			}
			// Mask the word from its first to its last rune.
			for t := i; t <= endPos; t++ {
				chars[t] = mask
			}
		}
	}

	if chars == nil {
		return txt
	}
	return string(chars)
}
Esempio n. 7
0
// AreOneEditAway reports whether input1 and input2 are at most one edit
// apart, comparing rune by rune.
//
// Strings with the same number of runes may differ in at most one position
// (a single replacement). When the rune counts differ by exactly one the
// decision is delegated to oneRemovalAway, called with the shorter string
// first. Any larger difference in length yields false.
func AreOneEditAway(input1, input2 string) bool {
	len1 := utf8.RuneCountInString(input1)
	len2 := utf8.RuneCountInString(input2)
	if len1 != len2 && len1-1 != len2 && len2-1 != len1 {
		return false
	}
	if len1-1 == len2 { // input1 has one rune more than input2
		return oneRemovalAway(input2, input1)
	}
	if len2-1 == len1 { // input2 has one rune more than input1
		return oneRemovalAway(input1, input2)
	}

	// Same rune count: allow at most one differing rune. i and j are byte
	// offsets, so they are bounded by the byte lengths and not by the rune
	// counts; otherwise the tail of non-ASCII input is never compared.
	diffSeen := false
	for i, j := 0, 0; i < len(input1) && j < len(input2); {
		r1, width1 := utf8.DecodeRuneInString(input1[i:])
		r2, width2 := utf8.DecodeRuneInString(input2[j:])
		if r1 != r2 {
			if diffSeen {
				return false
			}
			diffSeen = true
		}
		i += width1
		j += width2
	}
	return true
}
Esempio n. 8
0
// matchIgnoreCase1 matches the two-rune prefix "az" at the start of s,
// ignoring ASCII case. It returns the byte index just past the match, or -1
// when s does not start with it.
func matchIgnoreCase1(s string) (end int) {
	end = -1

	first, firstLen := utf8.DecodeRuneInString(s)
	if firstLen == 0 || (first != 'A' && first != 'a') {
		return end
	}

	second, secondLen := utf8.DecodeRuneInString(s[firstLen:])
	if secondLen == 0 {
		return end
	}
	if second == 'Z' || second == 'z' {
		end = firstLen + secondLen
	}
	return end
}
Esempio n. 9
0
// evalAVT evaluates an attribute value template: literal text is copied to
// the output and each {expr} section is replaced by the result of evaluating
// expr as an XPath expression against node.
//
// A doubled brace ("{{" or "}}") produces a single literal brace. The error
// returned by context.EvalXPath is discarded, and a result that is not a
// []xml.Node, float64 or string contributes nothing to the output.
func evalAVT(input string, node xml.Node, context *ExecutionContext) (out string) {
	// start is the byte offset where the pending segment (literal text or the
	// expression being read) begins; pos is the read cursor.
	var start, pos int
	// Quote state, used below so that a '}' inside a quoted string does not
	// terminate the expression.
	// NOTE(review): the flags toggle on every quote, including quotes in
	// literal text outside braces - confirm that this is intended.
	var inSQlit, inDQlit bool
	for pos < len(input) {
		r, width := utf8.DecodeRuneInString(input[pos:])
		pos += width
		if r == '\'' {
			inSQlit = !inSQlit
		}
		if r == '"' {
			inDQlit = !inDQlit
		}
		if r == '{' {
			// if we're not the last character
			if pos < len(input) {
				// check for doubled opening brace
				peek, w := utf8.DecodeRuneInString(input[pos:])
				if peek == '{' {
					// Emit the text up to and including the first brace and
					// skip the second one.
					out = out + input[start:pos]
					pos += w
					start = pos
					continue
				}
			}
			// Single opening brace: flush the literal text before it; the
			// expression starts right after the brace.
			out = out + input[start:pos-width]
			start = pos
		}
		if r == '}' {
			// A closing brace inside a quoted string belongs to the expression.
			if inSQlit || inDQlit {
				continue
			}
			// if we're not the last character
			if pos < len(input) {
				// check for doubled closing brace
				peek, w := utf8.DecodeRuneInString(input[pos:])
				if peek == '}' {
					// Emit the text up to and including the first brace and
					// skip the second one.
					out = out + input[start:pos]
					pos += w
					start = pos
					continue
				}
			}
			// Everything between start and this brace is the expression.
			expr := input[start : pos-width]
			ret, _ := context.EvalXPath(node, expr)
			switch val := ret.(type) {
			case []xml.Node:
				// A node list contributes the content of every node, in order.
				for _, n := range val {
					out = out + n.Content()
				}
			case float64:
				out = out + fmt.Sprintf("%v", val)
			case string:
				out = out + val
			}
			start = pos
		}
	}
	// Flush whatever follows the last brace.
	out = out + input[start:pos]
	return
}
Esempio n. 10
0
// matchLazy6 matches one or more 'a' runes followed by a single 'b' at the
// start of s. It returns the byte index just past the 'b', or -1 when s does
// not start with such a sequence.
func matchLazy6(s string) (end int) {
	end = -1

	r, w := utf8.DecodeRuneInString(s)
	if r != 97 {
		// Covers the empty string too: it decodes to utf8.RuneError.
		return
	}

	for i := w; ; {
		r, w = utf8.DecodeRuneInString(s[i:])
		i += w
		switch r {
		case 97:
			// Another 'a': keep consuming.
			continue
		case 98:
			end = i
		}
		return
	}
}
Esempio n. 11
0
File: neologd.go Progetto: ikawaha/x
// EliminateSpace removes the whitespace from s. A run of whitespace is
// replaced by one ' ' only when both the rune before the run and the rune
// after it are in unicode.Latin or latinSymbols; every other run, including
// leading and trailing whitespace, is dropped.
func (n NeologdNormalizer) EliminateSpace(s string) string {
	var (
		out  bytes.Buffer
		last rune // most recently written rune; zero before the first one
		gap  bool // whitespace was skipped since last was written
	)
	for rest := s; len(rest) > 0; {
		c, w := utf8.DecodeRuneInString(rest)
		rest = rest[w:]

		if unicode.IsSpace(c) {
			gap = true
			continue
		}
		if gap &&
			unicode.In(last, unicode.Latin, latinSymbols) &&
			unicode.In(c, unicode.Latin, latinSymbols) {
			out.WriteRune(' ')
		}
		out.WriteRune(c)
		last = c
		gap = false
	}
	return out.String()
}
// Init builds the lookup tables of the converter: the hiragana set, the
// katakana set, and the katakana-to-hiragana map. The two character lists
// are positionally aligned, so the entry at index i of one list corresponds
// to the entry at index i of the other.
func (this *JCConv) Init() {
	// firstRunes splits a space-separated list and returns the first rune of
	// every entry, in order.
	firstRunes := func(list string) []rune {
		entries := strings.Split(list, " ")
		runes := make([]rune, 0, len(entries))
		for _, entry := range entries {
			r, _ := utf8.DecodeRuneInString(entry)
			runes = append(runes, r)
		}
		return runes
	}

	// Hiragana
	hiragana := firstRunes("が ぎ ぐ げ ご ざ じ ず ぜ ぞ だ ぢ づ で ど ば び ぶ べ ぼ ぱ ぴ ぷ ぺ ぽ " +
		"あ い う え お か き く け こ さ し す せ そ た ち つ て と " +
		"な に ぬ ね の は ひ ふ へ ほ ま み む め も や ゆ よ ら り る れ ろ " +
		"わ を ん ぁ ぃ ぅ ぇ ぉ ゃ ゅ ょ っ")
	this.hira = make(map[rune]bool, len(hiragana))
	for _, r := range hiragana {
		this.hira[r] = true
	}

	// Katakana
	katakana := firstRunes("ガ ギ グ ゲ ゴ ザ ジ ズ ゼ ゾ ダ ヂ ヅ デ ド バ ビ ブ ベ ボ パ ピ プ ペ ポ " +
		"ア イ ウ エ オ カ キ ク ケ コ サ シ ス セ ソ タ チ ツ テ ト " +
		"ナ ニ ヌ ネ ノ ハ ヒ フ ヘ ホ マ ミ ム メ モ ヤ ユ ヨ ラ リ ル レ ロ " +
		"ワ ヲ ン ァ ィ ゥ ェ ォ ャ ュ ョ ッ")
	this.kata = make(map[rune]bool, len(katakana))
	for _, r := range katakana {
		this.kata[r] = true
	}

	// Conversion map
	this.kata2hira = make(map[rune]rune, len(katakana))
	for i, k := range katakana {
		this.kata2hira[k] = hiragana[i]
	}
}
Esempio n. 13
0
// TranslationRegex applies a rune-for-rune translation to a buffered message.
//
// splitRegex(m.content) supplies the source and target rune lists, which are
// paired up position by position. If the lists do not have the same number
// of runes, or b.TranslationFindMatch finds no message, an explanatory
// Message addressed to m.nick is returned. Otherwise the matched message is
// removed from b.messages, its translated form is prepended to the buffer,
// and the translated message is returned.
func (b *Buffer) TranslationRegex(m Message) Message {
	from, to := splitRegex(m.content)

	table := make(map[rune]rune)
	for len(from) > 0 && len(to) > 0 {
		src, srcLen := utf8.DecodeRuneInString(from)
		dst, dstLen := utf8.DecodeRuneInString(to)
		table[src] = dst
		from, to = from[srcLen:], to[dstLen:]
	}
	// At least one list is exhausted here; leftovers mean a length mismatch.
	if len(from) != len(to) {
		return Message{m.nick, "Translations have different lengths"}
	}

	pos, msg := b.TranslationFindMatch(table)
	if pos == -1 {
		return Message{m.nick, "No match found."}
	}

	translated := make([]rune, 0, len(msg.content))
	for len(msg.content) > 0 {
		r, width := utf8.DecodeRuneInString(msg.content)
		if mapped, ok := table[r]; ok {
			r = mapped
		}
		translated = append(translated, r)
		msg.content = msg.content[width:]
	}

	b.messages = append(b.messages[:pos], b.messages[pos+1:]...)
	b.prepend(Message{msg.nick, string(translated)})
	return Message{msg.nick, string(translated)}
}
// main annotates ./source.txt with the readings found in ./japanese.txt and
// writes the result to ./output.txt.
//
// ./japanese.txt is scanned for "word(reading)" pairs: the text between the
// last space before a "(" and the "(" itself is the key, the text up to the
// following ")" is its reading. ./source.txt is then walked rune by rune,
// preferring a two-rune dictionary hit over a one-rune hit. The output file
// holds the source text with each reading inserted after its word, followed
// by "\r\n" and the readings on their own.
//
// A missing dictionary is reported on stderr and treated as empty; a missing
// source file is reported and nothing is written.
func main() {
	dict := map[string]string{}
	if data, err := ioutil.ReadFile("./japanese.txt"); err != nil {
		fmt.Fprintln(os.Stderr, "reading ./japanese.txt:", err)
	} else {
		rest := string(data)
		for {
			open := strings.Index(rest, "(")
			if open == -1 {
				break
			}
			before := rest[:open]
			rest = rest[open+1:]
			closing := strings.Index(rest, ")")
			if closing == -1 {
				// Unterminated "(": there is no reading to record, and
				// slicing with -1 would panic.
				break
			}
			key := before[strings.LastIndex(before, " ")+1:]
			dict[key] = rest[:closing]
		}
	}

	src, err := ioutil.ReadFile("./source.txt")
	if err != nil {
		fmt.Fprintln(os.Stderr, "reading ./source.txt:", err)
		return
	}

	// Compiled once; it picks ASCII letters and digits out of unmatched runes.
	alnum := regexp.MustCompile("[a-zA-Z0-9]")

	rest := strings.TrimSpace(string(src))
	var output, announce strings.Builder
	for len(rest) > 0 {
		_, length1 := utf8.DecodeRuneInString(rest)
		_, nextLen := utf8.DecodeRuneInString(rest[length1:])
		length2 := length1 + nextLen
		fmt.Println("length1:", length1)
		fmt.Println("length2:", length2)

		one, two := rest[:length1], rest[:length2]
		switch {
		case dict[two] != "":
			output.WriteString(two)
			output.WriteString(dict[two])
			announce.WriteString(" ")
			announce.WriteString(dict[two])
			rest = rest[length2:]
		case dict[one] != "":
			output.WriteString(one)
			output.WriteString(dict[one])
			announce.WriteString(" ")
			announce.WriteString(dict[one])
			rest = rest[length1:]
		case one == "\n":
			// Line breaks are carried over to the readings only.
			announce.WriteString("\r\n")
			rest = rest[length1:]
		case one == " ":
			announce.WriteString(" ")
			output.WriteString(" ")
			rest = rest[length1:]
		default:
			output.WriteString(one)
			announce.WriteString(alnum.FindString(one))
			rest = rest[length1:]
		}
	}

	fmt.Println(dict)
	result := output.String() + "\r\n" + announce.String()
	// 0644 rather than os.ModeAppend: the latter carries no permission bits,
	// so the file would be created unreadable and unwritable.
	if err := ioutil.WriteFile("./output.txt", []byte(result), 0644); err != nil {
		fmt.Fprintln(os.Stderr, "writing ./output.txt:", err)
	}
}
Esempio n. 15
0
// matchPlus matches one or more 'a' runes at the start of s. It returns the
// byte index just past the last matched 'a', or -1 when s does not start
// with 'a'.
func matchPlus(s string) (end int) {
	end = -1
	for pos := 0; ; {
		r, width := utf8.DecodeRuneInString(s[pos:])
		if width == 0 || r != 97 {
			return end
		}
		pos += width
		end = pos
	}
}
Esempio n. 16
0
// SubTree returns the node which key points to, or nil if there is no such
// key (an empty key never matches).
func (r *Radix) SubTree(key string) *Radix {
	if len(key) < 1 {
		return nil
	}

	// Children are indexed by the first rune of their key, so the lookup
	// always starts there.
	lead, _ := utf8.DecodeRuneInString(key)
	node, found := r.children[lead]
	if !found {
		return nil
	}

	// rest is the part of key that has not been matched against a node yet.
	for rest := key; node.key != rest; {
		// shared is the longest common prefix of rest and node.key
		// [e.g. only "ab" from "abab" is contained in "abba"].
		shared, sharedLen := longestCommonPrefix(rest, node.key)

		// node.key must be completely contained in rest
		// [e.g. "ab" cannot be found under "abc"].
		if node.key != shared {
			return nil
		}

		// Descend into the child that starts with the leftover key.
		rest = rest[sharedLen:]
		lead, _ = utf8.DecodeRuneInString(rest)
		if node, found = node.children[lead]; !found {
			return nil
		}
	}

	return node
}
Esempio n. 17
0
File: complete.go Progetto: wkz/belt
// commonSubString returns the text that all matches share directly after
// word. Every match must be at least len(word) bytes long.
//
// The tails of the matches (what follows the first len(word) bytes) are
// compared rune by rune. A tail that runs out, including one that is empty
// from the start, simply drops out of the comparison and does not end it.
// The result stops at the first rune on which the remaining tails disagree.
func commonSubString(word string, matches []string) (common string) {
	var tails []string
	for _, m := range matches {
		if tail := m[len(word):]; len(tail) != 0 {
			tails = append(tails, tail)
		}
	}

	for len(tails) > 0 {
		lead, _ := utf8.DecodeRuneInString(tails[0])

		var shorter []string
		for _, tail := range tails {
			r, width := utf8.DecodeRuneInString(tail)
			if r != lead {
				return common
			}
			if width < len(tail) {
				shorter = append(shorter, tail[width:])
			}
		}

		tails = shorter
		common += string(lead)
	}
	return common
}
Esempio n. 18
0
// ParseLink matches link, after shortening it against baseURI with
// ShortenLink, against format and extracts its variable parts.
//
// Every '$' in format captures the runes of link up to the next '/' or the
// end of link; every other rune of format must appear literally. It returns
// the captured parts and true when the whole link is consumed by the format,
// and nil, false otherwise.
func ParseLink(baseURI Link, format string, link Link) ([]string, bool) {
	link = ShortenLink(baseURI, link)

	var parts []string
	for _, want := range format {
		if len(link) == 0 {
			return nil, false
		}
		c, size := utf8.DecodeRuneInString(string(link))

		if want != '$' {
			// Literal rune: it has to match exactly.
			if c != want {
				return nil, false
			}
			link = link[size:]
			continue
		}

		// Placeholder: collect everything up to the next '/'.
		part := ""
		for len(link) > 0 && c != '/' {
			part += string(c)
			link = link[size:]
			c, size = utf8.DecodeRuneInString(string(link))
		}
		parts = append(parts, part)
	}

	if len(link) > 0 {
		return nil, false
	}
	return parts, true
}
Esempio n. 19
0
// varParseString decodes a quoted string literal. s must include its
// surrounding quote characters, which are stripped without being checked.
//
// Recognised escapes are \a, \b, \f, \n, \r, \t, \uXXXX, \UXXXXXXXX and a
// backslash followed by a newline (which produces nothing). Any other
// escaped rune stands for itself. Invalid UTF-8 and malformed \u or \U
// escapes are reported as errors.
func varParseString(s string) (string, error) {
	// The quotes are guaranteed to be there.
	rest := s[1 : len(s)-1]
	var out bytes.Buffer

	// next pops one rune off rest, rejecting invalid UTF-8.
	next := func() (rune, error) {
		r, width := utf8.DecodeRuneInString(rest)
		if r == utf8.RuneError && width == 1 {
			return 0, errors.New("invalid UTF-8")
		}
		rest = rest[width:]
		return r, nil
	}
	// hex consumes exactly digits hexadecimal digits and writes that rune.
	hex := func(digits int) error {
		if len(rest) < digits {
			return errors.New("short unicode escape")
		}
		code, err := strconv.ParseUint(rest[:digits], 16, 32)
		if err != nil {
			return err
		}
		out.WriteRune(rune(code))
		rest = rest[digits:]
		return nil
	}

	for len(rest) != 0 {
		r, err := next()
		if err != nil {
			return "", err
		}
		if r != '\\' {
			out.WriteRune(r)
			continue
		}

		esc, err := next()
		if err != nil {
			return "", err
		}
		switch esc {
		case 'a':
			out.WriteRune('\a')
		case 'b':
			out.WriteRune('\b')
		case 'f':
			out.WriteRune('\f')
		case 'n':
			out.WriteRune('\n')
		case 'r':
			out.WriteRune('\r')
		case 't':
			out.WriteRune('\t')
		case '\n':
			// Escaped line break: contributes nothing.
		case 'u':
			if err := hex(4); err != nil {
				return "", err
			}
		case 'U':
			if err := hex(8); err != nil {
				return "", err
			}
		default:
			out.WriteRune(esc)
		}
	}
	return out.String(), nil
}
Esempio n. 20
0
File: file.go Progetto: kybin/tor
// open reads the file f and returns it as a *Text.
// If the file does not exist, it returns a *Text with one empty line.
//
// The indentation style is taken from the first line that starts with a
// space or a tab: a leading space switches the text to space indentation and
// sets the tab width to the number of leading spaces on that line. Without
// such a line the default (tabs, shown four columns wide) is kept.
func open(f string) (*Text, error) {
	found, err := exists(f)
	if err != nil {
		return nil, err
	}
	if !found {
		return &Text{lines: []Line{Line{data: ""}}}, nil
	}

	file, err := os.Open(f)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var (
		lines []Line
		// tor indents with tabs (shown as 4 spaces) by default, but an
		// existing file keeps its own convention.
		tabToSpace = false
		tabWidth   = 4
		indentSeen = false
	)

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		t := scanner.Text()
		if !indentSeen {
			switch lead, _ := utf8.DecodeRuneInString(t); lead {
			case '\t':
				indentSeen = true
			case ' ':
				indentSeen = true
				tabToSpace = true
				// The tab width is the number of leading spaces.
				tabWidth = 0
				for tabWidth < len(t) && t[tabWidth] == ' ' {
					tabWidth++
				}
			}
		}
		lines = append(lines, Line{t})
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// A file created with `touch` yields no line at all, and a text without
	// lines makes the program panic, so always keep at least one.
	if len(lines) == 0 {
		lines = append(lines, Line{""})
	}

	return &Text{lines, tabToSpace, tabWidth, false}, nil
}
Esempio n. 21
0
// parseArray parses the JSON array at the start of baseString.
//
// On success it returns the array, the number of bytes consumed (including
// the closing ']') and a nil error. On failure it returns an array without
// elements, a length of 0 and a *JSONError describing the problem: a missing
// '[', a comma that does not follow a value, two values without a comma
// between them, a trailing comma, an element that fails to parse, or input
// that ends before the closing ']'.
func parseArray(baseString string) (value JSONArray, length int, err *JSONError) {
	first, firstWidth := utf8.DecodeRuneInString(baseString)
	baseLength := len(baseString)
	values := []JSONNode{}
	var whitespaceLen int

	if first != '[' {
		return JSONArray{&values}, 0, &JSONError{"Array does not start with '['", JSONErrorMalformedString}
	}

	currentLength := firstWidth
	// needsComma is set right after a value, needsValue right after a comma.
	needsComma, needsValue := false, false

	for currentLength < baseLength {
		whitespaceLen, err = getDistanceToNextNonWhitespace(baseString[currentLength:])
		if err != nil {
			return JSONArray{new([]JSONNode)}, 0, err
		}
		currentLength += whitespaceLen

		currentRune, runeLength := utf8.DecodeRuneInString(baseString[currentLength:])

		switch currentRune {
		case ']':
			if needsValue {
				return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array ends with comma, no value", JSONErrorMalformedString}
			}
			return JSONArray{&values}, currentLength + runeLength, nil
		case ',':
			if needsValue {
				return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array has two commas in a row", JSONErrorMalformedString}
			}
			if !needsComma {
				// No value has been read yet, so this comma separates nothing
				// (e.g. "[,1]").
				return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array has comma before first value", JSONErrorMalformedString}
			}
			needsComma = false
			needsValue = true
			currentLength += runeLength
		default:
			if needsComma {
				return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array has two values not separated by comma", JSONErrorMalformedString}
			}

			element, elementLength, elementErr := parseValue(baseString[currentLength:])
			if elementErr != nil {
				return JSONArray{new([]JSONNode)}, 0, elementErr
			}

			needsComma = true
			needsValue = false

			values = append(values, element)
			currentLength += elementLength
		}
	}
	return JSONArray{new([]JSONNode)}, 0, &JSONError{"Array does not end before end of string", JSONErrorMalformedString}
}
Esempio n. 22
0
// populateIODialect builds the input and output file dialects from the
// command-line options.
//
// The delimiter is the first rune of the corresponding option and defaults
// to TAB; an option that does not start with valid UTF-8 is reported with a
// warning and the default is kept. The encoding defaults to "utf8", the only
// accepted alternative is "sjis", and any other value is reported with a
// warning and ignored.
func populateIODialect() (inDialect *FileDialect, outDialect *FileDialect) {
	// Convert delimiter type from string to rune. default is TAB.
	inComma := '\t'
	outComma := '\t'
	if len(*cliInDelimiter) > 0 {
		comma, size := utf8.DecodeRuneInString(*cliInDelimiter)
		// Invalid UTF-8 decodes to (utf8.RuneError, 1). The rune is what has
		// to be compared with RuneError; the size is only a byte count.
		if comma == utf8.RuneError && size == 1 {
			log.Warn("input delimiter option is invalid, but continue running.")
		} else {
			inComma = comma
		}
	}
	if len(*cliOutDelimiter) > 0 {
		comma, size := utf8.DecodeRuneInString(*cliOutDelimiter)
		if comma == utf8.RuneError && size == 1 {
			log.Warn("output delimiter option is invalid, but continue running.")
		} else {
			outComma = comma
		}
	}
	// Check encoding options. default is "utf8".
	inEncoding := "utf8"
	outEncoding := "utf8"
	if len(*cliInEncoding) > 0 {
		if *cliInEncoding == "sjis" {
			inEncoding = *cliInEncoding
		} else {
			log.Warn("unknown input encoding: ", *cliInEncoding)
		}
	}
	if len(*cliOutEncoding) > 0 {
		if *cliOutEncoding == "sjis" {
			outEncoding = *cliOutEncoding
		} else {
			log.Warn("unknown output encoding: ", *cliOutEncoding)
		}
	}
	inDialect = &FileDialect{
		Encoding:        inEncoding,
		Comma:           inComma,
		Comment:         '#',
		FieldsPerRecord: -1,
		HasHeader:       !*cliNoHeader,
		SheetNumber:     *cliSheet,
	}
	if *cliStrict {
		inDialect.FieldsPerRecord = 0
	}
	outDialect = &FileDialect{
		Encoding:    outEncoding,
		Comma:       outComma,
		HasHeader:   !*cliOutNoHeader,
		HasMetadata: *cliOutMeta,
	}
	return inDialect, outDialect
}
Esempio n. 23
0
// ValidateAddress reports whether addy is acceptable next to poolAddy: both
// must have the same byte length and the same first rune, and addy must pass
// cnutil.ValidateAddress.
func ValidateAddress(addy string, poolAddy string) bool {
	if len(addy) != len(poolAddy) {
		return false
	}
	lead, _ := utf8.DecodeRuneInString(addy)
	poolLead, _ := utf8.DecodeRuneInString(poolAddy)
	return lead == poolLead && cnutil.ValidateAddress(addy)
}
Esempio n. 24
0
// Init loads the bigram counts from "bigrams.txt" into s.bigrams, prepares
// the weights and records the score of the QWERTY layout in s.qwertyScore.
//
// Every line of the file is expected to hold two letters, one separator
// character and a decimal count. Letters are lower-cased, and bigrams with a
// letter that has no entry in mapping.Rune2ID are skipped. An unreadable
// file, a line too short to hold two letters and a separator, or an
// unparsable count terminates the program through log.Fatal.
func (s *BigramScoringFunc) Init(mapping *kbdlayout.KeyboardMapping) {
	file, err := os.Open("bigrams.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)

	s.bigrams = make([][]uint64, len(mapping.ID2Rune))
	for i := range s.bigrams {
		s.bigrams[i] = make([]uint64, len(mapping.ID2Rune))
	}

	for scanner.Scan() {
		line := scanner.Text()

		// read the two unicode letters
		letter1, size1 := utf8.DecodeRuneInString(line)
		letter2, size2 := utf8.DecodeRuneInString(line[size1:])
		// The count starts after both letters and the separator; without
		// this check a short line would make the slice below panic.
		countStart := size1 + size2 + 1
		if size1 == 0 || size2 == 0 || len(line) < countStart {
			log.Fatalf("malformed bigram line %q", line)
		}
		countText := line[countStart:]

		// make letters lowercase
		letter1 = unicode.ToLower(letter1)
		letter2 = unicode.ToLower(letter2)

		characterId1, ok := mapping.Rune2ID[letter1]
		if !ok {
			// there is no need for this letter, as there is no mapping for it
			continue
		}
		characterId2, ok := mapping.Rune2ID[letter2]
		if !ok {
			// there is no need for this letter, as there is no mapping for it
			continue
		}

		// parse count from the line
		count, err := strconv.ParseUint(countText, 10, 64)
		if err != nil {
			log.Fatal(err)
		}
		s.bigrams[characterId1][characterId2] = count
	}
	// A read error would otherwise look like a short but valid file.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	prepareWeights()

	qwerty := kbdlayout.NewLayout(kbdlayout.Qwerty, mapping)
	s.qwertyScore = s.CalculateScore(&qwerty)
}
Esempio n. 25
0
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t string) bool {
	// pop removes the first rune of a non-empty string, taking a shortcut
	// for single-byte (ASCII) runes.
	pop := func(str string) (rune, string) {
		if b := str[0]; b < utf8.RuneSelf {
			return rune(b), str[1:]
		}
		r, size := utf8.DecodeRuneInString(str)
		return r, str[size:]
	}

	for s != "" && t != "" {
		var lo, hi rune
		lo, s = pop(s)
		hi, t = pop(t)

		// Identical runes need no folding.
		if lo == hi {
			continue
		}

		// Order the pair so that lo < hi, which simplifies what follows.
		if hi < lo {
			lo, hi = hi, lo
		}

		// ASCII shortcut: with lo an upper-case letter, hi has to be the
		// matching lower-case letter.
		if hi < utf8.RuneSelf && 'A' <= lo && lo <= 'Z' {
			if hi != lo+'a'-'A' {
				return false
			}
			continue
		}

		// General case: SimpleFold(x) yields the next equivalent rune above
		// x, wrapping around to smaller values, so walk the orbit of lo
		// until it reaches hi, passes it, or comes back to lo.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// At least one string is used up; they are equal only if both are.
	return s == t
}
Esempio n. 26
0
// NewCSVReader starts a goroutine that reads CSV records from filepath (or
// from standard input when filepath is "-") and returns the channel on which
// the records are delivered. The channel is buffered with BulkSize slots and
// is closed once the input reaches io.EOF.
//
// The parser settings come from info.Misc, which must hold a *CSVMeta. A file
// that cannot be opened terminates the program through log.Fatalln. A record
// that fails to parse is logged and skipped.
func NewCSVReader(filepath string, info *Schema) <-chan []string {
	input := make(chan []string, BulkSize)
	csvinfo := info.Misc.(*CSVMeta)
	go func() {
		var (
			file io.ReadCloser
			err  error
		)

		if filepath == "-" {
			file = os.Stdin
		} else {
			file, err = os.Open(filepath)
			if err != nil {
				log.Fatalln(err)
			}
			defer func() { file.Close() }()
		}

		// setup the complicated CSV parser
		r := csv.NewReader(file)
		// Only the first rune of the configured strings is used.
		r.Comma, _ = utf8.DecodeRuneInString(csvinfo.Comma)
		r.Comment, _ = utf8.DecodeRuneInString(csvinfo.Comment)
		r.FieldsPerRecord = csvinfo.Fields
		r.LazyQuotes = csvinfo.Quotes
		r.TrimLeadingSpace = csvinfo.Trim

		conv := NewDateConv(info.Columns)

		// skip through the specified number of leading records; read errors
		// are ignored here
		for idx := int64(0); idx < csvinfo.SkipLines; idx++ {
			r.Read()
		}

		// Swallow any panic raised below so that it ends only this goroutine.
		// The original note says this prepares for the channel being
		// disrupted.
		defer func() { recover() }() // we prepare for bad channel disrupt

		// continue to actual work
		for {
			record, err := r.Read()
			if err == io.EOF {
				close(input)
				return
			} else if err != nil {
				// NOTE(review): every non-EOF error is logged and retried; an
				// error that keeps recurring would keep this loop spinning -
				// confirm that only per-record errors can occur here.
				log.Println(err)
				continue
			}
			conv.Convert(record)
			input <- record
		}
	}()
	return input
}
// Insert adds word to the trie. It is fastest when the words are inserted in
// alphabetical order, because the nodes on the path of the previously
// inserted word are cached and the prefix shared with that word is not
// walked again.
func (t *Trie) Insert(word string) {
	// Only as many runes as the shorter of the two words has can be shared.
	limit := utf8.RuneCountInString(word)
	if prev := utf8.RuneCountInString(t.previousWord); prev < limit {
		limit = prev
	}

	// Measure the prefix shared with the previous word, in runes and bytes.
	sharedRunes, sharedBytes := 0, 0
	for sharedRunes < limit {
		cur, width := utf8.DecodeRuneInString(word[sharedBytes:])
		old, _ := utf8.DecodeRuneInString(t.previousWord[sharedBytes:])
		if cur != old {
			break
		}
		sharedBytes += width
		sharedRunes++
	}

	// Keep the cached path up to the end of the shared prefix.
	t.cache = t.cache[:sharedRunes+1]
	node := t.cache[sharedRunes]

	for rest := word[sharedBytes:]; len(rest) > 0; {
		r, width := utf8.DecodeRuneInString(rest)
		rest = rest[width:]
		letter := string(r)

		// Reuse an existing child, so that words inserted out of
		// alphabetical order do not create duplicate nodes.
		var child *TrieNode
		for _, candidate := range node.children {
			if candidate.letter == letter {
				child = candidate
				break
			}
		}
		if child == nil {
			child = &TrieNode{
				letter: letter,
				final:  false,
			}
			t.nodeCount++
			node.children = append(node.children, child)
		}

		t.cache = append(t.cache, child)
		node = child
	}

	node.final = true
	t.previousWord = word
}
Esempio n. 28
0
// lintCapAndPunct inspects the first and last rune of s. isCap reports
// whether s starts with an upper-case rune that is not directly followed by
// another upper-case rune (which would look like an initialism); isPunct
// reports whether s ends in '.', ':' or '!'.
func lintCapAndPunct(s string) (isCap, isPunct bool) {
	head, headLen := utf8.DecodeRuneInString(s)
	tail, _ := utf8.DecodeLastRuneInString(s)

	switch tail {
	case '.', ':', '!':
		isPunct = true
	}

	if !unicode.IsUpper(head) {
		return false, isPunct
	}
	if len(s) > headLen {
		// Don't flag strings starting with something that looks like an
		// initialism.
		next, _ := utf8.DecodeRuneInString(s[headLen:])
		if unicode.IsUpper(next) {
			return false, isPunct
		}
	}
	return true, isPunct
}
Esempio n. 29
0
File: helpers.go Progetto: xiaq/tg
// DisplayName returns the name to show for the user. Without a last name it
// is the first name alone. When hanOrHangul reports true for the first rune
// of both names, the last name is put first with no separator. Otherwise the
// result is the first name, a space, and the last name.
func (u *User) DisplayName() string {
	if u.LastName == nil {
		return u.FirstName
	}

	family := *u.LastName
	givenLead, _ := utf8.DecodeRuneInString(u.FirstName)
	familyLead, _ := utf8.DecodeRuneInString(family)
	if hanOrHangul(givenLead) && hanOrHangul(familyLead) {
		return family + u.FirstName
	}
	return u.FirstName + " " + family
}
Esempio n. 30
0
// parseChar converts a character-literal token into an *ast.Char.
//
// The token "\newline" denotes '\n'. Any other accepted token consists of
// exactly two runes, the leading backslash and the character itself, which
// may be any Unicode rune. For every other token an "Invalid character
// literal" error is added to errors and an empty *ast.Char is returned.
func parseChar(t Token, errors *ParserErrorList) *ast.Char {
	switch {
	case t.Value == "\\newline":
		return &ast.Char{Value: '\n'}
	// Count runes, not bytes: a literal such as `\é` is three bytes long,
	// while a lone two-byte rune is not a literal at all.
	case utf8.RuneCountInString(t.Value) == 2:
		_, leadingSlashWidth := utf8.DecodeRuneInString(t.Value)
		r, _ := utf8.DecodeRuneInString(t.Value[leadingSlashWidth:])
		return &ast.Char{Value: r}
	}

	errors.Add(t.Loc, fmt.Sprintf("Invalid character literal: %v", t.Value))
	return &ast.Char{}
}