Ejemplo n.º 1
0
// ExampleRuneLen prints the number of bytes utf8.RuneLen reports for an ASCII
// rune and for a CJK rune.
func ExampleRuneLen() {
	for _, r := range []rune{'a', '界'} {
		fmt.Println(utf8.RuneLen(r))
	}
	// Output:
	// 1
	// 3
}
Ejemplo n.º 2
0
// RankMatch is similar to Match except it will measure the Levenshtein
// distance between the source and the target and return its result. If there
// was no match, it will return -1.
// Given the requirements of match, RankMatch only needs to perform a subset of
// the Levenshtein calculation, only deletions need be considered, required
// additions and substitutions would fail the match test.
//
// The returned distance is the number of runes of target that are skipped
// while matching the runes of source in order, plus the number of runes left
// in target after the last match.
func RankMatch(source, target string) int {
	lenDiff := len(target) - len(source)

	// A target shorter than the source can never contain it.
	if lenDiff < 0 {
		return -1
	}

	if lenDiff == 0 && source == target {
		return 0
	}

	runeDiff := 0

Outer:
	for _, r1 := range source {
		for i, r2 := range target {
			if r1 == r2 {
				// Advance by the width actually occupied in target; for
				// invalid bytes this is 1, not utf8.RuneLen(utf8.RuneError).
				_, size := utf8.DecodeRuneInString(target[i:])
				target = target[i+size:]
				continue Outer
			}
			runeDiff++
		}
		// r1 does not occur in what is left of target.
		return -1
	}

	// Every rune still left in target counts as one deletion. Counting runes
	// (rather than slicing by the width of the first byte) keeps multi-byte
	// characters intact.
	return runeDiff + utf8.RuneCountInString(target)
}
Ejemplo n.º 3
0
// normalizeBidi attempts to prevent names from using bidi control codes to
// screw up our layout.
//
// It scans name, tracking how many explicit embeddings/overrides and how many
// isolates are still open at the end, and returns name with exactly that many
// pop characters appended (explicit pops first, then isolate pops). A name
// that leaves nothing open is returned unchanged.
func normalizeBidi(name string) string {
	explicitDepth := 0
	isolateDepth := 0

	for _, c := range name {
		switch c {
		case ltrEmbed, rtlEmbed, ltrOverride, rtlOverride:
			explicitDepth++
		case bidiExplicitPop:
			// A pop with nothing open closes nothing; it must not cancel out
			// an opener that appears later in the name.
			if explicitDepth > 0 {
				explicitDepth--
			}
		case ltrIsolate, rtlIsolate, fsIsolate:
			isolateDepth++
		case bidiIsolatePop:
			if isolateDepth > 0 {
				isolateDepth--
			}
		}
	}
	if explicitDepth == 0 && isolateDepth == 0 {
		return name
	}

	// Each pending pop needs the full encoded width of its pop character.
	pops := make([]byte,
		explicitDepth*utf8.RuneLen(bidiExplicitPop)+isolateDepth*utf8.RuneLen(bidiIsolatePop))
	i := 0
	for ; explicitDepth > 0; explicitDepth-- {
		i += utf8.EncodeRune(pops[i:], bidiExplicitPop)
	}
	for ; isolateDepth > 0; isolateDepth-- {
		i += utf8.EncodeRune(pops[i:], bidiIsolatePop)
	}
	return name + string(pops[:i])
}
Ejemplo n.º 4
0
// reverse rearranges the contents of s in place and returns s.
//
// NOTE(review): judging by the name and the rune-sized moves below, this
// appears to reverse the order of the UTF-8 encoded runes in s — confirm
// against subslice, queue, frontInsertSpace and backInsertSpace, which are
// defined elsewhere.
func reverse(s []byte) []byte {
	// Both views start out covering the whole of s.
	// NOTE(review): presumably rest tracks the runes not yet moved and result
	// the already-written output — confirm against the subslice type.
	rest := subslice{s, 0, len(s)}
	result := subslice{s, 0, len(s)}
	// note: que is a []rune holding at most 4 elements
	que := queue{}
	for !rest.empty() {
		// Take an element from the back...
		r := rest.popBack()
		// ...then move front elements into the queue until there is enough
		// free space at the front to hold it.
		for !rest.empty() && frontInsertSpace(rest, result) < utf8.RuneLen(r) {
			que.push(rest.popFront())
		}
		// Move the element taken from the back to the front.
		result.pushFront(r)
		// Pack as many of the queued front elements as will fit into the
		// back, in reverse order.
		for len(que) > 0 {
			if backInsertSpace(rest, result) < utf8.RuneLen(que.front()) {
				break
			}
			result.pushBack(que.front())
			que.pop()
		}
	}
	// Once nothing is left to take, append the remaining queued elements
	// into the leftover gap at the back, in reverse order.
	for len(que) > 0 {
		result.pushBack(que.front())
		que.pop()
	}
	return s
}
Ejemplo n.º 5
0
// TestCharcount feeds a table of inputs to charcount and checks the per-rune
// counts, the per-encoded-length counts, the invalid count and the error.
func TestCharcount(t *testing.T) {
	type testCase struct {
		input   string
		counts  map[rune]int
		utflen  [utf8.UTFMax + 1]int
		invalid int
	}

	kana := utf8.RuneLen('あ')
	cases := []testCase{
		{
			input:   "Hello",
			counts:  map[rune]int{'H': 1, 'e': 1, 'l': 2, 'o': 1},
			utflen:  utflenFromMap(map[int]int{1: 5}),
			invalid: 0,
		},
		{
			input:   "あ",
			counts:  map[rune]int{'あ': 1},
			utflen:  utflenFromMap(map[int]int{kana: 1}),
			invalid: 0,
		},
		{
			input:   "あiueお",
			counts:  map[rune]int{'あ': 1, 'i': 1, 'u': 1, 'e': 1, 'お': 1},
			utflen:  utflenFromMap(map[int]int{kana: 2, 1: 3}),
			invalid: 0,
		},
	}

	for _, tc := range cases {
		reader := bufio.NewReader(strings.NewReader(tc.input))
		counts, utflen, invalid, err := charcount(reader)

		matches := reflect.DeepEqual(counts, tc.counts) &&
			utflen == tc.utflen &&
			invalid == tc.invalid &&
			err == nil
		if matches {
			continue
		}
		t.Errorf("charcount(%q) = %v, %v, %v, %v ; want %v, %v, %v, nil", tc.input,
			counts, utflen, invalid, err,
			tc.counts, tc.utflen, tc.invalid)
	}
}
Ejemplo n.º 6
0
// htmlReplacer returns s with runes replaced according to replacementTable
// and when badRunes is true, certain bad runes are allowed through unescaped.
//
// A rune r below len(replacementTable) is replaced by replacementTable[r]
// when that entry is non-empty. When badRunes is false, runes in the ranges
// U+FDD0..U+FDEF and U+FFF0..U+FFFF are written as hexadecimal character
// references. If nothing is replaced, s itself is returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	written, b := 0, new(bytes.Buffer)
	r, w := rune(0), 0
	for i := 0; i < len(s); i += w {
		// Decode by hand instead of ranging over s: on a decoding error the
		// input width is 1 byte, not utf8.RuneLen(utf8.RuneError), and using
		// the latter would skip past bytes that were never examined.
		r, w = utf8.DecodeRuneInString(s[i:])
		if int(r) < len(replacementTable) {
			if repl := replacementTable[r]; len(repl) != 0 {
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		} else if badRunes {
			// No-op.
			// IE does not allow these ranges in unquoted attrs.
		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	// written stays 0 only when no replacement happened at all.
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
Ejemplo n.º 7
0
Archivo: glob.go Proyecto: nilium/glob
// compileGlobPattern takes a given pattern string consisting of typical
// wildcard characters *, ?, or any literal string and returns a compiled slice
// of scanner functions.
//
// Any character in the pattern string can be escaped using a backslash to
// produce the literal character following it rather than a special character.
//
// The returned slice always ends with a consumeEnd/globEnd scanner whose start
// is len(pattern). ErrInvalidGlobSequence is returned when a `*` or `?` is
// found at exactly the start offset recorded for a preceding `*` scanner.
func compileGlobPattern(pattern string) ([]*globScanner, error) {
	// compile scanner function array
	wildcards := make([]*globScanner, 0, 4)
	for index, code := range pattern {
		var fn scanFunc = nil
		// start == -1 means "not decided yet"; it is filled in below as the
		// byte offset just past the current rune.
		var start int = -1
		var kind globKind
		switch {
		case code == '\\':
			// A backslash begins a new literal-substring scanner.
			fn = consumeSubstring
			kind = globString
		case code == '*':
			fn = consumeAllPreceding
			kind = globMany
		case code == '?':
			fn = consumeOnePreceding
			kind = globOne
		case index == 0:
			// A pattern that opens with a literal starts a substring scanner
			// at offset 0.
			fn = consumeSubstring
			start = index
			kind = globString
		default:
			// Any other literal rune creates no scanner; it ends up inside
			// the substring that is sliced for the current scanner later.
			continue
		}

		numWildcards := len(wildcards)
		if numWildcards > 0 {
			last := wildcards[numWildcards-1]
			if (kind == globOne || kind == globMany) && last.kind == globMany && last.start == index {
				// The wildcard sits exactly where the previous `*` scanner's
				// substring would begin.
				return nil, ErrInvalidGlobSequence
			} else if code == '\\' && len(last.substr) == 0 {
				// Backslash while the previous scanner has no substring yet:
				// move that scanner's start past the backslash rather than
				// appending a new scanner.
				// NOTE(review): substr is only assigned in the branch below
				// and after the loop, so this looks like it is always true
				// for the most recent scanner — confirm intent.
				last.start += utf8.RuneLen(code)
				continue
			} else {
				// Close off the previous scanner's substring at the current
				// rune.
				last.substr = pattern[last.start:index]
			}
		}

		if start == -1 {
			start = index + utf8.RuneLen(code)
		}

		wildcards = append(wildcards, &globScanner{fn, kind, "", start})
	}

	// The last scanner's substring runs to the end of the pattern.
	numWildcards := len(wildcards)
	if numWildcards > 0 {
		last := wildcards[numWildcards-1]
		last.substr = pattern[last.start:]
	}

	// Terminating scanner.
	wildcards = append(wildcards, &globScanner{consumeEnd, globEnd, "", len(pattern)})

	return wildcards, nil
}
Ejemplo n.º 8
0
// WriteRune encodes r as UTF-8 into the writer's buffer at the current index
// and advances the index by the number of bytes written. An invalid rune is
// replaced by utf8.RuneError. When the remaining buffer space cannot hold the
// encoded rune, nothing is written and (0, io.ErrShortWrite) is returned.
func (w *Writer) WriteRune(r rune) (cnt int, err error) {
	size := utf8.RuneLen(r)
	if size < 0 {
		// Invalid rune: substitute the replacement character.
		r = utf8.RuneError
		size = utf8.RuneLen(r)
	}

	free := int64(len(w.buf)) - w.idx
	if int64(size) > free {
		return 0, io.ErrShortWrite
	}

	written := utf8.EncodeRune(w.buf[w.idx:], r)
	w.idx += int64(written)
	return written, nil
}
Ejemplo n.º 9
0
// BackupRunes moves the lexer position back by n runes. For every rune
// stepped over, its UTF-8 encoded length is subtracted from both tokenLen and
// column. It panics with "Underflow Exception" when asked to step back past
// position 0.
func (l *lexer) BackupRunes(n int) {
	for remaining := n; remaining > 0; remaining-- {
		if l.pos <= 0 {
			panic("Underflow Exception")
		}
		l.pos--
		// Peek takes a 0-based position.
		r := l.runes.Peek(l.pos).(rune)
		width := utf8.RuneLen(r)
		l.tokenLen -= width
		l.column -= width
	}
}
Ejemplo n.º 10
0
// handlePreEscape processes the rune that follows the start of an escape.
// '[' switches the parser to MODE_ESCAPE with a fresh instruction list, ']'
// switches it to MODE_ITERM_ESCAPE; in both cases instructionStartedAt is set
// to the offset just past the cursor's current rune. Any other rune means it
// was not an escape code: the cursor is rewound to escapeStartedAt and the
// parser returns to MODE_NORMAL.
func (p *parser) handlePreEscape(char rune) {
	if char != '[' && char != ']' {
		// Not an escape code, false alarm
		p.cursor = p.escapeStartedAt
		p.mode = MODE_NORMAL
		return
	}

	// Both introducers are single-byte, so this equals cursor + 1.
	p.instructionStartedAt = p.cursor + utf8.RuneLen(char)

	if char == '[' {
		p.instructions = make([]string, 0, 1)
		p.mode = MODE_ESCAPE
		return
	}
	p.mode = MODE_ITERM_ESCAPE
}
Ejemplo n.º 11
0
// skip walks the count elements of readIn starting at tail; for each one it
// advances tail by one and adds that rune's UTF-8 encoded length to
// tailOffset.
func (l *_lexer) skip(count int) {
	// The range expression is evaluated once, so advancing tail inside the
	// loop does not change which elements are visited.
	for _, chr := range l.readIn[l.tail : l.tail+count] {
		l.tail++
		l.tailOffset += utf8.RuneLen(chr)
	}
}
Ejemplo n.º 12
0
// parseShort handles a group of short option names given as the runes of
// optname. When argument is nil, optname is first passed through
// splitShortConcatArg, which may yield a concatenated argument. Each rune is
// looked up in s.lookup.shortNames and handed to parseOption. It returns the
// last option looked up, or a nil option and an ErrUnknownFlag error for an
// unknown short name.
func (p *Parser) parseShort(s *parseState, optname string, argument *string) (option *Option, err error) {
	if argument == nil {
		optname, argument = p.splitShortConcatArg(s, optname)
	}

	for pos, r := range optname {
		name := string(r)

		option = s.lookup.shortNames[name]
		if option == nil {
			return nil, newError(ErrUnknownFlag, fmt.Sprintf("unknown flag `%s'", name))
		}

		// Only the last short argument can consume an argument from
		// the arguments list, and only if it's non optional
		isLast := pos+utf8.RuneLen(r) == len(optname)
		canarg := isLast && !option.OptionalArgument

		if _, perr := p.parseOption(s, name, option, canarg, argument); perr != nil {
			return option, perr
		}

		// Only the first option can have a concatted argument, so just
		// clear argument here
		argument = nil
	}

	return option, nil
}
Ejemplo n.º 13
0
Archivo: utf8.go Proyecto: pto/go-book
// main decodes the first rune of a sample string and prints the rune, its
// decoded width, the width utf8.RuneLen reports for it, and whether the whole
// string is valid UTF-8.
func main() {
	const sample = "¶ Greetings!"

	first, width := utf8.DecodeRuneInString(sample)
	fmt.Printf("rune %c length %d = %d ok %t\n",
		first, width, utf8.RuneLen(first), utf8.ValidString(sample))
}
Ejemplo n.º 14
0
// splitPathOnSeparator splits path into segments at each occurrence of
// separator that indexRuneWithEscaping reports. When the separator is a
// backslash the path is split with strings.Split. A path without any
// separator is returned as a single segment. An empty segment after a
// trailing separator is not included in the result.
func splitPathOnSeparator(path string, separator rune) []string {
	sep := string(separator)

	// if the separator is '\\', then we can just split...
	if separator == '\\' {
		return strings.Split(path, sep)
	}

	// otherwise, we need to be careful of situations where the separator was escaped
	occurrences := strings.Count(path, sep)
	if occurrences == 0 {
		return []string{path}
	}

	// At most one segment more than there are separator occurrences.
	segments := make([]string, occurrences+1)
	sepWidth := utf8.RuneLen(separator)
	total := len(path)
	used := 0
	for pos := 0; pos < total; used++ {
		stop := total
		if rel := indexRuneWithEscaping(path[pos:], separator); rel != -1 {
			stop = pos + rel
		}
		segments[used] = path[pos:stop]
		pos = stop + sepWidth
	}
	return segments[:used]
}
Ejemplo n.º 15
0
// processEscape processes a single escape sequence and returns number of bytes processed.
//
// data must start at the backslash. The decoded bytes are appended to
// r.token.byteValue. Simple escapes consume 2 bytes. A \uXXXX escape requires
// exactly four hex digits and consumes 6 bytes; a high surrogate immediately
// followed by a \uXXXX low surrogate is combined into one code point and
// consumes 12 bytes. An error is returned for a truncated or unknown escape,
// a non-hex digit, or a surrogate that is not part of a valid pair.
func (r *Lexer) processEscape(data []byte) (int, error) {
	if len(data) < 2 {
		return 0, fmt.Errorf("syntax error at %v", string(data))
	}

	switch c := data[1]; c {
	case '"', '/', '\\':
		r.token.byteValue = append(r.token.byteValue, c)
		return 2, nil
	case 'b':
		r.token.byteValue = append(r.token.byteValue, '\b')
		return 2, nil
	case 'f':
		r.token.byteValue = append(r.token.byteValue, '\f')
		return 2, nil
	case 'n':
		r.token.byteValue = append(r.token.byteValue, '\n')
		return 2, nil
	case 'r':
		r.token.byteValue = append(r.token.byteValue, '\r')
		return 2, nil
	case 't':
		r.token.byteValue = append(r.token.byteValue, '\t')
		return 2, nil
	case 'u':
		// Unicode escape, decoded below.
	default:
		return 0, fmt.Errorf("syntax error")
	}

	// Fewer than four hex digits is a syntax error; reporting 6 consumed
	// bytes for a shorter input would send the caller past the end of data.
	val, ok := decodeHex4(data[2:])
	if !ok {
		return 0, fmt.Errorf("syntax error")
	}
	consumed := 6

	// Code points above U+FFFF are written as a UTF-16 surrogate pair of two
	// consecutive \u escapes; combine them when the second half is present.
	if 0xD800 <= val && val <= 0xDBFF && len(data) >= 12 && data[6] == '\\' && data[7] == 'u' {
		if lo, ok := decodeHex4(data[8:]); ok && 0xDC00 <= lo && lo <= 0xDFFF {
			val = 0x10000 + ((val - 0xD800) << 10) + (lo - 0xDC00)
			consumed = 12
		}
	}

	// A surrogate that was not combined above is not encodable.
	l := utf8.RuneLen(val)
	if l == -1 {
		return 0, fmt.Errorf("invalid unicode escape")
	}

	var d [4]byte
	utf8.EncodeRune(d[:], val)
	r.token.byteValue = append(r.token.byteValue, d[:l]...)
	return consumed, nil
}

// decodeHex4 parses exactly four hexadecimal digits from the start of b. It
// reports false when b is shorter than four bytes or contains a non-hex byte.
func decodeHex4(b []byte) (rune, bool) {
	if len(b) < 4 {
		return 0, false
	}
	var val rune
	for _, c := range b[:4] {
		var v byte
		switch {
		case '0' <= c && c <= '9':
			v = c - '0'
		case 'a' <= c && c <= 'f':
			v = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			v = c - 'A' + 10
		default:
			return 0, false
		}
		val = val<<4 | rune(v)
	}
	return val, true
}
Ejemplo n.º 16
0
// Squash squashes each run of adjacent Unicode spaces in a UTF-8
// encoded []byte slice into a single ASCII space. It is an "in-place"
// function (see 4.22, p. 91) in that it modifies elements of the
// slice "in-place".
//
// The returned slice shares input's backing array and is never longer than
// input. Bytes that are not part of a space, including invalid UTF-8, are
// kept exactly as they appear in input.
func Squash(input []byte) []byte {
	out := 0           // next write offset; never ahead of the read offset
	prevSpace := false // was the previous rune a space?
	for in := 0; in < len(input); {
		r, size := utf8.DecodeRune(input[in:])
		if unicode.IsSpace(r) {
			if !prevSpace {
				input[out] = ' ' // convert to an ascii space
				out++
			}
			prevSpace = true
		} else {
			// Move the rune's original bytes down. Copying the source bytes
			// (instead of re-encoding into a fixed scratch buffer) handles
			// 4-byte runes and can never write more than was read.
			out += copy(input[out:], input[in:in+size])
			prevSpace = false
		}
		in += size
	}
	return input[:out]
}
Ejemplo n.º 17
0
// Validate tests whether the stream name is valid.
//
// A name must be non-empty, every segment must begin with an alphanumeric
// character, the only other permitted runes are the separator and '.', '_',
// '-' and ':', and the name may not end with a separator. It returns nil for
// a valid name and a descriptive error otherwise.
func (s StreamName) Validate() error {
	if len(s) == 0 {
		return errors.New("Must contain at least one character.")
	}

	var (
		last     rune // most recently visited rune
		segStart int  // byte offset at which the current segment begins
	)
	for pos, r := range s {
		last = r
		if isAlnum(r) {
			continue
		}

		// A non-alphanumeric rune may not open a segment. This also rejects
		// adjacent separators.
		if pos == segStart {
			return fmt.Errorf("Segment (at %d) must begin with alphanumeric character.", segStart)
		}

		if r == StreamNameSep {
			segStart = pos + utf8.RuneLen(r)
			continue
		}

		// Test remaining allowed characters.
		if r != '.' && r != '_' && r != '-' && r != ':' {
			return fmt.Errorf("Illegal charater (%c) at index %d.", r, pos)
		}
	}

	// The last rune may not be a separator.
	if last == StreamNameSep {
		return errors.New("Name may not end with a separator.")
	}
	return nil
}
Ejemplo n.º 18
0
// urlEncodePath percent-encodes pathName. ASCII letters, digits and the
// characters '-', '_', '.', '~' and '/' are kept as they are; every other
// rune is replaced by the "%XX" (upper-case hex) form of each byte of its
// UTF-8 encoding.
//
// A name consisting only of characters that need no encoding is returned
// unchanged. If a rune cannot be encoded as UTF-8, pathName is returned as is.
func urlEncodePath(pathName string) string {
	// if object matches reserved string, no need to encode them
	if regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$").MatchString(pathName) {
		return pathName
	}

	var out []byte
	for _, r := range pathName {
		switch {
		case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z', '0' <= r && r <= '9':
			// §2.3 Unreserved characters (alphanumerics)
			out = append(out, byte(r))
		case r == '-', r == '_', r == '.', r == '~', r == '/':
			// §2.3 Unreserved characters (mark)
			out = append(out, byte(r))
		default:
			size := utf8.RuneLen(r)
			if size < 0 {
				// if utf8 cannot convert return the same string as is
				return pathName
			}
			raw := make([]byte, size)
			utf8.EncodeRune(raw, r)
			// Two upper-case hex digits per encoded byte.
			digits := strings.ToUpper(hex.EncodeToString(raw))
			for i := 0; i < len(digits); i += 2 {
				out = append(out, '%', digits[i], digits[i+1])
			}
		}
	}
	return string(out)
}
Ejemplo n.º 19
0
// toLowerDeferredCopy lower-cases s the way bytes.ToLower does, but writes
// the result over the original bytes for as long as that is possible.
// NOTE: the lower-case form of a rune can need more UTF-8 bytes than the rune
// itself; as soon as such a rune is met, a new slice is allocated holding the
// part converted so far followed by bytes.ToLower of the remainder.
func toLowerDeferredCopy(s []byte) []byte {
	write := 0
	for read := 0; read < len(s); {
		c, size := rune(s[read]), 1
		if c >= utf8.RuneSelf {
			c, size = utf8.DecodeRune(s[read:])
		}

		lower := unicode.ToLower(c)
		lowerSize := utf8.RuneLen(lower)
		if lowerSize > size {
			// utf-8 encoded replacement is wider
			// for now, punt and defer
			// to bytes.ToLower() for the remainder
			// only known to happen with chars
			//   Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
			//   Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
			tail := bytes.ToLower(s[read:])
			out := make([]byte, write+len(tail))
			copy(out, s[:write])
			copy(out[write:], tail)
			return out
		}

		// The write offset never passes the read offset, so this cannot
		// clobber bytes that have not been read yet.
		utf8.EncodeRune(s[write:], lower)
		read += size
		write += lowerSize
	}
	return s[:write]
}
Ejemplo n.º 20
0
Archivo: view.go Proyecto: vzex/gocui
// getRuneLen returns 2 when the UTF-8 encoding of ch is longer than one byte
// and 1 otherwise (including for runes that cannot be encoded).
// NOTE(review): presumably used as an approximation of how many cells the
// rune occupies on screen — confirm with the callers.
func (v *View) getRuneLen(ch rune) int {
	if utf8.RuneLen(ch) <= 1 {
		return 1
	}
	return 2
}
Ejemplo n.º 21
0
Archivo: lexer.go Proyecto: nilium/leks
// takeRune counts a rune towards line/column/offset measurements for tokens.
// Unless an error occurs, all runes read should be passed, in order, to this
// function.
//
// When size is not positive it is derived from r with utf8.RuneLen; a rune
// without a valid encoded length causes a panic. If reads are being captured,
// r is also written to the lexer's buffer and any write error is logged and
// returned.
func (l *Lexer) takeRune(r rune, size int) error {
	if size <= 0 {
		size = utf8.RuneLen(r)
	}
	if size == -1 {
		panic(fmt.Errorf("Lexer: takeRune: rune %q has encoded length %d", r, size))
	}

	l.loc.Off += size
	switch {
	case isNewline(l.last):
		// The previously seen rune ended a line, so this rune opens the next.
		l.loc.Line++
		l.loc.Col = 1
	default:
		l.loc.Col++
	}

	if !l.shouldCaptureReads() {
		return nil
	}

	written, err := writeRune(&l.buf, r)
	if err != nil || written != size {
		log.Printf("Error buffering rune %q: (size=%d written=%d) %v", r, size, written, err)
	}
	return err
}
Ejemplo n.º 22
0
// readConsole reads utf16 characters from console File,
// encodes them into utf8 and stores them in buffer b.
// It returns the number of utf8 bytes read and an error, if any.
//
// Runes that do not fit into b are kept in f.readbuf and are returned by
// later calls before the console is read again.
func (f *File) readConsole(b []byte) (n int, err error) {
	if len(b) == 0 {
		return 0, nil
	}

	if len(f.readbuf) == 0 {
		// get more input data from os
		// b is non-empty here, so wchars always has a first element.
		wchars := make([]uint16, len(b))
		var nw uint32
		if e := syscall.ReadConsole(f.fd, &wchars[0], uint32(len(wchars)), &nw, nil); e != nil {
			return 0, e
		}
		f.readbuf = utf16.Decode(wchars[:nw])
	}

	// Drain pending runes into b until one no longer fits.
	for len(f.readbuf) > 0 {
		r := f.readbuf[0]
		if utf8.RuneLen(r) > len(b) {
			return n, nil
		}
		nr := utf8.EncodeRune(b, r)
		b = b[nr:]
		n += nr
		f.readbuf = f.readbuf[1:]
	}
	f.readbuf = nil
	return n, nil
}
Ejemplo n.º 23
0
// getDAGbyTree walks src once per rune start offset. For each start offset it
// follows the tree rooted at f.data rune by rune and records an FI entry
// (start, end, frequency, first-rune flag) for every prefix with a positive
// frequency; the first rune is always recorded, using f.minf when it has no
// frequency of its own. dag is indexed by byte offset, so offsets that fall
// inside a multi-byte rune stay nil. idxs lists every rune start offset
// followed by len(src).
func (f *Freq) getDAGbyTree(src string) (dag []*FIS, idxs []int) {
	dag = make([]*FIS, len(src))
	idxs = make([]int, 0, len(src)+1)

	for start := range src {
		idxs = append(idxs, start)

		cur := f.data
		edges := FIS{make([]*FI, 0)}
		for off, c := range src[start:] {
			child, found := cur.next[c]
			weight := 0.0
			if found {
				cur = child
				weight = cur.freq
			}
			if off == 0 && weight == 0 {
				weight = f.minf // always add first rune, even not found
			}
			if weight > 0 {
				end := start + off + utf8.RuneLen(c)
				edges.data = append(edges.data, &FI{start, end, weight, off == 0})
			}
			if !found {
				break
			}
		}
		dag[start] = &edges
	}

	idxs = append(idxs, len(src))
	return
}
Ejemplo n.º 24
0
// Transform copies src to dst rune by rune, writing U+FFFD in place of every
// byte that is not part of a valid UTF-8 sequence. It returns the number of
// bytes written to dst and consumed from src. transform.ErrShortDst is
// returned when the next rune does not fit in dst, and transform.ErrShortSrc
// when src ends in an incomplete rune while atEOF is false.
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		var (
			r    rune
			size int
		)
		if b := src[nSrc]; b < utf8.RuneSelf {
			// Decode a 1-byte rune.
			r, size = rune(b), 1
		} else {
			// Decode a multi-byte rune.
			r, size = utf8.DecodeRune(src[nSrc:])
			if size == 1 {
				// All valid runes of size 1 (those below utf8.RuneSelf) were
				// handled above. We have invalid UTF-8 or we haven't seen the
				// full character yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					return nDst, nSrc, transform.ErrShortSrc
				}
				r = '\ufffd'
			}
		}

		if nDst+utf8.RuneLen(r) > len(dst) {
			return nDst, nSrc, transform.ErrShortDst
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
		nSrc += size
	}
	return nDst, nSrc, nil
}
Ejemplo n.º 25
0
// Map returns a copy of the byte array s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.  The characters in s and the
// output are interpreted as UTF-8-encoded Unicode code points.
//
// If mapping returns a non-negative value that is not a valid rune (a
// surrogate half or a value above utf8.MaxRune), utf8.RuneError is written
// in its place.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// In the worst case, the array can grow when mapped, making
	// things unpleasant.  But it's so rare we barge in assuming it's
	// fine.  It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	b := make([]byte, maxbytes)
	for i := 0; i < len(s); {
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}
		r = mapping(r)
		if r >= 0 {
			rl := utf8.RuneLen(r)
			if rl < 0 {
				// EncodeRune writes RuneError for an invalid rune, so the
				// space check must reserve room for that instead of -1.
				rl = utf8.RuneLen(utf8.RuneError)
			}
			if nbytes+rl > maxbytes {
				// Grow the buffer.
				maxbytes = maxbytes*2 + utf8.UTFMax
				nb := make([]byte, maxbytes)
				copy(nb, b[0:nbytes])
				b = nb
			}
			nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
		}
		i += wid
	}
	return b[0:nbytes]
}
Ejemplo n.º 26
0
// urlEncodeName percent-encodes name. ASCII letters, digits and the
// characters '-', '_', '.', '~' and '/' are kept as they are; every other
// rune is replaced by the "%XX" (upper-case hex) form of each byte of its
// UTF-8 encoding.
//
// A name consisting only of characters that need no encoding is returned
// unchanged. If a rune cannot be encoded as UTF-8, an InvalidArgument error
// is returned.
func urlEncodeName(name string) (string, *probe.Error) {
	// if object matches reserved string, no need to encode them
	if regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$").MatchString(name) {
		return name, nil
	}

	// §2.3 Unreserved characters
	unreserved := func(r rune) bool {
		switch {
		case 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z', '0' <= r && r <= '9':
			return true
		case r == '-', r == '_', r == '.', r == '~', r == '/':
			return true
		}
		return false
	}

	encoded := ""
	for _, r := range name {
		if unreserved(r) {
			encoded += string(r)
			continue
		}
		size := utf8.RuneLen(r)
		if size < 0 {
			return "", probe.NewError(InvalidArgument{})
		}
		raw := make([]byte, size)
		utf8.EncodeRune(raw, r)
		for _, b := range raw {
			encoded += "%" + strings.ToUpper(hex.EncodeToString([]byte{b}))
		}
	}
	return encoded, nil
}
Ejemplo n.º 27
0
// getURLEncodedName percent-encodes name. ASCII letters, digits and the
// characters '-', '_', '.', '~' and '/' are kept as they are; every other
// rune is replaced by the "%XX" (upper-case hex) form of each byte of its
// UTF-8 encoding.
//
// A name matched by reservedNames is returned unchanged. If a rune cannot be
// encoded as UTF-8, name is returned as is.
func getURLEncodedName(name string) string {
	// if object matches reserved string, no need to encode them
	if reservedNames.MatchString(name) {
		return name
	}

	var out []byte
	for _, r := range name {
		isAlnum := 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9'
		isMark := r == '-' || r == '_' || r == '.' || r == '~' || r == '/'
		if isAlnum || isMark {
			// §2.3 Unreserved characters
			out = append(out, byte(r))
			continue
		}

		size := utf8.RuneLen(r)
		if size < 0 {
			return name
		}
		raw := make([]byte, size)
		utf8.EncodeRune(raw, r)
		// Two upper-case hex digits per encoded byte.
		digits := strings.ToUpper(hex.EncodeToString(raw))
		for i := 0; i < len(digits); i += 2 {
			out = append(out, '%', digits[i], digits[i+1])
		}
	}
	return string(out)
}
Ejemplo n.º 28
0
// lookupByName finds the option registered under name (matched in lower
// case) and returns it together with the name it was found under.
//
// When ini is true, IniNames is consulted first (returning the field's
// "ini-name" tag) and then Names (returning the struct field name). After
// that LongNames is tried, and finally — for a name that is a single rune —
// ShortNames, returning the option's short name as a string. It returns
// (nil, "") when nothing matches.
func (g *Group) lookupByName(name string, ini bool) (*Option, string) {
	name = strings.ToLower(name)

	if ini {
		if opt := g.IniNames[name]; opt != nil {
			return opt, opt.Field.Tag.Get("ini-name")
		}

		if opt := g.Names[name]; opt != nil {
			return opt, opt.Field.Name
		}
	}

	if opt := g.LongNames[name]; opt != nil {
		return opt, opt.LongName
	}

	if utf8.RuneCountInString(name) == 1 {
		r, _ := utf8.DecodeRuneInString(name)

		if opt := g.ShortNames[r]; opt != nil {
			// A plain rune-to-string conversion yields the same UTF-8 bytes
			// as encoding by hand, and yields U+FFFD for an invalid rune
			// instead of panicking on a negative buffer length.
			return opt, string(opt.ShortName)
		}
	}

	return nil, ""
}
Ejemplo n.º 29
0
// replace replaces each rune r of s with replacementTable[r], provided that
// r < len(replacementTable). If replacementTable[r] is the empty string then
// no replacement is made.
// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
// `\u2029`.
//
// If nothing is replaced, s itself is returned.
func replace(s string, replacementTable []string) string {
	var b bytes.Buffer
	written := 0
	for i, w := 0, 0; i < len(s); i += w {
		// Decode by hand instead of ranging over s: on a decoding error the
		// input width is 1 byte, not utf8.RuneLen(utf8.RuneError), and using
		// the latter would skip past bytes that were never examined.
		var r rune
		r, w = utf8.DecodeRuneInString(s[i:])

		var repl string
		switch {
		case int(r) < len(replacementTable) && replacementTable[r] != "":
			repl = replacementTable[r]
		case r == '\u2028':
			repl = `\u2028`
		case r == '\u2029':
			repl = `\u2029`
		default:
			continue
		}
		b.WriteString(s[written:i])
		b.WriteString(repl)
		written = i + w
	}
	// written stays 0 only when no replacement happened at all.
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
Ejemplo n.º 30
0
// MultiPatternSearch feeds content rune by rune through the machine's
// transition function g, falling back through f whenever g reports
// FAIL_STATE, and collects a Term for every word listed in m.output for the
// state reached. Term.Pos is the byte offset just past the current rune minus
// len(word). When returnImmediately is true the search stops at the first
// term found.
func (m *Machine) MultiPatternSearch(content string, returnImmediately bool) []Term {
	terms := make([]Term, 0, 16)

	state := ROOT_STATE
	for pos, c := range content {
		// Follow failure links until a transition on c exists.
		next := m.g(state, c)
		for next == FAIL_STATE {
			state = m.f(state)
			next = m.g(state, c)
		}
		state = next

		if state >= len(m.output) {
			continue
		}

		end := pos + utf8.RuneLen(c)
		for _, word := range m.output[state] {
			terms = append(terms, Term{
				Pos:  end - len(word),
				Word: word,
			})
			if returnImmediately {
				return terms
			}
		}
	}

	return terms
}