Пример #1
0
// toCamelCase builds an identifier from the letters of x.
//
// The first letter of x and the first letter after every '_' are appended in
// upper case. Any other letter is appended only when it is already upper
// case, and is then preceded by a '_' separator. Lower-case letters that do
// not start a segment and all non-letter runes (digits, punctuation, invalid
// UTF-8) are dropped. For example "foo_bar" yields "FB" and "aB" yields "A_B".
func toCamelCase(x string) string {
	if len(x) == 0 {
		return ""
	}

	output := make([]byte, 0, len(x))
	// Encode through a scratch buffer big enough for any rune: the upper-case
	// form of a letter may need more or fewer UTF-8 bytes than the letter
	// itself, so a buffer sized from the decoded width is not safe.
	var buf [utf8.UTFMax]byte
	uppercase := true

	for len(x) > 0 {
		v, size := utf8.DecodeRuneInString(x)
		x = x[size:]

		switch {
		case v == '_':
			// An underscore is never emitted; it only marks the next letter.
			uppercase = true
		case !unicode.IsLetter(v):
			// Non-letters are dropped.
		case uppercase:
			uppercase = false
			n := utf8.EncodeRune(buf[:], unicode.ToUpper(v))
			output = append(output, buf[:n]...)
		case unicode.IsUpper(v):
			n := utf8.EncodeRune(buf[:], v)
			output = append(output, '_')
			output = append(output, buf[:n]...)
		}
	}

	return string(output)
}
Пример #2
0
// normalizeBidi attempts to prevent names from using bidi control codes to
// screw up our layout.
//
// It scans name for explicit embedding/override initiators, isolate
// initiators and their pop characters, and returns name with one pop
// character appended for every initiator still open at the end. Explicit pops
// are appended first, then isolate pops. A name with nothing left open is
// returned unchanged.
func normalizeBidi(name string) string {
	explicitDepth := 0
	isolateDepth := 0

	for _, c := range name {
		switch c {
		case ltrEmbed, rtlEmbed, ltrOverride, rtlOverride:
			explicitDepth++
		case bidiExplicitPop:
			// A pop with nothing open closes nothing. Letting the depth go
			// negative would cancel an initiator that appears later and
			// could produce a negative buffer size below.
			if explicitDepth > 0 {
				explicitDepth--
			}
		case ltrIsolate, rtlIsolate, fsIsolate:
			isolateDepth++
		case bidiIsolatePop:
			if isolateDepth > 0 {
				isolateDepth--
			}
		}
	}

	open := explicitDepth + isolateDepth
	if open == 0 {
		return name
	}

	// Reserve the worst case per pop so the size never depends on getting
	// the per-rune arithmetic right.
	pops := make([]byte, 0, open*utf8.UTFMax)
	var buf [utf8.UTFMax]byte

	n := utf8.EncodeRune(buf[:], bidiExplicitPop)
	for ; explicitDepth > 0; explicitDepth-- {
		pops = append(pops, buf[:n]...)
	}
	n = utf8.EncodeRune(buf[:], bidiIsolatePop)
	for ; isolateDepth > 0; isolateDepth-- {
		pops = append(pops, buf[:n]...)
	}
	return name + string(pops)
}
Пример #3
0
// TestReadWriteRune writes the first NRune code points with WriteRune on the
// writer returned by NewAsyncWriter, then reads them back through a
// bufio.Reader and checks that every rune and its encoded size round-trip.
func TestReadWriteRune(t *testing.T) {
	const NRune = 1000
	var scratch [utf8.UTFMax]byte
	sink := new(bytes.Buffer)
	w := NewAsyncWriter(sink)

	// Write phase: the reported size must match utf8.EncodeRune.
	for r := rune(0); r < NRune; r++ {
		want := utf8.EncodeRune(scratch[:], r)
		got, err := w.WriteRune(r)
		if err != nil {
			t.Fatalf("WriteRune(0x%x) error: %s", r, err)
		}
		if got != want {
			t.Fatalf("WriteRune(0x%x) expected %d, got %d", r, want, got)
		}
	}
	w.Flush()

	// Read phase: every rune comes back in order with the same size.
	reader := bufio.NewReader(sink)
	for want := rune(0); want < NRune; want++ {
		wantSize := utf8.EncodeRune(scratch[:], want)
		got, gotSize, err := reader.ReadRune()
		if err != nil || got != want || gotSize != wantSize {
			t.Fatalf("ReadRune(0x%x) got 0x%x,%d not 0x%x,%d (err=%s)", want, got, gotSize, want, wantSize, err)
		}
	}
}
Пример #4
0
// readConsole reads one character from the console as UTF-16, converts it to
// UTF-8 and stores it in buf. Bytes left over from an earlier call (kept in
// f.readbuf) are served first. When the encoded character does not fit in
// buf it is stashed in f.readbuf and handed out via copyReadConsoleBuffer.
// It returns the number of UTF-8 bytes stored in buf and an error, if any.
func (f *File) readConsole(buf []byte) (n int, err error) {
	if len(buf) == 0 {
		return 0, nil
	}
	if len(f.readbuf) > 0 {
		return f.copyReadConsoleBuffer(buf)
	}

	unit, err := f.readOneUTF16FromConsole()
	if err != nil {
		return 0, err
	}
	r := rune(unit)
	if utf16.IsSurrogate(r) {
		// A surrogate needs its partner before it can be decoded.
		second, err := f.readOneUTF16FromConsole()
		if err != nil {
			return 0, err
		}
		r = utf16.DecodeRune(r, rune(second))
	}

	size := utf8.RuneLen(r)
	if size <= len(buf) {
		utf8.EncodeRune(buf, r)
		buf = buf[size:]
		n += size
	} else {
		// Not enough room: extend readbuf by size bytes and encode there.
		start := len(f.readbuf)
		for i := 0; i < size; i++ {
			f.readbuf = append(f.readbuf, 0)
		}
		utf8.EncodeRune(f.readbuf[start:cap(f.readbuf)], r)
	}

	if n > 0 {
		return n, nil
	}
	return f.copyReadConsoleBuffer(buf)
}
Пример #5
0
// ToUpperCamelCase returns a copy of s in upper camel case: the first letter
// and every letter following a '_' are upper-cased and the '_' characters are
// removed. Each '_'-delimited word whose upper-cased form is found by
// commonInitialism.LookupByBytes is emitted entirely in upper case.
func ToUpperCamelCase(s string) string {
	if s == "" {
		return ""
	}

	var (
		runeBuf    [utf8.UTFMax]byte
		initialism []byte
		result     = make([]byte, 0, len(s))
		wordStart  = 0
		capitalize = true
	)

	// appendRune appends r to dst, upper-casing it first when upper is set.
	appendRune := func(dst []byte, r rune, upper bool) []byte {
		if r < utf8.RuneSelf {
			if upper {
				return append(dst, toUpperASCII(byte(r)))
			}
			return append(dst, byte(r))
		}
		if upper {
			r = unicode.ToUpper(r)
		}
		n := utf8.EncodeRune(runeBuf[:], r)
		return append(dst, runeBuf[:n]...)
	}

	// finishWord replaces the word accumulated since wordStart with its
	// upper-cased form when that form is a known initialism.
	finishWord := func() {
		initialism = initialism[:0]
		for _, r := range string(result[wordStart:]) {
			initialism = appendRune(initialism, r, true)
		}
		if commonInitialism.LookupByBytes(initialism) > 0 {
			result = append(result[:wordStart], initialism...)
		}
	}

	for _, c := range s {
		switch {
		case c == '_':
			capitalize = true
			finishWord()
			wordStart = len(result)
		case capitalize:
			result = appendRune(result, c, true)
			capitalize = false
		default:
			result = appendRune(result, c, false)
		}
	}
	finishWord()
	return string(result)
}
Пример #6
0
// UTF16ToUTF8Bytes converts in, a sequence of little-endian UTF-16 code
// units, to UTF-8 and writes the result to out.
//
// A valid surrogate pair is written as the single code point it encodes. An
// unpaired surrogate (a high surrogate not followed by a low surrogate, a
// lone low surrogate, or a high surrogate at the very end of the input) is
// written as U+FFFD and decoding resumes with the next code unit, so no input
// is silently dropped.
//
// It returns an error when len(in) is odd or when a write to out fails.
func UTF16ToUTF8Bytes(in []byte, out io.Writer) error {
	if len(in)%2 != 0 {
		return fmt.Errorf("input buffer must have an even length (length=%d)", len(in))
	}

	var runeBuf [utf8.UTFMax]byte
	for i := 0; i < len(in); i += 2 {
		r := rune(uint16(in[i]) | uint16(in[i+1])<<8)

		if utf16.IsSurrogate(r) {
			pair := rune(utf8.RuneError)
			if i+3 < len(in) {
				next := rune(uint16(in[i+2]) | uint16(in[i+3])<<8)
				// DecodeRune yields U+FFFD unless (r, next) is a valid pair.
				pair = utf16.DecodeRune(r, next)
			}
			if pair != utf8.RuneError {
				// Only a valid pair consumes the second code unit.
				i += 2
			}
			r = pair
		}

		n := utf8.EncodeRune(runeBuf[:], r)
		if _, err := out.Write(runeBuf[:n]); err != nil {
			return err
		}
	}

	return nil
}
Пример #7
0
// Tl replaces, in place, every occurrence of rune u on the canvas with rune v.
//
// When v encodes to fewer bytes than the occurrence of u it replaces, the
// leftover bytes are set to NUL so the row keeps its length. It returns an
// error when v is not a valid rune, or when v needs more bytes than an
// occurrence of u provides.
func (t *Textbox) Tl(u rune, v rune) error {
	if !utf8.ValidRune(v) {
		return errors.New("invalid rune.")
	}

	for row := range t.canvas {
		line := t.canvas[row]
		for pos := 0; pos < len(line); {
			found, width := utf8.DecodeRune(line[pos:])
			if found == u {
				slot := line[pos : pos+width]
				switch vWidth := utf8.RuneLen(v); {
				case width == vWidth:
					// Same size: overwrite directly.
					utf8.EncodeRune(slot, v)
				case width > vWidth:
					// Smaller replacement: write it, then pad with NULs.
					utf8.EncodeRune(slot, v)
					for k := vWidth; k < width; k++ {
						slot[k] = 0
					}
				default:
					// The replacement does not fit in the slot.
					return errors.New("rune too large.")
				}
			}
			pos += width
		}
	}
	return nil
}
Пример #8
0
// TestFoldAgainstUnicode checks each specialised fold function against
// bytes.EqualFold. Every candidate rune is wrapped as "x<rune>x" and every
// pair of wrapped runes is compared; the result must match bytes.EqualFold.
func TestFoldAgainstUnicode(t *testing.T) {
	const bufSize = 5
	scratch1 := make([]byte, 0, bufSize)
	scratch2 := make([]byte, 0, bufSize)

	// Candidates: 0x20 through 0x7f, plus kelvin and smallLongEss.
	var runes []rune
	for c := rune(0x20); c <= 0x7f; c++ {
		runes = append(runes, c)
	}
	runes = append(runes, kelvin, smallLongEss)

	funcs := []struct {
		name   string
		fold   func(s, t []byte) bool
		letter bool // must be ASCII letter
		simple bool // must be simple ASCII letter (not 'S' or 'K')
	}{
		{name: "equalFoldRight", fold: equalFoldRight},
		{name: "asciiEqualFold", fold: asciiEqualFold, simple: true},
		{name: "simpleLetterEqualFold", fold: simpleLetterEqualFold, simple: true, letter: true},
	}

	// wrap builds "x<r>x" in scratch's backing array and returns it.
	wrap := func(scratch []byte, r rune) []byte {
		b := append(scratch[:0], 'x')
		n := utf8.EncodeRune(b[1:bufSize], r)
		return append(b[:1+n], 'x')
	}

	for _, fn := range funcs {
		for _, r1 := range runes {
			// The left operand must satisfy the function's preconditions.
			skip := r1 >= utf8.RuneSelf ||
				(fn.letter && !isASCIILetter(byte(r1))) ||
				(fn.simple && (r1 == 's' || r1 == 'S' || r1 == 'k' || r1 == 'K'))
			if skip {
				continue
			}
			for _, r2 := range runes {
				left := wrap(scratch1, r1)
				right := wrap(scratch2, r2)
				want := bytes.EqualFold(left, right)
				if got := fn.fold(left, right); got != want {
					t.Errorf("%s(%q, %q) = %v; want %v", fn.name, left, right, got, want)
				}
			}
		}
	}
}
Пример #9
0
// MarshalBinary writes the binary form of ifc into data, starting at data[0].
//
// Supported dynamic types:
//   - byte: stored in data[0].
//   - float32, rune, int, uint32: handed to m32 as a 32-bit value (float32 as
//     its bit pattern, int truncated to 32 bits).
//   - float64, uint64: handed to m64 as a 64-bit value (float64 as its bit
//     pattern).
//   - uintptr: written as 8 bytes, least-significant byte first.
//   - []rune, string: the UTF-8 encoding of the runes.
//   - []byte: copied verbatim.
//
// data must be large enough for the value; a buffer that is too small causes
// a run-time panic. An error is returned only for unsupported types.
func MarshalBinary(data []byte, ifc interface{}) error {
	var enc [utf8.UTFMax]byte

	switch v := ifc.(type) {
	case byte:
		data[0] = v
	case float32:
		m32(data, math.Float32bits(v))
	case float64:
		m64(data, math.Float64bits(v))
	case rune:
		m32(data, uint32(v))
	case int:
		m32(data, uint32(v))
	case uint32:
		m32(data, v)
	case uintptr:
		bits := uint64(v)
		for i := uint64(0); i < 8; i++ {
			data[i] = byte((bits >> (8 * i)) & 0xff)
		}
	case uint64:
		m64(data, v)
	case []rune:
		n := 0
		for _, r := range v {
			l := utf8.EncodeRune(enc[:], r)
			copy(data[n:n+l], enc[:l])
			n += l
		}
	case string:
		n := 0
		for _, r := range v {
			l := utf8.EncodeRune(enc[:], r)
			copy(data[n:n+l], enc[:l])
			n += l
		}
	case []byte:
		// Copy into the caller's buffer. The previous code declared a new
		// local named data here, so the caller never saw the bytes.
		copy(data[0:len(v)], v)
	default:
		return errors.New("unknown type for marshal")
	}
	return nil
}
Пример #10
0
// addRange adds a branch matching the UTF-8 encodings of the runes lo through
// hi. The range is split recursively until every piece has a single encoded
// length and its endpoints differ only in trailing bytes that span their full
// range; each such piece is then turned into a chain of byte-range suffixes.
func (b *runeBuilder) addRange(lo, hi rune, fold bool) {
	if lo > hi {
		return
	}

	// TODO: Pick off 80-10FFFF for special handling?

	// Split range into same-length sized ranges.
	for n := 1; n < utf8.UTFMax; n++ {
		if limit := maxRune(n); lo <= limit && limit < hi {
			b.addRange(lo, limit, fold)
			b.addRange(limit+1, hi, fold)
			return
		}
	}

	// ASCII range is special.
	if hi < utf8.RuneSelf {
		b.addBranch(b.suffix(byte(lo), byte(hi), fold, 0))
		return
	}

	// Split range into sections that agree on leading bytes.
	for n := 1; n < utf8.UTFMax; n++ {
		mask := rune(1)<<uint(6*n) - 1 // last n bytes of UTF-8 sequence
		if lo&^mask == hi&^mask {
			continue
		}
		if lo&mask != 0 {
			b.addRange(lo, lo|mask, fold)
			b.addRange((lo|mask)+1, hi, fold)
			return
		}
		if hi&mask != mask {
			b.addRange(lo, (hi&^mask)-1, fold)
			b.addRange(hi&^mask, hi, fold)
			return
		}
	}

	// Finally.  Generate byte matching equivalent for lo-hi.
	var loBytes, hiBytes [utf8.UTFMax]byte
	loLen := utf8.EncodeRune(loBytes[:], lo)
	if hiLen := utf8.EncodeRune(hiBytes[:], hi); hiLen != loLen {
		panic("codesearch/regexp: bad utf-8 math")
	}

	// Build the suffix chain from the last byte back to the first.
	var pc uint32
	for i := loLen - 1; i >= 0; i-- {
		pc = b.suffix(loBytes[i], hiBytes[i], false, pc)
	}
	b.addBranch(pc)
}
Пример #11
0
// parse scans f.pattern once and fills in the derived fields of f.
//
// Characters before the first of '#', '0', '.', ',' that are not '+' or '-'
// are stored as the prefix; such characters after it are stored as the
// suffix. The precision is the count of '#', '0', '.', ',' characters that
// follow the first '.'. The group, plus and minus signs are cleared when the
// pattern contains no ',', '+' or '-' respectively. An empty pattern leaves f
// untouched. The returned error is always nil.
func (f *format) parse() error {
	if len(f.pattern) == 0 {
		return nil
	}
	f.parsed = true // only IF there is a format

	var (
		prefixLen, suffixLen       int
		inNumber, inFraction       bool
		sawGroup, sawPlus, sawMinus bool
		fractionChars              int
	)
	for _, c := range f.pattern {
		switch c {
		case '+':
			sawPlus = true
		case '-':
			sawMinus = true
		case '#', '0', '.', ',':
			if c == ',' {
				sawGroup = true
			}
			// Count before flagging, so the first '.' itself is not counted.
			if inFraction {
				fractionChars++
			}
			if c == '.' {
				inFraction = true
			}
			inNumber = true
		default:
			if c > 0 {
				if inNumber { // suffix
					suffixLen += utf8.EncodeRune(f.suffix[suffixLen:], c)
					f.suffix = f.suffix[:formatBufferSize]
				} else { // prefix
					prefixLen += utf8.EncodeRune(f.prefix[prefixLen:], c)
					f.prefix = f.prefix[:formatBufferSize]
				}
			}
		}
	}
	f.prefix = f.prefix[:prefixLen]
	f.suffix = f.suffix[:suffixLen]

	if !sawGroup {
		f.group = 0
	}
	if !sawPlus {
		f.plusSign = 0
	}
	if !sawMinus {
		f.minusSign = 0
	}
	f.precision = fractionChars

	return nil
}
Пример #12
0
// decomposeHangul writes the decomposition of the Hangul syllable r to buf as
// two or three jamo, each taking 3 UTF-8 bytes, and returns the number of
// bytes written (6 or 9). len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
	const jamoLen = 3

	index := r - hangulBase
	trailing := index % jamoTCount
	leadVowel := index / jamoTCount

	utf8.EncodeRune(buf, jamoLBase+leadVowel/jamoVCount)
	utf8.EncodeRune(buf[jamoLen:], jamoVBase+leadVowel%jamoVCount)
	if trailing == 0 {
		// No trailing consonant: only two jamo.
		return 2 * jamoLen
	}
	utf8.EncodeRune(buf[2*jamoLen:], jamoTBase+trailing)
	return 3 * jamoLen
}
Пример #13
0
// nextNode descends from curNode along the UTF-8 bytes of char, one child per
// byte, and returns the node reached, or nil as soon as a child is missing.
func nextNode(curNode *node, char rune) *node {
	var buf [utf8.UTFMax]byte

	// descend follows the encoded bytes of r starting at from.
	descend := func(from *node, r rune) *node {
		n := utf8.EncodeRune(buf[:], r)
		for i := 0; i < n && from != nil; i++ {
			from = from.getChild(buf[i])
		}
		return from
	}

	curNode = descend(curNode, char)
	// Special case: 'q' never occurs by itself, only as 'qu'
	if char == 'q' && curNode != nil {
		curNode = descend(curNode, 'u')
	}
	return curNode
}
Пример #14
0
// Generalized call for multibyte (2 max) decoding (SJIS and CJK). Multibyte to multibyte
//
// dec_table receives encoded bytes from in, converts them to UTF-8 using the
// lookup table enc_tables[enc], and sends the resulting bytes to out one at a
// time. A byte below 128 is forwarded as a single byte. Any other byte is
// held as the first byte of a two-byte sequence; when the next byte arrives
// the pair is combined as first<<8|second and looked up in the table.
//
// A receive on finished is answered on the same channel: with an error when a
// first byte is still waiting for its partner, with nil otherwise. The
// function returns when in is closed or when a receive on closed succeeds.
func dec_table(in, out chan byte, finished, closed chan error, enc Encoding) {
	table := enc_tables[enc]

	var (
		b     byte // byte most recently received from in
		a     byte // pending first byte of a two-byte sequence
		ok    bool // false once in has been closed
		multi bool // true while a holds a first byte awaiting its partner
		sz    int  // number of UTF-8 bytes produced for the current rune
		cp    = make([]byte, utf8.UTFMax)
	)

loop:
	for {
		select {
		case b, ok = <-in:
			if !ok {
				break loop
			}

			if multi {
				// Second byte of a pair: look up the combined value.
				sz = utf8.EncodeRune(cp, table[rune(a)<<8|rune(b)])
				// we don't really care about garbage in the slice
				for _, encoded := range cp[:sz] {
					out <- encoded
				}
			} else {
				if b < 128 {
					// Single-byte value: its UTF-8 encoding is one byte.
					utf8.EncodeRune(cp, rune(b))
					out <- cp[0]
				} else {
					// Remember the first byte and wait for the second.
					multi = true
					a = b
					continue loop
				}
			}
			multi = false

		case <-finished:
			// Report whether the stream ended in the middle of a pair.
			if multi {
				finished <- errors.New("mojibake: dec_table(" + enc.String() + "): malformed byte stream")
			} else {
				finished <- nil
			}
			multi = false

		case <-closed:
			break loop
		}
	}
}
Пример #15
0
// TestRuneIO exercises WriteRune, ReadRune and UnreadRune on a Buffer using
// the first NRune code points.
func TestRuneIO(t *testing.T) {
	const NRune = 1000

	// Build the expected byte sequence while writing the runes.
	want := make([]byte, utf8.UTFMax*NRune)
	var buf Buffer
	used := 0
	for r := rune(0); r < NRune; r++ {
		wantSize := utf8.EncodeRune(want[used:], r)
		gotSize, err := buf.WriteRune(r)
		if err != nil {
			t.Fatalf("WriteRune(%U) error: %s", r, err)
		}
		if gotSize != wantSize {
			t.Fatalf("WriteRune(%U) expected %d, got %d", r, wantSize, gotSize)
		}
		used += wantSize
	}
	want = want[:used]

	// The buffer must now hold exactly the expected bytes.
	if !Equal(buf.Bytes(), want) {
		t.Fatalf("incorrect result from WriteRune: %q not %q", buf.Bytes(), want)
	}

	// Read everything back with ReadRune.
	var scratch [utf8.UTFMax]byte
	for r := rune(0); r < NRune; r++ {
		wantSize := utf8.EncodeRune(scratch[:], r)
		got, gotSize, err := buf.ReadRune()
		if got != r || gotSize != wantSize || err != nil {
			t.Fatalf("ReadRune(%U) got %U,%d not %U,%d (err=%s)", r, got, gotSize, r, wantSize, err)
		}
	}

	// UnreadRune must make the following ReadRune return the same rune.
	buf.Reset()
	buf.Write(want)
	for r := rune(0); r < NRune; r++ {
		first, firstSize, _ := buf.ReadRune()
		if err := buf.UnreadRune(); err != nil {
			t.Fatalf("UnreadRune(%U) got error %q", r, err)
		}
		again, againSize, err := buf.ReadRune()
		if first != again || first != r || againSize != firstSize || err != nil {
			t.Fatalf("ReadRune(%U) after UnreadRune got %U,%d not %U,%d (err=%s)", r, again, againSize, r, firstSize, err)
		}
	}
}
Пример #16
0
// main generates at least size bytes of pseudo-random text from a fixed seed
// and writes the first size bytes to the file called name. The text is a mix
// of runs drawn from the alpha1/alpha2/alpha3 alphabets and runs of 2- and
// 3-byte UTF-8 sequences. It panics when the file cannot be written.
func main() {
	rng := rand.New(rand.NewSource(0))
	var out []byte

	// appendRune appends the UTF-8 encoding of c to out.
	appendRune := func(c rune) {
		var enc [4]byte
		n := utf8.EncodeRune(enc[:], c)
		out = append(out, enc[:n]...)
	}

	for len(out) < size {
		count := 16 + rng.Int()%64 // Length of substring
		if kind := rng.Float32(); kind <= 0.75 {
			// Write strings of base64 encoded values.
			for i := 0; i < count; i++ {
				if pick := rng.Float32(); pick < 0.1 {
					// Write any lowercase letter.
					out = append(out, alpha1[rng.Int()%len(alpha1)])
				} else if pick < 0.7 {
					// Write any lowercase or uppercase letter.
					out = append(out, alpha2[rng.Int()%len(alpha2)])
				} else {
					// Write any character from the base64 alphabet.
					out = append(out, alpha3[rng.Int()%len(alpha3)])
				}
			}
		} else if kind <= 1.00 {
			// Write strings of utf8 encoded values.
			for i := 0; i < count; i++ {
				if pick := rng.Float32(); pick <= 0.65 {
					// Write a 2-byte long utf8 code point.
					appendRune(rune(0x80 + rng.Int()%0x780))
				} else if pick <= 1.00 {
					// Write a 3-byte long utf8 code point.
					appendRune(rune(0x800 + rng.Int()%0xF800))
				}
			}
		}
	}

	if err := ioutil.WriteFile(name, out[:size], 0664); err != nil {
		panic(err)
	}
}
Пример #17
0
// truncatePath shortens path so that it holds at most pathMaxLength runes and
// at most pathMaxLength bytes, never cutting a rune in half.
func truncatePath(path string) string {
	// Truncate all long strings based on rune count
	if utf8.RuneCountInString(path) > pathMaxLength {
		path = string([]rune(path)[:pathMaxLength])
	}

	// With single-byte runes only, the byte length is now within bounds.
	if len(path) <= pathMaxLength {
		return path
	}

	// Multi-byte runes are present: keep as many whole runes as fit in
	// pathMaxLength bytes.
	//
	// This is a workaround for a bug in termbox-go. Remove it when this issue is fixed:
	// https://github.com/nsf/termbox-go/pull/21
	end := 0
	for {
		r, size := utf8.DecodeRuneInString(path[end:])
		if r == utf8.RuneError && size == 1 {
			break
		}
		if end+size > pathMaxLength {
			break
		}
		end += size
	}
	return path[:end]
}
Пример #18
0
// encodeRune appends the terminal representation of r to buf and returns the
// extended slice.
//
// The rune is first run through t.encoder, when one is set. If that produces
// no output, fails, or yields the substitute byte 0x1a, a replacement is
// appended instead, but only when buf is still empty: the entry from t.acs,
// else the entry from t.fallback, else '?'.
func (t *tScreen) encodeRune(r rune, buf []byte) []byte {
	var utf, out [6]byte
	src := utf[:utf8.EncodeRune(utf[:], r)]

	var (
		written int
		err     error
	)
	if enc := t.encoder; enc != nil {
		enc.Reset()
		written, _, err = enc.Transform(out[:], src, true)
	}

	if err == nil && written != 0 && out[0] != '\x1a' {
		return append(buf, out[:written]...)
	}

	// Combining characters are elided
	if len(buf) != 0 {
		return buf
	}
	if acs, ok := t.acs[r]; ok {
		return append(buf, []byte(acs)...)
	}
	if fb, ok := t.fallback[r]; ok {
		return append(buf, []byte(fb)...)
	}
	return append(buf, '?')
}
Пример #19
0
// CanDisplay reports whether r can be shown: either t.encoder can encode it,
// or it has an entry in t.acs, or (only when checkFallbacks is set) it has an
// entry in t.fallback.
func (t *tScreen) CanDisplay(r rune, checkFallbacks bool) bool {
	if enc := t.encoder; enc != nil {
		var src, dst [6]byte
		n := utf8.EncodeRune(src[:], r)

		enc.Reset()
		written, _, err := enc.Transform(dst[:], src[:n], true)
		if err == nil && written != 0 && dst[0] != '\x1A' {
			return true
		}
	}

	// Terminal fallbacks always permitted, since we assume they are
	// basically nearly perfect renditions.
	if _, ok := t.acs[r]; ok {
		return true
	}
	if !checkFallbacks {
		return false
	}
	_, ok := t.fallback[r]
	return ok
}
Пример #20
0
// toLowerDeferredCopy lower-cases s like bytes.ToLower, but overwrites s in
// place for as long as each lower-case rune is no wider than the rune it
// replaces. On the first rune whose lower-case form needs more UTF-8 bytes, a
// new slice is allocated holding the bytes converted so far followed by
// bytes.ToLower of the remainder. Callers must use the returned slice; s may
// have been modified.
func toLowerDeferredCopy(s []byte) []byte {
	write := 0
	for read := 0; read < len(s); {
		r, size := rune(s[read]), 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRune(s[read:])
		}

		lower := unicode.ToLower(r)
		lowerSize := utf8.RuneLen(lower)
		if lowerSize > size {
			// The replacement is wider than the original, so it cannot be
			// written in place. Only known to happen with
			//   Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
			//   Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
			tail := bytes.ToLower(s[read:])
			out := make([]byte, 0, write+len(tail))
			out = append(out, s[:write]...)
			return append(out, tail...)
		}

		utf8.EncodeRune(s[write:], lower)
		read += size
		write += lowerSize
	}
	return s[:write]
}
Пример #21
0
// decodeStr reads a length-prefixed 16-bit string from r and returns it as a
// UTF-8 string.
//
// The input is one byte holding the number of 16-bit units, followed by that
// many units decoded with byteOrder. Each unit is converted as one code point
// on its own (surrogate pairs are not combined). A single trailing NUL, if
// present, is removed. A zero length yields the empty string.
//
// It returns the read error when the length byte or the first data byte
// cannot be read, and an "underflow" error when the data ends early.
func decodeStr(r io.Reader) (string, error) {
	// io.ReadFull is required here: a plain Read may return fewer bytes than
	// asked for (even zero) without that being an error or end of input.
	var szSlice [1]byte
	if _, err := io.ReadFull(r, szSlice[:]); err != nil {
		return "", err
	}
	sz := int(szSlice[0])
	if sz == 0 {
		return "", nil
	}

	data := make([]byte, 2*sz)
	if _, err := io.ReadFull(r, data); err != nil {
		if err == io.ErrUnexpectedEOF {
			return "", fmt.Errorf("underflow")
		}
		return "", err
	}

	utfStr := make([]byte, utf8.UTFMax*sz)
	w := 0
	for i := 0; i < len(data); i += 2 {
		cp := byteOrder.Uint16(data[i:])
		w += utf8.EncodeRune(utfStr[w:], rune(cp))
	}
	// Drop one trailing NUL terminator.
	if utfStr[w-1] == 0 {
		w--
	}
	return string(utfStr[:w]), nil
}
Пример #22
0
// getURLEncodedName percent-encodes name for use in a URL path.
//
// ASCII letters, digits and the characters '-', '_', '.', '~' and '/' are
// copied unchanged. Every other rune is replaced by the upper-case %XX escape
// of each byte of its UTF-8 encoding. Bytes of name that are not valid UTF-8
// are decoded as U+FFFD by the range loop and are therefore emitted as
// %EF%BF%BD.
//
// A name matched by reservedNames is returned as is.
func getURLEncodedName(name string) string {
	// if object matches reserved string, no need to encode them
	if reservedNames.MatchString(name) {
		return name
	}

	// A builder keeps this linear; repeated string concatenation copies the
	// whole result for every rune.
	var encoded strings.Builder
	encoded.Grow(len(name))
	var buf [utf8.UTFMax]byte

	for _, s := range name {
		if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark)
			encoded.WriteByte(byte(s))
			continue
		}
		switch s {
		case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark)
			encoded.WriteByte(byte(s))
		default:
			// A range loop only yields valid runes, so encoding cannot fail.
			n := utf8.EncodeRune(buf[:], s)
			for _, b := range buf[:n] {
				encoded.WriteByte('%')
				encoded.WriteString(strings.ToUpper(hex.EncodeToString([]byte{b})))
			}
		}
	}
	return encoded.String()
}
Пример #23
0
// Map returns a copy of the byte slice s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the output with no replacement. The characters in
// s and the output are interpreted as UTF-8-encoded Unicode code points.
//
// If mapping returns a value that is not a valid code point (a surrogate or a
// value above unicode.MaxRune), U+FFFD is written in its place.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output usually has about the same size as the input. Appending
	// through a scratch buffer lets it grow or shrink safely, including for
	// invalid runes, whose encoding (U+FFFD) utf8.RuneLen does not report.
	b := make([]byte, 0, len(s))
	var enc [utf8.UTFMax]byte

	for i := 0; i < len(s); {
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}
		if r = mapping(r); r >= 0 {
			n := utf8.EncodeRune(enc[:], r)
			b = append(b, enc[:n]...)
		}
		i += wid
	}
	return b
}
Пример #24
0
// Transform copies src to dst rune by rune, replacing every byte of invalid
// UTF-8 with U+FFFD. It stops with transform.ErrShortDst when the next rune
// does not fit in dst, and with transform.ErrShortSrc when src ends in an
// incomplete rune and atEOF is false. nDst and nSrc report the bytes written
// and consumed.
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		// Assume a 1-byte rune; decode properly when the byte says otherwise.
		r, size := rune(src[nSrc]), 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRune(src[nSrc:])
			if size == 1 {
				// All valid runes of size 1 (those below utf8.RuneSelf) were
				// handled above. We have invalid UTF-8 or we haven't seen the
				// full character yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					err = transform.ErrShortSrc
					break
				}
				r = '\ufffd'
			}
		}

		if nDst+utf8.RuneLen(r) > len(dst) {
			err = transform.ErrShortDst
			break
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
		nSrc += size
	}
	return nDst, nSrc, err
}
Пример #25
0
// setPrefix extracts the literal text at the beginning of the pattern,
// possibly after a leading iBOT, and stores it in re.prefixBytes and
// re.prefix. re.prefixStart is set to the first instruction after that text.
// That text can be used by doExecute to speed up matching.
func (re *Regexp) setPrefix() {
	var (
		prefix []byte
		enc    [utf8.UTFMax]byte
	)

	// First instruction is start; skip that.  Also skip any initial iBOT.
	inst := re.inst[0].next
	for inst.kind == iBOT {
		inst = inst.next
	}

	for inst.kind != iEnd {
		// stop if this is not a char
		if inst.kind != iChar {
			break
		}
		// stop if this char can be followed by a match for an empty string,
		// which includes closures, ^, and $.
		if next := inst.next.kind; next == iBOT || next == iEOT || next == iAlt {
			break
		}
		n := utf8.EncodeRune(enc[:], inst.char)
		prefix = append(prefix, enc[:n]...)
		inst = inst.next
	}

	// point prefixStart instruction to first non-CHAR after prefix
	re.prefixStart = inst
	re.prefixBytes = prefix
	re.prefix = string(prefix)
}
Пример #26
0
// urlEncodePathReserved matches names made up only of characters that
// urlEncodePath leaves unescaped. It is compiled once instead of on every
// call.
var urlEncodePathReserved = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$")

// urlEncodePath percent-encodes pathName for use in a URL path.
//
// ASCII letters, digits and the characters '-', '_', '.', '~' and '/' are
// copied unchanged. Every other rune is replaced by the upper-case %XX escape
// of each byte of its UTF-8 encoding. Bytes of pathName that are not valid
// UTF-8 are decoded as U+FFFD by the range loop and are therefore emitted as
// %EF%BF%BD.
func urlEncodePath(pathName string) string {
	// if object matches reserved string, no need to encode them
	if urlEncodePathReserved.MatchString(pathName) {
		return pathName
	}

	// A builder keeps this linear; repeated string concatenation copies the
	// whole result for every rune.
	var encoded strings.Builder
	encoded.Grow(len(pathName))
	var buf [utf8.UTFMax]byte

	for _, s := range pathName {
		if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark)
			encoded.WriteByte(byte(s))
			continue
		}
		switch s {
		case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark)
			encoded.WriteByte(byte(s))
		default:
			// A range loop only yields valid runes, so encoding cannot fail.
			n := utf8.EncodeRune(buf[:], s)
			for _, b := range buf[:n] {
				encoded.WriteByte('%')
				encoded.WriteString(strings.ToUpper(hex.EncodeToString([]byte{b})))
			}
		}
	}
	return encoded.String()
}
Пример #27
0
// lookupByName finds the option called name, ignoring case, and returns it
// together with the name it was found under.
//
// When ini is set, g.IniNames and then g.Names are consulted first. After
// that g.LongNames is tried, and finally, for a name of exactly one rune,
// g.ShortNames. It returns nil and "" when nothing matches.
func (g *Group) lookupByName(name string, ini bool) (*Option, string) {
	key := strings.ToLower(name)

	if ini {
		if opt := g.IniNames[key]; opt != nil {
			return opt, opt.Field.Tag.Get("ini-name")
		}
		if opt := g.Names[key]; opt != nil {
			return opt, opt.Field.Name
		}
	}

	if opt := g.LongNames[key]; opt != nil {
		return opt, opt.LongName
	}

	// Only a single-rune name can be a short option.
	if utf8.RuneCountInString(key) != 1 {
		return nil, ""
	}
	short, _ := utf8.DecodeRuneInString(key)
	opt := g.ShortNames[short]
	if opt == nil {
		return nil, ""
	}

	encoded := make([]byte, utf8.RuneLen(opt.ShortName))
	utf8.EncodeRune(encoded, opt.ShortName)
	return opt, string(encoded)
}
Пример #28
0
// WriteRune writes the UTF-8 encoding of the code point r, returning the
// number of bytes written and any error.
func (b *Writer) WriteRune(r rune) (size int, err error) {
	// Single-byte runes take the WriteByte path.
	if r < utf8.RuneSelf {
		if err = b.WriteByte(byte(r)); err != nil {
			return 0, err
		}
		return 1, nil
	}
	if b.err != nil {
		return 0, b.err
	}

	// Make sure a rune of maximum size fits before encoding in place.
	if b.Available() < utf8.UTFMax {
		if b.Flush(); b.err != nil {
			return 0, b.err
		}
		if b.Available() < utf8.UTFMax {
			// Can only happen if buffer is silly small.
			return b.WriteString(string(r))
		}
	}

	size = utf8.EncodeRune(b.buf[b.n:], r)
	b.n += size
	return size, nil
}
Пример #29
0
// Transform decodes src rune by rune, passes each rune through t and writes
// the UTF-8 encoding of the result to dst. It stops with
// transform.ErrShortDst when the next encoded rune does not fit in dst, and
// with transform.ErrShortSrc when src ends in an incomplete rune and atEOF is
// false. nDst and nSrc report the bytes written and consumed.
func (t replaceTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	var scratch [utf8.UTFMax]byte

	for nSrc < len(src) {
		rest := src[nSrc:]

		r, width := rune(rest[0]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(rest)
			// Width 1 here means an invalid or incomplete rune.
			if width == 1 && !atEOF && !utf8.FullRune(rest) {
				err = transform.ErrShortSrc
				break
			}
		}

		n := utf8.EncodeRune(scratch[:], t(r))
		if nDst+n > len(dst) {
			err = transform.ErrShortDst
			break
		}

		nDst += copy(dst[nDst:], scratch[:n])
		nSrc += width
	}
	return nDst, nSrc, err
}
Пример #30
0
// WriteRune appends the UTF-8 encoding of r to the buffer. It grows the
// buffer by utf8.UTFMax bytes, encodes r at the start of the new space and
// then trims the bytes the encoding did not use. It returns the number of
// bytes written; the error is always nil.
func (b *Buffer) WriteRune(r rune) (int, error) {
	start := b.Grows(utf8.UTFMax)
	written := utf8.EncodeRune(b.Buf[start:], r)
	unused := utf8.UTFMax - written
	b.Buf = b.Buf[:len(b.Buf)-unused]
	return written, nil
}