// toCamelCase builds a string from the letters of x that either start a word
// or are already upper case.
//
// The input is scanned rune by rune:
//   - '_' is dropped and marks the next letter as the start of a word;
//   - the first letter of the input, and the first letter following a '_',
//     is emitted in upper case;
//   - any other upper-case letter is emitted preceded by a '_';
//   - every other rune (lower-case letters inside a word, digits,
//     punctuation, invalid UTF-8) is dropped.
//
// For example "foo_bar" yields "FB" and "fooBar" yields "F_B".
func toCamelCase(x string) string {
	if len(x) == 0 {
		return ""
	}

	output := make([]byte, 0, len(x))
	// The scratch buffer is always utf8.UTFMax bytes: the upper-case form of
	// a rune may need more (or fewer) bytes than the rune itself, so it must
	// not be sized from the input encoding.
	var buf [utf8.UTFMax]byte
	uppercase := true

	for _, v := range x {
		switch {
		case v == '_':
			uppercase = true
		case !unicode.IsLetter(v):
			// Non-letters are dropped and do not consume the pending
			// "start of word" state.
		case uppercase:
			uppercase = false
			n := utf8.EncodeRune(buf[:], unicode.ToUpper(v))
			output = append(output, buf[:n]...)
		case unicode.IsUpper(v):
			output = append(output, '_')
			n := utf8.EncodeRune(buf[:], v)
			output = append(output, buf[:n]...)
		}
	}
	return string(output)
}
// normalizeBidi attempts to prevent names from using bidi control codes to // screw up our layout func normalizeBidi(name string) string { bidiExplicitDepth := 0 bidiIsolateDepth := 0 for _, c := range name { switch c { case ltrEmbed, rtlEmbed, ltrOverride, rtlOverride: bidiExplicitDepth++ case bidiExplicitPop: bidiExplicitDepth-- case ltrIsolate, rtlIsolate, fsIsolate: bidiIsolateDepth++ case bidiIsolatePop: bidiIsolateDepth-- } } if bidiExplicitDepth+bidiIsolateDepth > 0 { pops := make([]byte, bidiExplicitDepth*utf8.RuneLen(bidiExplicitPop)+bidiIsolateDepth+utf8.RuneLen(bidiIsolatePop)) i := 0 for ; bidiExplicitDepth > 0; bidiExplicitDepth-- { i += utf8.EncodeRune(pops[i:], bidiExplicitPop) } for ; bidiIsolateDepth > 0; bidiIsolateDepth-- { i += utf8.EncodeRune(pops[i:], bidiIsolatePop) } return name + string(pops[:i]) } return name }
func TestReadWriteRune(t *testing.T) { const NRune = 1000 byteBuf := new(bytes.Buffer) w := NewAsyncWriter(byteBuf) // Write the runes out using WriteRune buf := make([]byte, utf8.UTFMax) for r := rune(0); r < NRune; r++ { size := utf8.EncodeRune(buf, r) nbytes, err := w.WriteRune(r) if err != nil { t.Fatalf("WriteRune(0x%x) error: %s", r, err) } if nbytes != size { t.Fatalf("WriteRune(0x%x) expected %d, got %d", r, size, nbytes) } } w.Flush() r := bufio.NewReader(byteBuf) // Read them back with ReadRune for r1 := rune(0); r1 < NRune; r1++ { size := utf8.EncodeRune(buf, r1) nr, nbytes, err := r.ReadRune() if nr != r1 || nbytes != size || err != nil { t.Fatalf("ReadRune(0x%x) got 0x%x,%d not 0x%x,%d (err=%s)", r1, nr, nbytes, r1, size, err) } } }
// readConsole reads utf16 characters from console File, // encodes them into utf8 and stores them in buffer buf. // It returns the number of utf8 bytes read and an error, if any. func (f *File) readConsole(buf []byte) (n int, err error) { if len(buf) == 0 { return 0, nil } if len(f.readbuf) > 0 { return f.copyReadConsoleBuffer(buf) } wchar, err := f.readOneUTF16FromConsole() if err != nil { return 0, err } r := rune(wchar) if utf16.IsSurrogate(r) { wchar, err := f.readOneUTF16FromConsole() if err != nil { return 0, err } r = utf16.DecodeRune(r, rune(wchar)) } if nr := utf8.RuneLen(r); nr > len(buf) { start := len(f.readbuf) for ; nr > 0; nr-- { f.readbuf = append(f.readbuf, 0) } utf8.EncodeRune(f.readbuf[start:cap(f.readbuf)], r) } else { utf8.EncodeRune(buf, r) buf = buf[nr:] n += nr } if n > 0 { return n, nil } return f.copyReadConsoleBuffer(buf) }
// ToUpperCamelCase returns a copy of the string s with all Unicode letters mapped to their camel case. // It will convert to upper case previous letter of '_' and first letter, and remove letter of '_'. func ToUpperCamelCase(s string) string { if s == "" { return "" } upper := true start := 0 result := make([]byte, 0, len(s)) var runeBuf [utf8.UTFMax]byte var initialism []byte for _, c := range s { if c == '_' { upper = true candidate := string(result[start:]) initialism = initialism[:0] for _, r := range candidate { if r < utf8.RuneSelf { initialism = append(initialism, toUpperASCII(byte(r))) } else { n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r)) initialism = append(initialism, runeBuf[:n]...) } } if length := commonInitialism.LookupByBytes(initialism); length > 0 { result = append(result[:start], initialism...) } start = len(result) continue } if upper { if c < utf8.RuneSelf { result = append(result, toUpperASCII(byte(c))) } else { n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(c)) result = append(result, runeBuf[:n]...) } upper = false continue } if c < utf8.RuneSelf { result = append(result, byte(c)) } else { n := utf8.EncodeRune(runeBuf[:], c) result = append(result, runeBuf[:n]...) } } candidate := string(result[start:]) initialism = initialism[:0] for _, r := range candidate { if r < utf8.RuneSelf { initialism = append(initialism, toUpperASCII(byte(r))) } else { n := utf8.EncodeRune(runeBuf[:], unicode.ToUpper(r)) initialism = append(initialism, runeBuf[:n]...) } } if length := commonInitialism.LookupByBytes(initialism); length > 0 { result = append(result[:start], initialism...) } return string(result) }
func UTF16ToUTF8Bytes(in []byte, out io.Writer) error { if len(in)%2 != 0 { return fmt.Errorf("input buffer must have an even length (length=%d)", len(in)) } var runeBuf [4]byte var v1, v2 uint16 for i := 0; i < len(in); i += 2 { v1 = uint16(in[i]) | uint16(in[i+1])<<8 switch { case v1 < surr1, surr3 <= v1: n := utf8.EncodeRune(runeBuf[:], rune(v1)) out.Write(runeBuf[:n]) case surr1 <= v1 && v1 < surr2 && len(in) > i+2: v2 = uint16(in[i+2]) | uint16(in[i+3])<<8 if surr2 <= v2 && v2 < surr3 { // valid surrogate sequence r := utf16.DecodeRune(rune(v1), rune(v2)) n := utf8.EncodeRune(runeBuf[:], r) out.Write(runeBuf[:n]) } i += 2 default: // invalid surrogate sequence n := utf8.EncodeRune(runeBuf[:], replacementChar) out.Write(runeBuf[:n]) } } return nil }
func (t *Textbox) Tl(u rune, v rune) error { //Check if rune is valid if !utf8.ValidRune(v) { return errors.New("invalid rune.") } for i, val := range t.canvas { for j, w := 0, 0; j < len(val); j += w { runeValue, width := utf8.DecodeRune(val[j:]) if runeValue == u { tmp := t.canvas[i][j : j+width] replaceWidth := utf8.RuneLen(v) if width == replaceWidth { // replace rune utf8.EncodeRune(tmp, v) } else if width > replaceWidth { // replace and pad utf8.EncodeRune(tmp, v) tmp = tmp[replaceWidth:] for len(tmp) > 0 { replaceWidth = utf8.EncodeRune(tmp, 0x0) tmp = tmp[replaceWidth:] } } else { // rune wont fit. return errors.New("rune too large.") } } w = width } } return nil }
func TestFoldAgainstUnicode(t *testing.T) { const bufSize = 5 buf1 := make([]byte, 0, bufSize) buf2 := make([]byte, 0, bufSize) var runes []rune for i := 0x20; i <= 0x7f; i++ { runes = append(runes, rune(i)) } runes = append(runes, kelvin, smallLongEss) funcs := []struct { name string fold func(s, t []byte) bool letter bool // must be ASCII letter simple bool // must be simple ASCII letter (not 'S' or 'K') }{ { name: "equalFoldRight", fold: equalFoldRight, }, { name: "asciiEqualFold", fold: asciiEqualFold, simple: true, }, { name: "simpleLetterEqualFold", fold: simpleLetterEqualFold, simple: true, letter: true, }, } for _, ff := range funcs { for _, r := range runes { if r >= utf8.RuneSelf { continue } if ff.letter && !isASCIILetter(byte(r)) { continue } if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') { continue } for _, r2 := range runes { buf1 := append(buf1[:0], 'x') buf2 := append(buf2[:0], 'x') buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)] buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)] buf1 = append(buf1, 'x') buf2 = append(buf2, 'x') want := bytes.EqualFold(buf1, buf2) if got := ff.fold(buf1, buf2); got != want { t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want) } } } } }
func MarshalBinary(data []byte, ifc interface{}) error { var ( uix uint64 ui uint32 ) switch ifc.(type) { case byte: data[0] = ifc.(byte) case float32: ui = math.Float32bits(ifc.(float32)) m32(data, ui) case float64: uix = math.Float64bits(ifc.(float64)) m64(data, uix) case rune: ui = uint32(ifc.(rune)) m32(data, ui) case int: ui = uint32(ifc.(int)) m32(data, ui) case uint32: ui = ifc.(uint32) m32(data, ui) case uintptr: uix = uint64(ifc.(uintptr)) for i := uint64(0); i < 8; i++ { data[i] = byte((uix >> (8 * i)) & 0xff) } case uint64: uix = ifc.(uint64) m64(data, uix) case []rune: dd := make([]byte, utf8.UTFMax) xd := ifc.([]rune) n := 0 for _, r := range xd { l := utf8.EncodeRune(dd, r) copy(data[n:n+l], dd) n += l } case string: dd := make([]byte, utf8.UTFMax) xd := ifc.(string) n := 0 for _, r := range xd { l := utf8.EncodeRune(dd, r) copy(data[n:n+l], dd) n += l } case []byte: xd := ifc.([]byte) data := make([]byte, len(xd)) copy(data[0:len(xd)], xd) default: return errors.New("unknown type for marshal") } return nil }
// addRange adds to b a branch (or several) matching the UTF-8 encodings of
// the runes in the inclusive range lo-hi. An empty range (lo > hi) adds
// nothing.
//
// The range is split recursively until every piece can be described by one
// byte range per encoded position; each such piece is then emitted with
// b.suffix / b.addBranch. fold is forwarded to the recursive calls and to
// b.suffix for the pure-ASCII case only; multi-byte pieces always pass false.
func (b *runeBuilder) addRange(lo, hi rune, fold bool) {
	if lo > hi {
		return
	}

	// TODO: Pick off 80-10FFFF for special handling?
	// (Placeholder: the branch below is intentionally empty.)
	if lo == 0x80 && hi == 0x10FFFF {
	}

	// Split range into same-length sized ranges.
	// NOTE(review): maxRune(i) is presumably the largest rune whose UTF-8
	// encoding is i bytes long — confirm against its definition.
	for i := 1; i < utf8.UTFMax; i++ {
		max := maxRune(i)
		if lo <= max && max < hi {
			b.addRange(lo, max, fold)
			b.addRange(max+1, hi, fold)
			return
		}
	}

	// ASCII range is special: a single byte range is enough.
	if hi < utf8.RuneSelf {
		b.addBranch(b.suffix(byte(lo), byte(hi), fold, 0))
		return
	}

	// Split range into sections that agree on leading bytes.
	for i := 1; i < utf8.UTFMax; i++ {
		m := rune(1)<<uint(6*i) - 1 // last i bytes of UTF-8 sequence
		if lo&^m != hi&^m {
			// lo and hi differ above the low 6*i bits. Peel off the part of
			// the range that does not start (or end) on a 6*i-bit boundary.
			if lo&m != 0 {
				b.addRange(lo, lo|m, fold)
				b.addRange((lo|m)+1, hi, fold)
				return
			}
			if hi&m != m {
				b.addRange(lo, hi&^m-1, fold)
				b.addRange(hi&^m, hi, fold)
				return
			}
		}
	}

	// Finally. Generate byte matching equivalent for lo-hi.
	var ulo, uhi [utf8.UTFMax]byte
	n := utf8.EncodeRune(ulo[:], lo)
	m := utf8.EncodeRune(uhi[:], hi)
	if n != m {
		// The same-length split above should make this impossible.
		panic("codesearch/regexp: bad utf-8 math")
	}
	// Chain the per-byte ranges from the last encoded byte to the first,
	// feeding each b.suffix result into the next call.
	pc := uint32(0)
	for i := n - 1; i >= 0; i-- {
		pc = b.suffix(ulo[i], uhi[i], false, pc)
	}
	b.addBranch(pc)
}
func (f *format) parse() error { if len(f.pattern) == 0 { return nil } f.parsed = true // only IF there is a format pw, sw := 0, 0 // prefixWritten, suffixWritten suffixStart, precStart := false, false hasGroup, hasPlus, hasMinus := false, false, false precCount := 0 for _, c := range f.pattern { switch c { case '+': hasPlus = true case '-': hasMinus = true case '#', '0', '.', ',': if false == hasGroup && c == ',' { hasGroup = true } if precStart { precCount++ } if false == precStart && c == '.' { precStart = true } suffixStart = true default: if false == suffixStart { // prefix if c > 0 { pw += utf8.EncodeRune(f.prefix[pw:], c) f.prefix = f.prefix[:formatBufferSize] } } else if c > 0 { // suffix sw += utf8.EncodeRune(f.suffix[sw:], c) f.suffix = f.suffix[:formatBufferSize] } } } f.prefix = f.prefix[:pw] f.suffix = f.suffix[:sw] if false == hasGroup { f.group = 0 } if false == hasPlus { f.plusSign = 0 } if false == hasMinus { f.minusSign = 0 } f.precision = precCount return nil }
// decomposeHangul writes the decomposed Hangul to buf and returns the number // of bytes written. len(buf) should be at least 9. func decomposeHangul(buf []byte, r rune) int { const JamoUTF8Len = 3 r -= hangulBase x := r % jamoTCount r /= jamoTCount utf8.EncodeRune(buf, jamoLBase+r/jamoVCount) utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount) if x != 0 { utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x) return 3 * JamoUTF8Len } return 2 * JamoUTF8Len }
func nextNode(curNode *node, char rune) *node { buf := [utf8.UTFMax]byte{} for i := 0; i < utf8.EncodeRune(buf[:], char) && curNode != nil; i++ { curNode = curNode.getChild(buf[i]) } // Special case: 'q' never occurs by itself, only as 'qu' if char == 'q' && curNode != nil { for i := 0; i < utf8.EncodeRune(buf[:], 'u') && curNode != nil; i++ { curNode = curNode.getChild(buf[i]) } } return curNode }
// dec_table is the generalized decoder for table-driven multibyte (2 bytes
// max) encodings such as SJIS and CJK. It reads encoded bytes from in, looks
// two-byte sequences up in enc_tables[enc] and sends the UTF-8 encoding of the
// result to out, one byte at a time.
//
// The function loops until in is closed or a value arrives on closed.
// Receiving a value on finished makes it report the decoder state back on the
// same channel: an error if a lead byte is still waiting for its second byte,
// nil otherwise.
func dec_table(in, out chan byte, finished, closed chan error, enc Encoding) {
	table := enc_tables[enc]
	var (
		b     byte                            // byte just received
		a     byte                            // pending lead byte of a two-byte sequence
		ok    bool                            // false once in has been closed
		multi bool                            // true while a lead byte is pending
		sz    int                             // length of the UTF-8 encoding in cp
		cp    = make([]byte, utf8.UTFMax) // scratch buffer for EncodeRune
	)
loop:
	for {
		select {
		case b, ok = <-in:
			if !ok {
				break loop
			}
			if multi {
				// Second byte: combine with the lead byte into the table key.
				sz = utf8.EncodeRune(cp, table[rune(a)<<8|rune(b)])
				// we don't really care about garbage in the slice
				for _, encoded := range cp[:sz] {
					out <- encoded
				}
			} else {
				if b < 128 {
					// Bytes below 128 are passed through as a single byte.
					utf8.EncodeRune(cp, rune(b))
					out <- cp[0]
				} else {
					// Lead byte: remember it and wait for the next byte.
					multi = true
					a = b
					continue loop
				}
			}
			multi = false
		case <-finished:
			// NOTE(review): the reply is sent on the same channel the request
			// came in on; presumably the caller sends and then receives —
			// confirm against the caller.
			if multi {
				finished <- errors.New("mojibake: dec_table(" + enc.String() + "): malformed byte stream")
			} else {
				finished <- nil
			}
			multi = false
		case <-closed:
			break loop
		}
	}
}
func TestRuneIO(t *testing.T) { const NRune = 1000 // Built a test slice while we write the data b := make([]byte, utf8.UTFMax*NRune) var buf Buffer n := 0 for r := rune(0); r < NRune; r++ { size := utf8.EncodeRune(b[n:], r) nbytes, err := buf.WriteRune(r) if err != nil { t.Fatalf("WriteRune(%U) error: %s", r, err) } if nbytes != size { t.Fatalf("WriteRune(%U) expected %d, got %d", r, size, nbytes) } n += size } b = b[0:n] // Check the resulting bytes if !Equal(buf.Bytes(), b) { t.Fatalf("incorrect result from WriteRune: %q not %q", buf.Bytes(), b) } p := make([]byte, utf8.UTFMax) // Read it back with ReadRune for r := rune(0); r < NRune; r++ { size := utf8.EncodeRune(p, r) nr, nbytes, err := buf.ReadRune() if nr != r || nbytes != size || err != nil { t.Fatalf("ReadRune(%U) got %U,%d not %U,%d (err=%s)", r, nr, nbytes, r, size, err) } } // Check that UnreadRune works buf.Reset() buf.Write(b) for r := rune(0); r < NRune; r++ { r1, size, _ := buf.ReadRune() if err := buf.UnreadRune(); err != nil { t.Fatalf("UnreadRune(%U) got error %q", r, err) } r2, nbytes, err := buf.ReadRune() if r1 != r2 || r1 != r || nbytes != size || err != nil { t.Fatalf("ReadRune(%U) after UnreadRune got %U,%d not %U,%d (err=%s)", r, r2, nbytes, r, size, err) } } }
func main() { var b []byte var r = rand.New(rand.NewSource(0)) for len(b) < size { n := 16 + r.Int()%64 // Length of substring p := r.Float32() switch { case p <= 0.75: // Write strings of base64 encoded values. for i := 0; i < n; i++ { p := r.Float32() switch { case p < 0.1: // Write any lowercase letter. b = append(b, alpha1[r.Int()%len(alpha1)]) case p < 0.7: // Write any lowercase or uppercase letter. b = append(b, alpha2[r.Int()%len(alpha2)]) default: // Write any character from the base64 alphabet. b = append(b, alpha3[r.Int()%len(alpha3)]) } } case p <= 1.00: // Write strings of utf8 encoded values. for i := 0; i < n; i++ { p := r.Float32() switch { case p <= 0.65: // Write a 2-byte long utf8 code point. var buf [4]byte cnt := utf8.EncodeRune(buf[:], rune(0x80+r.Int()%0x780)) b = append(b, buf[:cnt]...) case p <= 1.00: // Write a 3-byte long utf8 code point. var buf [4]byte cnt := utf8.EncodeRune(buf[:], rune(0x800+r.Int()%0xF800)) b = append(b, buf[:cnt]...) } } } } if err := ioutil.WriteFile(name, b[:size], 0664); err != nil { panic(err) } }
func truncatePath(path string) string { // Truncate all long strings based on rune count if utf8.RuneCountInString(path) > pathMaxLength { path = string([]rune(path)[:pathMaxLength]) } // By this point, len(path) should be < pathMaxLength if we're dealing with single-byte runes. // Otherwise, we have a multi-byte string and need to calculate the size of each rune and // truncate manually. // // This is a workaround for a bug in termbox-go. Remove it when this issue is fixed: // https://github.com/nsf/termbox-go/pull/21 if len(path) > pathMaxLength { out := make([]byte, pathMaxLength, pathMaxLength) length := 0 for { r, size := utf8.DecodeRuneInString(path[length:]) if r == utf8.RuneError && size == 1 { break } // utf8.EncodeRune expects there to be enough room to store the full size of the rune if length+size <= pathMaxLength { utf8.EncodeRune(out[length:], r) length += size } else { break } } path = string(out[:length]) } return path }
func (t *tScreen) encodeRune(r rune, buf []byte) []byte { nb := make([]byte, 6) ob := make([]byte, 6) num := utf8.EncodeRune(ob, r) ob = ob[:num] dst := 0 var err error if enc := t.encoder; enc != nil { enc.Reset() dst, _, err = enc.Transform(nb, ob, true) } if err != nil || dst == 0 || nb[0] == '\x1a' { // Combining characters are elided if len(buf) == 0 { if acs, ok := t.acs[r]; ok { buf = append(buf, []byte(acs)...) } else if fb, ok := t.fallback[r]; ok { buf = append(buf, []byte(fb)...) } else { buf = append(buf, '?') } } } else { buf = append(buf, nb[:dst]...) } return buf }
func (t *tScreen) CanDisplay(r rune, checkFallbacks bool) bool { if enc := t.encoder; enc != nil { nb := make([]byte, 6) ob := make([]byte, 6) num := utf8.EncodeRune(ob, r) enc.Reset() dst, _, err := enc.Transform(nb, ob[:num], true) if dst != 0 && err == nil && nb[0] != '\x1A' { return true } } // Terminal fallbacks always permitted, since we assume they are // basically nearly perfect renditions. if _, ok := t.acs[r]; ok { return true } if !checkFallbacks { return false } if _, ok := t.fallback[r]; ok { return true } return false }
// toLowerDeferredCopy lower-cases s like bytes.ToLower does, but reuses
// (overwrites) the backing array of s whenever possible.
//
// NOTE: the lower-case form of a rune can have a longer UTF-8 encoding than
// the rune itself. As soon as that happens the remainder is handed to
// bytes.ToLower and a newly allocated slice is returned; the part of s that
// was already processed has still been overwritten.
func toLowerDeferredCopy(s []byte) []byte {
	w := 0 // write offset; never runs ahead of the read offset
	for rd := 0; rd < len(s); {
		r, width := rune(s[rd]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[rd:])
		}

		lower := unicode.ToLower(r)
		lowerWidth := utf8.RuneLen(lower)
		if lowerWidth > width {
			// The replacement is wider than the original, so it cannot be
			// written in place. Only known to happen with
			//   Ⱥ (570, width 2) -> ⱥ (11365, width 3)
			//   Ⱦ (574, width 2) -> ⱦ (11366, width 3)
			// Punt and defer to bytes.ToLower for the remainder.
			tail := bytes.ToLower(s[rd:])
			out := make([]byte, w+len(tail))
			copy(out, s[:w])
			copy(out[w:], tail)
			return out
		}

		utf8.EncodeRune(s[w:], lower)
		rd += width
		w += lowerWidth
	}
	return s[:w]
}
func decodeStr(r io.Reader) (string, error) { var szSlice [1]byte _, err := r.Read(szSlice[:]) if err != nil { return "", err } sz := int(szSlice[0]) if sz == 0 { return "", nil } utfStr := make([]byte, 4*sz) data := make([]byte, 2*sz) n, err := r.Read(data) if err != nil { return "", err } if n < len(data) { return "", fmt.Errorf("underflow") } w := 0 for i := 0; i < int(2*sz); i += 2 { cp := byteOrder.Uint16(data[i:]) w += utf8.EncodeRune(utfStr[w:], rune(cp)) } if utfStr[w-1] == 0 { w-- } s := string(utfStr[:w]) return s, nil }
// getURLEncodedName encode the strings from UTF-8 byte representations to HTML hex escape sequences // // This is necessary since regular url.Parse() and url.Encode() functions do not support UTF-8 // non english characters cannot be parsed due to the nature in which url.Encode() is written // // This function on the other hand is a direct replacement for url.Encode() technique to support // pretty much every UTF-8 character. func getURLEncodedName(name string) string { // if object matches reserved string, no need to encode them if reservedNames.MatchString(name) { return name } var encodedName string for _, s := range name { if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark) encodedName = encodedName + string(s) continue } switch s { case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark) encodedName = encodedName + string(s) continue default: len := utf8.RuneLen(s) if len < 0 { return name } u := make([]byte, len) utf8.EncodeRune(u, s) for _, r := range u { hex := hex.EncodeToString([]byte{r}) encodedName = encodedName + "%" + strings.ToUpper(hex) } } } return encodedName }
// Map returns a copy of the byte slice s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the output with no replacement. The characters in
// s and the output are interpreted as UTF-8-encoded Unicode code points.
//
// Invalid UTF-8 in s is passed to mapping as utf8.RuneError, one byte at a
// time. If mapping returns a value that is not a valid rune, it is written as
// the encoding of utf8.RuneError.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output usually has about the size of the input; append takes care
	// of growing it in the rare case that mapped runes need more room.
	b := make([]byte, 0, len(s))
	var enc [utf8.UTFMax]byte

	for i := 0; i < len(s); {
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}
		if r = mapping(r); r >= 0 {
			// Encode into a full-size scratch buffer so that an invalid rune
			// (written as the 3-byte U+FFFD) can never overrun the output.
			n := utf8.EncodeRune(enc[:], r)
			b = append(b, enc[:n]...)
		}
		i += wid
	}
	return b
}
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { r, size := rune(0), 0 for ; nSrc < len(src); nSrc += size { r = rune(src[nSrc]) // Decode a 1-byte rune. if r < utf8.RuneSelf { size = 1 } else { // Decode a multi-byte rune. r, size = utf8.DecodeRune(src[nSrc:]) if size == 1 { // All valid runes of size 1 (those below utf8.RuneSelf) were // handled above. We have invalid UTF-8 or we haven't seen the // full character yet. if !atEOF && !utf8.FullRune(src[nSrc:]) { err = transform.ErrShortSrc break } r = '\ufffd' } } if nDst+utf8.RuneLen(r) > len(dst) { err = transform.ErrShortDst break } nDst += utf8.EncodeRune(dst[nDst:], r) } return nDst, nSrc, err }
// Extract regular text from the beginning of the pattern, // possibly after a leading iBOT. // That text can be used by doExecute to speed up matching. func (re *Regexp) setPrefix() { var b []byte var utf = make([]byte, utf8.UTFMax) var inst *instr // First instruction is start; skip that. Also skip any initial iBOT. inst = re.inst[0].next for inst.kind == iBOT { inst = inst.next } Loop: for ; inst.kind != iEnd; inst = inst.next { // stop if this is not a char if inst.kind != iChar { break } // stop if this char can be followed by a match for an empty string, // which includes closures, ^, and $. switch inst.next.kind { case iBOT, iEOT, iAlt: break Loop } n := utf8.EncodeRune(utf, inst.char) b = append(b, utf[0:n]...) } // point prefixStart instruction to first non-CHAR after prefix re.prefixStart = inst re.prefixBytes = b re.prefix = string(b) }
// urlEncodePathUnreserved matches path names made up entirely of characters
// that urlEncodePath copies unchanged. It is compiled once at package
// initialisation rather than on every call.
var urlEncodePathUnreserved = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$")

// urlEncodePath percent-encodes pathName for use in a URL path.
//
// ASCII letters, digits and the characters '-', '_', '.', '~' and '/' are
// copied unchanged (RFC 3986 §2.3 unreserved characters, plus the path
// separator). Every other rune is replaced by its UTF-8 bytes, each written
// as "%XX" with upper-case hex digits, which makes the function usable for
// any UTF-8 input. Invalid UTF-8 in pathName is encoded as U+FFFD.
func urlEncodePath(pathName string) string {
	// Fast path: nothing to encode.
	if urlEncodePathUnreserved.MatchString(pathName) {
		return pathName
	}

	var encoded strings.Builder
	var buf [utf8.UTFMax]byte
	for _, s := range pathName {
		switch {
		case 'A' <= s && s <= 'Z', 'a' <= s && s <= 'z', '0' <= s && s <= '9':
			// §2.3 Unreserved characters (alphanum)
			encoded.WriteByte(byte(s))
		case s == '-', s == '_', s == '.', s == '~', s == '/':
			// §2.3 Unreserved characters (mark)
			encoded.WriteByte(byte(s))
		default:
			n := utf8.RuneLen(s)
			if n < 0 {
				// If utf8 cannot convert, return the same string as is.
				return pathName
			}
			utf8.EncodeRune(buf[:], s)
			digits := strings.ToUpper(hex.EncodeToString(buf[:n]))
			for i := 0; i < len(digits); i += 2 {
				encoded.WriteByte('%')
				encoded.WriteString(digits[i : i+2])
			}
		}
	}
	return encoded.String()
}
func (g *Group) lookupByName(name string, ini bool) (*Option, string) { name = strings.ToLower(name) if ini { if ret := g.IniNames[name]; ret != nil { return ret, ret.Field.Tag.Get("ini-name") } if ret := g.Names[name]; ret != nil { return ret, ret.Field.Name } } if ret := g.LongNames[name]; ret != nil { return ret, ret.LongName } if utf8.RuneCountInString(name) == 1 { r, _ := utf8.DecodeRuneInString(name) if ret := g.ShortNames[r]; ret != nil { data := make([]byte, utf8.RuneLen(ret.ShortName)) utf8.EncodeRune(data, ret.ShortName) return ret, string(data) } } return nil, "" }
// WriteRune writes a single Unicode code point, returning // the number of bytes written and any error. func (b *Writer) WriteRune(r rune) (size int, err error) { if r < utf8.RuneSelf { err = b.WriteByte(byte(r)) if err != nil { return 0, err } return 1, nil } if b.err != nil { return 0, b.err } n := b.Available() if n < utf8.UTFMax { if b.Flush(); b.err != nil { return 0, b.err } n = b.Available() if n < utf8.UTFMax { // Can only happen if buffer is silly small. return b.WriteString(string(r)) } } size = utf8.EncodeRune(b.buf[b.n:], r) b.n += size return size, nil }
func (t replaceTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { var runeBytes [utf8.UTFMax]byte for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] { if r = rune(src[0]); r < utf8.RuneSelf { sz = 1 } else { r, sz = utf8.DecodeRune(src) if sz == 1 { // Invalid rune. if !atEOF && !utf8.FullRune(src) { err = transform.ErrShortSrc break } } } dsz := utf8.EncodeRune(runeBytes[:], t(r)) if nDst+dsz > len(dst) { err = transform.ErrShortDst break } nDst += copy(dst[nDst:], runeBytes[:dsz]) nSrc += sz } return }
func (b *Buffer) WriteRune(r rune) (int, error) { i := b.Grows(utf8.UTFMax) s := utf8.EncodeRune(b.Buf[i:], r) n := utf8.UTFMax - s b.Buf = b.Buf[:len(b.Buf)-n] return s, nil }