// htmlReplacer returns s with runes replaced according to replacementTable
// and when badRunes is true, certain bad runes are allowed through unescaped.
//
// A rune r below len(replacementTable) is replaced by replacementTable[r]
// unless that entry is empty. Runes at or beyond the table are left alone
// when badRunes is true; otherwise those in U+FDD0..U+FDEF or U+FFF0..U+FFFF
// are written as hexadecimal character references (&#x...;).
// If nothing needs replacing, s itself is returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	written, b := 0, new(bytes.Buffer)
	for i, w := 0, 0; i < len(s); i += w {
		// Decode by hand instead of ranging over s: an invalid byte decodes
		// to U+FFFD with an input width of 1, while utf8.RuneLen(U+FFFD) is
		// 3. Advancing by RuneLen would skip past unread input and could
		// slice beyond the end of s.
		var r rune
		r, w = utf8.DecodeRuneInString(s[i:])
		switch {
		case int(r) < len(replacementTable):
			if repl := replacementTable[r]; len(repl) != 0 {
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		case badRunes:
			// No-op: the caller asked for these runes to pass through.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
// runesToString returns the UTF-8 encoding of the code points in runes.
//
// Values that are not valid Unicode code points (negative, above
// utf8.MaxRune, or surrogate halves) are encoded as utf8.RuneError (U+FFFD)
// instead of producing a negative length and panicking.
func runesToString(runes []int) string {
	data := make([]byte, 0, len(runes))
	var scratch [utf8.UTFMax]byte
	for _, v := range runes {
		r := utf8.RuneError
		if v >= 0 && v <= utf8.MaxRune {
			r = rune(v)
		}
		// EncodeRune itself substitutes RuneError for surrogate halves.
		n := utf8.EncodeRune(scratch[:], r)
		data = append(data, scratch[:n]...)
	}
	return string(data)
}
func TestEntityLength(t *testing.T) { // We verify that the length of UTF-8 encoding of each value is <= 1 + len(key). // The +1 comes from the leading "&". This property implies that the length of // unescaped text is <= the length of escaped text. for k, v := range entity { if 1+len(k) < utf8.RuneLen(v) { t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v)) } } for k, v := range entity2 { if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) { t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1])) } } }
// Map returns a copy of the byte array s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the output with no replacement. The characters in
// s and the output are interpreted as UTF-8-encoded Unicode code points.
//
// A non-negative mapped value that is not a valid code point (above
// utf8.MaxRune or a surrogate half) is written as utf8.RuneError.
func Map(mapping func(rune int) int, s []byte) []byte {
	// In the worst case, the array can grow when mapped, making
	// things unpleasant. But it's so rare we barge in assuming it's
	// fine. It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	b := make([]byte, maxbytes)
	for i := 0; i < len(s); {
		// Fast path for ASCII; otherwise decode a full rune.
		wid := 1
		c := int(s[i])
		if c >= utf8.RuneSelf {
			var r rune
			r, wid = utf8.DecodeRune(s[i:])
			c = int(r)
		}
		i += wid

		c = mapping(c)
		if c < 0 {
			continue // dropped
		}
		out := utf8.RuneError
		if c <= utf8.MaxRune {
			out = rune(c)
		}
		// RuneLen reports -1 for surrogate halves, which EncodeRune writes
		// as RuneError; size the buffer for what is actually written.
		need := utf8.RuneLen(out)
		if need < 0 {
			need = utf8.RuneLen(utf8.RuneError)
		}
		if nbytes+need > maxbytes {
			// Grow the buffer.
			maxbytes = maxbytes*2 + utf8.UTFMax
			nb := make([]byte, maxbytes)
			copy(nb, b[:nbytes])
			b = nb
		}
		nbytes += utf8.EncodeRune(b[nbytes:maxbytes], out)
	}
	return b[:nbytes]
}
// replace replaces each rune r of s with replacementTable[r], provided that
// r < len(replacementTable). If replacementTable[r] is the empty string then
// no replacement is made.
// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
// `\u2029`.
// If nothing is replaced, s itself is returned.
func replace(s string, replacementTable []string) string {
	var b bytes.Buffer
	written := 0
	for i, w := 0, 0; i < len(s); i += w {
		// Decode by hand instead of ranging over s so that w is the width
		// the rune occupies in the input. An invalid byte decodes to U+FFFD
		// with width 1, whereas utf8.RuneLen(U+FFFD) is 3; advancing by
		// RuneLen would skip unread input or slice past the end of s.
		var r rune
		r, w = utf8.DecodeRuneInString(s[i:])
		var repl string
		switch {
		case int(r) < len(replacementTable) && replacementTable[r] != "":
			repl = replacementTable[r]
		case r == '\u2028':
			repl = `\u2028`
		case r == '\u2029':
			repl = `\u2029`
		default:
			continue
		}
		b.WriteString(s[written:i])
		b.WriteString(repl)
		written = i + w
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
func Write(fd int, p []byte) (n int, errno int) { var mode uint32 var done uint32 if isConsole, _ := GetConsoleMode(int32(fd), &mode); UnicodeConsoleOutput && isConsole { // TODO: The number of TCHARs to write. If the total size of the // specified number of characters exceeds 64 KB, the function fails with ERROR_NOT_ENOUGH_MEMORY. buf16 := utf16.Encode([]int(string(p))) //for _, c := range buf16 { print(c," ") } ; println() if ok, e := WriteConsole(int32(fd), buf16, &done); !ok { return 0, e } // convert length of utf16 characters to number of bytes written if done == uint32(len(buf16)) { done = uint32(len(p)) } else { done = 0 for _, rune := range utf16.Decode(buf16[:done]) { done += uint32(utf8.RuneLen(rune)) } } } else { // TODO: This might as well fail with large writes, only Microsoft doesn't say that, see // http://code.google.com/p/msysgit/issues/detail?id=409 for example if ok, e := syscall.WriteFile(int32(fd), p, &done, nil); !ok { return 0, e } } return int(done), 0 }
// Map returns a copy of the string s with all its characters modified
// according to the mapping function.
//
// A mapped value that is not a valid code point (negative, above
// utf8.MaxRune, or a surrogate half) is written as utf8.RuneError.
func Map(mapping func(rune int) int, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant. But it's so rare we barge in assuming it's
	// fine. It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	b := make([]byte, maxbytes)
	for _, c := range s {
		mapped := mapping(int(c))
		out := utf8.RuneError
		if mapped >= 0 && mapped <= utf8.MaxRune {
			out = rune(mapped)
		}
		// Size the write by what EncodeRune will actually emit: RuneLen
		// reports -1 for surrogate halves, which are written as RuneError.
		wid := utf8.RuneLen(out)
		if wid < 0 {
			wid = utf8.RuneLen(utf8.RuneError)
		}
		if nbytes+wid > maxbytes {
			// Grow the buffer.
			maxbytes = maxbytes*2 + utf8.UTFMax
			nb := make([]byte, maxbytes)
			copy(nb, b[:nbytes])
			b = nb
		}
		nbytes += utf8.EncodeRune(b[nbytes:maxbytes], out)
	}
	return string(b[:nbytes])
}
func TestEntityLength(t *testing.T) { // We verify that the length of UTF-8 encoding of each value is <= 1 + len(key). // The +1 comes from the leading "&". This property implies that the length of // unescaped text is <= the length of escaped text. for k, v := range entity { if 1+len(k) < utf8.RuneLen(v) { t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v)) } if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' { t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon) } } for k, v := range entity2 { if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) { t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1])) } } }
// main prints a series of small string demonstrations: a growing triangle of
// letters, byte and character counts, rune iteration over a non-ASCII string,
// positional character substitution, and an in-place byte reversal.
func main() {
	fmt.Println("Strings now...")
	for row := 1; row <= 100; row++ {
		// The inner bound is inclusive, so each row holds row+1 letters.
		for col := 0; col <= row; col++ {
			fmt.Printf("A")
		}
		fmt.Println()
	}

	fmt.Println("Counting chars")
	input := "asSASA ddd dsjkdsjs dk"
	fmt.Printf("Number of chars in %s: %d\n", input, len(input))
	raw := []byte(input)
	fmt.Printf("Number of bytes in %s: %d\n", input, len(raw))

	fmt.Println("Now with unicode...")
	input = "aΦx"
	for offset, r := range input {
		fmt.Printf("character '%c' starts at byte position %d\n", r, offset)
	}
	// Sum the encoded width of every rune to get the byte length.
	total := 0
	for _, r := range input {
		total += utf8.RuneLen(r)
	}
	fmt.Printf("%s contains %d bytes\n", input, total)

	fmt.Println("Swapping chars...")
	input = "asSASA ddd dsjkdsjs dk"
	// Byte offsets whose character is substituted in the output.
	substitutes := map[int]string{3: "a", 4: "b", 5: "c"}
	output := ""
	for offset, r := range input {
		if sub, found := substitutes[offset]; found {
			output += sub
		} else {
			output += string(r)
		}
	}
	fmt.Println(output)

	fmt.Println("Swapping chars...")
	input = "foobar"
	reversed := []byte(input)
	// Swap mirrored positions, walking inward from both ends.
	for lo := 0; lo < len(reversed)/2; lo++ {
		hi := len(reversed) - 1 - lo
		reversed[lo], reversed[hi] = reversed[hi], reversed[lo]
	}
	fmt.Println(string(reversed))
}
func urlquoter(c int, safe string) []byte { safe_bytes := strings.Bytes(safe); c_bytes := make([]byte, utf8.RuneLen(c)); utf8.EncodeRune(c, c_bytes); if bytes.Index(safe_bytes, c_bytes) != -1 || bytes.Index(always_safe, c_bytes) != -1 { return c_bytes; } else { return strings.Bytes(fmt.Sprintf("%%%02X", c)); } panic("unreachable"); }
/* Capitalizes the first character of the value. Example: {value|capfirst} If value is "neste", the output will be "Neste". */ func CapFirstFormatter(w io.Writer, formatter string, data ...interface{}) { b := getBytes(data...) if len(b) > 0 { rune, size := utf8.DecodeRune(b) rune = unicode.ToUpper(rune) capSize := utf8.RuneLen(rune) capb := make([]byte, capSize) utf8.EncodeRune(capb, rune) w.Write(capb) w.Write(b[size:]) } }
func (r *DelimReader) Read(p []byte) (n int, err os.Error) { bytes_written := 0 if r.remainder != nil { for i := 0; i < len(r.remainder); i++ { p[i] = r.remainder[i] bytes_written++ } r.remainder = nil } for bytes_written < len(p) { rune, size, err := r.reader.ReadRune() if err != nil { return bytes_written, err } for _, value := range r.delimiters { if value == rune { rune = r.used_delimiter size = utf8.RuneLen(rune) } } if bytes_written+size > len(p) { // we need to split the rune and hold on to the remainder writable := len(p) - bytes_written target := make([]byte, size) _ = utf8.EncodeRune(target, rune) for i := 0; i < writable; i++ { p[bytes_written] = target[i] bytes_written++ } r.remainder = target[writable:] } else { target := p[bytes_written : bytes_written+size] _ = utf8.EncodeRune(target, rune) bytes_written += size } } return bytes_written, nil }
// Return all anchored substrings of the given string within the Trie, with a matching set of // their associated values. func (p *Trie) AllSubstringsAndValues(s string) (*vector.StringVector, *vector.Vector) { sv := new(vector.StringVector) vv := new(vector.Vector) for pos, rune := range s { child, ok := p.children[rune] if !ok { // return whatever we have so far break } // if this is a leaf node, add the string so far and its value if child.leaf { sv.Push(s[0 : pos+utf8.RuneLen(rune)]) vv.Push(child.value) } p = child } return sv, vv }
// Map returns a copy of the string s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the string with no replacement.
//
// A non-negative mapped value that is not a valid code point is written as
// utf8.RuneError. If mapping leaves every character unchanged, s itself is
// returned without allocating.
func Map(mapping func(rune) rune, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant. But it's so rare we barge in assuming it's
	// fine. It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	// The output buffer b is initialized on demand, the first
	// time a character differs.
	var b []byte

	for i, c := range s {
		r := mapping(c)
		if b == nil {
			if r == c {
				continue
			}
			b = make([]byte, maxbytes)
			nbytes = copy(b, s[:i])
		}
		if r < 0 {
			continue // dropped
		}
		// RuneLen reports -1 for invalid runes, but EncodeRune writes them
		// as RuneError; reserve room for what is actually written so the
		// grow check cannot be bypassed.
		wid := utf8.RuneLen(r)
		if wid < 0 {
			wid = utf8.RuneLen(utf8.RuneError)
		}
		if nbytes+wid > maxbytes {
			// Grow the buffer.
			maxbytes = maxbytes*2 + utf8.UTFMax
			nb := make([]byte, maxbytes)
			copy(nb, b[:nbytes])
			b = nb
		}
		nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
	}
	if b == nil {
		return s
	}
	return string(b[:nbytes])
}
func tr(conn *irc.Conn, nick *irc.Nick, args, target string) { if args == "" { return } var sourcelang, targetlang, text string index := strings.IndexAny(args, " ") // handle spaces and ideographic spaces (U+3000) if index == 5 && args[2] == '|' { sourcelang = args[:2] targetlang = args[3:5] if args[5] == ' ' { text = args[6:] } else { text = args[5+utf8.RuneLen(3000):] } } else { sourcelang = "auto" targetlang = "en" text = args } say(conn, target, translate(sourcelang, targetlang, text)) }
func (s *ss) ReadRune() (rune int, size int, err os.Error) { if s.peekRune >= 0 { s.count++ rune = s.peekRune size = utf8.RuneLen(rune) s.prevRune = rune s.peekRune = -1 return } if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.fieldLimit { err = os.EOF return } rune, size, err = s.rr.ReadRune() if err == nil { s.count++ s.prevRune = rune } else if err == os.EOF { s.atEOF = true } return }
// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes. func cssEscaper(args ...interface{}) string { s, _ := stringify(args...) var b bytes.Buffer written := 0 for i, r := range s { var repl string switch r { case 0: repl = `\0` case '\t': repl = `\9` case '\n': repl = `\a` case '\f': repl = `\c` case '\r': repl = `\d` // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. case '"': repl = `\22` case '&': repl = `\26` case '\'': repl = `\27` case '(': repl = `\28` case ')': repl = `\29` case '+': repl = `\2b` case '/': repl = `\2f` case ':': repl = `\3a` case ';': repl = `\3b` case '<': repl = `\3c` case '>': repl = `\3e` case '\\': repl = `\\` case '{': repl = `\7b` case '}': repl = `\7d` default: continue } b.WriteString(s[written:i]) b.WriteString(repl) written = i + utf8.RuneLen(r) if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) { b.WriteByte(' ') } } if written == 0 { return s } b.WriteString(s[written:]) return b.String() }
// integer; interprets prec but not wid. Once formatted, result is sent to pad()
// and then flags are cleared.
//
// a is the value to format, base the radix, and digits the digit alphabet
// indexed by digit value. signedness is compared against the package's
// signed constant to decide whether a negative a gets a minus sign.
// The text is built right-to-left in f.intbuf and handed to f.pad.
// NOTE(review): the flag clearing mentioned above is not visible in this
// function; presumably pad() does it — confirm.
func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
	// precision of 0 and value of 0 means "print nothing"
	if f.precPresent && f.prec == 0 && a == 0 {
		return
	}

	// buf aliases f's scratch buffer; nothing is allocated here.
	var buf []byte = f.intbuf[0:]
	negative := signedness == signed && a < 0
	if negative {
		// Work on the magnitude. For the smallest int64 the negation wraps
		// back to the same value, but the uint64 conversion below still
		// produces the right magnitude.
		a = -a
	}

	// two ways to ask for extra leading zero digits: %.3d or %03d.
	// apparently the first cancels the second.
	prec := 0
	if f.precPresent {
		prec = f.prec
		f.zero = false
	} else if f.zero && f.widPresent && !f.minus && f.wid > 0 {
		prec = f.wid
		if negative || f.plus || f.space {
			prec-- // leave room for sign
		}
	}

	// format a into buf, ending at buf[i]. (printing is easier right-to-left.)
	// a is made into unsigned ua. we could make things
	// marginally faster by splitting the 32-bit case out into a separate
	// block but it's not worth the duplication, so ua has 64 bits.
	i := len(f.intbuf)
	ua := uint64(a)
	for ua >= base {
		i--
		buf[i] = digits[ua%base]
		ua /= base
	}
	// Most significant digit.
	i--
	buf[i] = digits[ua]
	// Left-pad with zeros until prec digits have been written.
	// NOTE(review): nByte-i is the digit count only if nByte equals
	// len(f.intbuf) — confirm against the declaration of intbuf.
	for i > 0 && prec > nByte-i {
		i--
		buf[i] = '0'
	}

	// Various prefixes: 0x, -, etc.
	if f.sharp {
		switch base {
		case 8:
			// Octal needs a leading zero unless one is already there.
			if buf[i] != '0' {
				i--
				buf[i] = '0'
			}
		case 16:
			// Give the 'x' the same case as the digit alphabet
			// (assuming digits[10] is 'a' or 'A').
			i--
			buf[i] = 'x' + digits[10] - 'a'
			i--
			buf[i] = '0'
		}
	}
	if f.unicode {
		// Prepend "U+" (written right-to-left, hence '+' first).
		i--
		buf[i] = '+'
		i--
		buf[i] = 'U'
	}

	// At most one sign character: '-' wins over '+', which wins over ' '.
	if negative {
		i--
		buf[i] = '-'
	} else if f.plus {
		i--
		buf[i] = '+'
	} else if f.space {
		i--
		buf[i] = ' '
	}

	// If we want a quoted char for %#U, move the data up to make room.
	if f.unicode && f.uniQuote && a >= 0 && a <= unicode.MaxRune && unicode.IsPrint(int(a)) {
		runeWidth := utf8.RuneLen(int(a))
		width := 1 + 1 + runeWidth + 1 // space, quote, rune, quote
		copy(buf[i-width:], buf[i:])   // guaranteed to have enough room.
		i -= width
		// Now put " 'x'" at the end.
		j := len(buf) - width
		buf[j] = ' '
		j++
		buf[j] = '\''
		j++
		utf8.EncodeRune(buf[j:], int(a))
		j += runeWidth
		buf[j] = '\''
	}

	f.pad(buf[i:])
}
// unreadRune pushes rune back by unreading as many bytes as its UTF-8
// encoding occupies.
func unreadRune(state *LexerState, reader *bufio.Reader, rune int) {
	width := utf8.RuneLen(rune)
	unreadBytes(state, reader, width)
}