Beispiel #1
0
// htmlReplacer returns s with runes replaced according to replacementTable.
//
// A rune r with r < len(replacementTable) is replaced by replacementTable[r]
// unless that entry is the empty string. For runes outside the table, when
// badRunes is false the ranges U+FDD0..U+FDEF and U+FFF0..U+FFFF are written
// as hexadecimal character references; when badRunes is true they are
// allowed through unescaped. If nothing had to be replaced, s itself is
// returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	written, b := 0, new(bytes.Buffer)
	r, w := rune(0), 0
	for i := 0; i < len(s); i += w {
		// Decode by hand rather than ranging over s: an invalid byte decodes
		// to U+FFFD with a width of 1, whereas utf8.RuneLen(U+FFFD) is 3.
		// Advancing by RuneLen would drop input or run past the end of s.
		r, w = utf8.DecodeRuneInString(s[i:])
		if int(r) < len(replacementTable) {
			if repl := replacementTable[r]; len(repl) != 0 {
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		} else if badRunes {
			// No-op: the caller asked for bad runes to pass through.
		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
			// IE does not allow these ranges in unquoted attrs.
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	// written only moves forward on a replacement, so zero means s is
	// unchanged and can be returned without copying.
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
// runesToString returns the UTF-8 encoding of the code points in runes.
//
// Values that are not valid code points (negative, above utf8.MaxRune, or in
// the surrogate range) are encoded as U+FFFD, which is what utf8.EncodeRune
// emits for them. Sizing the output with utf8.RuneLen is avoided because it
// returns -1 for such values and would produce a wrong length or a negative
// slice bound.
func runesToString(runes []int) string {
	out := make([]byte, 0, len(runes))
	var enc [utf8.UTFMax]byte
	for _, v := range runes {
		// Range-check before converting so an oversized int cannot wrap
		// around into a valid rune.
		r := rune(utf8.RuneError)
		if 0 <= v && v <= utf8.MaxRune {
			r = rune(v)
		}
		n := utf8.EncodeRune(enc[:], r)
		out = append(out, enc[:n]...)
	}
	return string(out)
}
Beispiel #3
0
// TestEntityLength checks that no entry in entity or entity2 has an escaped
// form shorter than the UTF-8 encoding of the text it stands for.
func TestEntityLength(t *testing.T) {
	// The escaped form is "&" followed by the key, i.e. 1+len(key) bytes.
	// When that is never smaller than the UTF-8 length of the value,
	// unescaped text is never longer than the escaped text it came from.
	for name, r := range entity {
		escapedLen := 1 + len(name)
		if utf8.RuneLen(r) > escapedLen {
			t.Error("escaped entity &" + name + " is shorter than its UTF-8 encoding " + string(r))
		}
	}
	// Entries of entity2 expand to two runes; compare against their combined length.
	for name, pair := range entity2 {
		escapedLen := 1 + len(name)
		first, second := pair[0], pair[1]
		if utf8.RuneLen(first)+utf8.RuneLen(second) > escapedLen {
			t.Error("escaped entity &" + name + " is shorter than its UTF-8 encoding " + string(first) + string(second))
		}
	}
}
// Map returns a copy of the byte array s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the output with no replacement. The characters in
// s and the output are interpreted as UTF-8-encoded Unicode code points.
//
// A non-negative mapped value that is not a valid code point (above
// utf8.MaxRune or in the surrogate range) is written as U+FFFD.
func Map(mapping func(rune int) int, s []byte) []byte {
	// The output usually has about the same size as the input, so start with
	// that capacity and let append grow it in the rare case mapping expands
	// the text. Appending avoids sizing by utf8.RuneLen, which returns -1 for
	// invalid runes even though encoding them emits three bytes.
	out := make([]byte, 0, len(s))
	var enc [utf8.UTFMax]byte
	for i := 0; i < len(s); {
		// Fast path for ASCII; decode only multi-byte sequences.
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}
		i += wid

		m := mapping(int(r))
		if m < 0 {
			continue // dropped
		}
		// Range-check before converting so an oversized int cannot wrap
		// around into a valid rune.
		mr := rune(utf8.RuneError)
		if m <= utf8.MaxRune {
			mr = rune(m)
		}
		n := utf8.EncodeRune(enc[:], mr)
		out = append(out, enc[:n]...)
	}
	return out
}
Beispiel #5
0
// replace replaces each rune r of s with replacementTable[r], provided that
// r < len(replacementTable). If replacementTable[r] is the empty string then
// no replacement is made.
// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
// `\u2029`.
// If no rune needs replacing, s itself is returned.
func replace(s string, replacementTable []string) string {
	var b bytes.Buffer
	r, w, written := rune(0), 0, 0
	for i := 0; i < len(s); i += w {
		// Decode by hand rather than ranging over s: an invalid byte decodes
		// to U+FFFD with a width of 1, whereas utf8.RuneLen(U+FFFD) is 3.
		// Advancing by RuneLen after a replacement would skip input or run
		// past the end of s.
		r, w = utf8.DecodeRuneInString(s[i:])
		var repl string
		switch {
		case int(r) < len(replacementTable) && replacementTable[r] != "":
			repl = replacementTable[r]
		case r == '\u2028':
			repl = `\u2028`
		case r == '\u2029':
			repl = `\u2029`
		default:
			continue
		}
		b.WriteString(s[written:i])
		b.WriteString(repl)
		written = i + w
	}
	// written only moves forward on a replacement, so zero means s is
	// unchanged and can be returned without copying.
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
Beispiel #6
0
// Write writes p to the handle fd and returns the number of bytes of p that
// were written together with an error number (0 on success).
//
// When UnicodeConsoleOutput is set and fd refers to a console, p is converted
// to UTF-16 and sent through WriteConsole; otherwise it is written unchanged
// with syscall.WriteFile. On failure the result is (0, errno) as reported by
// the failing call.
func Write(fd int, p []byte) (n int, errno int) {
	var mode uint32
	var done uint32
	// The error from GetConsoleMode is deliberately ignored: only the boolean
	// result is used to pick the console path, so a failure falls back to
	// WriteFile.
	if isConsole, _ := GetConsoleMode(int32(fd), &mode); UnicodeConsoleOutput && isConsole {
		// TODO: The number of TCHARs to write. If the total size of the
		// specified number of characters exceeds 64 KB, the function fails with ERROR_NOT_ENOUGH_MEMORY.
		buf16 := utf16.Encode([]int(string(p)))
		if ok, e := WriteConsole(int32(fd), buf16, &done); !ok {
			return 0, e
		}
		// convert length of utf16 characters to number of bytes written
		if done == uint32(len(buf16)) {
			done = uint32(len(p))
		} else {
			// Partial write: keep the UTF-16 unit count before reusing done
			// as the byte counter. Zeroing done first would make
			// buf16[:done] empty and always report 0 bytes.
			units := done
			done = 0
			for _, rune := range utf16.Decode(buf16[:units]) {
				done += uint32(utf8.RuneLen(rune))
			}
		}
	} else {
		// TODO: This might as well fail with large writes, only Microsoft doesn't say that, see
		// http://code.google.com/p/msysgit/issues/detail?id=409 for example
		if ok, e := syscall.WriteFile(int32(fd), p, &done, nil); !ok {
			return 0, e
		}
	}
	return int(done), 0
}
Beispiel #7
0
// Map returns a copy of the string s with all its characters modified
// according to the mapping function.
//
// Every character of s produces exactly one character of output. A mapped
// value that is not a valid code point (negative, above utf8.MaxRune, or in
// the surrogate range) is written as U+FFFD, which is what utf8.EncodeRune
// emits for such values.
func Map(mapping func(rune int) int, s string) string {
	// The output usually has about the same size as the input, so start with
	// that capacity and let append grow it in the rare case mapping expands
	// the text. Appending avoids guessing the encoded width up front, which
	// went wrong for invalid results: they occupy three bytes once encoded.
	out := make([]byte, 0, len(s))
	var enc [utf8.UTFMax]byte
	for _, c := range s {
		m := mapping(int(c))
		// Range-check before converting so an oversized int cannot wrap
		// around into a valid rune.
		r := rune(utf8.RuneError)
		if 0 <= m && m <= utf8.MaxRune {
			r = rune(m)
		}
		n := utf8.EncodeRune(enc[:], r)
		out = append(out, enc[:n]...)
	}
	return string(out)
}
Beispiel #8
0
// TestEntityLength checks two properties of the entity tables: no escaped
// form is shorter than the UTF-8 text it stands for, and no entity name
// lacking a trailing semicolon is longer than longestEntityWithoutSemicolon.
func TestEntityLength(t *testing.T) {
	// An escaped entity is "&" plus its key, so it takes 1+len(key) bytes.
	// Keeping that at least as large as the UTF-8 length of the value means
	// unescaped text is never longer than the escaped text.
	for name, r := range entity {
		if encoded := utf8.RuneLen(r); encoded > 1+len(name) {
			t.Error("escaped entity &" + name + " is shorter than its UTF-8 encoding " + string(r))
		}
		// Only index the last byte once the length test has passed.
		if n := len(name); n > longestEntityWithoutSemicolon && name[n-1] != ';' {
			t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", name, n, longestEntityWithoutSemicolon)
		}
	}
	// Two-rune entities are compared against the combined length of both runes.
	for name, pair := range entity2 {
		encoded := utf8.RuneLen(pair[0]) + utf8.RuneLen(pair[1])
		if encoded > 1+len(name) {
			t.Error("escaped entity &" + name + " is shorter than its UTF-8 encoding " + string(pair[0]) + string(pair[1]))
		}
	}
}
Beispiel #9
0
// main prints a sequence of small string-handling demonstrations to standard
// output: a triangle of letters, byte counts, iteration over a string that
// contains a multi-byte character, replacing characters at fixed byte
// positions, and reversing a string byte by byte.
func main() {
	fmt.Println("Strings now...")
	for row := 1; row <= 100; row++ {
		// The inner counter starts at zero, so row number `row` holds row+1 letters.
		for col := 0; col <= row; col++ {
			fmt.Print("A")
		}
		fmt.Println()
	}

	fmt.Println("Counting chars")
	text := "asSASA ddd dsjkdsjs dk"
	// len on a string counts bytes, the same number the []byte conversion reports.
	fmt.Printf("Number of chars in %s: %d\n", text, len(text))
	raw := []byte(text)
	fmt.Printf("Number of bytes in %s: %d\n", text, len(raw))

	fmt.Println("Now with unicode...")
	text = "aΦx"
	for offset, ch := range text {
		fmt.Printf("character '%c' starts at byte position %d\n", ch, offset)
	}

	// Add up the encoded width of every character.
	byteCount := 0
	for _, ch := range text {
		byteCount += utf8.RuneLen(ch)
	}
	fmt.Printf("%s contains %d bytes\n", text, byteCount)

	fmt.Println("Swapping chars...")
	text = "asSASA ddd dsjkdsjs dk"

	// Substitute the characters starting at byte offsets 3, 4 and 5 and keep
	// everything else.
	swapped := make([]rune, 0, len(text))
	for offset, ch := range text {
		switch offset {
		case 3:
			swapped = append(swapped, 'a')
		case 4:
			swapped = append(swapped, 'b')
		case 5:
			swapped = append(swapped, 'c')
		default:
			swapped = append(swapped, ch)
		}
	}

	fmt.Println(string(swapped))

	fmt.Println("Swapping chars...")
	// Reverse in place by exchanging bytes from both ends towards the middle.
	reversed := []byte("foobar")
	lo, hi := 0, len(reversed)-1
	for lo < hi {
		reversed[lo], reversed[hi] = reversed[hi], reversed[lo]
		lo++
		hi--
	}

	fmt.Println(string(reversed))
}
Beispiel #10
0
func urlquoter(c int, safe string) []byte {
	safe_bytes := strings.Bytes(safe);
	c_bytes := make([]byte, utf8.RuneLen(c));
	utf8.EncodeRune(c, c_bytes);
	if bytes.Index(safe_bytes, c_bytes) != -1 || bytes.Index(always_safe, c_bytes) != -1 {
		return c_bytes;
	}
	else {
		return strings.Bytes(fmt.Sprintf("%%%02X", c));
	}
	panic("unreachable");
}
Beispiel #11
0
/*
CapFirstFormatter writes the value to w with its first character converted
to upper case; the rest of the value is written unchanged. Nothing is
written when the value is empty. The formatter argument is not used.

Example:

	{value|capfirst}

If value is "neste", the output will be "Neste".
*/
func CapFirstFormatter(w io.Writer, formatter string, data ...interface{}) {
	text := getBytes(data...)
	if len(text) == 0 {
		return
	}

	// Split off the leading character and upper-case it.
	first, width := utf8.DecodeRune(text)
	upper := unicode.ToUpper(first)

	// The upper-case form may differ in encoded length from the original,
	// so it is encoded into its own buffer instead of being patched in place.
	head := make([]byte, utf8.RuneLen(upper))
	utf8.EncodeRune(head, upper)

	w.Write(head)
	w.Write(text[width:])
}
Beispiel #12
0
// Read fills p with text from the underlying rune reader, replacing every
// rune found in r.delimiters with r.used_delimiter, and returns the number of
// bytes stored in p.
//
// When the encoding of a rune does not fit into the space left in p, the part
// that fits is stored and the rest is kept in r.remainder, to be delivered at
// the start of the next call. An error from the underlying reader is returned
// together with the number of bytes stored so far.
func (r *DelimReader) Read(p []byte) (n int, err os.Error) {
	written := 0

	// Deliver bytes held back by the previous call first. copy stops at the
	// shorter of the two slices, so a p smaller than the remainder cannot be
	// overrun; whatever does not fit stays queued for the next call.
	if r.remainder != nil {
		written = copy(p, r.remainder)
		if written < len(r.remainder) {
			r.remainder = r.remainder[written:]
			return written, nil
		}
		r.remainder = nil
	}

	for written < len(p) {
		rune, _, err := r.reader.ReadRune()
		if err != nil {
			return written, err
		}
		for _, value := range r.delimiters {
			if value == rune {
				rune = r.used_delimiter
				break
			}
		}

		// Size the output by what will actually be encoded, not by the number
		// of input bytes consumed: an invalid input byte is reported as
		// U+FFFD with size 1 but encodes to three bytes.
		size := utf8.RuneLen(rune)
		if size < 0 {
			size = utf8.RuneLen(utf8.RuneError)
		}

		if written+size > len(p) {
			// we need to split the rune and hold on to the remainder
			writable := len(p) - written
			target := make([]byte, size)
			utf8.EncodeRune(target, rune)
			written += copy(p[written:], target[:writable])
			r.remainder = target[writable:]
		} else {
			utf8.EncodeRune(p[written:written+size], rune)
			written += size
		}
	}
	return written, nil
}
Beispiel #13
0
// AllSubstringsAndValues returns all anchored substrings of the given string
// within the Trie, with a matching set of their associated values.
//
// The walk starts at p and follows one child per character of s. Each time a
// leaf node is reached, the prefix of s consumed so far is pushed onto the
// first result and the node's value onto the second. The walk stops at the
// first character that has no child.
func (p *Trie) AllSubstringsAndValues(s string) (*vector.StringVector, *vector.Vector) {
	sv := new(vector.StringVector)
	vv := new(vector.Vector)

	for pos, rune := range s {
		child, ok := p.children[rune]
		if !ok {
			// return whatever we have so far
			break
		}

		// if this is a leaf node, add the string so far and its value
		if child.leaf {
			// Use the width actually occupied in s: an invalid byte decodes
			// to U+FFFD with a width of 1, while utf8.RuneLen(U+FFFD) is 3
			// and could push the slice bound past len(s).
			_, width := utf8.DecodeRuneInString(s[pos:])
			sv.Push(s[0 : pos+width])
			vv.Push(child.value)
		}

		p = child
	}

	return sv, vv
}
Beispiel #14
0
// Map returns a copy of the string s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the
// character is dropped from the string with no replacement.
//
// A non-negative mapped value that is not a valid code point (above
// utf8.MaxRune or in the surrogate range) is written as U+FFFD. When mapping
// leaves every character unchanged, s itself is returned without allocating.
func Map(mapping func(rune) rune, s string) string {
	// The output buffer is created on demand, the first time a character
	// differs. From then on it grows through append, so a mapping that
	// expands the text needs no width bookkeeping; sizing by utf8.RuneLen
	// would be wrong for invalid runes, for which it returns -1 although
	// three bytes get encoded.
	var out []byte
	var enc [utf8.UTFMax]byte

	for i, c := range s {
		r := mapping(c)
		if out == nil {
			if r == c {
				continue
			}
			// First difference: carry over the untouched prefix.
			out = make([]byte, 0, len(s))
			out = append(out, s[:i]...)
		}
		if r < 0 {
			continue // dropped
		}
		n := utf8.EncodeRune(enc[:], r)
		out = append(out, enc[:n]...)
	}
	if out == nil {
		return s
	}
	return string(out)
}
Beispiel #15
0
// tr translates the text in args and says the result to target on conn.
//
// args may start with a language selector of the form "xx|yy" followed by a
// space or an ideographic space (U+3000); xx is then used as the source
// language, yy as the target language, and the rest of args as the text. In
// every other case the whole of args is translated from "auto" to "en".
// Nothing is sent when args is empty. The nick argument is not used.
func tr(conn *irc.Conn, nick *irc.Nick, args, target string) {
	if args == "" {
		return
	}

	const ideographicSpace = '\u3000'

	sourcelang, targetlang, text := "auto", "en", args

	// handle spaces and ideographic spaces (U+3000)
	index := strings.IndexAny(args, " \u3000")
	if index == 5 && args[2] == '|' {
		sourcelang = args[:2]
		targetlang = args[3:5]
		if args[5] == ' ' {
			text = args[6:]
		} else {
			// The separator is the ideographic space; skip its full UTF-8
			// encoding. The code point must be written as U+3000, not as the
			// decimal number 3000, which is a different character.
			text = args[5+utf8.RuneLen(ideographicSpace):]
		}
	}

	say(conn, target, translate(sourcelang, targetlang, text))
}
Beispiel #16
0
// ReadRune returns the next rune of input together with its encoded size.
//
// A rune stored in s.peekRune is handed back first. Otherwise os.EOF is
// reported without reading when the end of input was already seen, when
// s.nlIsEnd is set and the previous rune was a newline, or when s.count has
// reached s.fieldLimit. In all remaining cases the rune comes from s.rr.
// Every rune returned successfully increments s.count and is recorded in
// s.prevRune.
func (s *ss) ReadRune() (rune int, size int, err os.Error) {
	// Hand back a previously peeked rune; -1 marks the slot as empty again.
	if s.peekRune >= 0 {
		rune, s.peekRune = s.peekRune, -1
		s.prevRune = rune
		s.count++
		size = utf8.RuneLen(rune)
		return
	}

	endOfLine := s.nlIsEnd && s.prevRune == '\n'
	limitReached := s.count >= s.fieldLimit
	if s.atEOF || endOfLine || limitReached {
		err = os.EOF
		return
	}

	rune, size, err = s.rr.ReadRune()
	switch err {
	case nil:
		s.count++
		s.prevRune = rune
	case os.EOF:
		// Remember the end of input so later calls do not read again.
		s.atEOF = true
	}
	return
}
Beispiel #17
0
// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
// The input is the string form of args as produced by stringify. If no
// character needs escaping, that string is returned unchanged.
func cssEscaper(args ...interface{}) string {
	s, _ := stringify(args...)
	var (
		out     bytes.Buffer
		flushed int // number of bytes of s already accounted for in out
	)
	for pos, c := range s {
		esc := ""
		switch c {
		case 0:
			esc = `\0`
		case '\t':
			esc = `\9`
		case '\n':
			esc = `\a`
		case '\f':
			esc = `\c`
		case '\r':
			esc = `\d`
		// Encode HTML specials as hex so the output can be embedded
		// in HTML attributes without further encoding.
		case '"':
			esc = `\22`
		case '&':
			esc = `\26`
		case '\'':
			esc = `\27`
		case '(':
			esc = `\28`
		case ')':
			esc = `\29`
		case '+':
			esc = `\2b`
		case '/':
			esc = `\2f`
		case ':':
			esc = `\3a`
		case ';':
			esc = `\3b`
		case '<':
			esc = `\3c`
		case '>':
			esc = `\3e`
		case '\\':
			esc = `\\`
		case '{':
			esc = `\7b`
		case '}':
			esc = `\7d`
		}
		if esc == "" {
			continue
		}
		out.WriteString(s[flushed:pos])
		out.WriteString(esc)
		flushed = pos + utf8.RuneLen(c)
		// The escaped backslash is not a hex escape and needs no terminator.
		if esc == `\\` {
			continue
		}
		// Terminate a hex escape with a space when it ends the string or is
		// followed by a hex digit or CSS whitespace.
		if flushed == len(s) || isHex(s[flushed]) || isCSSSpace(s[flushed]) {
			out.WriteByte(' ')
		}
	}
	if flushed == 0 {
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}
Beispiel #18
0
// integer formats the value a in the given base and hands the result to
// f.pad. It interprets prec but not wid.
//
// a is the value to format, base the radix, and digits the digit alphabet
// indexed by digit value. signedness selects whether a negative a is printed
// with a leading minus sign; otherwise a is formatted as its unsigned 64-bit
// bit pattern. The original comment adds that flags are cleared once the
// result has been sent to pad(); that clearing is not visible in this
// function (presumably done by pad — TODO confirm).
//
// With an explicit precision of 0 and a value of 0 nothing is produced and
// pad is not called.
func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
	// precision of 0 and value of 0 means "print nothing"
	if f.precPresent && f.prec == 0 && a == 0 {
		return
	}

	var buf []byte = f.intbuf[0:]
	negative := signedness == signed && a < 0
	if negative {
		a = -a
	}

	// two ways to ask for extra leading zero digits: %.3d or %03d.
	// apparently the first cancels the second.
	prec := 0
	if f.precPresent {
		prec = f.prec
		f.zero = false
	} else if f.zero && f.widPresent && !f.minus && f.wid > 0 {
		prec = f.wid
		if negative || f.plus || f.space {
			prec-- // leave room for sign
		}
	}

	// format a into buf, ending at buf[i].  (printing is easier right-to-left.)
	// a is made into unsigned ua.  we could make things
	// marginally faster by splitting the 32-bit case out into a separate
	// block but it's not worth the duplication, so ua has 64 bits.
	i := len(f.intbuf)
	ua := uint64(a)
	for ua >= base {
		i--
		buf[i] = digits[ua%base]
		ua /= base
	}
	i--
	buf[i] = digits[ua]
	// Left-pad with zeros until prec digits have been written or the buffer
	// is full. NOTE(review): nByte-i is used as the digit count, which
	// assumes nByte equals len(f.intbuf) — confirm against its declaration.
	for i > 0 && prec > nByte-i {
		i--
		buf[i] = '0'
	}

	// Various prefixes: 0x, -, etc.
	if f.sharp {
		switch base {
		case 8:
			// Octal gets a single leading zero unless one is already there.
			if buf[i] != '0' {
				i--
				buf[i] = '0'
			}
		case 16:
			// Match the case of the prefix letter to the digit alphabet:
			// the expression yields 'x' when digits[10] is 'a' and 'X' when
			// it is 'A'.
			i--
			buf[i] = 'x' + digits[10] - 'a'
			i--
			buf[i] = '0'
		}
	}
	// Unicode format: prefix the digits with "U+".
	if f.unicode {
		i--
		buf[i] = '+'
		i--
		buf[i] = 'U'
	}

	// At most one sign character, in order of priority: minus, plus, space.
	if negative {
		i--
		buf[i] = '-'
	} else if f.plus {
		i--
		buf[i] = '+'
	} else if f.space {
		i--
		buf[i] = ' '
	}

	// If we want a quoted char for %#U, move the data up to make room.
	if f.unicode && f.uniQuote && a >= 0 && a <= unicode.MaxRune && unicode.IsPrint(int(a)) {
		runeWidth := utf8.RuneLen(int(a))
		width := 1 + 1 + runeWidth + 1 // space, quote, rune, quote
		// Shift the text formatted so far width bytes towards the front of
		// buf, freeing the last width bytes for the quoted character.
		copy(buf[i-width:], buf[i:])   // guaranteed to have enough room.
		i -= width
		// Now put " 'x'" at the end.
		j := len(buf) - width
		buf[j] = ' '
		j++
		buf[j] = '\''
		j++
		utf8.EncodeRune(buf[j:], int(a))
		j += runeWidth
		buf[j] = '\''
	}

	// Everything from buf[i] to the end of buf is the formatted result.
	f.pad(buf[i:])
}
Beispiel #19
0
// unreadRune un-reads rune by asking unreadBytes to step back as many bytes
// as utf8.RuneLen reports for it.
func unreadRune(state *LexerState, reader *bufio.Reader, rune int) {
	width := utf8.RuneLen(rune)
	unreadBytes(state, reader, width)
}