Exemplo n.º 1
0
// Encode percent-encodes rawurl, avoiding double encoding.
//
// It doesn't touch:
//   - alphanumeric characters ([0-9a-zA-Z]);
//   - percent-encoded characters (%[0-9a-fA-F]{2}), except that their two
//     hex digits are passed through byteutil.ByteToUpper (presumably
//     upper-casing them);
//   - excluded characters ([;/?:@&=+$,-_.!~*'()#]).
//
// A '%' that is not followed by two hex digits is encoded as "%25".
// Valid non-ASCII runes are emitted as the percent-encoded form of their
// UTF-8 bytes. Invalid UTF-8 sequences are replaced with U+FFFD, i.e. each
// undecodable byte yields "%EF%BF%BD".
func Encode(rawurl string) string {
	const hexdigit = "0123456789ABCDEF"
	// Percent-encoded UTF-8 representation of U+FFFD.
	const replacement = "%EF%BF%BD"

	var buf bytes.Buffer
	// The output is never shorter than the input.
	buf.Grow(len(rawurl))

	i := 0
	for i < len(rawurl) {
		r, rlen := utf8.DecodeRuneInString(rawurl[i:])
		switch {
		case r == utf8.RuneError && rlen == 1:
			// A width of 1 distinguishes an undecodable byte from a
			// literal U+FFFD in the input (which is 3 bytes wide).
			// Emitting the raw byte here would yield a URL that does not
			// decode to valid UTF-8.
			buf.WriteString(replacement)
		case r >= 0x80:
			// Valid multi-byte rune: escape each of its UTF-8 bytes.
			for j, n := i, i+rlen; j < n; j++ {
				b := rawurl[j]
				buf.WriteByte('%')
				buf.WriteByte(hexdigit[(b>>4)&0xf])
				buf.WriteByte(hexdigit[b&0xf])
			}
		case r == '%':
			if i+2 < len(rawurl) &&
				byteutil.IsHexDigit(rawurl[i+1]) &&
				byteutil.IsHexDigit(rawurl[i+2]) {
				// Already an escape sequence: keep it to avoid double
				// encoding.
				buf.WriteByte('%')
				buf.WriteByte(byteutil.ByteToUpper(rawurl[i+1]))
				buf.WriteByte(byteutil.ByteToUpper(rawurl[i+2]))
				// Skip the two hex digits; the '%' itself is skipped by
				// the common i += rlen below.
				i += 2
			} else {
				// Stray '%': escape it so the result is unambiguous.
				buf.WriteString("%25")
			}
		case !cs4[r]:
			// ASCII character that the cs4 table does not allow verbatim.
			buf.WriteByte('%')
			buf.WriteByte(hexdigit[(r>>4)&0xf])
			buf.WriteByte(hexdigit[r&0xf])
		default:
			buf.WriteByte(byte(r))
		}
		i += rlen
	}
	return buf.String()
}
Exemplo n.º 2
0
// Decode reverses percent-encoding in rawurl.
//
// A "%XX" escape whose value is below 0x80 is emitted as that single byte.
// An escape holding a UTF-8 lead byte is combined with the continuation
// bytes that follow it (each fetched via advance, so they may be raw or
// percent-encoded) and emitted as one rune.
//
// A '%' that does not start a well-formed escape is copied through as is.
// Invalid UTF-8, whether raw or produced by escapes, is replaced with U+FFFD.
func Decode(rawurl string) string {
	const replacement = "\xEF\xBF\xBD"

	var out bytes.Buffer
	for pos := 0; pos < len(rawurl); {
		ch, size := utf8.DecodeRuneInString(rawurl[pos:])

		escaped := ch == '%' && pos+2 < len(rawurl) &&
			byteutil.IsHexDigit(rawurl[pos+1]) &&
			byteutil.IsHexDigit(rawurl[pos+2])
		if !escaped {
			// Ordinary rune (WriteRune turns an undecodable byte into U+FFFD).
			out.WriteRune(ch)
			pos += size
			continue
		}

		lead := byteutil.Unhex(rawurl[pos+1])<<4 | byteutil.Unhex(rawurl[pos+2])
		if lead < 0x80 {
			out.WriteByte(lead)
			pos += 3
			continue
		}

		// Number of continuation bytes announced by the lead byte; it stays
		// zero for a stray continuation byte or an invalid lead byte.
		trail := 0
		switch {
		case lead&0xe0 == 0xc0:
			trail = 1
		case lead&0xf0 == 0xe0:
			trail = 2
		case lead&0xf8 == 0xf0:
			trail = 3
		}

		// Collect the lead byte plus its continuation bytes.
		seq := make([]byte, 1, 4)
		seq[0] = lead
		next := pos + 3
		for len(seq) <= trail {
			var c byte
			c, next = advance(rawurl, next)
			if next > len(rawurl) || c&0xc0 != 0x80 {
				// Input exhausted or not a continuation byte.
				break
			}
			seq = append(seq, c)
		}

		if trail == 0 || len(seq) <= trail {
			// Bad lead byte or incomplete sequence: replace only the lead
			// escape and resume scanning right after it.
			out.WriteString(replacement)
			pos += 3
			continue
		}

		decoded, _ := utf8.DecodeRune(seq)
		out.WriteRune(decoded)
		pos = next
	}
	return out.String()
}
Exemplo n.º 3
0
// advance reads one logical byte of s starting at pos and returns it
// together with the position that follows it.
//
// A "%XX" escape with two hex digits counts as a single byte and moves the
// position forward by three; any other byte, including a '%' that does not
// start a well-formed escape, is returned unchanged and moves it by one.
// When pos is at or past the end of s, advance returns (0, len(s)+1), so a
// caller can detect exhaustion by checking for a position greater than len(s).
func advance(s string, pos int) (byte, int) {
	switch {
	case pos >= len(s):
		return 0, len(s) + 1
	case s[pos] != '%':
		return s[pos], pos + 1
	case pos+2 >= len(s) ||
		!byteutil.IsHexDigit(s[pos+1]) ||
		!byteutil.IsHexDigit(s[pos+2]):
		// Not a well-formed escape: hand back the literal percent sign.
		return '%', pos + 1
	}
	hi, lo := byteutil.Unhex(s[pos+1]), byteutil.Unhex(s[pos+2])
	return hi<<4 | lo, pos + 3
}