Beispiel #1
0
// ExampleValidRune prints whether each of two sample runes can be legally
// encoded as UTF-8.
func ExampleValidRune() {
	// 'a' is an ordinary code point, while 0xfffffff lies far beyond the
	// largest code point Unicode defines, so only the former is valid.
	for _, r := range []rune{'a', 0xfffffff} {
		fmt.Println(utf8.ValidRune(r))
	}
	// Output:
	// true
	// false
}
Beispiel #2
0
// keywords collects the distinct, lower-cased search tokens found in the
// Keywords string of every user ID attached to key.
//
// A user ID contributes tokens only when it contains a '<': the text between
// the first '<' and the last '>' (when the latter comes after the former) is
// kept as one token, and the text before the '<' is split into runs of
// letters and numbers. The order of the returned tokens is unspecified
// because they are gathered from a map.
func keywords(key *openpgp.PrimaryKey) []string {
	// A rune separates tokens unless it is a valid letter or number.
	isSeparator := func(r rune) bool {
		return !utf8.ValidRune(r) || !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}

	seen := make(map[string]bool)
	for _, uid := range key.UserIDs {
		lowered := strings.ToLower(uid.Keywords)
		open := strings.Index(lowered, "<")
		if open == -1 {
			// Without an opening bracket nothing is extracted at all.
			continue
		}
		if closing := strings.LastIndex(lowered, ">"); closing > open {
			seen[lowered[open+1:closing]] = true
		}
		for _, token := range strings.FieldsFunc(lowered[:open], isSeparator) {
			seen[token] = true
		}
	}

	var tokens []string
	for token := range seen {
		tokens = append(tokens, token)
	}
	return tokens
}
Beispiel #3
0
// loadTestData reads the whole zip archive obtained from
// openReader(*testdata) into memory and turns every archive entry into a
// Test, skipping directories and entries whose name contains "SHORT".
//
// Each entry is read line by line. Blank lines and lines starting with '#'
// are skipped, except that a line matching versionRe triggers a warning when
// the reported version differs from unicode.Version. Every other line must
// match testRe: the first submatch is a space-separated list of hexadecimal
// code points and the second is stored as the comment. Lines containing a
// rune that utf8.ValidRune rejects are dropped.
//
// Errors are passed to Error or log.Fatal.
func loadTestData() []Test {
	f := openReader(*testdata)
	buffer, err := ioutil.ReadAll(f)
	f.Close()
	Error(err)
	archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
	Error(err)
	tests := []Test{}
	for _, f := range archive.File {
		// Skip the short versions, which are simply duplicates of the long versions.
		if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() {
			continue
		}
		ff, err := f.Open()
		Error(err)
		input := bufio.NewReader(ff)
		test := Test{name: path.Base(f.Name)}
		for {
			line, err := input.ReadString('\n')
			if err != nil {
				// A final line without a trailing newline arrives together
				// with io.EOF and is not processed.
				if err == io.EOF {
					break
				}
				log.Fatal(err)
			}
			if len(line) <= 1 || line[0] == '#' {
				if m := versionRe.FindStringSubmatch(line); m != nil {
					if m[1] != unicode.Version {
						log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], unicode.Version)
					}
				}
				continue
			}
			m := testRe.FindStringSubmatch(line)
			if m == nil || len(m) < 3 {
				log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
			}
			str := []byte{}
			// In the regression test data (unpaired) surrogates are assigned a weight
			// corresponding to their code point value.  However, utf8.DecodeRune,
			// which is used to compute the implicit weight, assigns FFFD to surrogates.
			// We therefore skip tests with surrogates.  This skips about 35 entries
			// per test.
			valid := true
			for _, split := range strings.Split(m[1], " ") {
				r, err := strconv.ParseUint(split, 16, 64)
				Error(err)
				valid = valid && utf8.ValidRune(rune(r))
				str = append(str, string(rune(r))...)
			}
			if valid {
				test.str = append(test.str, str)
				test.comment = append(test.comment, m[2])
			}
		}
		// Close the entry as soon as it has been consumed. A defer here would
		// keep every entry of the archive open until the function returns.
		ff.Close()
		tests = append(tests, test)
	}
	return tests
}
Beispiel #4
0
// genRune derives a rune generator from int64Gen: every generated int64 is
// converted to a rune, and only values accepted by utf8.ValidRune are kept.
func genRune(int64Gen gopter.Gen) gopter.Gen {
	toRune := func(value interface{}) interface{} {
		return rune(value.(int64))
	}
	isValid := func(v interface{}) bool {
		return utf8.ValidRune(v.(rune))
	}
	return int64Gen.Map(toRune).SuchThat(isValid)
}
Beispiel #5
0
// Tl overwrites every occurrence of rune u on the canvas with rune v,
// editing the canvas bytes in place.
//
// When v encodes to fewer bytes than the occurrence it replaces, the
// leftover bytes are set to NUL so the row keeps its length. It returns an
// error when v is not a valid rune, or as soon as it meets an occurrence of u
// that is too narrow to hold v; replacements made before that point are kept.
func (t *Textbox) Tl(u rune, v rune) error {
	if !utf8.ValidRune(v) {
		return errors.New("invalid rune.")
	}
	need := utf8.RuneLen(v)
	for _, line := range t.canvas {
		for pos := 0; pos < len(line); {
			found, size := utf8.DecodeRune(line[pos:])
			if found == u {
				if size < need {
					// The replacement does not fit into the bytes of u.
					return errors.New("rune too large.")
				}
				slot := line[pos : pos+size]
				utf8.EncodeRune(slot, v)
				// Blank out whatever the shorter encoding left behind.
				for k := need; k < size; k++ {
					slot[k] = 0x0
				}
			}
			pos += size
		}
	}
	return nil
}
// main demonstrates the three validity checks of unicode/utf8 by printing,
// for byte slices, runes and strings in turn, the result for one valid and
// one invalid sample.
func main() {
	// utf8.Valid: does the byte slice consist entirely of valid UTF-8?
	byteSamples := [][]byte{
		[]byte("안녕하세요"),    // well-formed UTF-8 text: true
		{0xff, 0xf1, 0xc1}, // not a well-formed UTF-8 sequence: false
	}
	for _, sample := range byteSamples {
		fmt.Println(utf8.Valid(sample))
	}

	// utf8.ValidRune: can the rune be legally encoded as UTF-8?
	runeSamples := []rune{
		'한',        // an ordinary code point: true
		0x11111111, // beyond the Unicode code point range: false
	}
	for _, sample := range runeSamples {
		fmt.Println(utf8.ValidRune(sample))
	}

	// utf8.ValidString: same check as utf8.Valid, but for strings.
	stringSamples := []string{
		"한글",                               // well-formed UTF-8 text: true
		string([]byte{0xff, 0xf1, 0xc1}), // not a well-formed UTF-8 sequence: false
	}
	for _, sample := range stringSamples {
		fmt.Println(utf8.ValidString(sample))
	}
}
// getUtf8FistValidChar returns the index of the first rune in str that
// utf8.ValidRune accepts, or -1 when str contains no such rune.
func getUtf8FistValidChar(str []rune) int {
	for idx := 0; idx < len(str); idx++ {
		if !utf8.ValidRune(str[idx]) {
			continue
		}
		return idx
	}
	return -1
}
Beispiel #8
0
// appendQuotedRuneWith appends r to buf, wrapped in the quote byte and
// escaped by appendEscapedRune, and returns the extended slice. A rune that
// is not valid is replaced by utf8.RuneError before it is escaped.
func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
	if !utf8.ValidRune(r) {
		r = utf8.RuneError
	}
	buf = append(buf, quote)
	buf = appendEscapedRune(buf, r, utf8.RuneLen(r), quote, ASCIIonly, graphicOnly)
	return append(buf, quote)
}
Beispiel #9
0
Datei: mail.go Projekt: kaey/mail
// stripNonUTF8 returns str with every byte that is not part of a well-formed
// UTF-8 sequence removed. Well-formed input is returned unchanged.
//
// Ranging over a string reports each invalid byte as utf8.RuneError
// (U+FFFD), which is itself a valid rune, so checking utf8.ValidRune on the
// ranged rune can never detect bad input. The string is therefore decoded
// explicitly: a RuneError with an encoded width of one byte marks an invalid
// byte, whereas a genuine U+FFFD in the input is three bytes wide and is kept.
func stripNonUTF8(str string) string {
	if utf8.ValidString(str) {
		return str
	}

	buf := new(bytes.Buffer)
	for i := 0; i < len(str); {
		r, size := utf8.DecodeRuneInString(str[i:])
		if r == utf8.RuneError && size == 1 {
			// Invalid byte: drop it and resynchronize on the next one.
			i++
			continue
		}
		buf.WriteString(str[i : i+size])
		i += size
	}

	return buf.String()
}
Beispiel #10
0
// escape replaces every byte of s that is not part of a printable character
// with the corresponding hexadecimal escape sequence (\XX, upper-case hex).
//
// Printable runes (as defined by unicode.IsPrint) are copied unchanged.
// Everything else is escaped one byte at a time, so a multi-byte
// non-printable rune yields one escape per byte. Bytes that are not valid
// UTF-8 are escaped as well: they decode to utf8.RuneError, which IsPrint
// would accept, so they have to be recognized by their one-byte width. If
// nothing needs escaping, s itself is returned.
func escape(s string) string {
	// printableLen returns the encoded length of the printable rune starting
	// at s[i], or 0 when the byte at s[i] has to be escaped.
	printableLen := func(i int) int {
		r, size := utf8.DecodeRuneInString(s[i:])
		if r == utf8.RuneError && size <= 1 {
			return 0 // invalid UTF-8 byte
		}
		if !unicode.IsPrint(r) {
			return 0
		}
		return size
	}

	// Check if a replacement is required.
	extra := 0
	for i := 0; i < len(s); {
		if n := printableLen(i); n > 0 {
			i += n
			continue
		}
		// Two extra bytes are required for each non-printable byte; e.g.
		//    "\n" -> `\0A`
		//    "\x00" -> `\00`
		extra += 2
		i++
	}
	if extra == 0 {
		return s
	}

	// Replace non-printable bytes.
	const hextable = "0123456789ABCDEF"
	buf := make([]byte, 0, len(s)+extra)
	for i := 0; i < len(s); {
		if n := printableLen(i); n > 0 {
			buf = append(buf, s[i:i+n]...)
			i += n
			continue
		}
		b := s[i]
		buf = append(buf, '\\', hextable[b>>4], hextable[b&0x0F])
		i++
	}
	return string(buf)
}
Beispiel #11
0
// asciiEscapeToUnicode interprets bs as hexadecimal digits and returns the
// rune with that value. A parse failure is reported through p.bug, and a
// value that utf8.ValidRune rejects is reported through p.panicf.
func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
	digits := string(bs)
	code, err := strconv.ParseUint(strings.ToLower(digits), 16, 32)
	if err != nil {
		p.bug("Could not parse '%s' as a hexadecimal number, but the "+
			"lexer claims it's OK: %s", digits, err)
	}
	r := rune(code)
	if utf8.ValidRune(r) {
		return r
	}
	p.panicf("Escaped character '\\u%s' is not valid UTF-8.", digits)
	return r
}
Beispiel #12
0
// similarEscapeCustomChar translates a SQL:2008 SIMILAR TO pattern into the
// POSIX-style syntax understood by our regexp engine, using escapeChar as the
// pattern's escape character. An escapeChar that is not a valid rune disables
// escape processing altogether.
func similarEscapeCustomChar(pattern string, escapeChar rune) string {
	out := make([]rune, 0, utf8.RuneCountInString(pattern))
	emit := func(rs ...rune) { out = append(out, rs...) }

	escapeUsable := utf8.ValidRune(escapeChar)
	var (
		inClass   bool // inside a [...] character class
		escaped   bool // previous rune was the escape character
		groupOpen bool // an escaped '"' has opened a group that is not closed yet
	)
	for _, ch := range pattern {
		if escaped {
			escaped = false
			if ch == '"' && !inClass {
				// For SUBSTRING patterns: escaped quotes alternately open
				// and close a group.
				if groupOpen {
					emit(')')
				} else {
					emit('(')
				}
				groupOpen = !groupOpen
			} else {
				emit('\\', ch)
			}
			continue
		}
		if escapeUsable && ch == escapeChar {
			// SQL99 escape character; hold it back until the next rune is seen.
			escaped = true
			continue
		}
		if inClass {
			if ch == '\\' {
				emit('\\')
			}
			emit(ch)
			inClass = ch != ']'
			continue
		}

		switch ch {
		case '[':
			emit(ch)
			inClass = true
		case '%':
			emit('.', '*')
		case '_':
			emit('.')
		case '(':
			// Parentheses become non-capturing groups.
			emit('(', '?', ':')
		case '\\', '.', '^', '$':
			// These are regexp metacharacters but plain text in SQL
			// patterns, so they are escaped.
			emit('\\', ch)
		default:
			emit(ch)
		}
	}

	return string(out)
}
Beispiel #13
0
// lexLexer reads runes from l until it obtains one that utf8.ValidRune
// accepts. That rune is printed, appended to l.output, and lexNext is
// returned as the next state. Runes that are not valid are skipped.
func lexLexer(l *Lexer) stateFunc {
	for {
		r := l.next()
		if !utf8.ValidRune(r) {
			continue
		}
		fmt.Printf("hello rune %v\n", r)
		l.output = append(l.output, r)
		return lexNext
	}
}
Beispiel #14
0
// IndexRune reports the byte index at which the Unicode code point r first
// occurs in s, or -1 when r does not occur in s.
// For r == utf8.RuneError the result is the index of the first rune that
// decodes to utf8.RuneError, which includes any invalid UTF-8 byte sequence.
// A rune that is not valid is never found.
func IndexRune(s string, r rune) int {
	if 0 <= r && r < utf8.RuneSelf {
		// Single-byte runes can be searched for as plain bytes.
		return IndexByte(s, byte(r))
	}
	if r == utf8.RuneError {
		for pos, decoded := range s {
			if decoded == utf8.RuneError {
				return pos
			}
		}
		return -1
	}
	if utf8.ValidRune(r) {
		return Index(s, string(r))
	}
	return -1
}
Beispiel #15
0
// writeTables classifies every valid rune, stores the resulting property
// together with a category value in the "derivedProperties" trie, and emits
// the trie through a code writer. The writer's WriteGoFile call, which
// receives *outputFile and "precis", is deferred until the function returns.
func writeTables() {
	propTrie := triegen.NewTrie("derivedProperties")
	w := gen.NewCodeWriter()
	// Runs when writeTables returns; it does not run if log.Fatal exits below.
	defer w.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(w)

	// Iterate over all the runes...
	// NOTE(review): the bound is exclusive, so unicode.MaxRune itself is
	// never visited — confirm this is intended.
	for i := rune(0); i < unicode.MaxRune; i++ {
		r := rune(i)

		if !utf8.ValidRune(r) {
			continue
		}

		e, ok := exceptions[i]
		p := e.prop
		// The first matching case decides the property, so the order of the
		// cases is significant.
		switch {
		case ok:
			// An entry in exceptions keeps the property recorded there.
		case !unicode.In(r, assigned):
			p = unassigned
		case r >= 0x0021 && r <= 0x007e: // Is ASCII 7
			p = pValid
		case unicode.In(r, disallowedRunes, unicode.Cc):
			p = disallowed
		case hasCompat(r):
			p = idDisOrFreePVal
		case isLetterDigits(r):
			p = pValid
		case isIdDisAndFreePVal(r):
			p = idDisOrFreePVal
		default:
			p = disallowed
		}
		cat := runeCategory[r]
		// Don't set category for runes that are disallowed.
		// For those the category comes from the exceptions entry instead.
		if p == disallowed {
			cat = exceptions[r].cat
		}
		// Property and category are OR-ed into a single trie value.
		propTrie.Insert(r, uint64(p)|uint64(cat))
	}
	sz, err := propTrie.Gen(w)
	if err != nil {
		log.Fatal(err)
	}
	// Add the value returned by Gen to the writer's running Size.
	w.Size += sz
}
Beispiel #16
0
// scanUnicodeRune consumes n hexadecimal digits from p and continues with
// scanReturnString on the code point they spell.
//
// It yields an error scanner when a byte is not a hexadecimal digit, when the
// digits cannot be parsed, or when the value is not a valid rune.
func scanUnicodeRune(p *parser, n int) scanner {
	p.record(0)
	for i := 0; i < n; i++ {
		if r := p.readByte(); !isHex(r) {
			return p.expectStr("hexadecimal digit")
		}
	}
	s := p.slice(0)
	codepoint, err := strconv.ParseUint(s, 16, 64)
	if err != nil {
		return p.setError(err)
	}
	// Check the range before narrowing to rune: converting a large uint64 to
	// a 32-bit rune keeps only the low bits, which could turn an out-of-range
	// value into a seemingly valid code point.
	if codepoint > utf8.MaxRune || !utf8.ValidRune(rune(codepoint)) {
		return p.errorScanner("%s is not a valid utf8 rune", s)
	}
	return scanReturnString(p, string(rune(codepoint)))
}
Beispiel #17
0
// writeTables classifies every valid rune, stores the resulting property in
// the "derivedProperties" trie, and emits the trie through a code writer. The
// writer's WriteGoFile call, which receives *outputFile and "precis", is
// deferred until the function returns.
func writeTables() {
	propTrie := triegen.NewTrie("derivedProperties")
	w := gen.NewCodeWriter()
	// Runs when writeTables returns; it does not run if log.Fatal exits below.
	defer w.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(w)

	// Iterate over all the runes...
	// NOTE(review): the bound is exclusive, so unicode.MaxRune itself is
	// never visited — confirm this is intended.
	for i := uint32(0); i < unicode.MaxRune; i++ {
		r := rune(i)

		if !utf8.ValidRune(r) {
			continue
		}

		p, ok := exceptions[i]
		// The first matching case decides the property, so the order of the
		// cases is significant.
		switch {
		case ok:
			// An entry in exceptions is used as is.
		case !unicode.In(r, assigned):
			p = unassigned
		case r >= 33 && r <= 126: // Is ASCII 7
			p = pValid
		case r == 0x200C || r == 0x200D: // Is join control
			p = contextJ
		case unicode.In(r, disallowedRunes, unicode.Cc):
			p = disallowed
		case isHasCompat(r):
			p = idDis | freePVal
		case isLetterDigits(r):
			p = pValid
		case isIdDisAndFreePVal(r):
			p = idDis | freePVal
		default:
			p = disallowed
		}
		propTrie.Insert(r, uint64(p))
	}
	sz, err := propTrie.Gen(w)
	if err != nil {
		log.Fatal(err)
	}
	// Add the value returned by Gen to the writer's running Size.
	w.Size += sz
}
Beispiel #18
0
// Fill overwrites every row of the canvas, in place, with repetitions of the
// rune u. When the row length is not a multiple of u's encoded width, the
// bytes left over at the end of the row are set to NUL.
//
// It returns an error if u is not a valid rune.
//
// Each row is filled exactly once. Filling it repeatedly with a stride that
// shrinks to one byte after padding would write multi-byte runes at
// overlapping offsets and leave the row with broken UTF-8.
func (t *Textbox) Fill(u rune) error {
	if !utf8.ValidRune(u) {
		return errors.New("invalid rune.")
	}
	width := utf8.RuneLen(u)
	for _, row := range t.canvas {
		rest := row
		for len(rest) >= width {
			utf8.EncodeRune(rest, u)
			rest = rest[width:]
		}
		// Pad the tail that is too short to hold another copy of u.
		for k := range rest {
			rest[k] = 0x0
		}
	}
	return nil
}
Beispiel #19
0
// IndexRune treats s as UTF-8-encoded text and reports the byte index at
// which the rune r first occurs, or -1 when r does not occur in s.
// For r == utf8.RuneError the result is the index of the first position that
// decodes to utf8.RuneError, which includes any invalid UTF-8 byte sequence.
// A rune that is not valid is never found.
func IndexRune(s []byte, r rune) int {
	if 0 <= r && r < utf8.RuneSelf {
		// Single-byte runes can be searched for as plain bytes.
		return IndexByte(s, byte(r))
	}
	if r == utf8.RuneError {
		for pos := 0; pos < len(s); {
			decoded, size := utf8.DecodeRune(s[pos:])
			if decoded == utf8.RuneError {
				return pos
			}
			pos += size
		}
		return -1
	}
	if !utf8.ValidRune(r) {
		return -1
	}
	// Search for the encoded form of r.
	var encoded [utf8.UTFMax]byte
	size := utf8.EncodeRune(encoded[:], r)
	return Index(s, encoded[:size])
}
Beispiel #20
0
// TestUTF7Random is a black-box round-trip test: testing/quick feeds random
// valid UTF-8 byte slices through UTF7EncodeBytes and UTF7DecodeBytes and
// expects to get the original bytes back.
func TestUTF7Random(t *testing.T) {
	roundTrips := func(b []byte) bool {
		encoded := UTF7EncodeBytes(b)
		decoded, err := UTF7DecodeBytes(encoded)
		if err != nil {
			t.Fatal("UTF7 decode bytes return error", err)
		}
		return reflect.DeepEqual(b, decoded)
	}

	// randomUTF8 produces the encoding of up to 4095 random code points;
	// values that are not valid runes become the replacement character.
	randomUTF8 := func(v []reflect.Value, r *rand.Rand) {
		var scratch [4]byte
		count := r.Intn(4096)
		text := make([]byte, 0, count*4)
		for i := 0; i < count; i++ {
			u := rune(r.Int() % 0x110000)
			if !utf8.ValidRune(u) {
				u = unicode.ReplacementChar
			}
			n := utf8.EncodeRune(scratch[:], u)
			text = append(text, scratch[:n]...)
		}
		v[0] = reflect.ValueOf(text)
	}

	cfg := quick.Config{
		Rand:   rand.New(rand.NewSource(time.Now().Unix())),
		Values: randomUTF8,
	}
	if err := quick.Check(roundTrips, &cfg); err != nil {
		t.Fatal("failed for UTF7 blackbox test")
	}
}
Beispiel #21
0
// parseUnicodeValue decodes one Go-style unicode value or byte value starting
// at ctx.str[location]: either a plain UTF-8 encoded character or a
// backslash escape sequence.
//
// On success it returns the decoded rune and the index of the first byte
// after the consumed input. On failure it stores the position and a message
// in err and returns (0, -1).
func (ctx *parseContext) parseUnicodeValue(location int, err *Error) (rune, int) {
	/*
		unicode_value    = unicode_char | little_u_value | big_u_value | escaped_char .
		byte_value       = octal_byte_value | hex_byte_value .
		octal_byte_value = `\` octal_digit octal_digit octal_digit .
		hex_byte_value   = `\` "x" hex_digit hex_digit .
		little_u_value   = `\` "u" hex_digit hex_digit hex_digit hex_digit .
		big_u_value      = `\` "U" hex_digit hex_digit hex_digit hex_digit
		                           hex_digit hex_digit hex_digit hex_digit .
		escaped_char     = `\` ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | `\` | "'" | `"` ) .
	*/

	// fail records the error details and yields the failure result.
	fail := func(at int, message string) (rune, int) {
		err.Location = at
		err.Message = message
		return 0, -1
	}

	if location >= len(ctx.str) {
		return fail(location, "Unexpected end of file: waiting for Unicode character")
	}

	if ctx.str[location] != '\\' {
		// Plain character. DecodeRune signals malformed input with RuneError
		// and a width of one byte; a genuine U+FFFD is three bytes wide.
		r, size := utf8.DecodeRune(ctx.str[location:])
		if r == utf8.RuneError && size <= 1 {
			return fail(location, "Invalid Unicode character")
		}
		return r, location + size
	}

	location++
	if location >= len(ctx.str) {
		return fail(location, "Unexpected end of file in escape sequence")
	}

	switch c := ctx.str[location]; c {
	case '\\':
		return '\\', location + 1
	case 'a':
		return '\a', location + 1
	case 'b':
		return '\b', location + 1
	case 'f':
		return '\f', location + 1
	case 'n':
		return '\n', location + 1
	case 'r':
		return '\r', location + 1
	case 't':
		return '\t', location + 1
	case 'v':
		return '\v', location + 1
	case '`':
		return '`', location + 1
	case '\'':
		return '\'', location + 1
	case '"':
		return '"', location + 1

	case '0', '1', '2', '3':
		// Octal byte value: exactly three octal digits. The first digit is at
		// most '3' so that the value fits into a byte.
		if location+2 >= len(ctx.str) {
			return fail(location, "Unexpected end of file in escape sequence")
		}
		var r rune
		for _, d := range ctx.str[location : location+3] {
			if d < '0' || d > '7' {
				return fail(location, "Invalid character in octal_byte")
			}
			r = r*8 + rune(d-'0')
		}
		return r, location + 3

	case 'x', 'u', 'U':
		// Hexadecimal value with a fixed number of digits after the letter.
		digits := 2
		switch c {
		case 'u':
			digits = 4
		case 'U':
			digits = 8
		}
		if location+digits >= len(ctx.str) {
			return fail(location, "Unexpected end of file in escape sequence")
		}

		location++ // step over the letter onto the first digit

		var r rune
		for _, d := range ctx.str[location : location+digits] {
			switch {
			case d >= '0' && d <= '9':
				r = r*16 + rune(d-'0')
			case d >= 'a' && d <= 'f':
				r = r*16 + rune(d-'a'+10)
			case d >= 'A' && d <= 'F':
				r = r*16 + rune(d-'A'+10)
			default:
				return fail(location, "Illegal character in hex code")
			}
		}

		if !utf8.ValidRune(r) {
			return fail(location, "Invalid rune")
		}
		return r, location + digits
	}

	return fail(location, "Invalid escaped char")
}
Beispiel #22
0
// genTables builds the "idna" trie from three Unicode data files and emits
// it, together with the mappings and xorData variables, through a code
// writer whose deferred WriteGoFile call receives "tables.go" and "idna".
//
// The first two passes collect per-rune bits in runes; the third pass walks
// IdnaMappingTable.txt and inserts one trie entry per valid rune.
func genTables() {
	t := triegen.NewTrie("idna")

	// Pass 1: mark virama runes and runes in unicode.Mark.
	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		const cccVirama = 9
		if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
			runes[p.Rune(0)] = viramaModifier
		}
		switch {
		case unicode.In(r, unicode.Mark):
			runes[r] |= modifier
		}
	})

	// Pass 2: OR the joining type into the rune's bits, shifted by joinShift.
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
		switch v := p.String(1); v {
		case "L", "D", "T", "R":
			runes[p.Rune(0)] |= joinType[v] << joinShift
		}
	})

	// Pass 3: create the trie entries from the IDNA mapping table.
	ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		// The mappings table explicitly defines surrogates as invalid.
		if !utf8.ValidRune(r) {
			return
		}

		cat := catFromEntry(p)
		isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
		if !isMapped {
			// Only include additional category information for non-mapped
			// runes. The additional information is only used after mapping and
			// the bits would clash with mapping information.
			// TODO: it would be possible to inline this data and avoid
			// additional lookups. This is quite tedious, though, so let's first
			// see if we need this.
			cat |= category(runes[r])
		}

		s := string(p.Runes(2))
		// A mapping string is only legal for the mapped categories.
		if s != "" && !isMapped {
			log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
		}
		t.Insert(r, uint64(makeEntry(r, s))+uint64(cat))
	})

	w := gen.NewCodeWriter()
	// Runs when genTables returns; it does not run if log.Fatal exits below.
	defer w.WriteGoFile("tables.go", "idna")

	gen.WriteUnicodeVersion(w)

	w.WriteVar("mappings", string(mappings))
	w.WriteVar("xorData", string(xorData))

	sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
	if err != nil {
		log.Fatal(err)
	}
	// Add the value returned by Gen to the writer's running Size.
	w.Size += sz
}
Beispiel #23
0
// validate reports c through Errorf when its value is not a valid rune and
// returns c unchanged in either case.
func (c Char) validate() Char {
	if utf8.ValidRune(rune(c)) {
		return c
	}
	Errorf("invalid char value %U\n", c)
	return c
}
Beispiel #24
0
// testUTF8 exercises the unicode/utf8 package on the sample text
// "Hello, 世界", passing each expected/actual pair to TEQ (or TEQbyteSlice for
// byte slices) with an empty position label.
func testUTF8() {
	// DecodeLastRune: peel runes off the end of a byte slice.
	b := []byte("Hello, 世界")
	r, size := utf8.DecodeLastRune(b)
	TEQ("", '界', r)
	TEQ("", size, 3)
	b = b[:len(b)-size]
	r, size = utf8.DecodeLastRune(b)
	TEQ("", '世', r)
	TEQ("", size, 3)
	b = b[:len(b)-size]
	r, size = utf8.DecodeLastRune(b)
	TEQ("", ' ', r)
	TEQ("", size, 1)

	//fmt.Println("len(Zi)=", len("字"), hx.CodeInt(`'字'.length;`))

	// DecodeLastRuneInString: the same checks on a string.
	str := "Hello, 世界"
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ("", '界', r)
	TEQ("", size, 3)
	str = str[:len(str)-size]
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ("", '世', r)
	TEQ("", size, 3)
	str = str[:len(str)-size]
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ("", ' ', r)
	TEQ("", size, 1)

	// EncodeRune: '世' encodes to three bytes.
	ru := '世'
	buf := make([]byte, 3)
	n := utf8.EncodeRune(buf, ru)
	TEQ("", n, 3)
	TEQbyteSlice("", buf, []byte{228, 184, 150})

	// FullRune: a complete encoding versus one cut short by a byte.
	buf = []byte{228, 184, 150} // 世
	TEQ("", true, utf8.FullRune(buf))
	TEQ("", false, utf8.FullRune(buf[:2]))

	str = "世"
	TEQ("", true, utf8.FullRuneInString(str))
	//if ShowKnownErrors || hx.GetInt("", "'字'.length") == 3 {
	TEQ(""+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.FullRuneInString(str[:2]))
	//}
	// RuneCount: 13 bytes hold 9 runes.
	buf = []byte("Hello, 世界")
	TEQ("", 13, len(buf))
	TEQ("", 9, utf8.RuneCount(buf))

	str = "Hello, 世界"
	TEQ("", 13, len(str))
	TEQ("", 9, utf8.RuneCountInString(str))

	// RuneLen: encoded width of an ASCII and a CJK rune.
	TEQ("", 1, utf8.RuneLen('a'))
	TEQ("", 3, utf8.RuneLen('界'))

	// RuneStart: buf[0] is 'a', buf[1] starts '界', buf[2] is a continuation byte.
	buf = []byte("a界")
	TEQ("", true, utf8.RuneStart(buf[0]))
	TEQ("", true, utf8.RuneStart(buf[1]))
	TEQ("", false, utf8.RuneStart(buf[2]))

	// Valid, ValidRune and ValidString: one valid and one invalid sample each.
	valid := []byte("Hello, 世界")
	invalid := []byte{0xff, 0xfe, 0xfd}
	TEQ("", true, utf8.Valid(valid))
	TEQ("", false, utf8.Valid(invalid))

	valid_rune := 'a'
	invalid_rune := rune(0xfffffff)
	TEQ("", true, utf8.ValidRune(valid_rune))
	TEQ("", false, utf8.ValidRune(invalid_rune))

	valid_string := "Hello, 世界"
	invalid_string := string([]byte{0xff, 0xfe, 0xfd})
	TEQ("", true, utf8.ValidString(valid_string))
	//if ShowKnownErrors || hx.GetInt("", "'字'.length") == 3 {
	TEQ(""+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.ValidString(invalid_string))
	//}
}
Beispiel #25
0
// testUTF8 exercises the unicode/utf8 package on the sample text
// "Hello, 世界", passing each expected/actual pair to TEQ (or TEQbyteSlice for
// byte slices) labelled with tardisgolib.CPos(). The two checks on
// deliberately broken strings run only when ShowKnownErrors is set or
// tardisgolib.Zilen() returns 3.
func testUTF8() {
	// DecodeLastRune: peel runes off the end of a byte slice.
	b := []byte("Hello, 世界")
	r, size := utf8.DecodeLastRune(b)
	TEQ(tardisgolib.CPos(), '界', r)
	TEQ(tardisgolib.CPos(), size, 3)
	b = b[:len(b)-size]
	r, size = utf8.DecodeLastRune(b)
	TEQ(tardisgolib.CPos(), '世', r)
	TEQ(tardisgolib.CPos(), size, 3)
	b = b[:len(b)-size]
	r, size = utf8.DecodeLastRune(b)
	TEQ(tardisgolib.CPos(), ' ', r)
	TEQ(tardisgolib.CPos(), size, 1)

	// DecodeLastRuneInString: the same checks on a string.
	str := "Hello, 世界"
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ(tardisgolib.CPos(), '界', r)
	TEQ(tardisgolib.CPos(), size, 3)
	str = str[:len(str)-size]
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ(tardisgolib.CPos(), '世', r)
	TEQ(tardisgolib.CPos(), size, 3)
	str = str[:len(str)-size]
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ(tardisgolib.CPos(), ' ', r)
	TEQ(tardisgolib.CPos(), size, 1)

	// EncodeRune: '世' encodes to three bytes.
	ru := '世'
	buf := make([]byte, 3)
	n := utf8.EncodeRune(buf, ru)
	TEQ(tardisgolib.CPos(), n, 3)
	TEQbyteSlice(tardisgolib.CPos(), buf, []byte{228, 184, 150})

	// FullRune: a complete encoding versus one cut short by a byte.
	buf = []byte{228, 184, 150} // 世
	TEQ(tardisgolib.CPos(), true, utf8.FullRune(buf))
	TEQ(tardisgolib.CPos(), false, utf8.FullRune(buf[:2]))

	str = "世"
	TEQ(tardisgolib.CPos(), true, utf8.FullRuneInString(str))
	if ShowKnownErrors || tardisgolib.Zilen() == 3 {
		TEQ(tardisgolib.CPos()+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.FullRuneInString(str[:2]))
	}
	// RuneCount: 13 bytes hold 9 runes.
	buf = []byte("Hello, 世界")
	TEQ(tardisgolib.CPos(), 13, len(buf))
	TEQ(tardisgolib.CPos(), 9, utf8.RuneCount(buf))

	str = "Hello, 世界"
	TEQ(tardisgolib.CPos(), 13, len(str))
	TEQ(tardisgolib.CPos(), 9, utf8.RuneCountInString(str))

	// RuneLen: encoded width of an ASCII and a CJK rune.
	TEQ(tardisgolib.CPos(), 1, utf8.RuneLen('a'))
	TEQ(tardisgolib.CPos(), 3, utf8.RuneLen('界'))

	// RuneStart: buf[0] is 'a', buf[1] starts '界', buf[2] is a continuation byte.
	buf = []byte("a界")
	TEQ(tardisgolib.CPos(), true, utf8.RuneStart(buf[0]))
	TEQ(tardisgolib.CPos(), true, utf8.RuneStart(buf[1]))
	TEQ(tardisgolib.CPos(), false, utf8.RuneStart(buf[2]))

	// Valid, ValidRune and ValidString: one valid and one invalid sample each.
	valid := []byte("Hello, 世界")
	invalid := []byte{0xff, 0xfe, 0xfd}
	TEQ(tardisgolib.CPos(), true, utf8.Valid(valid))
	TEQ(tardisgolib.CPos(), false, utf8.Valid(invalid))

	valid_rune := 'a'
	invalid_rune := rune(0xfffffff)
	TEQ(tardisgolib.CPos(), true, utf8.ValidRune(valid_rune))
	TEQ(tardisgolib.CPos(), false, utf8.ValidRune(invalid_rune))

	valid_string := "Hello, 世界"
	invalid_string := string([]byte{0xff, 0xfe, 0xfd})
	TEQ(tardisgolib.CPos(), true, utf8.ValidString(valid_string))
	if ShowKnownErrors || tardisgolib.Zilen() == 3 {
		TEQ(tardisgolib.CPos()+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.ValidString(invalid_string))
	}
}
Beispiel #26
0
// TestDumpRandom generates a random number of random rows with all data
// types. This data is dumped, inserted, and dumped again. The two dumps
// are compared for exactness. The data from the inserted dump is then
// SELECT'd and compared to the original generated data to ensure it is
// round-trippable.
//
// The generate/dump/compare cycle repeats until *randomTestTime has elapsed.
func TestDumpRandom(t *testing.T) {
	defer leaktest.AfterTest(t)()

	c, err := newCLITest(t, false)
	if err != nil {
		t.Fatal(err)
	}
	defer c.stop(true)

	// Note: from here on the local variable url shadows the url package.
	url, cleanup := sqlutils.PGUrl(t, c.ServingAddr(), "TestDumpRandom", url.User(security.RootUser))
	defer cleanup()

	conn := makeSQLConn(url.String())
	defer conn.Close()

	// Database d holds the generated rows; database o receives the dump.
	if err := conn.Exec(`
		CREATE DATABASE d;
		CREATE DATABASE o;
		CREATE TABLE d.t (
			rowid int,
			i int,
			f float,
			d date,
			m timestamp,
			n interval,
			o bool,
			e decimal,
			s string,
			b bytes,
			PRIMARY KEY (rowid, i, f, d, m, n, o, e, s, b)
		);
	`, nil); err != nil {
		t.Fatal(err)
	}

	// The seed is logged together with the test output.
	rnd, seed := randutil.NewPseudoRand()
	t.Logf("random seed: %v", seed)

	start := timeutil.Now()

	for iteration := 0; timeutil.Since(start) < *randomTestTime; iteration++ {
		// Every iteration starts from an empty source table.
		if err := conn.Exec(`DELETE FROM d.t`, nil); err != nil {
			t.Fatal(err)
		}
		var generatedRows [][]driver.Value
		count := rnd.Int63n(500)
		t.Logf("random iteration %v: %v rows", iteration, count)
		for _i := int64(0); _i < count; _i++ {
			// Generate a random number of random inserts.
			i := rnd.Int63()
			f := rnd.Float64()
			d := time.Unix(0, rnd.Int63()).Round(time.Hour * 24).UTC()
			m := time.Unix(0, rnd.Int63()).Round(time.Microsecond).UTC()
			n := time.Duration(rnd.Int63()).String()
			o := rnd.Intn(2) == 1
			// NOTE(review): TrimRight treats ".0" as a set of characters, so
			// trailing zeros are removed from integer-valued strings as well
			// (e.g. "100" becomes "1") — confirm this is intended.
			e := strings.TrimRight(inf.NewDec(rnd.Int63(), inf.Scale(rnd.Int31n(20)-10)).String(), ".0")
			sr := make([]byte, rnd.Intn(500))
			if _, err := rnd.Read(sr); err != nil {
				t.Fatal(err)
			}
			// Each random byte is widened to a rune and re-encoded, which
			// makes s valid UTF-8. Every byte value is a valid rune, so the
			// ValidRune guard below never actually skips anything.
			s := make([]byte, 0, len(sr))
			for _, b := range sr {
				r := rune(b)
				if !utf8.ValidRune(r) {
					continue
				}
				s = append(s, []byte(string(r))...)
			}
			b := make([]byte, rnd.Intn(500))
			if _, err := rnd.Read(b); err != nil {
				t.Fatal(err)
			}

			// Values in column order; the loop counter serves as rowid.
			vals := []driver.Value{
				_i,
				i,
				f,
				d,
				m,
				[]byte(n), // intervals come out as `[]byte`s
				o,
				[]byte(e), // decimals come out as `[]byte`s
				string(s),
				b,
			}
			if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)", vals); err != nil {
				t.Fatal(err)
			}
			// rowid is left out because check does not select it.
			generatedRows = append(generatedRows, vals[1:])
		}

		// check selects every row of table in rowid order and compares it,
		// column by column, with the generated rows. It stops the test after
		// the first row that contains a mismatch.
		check := func(table string) {
			q := fmt.Sprintf("SELECT i, f, d, m, n, o, e, s, b FROM %s ORDER BY rowid", table)
			nrows, err := conn.Query(q, nil)
			if err != nil {
				t.Fatal(err)
			}
			defer func() {
				if err := nrows.Close(); err != nil {
					t.Fatal(err)
				}
			}()
			for gi, generatedRow := range generatedRows {
				fetched := make([]driver.Value, len(nrows.Columns()))
				if err := nrows.Next(fetched); err != nil {
					t.Fatal(err)
				}

				for i, fetchedVal := range fetched {
					generatedVal := generatedRow[i]
					// Note: this t shadows the *testing.T inside the if body only.
					if t, ok := fetchedVal.(time.Time); ok {
						// dates and timestamps come out with offset zero (but
						// not UTC specifically).
						fetchedVal = t.UTC()
					}
					if !reflect.DeepEqual(fetchedVal, generatedVal) {
						t.Errorf("NOT EQUAL: table %s, row %d, col %d\ngenerated (%T): %v\nselected (%T): %v\n", table, gi, i, generatedVal, generatedVal, fetchedVal, fetchedVal)
					}
				}
				if t.Failed() {
					t.FailNow()
				}
			}
		}

		// The freshly inserted data must read back unchanged.
		check("d.t")

		var buf bytes.Buffer
		if err := DumpTable(&buf, conn, "d", "t"); err != nil {
			t.Fatal(err)
		}
		dump := buf.String()
		buf.Reset()

		// Replay the dump into database o.
		if err := conn.Exec(`
			SET DATABASE = o;
			DROP TABLE IF EXISTS t;
		`, nil); err != nil {
			t.Fatal(err)
		}
		if err := conn.Exec(dump, nil); err != nil {
			t.Fatal(err)
		}

		// The restored table must hold the generated data as well.
		check("o.t")

		// Dumping the restored table must reproduce the first dump exactly.
		if err := DumpTable(&buf, conn, "o", "t"); err != nil {
			t.Fatal(err)
		}
		dump2 := buf.String()
		if dump != dump2 {
			t.Fatalf("unmatching dumps:\nFIRST:\n%s\n\nSECOND:\n%s", dump, dump2)
		}
	}
}