// ExampleValidRune demonstrates utf8.ValidRune on an ASCII letter, which is
// accepted, and on a value larger than unicode.MaxRune, which is rejected.
func ExampleValidRune() {
	for _, r := range []rune{'a', 0xfffffff} {
		fmt.Println(utf8.ValidRune(r))
	}
	// Output:
	// true
	// false
}
// keywords returns a slice of searchable tokens extracted // from the given UserID packet keywords string. func keywords(key *openpgp.PrimaryKey) []string { m := make(map[string]bool) for _, uid := range key.UserIDs { s := strings.ToLower(uid.Keywords) lbr, rbr := strings.Index(s, "<"), strings.LastIndex(s, ">") if lbr != -1 && rbr > lbr { m[s[lbr+1:rbr]] = true } if lbr != -1 { fields := strings.FieldsFunc(s[:lbr], func(r rune) bool { if !utf8.ValidRune(r) { return true } if unicode.IsLetter(r) || unicode.IsNumber(r) { return false } return true }) for _, field := range fields { m[field] = true } } } var result []string for k := range m { result = append(result, k) } return result }
func loadTestData() []Test { f := openReader(*testdata) buffer, err := ioutil.ReadAll(f) f.Close() Error(err) archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) Error(err) tests := []Test{} for _, f := range archive.File { // Skip the short versions, which are simply duplicates of the long versions. if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() { continue } ff, err := f.Open() Error(err) defer ff.Close() input := bufio.NewReader(ff) test := Test{name: path.Base(f.Name)} for { line, err := input.ReadString('\n') if err != nil { if err == io.EOF { break } log.Fatal(err) } if len(line) <= 1 || line[0] == '#' { if m := versionRe.FindStringSubmatch(line); m != nil { if m[1] != unicode.Version { log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], unicode.Version) } } continue } m := testRe.FindStringSubmatch(line) if m == nil || len(m) < 3 { log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) } str := []byte{} // In the regression test data (unpaired) surrogates are assigned a weight // corresponding to their code point value. However, utf8.DecodeRune, // which is used to compute the implicit weight, assigns FFFD to surrogates. // We therefore skip tests with surrogates. This skips about 35 entries // per test. valid := true for _, split := range strings.Split(m[1], " ") { r, err := strconv.ParseUint(split, 16, 64) Error(err) valid = valid && utf8.ValidRune(rune(r)) str = append(str, string(rune(r))...) } if valid { test.str = append(test.str, str) test.comment = append(test.comment, m[2]) } } tests = append(tests, test) } return tests }
// genRune derives a rune generator from int64Gen: every generated int64 is
// converted to a rune, and only values accepted by utf8.ValidRune pass the
// SuchThat filter.
func genRune(int64Gen gopter.Gen) gopter.Gen {
	toRune := func(value interface{}) interface{} {
		return rune(value.(int64))
	}
	isValid := func(v interface{}) bool {
		return utf8.ValidRune(v.(rune))
	}
	return int64Gen.Map(toRune).SuchThat(isValid)
}
func (t *Textbox) Tl(u rune, v rune) error { //Check if rune is valid if !utf8.ValidRune(v) { return errors.New("invalid rune.") } for i, val := range t.canvas { for j, w := 0, 0; j < len(val); j += w { runeValue, width := utf8.DecodeRune(val[j:]) if runeValue == u { tmp := t.canvas[i][j : j+width] replaceWidth := utf8.RuneLen(v) if width == replaceWidth { // replace rune utf8.EncodeRune(tmp, v) } else if width > replaceWidth { // replace and pad utf8.EncodeRune(tmp, v) tmp = tmp[replaceWidth:] for len(tmp) > 0 { replaceWidth = utf8.EncodeRune(tmp, 0x0) tmp = tmp[replaceWidth:] } } else { // rune wont fit. return errors.New("rune too large.") } } w = width } } return nil }
// main prints the results of utf8.Valid, utf8.ValidRune and utf8.ValidString
// for one well-formed and one malformed input each.
func main() {
	hello := []byte("안녕하세요")
	fmt.Println(utf8.Valid(hello)) // true: "안녕하세요" is valid UTF-8

	garbage := []byte{0xff, 0xf1, 0xc1}
	fmt.Println(utf8.Valid(garbage)) // false: 0xff 0xf1 0xc1 is not valid UTF-8

	var goodRune rune = '한'
	fmt.Println(utf8.ValidRune(goodRune)) // true: '한' can be encoded as UTF-8

	var badRune rune = 0x11111111
	fmt.Println(utf8.ValidRune(badRune)) // false: 0x11111111 cannot be encoded as UTF-8

	hangul := "한글"
	fmt.Println(utf8.ValidString(hangul)) // true: "한글" is valid UTF-8

	broken := string([]byte{0xff, 0xf1, 0xc1})
	fmt.Println(utf8.ValidString(broken)) // false: 0xff 0xf1 0xc1 is not valid UTF-8
}
// getUtf8FistValidChar returns the index of the first element of str that
// utf8.ValidRune accepts, or -1 when str is empty or holds no such element.
func getUtf8FistValidChar(str []rune) int {
	for idx := 0; idx < len(str); idx++ {
		if !utf8.ValidRune(str[idx]) {
			continue
		}
		return idx
	}
	return -1
}
func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { buf = append(buf, quote) if !utf8.ValidRune(r) { r = utf8.RuneError } buf = appendEscapedRune(buf, r, utf8.RuneLen(r), quote, ASCIIonly, graphicOnly) buf = append(buf, quote) return buf }
// stripNonUTF8 returns str with every byte that is not part of a valid UTF-8
// sequence removed. Valid sequences, including a correctly encoded U+FFFD,
// are kept byte-for-byte.
//
// A range loop over a string cannot be used for this: it reports each
// invalid byte as utf8.RuneError, which is itself a valid rune, so the bad
// bytes would be rewritten as U+FFFD instead of being dropped. Decoding
// explicitly exposes the width, and RuneError with width 1 identifies an
// invalid byte.
func stripNonUTF8(str string) string {
	// Fast path: nothing to strip, no allocation.
	if utf8.ValidString(str) {
		return str
	}
	buf := new(bytes.Buffer)
	buf.Grow(len(str))
	for i := 0; i < len(str); {
		r, size := utf8.DecodeRuneInString(str[i:])
		if r != utf8.RuneError || size > 1 {
			buf.WriteString(str[i : i+size])
		}
		i += size
	}
	return buf.String()
}
// escape replaces every byte of s that does not belong to a printable
// character with the corresponding hexadecimal escape sequence (\XX, upper
// case). Printable characters, as defined by unicode.IsPrint, are copied
// unchanged.
//
// Escaping is per byte: a non-printable multi-byte character and any byte
// that is not valid UTF-8 are emitted as one \XX sequence per byte, e.g.
//
//	"\n"     -> `\0A`
//	"\x00"   -> `\00`
//	"\u0080" -> `\C2\80`
//	"\xff"   -> `\FF`
//
// s is returned as is when nothing needs escaping.
func escape(s string) string {
	// printable reports whether the rune just decoded may be copied
	// verbatim. DecodeRuneInString signals an invalid byte as RuneError with
	// width 1; U+FFFD itself is printable, so the width has to be checked to
	// avoid letting raw invalid bytes through.
	printable := func(r rune, size int) bool {
		if r == utf8.RuneError && size <= 1 {
			return false
		}
		return unicode.IsPrint(r)
	}

	// First pass: count the extra bytes needed (two per escaped byte).
	extra := 0
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		if printable(r, size) {
			i += size
			continue
		}
		extra += 2
		i++
	}
	if extra == 0 {
		return s
	}

	// Second pass: copy printable characters, escape everything else.
	const hextable = "0123456789ABCDEF"
	buf := make([]byte, 0, len(s)+extra)
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		if printable(r, size) {
			buf = append(buf, s[i:i+size]...)
			i += size
			continue
		}
		b := s[i]
		buf = append(buf, '\\', hextable[b>>4], hextable[b&0x0F])
		i++
	}
	return string(buf)
}
func (p *parser) asciiEscapeToUnicode(bs []byte) rune { s := string(bs) hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) if err != nil { p.bug("Could not parse '%s' as a hexadecimal number, but the "+ "lexer claims it's OK: %s", s, err) } if !utf8.ValidRune(rune(hex)) { p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s) } return rune(hex) }
// similarEscapeCustomChar translates a SQL:2008 SIMILAR TO pattern into the
// POSIX-style syntax understood by our regexp engine, using escapeChar as
// the pattern's escape character. Passing a rune that utf8.ValidRune rejects
// disables escape handling.
//
// Translation rules, applied rune by rune:
//   - after the escape character, '"' outside a character class alternately
//     opens and closes a capturing group (for SUBSTRING patterns); any other
//     rune is emitted preceded by a backslash;
//   - inside a character class runes are copied, with '\' doubled, until ']';
//   - '%' becomes ".*", '_' becomes ".", '(' becomes "(?:";
//   - '\', '.', '^' and '$' are backslash-escaped because they are not
//     metacharacters in SQL-style patterns.
func similarEscapeCustomChar(pattern string, escapeChar rune) string {
	out := make([]rune, 0, utf8.RuneCountInString(pattern))
	hasEscape := utf8.ValidRune(escapeChar)

	var (
		inClass bool // between '[' and ']'
		escaped bool // previous rune was the escape character
		quotes  int  // escaped double quotes seen so far
	)

	for _, ch := range pattern {
		if escaped {
			escaped = false
			if ch != '"' || inClass {
				out = append(out, '\\', ch)
				continue
			}
			// For SUBSTRING patterns: even quotes open, odd quotes close.
			if quotes%2 == 0 {
				out = append(out, '(')
			} else {
				out = append(out, ')')
			}
			quotes++
			continue
		}

		if hasEscape && ch == escapeChar {
			// SQL99 escape character; do not immediately send to output.
			escaped = true
			continue
		}

		if inClass {
			if ch == '\\' {
				out = append(out, '\\')
			}
			out = append(out, ch)
			inClass = ch != ']'
			continue
		}

		switch ch {
		case '[':
			out = append(out, ch)
			inClass = true
		case '%':
			out = append(out, '.', '*')
		case '_':
			out = append(out, '.')
		case '(':
			// Convert to non-capturing parenthesis.
			out = append(out, '(', '?', ':')
		case '\\', '.', '^', '$':
			// Escape these characters because they are NOT
			// metacharacters for SQL-style regexp.
			out = append(out, '\\', ch)
		default:
			out = append(out, ch)
		}
	}
	return string(out)
}
func lexLexer(l *Lexer) stateFunc { for { switch r := l.next(); { case utf8.ValidRune(r): fmt.Printf("hello rune %v\n", r) l.output = append(l.output, r) return lexNext /* case r >= 0 || r <= 1023: //<= BasicLatin.order[0] || r >= BasicLatin.order[1]: fmt.Println("hello stateFunc next:", r) l.emit(itemBasicLatin) return lexNext */ } } }
// IndexRune returns the index of the first instance of the Unicode code point // r, or -1 if rune is not present in s. // If r is utf8.RuneError, it returns the first instance of any // invalid UTF-8 byte sequence. func IndexRune(s string, r rune) int { switch { case 0 <= r && r < utf8.RuneSelf: return IndexByte(s, byte(r)) case r == utf8.RuneError: for i, r := range s { if r == utf8.RuneError { return i } } return -1 case !utf8.ValidRune(r): return -1 default: return Index(s, string(r)) } }
// writeTables builds the "derivedProperties" trie and writes it, together
// with the Unicode version, to *outputFile in package "precis".
//
// For every valid rune below unicode.MaxRune it derives a property value and
// a category and inserts their bitwise OR into the trie. The property is
// chosen by the first matching case of the switch, so the order of the cases
// is significant.
func writeTables() {
	propTrie := triegen.NewTrie("derivedProperties")
	w := gen.NewCodeWriter()
	// Deferred so the file is written after the trie has been generated and
	// w.Size updated. Not reached if log.Fatal below exits the program.
	defer w.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(w)

	// Iterate over all the runes...
	// NOTE(review): the bound is exclusive, so unicode.MaxRune itself is
	// never inserted — confirm this is intended.
	for i := rune(0); i < unicode.MaxRune; i++ {
		r := rune(i)

		// Skip values that cannot be encoded as UTF-8.
		if !utf8.ValidRune(r) {
			continue
		}

		e, ok := exceptions[i]
		p := e.prop
		switch {
		case ok:
			// An explicit entry in exceptions wins; p keeps e.prop.
		case !unicode.In(r, assigned):
			p = unassigned
		case r >= 0x0021 && r <= 0x007e: // Is ASCII 7
			p = pValid
		case unicode.In(r, disallowedRunes, unicode.Cc):
			p = disallowed
		case hasCompat(r):
			p = idDisOrFreePVal
		case isLetterDigits(r):
			p = pValid
		case isIdDisAndFreePVal(r):
			p = idDisOrFreePVal
		default:
			p = disallowed
		}
		cat := runeCategory[r]
		// Don't set category for runes that are disallowed.
		// The category from the exceptions table (the zero value when r has
		// no entry) is used instead.
		if p == disallowed {
			cat = exceptions[r].cat
		}
		propTrie.Insert(r, uint64(p)|uint64(cat))
	}
	sz, err := propTrie.Gen(w)
	if err != nil {
		log.Fatal(err)
	}
	w.Size += sz
}
func scanUnicodeRune(p *parser, n int) scanner { p.record(0) for i := 0; i < n; i++ { if r := p.readByte(); !isHex(r) { return p.expectStr("hexadecimal digit") } } s := p.slice(0) codepoint, err := strconv.ParseUint(s, 16, 64) if err != nil { return p.setError(err) } r := rune(codepoint) if !utf8.ValidRune(r) { return p.errorScanner("%s is not a valid utf8 rune", s) } return scanReturnString(p, string(r)) }
// writeTables builds the "derivedProperties" trie and writes it, together
// with the Unicode version, to *outputFile in package "precis".
//
// For every valid rune below unicode.MaxRune it derives a property value and
// inserts it into the trie. The property is chosen by the first matching
// case of the switch, so the order of the cases is significant.
func writeTables() {
	propTrie := triegen.NewTrie("derivedProperties")
	w := gen.NewCodeWriter()
	// Deferred so the file is written after the trie has been generated and
	// w.Size updated. Not reached if log.Fatal below exits the program.
	defer w.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(w)

	// Iterate over all the runes...
	// NOTE(review): the bound is exclusive, so unicode.MaxRune itself is
	// never inserted — confirm this is intended.
	for i := uint32(0); i < unicode.MaxRune; i++ {
		r := rune(i)

		// Skip values that cannot be encoded as UTF-8.
		if !utf8.ValidRune(r) {
			continue
		}

		p, ok := exceptions[i]
		switch {
		case ok:
			// An explicit entry in exceptions wins; p keeps that value.
		case !unicode.In(r, assigned):
			p = unassigned
		case r >= 33 && r <= 126: // Is ASCII 7
			p = pValid
		case r == 0x200C || r == 0x200D: // Is join control
			p = contextJ
		case unicode.In(r, disallowedRunes, unicode.Cc):
			p = disallowed
		case isHasCompat(r):
			p = idDis | freePVal
		case isLetterDigits(r):
			p = pValid
		case isIdDisAndFreePVal(r):
			p = idDis | freePVal
		default:
			p = disallowed
		}
		propTrie.Insert(r, uint64(p))
	}
	sz, err := propTrie.Gen(w)
	if err != nil {
		log.Fatal(err)
	}
	w.Size += sz
}
func (t *Textbox) Fill(u rune) error { if !utf8.ValidRune(u) { return errors.New("invalid rune.") } for i, val := range t.canvas { replaceWidth := utf8.RuneLen(u) for j := 0; j < len(val); j += replaceWidth { tmp := t.canvas[i] for len(tmp) >= utf8.RuneLen(u) { utf8.EncodeRune(tmp, u) tmp = tmp[replaceWidth:] } for len(tmp) > 0 { replaceWidth = utf8.EncodeRune(tmp, 0x0) tmp = tmp[replaceWidth:] } } } return nil }
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. // It returns the byte index of the first occurrence in s of the given rune. // It returns -1 if rune is not present in s. // If r is utf8.RuneError, it returns the first instance of any // invalid UTF-8 byte sequence. func IndexRune(s []byte, r rune) int { switch { case 0 <= r && r < utf8.RuneSelf: return IndexByte(s, byte(r)) case r == utf8.RuneError: for i := 0; i < len(s); { r1, n := utf8.DecodeRune(s[i:]) if r1 == utf8.RuneError { return i } i += n } return -1 case !utf8.ValidRune(r): return -1 default: var b [utf8.UTFMax]byte n := utf8.EncodeRune(b[:], r) return Index(s, b[:n]) } }
func TestUTF7Random(t *testing.T) { f := func(b []byte) bool { d := UTF7EncodeBytes(b) d2, err := UTF7DecodeBytes(d) if err != nil { t.Error("UTF7 decode bytes return error", err) t.FailNow() } return reflect.DeepEqual(b, d2) } c := quick.Config{ Rand: rand.New(rand.NewSource(time.Now().Unix())), Values: func(v []reflect.Value, r *rand.Rand) { var u rune var n, uni int var slice []byte buf := make([]byte, 4) size := r.Intn(4096) b := make([]byte, 0, size*4) for i := 0; i < size; i++ { uni = r.Int() % 0x110000 u = rune(uni) if !utf8.ValidRune(u) { u = unicode.ReplacementChar } n = utf8.EncodeRune(buf, u) slice = buf[:n] b = append(b, slice...) } v[0] = reflect.ValueOf(b) }, } if err := quick.Check(f, &c); err != nil { t.Error("failed for UTF7 blackbox test") t.FailNow() } }
// Parse Go unicode value: func (ctx *parseContext) parseUnicodeValue(location int, err *Error) (rune, int) { /* unicode_value = unicode_char | little_u_value | big_u_value | escaped_char . byte_value = octal_byte_value | hex_byte_value . octal_byte_value = `\` octal_digit octal_digit octal_digit . hex_byte_value = `\` "x" hex_digit hex_digit . little_u_value = `\` "u" hex_digit hex_digit hex_digit hex_digit . big_u_value = `\` "U" hex_digit hex_digit hex_digit hex_digit hex_digit hex_digit hex_digit hex_digit . escaped_char = `\` ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | `\` | "'" | `"` ) . */ if location >= len(ctx.str) { err.Location = location err.Message = "Unexpected end of file: waiting for Unicode character" return 0, -1 } if ctx.str[location] == '\\' { location++ if location >= len(ctx.str) { err.Location = location err.Message = "Unexpected end of file in escape sequence" return 0, -1 } if ctx.str[location] == '\\' { return '\\', location + 1 } else if ctx.str[location] == 'a' { return '\a', location + 1 } else if ctx.str[location] == 'b' { return '\b', location + 1 } else if ctx.str[location] == 'f' { return '\f', location + 1 } else if ctx.str[location] == 'n' { return '\n', location + 1 } else if ctx.str[location] == 'r' { return '\r', location + 1 } else if ctx.str[location] == 't' { return '\t', location + 1 } else if ctx.str[location] == 'v' { return '\v', location + 1 } else if ctx.str[location] == '`' { return '`', location + 1 } else if ctx.str[location] == '\'' { return '\'', location + 1 } else if ctx.str[location] == '"' { return '"', location + 1 } else if ctx.str[location] >= '0' && ctx.str[location] < 3 { if location+2 >= len(ctx.str) { err.Location = location err.Message = "Unexpected end of file in escape sequence" return 0, -1 } var r rune for i := 0; i < 3; i++ { if ctx.str[location+i] >= '0' && ctx.str[location+i] <= '7' { r = r*8 + rune(ctx.str[location+i]-'0') } else { err.Location = location err.Message = "Invalid character in 
octal_byte" return 0, -1 } } return r, location + 3 } else if ctx.str[location] == 'x' || ctx.str[location] == 'u' || ctx.str[location] == 'U' { var l int if ctx.str[location] == 'x' { l = 2 } else if ctx.str[location] == 'u' { l = 4 } else { l = 8 } if location+l >= len(ctx.str) { err.Location = location err.Message = "Unexpected end of file in escape sequence" return 0, -1 } location++ var r rune for i := 0; i < l; i++ { if ctx.str[location+i] >= '0' && ctx.str[location+i] <= '9' { r = r*16 + rune(ctx.str[location+i]-'0') } else if ctx.str[location+i] >= 'a' && ctx.str[location+i] <= 'f' { r = r*16 + rune(ctx.str[location+i]-'a'+10) } else if ctx.str[location+i] >= 'A' && ctx.str[location+i] <= 'F' { r = r*16 + rune(ctx.str[location+i]-'A'+10) } else { err.Location = location err.Message = "Illegal character in hex code" return 0, -1 } } if !utf8.ValidRune(r) { err.Location = location err.Message = "Invalid rune" return 0, -1 } return r, location + l } err.Location = location err.Message = "Invalid escaped char" return 0, -1 } r, l := utf8.DecodeRune(ctx.str[location:]) if l <= 0 { err.Location = location err.Message = "Invalid Unicode character" return 0, -1 } return r, location + l }
// genTables builds the "idna" trie from three Unicode data files and writes
// it, with the Unicode version and the mappings and xorData variables, to
// tables.go in package "idna".
//
// The first two passes accumulate per-rune flags in the runes table; the
// third pass combines those flags with the category of each mapping-table
// entry and inserts the result into the trie.
func genTables() {
	t := triegen.NewTrie("idna")

	// Pass 1: mark viramas (canonical combining class 9) and runes in
	// unicode.Mark.
	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		const cccVirama = 9
		if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
			runes[p.Rune(0)] = viramaModifier
		}
		switch {
		case unicode.In(r, unicode.Mark):
			runes[r] |= modifier
		}
	})

	// Pass 2: record the joining type for entries of type L, D, T or R.
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
		switch v := p.String(1); v {
		case "L", "D", "T", "R":
			runes[p.Rune(0)] |= joinType[v] << joinShift
		}
	})

	// Pass 3: one trie entry per valid rune in the IDNA mapping table.
	ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		// The mappings table explicitly defines surrogates as invalid.
		if !utf8.ValidRune(r) {
			return
		}

		cat := catFromEntry(p)
		isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
		if !isMapped {
			// Only include additional category information for non-mapped
			// runes. The additional information is only used after mapping and
			// the bits would clash with mapping information.
			// TODO: it would be possible to inline this data and avoid
			// additional lookups. This is quite tedious, though, so let's first
			// see if we need this.
			cat |= category(runes[r])
		}

		s := string(p.Runes(2))
		// A non-empty mapping is only legal for one of the mapped categories.
		if s != "" && !isMapped {
			log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
		}
		t.Insert(r, uint64(makeEntry(r, s))+uint64(cat))
	})

	w := gen.NewCodeWriter()
	// Deferred so the file is written after the trie has been generated and
	// w.Size updated. Not reached if log.Fatal below exits the program.
	defer w.WriteGoFile("tables.go", "idna")

	gen.WriteUnicodeVersion(w)

	w.WriteVar("mappings", string(mappings))
	w.WriteVar("xorData", string(xorData))

	sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
	if err != nil {
		log.Fatal(err)
	}
	w.Size += sz
}
func (c Char) validate() Char { if !utf8.ValidRune(rune(c)) { Errorf("invalid char value %U\n", c) } return c }
func testUTF8() { b := []byte("Hello, 世界") r, size := utf8.DecodeLastRune(b) TEQ("", '界', r) TEQ("", size, 3) b = b[:len(b)-size] r, size = utf8.DecodeLastRune(b) TEQ("", '世', r) TEQ("", size, 3) b = b[:len(b)-size] r, size = utf8.DecodeLastRune(b) TEQ("", ' ', r) TEQ("", size, 1) //fmt.Println("len(Zi)=", len("字"), hx.CodeInt(`'字'.length;`)) str := "Hello, 世界" r, size = utf8.DecodeLastRuneInString(str) TEQ("", '界', r) TEQ("", size, 3) str = str[:len(str)-size] r, size = utf8.DecodeLastRuneInString(str) TEQ("", '世', r) TEQ("", size, 3) str = str[:len(str)-size] r, size = utf8.DecodeLastRuneInString(str) TEQ("", ' ', r) TEQ("", size, 1) ru := '世' buf := make([]byte, 3) n := utf8.EncodeRune(buf, ru) TEQ("", n, 3) TEQbyteSlice("", buf, []byte{228, 184, 150}) buf = []byte{228, 184, 150} // 世 TEQ("", true, utf8.FullRune(buf)) TEQ("", false, utf8.FullRune(buf[:2])) str = "世" TEQ("", true, utf8.FullRuneInString(str)) //if ShowKnownErrors || hx.GetInt("", "'字'.length") == 3 { TEQ(""+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.FullRuneInString(str[:2])) //} buf = []byte("Hello, 世界") TEQ("", 13, len(buf)) TEQ("", 9, utf8.RuneCount(buf)) str = "Hello, 世界" TEQ("", 13, len(str)) TEQ("", 9, utf8.RuneCountInString(str)) TEQ("", 1, utf8.RuneLen('a')) TEQ("", 3, utf8.RuneLen('界')) buf = []byte("a界") TEQ("", true, utf8.RuneStart(buf[0])) TEQ("", true, utf8.RuneStart(buf[1])) TEQ("", false, utf8.RuneStart(buf[2])) valid := []byte("Hello, 世界") invalid := []byte{0xff, 0xfe, 0xfd} TEQ("", true, utf8.Valid(valid)) TEQ("", false, utf8.Valid(invalid)) valid_rune := 'a' invalid_rune := rune(0xfffffff) TEQ("", true, utf8.ValidRune(valid_rune)) TEQ("", false, utf8.ValidRune(invalid_rune)) valid_string := "Hello, 世界" invalid_string := string([]byte{0xff, 0xfe, 0xfd}) TEQ("", true, utf8.ValidString(valid_string)) //if ShowKnownErrors || hx.GetInt("", "'字'.length") == 3 { TEQ(""+" NOTE: known error handling incorrect strings on UTF16 platforms", false, 
utf8.ValidString(invalid_string)) //} }
// testUTF8 exercises the unicode/utf8 package and reports each comparison
// through TEQ / TEQbyteSlice, passing tardisgolib.CPos() as the first
// argument of every check.
// NOTE(review): CPos() presumably yields the source position of the call, so
// the calls are left inline, one per check — confirm before restructuring.
func testUTF8() {
	// DecodeLastRune on a byte slice: peel off '界', '世' and ' ' in turn.
	b := []byte("Hello, 世界")
	r, size := utf8.DecodeLastRune(b)
	TEQ(tardisgolib.CPos(), '界', r)
	TEQ(tardisgolib.CPos(), size, 3)
	b = b[:len(b)-size]
	r, size = utf8.DecodeLastRune(b)
	TEQ(tardisgolib.CPos(), '世', r)
	TEQ(tardisgolib.CPos(), size, 3)
	b = b[:len(b)-size]
	r, size = utf8.DecodeLastRune(b)
	TEQ(tardisgolib.CPos(), ' ', r)
	TEQ(tardisgolib.CPos(), size, 1)

	// DecodeLastRuneInString: the same walk over a string.
	str := "Hello, 世界"
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ(tardisgolib.CPos(), '界', r)
	TEQ(tardisgolib.CPos(), size, 3)
	str = str[:len(str)-size]
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ(tardisgolib.CPos(), '世', r)
	TEQ(tardisgolib.CPos(), size, 3)
	str = str[:len(str)-size]
	r, size = utf8.DecodeLastRuneInString(str)
	TEQ(tardisgolib.CPos(), ' ', r)
	TEQ(tardisgolib.CPos(), size, 1)

	// EncodeRune: '世' encodes to three bytes.
	ru := '世'
	buf := make([]byte, 3)
	n := utf8.EncodeRune(buf, ru)
	TEQ(tardisgolib.CPos(), n, 3)
	TEQbyteSlice(tardisgolib.CPos(), buf, []byte{228, 184, 150})

	// FullRune / FullRuneInString: a complete encoding versus its first two bytes.
	buf = []byte{228, 184, 150} // 世
	TEQ(tardisgolib.CPos(), true, utf8.FullRune(buf))
	TEQ(tardisgolib.CPos(), false, utf8.FullRune(buf[:2]))
	str = "世"
	TEQ(tardisgolib.CPos(), true, utf8.FullRuneInString(str))
	// Only checked when known errors are requested or Zilen() reports 3.
	if ShowKnownErrors || tardisgolib.Zilen() == 3 {
		TEQ(tardisgolib.CPos()+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.FullRuneInString(str[:2]))
	}

	// Byte length versus rune count.
	buf = []byte("Hello, 世界")
	TEQ(tardisgolib.CPos(), 13, len(buf))
	TEQ(tardisgolib.CPos(), 9, utf8.RuneCount(buf))
	str = "Hello, 世界"
	TEQ(tardisgolib.CPos(), 13, len(str))
	TEQ(tardisgolib.CPos(), 9, utf8.RuneCountInString(str))

	// RuneLen.
	TEQ(tardisgolib.CPos(), 1, utf8.RuneLen('a'))
	TEQ(tardisgolib.CPos(), 3, utf8.RuneLen('界'))

	// RuneStart: 'a', then the lead byte and a continuation byte of '界'.
	buf = []byte("a界")
	TEQ(tardisgolib.CPos(), true, utf8.RuneStart(buf[0]))
	TEQ(tardisgolib.CPos(), true, utf8.RuneStart(buf[1]))
	TEQ(tardisgolib.CPos(), false, utf8.RuneStart(buf[2]))

	// Valid.
	valid := []byte("Hello, 世界")
	invalid := []byte{0xff, 0xfe, 0xfd}
	TEQ(tardisgolib.CPos(), true, utf8.Valid(valid))
	TEQ(tardisgolib.CPos(), false, utf8.Valid(invalid))

	// ValidRune: 0xfffffff is larger than unicode.MaxRune.
	valid_rune := 'a'
	invalid_rune := rune(0xfffffff)
	TEQ(tardisgolib.CPos(), true, utf8.ValidRune(valid_rune))
	TEQ(tardisgolib.CPos(), false, utf8.ValidRune(invalid_rune))

	// ValidString.
	valid_string := "Hello, 世界"
	invalid_string := string([]byte{0xff, 0xfe, 0xfd})
	TEQ(tardisgolib.CPos(), true, utf8.ValidString(valid_string))
	// Only checked when known errors are requested or Zilen() reports 3.
	if ShowKnownErrors || tardisgolib.Zilen() == 3 {
		TEQ(tardisgolib.CPos()+" NOTE: known error handling incorrect strings on UTF16 platforms", false, utf8.ValidString(invalid_string))
	}
}
// TestDumpRandom generates a random number of random rows with all data
// types. This data is dumped, inserted, and dumped again. The two dumps
// are compared for exactness. The data from the inserted dump is then
// SELECT'd and compared to the original generated data to ensure it is
// round-trippable.
//
// Iterations repeat until *randomTestTime has elapsed. The random seed is
// logged at the start.
func TestDumpRandom(t *testing.T) {
	defer leaktest.AfterTest(t)()

	c, err := newCLITest(t, false)
	if err != nil {
		t.Fatal(err)
	}
	defer c.stop(true)

	url, cleanup := sqlutils.PGUrl(t, c.ServingAddr(), "TestDumpRandom", url.User(security.RootUser))
	defer cleanup()

	conn := makeSQLConn(url.String())
	defer conn.Close()

	// d holds the generated table; o receives the re-imported dump.
	if err := conn.Exec(` CREATE DATABASE d; CREATE DATABASE o; CREATE TABLE d.t ( rowid int, i int, f float, d date, m timestamp, n interval, o bool, e decimal, s string, b bytes, PRIMARY KEY (rowid, i, f, d, m, n, o, e, s, b) ); `, nil); err != nil {
		t.Fatal(err)
	}

	rnd, seed := randutil.NewPseudoRand()
	t.Logf("random seed: %v", seed)

	start := timeutil.Now()

	for iteration := 0; timeutil.Since(start) < *randomTestTime; iteration++ {
		// Start every iteration from an empty source table.
		if err := conn.Exec(`DELETE FROM d.t`, nil); err != nil {
			t.Fatal(err)
		}
		var generatedRows [][]driver.Value
		count := rnd.Int63n(500)
		t.Logf("random iteration %v: %v rows", iteration, count)
		for _i := int64(0); _i < count; _i++ {
			// Generate a random number of random inserts.
			i := rnd.Int63()
			f := rnd.Float64()
			d := time.Unix(0, rnd.Int63()).Round(time.Hour * 24).UTC()
			m := time.Unix(0, rnd.Int63()).Round(time.Microsecond).UTC()
			n := time.Duration(rnd.Int63()).String()
			o := rnd.Intn(2) == 1
			// TrimRight takes a cutset: it removes every trailing '.' and '0'.
			e := strings.TrimRight(inf.NewDec(rnd.Int63(), inf.Scale(rnd.Int31n(20)-10)).String(), ".0")
			sr := make([]byte, rnd.Intn(500))
			if _, err := rnd.Read(sr); err != nil {
				t.Fatal(err)
			}
			// Build a valid UTF-8 string by treating each random byte as a
			// code point in 0..255 and encoding it. Every such value is a
			// valid rune, so the ValidRune check never skips anything.
			s := make([]byte, 0, len(sr))
			for _, b := range sr {
				r := rune(b)
				if !utf8.ValidRune(r) {
					continue
				}
				s = append(s, []byte(string(r))...)
			}
			b := make([]byte, rnd.Intn(500))
			if _, err := rnd.Read(b); err != nil {
				t.Fatal(err)
			}

			vals := []driver.Value{
				_i,
				i,
				f,
				d,
				m,
				[]byte(n), // intervals come out as `[]byte`s
				o,
				[]byte(e), // decimals come out as `[]byte`s
				string(s),
				b,
			}
			if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)", vals); err != nil {
				t.Fatal(err)
			}
			// Drop rowid (vals[0]): the SELECT in check does not return it.
			generatedRows = append(generatedRows, vals[1:])
		}

		// check reads table back in rowid order and compares every column of
		// every row with the generated values.
		check := func(table string) {
			q := fmt.Sprintf("SELECT i, f, d, m, n, o, e, s, b FROM %s ORDER BY rowid", table)
			nrows, err := conn.Query(q, nil)
			if err != nil {
				t.Fatal(err)
			}
			defer func() {
				if err := nrows.Close(); err != nil {
					t.Fatal(err)
				}
			}()
			for gi, generatedRow := range generatedRows {
				fetched := make([]driver.Value, len(nrows.Columns()))
				if err := nrows.Next(fetched); err != nil {
					t.Fatal(err)
				}

				for i, fetchedVal := range fetched {
					generatedVal := generatedRow[i]
					// Note: this t shadows the *testing.T inside the if body only.
					if t, ok := fetchedVal.(time.Time); ok {
						// dates and timestamps come out with offset zero (but
						// not UTC specifically).
						fetchedVal = t.UTC()
					}
					if !reflect.DeepEqual(fetchedVal, generatedVal) {
						t.Errorf("NOT EQUAL: table %s, row %d, col %d\ngenerated (%T): %v\nselected (%T): %v\n", table, gi, i, generatedVal, generatedVal, fetchedVal, fetchedVal)
					}
				}
				// Stop at the first row with a mismatch, after reporting all
				// of its mismatching columns.
				if t.Failed() {
					t.FailNow()
				}
			}
		}

		check("d.t")

		var buf bytes.Buffer
		if err := DumpTable(&buf, conn, "d", "t"); err != nil {
			t.Fatal(err)
		}
		dump := buf.String()
		buf.Reset()

		// Replay the dump into database o, replacing any table left over
		// from the previous iteration.
		if err := conn.Exec(` SET DATABASE = o; DROP TABLE IF EXISTS t; `, nil); err != nil {
			t.Fatal(err)
		}
		if err := conn.Exec(dump, nil); err != nil {
			t.Fatal(err)
		}

		check("o.t")

		// The dump of the re-imported table must match the first dump exactly.
		if err := DumpTable(&buf, conn, "o", "t"); err != nil {
			t.Fatal(err)
		}
		dump2 := buf.String()
		if dump != dump2 {
			t.Fatalf("unmatching dumps:\nFIRST:\n%s\n\nSECOND:\n%s", dump, dump2)
		}
	}
}