func loadTestData() []Test { f := gen.OpenUnicodeFile("UCA", "", "CollationTest.zip") buffer, err := ioutil.ReadAll(f) f.Close() Error(err) archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) Error(err) tests := []Test{} for _, f := range archive.File { // Skip the short versions, which are simply duplicates of the long versions. if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() { continue } ff, err := f.Open() Error(err) defer ff.Close() scanner := bufio.NewScanner(ff) test := Test{name: path.Base(f.Name)} for scanner.Scan() { line := scanner.Text() if len(line) <= 1 || line[0] == '#' { if m := versionRe.FindStringSubmatch(line); m != nil { if m[1] != gen.UnicodeVersion() { log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], gen.UnicodeVersion()) } } continue } m := testRe.FindStringSubmatch(line) if m == nil || len(m) < 3 { log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m) } str := []byte{} // In the regression test data (unpaired) surrogates are assigned a weight // corresponding to their code point value. However, utf8.DecodeRune, // which is used to compute the implicit weight, assigns FFFD to surrogates. // We therefore skip tests with surrogates. This skips about 35 entries // per test. valid := true for _, split := range strings.Split(m[1], " ") { r, err := strconv.ParseUint(split, 16, 64) Error(err) valid = valid && utf8.ValidRune(rune(r)) str = append(str, string(rune(r))...) } if valid { test.str = append(test.str, str) test.comment = append(test.comment, m[2]) } } if scanner.Err() != nil { log.Fatal(scanner.Err()) } tests = append(tests, test) } return tests }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { r := gen.OpenUnicodeFile("UCA", "", "allkeys.txt") defer r.Close() input := bufio.NewReader(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; true; i++ { l, prefix, err := input.ReadLine() if err == io.EOF { break } Error(err) line := string(l) if prefix { log.Fatalf("%d: buffer overflow", i) } if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { if strings.HasPrefix(line[1:], "version ") { if v := strings.Split(line[1:], " ")[1]; v != gen.UnicodeVersion() { log.Fatalf("incompatible version %s; want %s", v, gen.UnicodeVersion()) } } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v != "" { lhs = append(lhs, rune(convHex(i, v))) } } vars := []int{} rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { if m[1] == "*" { vars = append(vars, i) } elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } rhs = append(rhs, elem) } builder.Add(lhs, rhs, vars) } } }
func main() { gen.Init() args = flag.Args() if !*verbose { // Set vprintf to a no-op. vprintf = func(string, ...interface{}) (int, error) { return 0, nil } } // TODO: create temporary cache directory to load files and create and set // a "cache" option if the user did not specify the UNICODE_DIR environment // variable. This will prevent duplicate downloads and also will enable long // tests, which really need to be run after each generated package. if gen.UnicodeVersion() != unicode.Version { fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n", gen.UnicodeVersion, unicode.Version) // TODO: use collate to compare. Simple comparison will work, though, // until Unicode reaches version 10. To avoid circular dependencies, we // could use the NumericWeighter without using package collate using a // trivial Weighter implementation. if gen.UnicodeVersion() < unicode.Version && !*force { os.Exit(2) } } var ( cldr = generate("unicode/cldr") language = generate("language", cldr) internal = generate("internal", language) norm = generate("unicode/norm") rangetable = generate("unicode/rangetable") cases = generate("cases", norm, language, rangetable) width = generate("width") bidi = generate("unicode/bidi", norm, rangetable) _ = generate("secure/precis", norm, rangetable, cases, width, bidi) _ = generate("encoding/htmlindex", language) _ = generate("currency", cldr, language, internal) _ = generate("internal/number", cldr, language, internal) _ = generate("language/display", cldr, language) _ = generate("collate", norm, cldr, language, rangetable) _ = generate("search", norm, cldr, language, rangetable) ) all.Wait() if hasErrors { fmt.Println("FAIL") os.Exit(1) } vprintf("SUCCESS\n") }
func getVersions() []string { if *versionList == "" { log.Fatal(bootstrapMessage) } versions := strings.Split(*versionList, ",") sort.Strings(versions) // Ensure that at least the current version is included. for _, v := range versions { if v == gen.UnicodeVersion() { return versions } } versions = append(versions, gen.UnicodeVersion()) sort.Strings(versions) return versions }
func main() { gen.Init() args = flag.Args() if !*verbose { // Set vprintf to a no-op. vprintf = func(string, ...interface{}) (int, error) { return 0, nil } } if gen.UnicodeVersion() != unicode.Version { fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n", gen.UnicodeVersion, unicode.Version) // TODO: use collate to compare. Simple comparison will work, though, // until Unicode reaches version 10. To avoid circular dependencies, we // could use the NumericWeighter without using package collate using a // trivial Weighter implementation. if gen.UnicodeVersion() < unicode.Version && !*force { os.Exit(2) } } var ( cldr = generate("cldr") language = generate("language", cldr) internal = generate("internal", language) norm = generate("unicode/norm") _ = generate("unicode/rangetable") _ = generate("width") _ = generate("currency", cldr, language, internal) _ = generate("display", cldr, language) _ = generate("cases", norm) _ = generate("collate", norm, cldr, language) _ = generate("search", norm, cldr, language) ) all.Wait() if hasErrors { fmt.Println("FAIL") os.Exit(1) } vprintf("SUCCESS\n") }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { var r io.ReadCloser var err error for _, f := range openArchive().File { if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") { r, err = f.Open() } } if r == nil { log.Fatal("File allkeys_CLDR.txt not found in archive.") } failOnError(err) defer r.Close() scanner := bufio.NewScanner(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; scanner.Scan(); i++ { line := scanner.Text() if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { // parse properties switch { case strings.HasPrefix(line[1:], "version "): a := strings.Split(line[1:], " ") if a[1] != gen.UnicodeVersion() { log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion()) } case strings.HasPrefix(line[1:], "backwards "): log.Fatalf("%d: unsupported option backwards", i) default: log.Printf("%d: unknown option %s", i, line[1:]) } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v == "" { continue } lhs = append(lhs, rune(convHex(i, v))) } var n int var vars []int rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { n += len(m[0]) elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } if m[1] == "*" { vars = append(vars, i) } rhs = append(rhs, elem) } if len(part[1]) < n+3 || part[1][n+1] != '#' { log.Fatalf("%d: expected comment; found %s", i, part[1][n:]) } if *test { testInput.add(string(lhs)) } failOnError(builder.Add(lhs, rhs, vars)) } } if scanner.Err() != nil { log.Fatal(scanner.Err()) } }
func makeTables() { w := &bytes.Buffer{} size := 0 if *tablelist == "" { return } list := strings.Split(*tablelist, ",") if *tablelist == "all" { list = []string{"recomp", "info"} } // Compute maximum decomposition size. max := 0 for _, c := range chars { if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max { max = n } } fmt.Fprintln(w, "const (") fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.") fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion()) fmt.Fprintln(w) fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform") fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at") fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that") fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.") fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max) fmt.Fprintln(w, ")\n") // Print the CCC remap table. size += len(cccMap) fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap)) for i := 0; i < len(cccMap); i++ { if i%8 == 0 { fmt.Fprintln(w) } fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)]) } fmt.Fprintln(w, "\n}\n") if contains(list, "info") { size += printCharInfoTables(w) } if contains(list, "recomp") { // Note that we use 32 bit keys, instead of 64 bit. // This clips the bits of three entries, but we know // this won't cause a collision. The compiler will catch // any changes made to UnicodeData.txt that introduces // a collision. // Note that the recomposition map for NFC and NFKC // are identical. // Recomposition map nrentries := 0 for _, c := range chars { f := c.forms[FCanonical] if !f.isOneWay && len(f.decomp) > 0 { nrentries++ } } sz := nrentries * 8 size += sz fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz) fmt.Fprintln(w, "var recompMap = map[uint32]rune{") for i, c := range chars { f := c.forms[FCanonical] d := f.decomp if !f.isOneWay && len(d) > 0 { key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1])) fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i) } } fmt.Fprintf(w, "}\n\n") } fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size) gen.WriteGoFile("tables.go", "norm", w.Bytes()) }
func main() { gen.Init() args = flag.Args() if !*verbose { // Set vprintf to a no-op. vprintf = func(string, ...interface{}) (int, error) { return 0, nil } } // TODO: create temporary cache directory to load files and create and set // a "cache" option if the user did not specify the UNICODE_DIR environment // variable. This will prevent duplicate downloads and also will enable long // tests, which really need to be run after each generated package. updateCore := *doCore if gen.UnicodeVersion() != unicode.Version { fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n", gen.UnicodeVersion(), unicode.Version) // TODO: use collate to compare. Simple comparison will work, though, // until Unicode reaches version 10. To avoid circular dependencies, we // could use the NumericWeighter without using package collate using a // trivial Weighter implementation. if gen.UnicodeVersion() < unicode.Version && !*force { os.Exit(2) } updateCore = true } var unicode = &dependency{} if updateCore { fmt.Printf("Updating core to version %s...\n", gen.UnicodeVersion()) unicode = generate("unicode") // Test some users of the unicode packages, especially the ones that // keep a mirrored table. These may need to be corrected by hand. generate("regexp", unicode) generate("strconv", unicode) // mimics Unicode table generate("strings", unicode) generate("testing", unicode) // mimics Unicode table } var ( cldr = generate("./unicode/cldr", unicode) language = generate("./language", cldr) internal = generate("./internal", unicode, language) norm = generate("./unicode/norm", unicode) rangetable = generate("./unicode/rangetable", unicode) cases = generate("./cases", unicode, norm, language, rangetable) width = generate("./width", unicode) bidi = generate("./unicode/bidi", unicode, norm, rangetable) _ = generate("./secure/precis", unicode, norm, rangetable, cases, width, bidi) _ = generate("./encoding/htmlindex", unicode, language) _ = generate("./currency", unicode, cldr, language, internal) _ = generate("./internal/number", unicode, cldr, language, internal) _ = generate("./language/display", unicode, cldr, language, internal) _ = generate("./collate", unicode, norm, cldr, language, rangetable) _ = generate("./search", unicode, norm, cldr, language, rangetable) ) if updateCore { copyVendored() generate("vendor/golang_org/x/net/idna", unicode, norm, width, cases) } all.Wait() if hasErrors { fmt.Println("FAIL") os.Exit(1) } vprintf("SUCCESS\n") }