func main() { gen.Init() gen.Repackage("gen_common.go", "common.go", "language") w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "language") fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`) b := newBuilder(w) gen.WriteCLDRVersion(w) b.parseIndices() b.writeType(fromTo{}) b.writeLanguage() b.writeScript() b.writeRegion() b.writeVariant() // TODO: b.writeLocale() b.computeRegionGroups() b.writeLikelyData() b.writeMatchData() b.writeRegionInclusionData() b.writeParents() }
func main() { gen.Init() // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("main", "supplemental") d.SetSectionFilter("localeDisplayNames") data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "display") gen.WriteCLDRVersion(w) b := builder{ w: w, data: data, group: make(map[string]*group), } b.generate() }
func main() { gen.Init() gen.Repackage("gen_common.go", "common.go", "currency") // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("supplemental", "main") d.SetSectionFilter("numbers") data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "currency") fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`) gen.WriteCLDRVersion(w) b := &builder{} b.genCurrencies(w, data.Supplemental()) b.genSymbols(w, data) }
func main() { gen.Init() rewriteCommon() w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "language") b := newBuilder(w) fmt.Fprintf(w, version, cldr.Version) b.parseIndices() b.writeType(fromTo{}) b.writeLanguage() b.writeScript() b.writeRegion() b.writeVariant() // TODO: b.writeLocale() b.writeCurrencies() b.computeRegionGroups() b.writeLikelyData() b.writeMatchData() b.writeRegionInclusionData() b.writeParents() }
func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "cases") gen.WriteUnicodeVersion(w) // TODO: write CLDR version after adding a mechanism to detect that the // tables on which the manually created locale-sensitive casing code is // based hasn't changed. w.WriteVar("xorData", string(xorData)) w.WriteVar("exceptions", string(exceptionData)) sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { log.Fatal(err) } w.Size += sz }
func main() { gen.Init() const pkg = "number" gen.Repackage("gen_common.go", "common.go", pkg) // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("supplemental", "main") d.SetSectionFilter("numbers", "numberingSystem") data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, pkg) fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`) gen.WriteCLDRVersion(w) genNumSystem(w, data) genSymbols(w, data) }
func main() { gen.Init() names, counts := parse() appendRepeatNames(names, counts) appendUniqueNames(names, counts) table0, table1 := makeTables() gen.Repackage("gen_bits.go", "bits.go", "runenames") w := gen.NewCodeWriter() w.WriteVar("table0", table0) w.WriteVar("table1", table1) w.WriteConst("data", string(data)) w.WriteGoFile("tables.go", "runenames") }
func writeTables() { propTrie := triegen.NewTrie("derivedProperties") w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "precis") gen.WriteUnicodeVersion(w) // Iterate over all the runes... for i := rune(0); i < unicode.MaxRune; i++ { r := rune(i) if !utf8.ValidRune(r) { continue } e, ok := exceptions[i] p := e.prop switch { case ok: case !unicode.In(r, assigned): p = unassigned case r >= 0x0021 && r <= 0x007e: // Is ASCII 7 p = pValid case unicode.In(r, disallowedRunes, unicode.Cc): p = disallowed case hasCompat(r): p = idDisOrFreePVal case isLetterDigits(r): p = pValid case isIdDisAndFreePVal(r): p = idDisOrFreePVal default: p = disallowed } cat := runeCategory[r] // Don't set category for runes that are disallowed. if p == disallowed { cat = exceptions[r].cat } propTrie.Insert(r, uint64(p)|uint64(cat)) } sz, err := propTrie.Gen(w) if err != nil { log.Fatal(err) } w.Size += sz }
func writeTables() { propTrie := triegen.NewTrie("derivedProperties") w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "precis") gen.WriteUnicodeVersion(w) // Iterate over all the runes... for i := uint32(0); i < unicode.MaxRune; i++ { r := rune(i) if !utf8.ValidRune(r) { continue } p, ok := exceptions[i] switch { case ok: case !unicode.In(r, assigned): p = unassigned case r >= 33 && r <= 126: // Is ASCII 7 p = pValid case r == 0x200C || r == 0x200D: // Is join control p = contextJ case unicode.In(r, disallowedRunes, unicode.Cc): p = disallowed case isHasCompat(r): p = idDis | freePVal case isLetterDigits(r): p = pValid case isIdDisAndFreePVal(r): p = idDis | freePVal default: p = disallowed } propTrie.Insert(r, uint64(p)) } sz, err := propTrie.Gen(w) if err != nil { log.Fatal(err) } w.Size += sz }
func main() { gen.Init() rewriteCommon() // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("supplemental") data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "currency") fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`) gen.WriteCLDRVersion(w) genCurrencies(w, data.Supplemental()) }
func main() { r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "internal") // Create parents table. parents := make([]uint16, language.NumCompactTags) for _, loc := range data.Locales() { tag := language.MustParse(loc) index, ok := language.CompactIndex(tag) if !ok { continue } parentIndex := 0 // und for p := tag.Parent(); p != language.Und; p = p.Parent() { if x, ok := language.CompactIndex(p); ok { parentIndex = x break } } parents[index] = uint16(parentIndex) } w.WriteComment(` Parent maps a compact index of a tag to the compact index of the parent of this tag.`) w.WriteVar("Parent", parents) }
func genTables() { if numClass > 0x0F { log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass) } w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "bidi") gen.WriteUnicodeVersion(w) t := triegen.NewTrie("bidi") // Build data about bracket mapping. These bits need to be or-ed with // any other bits. orMask := map[rune]uint64{} xorMap := map[rune]int{} xorMasks := []rune{0} // First value is no-op. parse("BidiBrackets.txt", func(p *ucd.Parser) { r1 := p.Rune(0) r2 := p.Rune(1) xor := r1 ^ r2 if _, ok := xorMap[xor]; !ok { xorMap[xor] = len(xorMasks) xorMasks = append(xorMasks, xor) } entry := uint64(xorMap[xor]) << xorMaskShift switch p.String(2) { case "o": entry |= openMask case "c", "n": default: log.Fatalf("Unknown bracket class %q.", p.String(2)) } orMask[r1] = entry }) w.WriteComment(` xorMasks contains masks to be xor-ed with brackets to get the reverse version.`) w.WriteVar("xorMasks", xorMasks) done := map[rune]bool{} insert := func(r rune, c class) { if !done[r] { t.Insert(r, orMask[r]|uint64(c)) done[r] = true } } // Insert the derived BiDi properties. parse("extracted/DerivedBidiClass.txt", func(p *ucd.Parser) { r := p.Rune(0) class, ok := bidiClass[p.String(1)] if !ok { log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1)) } insert(r, class) }) visitDefaults(insert) // TODO: use sparse blocks. This would reduce table size considerably // from the looks of it. sz, err := t.Gen(w) if err != nil { log.Fatal(err) } w.Size += sz }
func main() { mibs := map[string]bool{} all := []string{} w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "charmap") printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) } printf("import (\n") printf("\t\"golang.org/x/text/encoding\"\n") printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n") printf(")\n\n") for _, e := range encodings { varNames := strings.Split(e.varName, ",") all = append(all, varNames...) varName := varNames[0] switch { case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"): e.mapping = getWHATWG(e.mapping) case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"): e.mapping = getUCM(e.mapping) } asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00 if asciiSuperset { low = 0x80 } lvn := 1 if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") { lvn = 3 } lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:] printf("// %s is the %s encoding.\n", varName, e.name) if e.comment != "" { printf("//\n// %s\n", e.comment) } printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n", varName, lowerVarName, lowerVarName, e.name) if mibs[e.mib] { log.Fatalf("MIB type %q declared multiple times.", e.mib) } printf("mib: identifier.%s,\n", e.mib) printf("asciiSuperset: %t,\n", asciiSuperset) printf("low: 0x%02x,\n", low) printf("replacement: 0x%02x,\n", e.replacement) printf("decode: [256]utf8Enc{\n") i, backMapping := 0, map[rune]byte{} for _, c := range e.mapping { if _, ok := backMapping[c]; !ok && c != utf8.RuneError { backMapping[c] = byte(i) } var buf [8]byte n := utf8.EncodeRune(buf[:], c) if n > 3 { panic(fmt.Sprintf("rune %q (%U) is too long", c, c)) } printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2]) if i%2 == 1 { printf("\n") } i++ } printf("},\n") printf("encode: [256]uint32{\n") encode := make([]uint32, 0, 256) for c, i := range backMapping { encode = append(encode, uint32(i)<<24|uint32(c)) } sort.Sort(byRune(encode)) for len(encode) < cap(encode) { encode = append(encode, encode[len(encode)-1]) } for i, enc := range encode { printf("0x%08x,", enc) if i%8 == 7 { printf("\n") } } printf("},\n}\n") // Add an estimate of the size of a single charmap{} struct value, which // includes two 256 elem arrays of 4 bytes and some extra fields, which // align to 3 uint64s on 64-bit architectures. w.Size += 2*4*256 + 3*8 } // TODO: add proper line breaking. printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n")) }
func main() { gen.Init() // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer func() { buf := &bytes.Buffer{} if _, err = w.WriteGo(buf, "language"); err != nil { log.Fatalf("Error formatting file index.go: %v", err) } // Since we're generating a table for our own package we need to rewrite // doing the equivalent of go fmt -r 'language.b -> b'. Using // bytes.Replace will do. out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1) if err := ioutil.WriteFile("index.go", out, 0600); err != nil { log.Fatalf("Could not create file index.go: %v", err) } }() m := map[language.Tag]bool{} for _, lang := range data.Locales() { // We include all locales unconditionally to be consistent with en_US. // We want en_US, even though it has no data associated with it. // TODO: put any of the languages for which no data exists at the end // of the index. This allows all components based on ICU to use that // as the cutoff point. // if x := data.RawLDML(lang); false || // x.LocaleDisplayNames != nil || // x.Characters != nil || // x.Delimiters != nil || // x.Measurement != nil || // x.Dates != nil || // x.Numbers != nil || // x.Units != nil || // x.ListPatterns != nil || // x.Collations != nil || // x.Segmentations != nil || // x.Rbnf != nil || // x.Annotations != nil || // x.Metadata != nil { // TODO: support POSIX natively, albeit non-standard. tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) m[tag] = true // } } // Include locales for plural rules, which uses a different structure. for _, plurals := range data.Supplemental().Plurals { for _, rules := range plurals.PluralRules { for _, lang := range strings.Split(rules.Locales, " ") { m[language.Make(lang)] = true } } } var core, special []language.Tag for t := range m { if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { log.Fatalf("Unexpected extension %v in %v", x, t) } if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { core = append(core, t) } else { special = append(special, t) } } w.WriteComment(` NumCompactTags is the number of common tags. The maximum tag is NumCompactTags-1.`) w.WriteConst("NumCompactTags", len(core)+len(special)) sort.Sort(byAlpha(special)) w.WriteVar("specialTags", special) // TODO: order by frequency? sort.Sort(byAlpha(core)) // Size computations are just an estimate. w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size()) w.Size += len(core) * 6 // size of uint32 and uint16 fmt.Fprintln(w) fmt.Fprintln(w, "var coreTags = map[uint32]uint16{") fmt.Fprintln(w, "0x0: 0, // und") i := len(special) + 1 // Und and special tags already written. for _, t := range core { if t == language.Und { continue } fmt.Fprint(w.Hash, t, i) b, s, r := t.Raw() fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n", getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number getIndex(s, 2), getIndex(r, 3), i, t) i++ } fmt.Fprintln(w, "}") }
func main() { gen.Init() // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := gen.NewCodeWriter() defer func() { buf := &bytes.Buffer{} if _, err = w.WriteGo(buf, "language"); err != nil { log.Fatalf("Error formatting file index.go: %v", err) } // Since we're generating a table for our own package we need to rewrite // doing the equivalent of go fmt -r 'language.b -> b'. Using // bytes.Replace will do. out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1) if err := ioutil.WriteFile("index.go", out, 0600); err != nil { log.Fatalf("Could not create file index.go: %v", err) } }() m := map[language.Tag]bool{} for _, lang := range data.Locales() { if x := data.RawLDML(lang); false || x.LocaleDisplayNames != nil || x.Characters != nil || x.Delimiters != nil || x.Measurement != nil || x.Dates != nil || x.Numbers != nil || x.Units != nil || x.ListPatterns != nil || x.Collations != nil || x.Segmentations != nil || x.Rbnf != nil || x.Annotations != nil || x.Metadata != nil { // TODO: support POSIX natively, albeit non-standard. tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1)) m[tag] = true } } var core, special []language.Tag for t := range m { if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" { log.Fatalf("Unexpected extension %v in %v", x, t) } if len(t.Variants()) == 0 && len(t.Extensions()) == 0 { core = append(core, t) } else { special = append(special, t) } } w.WriteComment(` NumCompactTags is the number of common tags. The maximum tag is NumCompactTags-1.`) w.WriteConst("NumCompactTags", len(core)+len(special)) sort.Sort(byAlpha(special)) w.WriteVar("specialTags", special) type coreKey struct { base language.Base script language.Script region language.Region } w.WriteType(coreKey{}) // TODO: order by frequency? sort.Sort(byAlpha(core)) // Size computations are just an estimate. w.Size += int(reflect.TypeOf(map[coreKey]uint16{}).Size()) w.Size += len(core) * int(reflect.TypeOf(coreKey{}).Size()+2) // 2 is for uint16 fmt.Fprintln(w, "var coreTags = map[coreKey]uint16{") fmt.Fprintln(w, "coreKey{}: 0, // und") i := len(special) + 1 // Und and special tags already written. for _, t := range core { if t == language.Und { continue } fmt.Fprint(w.Hash, t, i) b, s, r := t.Raw() key := fmt.Sprintf("%#v", coreKey{b, s, r}) key = strings.Replace(key[len("main."):], "language.", "", -1) fmt.Fprintf(w, "%s: %d, // %s\n", key, i, t) i++ } fmt.Fprintln(w, "}") }
func genTables() { t := triegen.NewTrie("idna") ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) { r := p.Rune(0) const cccVirama = 9 if p.Int(ucd.CanonicalCombiningClass) == cccVirama { runes[p.Rune(0)] = viramaModifier } switch { case unicode.In(r, unicode.Mark): runes[r] |= modifier } }) ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) { switch v := p.String(1); v { case "L", "D", "T", "R": runes[p.Rune(0)] |= joinType[v] << joinShift } }) ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) { r := p.Rune(0) // The mappings table explicitly defines surrogates as invalid. if !utf8.ValidRune(r) { return } cat := catFromEntry(p) isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation if !isMapped { // Only include additional category information for non-mapped // runes. The additional information is only used after mapping and // the bits would clash with mapping information. // TODO: it would be possible to inline this data and avoid // additional lookups. This is quite tedious, though, so let's first // see if we need this. cat |= category(runes[r]) } s := string(p.Runes(2)) if s != "" && !isMapped { log.Fatalf("%U: Mapping with non-mapping category %d", r, cat) } t.Insert(r, uint64(makeEntry(r, s))+uint64(cat)) }) w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "idna") gen.WriteUnicodeVersion(w) w.WriteVar("mappings", string(mappings)) w.WriteVar("xorData", string(xorData)) sz, err := t.Gen(w, triegen.Compact(&normCompacter{})) if err != nil { log.Fatal(err) } w.Size += sz }