func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } w := &bytes.Buffer{} sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { log.Fatal(err) } gen.WriteUnicodeVersion(w) // TODO: write CLDR version after adding a mechanism to detect that the // tables on which the manually created locale-sensitive casing code is // based hasn't changed. fmt.Fprintf(w, "// xorData: %d bytes\n", len(xorData)) fmt.Fprintf(w, "var xorData = %+q\n\n", string(xorData)) fmt.Fprintf(w, "// exceptions: %d bytes\n", len(exceptionData)) fmt.Fprintf(w, "var exceptions = %q\n\n", string(exceptionData)) sz += len(exceptionData) fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) gen.WriteGoFile("tables.go", "cases", w.Bytes()) }
func main() { t := triegen.NewTrie("width") // wide is the base parse("EastAsianWidth.txt", func(p *ucd.Parser) { if contains(p.String(1), "W", "F") { t.Insert(p.Rune(0), widthTwo) } }) // zero overrides wide parse("extracted/DerivedGeneralCategory.txt", func(p *ucd.Parser) { cat := p.String(1) if cat == "Me" || cat == "Mn" { t.Insert(p.Rune(0), widthZero) } }) // misc overrides for _, v := range overrides { for r := v.from; r <= v.to; r++ { t.Insert(r, encodeWidth(v.width)) } } w := &bytes.Buffer{} gen.WriteUnicodeVersion(w) t.Gen(w) gen.WriteGoFile("tables.go", "runewidth", w.Bytes()) }
func printTestdata() { type lastInfo struct { ccc uint8 nLead uint8 nTrail uint8 f string } last := lastInfo{} w := &bytes.Buffer{} fmt.Fprintf(w, testHeader) for r, c := range chars { f := c.forms[FCanonical] qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) f = c.forms[FCompatibility] qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp) s := "" if d == dk && qc == qck && cf == cfk { s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d) } else { s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk) } current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s} if last != current { fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s) last = current } } fmt.Fprintln(w, "}") gen.WriteGoFile("data_test.go", "norm", w.Bytes()) }
func main() { gen.Init() // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("main", "supplemental") d.SetSectionFilter("localeDisplayNames") data, err := d.DecodeZip(r) if err != nil { log.Fatalf("DecodeZip: %v", err) } w := &bytes.Buffer{} gen.WriteCLDRVersion(w) b := builder{ w: w, data: data, group: make(map[string]*group), } b.generate() gen.WriteGoFile(*outputFile, "display", w.Bytes()) }
func main() { gen.Init() rewriteCommon() w := &bytes.Buffer{} b := newBuilder(w) fmt.Fprintf(w, version, cldr.Version) b.parseIndices() b.writeType(fromTo{}) b.writeLanguage() b.writeScript() b.writeRegion() b.writeVariant() // TODO: b.writeLocale() b.writeCurrencies() b.computeRegionGroups() b.writeLikelyData() b.writeMatchData() b.writeRegionInclusionData() b.writeParents() fmt.Fprintf(w, "\n// Size: %.1fK (%d bytes); Check: %X\n", float32(b.size)/1024, b.size, b.hash32.Sum32()) gen.WriteGoFile("tables.go", "language", w.Bytes()) }
func main() { gen.Init() b := build.NewBuilder() parseUCA(b) if tables.contains("chars") { parseMain() } parseCollation(b) c, err := b.Build() failOnError(err) if *test { testCollator(collate.NewFromTable(c)) } else { w := &bytes.Buffer{} gen.WriteUnicodeVersion(w) gen.WriteCLDRVersion(w) if tables.contains("collate") { _, err = b.Print(w) failOnError(err) } if tables.contains("chars") { printExemplarCharacters(w) } gen.WriteGoFile("tables.go", *pkg, w.Bytes()) } }
func main() { gen.Init() r := gen.Open("http://www.w3.org/TR", "w3", "encoding/indexes/encodings.json") var groups []group if err := json.NewDecoder(r).Decode(&groups); err != nil { log.Fatalf("Error reading encodings.json: %v", err) } w := &bytes.Buffer{} fmt.Fprintln(w, "type htmlEncoding byte") fmt.Fprintln(w, "const (") for i, g := range groups { for _, e := range g.Encodings { name := consts[e.Name] if name == "" { log.Fatalf("No const defined for %s.", e.Name) } if i == 0 { fmt.Fprintf(w, "%s htmlEncoding = iota\n", name) } else { fmt.Fprintf(w, "%s\n", name) } } } fmt.Fprintln(w, "numEncodings") fmt.Fprint(w, ")\n\n") fmt.Fprintln(w, "var canonical = [numEncodings]string{") for _, g := range groups { for _, e := range g.Encodings { fmt.Fprintf(w, "%q,\n", e.Name) } } fmt.Fprint(w, "}\n\n") fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{") for _, g := range groups { for _, e := range g.Encodings { for _, l := range e.Labels { fmt.Fprintf(w, "%q: %s,\n", l, consts[e.Name]) } } } fmt.Fprint(w, "}\n\n") var tags []string fmt.Fprintln(w, "var localeMap = []htmlEncoding{") for _, loc := range locales { tags = append(tags, loc.tag) fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag) } fmt.Fprint(w, "}\n\n") fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " ")) gen.WriteGoFile("tables.go", "htmlindex", w.Bytes()) }
func genTests() { w := &bytes.Buffer{} fmt.Fprintf(w, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n") getWidthData(func(r rune, tag elem, alt rune) { if alt != 0 { fmt.Fprintf(w, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag) } }) fmt.Fprintln(w, "}") gen.WriteGoFile("runes_test.go", "width", w.Bytes()) }
func rewriteCommon() { // Generate common.go src, err := ioutil.ReadFile("gen_common.go") failOnError(err) const toDelete = "// +build ignore\n\npackage main\n\n" i := bytes.Index(src, []byte(toDelete)) if i < 0 { log.Fatalf("could not find %q in gen_common.go", toDelete) } w := &bytes.Buffer{} w.Write(src[i+len(toDelete):]) gen.WriteGoFile("common.go", "language", w.Bytes()) }
// repackage rewrites a file from belonging to package main to belonging to // package width. func repackage(inFile, outFile string) { src, err := ioutil.ReadFile(inFile) if err != nil { log.Fatalf("reading %s: %v", inFile, err) } const toDelete = "package main\n\n" i := bytes.Index(src, []byte(toDelete)) if i < 0 { log.Fatalf("Could not find %q in gen_trieval.go", toDelete) } w := &bytes.Buffer{} w.Write(src[i+len(toDelete):]) gen.WriteGoFile(outFile, "width", w.Bytes()) }
func genTrieval() { src, err := ioutil.ReadFile("gen_trieval.go") if err != nil { log.Fatalf("reading gen_trieval.go: %v", err) } const toDelete = "// +build ignore\n\npackage main\n\n" i := bytes.Index(src, []byte(toDelete)) if i < 0 { log.Fatalf("could not find %q in gen_trieval.go", toDelete) } w := &bytes.Buffer{} w.Write(src[i+len(toDelete):]) gen.WriteGoFile("trieval.go", "cases", w.Bytes()) }
func main() { gen.Init() versions := getVersions() w := &bytes.Buffer{} fmt.Fprintf(w, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ",")) fmt.Fprintf(w, "import \"unicode\"\n\n") vstr := func(s string) string { return strings.Replace(s, ".", "_", -1) } fmt.Fprintf(w, "var assigned = map[string]*unicode.RangeTable{\n") for _, v := range versions { fmt.Fprintf(w, "\t%q: assigned%s,\n", v, vstr(v)) } fmt.Fprintf(w, "}\n\n") var size int for _, v := range versions { assigned := []rune{} r := gen.Open("http://www.unicode.org/Public/", "", v+"/ucd/UnicodeData.txt") ucd.Parse(r, func(p *ucd.Parser) { assigned = append(assigned, p.Rune(0)) }) rt := rangetable.New(assigned...) sz := int(reflect.TypeOf(unicode.RangeTable{}).Size()) sz += int(reflect.TypeOf(unicode.Range16{}).Size()) * len(rt.R16) sz += int(reflect.TypeOf(unicode.Range32{}).Size()) * len(rt.R32) fmt.Fprintf(w, "// size %d bytes (%d KiB)\n", sz, sz/1024) fmt.Fprintf(w, "var assigned%s = ", vstr(v)) print(w, rt) size += sz } fmt.Fprintf(w, "// Total size %d bytes (%d KiB)\n", size, size/1024) gen.WriteGoFile("tables.go", "rangetable", w.Bytes()) }
func genTables() { t := triegen.NewTrie("width") // fold and inverse mappings. See mapComment for a description of the format // of each entry. Add dummy value to make an index of 0 mean no mapping. inverse := [][4]byte{{}} mapping := map[[4]byte]int{[4]byte{}: 0} getWidthData(func(r rune, tag elem, alt rune) { idx := 0 if alt != 0 { var buf [4]byte buf[0] = byte(utf8.EncodeRune(buf[1:], alt)) s := string(r) buf[buf[0]] ^= s[len(s)-1] var ok bool if idx, ok = mapping[buf]; !ok { idx = len(mapping) if idx > math.MaxUint8 { log.Fatalf("Index %d does not fit in a byte.", idx) } mapping[buf] = idx inverse = append(inverse, buf) } } t.Insert(r, uint64(tag|elem(idx))) }) w := &bytes.Buffer{} gen.WriteUnicodeVersion(w) sz, err := t.Gen(w) if err != nil { log.Fatal(err) } sz += writeMappings(w, inverse) fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) gen.WriteGoFile(*outputFile, "width", w.Bytes()) }
func main() { flag.Parse() r := gen.OpenCLDRCoreZip() buffer, err := ioutil.ReadAll(r) if err != nil { log.Fatal("Could not read zip file") } r.Close() z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) if err != nil { log.Fatalf("Could not read zip archive: %v", err) } var buf bytes.Buffer version := gen.CLDRVersion() for _, dtd := range files { for _, f := range z.File { if strings.HasSuffix(f.Name, dtd.file+".dtd") { r, err := f.Open() failOnError(err) b := makeBuilder(&buf, dtd) b.parseDTD(r) b.resolve(b.index[dtd.top[0]]) b.write() if b.version != "" && version != b.version { println(f.Name) log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version) } break } } } fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.") fmt.Fprintf(&buf, "const Version = %q\n", version) gen.WriteGoFile(*outputFile, "cldr", buf.Bytes()) }
func genTablesTest() { w := &bytes.Buffer{} fmt.Fprintln(w, "var (") printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore) // We discard the output as we know we have perfect functions. We run them // just to verify the properties are correct. n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased) n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower) n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper) if n > 0 { log.Fatalf("One of the discarded properties does not have a perfect filter.") } // <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)? fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{") parse("SpecialCasing.txt", func(p *ucd.Parser) { // Skip conditional entries. if p.String(4) != "" { return } r := p.Rune(0) fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3))) }) fmt.Fprint(w, "\t}\n\n") // Break property notBreak := map[rune]bool{} parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) { switch p.String(1) { case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote", "ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet": notBreak[p.Rune(0)] = true } }) fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{") inBreak := false for r := rune(0); r <= lastRuneForTesting; r++ { if isBreak := !notBreak[r]; isBreak != inBreak { if isBreak { fmt.Fprintf(w, "\t\t{0x%x, ", r) } else { fmt.Fprintf(w, "0x%x},\n", r-1) } inBreak = isBreak } } if inBreak { fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting) } fmt.Fprint(w, "\t}\n\n") // Word break test // Filter out all samples that do not contain cased characters. cased := map[rune]bool{} parse("DerivedCoreProperties.txt", func(p *ucd.Parser) { if p.String(1) == "Cased" { cased[p.Rune(0)] = true } }) fmt.Fprintln(w, "\tbreakTest = []string{") parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) { c := strings.Split(p.String(0), " ") const sep = '|' numCased := 0 test := "" for ; len(c) >= 2; c = c[2:] { if c[0] == "รท" && test != "" { test += string(sep) } i, err := strconv.ParseUint(c[1], 16, 32) r := rune(i) if err != nil { log.Fatalf("Invalid rune %q.", c[1]) } if r == sep { log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep) } if cased[r] { numCased++ } test += string(r) } if numCased > 1 { fmt.Fprintf(w, "\t\t%q,\n", test) } }) fmt.Fprintln(w, "\t}") fmt.Fprintln(w, ")") gen.WriteGoFile("tables_test.go", "cases", w.Bytes()) }
func makeTables() { w := &bytes.Buffer{} size := 0 if *tablelist == "" { return } list := strings.Split(*tablelist, ",") if *tablelist == "all" { list = []string{"recomp", "info"} } // Compute maximum decomposition size. max := 0 for _, c := range chars { if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max { max = n } } fmt.Fprintln(w, "const (") fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.") fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion()) fmt.Fprintln(w) fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform") fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at") fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that") fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.") fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max) fmt.Fprintln(w, ")\n") // Print the CCC remap table. size += len(cccMap) fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap)) for i := 0; i < len(cccMap); i++ { if i%8 == 0 { fmt.Fprintln(w) } fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)]) } fmt.Fprintln(w, "\n}\n") if contains(list, "info") { size += printCharInfoTables(w) } if contains(list, "recomp") { // Note that we use 32 bit keys, instead of 64 bit. // This clips the bits of three entries, but we know // this won't cause a collision. The compiler will catch // any changes made to UnicodeData.txt that introduces // a collision. // Note that the recomposition map for NFC and NFKC // are identical. // Recomposition map nrentries := 0 for _, c := range chars { f := c.forms[FCanonical] if !f.isOneWay && len(f.decomp) > 0 { nrentries++ } } sz := nrentries * 8 size += sz fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz) fmt.Fprintln(w, "var recompMap = map[uint32]rune{") for i, c := range chars { f := c.forms[FCanonical] d := f.decomp if !f.isOneWay && len(d) > 0 { key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1])) fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i) } } fmt.Fprintf(w, "}\n\n") } fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size) gen.WriteGoFile("tables.go", "norm", w.Bytes()) }
func main() { r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") reg := ®istry{} if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { log.Fatalf("Error decoding charset registry: %v", err) } if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) } w := &bytes.Buffer{} fmt.Fprintf(w, "const (\n") for _, rec := range reg.Registry[0].Record { constName := "" for _, a := range rec.Alias { if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { // Some of the constant definitions have comments in them. Strip those. constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) } } if constName == "" { switch rec.MIB { case "2085": constName = "HZGB2312" // Not listed as alias for some reason. default: log.Fatalf("No cs alias defined for %s.", rec.MIB) } } if rec.MIME != "" { rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) } fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) if len(rec.Desc.Data) > 0 { fmt.Fprint(w, "// ") d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) inElem := true attr := "" for { t, err := d.Token() if err != nil { if err != io.EOF { log.Fatal(err) } break } switch x := t.(type) { case xml.CharData: attr = "" // Don't need attribute info. a := bytes.Split([]byte(x), []byte("\n")) for i, b := range a { if b = bytes.TrimSpace(b); len(b) != 0 { if !inElem && i > 0 { fmt.Fprint(w, "\n// ") } inElem = false fmt.Fprintf(w, "%s ", string(b)) } } case xml.StartElement: if x.Name.Local == "xref" { inElem = true use := false for _, a := range x.Attr { if a.Name.Local == "type" { use = use || a.Value != "person" } if a.Name.Local == "data" && use { attr = a.Value + " " } } } case xml.EndElement: inElem = false fmt.Fprint(w, attr) } } fmt.Fprint(w, "\n") } for _, x := range rec.Xref { switch x.Type { case "rfc": fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) case "uri": fmt.Fprintf(w, "// Reference: %s\n", x.Data) } } fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) fmt.Fprintln(w) } fmt.Fprintln(w, ")") gen.WriteGoFile("mib.go", "identifier", w.Bytes()) }