func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } w := &bytes.Buffer{} sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { log.Fatal(err) } gen.WriteUnicodeVersion(w) // TODO: write CLDR version after adding a mechanism to detect that the // tables on which the manually created locale-sensitive casing code is // based hasn't changed. fmt.Fprintf(w, "// xorData: %d bytes\n", len(xorData)) fmt.Fprintf(w, "var xorData = %+q\n\n", string(xorData)) fmt.Fprintf(w, "// exceptions: %d bytes\n", len(exceptionData)) fmt.Fprintf(w, "var exceptions = %q\n\n", string(exceptionData)) sz += len(exceptionData) fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) gen.WriteGoFile("tables.go", "cases", w.Bytes()) }
func main() { gen.Init() b := build.NewBuilder() parseUCA(b) if tables.contains("chars") { parseMain() } parseCollation(b) c, err := b.Build() failOnError(err) if *test { testCollator(collate.NewFromTable(c)) } else { w := &bytes.Buffer{} gen.WriteUnicodeVersion(w) gen.WriteCLDRVersion(w) if tables.contains("collate") { _, err = b.Print(w) failOnError(err) } if tables.contains("chars") { printExemplarCharacters(w) } gen.WriteGoFile("tables.go", *pkg, w.Bytes()) } }
func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "cases") gen.WriteUnicodeVersion(w) // TODO: write CLDR version after adding a mechanism to detect that the // tables on which the manually created locale-sensitive casing code is // based hasn't changed. w.WriteVar("xorData", string(xorData)) w.WriteVar("exceptions", string(exceptionData)) sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { log.Fatal(err) } w.Size += sz }
func main() { t := triegen.NewTrie("width") // wide is the base parse("EastAsianWidth.txt", func(p *ucd.Parser) { if contains(p.String(1), "W", "F") { t.Insert(p.Rune(0), widthTwo) } }) // zero overrides wide parse("extracted/DerivedGeneralCategory.txt", func(p *ucd.Parser) { cat := p.String(1) if cat == "Me" || cat == "Mn" { t.Insert(p.Rune(0), widthZero) } }) // misc overrides for _, v := range overrides { for r := v.from; r <= v.to; r++ { t.Insert(r, encodeWidth(v.width)) } } w := &bytes.Buffer{} gen.WriteUnicodeVersion(w) t.Gen(w) gen.WriteGoFile("tables.go", "runewidth", w.Bytes()) }
func writeTables() { propTrie := triegen.NewTrie("derivedProperties") w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "precis") gen.WriteUnicodeVersion(w) // Iterate over all the runes... for i := rune(0); i < unicode.MaxRune; i++ { r := rune(i) if !utf8.ValidRune(r) { continue } e, ok := exceptions[i] p := e.prop switch { case ok: case !unicode.In(r, assigned): p = unassigned case r >= 0x0021 && r <= 0x007e: // Is ASCII 7 p = pValid case unicode.In(r, disallowedRunes, unicode.Cc): p = disallowed case hasCompat(r): p = idDisOrFreePVal case isLetterDigits(r): p = pValid case isIdDisAndFreePVal(r): p = idDisOrFreePVal default: p = disallowed } cat := runeCategory[r] // Don't set category for runes that are disallowed. if p == disallowed { cat = exceptions[r].cat } propTrie.Insert(r, uint64(p)|uint64(cat)) } sz, err := propTrie.Gen(w) if err != nil { log.Fatal(err) } w.Size += sz }
func writeTables() { propTrie := triegen.NewTrie("derivedProperties") w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "precis") gen.WriteUnicodeVersion(w) // Iterate over all the runes... for i := uint32(0); i < unicode.MaxRune; i++ { r := rune(i) if !utf8.ValidRune(r) { continue } p, ok := exceptions[i] switch { case ok: case !unicode.In(r, assigned): p = unassigned case r >= 33 && r <= 126: // Is ASCII 7 p = pValid case r == 0x200C || r == 0x200D: // Is join control p = contextJ case unicode.In(r, disallowedRunes, unicode.Cc): p = disallowed case isHasCompat(r): p = idDis | freePVal case isLetterDigits(r): p = pValid case isIdDisAndFreePVal(r): p = idDis | freePVal default: p = disallowed } propTrie.Insert(r, uint64(p)) } sz, err := propTrie.Gen(w) if err != nil { log.Fatal(err) } w.Size += sz }
func genTables() { t := triegen.NewTrie("width") // fold and inverse mappings. See mapComment for a description of the format // of each entry. Add dummy value to make an index of 0 mean no mapping. inverse := [][4]byte{{}} mapping := map[[4]byte]int{[4]byte{}: 0} getWidthData(func(r rune, tag elem, alt rune) { idx := 0 if alt != 0 { var buf [4]byte buf[0] = byte(utf8.EncodeRune(buf[1:], alt)) s := string(r) buf[buf[0]] ^= s[len(s)-1] var ok bool if idx, ok = mapping[buf]; !ok { idx = len(mapping) if idx > math.MaxUint8 { log.Fatalf("Index %d does not fit in a byte.", idx) } mapping[buf] = idx inverse = append(inverse, buf) } } t.Insert(r, uint64(tag|elem(idx))) }) w := &bytes.Buffer{} gen.WriteUnicodeVersion(w) sz, err := t.Gen(w) if err != nil { log.Fatal(err) } sz += writeMappings(w, inverse) fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) gen.WriteGoFile(*outputFile, "width", w.Bytes()) }
func genTables() { if numClass > 0x0F { log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass) } w := gen.NewCodeWriter() defer w.WriteGoFile(*outputFile, "bidi") gen.WriteUnicodeVersion(w) t := triegen.NewTrie("bidi") // Build data about bracket mapping. These bits need to be or-ed with // any other bits. orMask := map[rune]uint64{} xorMap := map[rune]int{} xorMasks := []rune{0} // First value is no-op. parse("BidiBrackets.txt", func(p *ucd.Parser) { r1 := p.Rune(0) r2 := p.Rune(1) xor := r1 ^ r2 if _, ok := xorMap[xor]; !ok { xorMap[xor] = len(xorMasks) xorMasks = append(xorMasks, xor) } entry := uint64(xorMap[xor]) << xorMaskShift switch p.String(2) { case "o": entry |= openMask case "c", "n": default: log.Fatalf("Unknown bracket class %q.", p.String(2)) } orMask[r1] = entry }) w.WriteComment(` xorMasks contains masks to be xor-ed with brackets to get the reverse version.`) w.WriteVar("xorMasks", xorMasks) done := map[rune]bool{} insert := func(r rune, c class) { if !done[r] { t.Insert(r, orMask[r]|uint64(c)) done[r] = true } } // Insert the derived BiDi properties. parse("extracted/DerivedBidiClass.txt", func(p *ucd.Parser) { r := p.Rune(0) class, ok := bidiClass[p.String(1)] if !ok { log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1)) } insert(r, class) }) visitDefaults(insert) // TODO: use sparse blocks. This would reduce table size considerably // from the looks of it. sz, err := t.Gen(w) if err != nil { log.Fatal(err) } w.Size += sz }
func genTables() { t := triegen.NewTrie("idna") ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) { r := p.Rune(0) const cccVirama = 9 if p.Int(ucd.CanonicalCombiningClass) == cccVirama { runes[p.Rune(0)] = viramaModifier } switch { case unicode.In(r, unicode.Mark): runes[r] |= modifier } }) ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) { switch v := p.String(1); v { case "L", "D", "T", "R": runes[p.Rune(0)] |= joinType[v] << joinShift } }) ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) { r := p.Rune(0) // The mappings table explicitly defines surrogates as invalid. if !utf8.ValidRune(r) { return } cat := catFromEntry(p) isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation if !isMapped { // Only include additional category information for non-mapped // runes. The additional information is only used after mapping and // the bits would clash with mapping information. // TODO: it would be possible to inline this data and avoid // additional lookups. This is quite tedious, though, so let's first // see if we need this. cat |= category(runes[r]) } s := string(p.Runes(2)) if s != "" && !isMapped { log.Fatalf("%U: Mapping with non-mapping category %d", r, cat) } t.Insert(r, uint64(makeEntry(r, s))+uint64(cat)) }) w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "idna") gen.WriteUnicodeVersion(w) w.WriteVar("mappings", string(mappings)) w.WriteVar("xorData", string(xorData)) sz, err := t.Gen(w, triegen.Compact(&normCompacter{})) if err != nil { log.Fatal(err) } w.Size += sz }