func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "cases") gen.WriteUnicodeVersion(w) // TODO: write CLDR version after adding a mechanism to detect that the // tables on which the manually created locale-sensitive casing code is // based hasn't changed. w.WriteVar("xorData", string(xorData)) w.WriteVar("exceptions", string(exceptionData)) sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { log.Fatal(err) } w.Size += sz }
func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } w := &bytes.Buffer{} sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { log.Fatal(err) } gen.WriteUnicodeVersion(w) // TODO: write CLDR version after adding a mechanism to detect that the // tables on which the manually created locale-sensitive casing code is // based hasn't changed. fmt.Fprintf(w, "// xorData: %d bytes\n", len(xorData)) fmt.Fprintf(w, "var xorData = %+q\n\n", string(xorData)) fmt.Fprintf(w, "// exceptions: %d bytes\n", len(exceptionData)) fmt.Fprintf(w, "var exceptions = %q\n\n", string(exceptionData)) sz += len(exceptionData) fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) gen.WriteGoFile("tables.go", "cases", w.Bytes()) }
func genTables() { chars := parseUCD() verifyProperties(chars) t := triegen.NewTrie("case") for i := range chars { c := &chars[i] makeEntry(c) t.Insert(rune(i), uint64(c.entry)) } const file = "tables.go" w, err := os.Create(file + ".tmp") if err != nil { logger.Fatal(err) } fmt.Fprintf(w, header, *url) sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{})) if err != nil { logger.Fatal(err) } fmt.Fprintf(w, "// exceptions: %d bytes\n", len(exceptionData)) fmt.Fprintf(w, "var exceptions = %q\n\n", string(exceptionData)) sz += len(exceptionData) fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024) if err := os.Rename(file+".tmp", file); err != nil { logger.Fatalf("Rename to file %v failed.", file) } exec.Command("gofmt", "-w", file).Run() }
func ExampleCompacter() { t := triegen.NewTrie("root") for r := rune(0); r < 10000; r += 64 { t.Insert(r, 0x9015BADA55^uint64(r)) } sz, _ := t.Gen(ioutil.Discard) fmt.Printf("Size normal: %5d\n", sz) var c myCompacter sz, _ = t.Gen(ioutil.Discard, triegen.Compact(&c)) fmt.Printf("Size compacted: %5d\n", sz) // Output: // Size normal: 81344 // Size compacted: 3224 }
func printCharInfoTables(w io.Writer) int { mkstr := func(r rune, f *FormInfo) (int, string) { d := f.expandedDecomp s := string([]rune(d)) if max := 1 << 6; len(s) >= max { const msg = "%U: too many bytes in decomposition: %d >= %d" log.Fatalf(msg, r, len(s), max) } head := uint8(len(s)) if f.quickCheck[MComposed] != QCYes { head |= 0x40 } if f.combinesForward { head |= 0x80 } s = string([]byte{head}) + s lccc := ccc(d[0]) tccc := ccc(d[len(d)-1]) cc := ccc(r) if cc != 0 && lccc == 0 && tccc == 0 { log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc) } if tccc < lccc && lccc != 0 { const msg = "%U: lccc (%d) must be <= tcc (%d)" log.Fatalf(msg, r, lccc, tccc) } index := normalDecomp nTrail := chars[r].nTrailingNonStarters if tccc > 0 || lccc > 0 || nTrail > 0 { tccc <<= 2 tccc |= nTrail s += string([]byte{tccc}) index = endMulti for _, r := range d[1:] { if ccc(r) == 0 { index = firstCCC } } if lccc > 0 { s += string([]byte{lccc}) if index == firstCCC { log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r) } index = firstLeadingCCC } if cc != lccc { if cc != 0 { log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc) } index = firstCCCZeroExcept } } else if len(d) > 1 { index = firstMulti } return index, s } decompSet := makeDecompSet() const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail. decompSet.insert(firstStarterWithNLead, nLeadStr) // Store the uniqued decompositions in a byte buffer, // preceded by their byte length. for _, c := range chars { for _, f := range c.forms { if len(f.expandedDecomp) == 0 { continue } if f.combinesBackward { log.Fatalf("%U: combinesBackward and decompose", c.codePoint) } index, s := mkstr(c.codePoint, &f) decompSet.insert(index, s) } } decompositions := bytes.NewBuffer(make([]byte, 0, 10000)) size := 0 positionMap := make(map[string]uint16) decompositions.WriteString("\000") fmt.Fprintln(w, "const (") for i, m := range decompSet { sa := []string{} for s := range m { sa = append(sa, s) } sort.Strings(sa) for _, s := range sa { p := decompositions.Len() decompositions.WriteString(s) positionMap[s] = uint16(p) } if cname[i] != "" { fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len()) } } fmt.Fprintln(w, "maxDecomp = 0x8000") fmt.Fprintln(w, ")") b := decompositions.Bytes() printBytes(w, b, "decomps") size += len(b) varnames := []string{"nfc", "nfkc"} for i := 0; i < FNumberOfFormTypes; i++ { trie := triegen.NewTrie(varnames[i]) for r, c := range chars { f := c.forms[i] d := f.expandedDecomp if len(d) != 0 { _, key := mkstr(c.codePoint, &f) trie.Insert(rune(r), uint64(positionMap[key])) if c.ccc != ccc(d[0]) { // We assume the lead ccc of a decomposition !=0 in this case. if ccc(d[0]) == 0 { log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc) } } } else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward { // Handle cases where it can't be detected that the nLead should be equal // to nTrail. trie.Insert(c.codePoint, uint64(positionMap[nLeadStr])) } else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 { trie.Insert(c.codePoint, uint64(0x8000|v)) } } sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]})) if err != nil { log.Fatal(err) } size += sz } return size }
func genTables() { t := triegen.NewTrie("idna") ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) { r := p.Rune(0) const cccVirama = 9 if p.Int(ucd.CanonicalCombiningClass) == cccVirama { runes[p.Rune(0)] = viramaModifier } switch { case unicode.In(r, unicode.Mark): runes[r] |= modifier } }) ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) { switch v := p.String(1); v { case "L", "D", "T", "R": runes[p.Rune(0)] |= joinType[v] << joinShift } }) ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) { r := p.Rune(0) // The mappings table explicitly defines surrogates as invalid. if !utf8.ValidRune(r) { return } cat := catFromEntry(p) isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation if !isMapped { // Only include additional category information for non-mapped // runes. The additional information is only used after mapping and // the bits would clash with mapping information. // TODO: it would be possible to inline this data and avoid // additional lookups. This is quite tedious, though, so let's first // see if we need this. cat |= category(runes[r]) } s := string(p.Runes(2)) if s != "" && !isMapped { log.Fatalf("%U: Mapping with non-mapping category %d", r, cat) } t.Insert(r, uint64(makeEntry(r, s))+uint64(cat)) }) w := gen.NewCodeWriter() defer w.WriteGoFile("tables.go", "idna") gen.WriteUnicodeVersion(w) w.WriteVar("mappings", string(mappings)) w.WriteVar("xorData", string(xorData)) sz, err := t.Gen(w, triegen.Compact(&normCompacter{})) if err != nil { log.Fatal(err) } w.Size += sz }