Пример #1
0
// genTables builds the "case" trie from the parsed UCD characters and emits
// it, together with the xorData and exceptions variables, through a
// gen.CodeWriter that writes tables.go (package cases) when this function
// returns.
func genTables() {
	chars := parseUCD()
	verifyProperties(chars)

	// Compute the entry for every character and store it in the trie under
	// the character's index.
	trie := triegen.NewTrie("case")
	for i := 0; i < len(chars); i++ {
		info := &chars[i]
		makeEntry(info)
		trie.Insert(rune(i), uint64(info.entry))
	}

	out := gen.NewCodeWriter()
	defer out.WriteGoFile("tables.go", "cases")

	gen.WriteUnicodeVersion(out)

	// TODO: write CLDR version after adding a mechanism to detect that the
	// tables on which the manually created locale-sensitive casing code is
	// based hasn't changed.

	out.WriteVar("xorData", string(xorData))
	out.WriteVar("exceptions", string(exceptionData))

	trieSize, err := trie.Gen(out, triegen.Compact(&sparseCompacter{}))
	if err != nil {
		log.Fatal(err)
	}
	// Account for the generated trie in the writer's running size.
	out.Size += trieSize
}
Пример #2
0
// main builds the "width" trie and writes it to tables.go in package
// runewidth. Values are inserted in three passes, each overriding the
// previous one for the runes it touches:
//
//  1. runes from EastAsianWidth.txt whose field 1 is "W" or "F" get widthTwo;
//  2. runes from DerivedGeneralCategory.txt in category "Me" or "Mn" get
//     widthZero;
//  3. every rune in the ranges listed in overrides gets its encoded width.
func main() {
	t := triegen.NewTrie("width")

	// wide is the base
	parse("EastAsianWidth.txt", func(p *ucd.Parser) {
		if contains(p.String(1), "W", "F") {
			t.Insert(p.Rune(0), widthTwo)
		}
	})

	// zero overrides wide
	parse("extracted/DerivedGeneralCategory.txt", func(p *ucd.Parser) {
		cat := p.String(1)
		if cat == "Me" || cat == "Mn" {
			t.Insert(p.Rune(0), widthZero)
		}
	})

	// misc overrides
	for _, v := range overrides {
		for r := v.from; r <= v.to; r++ {
			t.Insert(r, encodeWidth(v.width))
		}
	}

	w := &bytes.Buffer{}
	gen.WriteUnicodeVersion(w)
	// Do not write tables.go from a buffer whose trie generation failed; the
	// error was previously discarded.
	if _, err := t.Gen(w); err != nil {
		panic(err)
	}
	gen.WriteGoFile("tables.go", "runewidth", w.Bytes())
}
Пример #3
0
// genTables builds the "case" trie from the parsed UCD characters and writes
// it, followed by the Unicode version, the xorData and exceptions variables
// and a total-size comment, to tables.go in package cases.
func genTables() {
	chars := parseUCD()
	verifyProperties(chars)

	t := triegen.NewTrie("case")
	for i := range chars {
		c := &chars[i]
		makeEntry(c)
		t.Insert(rune(i), uint64(c.entry))
	}

	w := &bytes.Buffer{}

	sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
	if err != nil {
		log.Fatal(err)
	}

	gen.WriteUnicodeVersion(w)
	// TODO: write CLDR version after adding a mechanism to detect that the
	// tables on which the manually created locale-sensitive casing code is
	// based hasn't changed.

	fmt.Fprintf(w, "// xorData: %d bytes\n", len(xorData))
	fmt.Fprintf(w, "var xorData = %+q\n\n", string(xorData))

	fmt.Fprintf(w, "// exceptions: %d bytes\n", len(exceptionData))
	fmt.Fprintf(w, "var exceptions = %q\n\n", string(exceptionData))

	// The total covers every table emitted above: the trie, xorData and the
	// exceptions. xorData was previously left out of the sum.
	sz += len(xorData) + len(exceptionData)
	fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024)

	gen.WriteGoFile("tables.go", "cases", w.Bytes())
}
Пример #4
0
// ExampleGen_build demonstrates the creation of multiple tries sharing common
// blocks. ExampleGen_lookup demonstrates how to use the generated tries.
func ExampleGen_build() {
	var tries []*triegen.Trie

	rv := runeValues()
	for _, c := range []struct {
		include func(rune) bool
		name    string
	}{
		{func(r rune) bool { return true }, "all"},
		{func(r rune) bool { return r < 0x80 }, "ASCII only"},
		{func(r rune) bool { return r < 0x80 }, "ASCII only 2"},
		{func(r rune) bool { return r <= 0xFFFF }, "BMP only"},
		{func(r rune) bool { return r > 0xFFFF }, "No BMP"},
	} {
		t := triegen.NewTrie(c.name)
		tries = append(tries, t)

		for r, v := range rv {
			if c.include(r) {
				t.Insert(r, v)
			}
		}
	}
	sz, err := triegen.Gen(genWriter, "multi", tries)

	fmt.Printf("Trie size: %d bytes\n", sz)
	fmt.Printf("Error:     %v\n", err)

	// Output:
	// Trie size: 18250 bytes
	// Error:     <nil>
}
Пример #5
0
// genTables builds the "case" trie from the parsed UCD characters and writes
// it, preceded by the file header and followed by the exceptions variable and
// a total-size comment, to tables.go. The output is first written to
// tables.go.tmp and renamed into place once complete; gofmt is then run on
// the final file.
func genTables() {
	chars := parseUCD()
	verifyProperties(chars)

	t := triegen.NewTrie("case")
	for i := range chars {
		c := &chars[i]
		makeEntry(c)
		t.Insert(rune(i), uint64(c.entry))
	}

	const file = "tables.go"
	w, err := os.Create(file + ".tmp")
	if err != nil {
		logger.Fatal(err)
	}

	fmt.Fprintf(w, header, *url)
	sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
	if err != nil {
		logger.Fatal(err)
	}

	fmt.Fprintf(w, "// exceptions: %d bytes\n", len(exceptionData))
	fmt.Fprintf(w, "var exceptions = %q\n\n", string(exceptionData))

	sz += len(exceptionData)
	fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024)

	// Close the temporary file before renaming it: this releases the handle
	// (renaming an open file fails on some platforms) and surfaces any
	// pending write error instead of leaving a truncated table in place.
	if err := w.Close(); err != nil {
		logger.Fatal(err)
	}
	if err := os.Rename(file+".tmp", file); err != nil {
		logger.Fatalf("Rename to file %v failed: %v", file, err)
	}
	// NOTE(review): the gofmt result is ignored, as in the original; this
	// looks like a best-effort formatting pass — confirm before making it
	// fatal.
	exec.Command("gofmt", "-w", file).Run()
}
Пример #6
0
// Example_build demonstrates building a simple trie: every rune produced by
// randomRunes is inserted with the value 1, after which the trie is generated
// and its size and error are printed.
func Example_build() {
	trie := triegen.NewTrie("rand")
	for r := range randomRunes() {
		trie.Insert(r, 1)
	}

	size, err := trie.Gen(genWriter)
	fmt.Printf("Trie size: %d bytes\nError:     %v\n", size, err)

	// Output:
	// Trie size: 9280 bytes
	// Error:     <nil>
}
Пример #7
0
// writeTables computes a property and category for every valid rune below
// unicode.MaxRune, stores their bitwise OR in the "derivedProperties" trie,
// and emits the trie through a gen.CodeWriter that writes *outputFile
// (package precis) when this function returns.
func writeTables() {
	trie := triegen.NewTrie("derivedProperties")
	out := gen.NewCodeWriter()
	defer out.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(out)

	for r := rune(0); r < unicode.MaxRune; r++ {
		if !utf8.ValidRune(r) {
			continue
		}

		// An explicit exception wins; otherwise the first matching rule
		// below decides the property.
		exc, isException := exceptions[r]
		prop := exc.prop
		switch {
		case isException:
			// Keep the property taken from the exceptions table.
		case !unicode.In(r, assigned):
			prop = unassigned
		case 0x0021 <= r && r <= 0x007e: // Is ASCII 7
			prop = pValid
		case unicode.In(r, disallowedRunes, unicode.Cc):
			prop = disallowed
		case hasCompat(r):
			prop = idDisOrFreePVal
		case isLetterDigits(r):
			prop = pValid
		case isIdDisAndFreePVal(r):
			prop = idDisOrFreePVal
		default:
			prop = disallowed
		}

		// Disallowed runes do not get a category of their own; only the
		// category recorded in the exceptions table (if any) is used.
		cat := runeCategory[r]
		if prop == disallowed {
			cat = exc.cat
		}
		trie.Insert(r, uint64(prop)|uint64(cat))
	}

	trieSize, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	out.Size += trieSize
}
// ExampleCompacter generates the same trie twice, first with the default
// settings and then with a myCompacter passed through triegen.Compact, and
// prints the size reported for each run.
func ExampleCompacter() {
	trie := triegen.NewTrie("root")
	for r := rune(0); r < 10000; r += 64 {
		trie.Insert(r, 0x9015BADA55^uint64(r))
	}

	normal, _ := trie.Gen(ioutil.Discard)
	fmt.Printf("Size normal:    %5d\n", normal)

	compacter := new(myCompacter)
	compacted, _ := trie.Gen(ioutil.Discard, triegen.Compact(compacter))
	fmt.Printf("Size compacted: %5d\n", compacted)

	// Output:
	// Size normal:    81344
	// Size compacted:  3224
}
Пример #9
0
// writeTables computes a property for every valid rune below unicode.MaxRune,
// stores it in the "derivedProperties" trie, and emits the trie through a
// gen.CodeWriter that writes *outputFile (package precis) when this function
// returns.
func writeTables() {
	trie := triegen.NewTrie("derivedProperties")
	out := gen.NewCodeWriter()
	defer out.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(out)

	// The counter stays a uint32 because it is also the key used to index
	// the exceptions table.
	for i := uint32(0); i < unicode.MaxRune; i++ {
		r := rune(i)
		if !utf8.ValidRune(r) {
			continue
		}

		// An explicit exception wins; otherwise the first matching rule
		// below decides the property.
		prop, isException := exceptions[i]
		switch {
		case isException:
			// Keep the property taken from the exceptions table.
		case !unicode.In(r, assigned):
			prop = unassigned
		case '!' <= r && r <= '~': // Is ASCII 7
			prop = pValid
		case r == '\u200C', r == '\u200D': // Is join control
			prop = contextJ
		case unicode.In(r, disallowedRunes, unicode.Cc):
			prop = disallowed
		case isHasCompat(r):
			prop = idDis | freePVal
		case isLetterDigits(r):
			prop = pValid
		case isIdDisAndFreePVal(r):
			prop = idDis | freePVal
		default:
			prop = disallowed
		}
		trie.Insert(r, uint64(prop))
	}

	trieSize, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	out.Size += trieSize
}
Пример #10
0
// genTables builds the "width" trie from the data reported by getWidthData
// and writes it, together with the inverse mapping table and a total-size
// comment, to *outputFile in package width.
func genTables() {
	t := triegen.NewTrie("width")
	// fold and inverse mappings. See mapComment for a description of the format
	// of each entry. Add dummy value to make an index of 0 mean no mapping.
	inverse := [][4]byte{{}}
	mapping := map[[4]byte]int{{}: 0}

	// indexOf returns the index assigned to key, registering the key in both
	// mapping and inverse the first time it is seen.
	indexOf := func(key [4]byte) int {
		if n, seen := mapping[key]; seen {
			return n
		}
		n := len(mapping)
		if n > math.MaxUint8 {
			log.Fatalf("Index %d does not fit in a byte.", n)
		}
		mapping[key] = n
		inverse = append(inverse, key)
		return n
	}

	getWidthData(func(r rune, tag elem, alt rune) {
		idx := 0
		if alt != 0 {
			// key[0] holds the UTF-8 length of alt, followed by its
			// encoding; the last encoded byte is then XOR-ed with the last
			// byte of r's own encoding.
			var key [4]byte
			key[0] = byte(utf8.EncodeRune(key[1:], alt))
			src := string(r)
			key[key[0]] ^= src[len(src)-1]
			idx = indexOf(key)
		}
		t.Insert(r, uint64(tag|elem(idx)))
	})

	out := &bytes.Buffer{}
	gen.WriteUnicodeVersion(out)

	total, err := t.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	total += writeMappings(out, inverse)

	fmt.Fprintf(out, "// Total table size %d bytes (%dKiB)\n", total, total/1024)

	gen.WriteGoFile(*outputFile, "width", out.Bytes())
}
Пример #11
0
Файл: gen.go Проект: 7ukey/text
// genTables generates the "bidi" trie and the xorMasks variable and emits
// them through a gen.CodeWriter that writes *outputFile (package bidi) when
// this function returns. Each trie value is the rune's class OR-ed with its
// bracket bits, if any, taken from BidiBrackets.txt.
func genTables() {
	if numClass > 0x0F {
		log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
	}
	out := gen.NewCodeWriter()
	defer out.WriteGoFile(*outputFile, "bidi")

	gen.WriteUnicodeVersion(out)

	trie := triegen.NewTrie("bidi")

	// Build data about bracket mapping. These bits need to be or-ed with
	// any other bits.
	bracketBits := map[rune]uint64{}

	maskIndex := map[rune]int{}
	xorMasks := []rune{0} // First value is no-op.

	parse("BidiBrackets.txt", func(p *ucd.Parser) {
		bracket, paired := p.Rune(0), p.Rune(1)
		xor := bracket ^ paired

		// Allocate a new mask slot the first time this xor value is seen.
		idx, known := maskIndex[xor]
		if !known {
			idx = len(xorMasks)
			maskIndex[xor] = idx
			xorMasks = append(xorMasks, xor)
		}

		entry := uint64(idx) << xorMaskShift
		switch kind := p.String(2); kind {
		case "o":
			entry |= openMask
		case "c", "n":
			// No extra bits for these bracket classes.
		default:
			log.Fatalf("Unknown bracket class %q.", kind)
		}
		bracketBits[bracket] = entry
	})

	out.WriteComment(`
	xorMasks contains masks to be xor-ed with brackets to get the reverse 
	version.`)
	out.WriteVar("xorMasks", xorMasks)

	// Only the first class offered for a rune is inserted; later calls for
	// the same rune are ignored.
	seen := map[rune]bool{}
	insert := func(r rune, c class) {
		if seen[r] {
			return
		}
		trie.Insert(r, bracketBits[r]|uint64(c))
		seen[r] = true
	}

	// Insert the derived BiDi properties.
	parse("extracted/DerivedBidiClass.txt", func(p *ucd.Parser) {
		r := p.Rune(0)
		cls, found := bidiClass[p.String(1)]
		if !found {
			log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
		}
		insert(r, cls)
	})
	visitDefaults(insert)

	// TODO: use sparse blocks. This would reduce table size considerably
	// from the looks of it.

	trieSize, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	out.Size += trieSize
}
Пример #12
0
// printCharInfoTables writes the decomposition constants, the "decomps" byte
// table and one trie per form type ("nfc" and "nfkc") to w. It returns the
// accumulated size: the length of the decomps table plus the size reported by
// each trie's Gen call.
func printCharInfoTables(w io.Writer) int {
	// mkstr encodes the expanded decomposition of r for form f. It returns
	// the decompSet bucket the string belongs to and the encoded string:
	// a header byte (byte length in the low 6 bits, 0x40 set when the
	// MComposed quick check is not QCYes, 0x80 set when f combines forward),
	// the decomposition itself and, when CCC or trailing non-starter
	// information is present, a byte holding tccc<<2|nTrail optionally
	// followed by an lccc byte.
	mkstr := func(r rune, f *FormInfo) (int, string) {
		d := f.expandedDecomp
		s := string([]rune(d))
		// The length must fit in the 6 low bits of the header byte.
		if max := 1 << 6; len(s) >= max {
			const msg = "%U: too many bytes in decomposition: %d >= %d"
			log.Fatalf(msg, r, len(s), max)
		}
		head := uint8(len(s))
		if f.quickCheck[MComposed] != QCYes {
			head |= 0x40
		}
		if f.combinesForward {
			head |= 0x80
		}
		s = string([]byte{head}) + s

		// CCC of the first and last rune of the decomposition, and of r itself.
		lccc := ccc(d[0])
		tccc := ccc(d[len(d)-1])
		cc := ccc(r)
		if cc != 0 && lccc == 0 && tccc == 0 {
			log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
		}
		if tccc < lccc && lccc != 0 {
			const msg = "%U: lccc (%d) must be <= tcc (%d)"
			log.Fatalf(msg, r, lccc, tccc)
		}
		// Select the bucket and append the trailing CCC information.
		index := normalDecomp
		nTrail := chars[r].nTrailingNonStarters
		if tccc > 0 || lccc > 0 || nTrail > 0 {
			tccc <<= 2
			tccc |= nTrail
			s += string([]byte{tccc})
			index = endMulti
			// A rune with CCC 0 after the first one selects firstCCC.
			for _, r := range d[1:] {
				if ccc(r) == 0 {
					index = firstCCC
				}
			}
			if lccc > 0 {
				s += string([]byte{lccc})
				if index == firstCCC {
					log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
				}
				index = firstLeadingCCC
			}
			if cc != lccc {
				if cc != 0 {
					log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
				}
				index = firstCCCZeroExcept
			}
		} else if len(d) > 1 {
			index = firstMulti
		}
		return index, s
	}

	decompSet := makeDecompSet()
	const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
	decompSet.insert(firstStarterWithNLead, nLeadStr)

	// Store the uniqued decompositions in a byte buffer,
	// preceded by their byte length.
	for _, c := range chars {
		for _, f := range c.forms {
			if len(f.expandedDecomp) == 0 {
				continue
			}
			if f.combinesBackward {
				log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
			}
			index, s := mkstr(c.codePoint, &f)
			decompSet.insert(index, s)
		}
	}

	// Lay the encoded strings out bucket by bucket, in sorted order within
	// each bucket, recording every string's offset in positionMap. The
	// buffer starts with a single zero byte, so no string gets offset 0.
	decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
	size := 0
	positionMap := make(map[string]uint16)
	decompositions.WriteString("\000")
	fmt.Fprintln(w, "const (")
	for i, m := range decompSet {
		sa := []string{}
		for s := range m {
			sa = append(sa, s)
		}
		sort.Strings(sa)
		for _, s := range sa {
			p := decompositions.Len()
			decompositions.WriteString(s)
			positionMap[s] = uint16(p)
		}
		// Named buckets get a constant holding the buffer length after the
		// bucket has been written.
		if cname[i] != "" {
			fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
		}
	}
	fmt.Fprintln(w, "maxDecomp = 0x8000")
	fmt.Fprintln(w, ")")
	b := decompositions.Bytes()
	printBytes(w, b, "decomps")
	size += len(b)

	// Generate one trie per form type. A value is either an offset into the
	// decomps table or, with the 0x8000 bit set, the entry from makeEntry
	// shifted left by 8 and combined with the character's CCC.
	varnames := []string{"nfc", "nfkc"}
	for i := 0; i < FNumberOfFormTypes; i++ {
		trie := triegen.NewTrie(varnames[i])

		for r, c := range chars {
			f := c.forms[i]
			d := f.expandedDecomp
			if len(d) != 0 {
				// Re-encode the decomposition to look up its offset.
				_, key := mkstr(c.codePoint, &f)
				trie.Insert(rune(r), uint64(positionMap[key]))
				if c.ccc != ccc(d[0]) {
					// We assume the lead ccc of a decomposition !=0 in this case.
					if ccc(d[0]) == 0 {
						log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
					}
				}
			} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
				// Handle cases where it can't be detected that the nLead should be equal
				// to nTrail.
				trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
			} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
				trie.Insert(c.codePoint, uint64(0x8000|v))
			}
		}
		sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
		if err != nil {
			log.Fatal(err)
		}
		size += sz
	}
	return size
}
Пример #13
0
// genTables builds the "idna" trie and emits it, together with the mappings
// and xorData variables, through a gen.CodeWriter that writes tables.go
// (package idna) when this function returns. Per-rune modifier and joining
// type bits are first collected in runes from UnicodeData.txt and
// DerivedJoiningType.txt, then combined with the category of each entry of
// IdnaMappingTable.txt.
func genTables() {
	trie := triegen.NewTrie("idna")

	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
		const cccVirama = 9

		r := p.Rune(0)
		if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
			runes[r] = viramaModifier
		}
		if unicode.In(r, unicode.Mark) {
			runes[r] |= modifier
		}
	})

	ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
		jt := p.String(1)
		if jt == "L" || jt == "D" || jt == "T" || jt == "R" {
			runes[p.Rune(0)] |= joinType[jt] << joinShift
		}
	})

	ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		// The mappings table explicitly defines surrogates as invalid.
		if !utf8.ValidRune(r) {
			return
		}

		cat := catFromEntry(p)
		var isMapped bool
		switch cat {
		case mapped, disallowedSTD3Mapped, deviation:
			isMapped = true
		}
		if !isMapped {
			// Only include additional category information for non-mapped
			// runes. The additional information is only used after mapping and
			// the bits would clash with mapping information.
			// TODO: it would be possible to inline this data and avoid
			// additional lookups. This is quite tedious, though, so let's first
			// see if we need this.
			cat |= category(runes[r])
		}

		// A mapping string is only legal for the mapped categories.
		target := string(p.Runes(2))
		if !isMapped && target != "" {
			log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
		}
		trie.Insert(r, uint64(makeEntry(r, target))+uint64(cat))
	})

	out := gen.NewCodeWriter()
	defer out.WriteGoFile("tables.go", "idna")

	gen.WriteUnicodeVersion(out)

	out.WriteVar("mappings", string(mappings))
	out.WriteVar("xorData", string(xorData))

	trieSize, err := trie.Gen(out, triegen.Compact(&normCompacter{}))
	if err != nil {
		log.Fatal(err)
	}
	out.Size += trieSize
}