Example #1
0
// main generates tables.go for package language. It calls gen.Repackage for
// gen_common.go/common.go and then emits each table through the builder in a
// fixed order.
func main() {
	gen.Init()

	gen.Repackage("gen_common.go", "common.go", "language")

	out := gen.NewCodeWriter()
	// Deferred so that tables.go is written only after every table below has
	// been emitted into the writer.
	defer out.WriteGoFile("tables.go", "language")

	fmt.Fprintln(out, `import "golang.org/x/text/internal/tag"`)

	tables := newBuilder(out)
	gen.WriteCLDRVersion(out)

	// The order of the calls below determines the order of the output.
	tables.parseIndices()
	tables.writeType(fromTo{})
	tables.writeLanguage()
	tables.writeScript()
	tables.writeRegion()
	tables.writeVariant()
	// TODO: tables.writeLocale()
	tables.computeRegionGroups()
	tables.writeLikelyData()
	tables.writeMatchData()
	tables.writeRegionInclusionData()
	tables.writeParents()
}
Example #2
0
// main decodes the localeDisplayNames section of the CLDR core archive and
// hands the result to a builder that generates the output file for package
// display.
func main() {
	gen.Init()

	// Open the CLDR core archive; it stays open until main returns.
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	// Restrict decoding to the directories and section this generator uses.
	dec := new(cldr.Decoder)
	dec.SetDirFilter("main", "supplemental")
	dec.SetSectionFilter("localeDisplayNames")
	decoded, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := gen.NewCodeWriter()
	// Deferred so the file named by the outputFile flag is written last.
	defer out.WriteGoFile(*outputFile, "display")

	gen.WriteCLDRVersion(out)

	bld := builder{
		w:     out,
		data:  decoded,
		group: make(map[string]*group),
	}
	bld.generate()
}
Example #3
0
// main generates the tables for package currency from the "numbers" section
// of the CLDR core archive.
func main() {
	gen.Init()

	gen.Repackage("gen_common.go", "common.go", "currency")

	// Open the CLDR core archive; it stays open until main returns.
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	// Restrict decoding to the directories and section this generator uses.
	dec := new(cldr.Decoder)
	dec.SetDirFilter("supplemental", "main")
	dec.SetSectionFilter("numbers")
	decoded, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := gen.NewCodeWriter()
	// Deferred so the file named by the outputFile flag is written last.
	defer out.WriteGoFile(*outputFile, "currency")

	fmt.Fprintln(out, `import "golang.org/x/text/internal/tag"`)

	gen.WriteCLDRVersion(out)

	// Currency tables come from the supplemental data, symbols from the full
	// decoded data set.
	bld := new(builder)
	bld.genCurrencies(out, decoded.Supplemental())
	bld.genSymbols(out, decoded)
}
Example #4
0
// main generates tables.go for package language. After rewriteCommon it
// writes the version header and then emits each table through the builder in
// a fixed order.
func main() {
	gen.Init()

	rewriteCommon()

	out := gen.NewCodeWriter()
	// Deferred so that tables.go is written only after every table below has
	// been emitted into the writer.
	defer out.WriteGoFile("tables.go", "language")

	tables := newBuilder(out)
	// version is used as the format string and receives cldr.Version.
	fmt.Fprintf(out, version, cldr.Version)

	// The order of the calls below determines the order of the output.
	tables.parseIndices()
	tables.writeType(fromTo{})
	tables.writeLanguage()
	tables.writeScript()
	tables.writeRegion()
	tables.writeVariant()
	// TODO: tables.writeLocale()
	tables.writeCurrencies()
	tables.computeRegionGroups()
	tables.writeLikelyData()
	tables.writeMatchData()
	tables.writeRegionInclusionData()
	tables.writeParents()
}
Example #5
0
// genTables builds the "case" trie from the parsed UCD data and writes it,
// together with the xorData and exceptions strings, to tables.go in package
// cases.
func genTables() {
	ucdChars := parseUCD()
	verifyProperties(ucdChars)

	// Insert one trie entry per element of ucdChars, keyed by its index.
	caseTrie := triegen.NewTrie("case")
	for idx := range ucdChars {
		info := &ucdChars[idx]
		makeEntry(info)
		caseTrie.Insert(rune(idx), uint64(info.entry))
	}

	out := gen.NewCodeWriter()
	// Deferred so that tables.go is written after all content is emitted.
	defer out.WriteGoFile("tables.go", "cases")

	gen.WriteUnicodeVersion(out)

	// TODO: write CLDR version after adding a mechanism to detect that the
	// tables on which the manually created locale-sensitive casing code is
	// based hasn't changed.

	out.WriteVar("xorData", string(xorData))
	out.WriteVar("exceptions", string(exceptionData))

	trieSize, err := caseTrie.Gen(out, triegen.Compact(&sparseCompacter{}))
	if err != nil {
		log.Fatal(err)
	}
	// Add the size reported for the trie to the writer's running total.
	out.Size += trieSize
}
Example #6
0
// main generates the tables for package number from the "numbers" and
// "numberingSystem" sections of the CLDR core archive.
func main() {
	gen.Init()

	// pkg is the package name used for both the repackaged helper file and
	// the generated output file.
	const pkg = "number"

	gen.Repackage("gen_common.go", "common.go", pkg)

	// Open the CLDR core archive; it stays open until main returns.
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	// Restrict decoding to the directories and sections this generator uses.
	dec := new(cldr.Decoder)
	dec.SetDirFilter("supplemental", "main")
	dec.SetSectionFilter("numbers", "numberingSystem")
	decoded, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := gen.NewCodeWriter()
	// Deferred so the file named by the outputFile flag is written last.
	defer out.WriteGoFile(*outputFile, pkg)

	fmt.Fprintln(out, `import "golang.org/x/text/internal/stringset"`)

	gen.WriteCLDRVersion(out)

	genNumSystem(out, decoded)
	genSymbols(out, decoded)
}
Example #7
0
// main generates tables.go for package runenames: two lookup tables written
// as variables plus the data string written as a constant.
func main() {
	gen.Init()

	runeNames, nameCounts := parse()
	appendRepeatNames(runeNames, nameCounts)
	appendUniqueNames(runeNames, nameCounts)

	first, second := makeTables()

	gen.Repackage("gen_bits.go", "bits.go", "runenames")

	out := gen.NewCodeWriter()
	out.WriteVar("table0", first)
	out.WriteVar("table1", second)
	out.WriteConst("data", string(data))
	// Nothing follows, so the file is written directly rather than deferred.
	out.WriteGoFile("tables.go", "runenames")
}
Example #8
0
// writeTables computes a derived property and a category for every valid rune
// below unicode.MaxRune, stores both in the "derivedProperties" trie and
// writes the trie to the file named by the outputFile flag (package precis).
func writeTables() {
	trie := triegen.NewTrie("derivedProperties")
	out := gen.NewCodeWriter()
	defer out.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(out)

	// Visit each code point in turn; the upper bound is exclusive.
	for r := rune(0); r < unicode.MaxRune; r++ {
		if !utf8.ValidRune(r) {
			continue
		}

		// exc is the zero value when r has no entry in exceptions.
		exc, listed := exceptions[r]
		prop := exc.prop
		// The first matching case wins, so the order of the cases matters.
		switch {
		case listed:
			// Keep the property taken from the exceptions entry.
		case !unicode.In(r, assigned):
			prop = unassigned
		case r >= 0x0021 && r <= 0x007e: // printable ASCII, '!' through '~'
			prop = pValid
		case unicode.In(r, disallowedRunes, unicode.Cc):
			prop = disallowed
		case hasCompat(r):
			prop = idDisOrFreePVal
		case isLetterDigits(r):
			prop = pValid
		case isIdDisAndFreePVal(r):
			prop = idDisOrFreePVal
		default:
			prop = disallowed
		}

		category := runeCategory[r]
		// Disallowed runes take the category of their exceptions entry
		// instead, which is the zero value when there is no such entry.
		if prop == disallowed {
			category = exc.cat
		}
		trie.Insert(r, uint64(prop)|uint64(category))
	}

	trieSize, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	// Add the size reported for the trie to the writer's running total.
	out.Size += trieSize
}
Example #9
0
// writeTables computes a derived property for every valid rune below
// unicode.MaxRune, stores it in the "derivedProperties" trie and writes the
// trie to the file named by the outputFile flag (package precis).
func writeTables() {
	trie := triegen.NewTrie("derivedProperties")
	out := gen.NewCodeWriter()
	defer out.WriteGoFile(*outputFile, "precis")
	gen.WriteUnicodeVersion(out)

	// Visit each code point in turn; the upper bound is exclusive. The
	// counter is a uint32 because that is what exceptions is indexed with.
	for cp := uint32(0); cp < unicode.MaxRune; cp++ {
		r := rune(cp)

		if !utf8.ValidRune(r) {
			continue
		}

		prop, listed := exceptions[cp]
		// The first matching case wins, so the order of the cases matters.
		switch {
		case listed:
			// Keep the property taken from the exceptions entry.
		case !unicode.In(r, assigned):
			prop = unassigned
		case r >= 0x21 && r <= 0x7e: // printable ASCII, '!' through '~'
			prop = pValid
		case r == 0x200C || r == 0x200D: // Is join control
			prop = contextJ
		case unicode.In(r, disallowedRunes, unicode.Cc):
			prop = disallowed
		case isHasCompat(r):
			prop = idDis | freePVal
		case isLetterDigits(r):
			prop = pValid
		case isIdDisAndFreePVal(r):
			prop = idDis | freePVal
		default:
			prop = disallowed
		}
		trie.Insert(r, uint64(prop))
	}

	trieSize, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	// Add the size reported for the trie to the writer's running total.
	out.Size += trieSize
}
Example #10
0
File: gen.go Project: rdterner/text
// main generates the currency tables for package currency from the
// supplemental directory of the CLDR core archive.
func main() {
	gen.Init()

	rewriteCommon()

	// Open the CLDR core archive; it stays open until main returns.
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	// Only the supplemental directory is decoded.
	dec := new(cldr.Decoder)
	dec.SetDirFilter("supplemental")
	decoded, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := gen.NewCodeWriter()
	// Deferred so the file named by the outputFile flag is written last.
	defer out.WriteGoFile(*outputFile, "currency")

	fmt.Fprintln(out, `import "golang.org/x/text/internal/tag"`)

	gen.WriteCLDRVersion(out)
	genCurrencies(out, decoded.Supplemental())
}
Example #11
0
// main generates tables.go for package internal. The file holds the Parent
// table, which maps the compact index of each CLDR locale to the compact
// index of its nearest ancestor that has a compact index.
func main() {
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	dec := new(cldr.Decoder)
	decoded, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := gen.NewCodeWriter()
	// Deferred so that tables.go is written after the table is emitted.
	defer out.WriteGoFile("tables.go", "internal")

	// Create parents table. Entries that are never assigned stay 0, the
	// index used for und.
	parents := make([]uint16, language.NumCompactTags)
	for _, locale := range decoded.Locales() {
		tag := language.MustParse(locale)
		if index, known := language.CompactIndex(tag); known {
			// Walk up the parent chain until an ancestor with a compact
			// index is found; fall back to 0 (und) when Und is reached.
			nearest := 0
			ancestor := tag.Parent()
			for ancestor != language.Und {
				if idx, found := language.CompactIndex(ancestor); found {
					nearest = idx
					break
				}
				ancestor = ancestor.Parent()
			}
			parents[index] = uint16(nearest)
		}
	}

	out.WriteComment(`
	Parent maps a compact index of a tag to the compact index of the parent of
	this tag.`)
	out.WriteVar("Parent", parents)
}
Example #12
0
File: gen.go Project: 7ukey/text
// genTables writes the bidi tables to the file named by the outputFile flag:
// the xorMasks variable derived from BidiBrackets.txt and a "bidi" trie that
// combines each rune's bracket bits with its BiDi class.
func genTables() {
	if numClass > 0x0F {
		log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
	}
	out := gen.NewCodeWriter()
	defer out.WriteGoFile(*outputFile, "bidi")

	gen.WriteUnicodeVersion(out)

	trie := triegen.NewTrie("bidi")

	// bracketBits holds, per bracket rune, the bits that must be or-ed into
	// its trie entry in addition to the class.
	bracketBits := map[rune]uint64{}

	// maskIndex maps an xor value to its position in xorMasks.
	maskIndex := map[rune]int{}
	xorMasks := []rune{0} // First value is no-op.

	parse("BidiBrackets.txt", func(p *ucd.Parser) {
		bracket, partner := p.Rune(0), p.Rune(1)
		mask := bracket ^ partner
		idx, seen := maskIndex[mask]
		if !seen {
			// First time this xor value occurs: append it to the table.
			idx = len(xorMasks)
			maskIndex[mask] = idx
			xorMasks = append(xorMasks, mask)
		}
		bits := uint64(idx) << xorMaskShift
		switch kind := p.String(2); kind {
		case "o":
			bits |= openMask
		case "c", "n":
			// No extra bits for these classes.
		default:
			log.Fatalf("Unknown bracket class %q.", kind)
		}
		bracketBits[bracket] = bits
	})

	out.WriteComment(`
	xorMasks contains masks to be xor-ed with brackets to get the reverse 
	version.`)
	out.WriteVar("xorMasks", xorMasks)

	// inserted records the runes already placed in the trie, so only the
	// first class reported for a rune is used.
	inserted := map[rune]bool{}

	insert := func(r rune, c class) {
		if inserted[r] {
			return
		}
		trie.Insert(r, bracketBits[r]|uint64(c))
		inserted[r] = true
	}

	// Insert the derived BiDi properties.
	parse("extracted/DerivedBidiClass.txt", func(p *ucd.Parser) {
		r := p.Rune(0)
		cls, known := bidiClass[p.String(1)]
		if !known {
			log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
		}
		insert(r, cls)
	})
	// visitDefaults goes through the same insert func, so runes inserted
	// above are not overwritten.
	visitDefaults(insert)

	// TODO: use sparse blocks. This would reduce table size considerably
	// from the looks of it.

	trieSize, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}
	// Add the size reported for the trie to the writer's running total.
	out.Size += trieSize
}
Example #13
0
// main generates tables.go for package charmap. For every entry in encodings
// it emits an exported encoding.Encoding variable backed by a charmap value
// with a 256-entry decode table and a 256-entry encode table, followed by the
// listAll slice naming every generated variable.
//
// The program aborts if two entries declare the same MIB identifier, or if an
// entry has no rune that can be encoded.
func main() {
	// mibs records the MIB identifiers already emitted, to detect duplicates.
	mibs := map[string]bool{}
	all := []string{}

	w := gen.NewCodeWriter()
	// Deferred so that tables.go is written after all content is emitted.
	defer w.WriteGoFile("tables.go", "charmap")

	printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }

	printf("import (\n")
	printf("\t\"golang.org/x/text/encoding\"\n")
	printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
	printf(")\n\n")
	for _, e := range encodings {
		// varName may list several comma-separated names; all of them go
		// into listAll, but only the first one is declared here.
		varNames := strings.Split(e.varName, ",")
		all = append(all, varNames...)
		varName := varNames[0]
		// A mapping given as a URL is replaced by the result of the matching
		// fetch helper. e is a copy, so encodings itself is not modified.
		switch {
		case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
			e.mapping = getWHATWG(e.mapping)
		case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
			e.mapping = getUCM(e.mapping)
		}

		asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
		if asciiSuperset {
			low = 0x80
		}
		// The unexported variable name lower-cases the first letter, or the
		// first three letters for names starting with ISO or KOI.
		lvn := 1
		if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
			lvn = 3
		}
		lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
		printf("// %s is the %s encoding.\n", varName, e.name)
		if e.comment != "" {
			printf("//\n// %s\n", e.comment)
		}
		printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
			varName, lowerVarName, lowerVarName, e.name)
		if mibs[e.mib] {
			log.Fatalf("MIB type %q declared multiple times.", e.mib)
		}
		// Record the MIB so that a later entry reusing it is rejected by the
		// check above.
		mibs[e.mib] = true
		printf("mib: identifier.%s,\n", e.mib)
		printf("asciiSuperset: %t,\n", asciiSuperset)
		printf("low: 0x%02x,\n", low)
		printf("replacement: 0x%02x,\n", e.replacement)

		printf("decode: [256]utf8Enc{\n")
		// backMapping maps each rune to the first byte value that decodes to
		// it; utf8.RuneError is never entered.
		i, backMapping := 0, map[rune]byte{}
		for _, c := range e.mapping {
			if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
				backMapping[c] = byte(i)
			}
			var buf [8]byte
			n := utf8.EncodeRune(buf[:], c)
			// Each decode entry only has room for three UTF-8 bytes.
			if n > 3 {
				panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
			}
			printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
			if i%2 == 1 {
				printf("\n")
			}
			i++
		}
		printf("},\n")

		printf("encode: [256]uint32{\n")
		// Each encode entry packs the byte value into the top 8 bits and the
		// rune into the bits below.
		encode := make([]uint32, 0, 256)
		for c, i := range backMapping {
			encode = append(encode, uint32(i)<<24|uint32(c))
		}
		// The padding below repeats the last entry, which does not exist for
		// an empty table; report that clearly instead of panicking.
		if len(encode) == 0 {
			log.Fatalf("encoding %q has no encodable runes", e.name)
		}
		sort.Sort(byRune(encode))
		// Pad to the full 256 entries by repeating the last entry.
		for len(encode) < cap(encode) {
			encode = append(encode, encode[len(encode)-1])
		}
		for i, enc := range encode {
			printf("0x%08x,", enc)
			if i%8 == 7 {
				printf("\n")
			}
		}
		printf("},\n}\n")

		// Add an estimate of the size of a single charmap{} struct value, which
		// includes two 256 elem arrays of 4 bytes and some extra fields, which
		// align to 3 uint64s on 64-bit architectures.
		w.Size += 2*4*256 + 3*8
	}
	// TODO: add proper line breaking.
	printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
Example #14
0
// main generates index.go for package language. It collects the tags of all
// CLDR locales and of all locales named in the plural rules, splits them into
// core tags and special tags (those with variants or extensions), and writes
// NumCompactTags, specialTags and the coreTags map.
func main() {
	gen.Init()

	// Open and decode the CLDR core archive.
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	dec := new(cldr.Decoder)
	data, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	// The output is post-processed before it is written, so it is first
	// formatted into a buffer instead of going straight to a file.
	defer func() {
		var formatted bytes.Buffer

		if _, err = w.WriteGo(&formatted, "language"); err != nil {
			log.Fatalf("Error formatting file index.go: %v", err)
		}

		// Since we're generating a table for our own package we need to rewrite
		// doing the equivalent of go fmt -r 'language.b -> b'. Using
		// bytes.Replace will do.
		stripped := bytes.Replace(formatted.Bytes(), []byte("language."), nil, -1)
		if err := ioutil.WriteFile("index.go", stripped, 0600); err != nil {
			log.Fatalf("Could not create file index.go: %v", err)
		}
	}()

	// tags is used as a set of every tag that gets a compact index.
	tags := make(map[language.Tag]bool)
	for _, locale := range data.Locales() {
		// We include all locales unconditionally to be consistent with en_US.
		// We want en_US, even though it has no data associated with it.

		// TODO: put any of the languages for which no data exists at the end
		// of the index. This allows all components based on ICU to use that
		// as the cutoff point.
		// if x := data.RawLDML(lang); false ||
		// 	x.LocaleDisplayNames != nil ||
		// 	x.Characters != nil ||
		// 	x.Delimiters != nil ||
		// 	x.Measurement != nil ||
		// 	x.Dates != nil ||
		// 	x.Numbers != nil ||
		// 	x.Units != nil ||
		// 	x.ListPatterns != nil ||
		// 	x.Collations != nil ||
		// 	x.Segmentations != nil ||
		// 	x.Rbnf != nil ||
		// 	x.Annotations != nil ||
		// 	x.Metadata != nil {

		// TODO: support POSIX natively, albeit non-standard.
		posixAsExtension := strings.Replace(locale, "_POSIX", "-u-va-posix", 1)
		tags[language.Make(posixAsExtension)] = true
		// }
	}
	// Include locales for plural rules, which uses a different structure.
	for _, plurals := range data.Supplemental().Plurals {
		for _, rules := range plurals.PluralRules {
			for _, locale := range strings.Split(rules.Locales, " ") {
				tags[language.Make(locale)] = true
			}
		}
	}

	var core, special []language.Tag

	for t := range tags {
		// The only extension accepted is the one substituted for _POSIX.
		if ext := t.Extensions(); len(ext) != 0 && fmt.Sprint(ext) != "[u-va-posix]" {
			log.Fatalf("Unexpected extension %v in %v", ext, t)
		}
		if len(t.Variants()) != 0 || len(t.Extensions()) != 0 {
			special = append(special, t)
		} else {
			core = append(core, t)
		}
	}

	w.WriteComment(`
	NumCompactTags is the number of common tags. The maximum tag is
	NumCompactTags-1.`)
	w.WriteConst("NumCompactTags", len(core)+len(special))

	sort.Sort(byAlpha(special))
	w.WriteVar("specialTags", special)

	// TODO: order by frequency?
	sort.Sort(byAlpha(core))

	// Size computations are just an estimate.
	w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
	w.Size += len(core) * 6 // size of uint32 and uint16

	fmt.Fprintln(w)
	fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
	fmt.Fprintln(w, "0x0: 0, // und")
	next := len(special) + 1 // Und and special tags already written.
	for _, t := range core {
		if t == language.Und {
			continue
		}
		// The map literal is written by hand, so the tag and its index are
		// also written to w.Hash here.
		fmt.Fprint(w.Hash, t, next)
		base, script, region := t.Raw()
		fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
			getIndex(base, 3), // 3 is enough as it is guaranteed to be a compact number
			getIndex(script, 2),
			getIndex(region, 3),
			next, t)
		next++
	}
	fmt.Fprintln(w, "}")
}
Example #15
0
// main generates index.go for package language. It collects the tags of all
// CLDR locales that carry data in at least one section, splits them into core
// tags and special tags (those with variants or extensions), and writes
// NumCompactTags, specialTags, the coreKey type and the coreTags map.
func main() {
	gen.Init()

	// Open and decode the CLDR core archive.
	archive := gen.OpenCLDRCoreZip()
	defer archive.Close()

	dec := new(cldr.Decoder)
	data, err := dec.DecodeZip(archive)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	w := gen.NewCodeWriter()
	// The output is post-processed before it is written, so it is first
	// formatted into a buffer instead of going straight to a file.
	defer func() {
		var formatted bytes.Buffer

		if _, err = w.WriteGo(&formatted, "language"); err != nil {
			log.Fatalf("Error formatting file index.go: %v", err)
		}

		// Since we're generating a table for our own package we need to rewrite
		// doing the equivalent of go fmt -r 'language.b -> b'. Using
		// bytes.Replace will do.
		stripped := bytes.Replace(formatted.Bytes(), []byte("language."), nil, -1)
		if err := ioutil.WriteFile("index.go", stripped, 0600); err != nil {
			log.Fatalf("Could not create file index.go: %v", err)
		}
	}()

	// tags is used as a set of every tag that gets a compact index.
	tags := make(map[language.Tag]bool)
	for _, locale := range data.Locales() {
		ldml := data.RawLDML(locale)
		// A locale is included only if at least one of these sections is set.
		hasData := ldml.LocaleDisplayNames != nil ||
			ldml.Characters != nil ||
			ldml.Delimiters != nil ||
			ldml.Measurement != nil ||
			ldml.Dates != nil ||
			ldml.Numbers != nil ||
			ldml.Units != nil ||
			ldml.ListPatterns != nil ||
			ldml.Collations != nil ||
			ldml.Segmentations != nil ||
			ldml.Rbnf != nil ||
			ldml.Annotations != nil ||
			ldml.Metadata != nil
		if !hasData {
			continue
		}

		// TODO: support POSIX natively, albeit non-standard.
		posixAsExtension := strings.Replace(locale, "_POSIX", "-u-va-posix", 1)
		tags[language.Make(posixAsExtension)] = true
	}
	var core, special []language.Tag

	for t := range tags {
		// The only extension accepted is the one substituted for _POSIX.
		if ext := t.Extensions(); len(ext) != 0 && fmt.Sprint(ext) != "[u-va-posix]" {
			log.Fatalf("Unexpected extension %v in %v", ext, t)
		}
		if len(t.Variants()) != 0 || len(t.Extensions()) != 0 {
			special = append(special, t)
		} else {
			core = append(core, t)
		}
	}

	w.WriteComment(`
	NumCompactTags is the number of common tags. The maximum tag is
	NumCompactTags-1.`)
	w.WriteConst("NumCompactTags", len(core)+len(special))

	sort.Sort(byAlpha(special))
	w.WriteVar("specialTags", special)

	// coreKey is the key type of the generated coreTags map. Its name and
	// field names appear in the output through WriteType and %#v below.
	type coreKey struct {
		base   language.Base
		script language.Script
		region language.Region
	}
	w.WriteType(coreKey{})

	// TODO: order by frequency?
	sort.Sort(byAlpha(core))

	// Size computations are just an estimate.
	w.Size += int(reflect.TypeOf(map[coreKey]uint16{}).Size())
	w.Size += len(core) * int(reflect.TypeOf(coreKey{}).Size()+2) // 2 is for uint16

	fmt.Fprintln(w, "var coreTags = map[coreKey]uint16{")
	fmt.Fprintln(w, "coreKey{}: 0, // und")
	next := len(special) + 1 // Und and special tags already written.
	for _, t := range core {
		if t == language.Und {
			continue
		}
		// The map literal is written by hand, so the tag and its index are
		// also written to w.Hash here.
		fmt.Fprint(w.Hash, t, next)
		base, script, region := t.Raw()
		// Render the key in Go syntax, then strip the leading "main." and
		// every "language." qualifier from it.
		literal := fmt.Sprintf("%#v", coreKey{base: base, script: script, region: region})
		literal = strings.Replace(literal[len("main."):], "language.", "", -1)
		fmt.Fprintf(w, "%s: %d, // %s\n", literal, next, t)
		next++
	}
	fmt.Fprintln(w, "}")
}
Example #16
0
// genTables builds the "idna" trie from UnicodeData.txt,
// DerivedJoiningType.txt and IdnaMappingTable.txt, and writes it, together
// with the mappings and xorData strings, to tables.go in package idna.
func genTables() {
	trie := triegen.NewTrie("idna")

	// Pass 1: record virama and modifier information per rune.
	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		const cccVirama = 9
		if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
			runes[p.Rune(0)] = viramaModifier
		}
		if unicode.In(r, unicode.Mark) {
			runes[r] |= modifier
		}
	})

	// Pass 2: add the joining type for the four types that are tracked.
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
		if jt := p.String(1); jt == "L" || jt == "D" || jt == "T" || jt == "R" {
			runes[p.Rune(0)] |= joinType[jt] << joinShift
		}
	})

	// Pass 3: create one trie entry per rune in the mapping table.
	ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)

		// The mappings table explicitly defines surrogates as invalid.
		if !utf8.ValidRune(r) {
			return
		}

		cat := catFromEntry(p)
		hasMapping := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
		if !hasMapping {
			// Only include additional category information for non-mapped
			// runes. The additional information is only used after mapping and
			// the bits would clash with mapping information.
			// TODO: it would be possible to inline this data and avoid
			// additional lookups. This is quite tedious, though, so let's first
			// see if we need this.
			cat |= category(runes[r])
		}

		// A non-empty mapping is only allowed for the mapped categories.
		target := string(p.Runes(2))
		if !hasMapping && target != "" {
			log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
		}
		trie.Insert(r, uint64(makeEntry(r, target))+uint64(cat))
	})

	out := gen.NewCodeWriter()
	// Deferred so that tables.go is written after all content is emitted.
	defer out.WriteGoFile("tables.go", "idna")

	gen.WriteUnicodeVersion(out)

	out.WriteVar("mappings", string(mappings))
	out.WriteVar("xorData", string(xorData))

	trieSize, err := trie.Gen(out, triegen.Compact(&normCompacter{}))
	if err != nil {
		log.Fatal(err)
	}
	// Add the size reported for the trie to the writer's running total.
	out.Size += trieSize
}