Beispiel #1
0
// genTables builds the "case" trie from the parsed UCD data and writes it,
// followed by the xorData and exceptions string tables, to tables.go in
// package cases.
func genTables() {
	ucdChars := parseUCD()
	verifyProperties(ucdChars)

	trie := triegen.NewTrie("case")
	for idx := range ucdChars {
		info := &ucdChars[idx]
		makeEntry(info)
		trie.Insert(rune(idx), uint64(info.entry))
	}

	out := new(bytes.Buffer)

	total, err := trie.Gen(out, triegen.Compact(&sparseCompacter{}))
	if err != nil {
		log.Fatal(err)
	}

	gen.WriteUnicodeVersion(out)
	// TODO: write CLDR version after adding a mechanism to detect that the
	// tables on which the manually created locale-sensitive casing code is
	// based hasn't changed.

	xorLen := len(xorData)
	fmt.Fprintf(out, "// xorData: %d bytes\n", xorLen)
	fmt.Fprintf(out, "var xorData = %+q\n\n", string(xorData))

	excLen := len(exceptionData)
	fmt.Fprintf(out, "// exceptions: %d bytes\n", excLen)
	fmt.Fprintf(out, "var exceptions = %q\n\n", string(exceptionData))

	// The reported total covers the trie plus the exceptions table only.
	total += excLen
	fmt.Fprintf(out, "// Total table size %d bytes (%dKiB)\n", total, total/1024)

	gen.WriteGoFile("tables.go", "cases", out.Bytes())
}
Beispiel #2
0
// main generates the "width" trie and writes it to tables.go in package
// runewidth.
//
// Values are inserted in three passes, each overriding the previous one for
// the runes it touches: wide/fullwidth runes from EastAsianWidth.txt, then
// zero-width marks (categories Me and Mn), then the entries in overrides.
func main() {
	t := triegen.NewTrie("width")

	// wide is the base
	parse("EastAsianWidth.txt", func(p *ucd.Parser) {
		if contains(p.String(1), "W", "F") {
			t.Insert(p.Rune(0), widthTwo)
		}
	})

	// zero overrides wide
	parse("extracted/DerivedGeneralCategory.txt", func(p *ucd.Parser) {
		cat := p.String(1)
		if cat == "Me" || cat == "Mn" {
			t.Insert(p.Rune(0), widthZero)
		}
	})

	// misc overrides
	for _, v := range overrides {
		for r := v.from; r <= v.to; r++ {
			t.Insert(r, encodeWidth(v.width))
		}
	}

	w := &bytes.Buffer{}
	gen.WriteUnicodeVersion(w)
	// Gen reports failures through its error result; stop here instead of
	// writing an incomplete tables.go.
	if _, err := t.Gen(w); err != nil {
		panic(err)
	}
	gen.WriteGoFile("tables.go", "runewidth", w.Bytes())
}
Beispiel #3
0
// printTestdata writes data_test.go for package norm. After testHeader it
// emits one table row per run of consecutive runes: a row is written only
// when the (ccc, leading/trailing non-starter counts, form description) tuple
// differs from that of the previously written row.
func printTestdata() {
	type lastInfo struct {
		ccc    uint8
		nLead  uint8
		nTrail uint8
		f      string
	}

	var prev lastInfo
	out := new(bytes.Buffer)
	fmt.Fprintf(out, testHeader)
	for r, c := range chars {
		canon := c.forms[FCanonical]
		qc, cf, d := canon.quickCheck[MComposed], canon.combinesForward, string(canon.expandedDecomp)
		compat := c.forms[FCompatibility]
		qck, cfk, dk := compat.quickCheck[MComposed], compat.combinesForward, string(compat.expandedDecomp)

		// f(...) is the short form used when both forms agree; g(...) spells
		// out the canonical and compatibility values separately.
		var desc string
		if d != dk || qc != qck || cf != cfk {
			desc = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
		} else {
			desc = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
		}

		cur := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, desc}
		if cur == prev {
			continue
		}
		fmt.Fprintf(out, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, desc)
		prev = cur
	}
	fmt.Fprintln(out, "}")
	gen.WriteGoFile("data_test.go", "norm", out.Bytes())
}
Beispiel #4
0
// main decodes the localeDisplayNames section of the CLDR core zip (main and
// supplemental directories only) and writes the generated tables to
// *outputFile in package display.
func main() {
	gen.Init()

	// Read the CLDR zip file.
	zipFile := gen.OpenCLDRCoreZip()
	defer zipFile.Close()

	dec := new(cldr.Decoder)
	dec.SetDirFilter("main", "supplemental")
	dec.SetSectionFilter("localeDisplayNames")
	data, err := dec.DecodeZip(zipFile)
	if err != nil {
		log.Fatalf("DecodeZip: %v", err)
	}

	out := new(bytes.Buffer)
	gen.WriteCLDRVersion(out)

	b := builder{w: out, data: data, group: make(map[string]*group)}
	b.generate()
	gen.WriteGoFile(*outputFile, "display", out.Bytes())
}
// main regenerates the sources of package language: it first calls
// rewriteCommon and then writes the generated tables to tables.go.
func main() {
	gen.Init()

	rewriteCommon()

	// All generated output is accumulated in w and written in one step at
	// the end of main.
	w := &bytes.Buffer{}

	b := newBuilder(w)
	// version is used as the format string; cldr.Version is its only argument.
	fmt.Fprintf(w, version, cldr.Version)

	// The builder steps run in this fixed order.
	// NOTE(review): later steps presumably depend on state set up by earlier
	// ones (e.g. parseIndices, computeRegionGroups) — confirm before reordering.
	b.parseIndices()
	b.writeType(fromTo{})
	b.writeLanguage()
	b.writeScript()
	b.writeRegion()
	b.writeVariant()
	// TODO: b.writeLocale()
	b.writeCurrencies()
	b.computeRegionGroups()
	b.writeLikelyData()
	b.writeMatchData()
	b.writeRegionInclusionData()
	b.writeParents()

	// Trailer comment: b.size in K and in bytes, plus the 32-bit sum of
	// b.hash32 in hexadecimal.
	fmt.Fprintf(w, "\n// Size: %.1fK (%d bytes); Check: %X\n", float32(b.size)/1024, b.size, b.hash32.Sum32())
	gen.WriteGoFile("tables.go", "language", w.Bytes())
}
Beispiel #6
0
// main builds the collation table and then either runs testCollator on it
// (when *test is set) or writes the selected tables to tables.go in package
// *pkg.
func main() {
	gen.Init()
	b := build.NewBuilder()
	parseUCA(b)
	if tables.contains("chars") {
		parseMain()
	}
	parseCollation(b)

	built, err := b.Build()
	failOnError(err)

	// Test mode: exercise the freshly built table and write nothing.
	if *test {
		testCollator(collate.NewFromTable(built))
		return
	}

	out := new(bytes.Buffer)

	gen.WriteUnicodeVersion(out)
	gen.WriteCLDRVersion(out)

	if tables.contains("collate") {
		_, err = b.Print(out)
		failOnError(err)
	}
	if tables.contains("chars") {
		printExemplarCharacters(out)
	}
	gen.WriteGoFile("tables.go", *pkg, out.Bytes())
}
Beispiel #7
0
// main reads encodings.json and writes tables.go for package htmlindex.
//
// The generated file contains, in order: the htmlEncoding constants (one per
// encoding, terminated by numEncodings), the canonical name array, the
// label-to-encoding nameMap, the per-locale localeMap, and the locales
// constant holding the space-separated locale tags.
func main() {
	gen.Init()

	r := gen.Open("http://www.w3.org/TR", "w3", "encoding/indexes/encodings.json")
	// Release the underlying file or connection once decoding is done.
	defer r.Close()
	var groups []group
	if err := json.NewDecoder(r).Decode(&groups); err != nil {
		log.Fatalf("Error reading encodings.json: %v", err)
	}

	w := &bytes.Buffer{}
	fmt.Fprintln(w, "type htmlEncoding byte")
	fmt.Fprintln(w, "const (")
	for i, g := range groups {
		for _, e := range g.Encodings {
			name := consts[e.Name]
			if name == "" {
				log.Fatalf("No const defined for %s.", e.Name)
			}
			// Constants of the first group carry the explicit type and iota;
			// the remaining ones repeat the previous expression implicitly.
			if i == 0 {
				fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
			} else {
				fmt.Fprintf(w, "%s\n", name)
			}
		}
	}
	fmt.Fprintln(w, "numEncodings")
	fmt.Fprint(w, ")\n\n")

	// Canonical names, emitted in the same order as the constants above.
	fmt.Fprintln(w, "var canonical = [numEncodings]string{")
	for _, g := range groups {
		for _, e := range g.Encodings {
			fmt.Fprintf(w, "%q,\n", e.Name)
		}
	}
	fmt.Fprint(w, "}\n\n")

	// Every label of an encoding maps to that encoding's constant.
	fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
	for _, g := range groups {
		for _, e := range g.Encodings {
			for _, l := range e.Labels {
				fmt.Fprintf(w, "%q: %s,\n", l, consts[e.Name])
			}
		}
	}
	fmt.Fprint(w, "}\n\n")

	// localeMap[i] is the encoding for the i-th tag in the locales constant.
	var tags []string
	fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
	for _, loc := range locales {
		tags = append(tags, loc.tag)
		fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
	}
	fmt.Fprint(w, "}\n\n")

	fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))

	gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
}
Beispiel #8
0
// genTests writes runes_test.go for package width. The file holds the
// mapRunes table, which gets one entry for every rune that getWidthData
// reports with a non-zero alternate rune.
func genTests() {
	out := new(bytes.Buffer)
	fmt.Fprint(out, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n")
	getWidthData(func(r rune, tag elem, alt rune) {
		// Runes without an alternate form have no mapping to record.
		if alt == 0 {
			return
		}
		fmt.Fprintf(out, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag)
	})
	fmt.Fprint(out, "}\n")
	gen.WriteGoFile("runes_test.go", "width", out.Bytes())
}
// rewriteCommon generates common.go for package language from gen_common.go:
// everything after the build-tag/package-main preamble is handed to
// gen.WriteGoFile. It aborts if the preamble cannot be found.
func rewriteCommon() {
	const toDelete = "// +build ignore\n\npackage main\n\n"

	src, err := ioutil.ReadFile("gen_common.go")
	failOnError(err)

	start := bytes.Index(src, []byte(toDelete))
	if start < 0 {
		log.Fatalf("could not find %q in gen_common.go", toDelete)
	}
	body := src[start+len(toDelete):]
	gen.WriteGoFile("common.go", "language", body)
}
Beispiel #10
0
// repackage rewrites a file from belonging to package main to belonging to
// package width.
//
// It reads inFile, drops everything up to and including the first
// "package main" clause (and the blank line after it), and writes the
// remainder to outFile via gen.WriteGoFile. It aborts if inFile cannot be
// read or does not contain the package clause.
func repackage(inFile, outFile string) {
	src, err := ioutil.ReadFile(inFile)
	if err != nil {
		log.Fatalf("reading %s: %v", inFile, err)
	}
	const toDelete = "package main\n\n"
	i := bytes.Index(src, []byte(toDelete))
	if i < 0 {
		// Report the file that was actually read, not a fixed name.
		log.Fatalf("Could not find %q in %s", toDelete, inFile)
	}
	w := &bytes.Buffer{}
	w.Write(src[i+len(toDelete):])
	gen.WriteGoFile(outFile, "width", w.Bytes())
}
Beispiel #11
0
// genTrieval generates trieval.go for package cases from gen_trieval.go by
// stripping the build-tag/package-main preamble and passing the rest to
// gen.WriteGoFile. It aborts if the file cannot be read or the preamble is
// missing.
func genTrieval() {
	const (
		inFile   = "gen_trieval.go"
		toDelete = "// +build ignore\n\npackage main\n\n"
	)

	src, err := ioutil.ReadFile(inFile)
	if err != nil {
		log.Fatalf("reading gen_trieval.go: %v", err)
	}

	start := bytes.Index(src, []byte(toDelete))
	if start < 0 {
		log.Fatalf("could not find %q in gen_trieval.go", toDelete)
	}

	out := new(bytes.Buffer)
	out.Write(src[start+len(toDelete):])
	gen.WriteGoFile("trieval.go", "cases", out.Bytes())
}
Beispiel #12
0
// main writes tables.go for package rangetable. For every version returned
// by getVersions it parses that version's UnicodeData.txt, builds a
// unicode.RangeTable of the runes listed there, and emits it as
// assigned<version>; the assigned map indexes these tables by version string.
func main() {
	gen.Init()

	versions := getVersions()

	w := &bytes.Buffer{}

	fmt.Fprintf(w, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ","))
	fmt.Fprintf(w, "import \"unicode\"\n\n")

	// vstr turns a version such as "9.0.0" into an identifier suffix ("9_0_0").
	vstr := func(s string) string { return strings.Replace(s, ".", "_", -1) }

	fmt.Fprintf(w, "var assigned = map[string]*unicode.RangeTable{\n")
	for _, v := range versions {
		fmt.Fprintf(w, "\t%q: assigned%s,\n", v, vstr(v))
	}
	fmt.Fprintf(w, "}\n\n")

	var size int
	for _, v := range versions {
		assigned := []rune{}

		r := gen.Open("http://www.unicode.org/Public/", "", v+"/ucd/UnicodeData.txt")
		ucd.Parse(r, func(p *ucd.Parser) {
			assigned = append(assigned, p.Rune(0))
		})
		// Close each version's file as soon as it is parsed so that open
		// readers do not pile up across iterations.
		r.Close()

		rt := rangetable.New(assigned...)
		// Size estimate: the RangeTable header plus its 16- and 32-bit ranges.
		sz := int(reflect.TypeOf(unicode.RangeTable{}).Size())
		sz += int(reflect.TypeOf(unicode.Range16{}).Size()) * len(rt.R16)
		sz += int(reflect.TypeOf(unicode.Range32{}).Size()) * len(rt.R32)

		fmt.Fprintf(w, "// size %d bytes (%d KiB)\n", sz, sz/1024)
		fmt.Fprintf(w, "var assigned%s = ", vstr(v))
		print(w, rt)

		size += sz
	}

	fmt.Fprintf(w, "// Total size %d bytes (%d KiB)\n", size, size/1024)

	gen.WriteGoFile("tables.go", "rangetable", w.Bytes())
}
Beispiel #13
0
// genTables builds the "width" trie together with the table of fold/inverse
// mappings and writes both to *outputFile in package width.
func genTables() {
	trie := triegen.NewTrie("width")
	// fold and inverse mappings. See mapComment for a description of the format
	// of each entry. Add dummy value to make an index of 0 mean no mapping.
	inverse := [][4]byte{{}}
	mapping := map[[4]byte]int{{}: 0}

	getWidthData(func(r rune, tag elem, alt rune) {
		if alt == 0 {
			// No alternate rune: the index bits stay zero.
			trie.Insert(r, uint64(tag))
			return
		}

		// Entry layout: byte 0 holds the UTF-8 length of alt, followed by
		// the encoding of alt with its last byte XORed with the last byte
		// of r's encoding.
		var entry [4]byte
		entry[0] = byte(utf8.EncodeRune(entry[1:], alt))
		enc := string(r)
		entry[entry[0]] ^= enc[len(enc)-1]

		// Identical entries share one index.
		idx, known := mapping[entry]
		if !known {
			idx = len(mapping)
			if idx > math.MaxUint8 {
				log.Fatalf("Index %d does not fit in a byte.", idx)
			}
			mapping[entry] = idx
			inverse = append(inverse, entry)
		}
		trie.Insert(r, uint64(tag|elem(idx)))
	})

	out := new(bytes.Buffer)
	gen.WriteUnicodeVersion(out)

	total, err := trie.Gen(out)
	if err != nil {
		log.Fatal(err)
	}

	total += writeMappings(out, inverse)

	fmt.Fprintf(out, "// Total table size %d bytes (%dKiB)\n", total, total/1024)

	gen.WriteGoFile(*outputFile, "width", out.Bytes())
}
Beispiel #14
0
// main generates the XML definitions for package cldr from the DTD files in
// the CLDR core zip and writes them, followed by the Version constant, to
// *outputFile.
//
// For each entry in files, the first archive member whose name ends in
// "<file>.dtd" is parsed and written. The run aborts if a DTD declares a
// version different from gen.CLDRVersion().
func main() {
	flag.Parse()

	// The archive is read fully into memory because zip.NewReader needs
	// random access (an io.ReaderAt) and the total size.
	r := gen.OpenCLDRCoreZip()
	buffer, err := ioutil.ReadAll(r)
	if err != nil {
		log.Fatalf("Could not read zip file: %v", err)
	}
	r.Close()
	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
	if err != nil {
		log.Fatalf("Could not read zip archive: %v", err)
	}

	var buf bytes.Buffer

	version := gen.CLDRVersion()

	for _, dtd := range files {
		for _, f := range z.File {
			if strings.HasSuffix(f.Name, dtd.file+".dtd") {
				rc, err := f.Open()
				failOnError(err)

				b := makeBuilder(&buf, dtd)
				b.parseDTD(rc)
				b.resolve(b.index[dtd.top[0]])
				b.write()
				// The member's contents are no longer needed once the
				// definitions have been written.
				rc.Close()
				if b.version != "" && version != b.version {
					println(f.Name)
					log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
				}
				// Only the first matching archive member is used per DTD.
				break
			}
		}
	}
	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
	fmt.Fprintf(&buf, "const Version = %q\n", version)

	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}
Beispiel #15
0
// genTablesTest writes tables_test.go for package cases. The generated file
// is a single var block containing the output of printProperties for
// Case_Ignorable, the special map built from SpecialCasing.txt, the breakProp
// rune ranges derived from WordBreakProperty.txt, and the breakTest strings
// taken from WordBreakTest.txt.
func genTablesTest() {
	w := &bytes.Buffer{}

	fmt.Fprintln(w, "var (")
	printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)

	// We discard the output as we know we have perfect functions. We run them
	// just to verify the properties are correct.
	// NOTE(review): n presumably counts runes the verify function got wrong —
	// confirm against printProperties.
	n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
	n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
	n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
	if n > 0 {
		log.Fatalf("One of the discarded properties does not have a perfect filter.")
	}

	// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
	fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
	parse("SpecialCasing.txt", func(p *ucd.Parser) {
		// Skip conditional entries.
		if p.String(4) != "" {
			return
		}
		r := p.Rune(0)
		fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
			r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
	})
	fmt.Fprint(w, "\t}\n\n")

	// Break property
	// notBreak records the runes whose word-break property is one of the
	// values listed below; all other runes end up in breakProp.
	notBreak := map[rune]bool{}
	parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
		switch p.String(1) {
		case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
			"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet":
			notBreak[p.Rune(0)] = true
		}
	})

	// Emit maximal inclusive ranges {lo, hi} of runes that are not in
	// notBreak. inBreak tracks whether a range is currently open.
	fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
	inBreak := false
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if isBreak := !notBreak[r]; isBreak != inBreak {
			if isBreak {
				fmt.Fprintf(w, "\t\t{0x%x, ", r)
			} else {
				fmt.Fprintf(w, "0x%x},\n", r-1)
			}
			inBreak = isBreak
		}
	}
	// Close a range that is still open at the end of the scan.
	if inBreak {
		fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
	}
	fmt.Fprint(w, "\t}\n\n")

	// Word break test
	// Filter out all samples that do not contain cased characters.
	cased := map[rune]bool{}
	parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
		if p.String(1) == "Cased" {
			cased[p.Rune(0)] = true
		}
	})

	fmt.Fprintln(w, "\tbreakTest = []string{")
	parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
		// Field 0 is consumed as space-separated (marker, hex code point)
		// pairs.
		c := strings.Split(p.String(0), " ")

		const sep = '|'
		numCased := 0
		test := ""
		for ; len(c) >= 2; c = c[2:] {
			// A "÷" marker becomes a separator, except at the very start
			// of the sample.
			if c[0] == "÷" && test != "" {
				test += string(sep)
			}
			i, err := strconv.ParseUint(c[1], 16, 32)
			r := rune(i)
			if err != nil {
				log.Fatalf("Invalid rune %q.", c[1])
			}
			if r == sep {
				log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
			}
			if cased[r] {
				numCased++
			}
			test += string(r)
		}
		// Only samples with more than one cased rune are written.
		if numCased > 1 {
			fmt.Fprintf(w, "\t\t%q,\n", test)
		}
	})
	fmt.Fprintln(w, "\t}")

	fmt.Fprintln(w, ")")

	gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
}
Beispiel #16
0
// makeTables writes tables.go for package norm. It does nothing when
// *tablelist is empty. The file always contains the Version and
// MaxTransformChunkSize constants and the ccc remap table; the character info
// tables and the recomposition map are included when "info" and "recomp" are
// selected in *tablelist ("all" selects both).
func makeTables() {
	w := &bytes.Buffer{}

	size := 0
	if *tablelist == "" {
		return
	}
	list := strings.Split(*tablelist, ",")
	if *tablelist == "all" {
		list = []string{"recomp", "info"}
	}

	// Compute maximum decomposition size.
	max := 0
	for _, c := range chars {
		if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
			max = n
		}
	}

	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
	fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
	fmt.Fprintln(w)
	fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
	fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
	fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
	fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
	// The explicit rune conversion makes clear that the UTF-8 length of
	// U+034F is meant, not a decimal rendering of the integer.
	fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(rune(0x034F)))+max)
	// Fprint with explicit newlines produces the same bytes as the vet-flagged
	// Fprintln call whose argument already ends in a newline.
	fmt.Fprint(w, ")\n\n")

	// Print the CCC remap table.
	size += len(cccMap)
	fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
	for i := 0; i < len(cccMap); i++ {
		// Eight values per output line.
		if i%8 == 0 {
			fmt.Fprintln(w)
		}
		fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
	}
	fmt.Fprint(w, "\n}\n\n")

	if contains(list, "info") {
		size += printCharInfoTables(w)
	}

	if contains(list, "recomp") {
		// Note that we use 32 bit keys, instead of 64 bit.
		// This clips the bits of three entries, but we know
		// this won't cause a collision. The compiler will catch
		// any changes made to UnicodeData.txt that introduces
		// a collision.
		// Note that the recomposition map for NFC and NFKC
		// are identical.

		// Recomposition map
		nrentries := 0
		for _, c := range chars {
			f := c.forms[FCanonical]
			if !f.isOneWay && len(f.decomp) > 0 {
				nrentries++
			}
		}
		// Each entry is accounted as 8 bytes.
		sz := nrentries * 8
		size += sz
		fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
		fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
		for i, c := range chars {
			f := c.forms[FCanonical]
			d := f.decomp
			if !f.isOneWay && len(d) > 0 {
				// Key: low 16 bits of the first decomposition rune in the
				// upper half, low 16 bits of the second in the lower half.
				key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
				fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
			}
		}
		fmt.Fprintf(w, "}\n\n")
	}

	fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
	gen.WriteGoFile("tables.go", "norm", w.Bytes())
}
Beispiel #17
0
// main reads the IANA character-sets registry and writes mib.go for package
// identifier: a const block with one documented MIB constant per registry
// record.
//
// The constant name is derived from the record's "cs" alias; the doc comment
// is assembled from the record's name, MIME name, description and references.
func main() {
	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
	defer r.Close()
	reg := &registry{}
	if err := xml.NewDecoder(r).Decode(reg); err != nil && err != io.EOF {
		log.Fatalf("Error decoding charset registry: %v", err)
	}
	// Check for an empty registry separately so that the ID check below can
	// index element 0 safely.
	if len(reg.Registry) == 0 {
		log.Fatal("No registry found in character-sets.xml")
	}
	if id := reg.Registry[0].ID; id != "character-sets-1" {
		log.Fatalf("Unexpected ID %s", id)
	}

	w := &bytes.Buffer{}
	fmt.Fprintf(w, "const (\n")
	for _, rec := range reg.Registry[0].Record {
		// The last alias of the form "csXxx" (no hyphen) determines the name.
		constName := ""
		for _, a := range rec.Alias {
			if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
				// Some of the constant definitions have comments in them. Strip those.
				constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
			}
		}
		if constName == "" {
			switch rec.MIB {
			case "2085":
				constName = "HZGB2312" // Not listed as alias for some reason.
			default:
				log.Fatalf("No cs alias defined for %s.", rec.MIB)
			}
		}
		if rec.MIME != "" {
			rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
		}
		fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
		if len(rec.Desc.Data) > 0 {
			// The description is an XML fragment; flatten it into comment
			// lines, keeping the data attribute of non-person xrefs.
			fmt.Fprint(w, "// ")
			d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
			inElem := true
			attr := ""
			for {
				t, err := d.Token()
				if err != nil {
					if err != io.EOF {
						log.Fatal(err)
					}
					break
				}
				switch x := t.(type) {
				case xml.CharData:
					attr = "" // Don't need attribute info.
					a := bytes.Split([]byte(x), []byte("\n"))
					for i, b := range a {
						if b = bytes.TrimSpace(b); len(b) != 0 {
							// Start a new comment line for each further
							// non-empty line of text outside an element.
							if !inElem && i > 0 {
								fmt.Fprint(w, "\n// ")
							}
							inElem = false
							fmt.Fprintf(w, "%s ", string(b))
						}
					}
				case xml.StartElement:
					if x.Name.Local == "xref" {
						inElem = true
						use := false
						for _, a := range x.Attr {
							if a.Name.Local == "type" {
								use = use || a.Value != "person"
							}
							if a.Name.Local == "data" && use {
								attr = a.Value + " "
							}
						}
					}
				case xml.EndElement:
					inElem = false
					fmt.Fprint(w, attr)
				}
			}
			fmt.Fprint(w, "\n")
		}
		for _, x := range rec.Xref {
			switch x.Type {
			case "rfc":
				fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
			case "uri":
				fmt.Fprintf(w, "// Reference: %s\n", x.Data)
			}
		}
		fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, ")")

	gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}