Beispiel #1
0
// loadTestData reads the regression tests contained in the UCA
// CollationTest.zip archive and returns one Test per archive entry. Entries
// whose name contains "SHORT" and directory entries are skipped.
//
// Errors are handed to Error (defined elsewhere in this package; presumably it
// aborts on a non-nil error — confirm).
func loadTestData() []Test {
	f := gen.OpenUnicodeFile("UCA", "", "CollationTest.zip")
	buffer, err := ioutil.ReadAll(f)
	f.Close()
	Error(err)
	archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
	Error(err)
	tests := []Test{}
	for _, f := range archive.File {
		// Skip the short versions, which are simply duplicates of the long versions.
		if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() {
			continue
		}
		tests = append(tests, loadTestFile(f))
	}
	return tests
}

// loadTestFile parses a single archive entry into a Test named after the
// entry's base name. It lives in its own function so that the deferred Close
// runs as soon as this entry has been read, instead of piling up until the
// whole archive has been processed.
//
// Lines of length <= 1 and lines starting with '#' are treated as comments;
// a comment matching versionRe whose version differs from
// gen.UnicodeVersion() only triggers a warning. Any other line must match
// testRe, otherwise the program exits through log.Fatalf.
func loadTestFile(f *zip.File) Test {
	ff, err := f.Open()
	Error(err)
	defer ff.Close()

	test := Test{name: path.Base(f.Name)}
	scanner := bufio.NewScanner(ff)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) <= 1 || line[0] == '#' {
			if m := versionRe.FindStringSubmatch(line); m != nil {
				if m[1] != gen.UnicodeVersion() {
					log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], gen.UnicodeVersion())
				}
			}
			continue
		}
		m := testRe.FindStringSubmatch(line)
		if m == nil || len(m) < 3 {
			log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
		}
		str := []byte{}
		// In the regression test data (unpaired) surrogates are assigned a weight
		// corresponding to their code point value.  However, utf8.DecodeRune,
		// which is used to compute the implicit weight, assigns FFFD to surrogates.
		// We therefore skip tests with surrogates.  This skips about 35 entries
		// per test.
		valid := true
		for _, split := range strings.Split(m[1], " ") {
			// Parse with a 32-bit limit: a rune is 32 bits wide, so a larger
			// value would otherwise be truncated silently by rune(r) below.
			r, err := strconv.ParseUint(split, 16, 32)
			Error(err)
			valid = valid && utf8.ValidRune(rune(r))
			str = append(str, string(rune(r))...)
		}
		if valid {
			test.str = append(test.str, str)
			test.comment = append(test.comment, m[2])
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return test
}
Beispiel #2
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format and adds
// every entry it finds to builder. It does not return a value.
//
// Empty lines and lines starting with '#' are ignored. An "@version" line
// whose version differs from gen.UnicodeVersion() is fatal; other '@' lines
// are ignored. Malformed input terminates the program through log.Fatalf.
func parseUCA(builder *build.Builder) {
	r := gen.OpenUnicodeFile("UCA", "", "allkeys.txt")
	defer r.Close()
	input := bufio.NewReader(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	// i is the 1-based line number; it is only used in diagnostics.
	for i := 1; ; i++ {
		l, prefix, err := input.ReadLine()
		if err == io.EOF {
			break
		}
		Error(err)
		line := string(l)
		if prefix {
			// The line did not fit into the reader's buffer.
			log.Fatalf("%d: buffer overflow", i)
		}
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			if strings.HasPrefix(line[1:], "version ") {
				if v := strings.Split(line[1:], " ")[1]; v != gen.UnicodeVersion() {
					log.Fatalf("incompatible version %s; want %s", v, gen.UnicodeVersion())
				}
			}
			continue
		}

		// parse entries
		part := strings.Split(line, " ; ")
		if len(part) != 2 {
			log.Fatalf("%d: production rule without ';': %v", i, line)
		}
		lhs := []rune{}
		for _, v := range strings.Split(part[0], " ") {
			if v != "" {
				lhs = append(lhs, rune(convHex(i, v)))
			}
		}
		vars := []int{}
		rhs := [][]int{}
		// j is the index of the collation element within this entry. It must
		// not shadow i: convHex is given the line number here, exactly as in
		// the lhs loop above (presumably for its error message — confirm
		// against convHex).
		for j, m := range colelem.FindAllStringSubmatch(part[1], -1) {
			if m[1] == "*" {
				// Elements introduced by '*' are recorded by position.
				vars = append(vars, j)
			}
			elem := []int{}
			for _, h := range strings.Split(m[2], ".") {
				elem = append(elem, convHex(i, h))
			}
			rhs = append(rhs, elem)
		}
		builder.Add(lhs, rhs, vars)
	}
}
Beispiel #3
0
// main initializes the gen package, checks the requested Unicode version
// against the version of the core unicode package, and then calls generate
// for every package in dependency order. It waits for all work to finish and
// exits with status 1 if hasErrors is set, or with status 2 if the requested
// version is older than the core version and -force was not given.
func main() {
	gen.Init()
	args = flag.Args()
	if !*verbose {
		// Set vprintf to a no-op.
		vprintf = func(string, ...interface{}) (int, error) { return 0, nil }
	}

	// TODO: create temporary cache directory to load files and create and set
	// a "cache" option if the user did not specify the UNICODE_DIR environment
	// variable. This will prevent duplicate downloads and also will enable long
	// tests, which really need to be run after each generated package.

	if gen.UnicodeVersion() != unicode.Version {
		// gen.UnicodeVersion must be called here: passing the function value
		// itself to %s prints a func address instead of the version string.
		fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n",
			gen.UnicodeVersion(),
			unicode.Version)
		// TODO: use collate to compare. Simple comparison will work, though,
		// until Unicode reaches version 10. To avoid circular dependencies, we
		// could use the NumericWeighter without using package collate using a
		// trivial Weighter implementation.
		if gen.UnicodeVersion() < unicode.Version && !*force {
			os.Exit(2)
		}
	}
	// Each generate call receives the results of the calls it depends on.
	var (
		cldr       = generate("unicode/cldr")
		language   = generate("language", cldr)
		internal   = generate("internal", language)
		norm       = generate("unicode/norm")
		rangetable = generate("unicode/rangetable")
		cases      = generate("cases", norm, language, rangetable)
		width      = generate("width")
		bidi       = generate("unicode/bidi", norm, rangetable)
		_          = generate("secure/precis", norm, rangetable, cases, width, bidi)
		_          = generate("encoding/htmlindex", language)
		_          = generate("currency", cldr, language, internal)
		_          = generate("internal/number", cldr, language, internal)
		_          = generate("language/display", cldr, language)
		_          = generate("collate", norm, cldr, language, rangetable)
		_          = generate("search", norm, cldr, language, rangetable)
	)
	all.Wait()

	if hasErrors {
		fmt.Println("FAIL")
		os.Exit(1)
	}
	vprintf("SUCCESS\n")
}
Beispiel #4
0
// getVersions returns the comma-separated entries of the versionList flag in
// sorted order, with gen.UnicodeVersion() added when the flag does not
// already list it. An empty flag value is fatal (bootstrapMessage is logged).
func getVersions() []string {
	if *versionList == "" {
		log.Fatal(bootstrapMessage)
	}

	current := gen.UnicodeVersion()
	list := strings.Split(*versionList, ",")

	// Ensure that at least the current version is included.
	listed := false
	for _, candidate := range list {
		if candidate == current {
			listed = true
			break
		}
	}
	if !listed {
		list = append(list, current)
	}

	// One sort after the optional append gives the same order as sorting
	// before and after it.
	sort.Strings(list)
	return list
}
Beispiel #5
0
// main initializes the gen package, checks the requested Unicode version
// against the version of the core unicode package, and then calls generate
// for every package in dependency order. It waits for all work to finish and
// exits with status 1 if hasErrors is set, or with status 2 if the requested
// version is older than the core version and -force was not given.
func main() {
	gen.Init()
	args = flag.Args()
	if !*verbose {
		// Set vprintf to a no-op.
		vprintf = func(string, ...interface{}) (int, error) { return 0, nil }
	}

	if gen.UnicodeVersion() != unicode.Version {
		// gen.UnicodeVersion must be called here: passing the function value
		// itself to %s prints a func address instead of the version string.
		fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n",
			gen.UnicodeVersion(),
			unicode.Version)
		// TODO: use collate to compare. Simple comparison will work, though,
		// until Unicode reaches version 10. To avoid circular dependencies, we
		// could use the NumericWeighter without using package collate using a
		// trivial Weighter implementation.
		if gen.UnicodeVersion() < unicode.Version && !*force {
			os.Exit(2)
		}
	}
	// Each generate call receives the results of the calls it depends on.
	var (
		cldr     = generate("cldr")
		language = generate("language", cldr)
		internal = generate("internal", language)
		norm     = generate("unicode/norm")
		_        = generate("unicode/rangetable")
		_        = generate("width")
		_        = generate("currency", cldr, language, internal)
		_        = generate("display", cldr, language)
		_        = generate("cases", norm)
		_        = generate("collate", norm, cldr, language)
		_        = generate("search", norm, cldr, language)
	)
	all.Wait()

	if hasErrors {
		fmt.Println("FAIL")
		os.Exit(1)
	}
	vprintf("SUCCESS\n")
}
Beispiel #6
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format and adds
// every entry it finds to builder. It does not return a value.
//
// The table is read from the archive entry whose name ends in
// "allkeys_CLDR.txt". Empty lines and lines starting with '#' are ignored.
// An "@version" line that differs from gen.UnicodeVersion() and any
// "@backwards" line are fatal; other '@' lines are logged and skipped.
// When the test flag is set, every left-hand side is also added to testInput.
func parseUCA(builder *build.Builder) {
	var r io.ReadCloser
	var err error
	// Locate the entry first and open it once afterwards. This keeps the
	// original "last match wins" choice, but no longer opens (and leaks) every
	// matching entry, and an Open failure is reported as such rather than as
	// "not found".
	files := openArchive().File
	found := -1
	for k, f := range files {
		if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
			found = k
		}
	}
	if found < 0 {
		log.Fatal("File allkeys_CLDR.txt not found in archive.")
	}
	r, err = files[found].Open()
	failOnError(err)
	defer r.Close()
	scanner := bufio.NewScanner(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	// i is the 1-based line number; it is only used in diagnostics.
	for i := 1; scanner.Scan(); i++ {
		line := scanner.Text()
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			// parse properties
			switch {
			case strings.HasPrefix(line[1:], "version "):
				a := strings.Split(line[1:], " ")
				if a[1] != gen.UnicodeVersion() {
					log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion())
				}
			case strings.HasPrefix(line[1:], "backwards "):
				log.Fatalf("%d: unsupported option backwards", i)
			default:
				log.Printf("%d: unknown option %s", i, line[1:])
			}
			continue
		}

		// parse entries
		part := strings.Split(line, " ; ")
		if len(part) != 2 {
			log.Fatalf("%d: production rule without ';': %v", i, line)
		}
		lhs := []rune{}
		for _, v := range strings.Split(part[0], " ") {
			if v == "" {
				continue
			}
			lhs = append(lhs, rune(convHex(i, v)))
		}
		// n accumulates the byte length of all matched collation elements so
		// that the trailing comment can be located below.
		var n int
		var vars []int
		rhs := [][]int{}
		// j is the index of the collation element within this entry. It must
		// not shadow i: convHex is given the line number here, exactly as in
		// the lhs loop above (presumably for its error message — confirm
		// against convHex).
		for j, m := range colelem.FindAllStringSubmatch(part[1], -1) {
			n += len(m[0])
			elem := []int{}
			for _, h := range strings.Split(m[2], ".") {
				elem = append(elem, convHex(i, h))
			}
			if m[1] == "*" {
				// Elements introduced by '*' are recorded by position.
				vars = append(vars, j)
			}
			rhs = append(rhs, elem)
		}
		// After the elements the line must continue with a separator byte
		// followed by a '#' comment.
		if len(part[1]) < n+3 || part[1][n+1] != '#' {
			log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
		}
		if *test {
			testInput.add(string(lhs))
		}
		failOnError(builder.Add(lhs, rhs, vars))
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
Beispiel #7
0
// makeTables renders the tables selected by the tablelist flag as Go source
// and writes them to tables.go in package norm through gen.WriteGoFile.
//
// tablelist is a comma-separated list; the value "all" selects "recomp" and
// "info". An empty tablelist makes this function a no-op. The constant block
// and the ccc table are always emitted; the character info tables and the
// recomposition map only when selected. The final comment line reports the
// accumulated table size.
func makeTables() {
	if *tablelist == "" {
		return
	}
	list := strings.Split(*tablelist, ",")
	if *tablelist == "all" {
		list = []string{"recomp", "info"}
	}

	w := &bytes.Buffer{}
	size := 0

	// Compute maximum decomposition size.
	max := 0
	for _, c := range chars {
		if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
			max = n
		}
	}

	fmt.Fprintln(w, "const (")
	fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
	fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
	fmt.Fprintln(w)
	fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
	fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
	fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
	fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
	// The explicit rune conversion yields the UTF-8 encoding of U+034F, the
	// same bytes as before, without the int-to-string conversion that go vet
	// rejects.
	fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(rune(0x034F)))+max)
	// Fprint with an explicit "\n\n" emits the same bytes as the former
	// Fprintln(w, ")\n") while avoiding vet's redundant-newline warning.
	fmt.Fprint(w, ")\n\n")

	// Print the CCC remap table.
	size += len(cccMap)
	fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
	for i := 0; i < len(cccMap); i++ {
		// Eight values per output line.
		if i%8 == 0 {
			fmt.Fprintln(w)
		}
		fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
	}
	fmt.Fprint(w, "\n}\n\n")

	if contains(list, "info") {
		size += printCharInfoTables(w)
	}

	if contains(list, "recomp") {
		// Note that we use 32 bit keys, instead of 64 bit.
		// This clips the bits of three entries, but we know
		// this won't cause a collision. The compiler will catch
		// any changes made to UnicodeData.txt that introduces
		// a collision.
		// Note that the recomposition map for NFC and NFKC
		// are identical.

		// Recomposition map
		nrentries := 0
		for _, c := range chars {
			f := c.forms[FCanonical]
			if !f.isOneWay && len(f.decomp) > 0 {
				nrentries++
			}
		}
		// Size estimate: 8 bytes per entry.
		sz := nrentries * 8
		size += sz
		fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
		fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
		for i, c := range chars {
			f := c.forms[FCanonical]
			d := f.decomp
			if !f.isOneWay && len(d) > 0 {
				// The key packs the low 16 bits of the first two runes of the
				// decomposition; the value is the index of c in chars.
				// NOTE(review): d[1] assumes such decompositions always have
				// at least two runes — confirm.
				key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
				fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
			}
		}
		fmt.Fprintf(w, "}\n\n")
	}

	fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
	gen.WriteGoFile("tables.go", "norm", w.Bytes())
}
Beispiel #8
0
// main initializes the gen package, decides whether the core unicode package
// has to be regenerated (the doCore flag is set, or the requested Unicode
// version differs from unicode.Version), and then calls generate for every
// package in dependency order. It waits for all work to finish and exits with
// status 1 if hasErrors is set, or with status 2 if the requested version is
// older than the core version and the force flag is not set.
func main() {
	gen.Init()
	args = flag.Args()
	if !*verbose {
		// Verbose output is off: replace vprintf with a function that prints nothing.
		vprintf = func(string, ...interface{}) (int, error) { return 0, nil }
	}

	// TODO: create temporary cache directory to load files and create and set
	// a "cache" option if the user did not specify the UNICODE_DIR environment
	// variable. This will prevent duplicate downloads and also will enable long
	// tests, which really need to be run after each generated package.

	updateCore := *doCore
	if gen.UnicodeVersion() != unicode.Version {
		fmt.Printf("Requested Unicode version %s; core unicode version is %s.\n",
			gen.UnicodeVersion(),
			unicode.Version)
		// TODO: use collate to compare. Simple comparison will work, though,
		// until Unicode reaches version 10. To avoid circular dependencies, we
		// could use the NumericWeighter without using package collate using a
		// trivial Weighter implementation.
		if gen.UnicodeVersion() < unicode.Version {
			if !*force {
				os.Exit(2)
			}
		}
		updateCore = true
	}

	// core stands for the core unicode package. It stays an empty dependency
	// unless the core package is regenerated below.
	core := &dependency{}
	if updateCore {
		fmt.Printf("Updating core to version %s...\n", gen.UnicodeVersion())
		core = generate("unicode")

		// Test some users of the unicode packages, especially the ones that
		// keep a mirrored table (strconv and testing mimic the Unicode table).
		// These may need to be corrected by hand.
		for _, pkg := range []string{"regexp", "strconv", "strings", "testing"} {
			generate(pkg, core)
		}
	}

	// Packages that later generate calls depend on.
	cldr := generate("./unicode/cldr", core)
	language := generate("./language", cldr)
	internal := generate("./internal", core, language)
	norm := generate("./unicode/norm", core)
	rangetable := generate("./unicode/rangetable", core)
	cases := generate("./cases", core, norm, language, rangetable)
	width := generate("./width", core)
	bidi := generate("./unicode/bidi", core, norm, rangetable)

	// Packages whose result is not used by any later call here.
	generate("./secure/precis", core, norm, rangetable, cases, width, bidi)
	generate("./encoding/htmlindex", core, language)
	generate("./currency", core, cldr, language, internal)
	generate("./internal/number", core, cldr, language, internal)
	generate("./language/display", core, cldr, language, internal)
	generate("./collate", core, norm, cldr, language, rangetable)
	generate("./search", core, norm, cldr, language, rangetable)

	if updateCore {
		copyVendored()
		generate("vendor/golang_org/x/net/idna", core, norm, width, cases)
	}
	all.Wait()

	if hasErrors {
		fmt.Println("FAIL")
		os.Exit(1)
	}
	vprintf("SUCCESS\n")
}