Beispiel #1
0
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
func parseCollation(b *build.Builder) {
	d := &cldr.Decoder{}
	d.SetDirFilter("collation")
	data := decodeCLDR(d)
	for _, loc := range data.Locales() {
		x, err := data.LDML(loc)
		failOnError(err)
		if skipLang(x.Identity.Language.Type) {
			continue
		}
		cs := x.Collations.Collation
		sl := cldr.MakeSlice(&cs)
		if !types.all {
			sl.SelectAnyOf("type", append(types.s, x.Collations.Default())...)
		}
		sl.SelectOnePerGroup("alt", altInclude())

		for _, c := range cs {
			m := make(map[language.Part]string)
			m[language.TagPart] = loc
			if c.Type != x.Collations.Default() {
				m[language.Extension('u')] = "co-" + c.Type
			}
			id, err := language.Compose(m)
			failOnError(err)
			t := b.Tailoring(id)
			c.Process(processor{t})
		}
	}
}
Beispiel #2
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
	r := gen.OpenUnicodeFile("UCA", "", "allkeys.txt")
	defer r.Close()
	input := bufio.NewReader(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	for i := 1; true; i++ {
		l, prefix, err := input.ReadLine()
		if err == io.EOF {
			break
		}
		Error(err)
		line := string(l)
		if prefix {
			log.Fatalf("%d: buffer overflow", i)
		}
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			if strings.HasPrefix(line[1:], "version ") {
				if v := strings.Split(line[1:], " ")[1]; v != gen.UnicodeVersion() {
					log.Fatalf("incompatible version %s; want %s", v, gen.UnicodeVersion())
				}
			}
		} else {
			// parse entries
			part := strings.Split(line, " ; ")
			if len(part) != 2 {
				log.Fatalf("%d: production rule without ';': %v", i, line)
			}
			lhs := []rune{}
			for _, v := range strings.Split(part[0], " ") {
				if v != "" {
					lhs = append(lhs, rune(convHex(i, v)))
				}
			}
			vars := []int{}
			rhs := [][]int{}
			for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
				if m[1] == "*" {
					vars = append(vars, i)
				}
				elem := []int{}
				for _, h := range strings.Split(m[2], ".") {
					elem = append(elem, convHex(i, h))
				}
				rhs = append(rhs, elem)
			}
			builder.Add(lhs, rhs, vars)
		}
	}
}
Beispiel #3
0
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
func parseCollation(b *build.Builder) {
	d := &cldr.Decoder{}
	d.SetDirFilter("collation")
	data := decodeCLDR(d)
	for _, loc := range data.Locales() {
		x, err := data.LDML(loc)
		failOnError(err)
		if skipLang(x.Identity.Language.Type) {
			continue
		}
		cs := x.Collations.Collation
		sl := cldr.MakeSlice(&cs)
		if len(types.s) == 0 {
			sl.SelectAnyOf("type", x.Collations.Default())
		} else if !types.all {
			sl.SelectAnyOf("type", types.s...)
		}
		sl.SelectOnePerGroup("alt", altInclude())

		for _, c := range cs {
			id, err := language.Parse(loc)
			if err != nil {
				fmt.Fprintf(os.Stderr, "invalid locale: %q", err)
				continue
			}
			// Support both old- and new-style defaults.
			d := c.Type
			if x.Collations.DefaultCollation == nil {
				d = x.Collations.Default()
			} else {
				d = x.Collations.DefaultCollation.Data()
			}
			// We assume tables are being built either for search or collation,
			// but not both. For search the default is always "search".
			if d != c.Type && c.Type != "search" {
				typ := c.Type
				if len(c.Type) > 8 {
					typ = typeMap[c.Type]
				}
				id, err = id.SetTypeForKey("co", typ)
				failOnError(err)
			}
			t := b.Tailoring(id)
			c.Process(processor{t})
		}
	}
}
Beispiel #4
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
	var r io.ReadCloser
	var err error
	if strings.HasSuffix(*root, ".zip") {
		for _, f := range openArchive(root).File {
			if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
				r, err = f.Open()
			}
		}
		if r == nil {
			err = fmt.Errorf("file allkeys_CLDR.txt not found in archive %q", *root)
		}
	} else {
		r, err = openReader(root)
	}
	failOnError(err)
	defer r.Close()
	scanner := bufio.NewScanner(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	for i := 1; scanner.Scan(); i++ {
		line := scanner.Text()
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			// parse properties
			switch {
			case strings.HasPrefix(line[1:], "version "):
				a := strings.Split(line[1:], " ")
				if a[1] != unicode.Version {
					log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version)
				}
			case strings.HasPrefix(line[1:], "backwards "):
				log.Fatalf("%d: unsupported option backwards", i)
			default:
				log.Printf("%d: unknown option %s", i, line[1:])
			}
		} else {
			// parse entries
			part := strings.Split(line, " ; ")
			if len(part) != 2 {
				log.Fatalf("%d: production rule without ';': %v", i, line)
			}
			lhs := []rune{}
			for _, v := range strings.Split(part[0], " ") {
				if v == "" {
					continue
				}
				lhs = append(lhs, rune(convHex(i, v)))
			}
			var n int
			var vars []int
			rhs := [][]int{}
			for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
				n += len(m[0])
				elem := []int{}
				for _, h := range strings.Split(m[2], ".") {
					elem = append(elem, convHex(i, h))
				}
				if m[1] == "*" {
					vars = append(vars, i)
				}
				rhs = append(rhs, elem)
			}
			if len(part[1]) < n+3 || part[1][n+1] != '#' {
				log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
			}
			if *test {
				testInput.add(string(lhs))
			}
			failOnError(builder.Add(lhs, rhs, vars))
		}
	}
	if scanner.Err() != nil {
		log.Fatal(scanner.Err())
	}
}