// parseCollation parses XML files in the collation directory of the CLDR core.zip file. func parseCollation(b *build.Builder) { d := &cldr.Decoder{} d.SetDirFilter("collation") data := decodeCLDR(d) for _, loc := range data.Locales() { x, err := data.LDML(loc) failOnError(err) if skipLang(x.Identity.Language.Type) { continue } cs := x.Collations.Collation sl := cldr.MakeSlice(&cs) if !types.all { sl.SelectAnyOf("type", append(types.s, x.Collations.Default())...) } sl.SelectOnePerGroup("alt", altInclude()) for _, c := range cs { m := make(map[locale.Part]string) m[locale.TagPart] = loc if c.Type != x.Collations.Default() { m[locale.Extension('u')] = "co-" + c.Type } id, err := locale.Compose(m) failOnError(err) t := b.Tailoring(id) c.Process(processor{t}) } } }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { r := openReader(*ducet) defer r.Close() input := bufio.NewReader(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; true; i++ { l, prefix, err := input.ReadLine() if err == io.EOF { break } Error(err) line := string(l) if prefix { log.Fatalf("%d: buffer overflow", i) } if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { if strings.HasPrefix(line[1:], "version ") { if v := strings.Split(line[1:], " ")[1]; v != unicode.Version { log.Fatalf("incompatible version %s; want %s", v, unicode.Version) } } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v != "" { lhs = append(lhs, rune(convHex(i, v))) } } vars := []int{} rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { if m[1] == "*" { vars = append(vars, i) } elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } rhs = append(rhs, elem) } builder.Add(lhs, rhs, vars) } } }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { var r io.ReadCloser var err error if strings.HasSuffix(*root, ".zip") { for _, f := range openArchive(root).File { if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") { r, err = f.Open() } } if r == nil { err = fmt.Errorf("file allkeys_CLDR.txt not found in archive %q", *root) } } else { r, err = openReader(root) } failOnError(err) defer r.Close() scanner := bufio.NewScanner(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; scanner.Scan(); i++ { line := scanner.Text() if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { // parse properties switch { case strings.HasPrefix(line[1:], "version "): a := strings.Split(line[1:], " ") if a[1] != unicode.Version { log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version) } case strings.HasPrefix(line[1:], "backwards "): log.Fatalf("%d: unsupported option backwards", i) default: log.Printf("%d: unknown option %s", i, line[1:]) } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v == "" { continue } lhs = append(lhs, rune(convHex(i, v))) } var n int var vars []int rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { n += len(m[0]) elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } if m[1] == "*" { vars = append(vars, i) } rhs = append(rhs, elem) } if len(part[1]) < n+3 || part[1][n+1] != '#' { log.Fatalf("%d: expected comment; found %s", i, part[1][n:]) } if *test { testInput.add(string(lhs)) } failOnError(builder.Add(lhs, rhs, vars)) } } if scanner.Err() != nil { log.Fatal(scanner.Err()) } }