// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { r := openReader(*ducet) defer r.Close() input := bufio.NewReader(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; true; i++ { l, prefix, err := input.ReadLine() if err == io.EOF { break } Error(err) line := string(l) if prefix { log.Fatalf("%d: buffer overflow", i) } if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { if strings.HasPrefix(line[1:], "version ") { if v := strings.Split(line[1:], " ")[1]; v != unicode.Version { log.Fatalf("incompatible version %s; want %s", v, unicode.Version) } } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v != "" { lhs = append(lhs, rune(convHex(i, v))) } } vars := []int{} rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { if m[1] == "*" { vars = append(vars, i) } elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } rhs = append(rhs, elem) } builder.Add(lhs, rhs, vars) } } }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { var r io.ReadCloser var err error if strings.HasSuffix(*root, ".zip") { for _, f := range openArchive(root).File { if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") { r, err = f.Open() } } if r == nil { err = fmt.Errorf("file allkeys_CLDR.txt not found in archive %q", *root) } } else { r, err = openReader(root) } failOnError(err) defer r.Close() input := bufio.NewReader(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; err == nil; i++ { l, prefix, e := input.ReadLine() err = e line := string(l) if prefix { log.Fatalf("%d: buffer overflow", i) } if err != nil && err != io.EOF { log.Fatalf("%d: %v", i, err) } if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { // parse properties switch { case strings.HasPrefix(line[1:], "version "): a := strings.Split(line[1:], " ") if a[1] != unicode.Version { log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version) } case strings.HasPrefix(line[1:], "backwards "): log.Fatalf("%d: unsupported option backwards", i) default: log.Printf("%d: unknown option %s", i, line[1:]) } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v == "" { continue } lhs = append(lhs, rune(convHex(i, v))) } var n int var vars []int rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { n += len(m[0]) elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } if m[1] == "*" { vars = append(vars, i) } rhs = append(rhs, elem) } if len(part[1]) < n+3 || part[1][n+1] != '#' { log.Fatalf("%d: expected comment; found %s", i, part[1][n:]) } if *test { testInput.add(string(lhs)) } failOnError(builder.Add(lhs, rhs, vars)) } } }