func insertCollation(builder *build.Builder, locale string, c *Collation) { t := builder.Tailoring(locale) for _, r := range c.Rules.Any { switch r.XMLName.Local { case "reset": if r.Before == "" { failOnError(t.SetAnchor(r.Value)) } else { failOnError(t.SetAnchorBefore(r.Value)) } case "x": var context, extend string for _, r1 := range r.Any { switch r1.XMLName.Local { case "context": context = r1.Value case "extend": extend = r1.Value } } for _, r1 := range r.Any { if t := r1.XMLName.Local; t == "context" || t == "extend" { continue } insertTailoring(t, r1, context, extend) } default: insertTailoring(t, r, "", "") } } }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { r := openReader(*ducet) defer r.Close() input := bufio.NewReader(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; true; i++ { l, prefix, err := input.ReadLine() if err == io.EOF { break } Error(err) line := string(l) if prefix { log.Fatalf("%d: buffer overflow", i) } if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { if strings.HasPrefix(line[1:], "version ") { if v := strings.Split(line[1:], " ")[1]; v != unicode.Version { log.Fatalf("incompatible version %s; want %s", v, unicode.Version) } } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v != "" { lhs = append(lhs, rune(convHex(i, v))) } } vars := []int{} rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { if m[1] == "*" { vars = append(vars, i) } elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } rhs = append(rhs, elem) } builder.Add(lhs, rhs, vars) } } }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) { var r io.ReadCloser var err error if strings.HasSuffix(*root, ".zip") { for _, f := range openArchive(root).File { if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") { r, err = f.Open() } } if r == nil { err = fmt.Errorf("file allkeys_CLDR.txt not found in archive %q", *root) } } else { r, err = openReader(root) } failOnError(err) defer r.Close() scanner := bufio.NewScanner(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; scanner.Scan(); i++ { line := scanner.Text() if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { // parse properties switch { case strings.HasPrefix(line[1:], "version "): a := strings.Split(line[1:], " ") if a[1] != unicode.Version { log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version) } case strings.HasPrefix(line[1:], "backwards "): log.Fatalf("%d: unsupported option backwards", i) default: log.Printf("%d: unknown option %s", i, line[1:]) } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v == "" { continue } lhs = append(lhs, rune(convHex(i, v))) } var n int var vars []int rhs := [][]int{} for i, m := range colelem.FindAllStringSubmatch(part[1], -1) { n += len(m[0]) elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } if m[1] == "*" { vars = append(vars, i) } rhs = append(rhs, elem) } if len(part[1]) < n+3 || part[1][n+1] != '#' { log.Fatalf("%d: expected comment; found %s", i, part[1][n:]) } if *test { testInput.add(string(lhs)) } failOnError(builder.Add(lhs, rhs, vars)) } } if scanner.Err() != nil { log.Fatal(scanner.Err()) } }
// parseUCA parses a Default Unicode Collation Element Table of the format // specified in http://www.unicode.org/reports/tr10/#File_Format. // It returns the variable top. func parseUCA(builder *build.Builder) int { maxVar, minNonVar := 0, 1<<30 r, err := openReader(*ducet) failonerror(err) defer r.Close() input := bufio.NewReader(r) colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`) for i := 1; err == nil; i++ { l, prefix, e := input.ReadLine() err = e line := string(l) if prefix { log.Fatalf("%d: buffer overflow", i) } if err != nil && err != io.EOF { log.Fatalf("%d: %v", i, err) } if len(line) == 0 || line[0] == '#' { continue } if line[0] == '@' { // parse properties switch { case strings.HasPrefix(line[1:], "version "): a := strings.Split(line[1:], " ") if a[1] != unicode.Version { log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version) } case strings.HasPrefix(line[1:], "backwards "): log.Fatalf("%d: unsupported option backwards", i) default: log.Printf("%d: unknown option %s", i, line[1:]) } } else { // parse entries part := strings.Split(line, " ; ") if len(part) != 2 { log.Fatalf("%d: production rule without ';': %v", i, line) } lhs := []rune{} for _, v := range strings.Split(part[0], " ") { if v == "" { continue } lhs = append(lhs, rune(convHex(i, v))) } var n int rhs := [][]int{} for _, m := range colelem.FindAllStringSubmatch(part[1], -1) { n += len(m[0]) elem := []int{} for _, h := range strings.Split(m[2], ".") { elem = append(elem, convHex(i, h)) } if p := elem[0]; m[1] == "*" { if p > maxVar { maxVar = p } } else if p > 0 && p < minNonVar { minNonVar = p } rhs = append(rhs, elem) } if len(part[1]) < n+3 || part[1][n+1] != '#' { log.Fatalf("%d: expected comment; found %s", i, part[1][n:]) } builder.Add(lhs, rhs) } } if maxVar >= minNonVar { log.Fatalf("found maxVar > minNonVar (%d > %d)", maxVar, minNonVar) } return maxVar }