func TestTables(t *testing.T) { if !*long { return } gen.Init() // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("supplemental", "main") d.SetSectionFilter("numbers") data, err := d.DecodeZip(r) if err != nil { t.Fatalf("DecodeZip: %v", err) } dr, err := cldr.ParseDraft(*draft) if err != nil { t.Fatalf("filter: %v", err) } for _, lang := range data.Locales() { p := message.NewPrinter(language.MustParse(lang)) ldml := data.RawLDML(lang) if ldml.Numbers == nil || ldml.Numbers.Currencies == nil { continue } for _, c := range ldml.Numbers.Currencies.Currency { syms := cldr.MakeSlice(&c.Symbol) syms.SelectDraft(dr) for _, sym := range c.Symbol { cur, err := ParseISO(c.Type) if err != nil { continue } formatter := Symbol switch sym.Alt { case "": case "narrow": formatter = NarrowSymbol default: continue } want := sym.Data() if got := p.Sprint(formatter(cur)); got != want { t.Errorf("%s:%sSymbol(%s) = %s; want %s", lang, strings.Title(sym.Alt), c.Type, got, want) } } } } }
func ExampleSlice() { var dr *cldr.CLDR // assume this is initalized x, _ := dr.LDML("en") cs := x.Collations.Collation // remove all but the default cldr.MakeSlice(&cs).Filter(func(e cldr.Elem) bool { return e.GetCommon().Type != x.Collations.Default() }) for i, c := range cs { fmt.Println(i, c.Type) } }
func (b *builder) filter() { filter := func(s *cldr.Slice) { if *short { s.SelectOnePerGroup("alt", []string{"short", ""}) } else { s.SelectOnePerGroup("alt", []string{"stand-alone", ""}) } d, err := cldr.ParseDraft(*draft) if err != nil { log.Fatalf("filter: %v", err) } s.SelectDraft(d) } for _, loc := range b.data.Locales() { if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil { if ldn.Languages != nil { s := cldr.MakeSlice(&ldn.Languages.Language) if filter(&s); len(ldn.Languages.Language) == 0 { ldn.Languages = nil } } if ldn.Scripts != nil { s := cldr.MakeSlice(&ldn.Scripts.Script) if filter(&s); len(ldn.Scripts.Script) == 0 { ldn.Scripts = nil } } if ldn.Territories != nil { s := cldr.MakeSlice(&ldn.Territories.Territory) if filter(&s); len(ldn.Territories.Territory) == 0 { ldn.Territories = nil } } } } }
// parseCollation parses XML files in the collation directory of the CLDR core.zip file. func parseCollation(b *build.Builder) { d := &cldr.Decoder{} d.SetDirFilter("collation") data := decodeCLDR(d) for _, loc := range data.Locales() { x, err := data.LDML(loc) failOnError(err) if skipLang(x.Identity.Language.Type) { continue } cs := x.Collations.Collation sl := cldr.MakeSlice(&cs) if len(types.s) == 0 { sl.SelectAnyOf("type", x.Collations.Default()) } else if !types.all { sl.SelectAnyOf("type", types.s...) } sl.SelectOnePerGroup("alt", altInclude()) for _, c := range cs { id, err := language.Parse(loc) if err != nil { fmt.Fprintf(os.Stderr, "invalid locale: %q", err) continue } // Support both old- and new-style defaults. d := c.Type if x.Collations.DefaultCollation == nil { d = x.Collations.Default() } else { d = x.Collations.DefaultCollation.Data() } // We assume tables are being built either for search or collation, // but not both. For search the default is always "search". if d != c.Type && c.Type != "search" { typ := c.Type if len(c.Type) > 8 { typ = typeMap[c.Type] } id, err = id.SetTypeForKey("co", typ) failOnError(err) } t := b.Tailoring(id) c.Process(processor{t}) } } }
// genSymbols generates the symbols used for currencies. Most symbols are
// defined in root and there is only very small variation per language.
// The following rules apply:
// - A symbol can be requested as normal or narrow.
// - If a symbol is not defined for a currency, it defaults to its ISO code.
//
// The function works in four phases: it collects the symbols per language
// from data, removes entries equal to the value of the closest ancestor that
// defines one, builds a de-duplicated length-prefixed string table, and writes
// that table plus per-type language/currency indexes to w.
//
// An invalid *draft value or a locale without a compact index terminates the
// program through log.Fatalf.
func (b *builder) genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
	d, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("filter: %v", err)
	}

	// Symbol kinds; numTypes is the number of kinds and sizes the inner array.
	const (
		normal = iota
		narrow
		numTypes
	)
	// language -> currency -> type -> symbol
	// A nil pointer means "no symbol recorded"; a pointer to "" means the ISO
	// code is the symbol.
	var symbols [language.NumCompactTags][][numTypes]*string

	// Collect symbol information per language.
	for _, lang := range data.Locales() {
		ldml := data.RawLDML(lang)
		if ldml.Numbers == nil || ldml.Numbers.Currencies == nil {
			continue
		}

		langIndex, ok := language.CompactIndex(language.MustParse(lang))
		if !ok {
			log.Fatalf("No compact index for language %s", lang)
		}

		// One slot per currency index, plus one extra slot.
		symbols[langIndex] = make([][numTypes]*string, b.numCurrencies+1)

		for _, c := range ldml.Numbers.Currencies.Currency {
			syms := cldr.MakeSlice(&c.Symbol)
			syms.SelectDraft(d)

			for _, sym := range c.Symbol {
				// v is declared per iteration, so every stored pointer refers
				// to its own string.
				v := sym.Data()
				if v == c.Type {
					// We define "" to mean the ISO symbol.
					v = ""
				}
				cur := b.currencies.Index([]byte(c.Type))
				// XXX gets reassigned to 0 in the package's code.
				if c.Type == "XXX" {
					cur = 0
				}
				if cur == -1 {
					fmt.Println("Unsupported:", c.Type)
					continue
				}

				// Symbols with any other alt value are ignored.
				switch sym.Alt {
				case "":
					symbols[langIndex][cur][normal] = &v
				case "narrow":
					symbols[langIndex][cur][narrow] = &v
				}
			}
		}
	}

	// Remove values identical to the parent.
	// data is a slice header copied from symbols[langIndex]; writes through
	// data[curIndex] therefore modify the shared table.
	for langIndex, data := range symbols {
		for curIndex, curs := range data {
			for typ, sym := range curs {
				if sym == nil {
					continue
				}
				// Walk up the parent chain until an ancestor with a value for
				// this currency and type is found, or the root (0) is reached.
				for p := uint16(langIndex); p != 0; {
					p = internal.Parent[p]
					x := symbols[p]
					if x == nil {
						// This ancestor has no currency data at all.
						continue
					}
					if v := x[curIndex][typ]; v != nil || p == 0 {
						// Either the ancestor defines a value, or this is the
						// root and the value is undefined, in which case the
						// default "" is used for the comparison.
						parentSym := ""
						if v != nil {
							parentSym = *v
						}
						if parentSym == *sym {
							// Value is the same as parent.
							data[curIndex][typ] = nil
						}
						break
					}
				}
			}
		}
	}

	// Create symbol index.
	// symbolData is a sequence of <length byte><symbol bytes> records; the
	// leading 0 byte occupies offset 0 so that no real symbol gets it.
	symbolData := []byte{0}
	symbolLookup := map[string]uint16{"": 0} // 0 means default, so block that value.
	for _, data := range symbols {
		for _, curs := range data {
			for _, sym := range curs {
				if sym == nil {
					continue
				}
				if _, ok := symbolLookup[*sym]; !ok {
					// First occurrence: record the offset and append the record.
					symbolLookup[*sym] = uint16(len(symbolData))
					symbolData = append(symbolData, byte(len(*sym)))
					symbolData = append(symbolData, *sym...)
				}
			}
		}
	}
	w.WriteComment(` symbols holds symbol data of the form <n> <str>, where n is the length of the symbol string str.`)
	w.WriteConst("symbols", string(symbolData))

	// Create index from language to currency lookup to symbol.
	type curToIndex struct{ cur, idx uint16 }
	w.WriteType(curToIndex{})

	prefix := []string{"normal", "narrow"}
	// Create data for regular and narrow symbol data.
	for typ := normal; typ <= narrow; typ++ {

		indexes := []curToIndex{} // maps currency to symbol index
		// languages[i] is the offset in indexes at which the entries of
		// language i start; a final entry marks the end of the last language.
		languages := []uint16{}

		for _, data := range symbols {
			languages = append(languages, uint16(len(indexes)))
			for curIndex, curs := range data {

				if sym := curs[typ]; sym != nil {
					indexes = append(indexes, curToIndex{uint16(curIndex), symbolLookup[*sym]})
				}
			}
		}
		languages = append(languages, uint16(len(indexes)))

		w.WriteVar(prefix[typ]+"LangIndex", languages)
		w.WriteVar(prefix[typ]+"SymIndex", indexes)
	}
}
// writeMatchData writes tables with languages and scripts for which there is // mutual intelligibility. The data is based on CLDR's languageMatching data. // Note that we use a different algorithm than the one defined by CLDR and that // we slightly modify the data. For example, we convert scores to confidence levels. // We also drop all region-related data as we use a different algorithm to // determine region equivalence. func (b *builder) writeMatchData() { b.writeType(mutualIntelligibility{}) b.writeType(scriptIntelligibility{}) lm := b.supp.LanguageMatching.LanguageMatches cldr.MakeSlice(&lm).SelectAnyOf("type", "written") matchLang := []mutualIntelligibility{} matchScript := []scriptIntelligibility{} // Convert the languageMatch entries in lists keyed by desired language. for _, m := range lm[0].LanguageMatch { // Different versions of CLDR use different separators. desired := strings.Replace(m.Desired, "-", "_", -1) supported := strings.Replace(m.Supported, "-", "_", -1) d := strings.Split(desired, "_") s := strings.Split(supported, "_") if len(d) != len(s) || len(d) > 2 { // Skip all entries with regions and work around CLDR bug. continue } pct, _ := strconv.ParseInt(m.Percent, 10, 8) if len(d) == 2 && d[0] == s[0] && len(d[1]) == 4 { // language-script pair. lang := uint16(0) if d[0] != "*" { lang = uint16(b.langIndex(d[0])) } matchScript = append(matchScript, scriptIntelligibility{ lang: lang, want: uint8(b.script.index(d[1])), have: uint8(b.script.index(s[1])), conf: toConf(uint8(pct)), }) if m.Oneway != "true" { matchScript = append(matchScript, scriptIntelligibility{ lang: lang, want: uint8(b.script.index(s[1])), have: uint8(b.script.index(d[1])), conf: toConf(uint8(pct)), }) } } else if len(d) == 1 && d[0] != "*" { if pct == 100 { // nb == no is already handled by macro mapping. Check there // really is only this case. 
if d[0] != "no" || s[0] != "nb" { log.Fatalf("unhandled equivalence %s == %s", s[0], d[0]) } continue } matchLang = append(matchLang, mutualIntelligibility{ want: uint16(b.langIndex(d[0])), have: uint16(b.langIndex(s[0])), conf: uint8(pct), oneway: m.Oneway == "true", }) } else { // TODO: Handle other mappings. a := []string{"*;*", "*_*;*_*", "es_MX;es_419"} s := strings.Join([]string{desired, supported}, ";") if i := sort.SearchStrings(a, s); i == len(a) || a[i] != s { log.Printf("%q not handled", s) } } } sort.Stable(sortByConf(matchLang)) // collapse percentage into confidence classes for i, m := range matchLang { matchLang[i].conf = toConf(m.conf) } b.writeSlice("matchLang", matchLang) b.writeSlice("matchScript", matchScript) }
// genSymbols generates the number symbol tables from data and writes them to
// w as the variables symIndex, symData, langToDefaults and langToAlt.
//
// It collects one symbol set per (language, numbering system) pair, fills in
// unspecified symbols from ancestor locales, de-duplicates the resulting sets,
// and builds per-language indexes for the default and the alternative
// numbering systems.
//
// An invalid *draft value, a locale without a compact index, or more than
// 0x7F alternative assignments terminates the program through the log package.
func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
	d, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("invalid draft level: %v", err)
	}

	nNumberSystems := system(len(systemMap))

	// symbols is one complete set of symbol strings, indexed by symbol type.
	type symbols [NumSymbolTypes]string

	type key struct {
		tag    int // from language.CompactIndex
		system system
	}
	symbolMap := map[key]*symbols{}

	// defaults maps a compact language index to its default numbering system.
	// Languages without an entry read as the zero value of system.
	defaults := map[int]system{}

	for _, lang := range data.Locales() {
		ldml := data.RawLDML(lang)
		if ldml.Numbers == nil {
			continue
		}
		langIndex, ok := language.CompactIndex(language.MustParse(lang))
		if !ok {
			log.Fatalf("No compact index for language %s", lang)
		}
		// Note: this d shadows the draft level only inside the if statement.
		if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
			defaults[langIndex] = getNumberSystem(d[0].Data())
		}

		syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
		syms.SelectDraft(d)

		for _, sym := range ldml.Numbers.Symbols {
			if sym.NumberSystem == "" {
				// This is just linking the default of root to "latn".
				continue
			}
			symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
				SymDecimal:                getFirst("decimal", sym.Decimal),
				SymGroup:                  getFirst("group", sym.Group),
				SymList:                   getFirst("list", sym.List),
				SymPercentSign:            getFirst("percentSign", sym.PercentSign),
				SymPlusSign:               getFirst("plusSign", sym.PlusSign),
				SymMinusSign:              getFirst("minusSign", sym.MinusSign),
				SymExponential:            getFirst("exponential", sym.Exponential),
				SymSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent),
				SymPerMille:               getFirst("perMille", sym.PerMille),
				SymInfinity:               getFirst("infinity", sym.Infinity),
				SymNan:                    getFirst("nan", sym.Nan),
				SymTimeSeparator:          getFirst("timeSeparator", sym.TimeSeparator),
			}
		}
	}

	// Expand all values.
	// Every empty symbol is filled from the closest ancestor that has a
	// non-empty value for the same numbering system, falling back to the
	// entry for key{} (root, system 0) once the root is reached.
	// NOTE(review): if that fallback value is itself empty the inner loop
	// does not appear to terminate — presumably the root entry is always
	// complete; confirm.
	for k, syms := range symbolMap {
		for t := SymDecimal; t < NumSymbolTypes; t++ {
			p := k.tag
			for syms[t] == "" {
				p = int(internal.Parent[p])
				if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
					syms[t] = (*pSyms)[t]
					break
				}
				if p == 0 /* und */ {
					// Default to root, latn.
					syms[t] = (*symbolMap[key{}])[t]
				}
			}
		}
	}

	// Unique the symbol sets and write the string data.
	// m maps a symbol set to its position in symIndex.
	m := map[symbols]int{}
	sb := stringset.NewBuilder()

	symIndex := [][NumSymbolTypes]byte{}

	// Locales are visited in the order of data.Locales() for each numbering
	// system in turn, which fixes the order of symIndex.
	for ns := system(0); ns < nNumberSystems; ns++ {
		for _, l := range data.Locales() {
			langIndex, _ := language.CompactIndex(language.MustParse(l))
			s := symbolMap[key{langIndex, ns}]
			if s == nil {
				continue
			}
			if _, ok := m[*s]; !ok {
				m[*s] = len(symIndex)
				sb.Add(s[:]...)
				var x [NumSymbolTypes]byte
				for i := SymDecimal; i < NumSymbolTypes; i++ {
					x[i] = byte(sb.Index((*s)[i]))
				}
				symIndex = append(symIndex, x)
			}
		}
	}
	w.WriteVar("symIndex", symIndex)
	w.WriteVar("symData", sb.Set())

	// resolveSymbolIndex gets the index from the closest matching locale,
	// including the locale itself.
	resolveSymbolIndex := func(langIndex int, ns system) byte {
		for {
			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
				return byte(m[*sym])
			}
			if langIndex == 0 {
				return 0 // und, latn
			}
			langIndex = int(internal.Parent[langIndex])
		}
	}

	// Create an index with the symbols for each locale for the latn numbering
	// system. If this is not the default, or the only one, for a locale, we
	// will overwrite the value later.
	var langToDefaults [language.NumCompactTags]byte
	for _, l := range data.Locales() {
		langIndex, _ := language.CompactIndex(language.MustParse(l))
		langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
	}

	// Delete redundant entries.
	// An alternative numbering system whose symbol set equals that of the
	// locale's default system is removed from symbolMap.
	for _, l := range data.Locales() {
		langIndex, _ := language.CompactIndex(language.MustParse(l))
		def := defaults[langIndex]
		syms := symbolMap[key{langIndex, def}]
		if syms == nil {
			continue
		}
		for ns := system(0); ns < nNumberSystems; ns++ {
			if ns == def {
				continue
			}
			if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
				delete(symbolMap, key{langIndex, ns})
			}
		}
	}

	// Create a sorted list of alternatives per language. This will only need to
	// be referenced if a user specified an alternative numbering system.
	var langToAlt []altSymData
	for _, l := range data.Locales() {
		langIndex, _ := language.CompactIndex(language.MustParse(l))
		start := len(langToAlt)
		// start is stored in the low 7 bits of a byte below, so it must fit.
		if start > 0x7F {
			log.Fatal("Number of alternative assignments > 0x7F")
		}
		// Create the entry for the default value.
		def := defaults[langIndex]
		langToAlt = append(langToAlt, altSymData{
			compactTag: uint16(langIndex),
			system:     def,
			symIndex:   resolveSymbolIndex(langIndex, def),
		})

		for ns := system(0); ns < nNumberSystems; ns++ {
			if def == ns {
				continue
			}
			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
				langToAlt = append(langToAlt, altSymData{
					compactTag: uint16(langIndex),
					system:     ns,
					symIndex:   resolveSymbolIndex(langIndex, ns),
				})
			}
		}
		if def == 0 && len(langToAlt) == start+1 {
			// No additional data: erase the entry.
			langToAlt = langToAlt[:start]
		} else {
			// Overwrite the entry in langToDefaults.
			// The high bit marks the value as an offset into langToAlt
			// rather than a direct symbol index.
			langToDefaults[langIndex] = 0x80 | byte(start)
		}
	}
	w.WriteComment(` langToDefaults maps a compact language index to the default numbering system and default symbol set`)
	w.WriteVar("langToDefaults", langToDefaults)

	w.WriteComment(` langToAlt is a list of numbering system and symbol set pairs, sorted and marked by compact language index.`)
	w.WriteVar("langToAlt", langToAlt)
}
func TestTables(t *testing.T) { testtext.SkipIfNotLong(t) // Read the CLDR zip file. r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("supplemental", "main") d.SetSectionFilter("numbers") data, err := d.DecodeZip(r) if err != nil { t.Fatalf("DecodeZip: %v", err) } dr, err := cldr.ParseDraft(*draft) if err != nil { t.Fatalf("filter: %v", err) } for _, lang := range data.Locales() { p := message.NewPrinter(language.MustParse(lang)) ldml := data.RawLDML(lang) if ldml.Numbers == nil || ldml.Numbers.Currencies == nil { continue } for _, c := range ldml.Numbers.Currencies.Currency { syms := cldr.MakeSlice(&c.Symbol) syms.SelectDraft(dr) for _, sym := range c.Symbol { cur, err := ParseISO(c.Type) if err != nil { continue } formatter := Symbol switch sym.Alt { case "": case "narrow": formatter = NarrowSymbol default: continue } want := sym.Data() if got := p.Sprint(formatter(cur)); got != want { t.Errorf("%s:%sSymbol(%s) = %s; want %s", lang, strings.Title(sym.Alt), c.Type, got, want) } } } } for _, reg := range data.Supplemental().CurrencyData.Region { i := 0 for ; regionData[i].Region().String() != reg.Iso3166; i++ { } it := Query(Historical, NonTender, Region(language.MustParseRegion(reg.Iso3166))) for _, cur := range reg.Currency { from, _ := time.Parse("2006-01-02", cur.From) to, _ := time.Parse("2006-01-02", cur.To) it.Next() for j, r := range []QueryIter{&iter{regionInfo: ®ionData[i]}, it} { if got, _ := r.From(); from != got { t.Errorf("%d:%s:%s:from: got %v; want %v", j, reg.Iso3166, cur.Iso4217, got, from) } if got, _ := r.To(); to != got { t.Errorf("%d:%s:%s:to: got %v; want %v", j, reg.Iso3166, cur.Iso4217, got, to) } } i++ } } }
// genFormats generates the lookup table for decimal, scientific and percent // patterns. // // CLDR allows for patterns to be different per language for different numbering // systems. In practice the patterns are set to be consistent for a language // independent of the numbering system. genFormats verifies that no language // deviates from this. func genFormats(w *gen.CodeWriter, data *cldr.CLDR) { d, err := cldr.ParseDraft(*draft) if err != nil { log.Fatalf("invalid draft level: %v", err) } // Fill the first slot with a dummy so we can identify unspecified tags. formats := []number.Format{{}} patterns := map[string]int{} // TODO: It would be possible to eliminate two of these slices by having // another indirection and store a reference to the combination of patterns. decimal := make([]byte, language.NumCompactTags) scientific := make([]byte, language.NumCompactTags) percent := make([]byte, language.NumCompactTags) for _, lang := range data.Locales() { ldml := data.RawLDML(lang) if ldml.Numbers == nil { continue } langIndex, ok := language.CompactIndex(language.MustParse(lang)) if !ok { log.Fatalf("No compact index for language %s", lang) } type patternSlice []*struct { cldr.Common Numbers string `xml:"numbers,attr"` Count string `xml:"count,attr"` } add := func(name string, tags []byte, ps patternSlice) { sl := cldr.MakeSlice(&ps) sl.SelectDraft(d) if len(ps) == 0 { return } if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] { log.Fatalf("Inconsistent %d patterns for language %s", name, lang) } s := ps[0].Data() index, ok := patterns[s] if !ok { nf, err := number.ParsePattern(s) if err != nil { log.Fatal(err) } index = len(formats) patterns[s] = index formats = append(formats, *nf) } tags[langIndex] = byte(index) } for _, df := range ldml.Numbers.DecimalFormats { for _, l := range df.DecimalFormatLength { if l.Type != "" { continue } for _, f := range l.DecimalFormat { add("decimal", decimal, f.Pattern) } } } for _, df := range ldml.Numbers.ScientificFormats { 
for _, l := range df.ScientificFormatLength { if l.Type != "" { continue } for _, f := range l.ScientificFormat { add("scientific", scientific, f.Pattern) } } } for _, df := range ldml.Numbers.PercentFormats { for _, l := range df.PercentFormatLength { if l.Type != "" { continue } for _, f := range l.PercentFormat { add("percent", percent, f.Pattern) } } } } // Complete the parent tag array to reflect inheritance. An index of 0 // indicates an unspecified value. for _, data := range [][]byte{decimal, scientific, percent} { for i := range data { p := uint16(i) for ; data[p] == 0; p = internal.Parent[p] { } data[i] = data[p] } } w.WriteVar("tagToDecimal", decimal) w.WriteVar("tagToScientific", scientific) w.WriteVar("tagToPercent", percent) value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1) // Break up the lines. This won't give ideal perfect formatting, but it is // better than one huge line. value = strings.Replace(value, ", ", ",\n", -1) fmt.Fprintf(w, "var formats = %s\n", value) }
func TestSymbols(t *testing.T) { testtext.SkipIfNotLong(t) draft, err := cldr.ParseDraft(*draft) if err != nil { log.Fatalf("invalid draft level: %v", err) } r := gen.OpenCLDRCoreZip() defer r.Close() d := &cldr.Decoder{} d.SetDirFilter("main") d.SetSectionFilter("numbers") data, err := d.DecodeZip(r) if err != nil { t.Fatalf("DecodeZip: %v", err) } for _, lang := range data.Locales() { ldml := data.RawLDML(lang) if ldml.Numbers == nil { continue } langIndex, ok := language.CompactIndex(language.MustParse(lang)) if !ok { t.Fatalf("No compact index for language %s", lang) } syms := cldr.MakeSlice(&ldml.Numbers.Symbols) syms.SelectDraft(draft) for _, sym := range ldml.Numbers.Symbols { if sym.NumberSystem == "" { continue } testCases := []struct { name string st SymbolType x interface{} }{ {"Decimal", SymDecimal, sym.Decimal}, {"Group", SymGroup, sym.Group}, {"List", SymList, sym.List}, {"PercentSign", SymPercentSign, sym.PercentSign}, {"PlusSign", SymPlusSign, sym.PlusSign}, {"MinusSign", SymMinusSign, sym.MinusSign}, {"Exponential", SymExponential, sym.Exponential}, {"SuperscriptingExponent", SymSuperscriptingExponent, sym.SuperscriptingExponent}, {"PerMille", SymPerMille, sym.PerMille}, {"Infinity", SymInfinity, sym.Infinity}, {"NaN", SymNan, sym.Nan}, {"TimeSeparator", SymTimeSeparator, sym.TimeSeparator}, } info := InfoFromLangID(langIndex, sym.NumberSystem) for _, tc := range testCases { // Extract the wanted value. v := reflect.ValueOf(tc.x) if v.Len() == 0 { return } if v.Len() > 1 { t.Fatalf("Multiple values of %q within single symbol not supported.", tc.name) } want := v.Index(0).MethodByName("Data").Call(nil)[0].String() got := info.Symbol(tc.st) if got != want { t.Errorf("%s:%s:%s: got %q; want %q", lang, sym.NumberSystem, tc.name, got, want) } } } } }