func TestGetScriptID(t *testing.T) { idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff") tests := []struct { in string out scriptID }{ {" ", 0}, {" ", 0}, {" ", 0}, {"", 0}, {"Aaaa", 0}, {"Bbbb", 1}, {"Dddd", 2}, {"dddd", 2}, {"dDDD", 2}, {"Eeee", 3}, {"Zzzz", 4}, } for i, tt := range tests { if id, err := getScriptID(idx, b(tt.in)); id != tt.out { t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out) } else if id == 0 && err == nil { t.Errorf("%d:%s: no error; expected one", i, tt.in) } } }
func (b *builder) writeCurrencies() { b.writeConsts(b.currency.index, "XTS", "XXX") digits := map[string]uint64{} rounding := map[string]uint64{} for _, info := range b.supp.CurrencyData.Fractions[0].Info { var err error digits[info.Iso4217], err = strconv.ParseUint(info.Digits, 10, curDigitBits) failOnError(err) rounding[info.Iso4217], err = strconv.ParseUint(info.Rounding, 10, curRoundBits) failOnError(err) } for i, cur := range b.currency.slice() { d := uint64(2) // default number of decimal positions if dd, ok := digits[cur]; ok { d = dd } var r uint64 if r = rounding[cur]; r == 0 { r = 1 // default rounding increment in units 10^{-digits) } b.currency.s[i] += mkCurrencyInfo(int(r), int(d)) } b.writeConst("currency", tag.Index(b.currency.join())) // Hack alert: gofmt indents a trailing comment after an indented string. // Ensure that the next thing written is not a comment. // writeLikelyData serves this purpose as it starts with an uncommented type. }
func TestCurrency(t *testing.T) { idx := tag.Index(strings.Join([]string{ " \x00", "BBB" + mkCurrencyInfo(5, 2), "DDD\x00", "XXX\x00", "ZZZ\x00", "\xff\xff\xff\xff", }, "")) tests := []struct { in string out currencyID round, dec int }{ {" ", 0, 0, 0}, {" ", 0, 0, 0}, {" ", 0, 0, 0}, {"", 0, 0, 0}, {"BBB", 1, 5, 2}, {"DDD", 2, 0, 0}, {"dDd", 2, 0, 0}, {"ddd", 2, 0, 0}, {"XXX", 3, 0, 0}, {"Zzz", 4, 0, 0}, } for i, tt := range tests { id, err := getCurrencyID(idx, b(tt.in)) if id != tt.out { t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out) } else if tt.out == 0 && err == nil { t.Errorf("%d:%s: no error; expected one", i, tt.in) } if id > 0 { if d := decimals(idx, id); d != tt.dec { t.Errorf("%d:dec(%s): found %d; want %d", i, tt.in, d, tt.dec) } if d := round(idx, id); d != tt.round { t.Errorf("%d:round(%s): found %d; want %d", i, tt.in, d, tt.round) } } } }
func (b *builder) writeScript() { b.writeConsts(b.script.index, scriptConsts...) b.writeConst("script", tag.Index(b.script.join())) supp := make([]uint8, len(b.lang.slice())) for i, v := range b.lang.slice()[1:] { if sc := b.registry[v].suppressScript; sc != "" { supp[i+1] = uint8(b.script.index(sc)) } } b.writeSlice("suppressScript", supp) // There is only one deprecated script in CLDR. This value is hard-coded. // We check here if the code must be updated. for _, a := range b.supp.Metadata.Alias.ScriptAlias { if a.Type != "Qaai" { log.Panicf("unexpected deprecated stript %q", a.Type) } } }
func (b *builder) writeRegion() { b.writeConsts(b.region.index, regionConsts...) isoOffset := b.region.index("AA") m49map := make([]int16, len(b.region.slice())) fromM49map := make(map[int16]int) altRegionISO3 := "" altRegionIDs := []uint16{} b.writeConst("isoRegionOffset", isoOffset) // 2-letter region lookup and mapping to numeric codes. regionISO := b.region.clone() regionISO.s = regionISO.s[isoOffset:] regionISO.sorted = false regionTypes := make([]byte, len(b.region.s)) // Is the region valid BCP 47? for s, e := range b.registry { if len(s) == 2 && s == strings.ToUpper(s) { i := b.region.index(s) for _, d := range e.description { if strings.Contains(d, "Private use") { regionTypes[i] = iso3166UserAssgined } } regionTypes[i] |= bcp47Region } } // Is the region a valid ccTLD? r := gen.OpenIANAFile("domains/root/db") defer r.Close() buf, err := ioutil.ReadAll(r) failOnError(err) re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) for _, m := range re.FindAllSubmatch(buf, -1) { i := b.region.index(strings.ToUpper(string(m[1]))) regionTypes[i] |= ccTLD } b.writeSlice("regionTypes", regionTypes) iso3Set := make(map[string]int) update := func(iso2, iso3 string) { i := regionISO.index(iso2) if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { regionISO.s[i] += iso3[1:] iso3Set[iso3] = -1 } else { if ok && j >= 0 { regionISO.s[i] += string([]byte{0, byte(j)}) } else { iso3Set[iso3] = len(altRegionISO3) regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) altRegionISO3 += iso3 altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) } } } for _, tc := range b.supp.CodeMappings.TerritoryCodes { i := regionISO.index(tc.Type) + isoOffset if d := m49map[i]; d != 0 { log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) } m49 := parseM49(tc.Numeric) m49map[i] = m49 if r := fromM49map[m49]; r == 0 { fromM49map[m49] = i } else if r != i { dep := b.registry[regionISO.s[r-isoOffset]].deprecated if t := b.registry[tc.Type]; t != nil && dep 
!= "" && (t.deprecated == "" || t.deprecated > dep) { fromM49map[m49] = i } } } for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { from := parseM49(ta.Type) if r := fromM49map[from]; r == 0 { fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset } } } for _, tc := range b.supp.CodeMappings.TerritoryCodes { if len(tc.Alpha3) == 3 { update(tc.Type, tc.Alpha3) } } // This entries are not included in territoryCodes. Mostly 3-letter variants // of deleted codes and an entry for QU. for _, m := range []struct{ iso2, iso3 string }{ {"CT", "CTE"}, {"DY", "DHY"}, {"HV", "HVO"}, {"JT", "JTN"}, {"MI", "MID"}, {"NH", "NHB"}, {"NQ", "ATN"}, {"PC", "PCI"}, {"PU", "PUS"}, {"PZ", "PCZ"}, {"RH", "RHO"}, {"VD", "VDR"}, {"WK", "WAK"}, // These three-letter codes are used for others as well. {"FQ", "ATF"}, } { update(m.iso2, m.iso3) } for i, s := range regionISO.s { if len(s) != 4 { regionISO.s[i] = s + " " } } b.writeConst("regionISO", tag.Index(regionISO.join())) b.writeConst("altRegionISO3", altRegionISO3) b.writeSlice("altRegionIDs", altRegionIDs) // Create list of deprecated regions. // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only // Transitionally-reserved mapping not included. regionOldMap := stringSet{} // Include regions in territoryAlias (not all are in the IANA registry!) for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { regionOldMap.add(reg.Type) regionOldMap.updateLater(reg.Type, reg.Replacement) i, _ := regionISO.find(reg.Type) j, _ := regionISO.find(reg.Replacement) if k := m49map[i+isoOffset]; k == 0 { m49map[i+isoOffset] = m49map[j+isoOffset] } } } b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { return uint16(b.region.index(s)) }) // 3-digit region lookup, groupings. 
for i := 1; i < isoOffset; i++ { m := parseM49(b.region.s[i]) m49map[i] = m fromM49map[m] = i } b.writeSlice("m49", m49map) const ( searchBits = 7 regionBits = 9 ) if len(m49map) >= 1<<regionBits { log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits) } m49Index := [9]int16{} fromM49 := []uint16{} m49 := []int{} for k, _ := range fromM49map { m49 = append(m49, int(k)) } sort.Ints(m49) for _, k := range m49[1:] { val := (k & (1<<searchBits - 1)) << regionBits fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)])) m49Index[1:][k>>searchBits] = int16(len(fromM49)) } b.writeSlice("m49Index", m49Index) b.writeSlice("fromM49", fromM49) }
// writeLanguage generates all tables needed for language canonicalization.
//
// It writes the language constants and the tables "lang", "langNoIndexOffset",
// "langNoIndex", "altLangISO3", "altLangIndex", "langAliasMap" and
// "langAliasTypes". It exits via log.Fatalf when the CLDR data contains an
// alias reason it does not know, and panics when a 2-letter language has no
// 3-letter form.
func (b *builder) writeLanguage() {
	meta := b.supp.Metadata

	b.writeConst("nonCanonicalUnd", b.lang.index("und"))
	b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
	b.writeConst("langPrivateStart", b.langIndex("qaa"))
	b.writeConst("langPrivateEnd", b.langIndex("qtz"))

	// Get language codes that need to be mapped (overlong 3-letter codes,
	// deprecated 2-letter codes, legacy and grandfathered tags.)
	langAliasMap := stringSet{}
	aliasTypeMap := map[string]langAliasType{}

	// altLangISO3 get the alternative ISO3 names that need to be mapped.
	altLangISO3 := stringSet{}
	// Add dummy start to avoid the use of index 0.
	altLangISO3.add("---")
	altLangISO3.updateLater("---", "aa")

	// Work on a copy so that b.lang itself is not modified by the updates
	// recorded below.
	lang := b.lang.clone()
	for _, a := range meta.Alias.LanguageAlias {
		// An alias without replacement is mapped to "und".
		if a.Replacement == "" {
			a.Replacement = "und"
		}
		// TODO: support mapping to tags
		// Only the part of the replacement before the first "_" is used.
		repl := strings.SplitN(a.Replacement, "_", 2)[0]
		if a.Reason == "overlong" {
			// Overlong aliases pair a 3-letter type with its 2-letter
			// replacement; they are recorded on lang rather than in the
			// alias map.
			if len(a.Replacement) == 2 && len(a.Type) == 3 {
				lang.updateLater(a.Replacement, a.Type)
			}
		} else if len(a.Type) <= 3 {
			switch a.Reason {
			case "macrolanguage":
				aliasTypeMap[a.Type] = langMacro
			case "deprecated":
				// handled elsewhere
				continue
			case "bibliographic", "legacy":
				// "no" is deliberately left out of the alias map.
				if a.Type == "no" {
					continue
				}
				aliasTypeMap[a.Type] = langLegacy
			default:
				log.Fatalf("new %s alias: %s", a.Reason, a.Type)
			}
			langAliasMap.add(a.Type)
			langAliasMap.updateLater(a.Type, repl)
		}
	}
	// Manually add the mapping of "nb" (Norwegian) to its macro language.
	// This can be removed if CLDR adopts this change.
	langAliasMap.add("nb")
	langAliasMap.updateLater("nb", "no")
	aliasTypeMap["nb"] = langMacro

	for k, v := range b.registry {
		// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
		if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
			langAliasMap.add(k)
			langAliasMap.updateLater(k, v.preferred)
			aliasTypeMap[k] = langDeprecated
		}
	}
	// Fix CLDR mappings.
	lang.updateLater("tl", "tgl")
	lang.updateLater("sh", "hbs")
	lang.updateLater("mo", "mol")
	lang.updateLater("no", "nor")
	lang.updateLater("tw", "twi")
	lang.updateLater("nb", "nob")
	lang.updateLater("ak", "aka")

	// Ensure that each 2-letter code is matched with a 3-letter code.
	for _, v := range lang.s[1:] {
		s, ok := lang.update[v]
		if !ok {
			// Fall back to the update recorded for the alias target of v.
			if s, ok = lang.update[langAliasMap.update[v]]; !ok {
				continue
			}
			lang.update[v] = s
		}
		// A 3-letter code with a different first letter cannot be stored
		// inline and is registered as an alternative ISO3 name instead.
		if v[0] != s[0] {
			altLangISO3.add(s)
			altLangISO3.updateLater(s, v)
		}
	}

	// Complete canonicalized language tags.
	lang.freeze()
	for i, v := range lang.s {
		// We can avoid these manual entries by using the IANA registry directly.
		// Seems easier to update the list manually, as changes are rare.
		// The panic in this loop will trigger if we miss an entry.
		add := ""
		if s, ok := lang.update[v]; ok {
			if s[0] == v[0] {
				// Same first letter: append the remaining letters.
				add = s[1:]
			} else {
				// Otherwise append a 0 byte and the index into altLangISO3.
				add = string([]byte{0, byte(altLangISO3.index(s))})
			}
		} else if len(v) == 3 {
			// 3-letter codes without update get a single 0 byte.
			add = "\x00"
		} else {
			log.Panicf("no data for long form of %q", v)
		}
		lang.s[i] += add
	}
	b.writeConst("lang", tag.Index(lang.join()))

	b.writeConst("langNoIndexOffset", len(b.lang.s))

	// space of all valid 3-letter language identifiers.
	b.writeBitVector("langNoIndex", b.langNoIndex.slice())

	altLangIndex := []uint16{}
	for i, s := range altLangISO3.slice() {
		// Each entry is extended with the current length of altLangIndex,
		// i.e. the position its language index gets when appended below.
		altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
		// Entry 0 is skipped: no language index is recorded for it.
		if i > 0 {
			idx := b.lang.index(altLangISO3.update[s])
			altLangIndex = append(altLangIndex, uint16(idx))
		}
	}
	b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
	b.writeSlice("altLangIndex", altLangIndex)

	b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex)
	// langAliasTypes holds the alias type of each langAliasMap entry, in the
	// order of langAliasMap.s.
	types := make([]langAliasType, len(langAliasMap.s))
	for i, s := range langAliasMap.s {
		types[i] = aliasTypeMap[s]
	}
	b.writeSlice("langAliasTypes", types)
}
func (b *builder) genCurrencies(w *gen.CodeWriter, data *cldr.SupplementalData) { // 3-letter ISO currency codes // Start with dummy to let index start at 1. currencies := []string{"\x00\x00\x00\x00"} // currency codes for _, reg := range data.CurrencyData.Region { for _, cur := range reg.Currency { currencies = append(currencies, cur.Iso4217) } } // Not included in the list for some reasons: currencies = append(currencies, "MVP") sort.Strings(currencies) // Unique the elements. k := 0 for i := 1; i < len(currencies); i++ { if currencies[k] != currencies[i] { currencies[k+1] = currencies[i] k++ } } currencies = currencies[:k+1] // Close with dummy for simpler and faster searching. currencies = append(currencies, "\xff\xff\xff\xff") // Write currency values. fmt.Fprintln(w, "const (") for _, c := range constants { index := sort.SearchStrings(currencies, c) fmt.Fprintf(w, "\t%s = %d\n", strings.ToLower(c), index) } fmt.Fprint(w, ")") // Compute currency-related data that we merge into the table. for _, info := range data.CurrencyData.Fractions[0].Info { if info.Iso4217 == "DEFAULT" { continue } standard := getRoundingIndex(info.Digits, info.Rounding, 0) cash := getRoundingIndex(info.CashDigits, info.CashRounding, standard) index := sort.SearchStrings(currencies, info.Iso4217) currencies[index] += mkCurrencyInfo(standard, cash) } // Set default values for entries that weren't touched. for i, c := range currencies { if len(c) == 3 { currencies[i] += mkCurrencyInfo(0, 0) } } b.currencies = tag.Index(strings.Join(currencies, "")) w.WriteComment(` currency holds an alphabetically sorted list of canonical 3-letter currency identifiers. Each identifier is followed by a byte of type currencyInfo, defined in gen_common.go.`) w.WriteConst("currency", b.currencies) // Hack alert: gofmt indents a trailing comment after an indented string. // Ensure that the next thing written is not a comment. 
b.numCurrencies = (len(b.currencies) / 4) - 2 w.WriteConst("numCurrencies", b.numCurrencies) // Create a table that maps regions to currencies. regionToCurrency := []toCurrency{} for _, reg := range data.CurrencyData.Region { if len(reg.Iso3166) != 2 { log.Fatalf("Unexpected group %q in region data", reg.Iso3166) } if len(reg.Currency) == 0 { continue } cur := reg.Currency[0] if cur.To != "" || cur.Tender == "false" { continue } regionToCurrency = append(regionToCurrency, toCurrency{ region: regionToCode(language.MustParseRegion(reg.Iso3166)), code: uint16(b.currencies.Index([]byte(cur.Iso4217))), }) } sort.Sort(byRegion(regionToCurrency)) w.WriteType(toCurrency{}) w.WriteVar("regionToCurrency", regionToCurrency) }