// write the data for the given header as single entries. The size for this data // was already accounted for in writeEntry. func (h *header) writeSingle(w *gen.CodeWriter, name string) { if len(dict) > 0 && dict.contains(h.tag) { tag := identifier(h.tag) w.WriteConst(tag+name+"Str", h.data) // Note that we create a slice instead of an array. If we use an array // we need to refer to it as a[:] in other tables, which will cause the // array to always be included by the linker. See Issue 7651. w.WriteVar(tag+name+"Idx", h.index) } }
func genPluralsTests(w *gen.CodeWriter, data *cldr.CLDR) { w.WriteType(pluralTest{}) for _, plurals := range data.Supplemental().Plurals { if plurals.Type == "" { // The empty type is reserved for plural ranges. continue } tests := []pluralTest{} for _, pRules := range plurals.PluralRules { for _, rule := range pRules.PluralRule { test := pluralTest{ locales: pRules.Locales, form: countMap[rule.Count], } scan := bufio.NewScanner(strings.NewReader(rule.Data())) scan.Split(splitTokens) var p *[]string for scan.Scan() { switch t := scan.Text(); t { case "@integer": p = &test.integer case "@decimal": p = &test.decimal case ",", "…": default: if p != nil { *p = append(*p, t) } } } tests = append(tests, test) } } w.WriteVar(plurals.Type+"Tests", tests) } }
func (b *builder) genCurrencies(w *gen.CodeWriter, data *cldr.SupplementalData) { // 3-letter ISO currency codes // Start with dummy to let index start at 1. currencies := []string{"\x00\x00\x00\x00"} // currency codes for _, reg := range data.CurrencyData.Region { for _, cur := range reg.Currency { currencies = append(currencies, cur.Iso4217) } } // Not included in the list for some reasons: currencies = append(currencies, "MVP") sort.Strings(currencies) // Unique the elements. k := 0 for i := 1; i < len(currencies); i++ { if currencies[k] != currencies[i] { currencies[k+1] = currencies[i] k++ } } currencies = currencies[:k+1] // Close with dummy for simpler and faster searching. currencies = append(currencies, "\xff\xff\xff\xff") // Write currency values. fmt.Fprintln(w, "const (") for _, c := range constants { index := sort.SearchStrings(currencies, c) fmt.Fprintf(w, "\t%s = %d\n", strings.ToLower(c), index) } fmt.Fprint(w, ")") // Compute currency-related data that we merge into the table. for _, info := range data.CurrencyData.Fractions[0].Info { if info.Iso4217 == "DEFAULT" { continue } standard := getRoundingIndex(info.Digits, info.Rounding, 0) cash := getRoundingIndex(info.CashDigits, info.CashRounding, standard) index := sort.SearchStrings(currencies, info.Iso4217) currencies[index] += mkCurrencyInfo(standard, cash) } // Set default values for entries that weren't touched. for i, c := range currencies { if len(c) == 3 { currencies[i] += mkCurrencyInfo(0, 0) } } b.currencies = tag.Index(strings.Join(currencies, "")) w.WriteComment(` currency holds an alphabetically sorted list of canonical 3-letter currency identifiers. Each identifier is followed by a byte of type currencyInfo, defined in gen_common.go.`) w.WriteConst("currency", b.currencies) // Hack alert: gofmt indents a trailing comment after an indented string. // Ensure that the next thing written is not a comment. b.numCurrencies = (len(b.currencies) / 4) - 2 w.WriteConst("numCurrencies", b.numCurrencies) // Create a table that maps regions to currencies. regionToCurrency := []toCurrency{} for _, reg := range data.CurrencyData.Region { if len(reg.Iso3166) != 2 { log.Fatalf("Unexpected group %q in region data", reg.Iso3166) } if len(reg.Currency) == 0 { continue } cur := reg.Currency[0] if cur.To != "" || cur.Tender == "false" { continue } regionToCurrency = append(regionToCurrency, toCurrency{ region: regionToCode(language.MustParseRegion(reg.Iso3166)), code: uint16(b.currencies.Index([]byte(cur.Iso4217))), }) } sort.Sort(byRegion(regionToCurrency)) w.WriteType(toCurrency{}) w.WriteVar("regionToCurrency", regionToCurrency) }
// genSymbols generates the symbols used for currencies. Most symbols are // defined in root and there is only very small variation per language. // The following rules apply: // - A symbol can be requested as normal or narrow. // - If a symbol is not defined for a currency, it defaults to its ISO code. func (b *builder) genSymbols(w *gen.CodeWriter, data *cldr.CLDR) { d, err := cldr.ParseDraft(*draft) if err != nil { log.Fatalf("filter: %v", err) } const ( normal = iota narrow numTypes ) // language -> currency -> type -> symbol var symbols [language.NumCompactTags][][numTypes]*string // Collect symbol information per language. for _, lang := range data.Locales() { ldml := data.RawLDML(lang) if ldml.Numbers == nil || ldml.Numbers.Currencies == nil { continue } langIndex, ok := language.CompactIndex(language.MustParse(lang)) if !ok { log.Fatalf("No compact index for language %s", lang) } symbols[langIndex] = make([][numTypes]*string, b.numCurrencies+1) for _, c := range ldml.Numbers.Currencies.Currency { syms := cldr.MakeSlice(&c.Symbol) syms.SelectDraft(d) for _, sym := range c.Symbol { v := sym.Data() if v == c.Type { // We define "" to mean the ISO symbol. v = "" } cur := b.currencies.Index([]byte(c.Type)) // XXX gets reassigned to 0 in the package's code. if c.Type == "XXX" { cur = 0 } if cur == -1 { fmt.Println("Unsupported:", c.Type) continue } switch sym.Alt { case "": symbols[langIndex][cur][normal] = &v case "narrow": symbols[langIndex][cur][narrow] = &v } } } } // Remove values identical to the parent. for langIndex, data := range symbols { for curIndex, curs := range data { for typ, sym := range curs { if sym == nil { continue } for p := uint16(langIndex); p != 0; { p = internal.Parent[p] x := symbols[p] if x == nil { continue } if v := x[curIndex][typ]; v != nil || p == 0 { // Value is equal to the default value root value is undefined. parentSym := "" if v != nil { parentSym = *v } if parentSym == *sym { // Value is the same as parent. data[curIndex][typ] = nil } break } } } } } // Create symbol index. symbolData := []byte{0} symbolLookup := map[string]uint16{"": 0} // 0 means default, so block that value. for _, data := range symbols { for _, curs := range data { for _, sym := range curs { if sym == nil { continue } if _, ok := symbolLookup[*sym]; !ok { symbolLookup[*sym] = uint16(len(symbolData)) symbolData = append(symbolData, byte(len(*sym))) symbolData = append(symbolData, *sym...) } } } } w.WriteComment(` symbols holds symbol data of the form <n> <str>, where n is the length of the symbol string str.`) w.WriteConst("symbols", string(symbolData)) // Create index from language to currency lookup to symbol. type curToIndex struct{ cur, idx uint16 } w.WriteType(curToIndex{}) prefix := []string{"normal", "narrow"} // Create data for regular and narrow symbol data. for typ := normal; typ <= narrow; typ++ { indexes := []curToIndex{} // maps currency to symbol index languages := []uint16{} for _, data := range symbols { languages = append(languages, uint16(len(indexes))) for curIndex, curs := range data { if sym := curs[typ]; sym != nil { indexes = append(indexes, curToIndex{uint16(curIndex), symbolLookup[*sym]}) } } } languages = append(languages, uint16(len(indexes))) w.WriteVar(prefix[typ]+"LangIndex", languages) w.WriteVar(prefix[typ]+"SymIndex", indexes) } }
func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) { numSysData := []systemData{ {digitSize: 1, zero: [4]byte{'0'}}, } for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { if len(ns.Digits) == 0 { continue } switch ns.Id { case "latn": // hard-wired continue case "hanidec": // non-consecutive digits: treat as "algorithmic" continue } zero, sz := utf8.DecodeRuneInString(ns.Digits) if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte log.Fatalf("Last byte of zero value overflows for %s", ns.Id) } i := rune(0) for _, r := range ns.Digits { // Verify that we can do simple math on the UTF-8 byte sequence // of zero to get the digit. if zero+i != r { // Runes not consecutive. log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r) } i++ } var x [utf8.UTFMax]byte utf8.EncodeRune(x[:], zero) id := system(len(numSysData)) systemMap[ns.Id] = id numSysData = append(numSysData, systemData{ id: id, digitSize: byte(sz), zero: x, }) } w.WriteVar("numSysData", numSysData) algoID := system(len(numSysData)) fmt.Fprintln(w, "const (") for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { id, ok := systemMap[ns.Id] if !ok { id = algoID systemMap[ns.Id] = id algoID++ } fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id) } fmt.Fprintln(w, "numNumberSystems") fmt.Fprintln(w, ")") fmt.Fprintln(w, "var systemMap = map[string]system{") for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem { fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id)) w.Size += len(ns.Id) + 16 + 1 // very coarse approximation } fmt.Fprintln(w, "}") }
func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) { d, err := cldr.ParseDraft(*draft) if err != nil { log.Fatalf("invalid draft level: %v", err) } nNumberSystems := system(len(systemMap)) type symbols [NumSymbolTypes]string type key struct { tag int // from language.CompactIndex system system } symbolMap := map[key]*symbols{} defaults := map[int]system{} for _, lang := range data.Locales() { ldml := data.RawLDML(lang) if ldml.Numbers == nil { continue } langIndex, ok := language.CompactIndex(language.MustParse(lang)) if !ok { log.Fatalf("No compact index for language %s", lang) } if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 { defaults[langIndex] = getNumberSystem(d[0].Data()) } syms := cldr.MakeSlice(&ldml.Numbers.Symbols) syms.SelectDraft(d) for _, sym := range ldml.Numbers.Symbols { if sym.NumberSystem == "" { // This is just linking the default of root to "latn". continue } symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{ SymDecimal: getFirst("decimal", sym.Decimal), SymGroup: getFirst("group", sym.Group), SymList: getFirst("list", sym.List), SymPercentSign: getFirst("percentSign", sym.PercentSign), SymPlusSign: getFirst("plusSign", sym.PlusSign), SymMinusSign: getFirst("minusSign", sym.MinusSign), SymExponential: getFirst("exponential", sym.Exponential), SymSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent), SymPerMille: getFirst("perMille", sym.PerMille), SymInfinity: getFirst("infinity", sym.Infinity), SymNan: getFirst("nan", sym.Nan), SymTimeSeparator: getFirst("timeSeparator", sym.TimeSeparator), } } } // Expand all values. for k, syms := range symbolMap { for t := SymDecimal; t < NumSymbolTypes; t++ { p := k.tag for syms[t] == "" { p = int(internal.Parent[p]) if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" { syms[t] = (*pSyms)[t] break } if p == 0 /* und */ { // Default to root, latn. syms[t] = (*symbolMap[key{}])[t] } } } } // Unique the symbol sets and write the string data. m := map[symbols]int{} sb := stringset.NewBuilder() symIndex := [][NumSymbolTypes]byte{} for ns := system(0); ns < nNumberSystems; ns++ { for _, l := range data.Locales() { langIndex, _ := language.CompactIndex(language.MustParse(l)) s := symbolMap[key{langIndex, ns}] if s == nil { continue } if _, ok := m[*s]; !ok { m[*s] = len(symIndex) sb.Add(s[:]...) var x [NumSymbolTypes]byte for i := SymDecimal; i < NumSymbolTypes; i++ { x[i] = byte(sb.Index((*s)[i])) } symIndex = append(symIndex, x) } } } w.WriteVar("symIndex", symIndex) w.WriteVar("symData", sb.Set()) // resolveSymbolIndex gets the index from the closest matching locale, // including the locale itself. resolveSymbolIndex := func(langIndex int, ns system) byte { for { if sym := symbolMap[key{langIndex, ns}]; sym != nil { return byte(m[*sym]) } if langIndex == 0 { return 0 // und, latn } langIndex = int(internal.Parent[langIndex]) } } // Create an index with the symbols for each locale for the latn numbering // system. If this is not the default, or the only one, for a locale, we // will overwrite the value later. var langToDefaults [language.NumCompactTags]byte for _, l := range data.Locales() { langIndex, _ := language.CompactIndex(language.MustParse(l)) langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0) } // Delete redundant entries. for _, l := range data.Locales() { langIndex, _ := language.CompactIndex(language.MustParse(l)) def := defaults[langIndex] syms := symbolMap[key{langIndex, def}] if syms == nil { continue } for ns := system(0); ns < nNumberSystems; ns++ { if ns == def { continue } if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms { delete(symbolMap, key{langIndex, ns}) } } } // Create a sorted list of alternatives per language. This will only need to // be referenced if a user specified an alternative numbering system. var langToAlt []altSymData for _, l := range data.Locales() { langIndex, _ := language.CompactIndex(language.MustParse(l)) start := len(langToAlt) if start > 0x7F { log.Fatal("Number of alternative assignments > 0x7F") } // Create the entry for the default value. def := defaults[langIndex] langToAlt = append(langToAlt, altSymData{ compactTag: uint16(langIndex), system: def, symIndex: resolveSymbolIndex(langIndex, def), }) for ns := system(0); ns < nNumberSystems; ns++ { if def == ns { continue } if sym := symbolMap[key{langIndex, ns}]; sym != nil { langToAlt = append(langToAlt, altSymData{ compactTag: uint16(langIndex), system: ns, symIndex: resolveSymbolIndex(langIndex, ns), }) } } if def == 0 && len(langToAlt) == start+1 { // No additional data: erase the entry. langToAlt = langToAlt[:start] } else { // Overwrite the entry in langToDefaults. langToDefaults[langIndex] = 0x80 | byte(start) } } w.WriteComment(` langToDefaults maps a compact language index to the default numbering system and default symbol set`) w.WriteVar("langToDefaults", langToDefaults) w.WriteComment(` langToAlt is a list of numbering system and symbol set pairs, sorted and marked by compact language index.`) w.WriteVar("langToAlt", langToAlt) }
func genPlurals(w *gen.CodeWriter, data *cldr.CLDR) { for _, plurals := range data.Supplemental().Plurals { if plurals.Type == "" { continue } // Initialize setMap and inclusionMasks. They are already populated with // a few entries to serve as an example and to assign nice numbers to // common cases. // setMap contains sets of numbers represented by boolean arrays where // a true value for element i means that the number i is included. setMap := map[[numN]bool]int{ // The above init func adds an entry for including all numbers. [numN]bool{1: true}: 1, // fix {1} to a nice value [numN]bool{2: true}: 2, // fix {2} to a nice value [numN]bool{0: true}: 3, // fix {0} to a nice value } // inclusionMasks contains bit masks for every number under numN to // indicate in which set the number is included. Bit 1 << x will be set // if it is included in set x. inclusionMasks := [numN]uint64{ // Note: these entries are not complete: more bits will be set along the way. 0: 1 << 3, 1: 1 << 1, 2: 1 << 2, } // Create set {0..99}. We will assign this set the identifier 0. var all [numN]bool for i := range all { // Mark number i as being included in the set (which has identifier 0). inclusionMasks[i] |= 1 << 0 // Mark number i as included in the set. all[i] = true } // Register the identifier for the set. setMap[all] = 0 rules := []pluralCheck{} index := []byte{0} langMap := map[int]byte{0: 0} // From compact language index to index for _, pRules := range plurals.PluralRules { // Parse the rules. var conds []orCondition for _, rule := range pRules.PluralRule { form := countMap[rule.Count] conds = parsePluralCondition(conds, rule.Data(), form) } // Encode the rules. for _, c := range conds { // If an or condition only has filters, we create an entry for // this filter and the set that contains all values. empty := true for _, b := range c.used { empty = empty && !b } if empty { rules = append(rules, pluralCheck{ cat: byte(opMod<<opShift) | byte(c.form), setID: 0, // all values }) continue } // We have some entries with values. for i, set := range c.set { if !c.used[i] { continue } index, ok := setMap[set] if !ok { index = len(setMap) setMap[set] = index for i := range inclusionMasks { if set[i] { inclusionMasks[i] |= 1 << uint64(index) } } } rules = append(rules, pluralCheck{ cat: byte(i<<opShift | andNext), setID: byte(index), }) } // Now set the last entry to the plural form the rule matches. rules[len(rules)-1].cat &^= formMask rules[len(rules)-1].cat |= byte(c.form) } // Point the relevant locales to the created entries. for _, loc := range strings.Split(pRules.Locales, " ") { if strings.TrimSpace(loc) == "" { continue } lang, ok := language.CompactIndex(language.MustParse(loc)) if !ok { log.Printf("No compact index for locale %q", loc) } langMap[lang] = byte(len(index) - 1) } index = append(index, byte(len(rules))) } w.WriteVar(plurals.Type+"Rules", rules) w.WriteVar(plurals.Type+"Index", index) // Expand the values. langToIndex := make([]byte, language.NumCompactTags) for i := range langToIndex { for p := i; ; p = int(internal.Parent[p]) { if x, ok := langMap[p]; ok { langToIndex[i] = x break } } } w.WriteVar(plurals.Type+"LangToIndex", langToIndex) // Need to convert array to slice because of golang.org/issue/7651. // This will allow tables to be dropped when unused. This is especially // relevant for the ordinal data, which I suspect won't be used as much. w.WriteVar(plurals.Type+"InclusionMasks", inclusionMasks[:]) if len(rules) > 0xFF { log.Fatalf("Too many entries for rules: %#x", len(rules)) } if len(index) > 0xFF { log.Fatalf("Too many entries for index: %#x", len(index)) } if len(setMap) > 64 { // maximum number of bits. log.Fatalf("Too many entries for setMap: %d", len(setMap)) } w.WriteComment( "Slots used for %s: %X of 0xFF rules; %X of 0xFF indexes; %d of 64 sets", plurals.Type, len(rules), len(index), len(setMap)) // Prevent comment from attaching to the next entry. fmt.Fprint(w, "\n\n") } }
// genFormats generates the lookup table for decimal, scientific and percent // patterns. // // CLDR allows for patterns to be different per language for different numbering // systems. In practice the patterns are set to be consistent for a language // independent of the numbering system. genFormats verifies that no language // deviates from this. func genFormats(w *gen.CodeWriter, data *cldr.CLDR) { d, err := cldr.ParseDraft(*draft) if err != nil { log.Fatalf("invalid draft level: %v", err) } // Fill the first slot with a dummy so we can identify unspecified tags. formats := []number.Format{{}} patterns := map[string]int{} // TODO: It would be possible to eliminate two of these slices by having // another indirection and store a reference to the combination of patterns. decimal := make([]byte, language.NumCompactTags) scientific := make([]byte, language.NumCompactTags) percent := make([]byte, language.NumCompactTags) for _, lang := range data.Locales() { ldml := data.RawLDML(lang) if ldml.Numbers == nil { continue } langIndex, ok := language.CompactIndex(language.MustParse(lang)) if !ok { log.Fatalf("No compact index for language %s", lang) } type patternSlice []*struct { cldr.Common Numbers string `xml:"numbers,attr"` Count string `xml:"count,attr"` } add := func(name string, tags []byte, ps patternSlice) { sl := cldr.MakeSlice(&ps) sl.SelectDraft(d) if len(ps) == 0 { return } if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] { log.Fatalf("Inconsistent %d patterns for language %s", name, lang) } s := ps[0].Data() index, ok := patterns[s] if !ok { nf, err := number.ParsePattern(s) if err != nil { log.Fatal(err) } index = len(formats) patterns[s] = index formats = append(formats, *nf) } tags[langIndex] = byte(index) } for _, df := range ldml.Numbers.DecimalFormats { for _, l := range df.DecimalFormatLength { if l.Type != "" { continue } for _, f := range l.DecimalFormat { add("decimal", decimal, f.Pattern) } } } for _, df := range ldml.Numbers.ScientificFormats { for _, l := range df.ScientificFormatLength { if l.Type != "" { continue } for _, f := range l.ScientificFormat { add("scientific", scientific, f.Pattern) } } } for _, df := range ldml.Numbers.PercentFormats { for _, l := range df.PercentFormatLength { if l.Type != "" { continue } for _, f := range l.PercentFormat { add("percent", percent, f.Pattern) } } } } // Complete the parent tag array to reflect inheritance. An index of 0 // indicates an unspecified value. for _, data := range [][]byte{decimal, scientific, percent} { for i := range data { p := uint16(i) for ; data[p] == 0; p = internal.Parent[p] { } data[i] = data[p] } } w.WriteVar("tagToDecimal", decimal) w.WriteVar("tagToScientific", scientific) w.WriteVar("tagToPercent", percent) value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1) // Break up the lines. This won't give ideal perfect formatting, but it is // better than one huge line. value = strings.Replace(value, ", ", ",\n", -1) fmt.Fprintf(w, "var formats = %s\n", value) }