// ExampleSlice shows how a slice of CLDR elements can be filtered through
// cldr.MakeSlice and then iterated over.
func ExampleSlice() {
	var data *cldr.CLDR // assume this is initialized

	ldml, _ := data.LDML("en")
	collations := ldml.Collations.Collation

	// Filter the collations by comparing each element's type against the
	// default collation type.
	differsFromDefault := func(e cldr.Elem) bool {
		return e.GetCommon().Type != ldml.Collations.Default()
	}
	cldr.MakeSlice(&collations).Filter(differsFromDefault)

	for idx, col := range collations {
		fmt.Println(idx, col.Type)
	}
}
Esempio n. 2
0
// genPluralsTests writes the pluralTest type followed by one "<type>Tests"
// variable for every plural rule type in the supplemental CLDR data. Each
// test records the locales and plural form of a rule together with the
// sample tokens that follow its "@integer" and "@decimal" markers.
func genPluralsTests(w *gen.CodeWriter, data *cldr.CLDR) {
	w.WriteType(pluralTest{})

	for _, group := range data.Supplemental().Plurals {
		// The empty type is reserved for plural ranges.
		if group.Type == "" {
			continue
		}
		tests := []pluralTest{}

		for _, ruleSet := range group.PluralRules {
			for _, rule := range ruleSet.PluralRule {
				tc := pluralTest{
					locales: ruleSet.Locales,
					form:    countMap[rule.Count],
				}

				// dst points at the sample list currently being filled. It
				// stays nil until the first marker token is seen, so any
				// tokens before a marker are dropped.
				var dst *[]string

				tokens := bufio.NewScanner(strings.NewReader(rule.Data()))
				tokens.Split(splitTokens)
				for tokens.Scan() {
					tok := tokens.Text()
					if tok == "@integer" {
						dst = &tc.integer
						continue
					}
					if tok == "@decimal" {
						dst = &tc.decimal
						continue
					}
					if tok == "," || tok == "…" {
						// Separators and ellipses carry no sample value.
						continue
					}
					if dst != nil {
						*dst = append(*dst, tok)
					}
				}
				tests = append(tests, tc)
			}
		}
		w.WriteVar(group.Type+"Tests", tests)
	}
}
Esempio n. 3
0
// genSymbols emits the lookup tables for currency symbols. Most symbols are
// defined in root and only a few vary per language.
// The following rules apply:
// - A symbol can be requested as normal or narrow.
// - If a symbol is not defined for a currency, it defaults to its ISO code.
//
// The function writes the "symbols" string constant, the curToIndex type and,
// for both the normal and the narrow variant, a "<variant>LangIndex" and a
// "<variant>SymIndex" variable.
func (b *builder) genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
	draftLevel, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("filter: %v", err)
	}

	const (
		normal = iota
		narrow
		numTypes
	)
	// language -> currency -> type -> symbol
	var symbols [language.NumCompactTags][][numTypes]*string

	// Step 1: gather the symbols every locale defines.
	for _, loc := range data.Locales() {
		raw := data.RawLDML(loc)
		if raw.Numbers == nil || raw.Numbers.Currencies == nil {
			continue
		}

		langIndex, ok := language.CompactIndex(language.MustParse(loc))
		if !ok {
			log.Fatalf("No compact index for language %s", loc)
		}

		perCurrency := make([][numTypes]*string, b.numCurrencies+1)
		symbols[langIndex] = perCurrency

		for _, currency := range raw.Numbers.Currencies.Currency {
			cldr.MakeSlice(&currency.Symbol).SelectDraft(draftLevel)

			for _, sym := range currency.Symbol {
				text := sym.Data()
				if text == currency.Type {
					// We define "" to mean the ISO symbol.
					text = ""
				}
				cur := b.currencies.Index([]byte(currency.Type))
				// XXX gets reassigned to 0 in the package's code.
				if currency.Type == "XXX" {
					cur = 0
				}
				if cur == -1 {
					fmt.Println("Unsupported:", currency.Type)
					continue
				}

				// Only the unqualified and the "narrow" alternatives are
				// recorded; any other alternative is ignored.
				if sym.Alt == "" {
					perCurrency[cur][normal] = &text
				} else if sym.Alt == "narrow" {
					perCurrency[cur][narrow] = &text
				}
			}
		}
	}

	// Step 2: drop values that merely repeat what an ancestor provides.
	for langIndex, perCurrency := range symbols {
		for curIndex, byType := range perCurrency {
			for typ, sym := range byType {
				if sym == nil {
					continue
				}
				for p := uint16(langIndex); p != 0; {
					p = internal.Parent[p]
					ancestor := symbols[p]
					if ancestor == nil {
						continue
					}
					inherited := ancestor[curIndex][typ]
					if inherited == nil && p != 0 {
						// This ancestor has no opinion; keep climbing.
						continue
					}
					// Either an ancestor defines this symbol, or we reached
					// root, where an undefined value stands for the default
					// (empty) symbol.
					parentSym := ""
					if inherited != nil {
						parentSym = *inherited
					}
					if parentSym == *sym {
						// Value is the same as parent.
						perCurrency[curIndex][typ] = nil
					}
					break
				}
			}
		}
	}

	// Step 3: build the de-duplicated symbol string table.
	symbolData := []byte{0}
	symbolLookup := map[string]uint16{"": 0} // 0 means default, so block that value.
	for _, perCurrency := range symbols {
		for _, byType := range perCurrency {
			for _, sym := range byType {
				if sym == nil {
					continue
				}
				if _, seen := symbolLookup[*sym]; seen {
					continue
				}
				symbolLookup[*sym] = uint16(len(symbolData))
				symbolData = append(symbolData, byte(len(*sym)))
				symbolData = append(symbolData, *sym...)
			}
		}
	}
	w.WriteComment(`
	symbols holds symbol data of the form <n> <str>, where n is the length of
	the symbol string str.`)
	w.WriteConst("symbols", string(symbolData))

	// Step 4: build the language -> currency -> symbol indexes.
	type curToIndex struct{ cur, idx uint16 }
	w.WriteType(curToIndex{})

	variantNames := []string{"normal", "narrow"}
	// One pair of tables is written for the regular symbols and one for the
	// narrow symbols.
	for typ := normal; typ <= narrow; typ++ {
		indexes := []curToIndex{} // maps currency to symbol index
		languages := []uint16{}   // start offset into indexes per language

		for _, perCurrency := range symbols {
			languages = append(languages, uint16(len(indexes)))
			for curIndex, byType := range perCurrency {
				sym := byType[typ]
				if sym == nil {
					continue
				}
				indexes = append(indexes, curToIndex{uint16(curIndex), symbolLookup[*sym]})
			}
		}
		// Trailing sentinel marking the end of the last language's range.
		languages = append(languages, uint16(len(indexes)))

		w.WriteVar(variantNames[typ]+"LangIndex", languages)
		w.WriteVar(variantNames[typ]+"SymIndex", indexes)
	}
}
Esempio n. 4
0
File: gen.go Progetto: jak-atx/vic
// genNumSystem writes the numSysData table, a const block with one num<Id>
// constant per numbering system (terminated by numNumberSystems), and the
// systemMap variable. It also records every assigned identifier in the
// package-level systemMap.
//
// Numbering systems with digits get an entry in numSysData, except "latn"
// and "hanidec". All remaining systems receive identifiers after the last
// numSysData entry.
func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
	numSysData := []systemData{
		{digitSize: 1, zero: [4]byte{'0'}},
	}

	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
		// Skipped here: systems without digits, "latn" (hard-wired), and
		// "hanidec" (non-consecutive digits: treat as "algorithmic").
		if len(ns.Digits) == 0 || ns.Id == "latn" || ns.Id == "hanidec" {
			continue
		}

		zero, sz := utf8.DecodeRuneInString(ns.Digits)
		if last := ns.Digits[sz-1]; last+9 > 0xBF { // 1011 1111: highest continuation byte
			log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
		}

		// Verify that we can do simple math on the UTF-8 byte sequence of
		// zero to get each digit: all runes must be consecutive.
		offset := rune(0)
		for _, r := range ns.Digits {
			if r != zero+offset {
				log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", offset, ns.Id, r)
			}
			offset++
		}

		var encodedZero [utf8.UTFMax]byte
		utf8.EncodeRune(encodedZero[:], zero)

		id := system(len(numSysData))
		systemMap[ns.Id] = id
		entry := systemData{
			id:        id,
			digitSize: byte(sz),
			zero:      encodedZero,
		}
		numSysData = append(numSysData, entry)
	}
	w.WriteVar("numSysData", numSysData)

	// Systems that did not get an identifier above are numbered consecutively
	// starting right after the last numSysData entry.
	nextID := system(len(numSysData))
	fmt.Fprintln(w, "const (")
	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
		id, known := systemMap[ns.Id]
		if !known {
			id = nextID
			systemMap[ns.Id] = id
			nextID++
		}
		fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
	}
	fmt.Fprintln(w, "numNumberSystems")
	fmt.Fprintln(w, ")")

	fmt.Fprintln(w, "var systemMap = map[string]system{")
	for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
		fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
		w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
	}
	fmt.Fprintln(w, "}")
}
Esempio n. 5
0
File: gen.go Progetto: jak-atx/vic
// genSymbols writes the number-symbol tables symIndex, symData,
// langToDefaults and langToAlt.
//
// It collects one symbol set per (compact language index, numbering system)
// pair from the raw LDML of every locale, fills empty symbols from ancestor
// locales, de-duplicates the resulting sets, and then builds the per-language
// lookup tables. An unparsable draft level, or a locale that has number data
// but no compact index, aborts the program via log.Fatalf.
func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
	d, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("invalid draft level: %v", err)
	}

	// Number of entries in the package-level systemMap at this point.
	nNumberSystems := system(len(systemMap))

	// symbols holds one string per symbol type, indexed by the Sym* constants.
	type symbols [NumSymbolTypes]string

	type key struct {
		tag    int // from language.CompactIndex
		system system
	}
	// symbolMap holds the symbol set defined for a language/system pair.
	symbolMap := map[key]*symbols{}

	// defaults maps a compact language index to its default numbering system.
	// Languages without an explicit default are absent and therefore read as
	// the zero system.
	defaults := map[int]system{}

	for _, lang := range data.Locales() {
		ldml := data.RawLDML(lang)
		if ldml.Numbers == nil {
			continue
		}
		langIndex, ok := language.CompactIndex(language.MustParse(lang))
		if !ok {
			log.Fatalf("No compact index for language %s", lang)
		}
		// Note: d shadows the draft level only within this if statement.
		if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
			defaults[langIndex] = getNumberSystem(d[0].Data())
		}

		syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
		syms.SelectDraft(d)

		for _, sym := range ldml.Numbers.Symbols {
			if sym.NumberSystem == "" {
				// This is just linking the default of root to "latn".
				continue
			}
			symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
				SymDecimal:                getFirst("decimal", sym.Decimal),
				SymGroup:                  getFirst("group", sym.Group),
				SymList:                   getFirst("list", sym.List),
				SymPercentSign:            getFirst("percentSign", sym.PercentSign),
				SymPlusSign:               getFirst("plusSign", sym.PlusSign),
				SymMinusSign:              getFirst("minusSign", sym.MinusSign),
				SymExponential:            getFirst("exponential", sym.Exponential),
				SymSuperscriptingExponent: getFirst("superscriptingExponent", sym.SuperscriptingExponent),
				SymPerMille:               getFirst("perMille", sym.PerMille),
				SymInfinity:               getFirst("infinity", sym.Infinity),
				SymNan:                    getFirst("nan", sym.Nan),
				SymTimeSeparator:          getFirst("timeSeparator", sym.TimeSeparator),
			}
		}
	}

	// Expand all values.
	// Every empty symbol is filled from the nearest ancestor locale that
	// defines it for the same numbering system. Once the walk reaches index 0
	// without finding one, the value of the key{} entry (tag 0, system 0) is
	// used instead.
	// NOTE(review): this dereferences symbolMap[key{}] without a nil check,
	// and the loop only ends once syms[t] is non-empty. It presumably relies
	// on the key{} entry existing with every symbol set — confirm.
	for k, syms := range symbolMap {
		for t := SymDecimal; t < NumSymbolTypes; t++ {
			p := k.tag
			for syms[t] == "" {
				p = int(internal.Parent[p])
				if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
					syms[t] = (*pSyms)[t]
					break
				}
				if p == 0 /* und */ {
					// Default to root, latn.
					syms[t] = (*symbolMap[key{}])[t]
				}
			}
		}
	}

	// Unique the symbol sets and write the string data.
	// m maps each distinct symbol set to its position in symIndex.
	m := map[symbols]int{}
	sb := stringset.NewBuilder()

	symIndex := [][NumSymbolTypes]byte{}

	for ns := system(0); ns < nNumberSystems; ns++ {
		for _, l := range data.Locales() {
			langIndex, _ := language.CompactIndex(language.MustParse(l))
			s := symbolMap[key{langIndex, ns}]
			if s == nil {
				continue
			}
			if _, ok := m[*s]; !ok {
				m[*s] = len(symIndex)
				sb.Add(s[:]...)
				// Each symbol is stored as a single byte holding its index
				// in the string set.
				var x [NumSymbolTypes]byte
				for i := SymDecimal; i < NumSymbolTypes; i++ {
					x[i] = byte(sb.Index((*s)[i]))
				}
				symIndex = append(symIndex, x)
			}
		}
	}
	w.WriteVar("symIndex", symIndex)
	w.WriteVar("symData", sb.Set())

	// resolveSymbolIndex gets the index from the closest matching locale,
	// including the locale itself.
	// It reads symbolMap at call time, so entries deleted further down are
	// skipped by later calls.
	resolveSymbolIndex := func(langIndex int, ns system) byte {
		for {
			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
				return byte(m[*sym])
			}
			if langIndex == 0 {
				return 0 // und, latn
			}
			langIndex = int(internal.Parent[langIndex])
		}
	}

	// Create an index with the symbols for each locale for the latn numbering
	// system. If this is not the default, or the only one, for a locale, we
	// will overwrite the value later.
	var langToDefaults [language.NumCompactTags]byte
	for _, l := range data.Locales() {
		langIndex, _ := language.CompactIndex(language.MustParse(l))
		langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
	}

	// Delete redundant entries.
	// A non-default numbering system whose symbol set equals that of the
	// language's default system is removed from symbolMap.
	for _, l := range data.Locales() {
		langIndex, _ := language.CompactIndex(language.MustParse(l))
		def := defaults[langIndex]
		syms := symbolMap[key{langIndex, def}]
		if syms == nil {
			continue
		}
		for ns := system(0); ns < nNumberSystems; ns++ {
			if ns == def {
				continue
			}
			if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
				delete(symbolMap, key{langIndex, ns})
			}
		}
	}

	// Create a sorted list of alternatives per language. This will only need to
	// be referenced if a user specified an alternative numbering system.
	var langToAlt []altSymData
	for _, l := range data.Locales() {
		langIndex, _ := language.CompactIndex(language.MustParse(l))
		start := len(langToAlt)
		// start is stored below as 0x80 | byte(start), which leaves only the
		// low seven bits for the offset.
		if start > 0x7F {
			log.Fatal("Number of alternative assignments > 0x7F")
		}
		// Create the entry for the default value.
		def := defaults[langIndex]
		langToAlt = append(langToAlt, altSymData{
			compactTag: uint16(langIndex),
			system:     def,
			symIndex:   resolveSymbolIndex(langIndex, def),
		})

		for ns := system(0); ns < nNumberSystems; ns++ {
			if def == ns {
				continue
			}
			if sym := symbolMap[key{langIndex, ns}]; sym != nil {
				langToAlt = append(langToAlt, altSymData{
					compactTag: uint16(langIndex),
					system:     ns,
					symIndex:   resolveSymbolIndex(langIndex, ns),
				})
			}
		}
		if def == 0 && len(langToAlt) == start+1 {
			// No additional data: erase the entry.
			langToAlt = langToAlt[:start]
		} else {
			// Overwrite the entry in langToDefaults.
			// The high bit marks the value as an offset into langToAlt
			// rather than a symbol index.
			langToDefaults[langIndex] = 0x80 | byte(start)
		}
	}
	w.WriteComment(`
langToDefaults maps a compact language index to the default numbering system
and default symbol set`)
	w.WriteVar("langToDefaults", langToDefaults)

	w.WriteComment(`
langToAlt is a list of numbering system and symbol set pairs, sorted and
marked by compact language index.`)
	w.WriteVar("langToAlt", langToAlt)
}
Esempio n. 6
0
// genPlurals writes, for every plural rule type in the supplemental CLDR data
// (the empty type is skipped), the tables <type>Rules, <type>Index,
// <type>LangToIndex and <type>InclusionMasks, followed by a comment that
// summarizes how many table slots are in use.
//
// The program is aborted via log.Fatalf when a table outgrows its encoding:
// more than 0xFF rules or index entries, or more than 64 number sets.
// Locales that have no compact index are logged and skipped; they pick up
// the rules of their closest ancestor during expansion.
func genPlurals(w *gen.CodeWriter, data *cldr.CLDR) {
	for _, plurals := range data.Supplemental().Plurals {
		if plurals.Type == "" {
			continue
		}
		// Initialize setMap and inclusionMasks. They are already populated with
		// a few entries to serve as an example and to assign nice numbers to
		// common cases.

		// setMap contains sets of numbers represented by boolean arrays where
		// a true value for element i means that the number i is included.
		setMap := map[[numN]bool]int{
			// The set containing all numbers is registered below with
			// identifier 0.
			[numN]bool{1: true}: 1, // fix {1} to a nice value
			[numN]bool{2: true}: 2, // fix {2} to a nice value
			[numN]bool{0: true}: 3, // fix {0} to a nice value
		}

		// inclusionMasks contains bit masks for every number under numN to
		// indicate in which set the number is included. Bit 1 << x will be set
		// if it is included in set x.
		inclusionMasks := [numN]uint64{
			// Note: these entries are not complete: more bits will be set along the way.
			0: 1 << 3,
			1: 1 << 1,
			2: 1 << 2,
		}

		// Create the set of all numbers below numN. We will assign this set
		// the identifier 0.
		var all [numN]bool
		for i := range all {
			// Mark number i as being included in the set (which has identifier 0).
			inclusionMasks[i] |= 1 << 0
			// Mark number i as included in the set.
			all[i] = true
		}
		// Register the identifier for the set.
		setMap[all] = 0

		rules := []pluralCheck{}
		index := []byte{0}
		langMap := map[int]byte{0: 0} // From compact language index to index

		for _, pRules := range plurals.PluralRules {
			// Parse the rules.
			var conds []orCondition
			for _, rule := range pRules.PluralRule {
				form := countMap[rule.Count]
				conds = parsePluralCondition(conds, rule.Data(), form)
			}
			// Encode the rules.
			for _, c := range conds {
				// If an or condition only has filters, we create an entry for
				// this filter and the set that contains all values.
				empty := true
				for _, b := range c.used {
					empty = empty && !b
				}
				if empty {
					rules = append(rules, pluralCheck{
						cat:   byte(opMod<<opShift) | byte(c.form),
						setID: 0, // all values
					})
					continue
				}
				// We have some entries with values.
				for i, set := range c.set {
					if !c.used[i] {
						continue
					}
					setID, ok := setMap[set]
					if !ok {
						// First time this set is seen: give it the next free
						// identifier and flag every number it contains.
						setID = len(setMap)
						setMap[set] = setID
						for n := range inclusionMasks {
							if set[n] {
								inclusionMasks[n] |= 1 << uint64(setID)
							}
						}
					}
					rules = append(rules, pluralCheck{
						cat:   byte(i<<opShift | andNext),
						setID: byte(setID),
					})
				}
				// Now set the last entry to the plural form the rule matches.
				rules[len(rules)-1].cat &^= formMask
				rules[len(rules)-1].cat |= byte(c.form)
			}
			// Point the relevant locales to the created entries.
			for _, loc := range strings.Split(pRules.Locales, " ") {
				if strings.TrimSpace(loc) == "" {
					continue
				}
				lang, ok := language.CompactIndex(language.MustParse(loc))
				if !ok {
					// The returned index is not meaningful for this locale.
					// Using it as a key would overwrite the entry of an
					// unrelated locale, so skip the locale instead.
					log.Printf("No compact index for locale %q", loc)
					continue
				}
				langMap[lang] = byte(len(index) - 1)
			}
			index = append(index, byte(len(rules)))
		}
		w.WriteVar(plurals.Type+"Rules", rules)
		w.WriteVar(plurals.Type+"Index", index)
		// Expand the values: every compact index gets the entry of the
		// closest ancestor (including itself) present in langMap. The walk
		// ends at the latest at index 0, which langMap always contains.
		langToIndex := make([]byte, language.NumCompactTags)
		for i := range langToIndex {
			for p := i; ; p = int(internal.Parent[p]) {
				if x, ok := langMap[p]; ok {
					langToIndex[i] = x
					break
				}
			}
		}
		w.WriteVar(plurals.Type+"LangToIndex", langToIndex)
		// Need to convert array to slice because of golang.org/issue/7651.
		// This will allow tables to be dropped when unused. This is especially
		// relevant for the ordinal data, which I suspect won't be used as much.
		w.WriteVar(plurals.Type+"InclusionMasks", inclusionMasks[:])

		if len(rules) > 0xFF {
			log.Fatalf("Too many entries for rules: %#x", len(rules))
		}
		if len(index) > 0xFF {
			log.Fatalf("Too many entries for index: %#x", len(index))
		}
		if len(setMap) > 64 { // maximum number of bits.
			log.Fatalf("Too many entries for setMap: %d", len(setMap))
		}
		w.WriteComment(
			"Slots used for %s: %X of 0xFF rules; %X of 0xFF indexes; %d of 64 sets",
			plurals.Type, len(rules), len(index), len(setMap))
		// Prevent comment from attaching to the next entry.
		fmt.Fprint(w, "\n\n")
	}
}
Esempio n. 7
0
// genFormats generates the lookup table for decimal, scientific and percent
// patterns.
//
// CLDR allows for patterns to be different per language for different numbering
// systems. In practice the patterns are set to be consistent for a language
// independent of the numbering system. genFormats verifies that no language
// deviates from this.
//
// It writes the variables tagToDecimal, tagToScientific and tagToPercent,
// which map a compact language index to an index into the generated formats
// variable. The program is aborted via log.Fatal/log.Fatalf on an invalid
// draft level, a locale with number data but no compact index, inconsistent
// patterns for one language, an unparsable pattern, or more distinct
// patterns than fit in a byte.
func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
	d, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("invalid draft level: %v", err)
	}

	// Fill the first slot with a dummy so we can identify unspecified tags.
	formats := []number.Format{{}}
	// patterns maps a pattern string to its index in formats.
	patterns := map[string]int{}

	// TODO: It would be possible to eliminate two of these slices by having
	// another indirection and store a reference to the combination of patterns.
	decimal := make([]byte, language.NumCompactTags)
	scientific := make([]byte, language.NumCompactTags)
	percent := make([]byte, language.NumCompactTags)

	for _, lang := range data.Locales() {
		ldml := data.RawLDML(lang)
		if ldml.Numbers == nil {
			continue
		}
		langIndex, ok := language.CompactIndex(language.MustParse(lang))
		if !ok {
			log.Fatalf("No compact index for language %s", lang)
		}
		type patternSlice []*struct {
			cldr.Common
			Numbers string `xml:"numbers,attr"`
			Count   string `xml:"count,attr"`
		}

		// add records the pattern selected from ps as the format of kind name
		// for the current language in tags.
		add := func(name string, tags []byte, ps patternSlice) {
			sl := cldr.MakeSlice(&ps)
			sl.SelectDraft(d)
			if len(ps) == 0 {
				return
			}
			// Two remaining entries are acceptable only if they carry the
			// same pattern text. The elements are pointers, so the text must
			// be compared rather than the pointers themselves.
			if len(ps) > 2 || len(ps) == 2 && ps[0].Data() != ps[1].Data() {
				log.Fatalf("Inconsistent %s patterns for language %s", name, lang)
			}
			s := ps[0].Data()

			index, ok := patterns[s]
			if !ok {
				nf, err := number.ParsePattern(s)
				if err != nil {
					log.Fatal(err)
				}
				index = len(formats)
				if index > 0xFF {
					// tags stores the index in a single byte.
					log.Fatalf("Too many entries for formats: %#x", index)
				}
				patterns[s] = index
				formats = append(formats, *nf)
			}
			tags[langIndex] = byte(index)
		}

		// Only the format lengths without a type are considered.
		for _, df := range ldml.Numbers.DecimalFormats {
			for _, l := range df.DecimalFormatLength {
				if l.Type != "" {
					continue
				}
				for _, f := range l.DecimalFormat {
					add("decimal", decimal, f.Pattern)
				}
			}
		}
		for _, df := range ldml.Numbers.ScientificFormats {
			for _, l := range df.ScientificFormatLength {
				if l.Type != "" {
					continue
				}
				for _, f := range l.ScientificFormat {
					add("scientific", scientific, f.Pattern)
				}
			}
		}
		for _, df := range ldml.Numbers.PercentFormats {
			for _, l := range df.PercentFormatLength {
				if l.Type != "" {
					continue
				}
				for _, f := range l.PercentFormat {
					add("percent", percent, f.Pattern)
				}
			}
		}
	}

	// Complete the parent tag array to reflect inheritance. An index of 0
	// indicates an unspecified value.
	// NOTE(review): the walk only stops at an ancestor with a non-zero
	// entry, so it presumably relies on the root locale defining all three
	// pattern kinds — confirm.
	for _, tags := range [][]byte{decimal, scientific, percent} {
		for i := range tags {
			p := uint16(i)
			for ; tags[p] == 0; p = internal.Parent[p] {
			}
			tags[i] = tags[p]
		}
	}
	w.WriteVar("tagToDecimal", decimal)
	w.WriteVar("tagToScientific", scientific)
	w.WriteVar("tagToPercent", percent)

	value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
	// Break up the lines. This won't give ideal perfect formatting, but it is
	// better than one huge line.
	value = strings.Replace(value, ", ", ",\n", -1)
	fmt.Fprintf(w, "var formats = %s\n", value)
}