func TestGetScriptID(t *testing.T) {
	idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
	tests := []struct {
		in  string
		out scriptID
	}{
		{"    ", 0},
		{"      ", 0},
		{"  ", 0},
		{"", 0},
		{"Aaaa", 0},
		{"Bbbb", 1},
		{"Dddd", 2},
		{"dddd", 2},
		{"dDDD", 2},
		{"Eeee", 3},
		{"Zzzz", 4},
	}
	for i, tt := range tests {
		if id, err := getScriptID(idx, b(tt.in)); id != tt.out {
			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
		} else if id == 0 && err == nil {
			t.Errorf("%d:%s: no error; expected one", i, tt.in)
		}
	}
}
Example #2
0
func (b *builder) writeCurrencies() {
	b.writeConsts(b.currency.index, "XTS", "XXX")

	digits := map[string]uint64{}
	rounding := map[string]uint64{}
	for _, info := range b.supp.CurrencyData.Fractions[0].Info {
		var err error
		digits[info.Iso4217], err = strconv.ParseUint(info.Digits, 10, curDigitBits)
		failOnError(err)
		rounding[info.Iso4217], err = strconv.ParseUint(info.Rounding, 10, curRoundBits)
		failOnError(err)
	}
	for i, cur := range b.currency.slice() {
		d := uint64(2) // default number of decimal positions
		if dd, ok := digits[cur]; ok {
			d = dd
		}
		var r uint64
		if r = rounding[cur]; r == 0 {
			r = 1 // default rounding increment in units 10^{-digits)
		}
		b.currency.s[i] += mkCurrencyInfo(int(r), int(d))
	}
	b.writeConst("currency", tag.Index(b.currency.join()))
	// Hack alert: gofmt indents a trailing comment after an indented string.
	// Ensure that the next thing written is not a comment.
	// writeLikelyData serves this purpose as it starts with an uncommented type.
}
Example #3
0
func TestCurrency(t *testing.T) {
	idx := tag.Index(strings.Join([]string{
		"   \x00",
		"BBB" + mkCurrencyInfo(5, 2),
		"DDD\x00",
		"XXX\x00",
		"ZZZ\x00",
		"\xff\xff\xff\xff",
	}, ""))
	tests := []struct {
		in         string
		out        currencyID
		round, dec int
	}{
		{"   ", 0, 0, 0},
		{"     ", 0, 0, 0},
		{" ", 0, 0, 0},
		{"", 0, 0, 0},
		{"BBB", 1, 5, 2},
		{"DDD", 2, 0, 0},
		{"dDd", 2, 0, 0},
		{"ddd", 2, 0, 0},
		{"XXX", 3, 0, 0},
		{"Zzz", 4, 0, 0},
	}
	for i, tt := range tests {
		id, err := getCurrencyID(idx, b(tt.in))
		if id != tt.out {
			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
		} else if tt.out == 0 && err == nil {
			t.Errorf("%d:%s: no error; expected one", i, tt.in)
		}
		if id > 0 {
			if d := decimals(idx, id); d != tt.dec {
				t.Errorf("%d:dec(%s): found %d; want %d", i, tt.in, d, tt.dec)
			}
			if d := round(idx, id); d != tt.round {
				t.Errorf("%d:round(%s): found %d; want %d", i, tt.in, d, tt.round)
			}
		}
	}
}
Example #4
0
func (b *builder) writeScript() {
	b.writeConsts(b.script.index, scriptConsts...)
	b.writeConst("script", tag.Index(b.script.join()))

	supp := make([]uint8, len(b.lang.slice()))
	for i, v := range b.lang.slice()[1:] {
		if sc := b.registry[v].suppressScript; sc != "" {
			supp[i+1] = uint8(b.script.index(sc))
		}
	}
	b.writeSlice("suppressScript", supp)

	// There is only one deprecated script in CLDR. This value is hard-coded.
	// We check here if the code must be updated.
	for _, a := range b.supp.Metadata.Alias.ScriptAlias {
		if a.Type != "Qaai" {
			log.Panicf("unexpected deprecated stript %q", a.Type)
		}
	}
}
Example #5
0
func (b *builder) writeRegion() {
	b.writeConsts(b.region.index, regionConsts...)

	isoOffset := b.region.index("AA")
	m49map := make([]int16, len(b.region.slice()))
	fromM49map := make(map[int16]int)
	altRegionISO3 := ""
	altRegionIDs := []uint16{}

	b.writeConst("isoRegionOffset", isoOffset)

	// 2-letter region lookup and mapping to numeric codes.
	regionISO := b.region.clone()
	regionISO.s = regionISO.s[isoOffset:]
	regionISO.sorted = false

	regionTypes := make([]byte, len(b.region.s))

	// Is the region valid BCP 47?
	for s, e := range b.registry {
		if len(s) == 2 && s == strings.ToUpper(s) {
			i := b.region.index(s)
			for _, d := range e.description {
				if strings.Contains(d, "Private use") {
					regionTypes[i] = iso3166UserAssgined
				}
			}
			regionTypes[i] |= bcp47Region
		}
	}

	// Is the region a valid ccTLD?
	r := gen.OpenIANAFile("domains/root/db")
	defer r.Close()

	buf, err := ioutil.ReadAll(r)
	failOnError(err)
	re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`)
	for _, m := range re.FindAllSubmatch(buf, -1) {
		i := b.region.index(strings.ToUpper(string(m[1])))
		regionTypes[i] |= ccTLD
	}

	b.writeSlice("regionTypes", regionTypes)

	iso3Set := make(map[string]int)
	update := func(iso2, iso3 string) {
		i := regionISO.index(iso2)
		if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] {
			regionISO.s[i] += iso3[1:]
			iso3Set[iso3] = -1
		} else {
			if ok && j >= 0 {
				regionISO.s[i] += string([]byte{0, byte(j)})
			} else {
				iso3Set[iso3] = len(altRegionISO3)
				regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
				altRegionISO3 += iso3
				altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
			}
		}
	}
	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
		i := regionISO.index(tc.Type) + isoOffset
		if d := m49map[i]; d != 0 {
			log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
		}
		m49 := parseM49(tc.Numeric)
		m49map[i] = m49
		if r := fromM49map[m49]; r == 0 {
			fromM49map[m49] = i
		} else if r != i {
			dep := b.registry[regionISO.s[r-isoOffset]].deprecated
			if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) {
				fromM49map[m49] = i
			}
		}
	}
	for _, ta := range b.supp.Metadata.Alias.TerritoryAlias {
		if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 {
			from := parseM49(ta.Type)
			if r := fromM49map[from]; r == 0 {
				fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset
			}
		}
	}
	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
		if len(tc.Alpha3) == 3 {
			update(tc.Type, tc.Alpha3)
		}
	}
	// This entries are not included in territoryCodes. Mostly 3-letter variants
	// of deleted codes and an entry for QU.
	for _, m := range []struct{ iso2, iso3 string }{
		{"CT", "CTE"},
		{"DY", "DHY"},
		{"HV", "HVO"},
		{"JT", "JTN"},
		{"MI", "MID"},
		{"NH", "NHB"},
		{"NQ", "ATN"},
		{"PC", "PCI"},
		{"PU", "PUS"},
		{"PZ", "PCZ"},
		{"RH", "RHO"},
		{"VD", "VDR"},
		{"WK", "WAK"},
		// These three-letter codes are used for others as well.
		{"FQ", "ATF"},
	} {
		update(m.iso2, m.iso3)
	}
	for i, s := range regionISO.s {
		if len(s) != 4 {
			regionISO.s[i] = s + "  "
		}
	}
	b.writeConst("regionISO", tag.Index(regionISO.join()))
	b.writeConst("altRegionISO3", altRegionISO3)
	b.writeSlice("altRegionIDs", altRegionIDs)

	// Create list of deprecated regions.
	// TODO: consider inserting SF -> FI. Not included by CLDR, but is the only
	// Transitionally-reserved mapping not included.
	regionOldMap := stringSet{}
	// Include regions in territoryAlias (not all are in the IANA registry!)
	for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
		if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 {
			regionOldMap.add(reg.Type)
			regionOldMap.updateLater(reg.Type, reg.Replacement)
			i, _ := regionISO.find(reg.Type)
			j, _ := regionISO.find(reg.Replacement)
			if k := m49map[i+isoOffset]; k == 0 {
				m49map[i+isoOffset] = m49map[j+isoOffset]
			}
		}
	}
	b.writeSortedMap("regionOldMap", &regionOldMap, func(s string) uint16 {
		return uint16(b.region.index(s))
	})
	// 3-digit region lookup, groupings.
	for i := 1; i < isoOffset; i++ {
		m := parseM49(b.region.s[i])
		m49map[i] = m
		fromM49map[m] = i
	}
	b.writeSlice("m49", m49map)

	const (
		searchBits = 7
		regionBits = 9
	)
	if len(m49map) >= 1<<regionBits {
		log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits)
	}
	m49Index := [9]int16{}
	fromM49 := []uint16{}
	m49 := []int{}
	for k, _ := range fromM49map {
		m49 = append(m49, int(k))
	}
	sort.Ints(m49)
	for _, k := range m49[1:] {
		val := (k & (1<<searchBits - 1)) << regionBits
		fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)]))
		m49Index[1:][k>>searchBits] = int16(len(fromM49))
	}
	b.writeSlice("m49Index", m49Index)
	b.writeSlice("fromM49", fromM49)
}
Example #6
0
// writeLanguage generates all tables needed for language canonicalization.
func (b *builder) writeLanguage() {
	meta := b.supp.Metadata

	b.writeConst("nonCanonicalUnd", b.lang.index("und"))
	b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
	b.writeConst("langPrivateStart", b.langIndex("qaa"))
	b.writeConst("langPrivateEnd", b.langIndex("qtz"))

	// Get language codes that need to be mapped (overlong 3-letter codes,
	// deprecated 2-letter codes, legacy and grandfathered tags.)
	langAliasMap := stringSet{}
	aliasTypeMap := map[string]langAliasType{}

	// altLangISO3 get the alternative ISO3 names that need to be mapped.
	altLangISO3 := stringSet{}
	// Add dummy start to avoid the use of index 0.
	altLangISO3.add("---")
	altLangISO3.updateLater("---", "aa")

	lang := b.lang.clone()
	for _, a := range meta.Alias.LanguageAlias {
		if a.Replacement == "" {
			a.Replacement = "und"
		}
		// TODO: support mapping to tags
		repl := strings.SplitN(a.Replacement, "_", 2)[0]
		if a.Reason == "overlong" {
			if len(a.Replacement) == 2 && len(a.Type) == 3 {
				lang.updateLater(a.Replacement, a.Type)
			}
		} else if len(a.Type) <= 3 {
			switch a.Reason {
			case "macrolanguage":
				aliasTypeMap[a.Type] = langMacro
			case "deprecated":
				// handled elsewhere
				continue
			case "bibliographic", "legacy":
				if a.Type == "no" {
					continue
				}
				aliasTypeMap[a.Type] = langLegacy
			default:
				log.Fatalf("new %s alias: %s", a.Reason, a.Type)
			}
			langAliasMap.add(a.Type)
			langAliasMap.updateLater(a.Type, repl)
		}
	}
	// Manually add the mapping of "nb" (Norwegian) to its macro language.
	// This can be removed if CLDR adopts this change.
	langAliasMap.add("nb")
	langAliasMap.updateLater("nb", "no")
	aliasTypeMap["nb"] = langMacro

	for k, v := range b.registry {
		// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
		if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
			langAliasMap.add(k)
			langAliasMap.updateLater(k, v.preferred)
			aliasTypeMap[k] = langDeprecated
		}
	}
	// Fix CLDR mappings.
	lang.updateLater("tl", "tgl")
	lang.updateLater("sh", "hbs")
	lang.updateLater("mo", "mol")
	lang.updateLater("no", "nor")
	lang.updateLater("tw", "twi")
	lang.updateLater("nb", "nob")
	lang.updateLater("ak", "aka")

	// Ensure that each 2-letter code is matched with a 3-letter code.
	for _, v := range lang.s[1:] {
		s, ok := lang.update[v]
		if !ok {
			if s, ok = lang.update[langAliasMap.update[v]]; !ok {
				continue
			}
			lang.update[v] = s
		}
		if v[0] != s[0] {
			altLangISO3.add(s)
			altLangISO3.updateLater(s, v)
		}
	}

	// Complete canonialized language tags.
	lang.freeze()
	for i, v := range lang.s {
		// We can avoid these manual entries by using the IANI registry directly.
		// Seems easier to update the list manually, as changes are rare.
		// The panic in this loop will trigger if we miss an entry.
		add := ""
		if s, ok := lang.update[v]; ok {
			if s[0] == v[0] {
				add = s[1:]
			} else {
				add = string([]byte{0, byte(altLangISO3.index(s))})
			}
		} else if len(v) == 3 {
			add = "\x00"
		} else {
			log.Panicf("no data for long form of %q", v)
		}
		lang.s[i] += add
	}
	b.writeConst("lang", tag.Index(lang.join()))

	b.writeConst("langNoIndexOffset", len(b.lang.s))

	// space of all valid 3-letter language identifiers.
	b.writeBitVector("langNoIndex", b.langNoIndex.slice())

	altLangIndex := []uint16{}
	for i, s := range altLangISO3.slice() {
		altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
		if i > 0 {
			idx := b.lang.index(altLangISO3.update[s])
			altLangIndex = append(altLangIndex, uint16(idx))
		}
	}
	b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
	b.writeSlice("altLangIndex", altLangIndex)

	b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex)
	types := make([]langAliasType, len(langAliasMap.s))
	for i, s := range langAliasMap.s {
		types[i] = aliasTypeMap[s]
	}
	b.writeSlice("langAliasTypes", types)
}
Example #7
0
func (b *builder) genCurrencies(w *gen.CodeWriter, data *cldr.SupplementalData) {
	// 3-letter ISO currency codes
	// Start with dummy to let index start at 1.
	currencies := []string{"\x00\x00\x00\x00"}

	// currency codes
	for _, reg := range data.CurrencyData.Region {
		for _, cur := range reg.Currency {
			currencies = append(currencies, cur.Iso4217)
		}
	}
	// Not included in the list for some reasons:
	currencies = append(currencies, "MVP")

	sort.Strings(currencies)
	// Unique the elements.
	k := 0
	for i := 1; i < len(currencies); i++ {
		if currencies[k] != currencies[i] {
			currencies[k+1] = currencies[i]
			k++
		}
	}
	currencies = currencies[:k+1]

	// Close with dummy for simpler and faster searching.
	currencies = append(currencies, "\xff\xff\xff\xff")

	// Write currency values.
	fmt.Fprintln(w, "const (")
	for _, c := range constants {
		index := sort.SearchStrings(currencies, c)
		fmt.Fprintf(w, "\t%s = %d\n", strings.ToLower(c), index)
	}
	fmt.Fprint(w, ")")

	// Compute currency-related data that we merge into the table.
	for _, info := range data.CurrencyData.Fractions[0].Info {
		if info.Iso4217 == "DEFAULT" {
			continue
		}
		standard := getRoundingIndex(info.Digits, info.Rounding, 0)
		cash := getRoundingIndex(info.CashDigits, info.CashRounding, standard)

		index := sort.SearchStrings(currencies, info.Iso4217)
		currencies[index] += mkCurrencyInfo(standard, cash)
	}

	// Set default values for entries that weren't touched.
	for i, c := range currencies {
		if len(c) == 3 {
			currencies[i] += mkCurrencyInfo(0, 0)
		}
	}

	b.currencies = tag.Index(strings.Join(currencies, ""))
	w.WriteComment(`
	currency holds an alphabetically sorted list of canonical 3-letter currency
	identifiers. Each identifier is followed by a byte of type currencyInfo,
	defined in gen_common.go.`)
	w.WriteConst("currency", b.currencies)

	// Hack alert: gofmt indents a trailing comment after an indented string.
	// Ensure that the next thing written is not a comment.
	b.numCurrencies = (len(b.currencies) / 4) - 2
	w.WriteConst("numCurrencies", b.numCurrencies)

	// Create a table that maps regions to currencies.
	regionToCurrency := []toCurrency{}

	for _, reg := range data.CurrencyData.Region {
		if len(reg.Iso3166) != 2 {
			log.Fatalf("Unexpected group %q in region data", reg.Iso3166)
		}
		if len(reg.Currency) == 0 {
			continue
		}
		cur := reg.Currency[0]
		if cur.To != "" || cur.Tender == "false" {
			continue
		}
		regionToCurrency = append(regionToCurrency, toCurrency{
			region: regionToCode(language.MustParseRegion(reg.Iso3166)),
			code:   uint16(b.currencies.Index([]byte(cur.Iso4217))),
		})
	}
	sort.Sort(byRegion(regionToCurrency))

	w.WriteType(toCurrency{})
	w.WriteVar("regionToCurrency", regionToCurrency)
}