Example #1
0
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
func parseCollation(b *build.Builder) {
	d := &cldr.Decoder{}
	d.SetDirFilter("collation")
	data := decodeCLDR(d)
	for _, loc := range data.Locales() {
		x, err := data.LDML(loc)
		failOnError(err)
		if skipLang(x.Identity.Language.Type) {
			continue
		}
		cs := x.Collations.Collation
		sl := cldr.MakeSlice(&cs)
		if !types.all {
			sl.SelectAnyOf("type", append(types.s, x.Collations.Default())...)
		}
		sl.SelectOnePerGroup("alt", altInclude())

		for _, c := range cs {
			m := make(map[language.Part]string)
			m[language.TagPart] = loc
			if c.Type != x.Collations.Default() {
				m[language.Extension('u')] = "co-" + c.Type
			}
			id, err := language.Compose(m)
			failOnError(err)
			t := b.Tailoring(id)
			c.Process(processor{t})
		}
	}
}
Example #2
0
func TestTables(t *testing.T) {
	if !*long {
		return
	}

	gen.Init()

	// Read the CLDR zip file.
	r := gen.OpenCLDRCoreZip()
	defer r.Close()

	d := &cldr.Decoder{}
	d.SetDirFilter("supplemental", "main")
	d.SetSectionFilter("numbers")
	data, err := d.DecodeZip(r)
	if err != nil {
		t.Fatalf("DecodeZip: %v", err)
	}

	dr, err := cldr.ParseDraft(*draft)
	if err != nil {
		t.Fatalf("filter: %v", err)
	}

	for _, lang := range data.Locales() {
		p := message.NewPrinter(language.MustParse(lang))

		ldml := data.RawLDML(lang)
		if ldml.Numbers == nil || ldml.Numbers.Currencies == nil {
			continue
		}
		for _, c := range ldml.Numbers.Currencies.Currency {
			syms := cldr.MakeSlice(&c.Symbol)
			syms.SelectDraft(dr)

			for _, sym := range c.Symbol {
				cur, err := ParseISO(c.Type)
				if err != nil {
					continue
				}
				formatter := Symbol
				switch sym.Alt {
				case "":
				case "narrow":
					formatter = NarrowSymbol
				default:
					continue
				}
				want := sym.Data()
				if got := p.Sprint(formatter(cur)); got != want {
					t.Errorf("%s:%sSymbol(%s) = %s; want %s", lang, strings.Title(sym.Alt), c.Type, got, want)
				}
			}
		}
	}
}
Example #3
0
func ExampleSlice() {
	var dr *cldr.CLDR // assume this is initalized

	x, _ := dr.LDML("en")
	cs := x.Collations.Collation
	// remove all but the default
	cldr.MakeSlice(&cs).Filter(func(e cldr.Elem) bool {
		return e.GetCommon().Type != x.Collations.Default()
	})
	for i, c := range cs {
		fmt.Println(i, c.Type)
	}
}
Example #4
0
func (b *builder) filter() {
	filter := func(s *cldr.Slice) {
		if *short {
			s.SelectOnePerGroup("alt", []string{"short", ""})
		} else {
			s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
		}
		d, err := cldr.ParseDraft(*draft)
		if err != nil {
			log.Fatalf("filter: %v", err)
		}
		s.SelectDraft(d)
	}
	for _, loc := range b.data.Locales() {
		if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
			if ldn.Languages != nil {
				s := cldr.MakeSlice(&ldn.Languages.Language)
				if filter(&s); len(ldn.Languages.Language) == 0 {
					ldn.Languages = nil
				}
			}
			if ldn.Scripts != nil {
				s := cldr.MakeSlice(&ldn.Scripts.Script)
				if filter(&s); len(ldn.Scripts.Script) == 0 {
					ldn.Scripts = nil
				}
			}
			if ldn.Territories != nil {
				s := cldr.MakeSlice(&ldn.Territories.Territory)
				if filter(&s); len(ldn.Territories.Territory) == 0 {
					ldn.Territories = nil
				}
			}
		}
	}
}
Example #5
0
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
func parseCollation(b *build.Builder) {
	d := &cldr.Decoder{}
	d.SetDirFilter("collation")
	data := decodeCLDR(d)
	for _, loc := range data.Locales() {
		x, err := data.LDML(loc)
		failOnError(err)
		if skipLang(x.Identity.Language.Type) {
			continue
		}
		cs := x.Collations.Collation
		sl := cldr.MakeSlice(&cs)
		if len(types.s) == 0 {
			sl.SelectAnyOf("type", x.Collations.Default())
		} else if !types.all {
			sl.SelectAnyOf("type", types.s...)
		}
		sl.SelectOnePerGroup("alt", altInclude())

		for _, c := range cs {
			id, err := language.Parse(loc)
			if err != nil {
				if loc == "en-US-posix" {
					fmt.Fprintf(os.Stderr, "invalid locale: %q", err.Error())
					continue
				}
				id = language.Make("en-US-u-va-posix")
			}
			// Support both old- and new-style defaults.
			d := c.Type
			if x.Collations.DefaultCollation == nil {
				d = x.Collations.Default()
			} else {
				d = x.Collations.DefaultCollation.Data()
			}
			// We assume tables are being built either for search or collation,
			// but not both. For search the default is always "search".
			if d != c.Type && c.Type != "search" {
				id, err = id.SetTypeForKey("co", c.Type)
				failOnError(err)
			}
			t := b.Tailoring(id)
			c.Process(processor{t})
		}
	}
}
Example #6
0
// writeMatchData writes tables with languages and scripts for which there is
// mutual intelligibility. The data is based on CLDR's languageMatching data.
// Note that we use a different algorithm than the one defined by CLDR and that
// we slightly modify the data. For example, we convert scores to confidence levels.
// We also drop all region-related data as we use a different algorithm to
// determine region equivalence.
func (b *builder) writeMatchData() {
	b.writeType(mutualIntelligibility{})
	b.writeType(scriptIntelligibility{})
	lm := b.supp.LanguageMatching.LanguageMatches
	cldr.MakeSlice(&lm).SelectAnyOf("type", "written")

	matchLang := []mutualIntelligibility{}
	matchScript := []scriptIntelligibility{}
	// Convert the languageMatch entries in lists keyed by desired language.
	for _, m := range lm[0].LanguageMatch {
		// Different versions of CLDR use different separators.
		desired := strings.Replace(m.Desired, "-", "_", -1)
		supported := strings.Replace(m.Supported, "-", "_", -1)
		d := strings.Split(desired, "_")
		s := strings.Split(supported, "_")
		if len(d) != len(s) || len(d) > 2 {
			// Skip all entries with regions and work around CLDR bug.
			continue
		}
		pct, _ := strconv.ParseInt(m.Percent, 10, 8)
		if len(d) == 2 && d[0] == s[0] && len(d[1]) == 4 {
			// language-script pair.
			lang := uint16(0)
			if d[0] != "*" {
				lang = uint16(b.langIndex(d[0]))
			}
			matchScript = append(matchScript, scriptIntelligibility{
				lang: lang,
				want: uint8(b.script.index(d[1])),
				have: uint8(b.script.index(s[1])),
				conf: toConf(uint8(pct)),
			})
			if m.Oneway != "true" {
				matchScript = append(matchScript, scriptIntelligibility{
					lang: lang,
					want: uint8(b.script.index(s[1])),
					have: uint8(b.script.index(d[1])),
					conf: toConf(uint8(pct)),
				})
			}
		} else if len(d) == 1 && d[0] != "*" {
			if pct == 100 {
				// nb == no is already handled by macro mapping. Check there
				// really is only this case.
				if d[0] != "no" || s[0] != "nb" {
					log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
				}
				continue
			}
			matchLang = append(matchLang, mutualIntelligibility{
				want:   uint16(b.langIndex(d[0])),
				have:   uint16(b.langIndex(s[0])),
				conf:   uint8(pct),
				oneway: m.Oneway == "true",
			})
		} else {
			// TODO: Handle the es_MX -> es_419 mapping. This does not seem to
			// make much sense for our purposes, though.
			a := []string{"*;*", "*_*;*_*", "es_MX;es_419"}
			s := strings.Join([]string{desired, supported}, ";")
			if i := sort.SearchStrings(a, s); i == len(a) || a[i] != s {
				log.Fatalf("%q not handled", s)
			}
		}
	}
	sort.Sort(sortByConf(matchLang))
	// collapse percentage into confidence classes
	for i, m := range matchLang {
		matchLang[i].conf = toConf(m.conf)
	}
	b.writeSlice("matchLang", matchLang)
	b.writeSlice("matchScript", matchScript)
}
Example #7
0
// genSymbols generates the symbols used for currencies. Most symbols are
// defined in root and there is only very small variation per language.
// The following rules apply:
// - A symbol can be requested as normal or narrow.
// - If a symbol is not defined for a currency, it defaults to its ISO code.
func (b *builder) genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
	d, err := cldr.ParseDraft(*draft)
	if err != nil {
		log.Fatalf("filter: %v", err)
	}

	const (
		normal = iota
		narrow
		numTypes
	)
	// language -> currency -> type ->  symbol
	var symbols [language.NumCompactTags][][numTypes]*string

	// Collect symbol information per language.
	for _, lang := range data.Locales() {
		ldml := data.RawLDML(lang)
		if ldml.Numbers == nil || ldml.Numbers.Currencies == nil {
			continue
		}

		langIndex, ok := language.CompactIndex(language.MustParse(lang))
		if !ok {
			log.Fatalf("No compact index for language %s", lang)
		}

		symbols[langIndex] = make([][numTypes]*string, b.numCurrencies+1)

		for _, c := range ldml.Numbers.Currencies.Currency {
			syms := cldr.MakeSlice(&c.Symbol)
			syms.SelectDraft(d)

			for _, sym := range c.Symbol {
				v := sym.Data()
				if v == c.Type {
					// We define "" to mean the ISO symbol.
					v = ""
				}
				cur := b.currencies.Index([]byte(c.Type))
				if cur == -1 {
					fmt.Println("Unsupported:", c.Type) // TODO: mark MVP as supported.
					continue
				}

				switch sym.Alt {
				case "":
					symbols[langIndex][cur][normal] = &v
				case "narrow":
					symbols[langIndex][cur][narrow] = &v
				}
			}
		}
	}

	// Remove values identical to the parent.
	for langIndex, data := range symbols {
		for curIndex, curs := range data {
			for typ, sym := range curs {
				if sym == nil {
					continue
				}
				for p := uint16(langIndex); p != 0; {
					p = internal.Parent[p]
					x := symbols[p]
					if x == nil {
						continue
					}
					if v := x[curIndex][typ]; v != nil || p == 0 {
						// Value is equal to the default value root value is undefined.
						parentSym := ""
						if v != nil {
							parentSym = *v
						}
						if parentSym == *sym {
							// Value is the same as parent.
							data[curIndex][typ] = nil
						}
						break
					}
				}
			}
		}
	}

	// Create symbol index.
	symbolData := []byte{0}
	symbolLookup := map[string]uint16{"": 0} // 0 means default, so block that value.
	for _, data := range symbols {
		for _, curs := range data {
			for _, sym := range curs {
				if sym == nil {
					continue
				}
				if _, ok := symbolLookup[*sym]; !ok {
					symbolLookup[*sym] = uint16(len(symbolData))
					symbolData = append(symbolData, byte(len(*sym)))
					symbolData = append(symbolData, *sym...)
				}
			}
		}
	}
	w.WriteComment(`
	symbols holds symbol data of the form <n> <str>, where n is the length of
	the symbol string str.`)
	w.WriteConst("symbols", string(symbolData))

	// Create index from language to currency lookup to symbol.
	type curToIndex struct{ cur, idx uint16 }
	w.WriteType(curToIndex{})

	prefix := []string{"normal", "narrow"}
	// Create data for regular and narrow symbol data.
	for typ := normal; typ <= narrow; typ++ {

		indexes := []curToIndex{} // maps currency to symbol index
		languages := []uint16{}

		for _, data := range symbols {
			languages = append(languages, uint16(len(indexes)))
			for curIndex, curs := range data {

				if sym := curs[typ]; sym != nil {
					indexes = append(indexes, curToIndex{uint16(curIndex), symbolLookup[*sym]})
				}
			}
		}
		languages = append(languages, uint16(len(indexes)))

		w.WriteVar(prefix[typ]+"LangIndex", languages)
		w.WriteVar(prefix[typ]+"SymIndex", indexes)
	}
}