func TestCaseProperties(t *testing.T) { assigned := rangetable.Assigned(UnicodeVersion) coreVersion := rangetable.Assigned(unicode.Version) for r := rune(0); r <= lastRuneForTesting; r++ { if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { continue } c := contextFromRune(r) if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want { t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info) } // New letters may change case types, but existing case pairings should // not change. See Case Pair Stability in // http://unicode.org/policies/stability_policy.html. if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) { if got, want := c.info.isCased(), propCased(r); got != want { t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info) } if got, want := c.caseType() == cUpper, propUpper(r); got != want { t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info) } if got, want := c.caseType() == cLower, propLower(r); got != want { t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info) } } if got, want := c.info.isBreak(), hasBreakProp(r); got != want { t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info) } } // TODO: get title case from unicode file. }
func TestMapping(t *testing.T) { assigned := rangetable.Assigned(UnicodeVersion) coreVersion := rangetable.Assigned(unicode.Version) if coreVersion == nil { coreVersion = assigned } apply := func(r rune, f func(c *context) bool) string { c := contextFromRune(r) f(c) return string(c.dst[:c.pDst]) } for r, tt := range special { if got, want := apply(r, lower), tt.toLower; got != want { t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want) } if got, want := apply(r, title), tt.toTitle; got != want { t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want) } if got, want := apply(r, upper), tt.toUpper; got != want { t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want) } } for r := rune(0); r <= lastRuneForTesting; r++ { if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { continue } if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) { continue } if _, ok := special[r]; ok { continue } want := string(unicode.ToLower(r)) if got := apply(r, lower); got != want { t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) } want = string(unicode.ToUpper(r)) if got := apply(r, upper); got != want { t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) } want = string(unicode.ToTitle(r)) if got := apply(r, title); got != want { t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) } } }
func TestCaseProperties(t *testing.T) { if unicode.Version != UnicodeVersion { t.Skipf("UnicodeVersion=%s, but unicode.Version=%s", UnicodeVersion, unicode.Version) } assigned := rangetable.Assigned(UnicodeVersion) for r := rune(0); r <= lastRuneForTesting; r++ { if !unicode.In(r, assigned) || !unicode.In(unicode.SimpleFold(r), assigned) { continue } c := contextFromRune(r) if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want { t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info) } if got, want := c.info.isCased(), propCased(r); got != want { t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info) } if got, want := c.caseType() == cUpper, propUpper(r); got != want { t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info) } if got, want := c.caseType() == cLower, propLower(r); got != want { t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info) } if got, want := c.info.isBreak(), hasBreakProp(r); got != want { t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info) } } // TODO: get title case from unicode file. }
// Ensure that ceratain properties were generated correctly. func TestTable(t *testing.T) { tests := []tableTest{ tableTest{ rangetable.Merge( unicode.Lt, unicode.Nl, unicode.No, // Other letter digits unicode.Me, // Modifiers unicode.Zs, // Spaces unicode.So, // Symbols unicode.Pi, unicode.Pf, // Punctuation ), idDisOrFreePVal, }, tableTest{ rangetable.New(0x30000, 0x30101, 0xDFFFF), unassigned, }, } assigned := rangetable.Assigned(UnicodeVersion) for _, test := range tests { rangetable.Visit(test.rangeTable, func(r rune) { if !unicode.In(r, assigned) { return } b := make([]byte, 4) n := utf8.EncodeRune(b, r) trieval, _ := dpTrie.lookup(b[:n]) p := entry(trieval).property() if p != test.prop && !exceptions.Contains(r) { t.Errorf("%U: got %+x; want %+x", r, test.prop, p) } }) } }
func TestFoldData(t *testing.T) { assigned := rangetable.Assigned(UnicodeVersion) coreVersion := rangetable.Assigned(unicode.Version) apply := func(r rune, f func(c *context) bool) (string, info) { c := contextFromRune(r) f(c) return string(c.dst[:c.pDst]), c.info.cccType() } for r := rune(0); r <= lastRuneForTesting; r++ { if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { continue } x := runeFoldData(r) if got, info := apply(r, foldFull); got != x.full { t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info) } // TODO: special and simple. } }
func TestCCC(t *testing.T) { assigned := rangetable.Assigned(UnicodeVersion) normVersion := rangetable.Assigned(norm.Version) for r := rune(0); r <= lastRuneForTesting; r++ { if !unicode.In(r, assigned) || !unicode.In(r, normVersion) { continue } c := contextFromRune(r) p := norm.NFC.PropertiesString(string(r)) want := cccOther switch p.CCC() { case 0: want = cccZero case above: want = cccAbove } if got := c.info.cccType(); got != want { t.Errorf("%U: got %x; want %x", r, got, want) } } }
func main() { gen.Init() // Load data runes := []rune{} ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) { if p.String(1) == "Default_Ignorable_Code_Point" { runes = append(runes, p.Rune(0)) } }) ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) { if p.String(1) == "LVT" { runes = append(runes, p.Rune(0)) } }) disallowedRunes = rangetable.New(runes...) assigned = rangetable.Assigned(unicode.Version) writeTables() gen.Repackage("gen_trieval.go", "trieval.go", "precis") }
// This file contains tests which need to import package collate, which causes // an import cycle when done within package colltab itself. import ( "bytes" "testing" "unicode" "golang.org/x/text/collate" "golang.org/x/text/language" "golang.org/x/text/unicode/rangetable" ) // assigned is used to only test runes that are inside the scope of the Unicode // version used to generation the collation table. var assigned = rangetable.Assigned(collate.UnicodeVersion) func TestNonDigits(t *testing.T) { c := collate.New(language.English, collate.Loose, collate.Numeric) // Verify that all non-digit numbers sort outside of the number range. for r, hi := rune(unicode.N.R16[0].Lo), rune(unicode.N.R32[0].Hi); r <= hi; r++ { if unicode.In(r, unicode.Nd) || !unicode.In(r, assigned) { continue } if a := string(r); c.CompareString(a, "0") != -1 && c.CompareString(a, "999999") != 1 { t.Errorf("%+q non-digit number is collated as digit", a) } } }
func main() { gen.Init() // Load data runes := []rune{} // PrecisIgnorableProperties: https://tools.ietf.org/html/rfc7564#section-9.13 ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) { if p.String(1) == "Default_Ignorable_Code_Point" { runes = append(runes, p.Rune(0)) } }) ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) { switch p.String(1) { case "Noncharacter_Code_Point": runes = append(runes, p.Rune(0)) } }) // OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9 ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) { switch p.String(1) { case "L", "V", "T": runes = append(runes, p.Rune(0)) } }) disallowedRunes = rangetable.New(runes...) assigned = rangetable.Assigned(unicode.Version) // Load category data. runeCategory['l'] = latinSmallL ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) { const cccVirama = 9 if p.Int(ucd.CanonicalCombiningClass) == cccVirama { setCategory(p.Rune(0), viramaModifier) } }) ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) { switch p.String(1) { case "Greek": setCategory(p.Rune(0), greek) case "Hebrew": setCategory(p.Rune(0), hebrew) case "Hiragana", "Katakana", "Han": setCategory(p.Rune(0), japanese) } }) // Set the rule categories associated with exceptions. This overrides any // previously set categories. The original categories are manually // reintroduced in the categoryTransitions table. for r, e := range exceptions { if e.cat != 0 { runeCategory[r] = e.cat } } cat := map[string]category{ "L": joiningL, "D": joiningD, "T": joiningT, "R": joiningR, } ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) { switch v := p.String(1); v { case "L", "D", "T", "R": setCategory(p.Rune(0), cat[v]) } }) writeTables() gen.Repackage("gen_trieval.go", "trieval.go", "precis") }