func (tc *transformTest) doTest(t *testing.T, tr Transformer) { testtext.Run(t, tc.desc, func(t *testing.T) { b := make([]byte, tc.nBuf) nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF) if got := string(b[:nDst]); got != tc.dst[:nDst] { t.Errorf("dst was %+q; want %+q", got, tc.dst) } if nDst != tc.nDst { t.Errorf("nDst was %d; want %d", nDst, tc.nDst) } if nSrc != tc.nSrc { t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc) } if err != tc.err { t.Errorf("error was %v; want %v", err, tc.err) } if got := tr.String(tc.src); got != tc.dst { t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst) } n, err := tr.Span([]byte(tc.src), tc.atEOF) if n != tc.nSpan || err != tc.errSpan { t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan) } }) }
func TestLanguage(t *testing.T) { tests := []struct { dict string tag string name string }{ {"agq", "sr", ""}, // sr is in Value.Languages(), but is not supported by agq. // CLDR 30 dropped Vlaams as the word for nl-BE. It is still called // Flemish in English, though. TODO: this is probably incorrect. // West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its // own language, whereas Vlaams is generally Dutch. So expect to have // to change these tests back. {"nl", "nl", "Nederlands"}, {"nl", "vls", "West-Vlaams"}, {"nl", "nl-BE", "Nederlands"}, {"en", "pt", "Portuguese"}, {"en", "pt-PT", "European Portuguese"}, {"en", "pt-BR", "Brazilian Portuguese"}, {"en", "en", "English"}, {"en", "en-GB", "British English"}, {"en", "en-US", "American English"}, // American English in CLDR 24+ {"en", lastLang2zu.String(), "Zulu"}, {"en", firstLang2aa.String(), "Afar"}, {"en", lastLang3zza.String(), "Zaza"}, {"en", firstLang3ace.String(), "Achinese"}, {"en", firstTagAr001.String(), "Modern Standard Arabic"}, {"en", lastTagZhHant.String(), "Traditional Chinese"}, {"en", "aaa", ""}, {"en", "zzj", ""}, // If full tag doesn't match, try without script or region. {"en", "aa-Hans", "Afar"}, {"en", "af-Arab", "Afrikaans"}, {"en", "zu-Cyrl", "Zulu"}, {"en", "aa-GB", "Afar"}, {"en", "af-NA", "Afrikaans"}, {"en", "zu-BR", "Zulu"}, {"agq", "zh-Hant", ""}, // Canonical equivalents. {"ro", "ro-MD", "moldovenească"}, {"ro", "mo", "moldovenească"}, {"en", "sh", "Serbo-Croatian"}, {"en", "sr-Latn", "Serbo-Croatian"}, {"en", "sr", "Serbian"}, {"en", "sr-ME", "Serbian"}, {"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag. } for i, tt := range tests { testtext.Run(t, tt.dict+"/"+tt.tag, func(t *testing.T) { d := Languages(language.Raw.MustParse(tt.dict)) if n := d.Name(language.Raw.MustParse(tt.tag)); n != tt.name { t.Errorf("%d:%s:%s: was %q; want %q", i, tt.dict, tt.tag, n, tt.name) } if len(tt.tag) <= 3 { if n := d.Name(language.MustParseBase(tt.tag)); n != tt.name { t.Errorf("%d:%s:base(%s): was %q; want %q", i, tt.dict, tt.tag, n, tt.name) } } }) } }
func TestRegionDistance(t *testing.T) { tests := []struct { a, b string d int }{ {"NL", "NL", 0}, {"NL", "EU", 1}, {"EU", "NL", 1}, {"005", "005", 0}, {"NL", "BE", 2}, {"CO", "005", 1}, {"005", "CO", 1}, {"CO", "419", 2}, {"419", "CO", 2}, {"005", "419", 1}, {"419", "005", 1}, {"001", "013", 2}, {"013", "001", 2}, {"CO", "CW", 4}, {"CO", "PW", 6}, {"CO", "BV", 6}, {"ZZ", "QQ", 2}, } for i, tt := range tests { testtext.Run(t, tt.a+"/"+tt.b, func(t *testing.T) { ra, _ := getRegionID([]byte(tt.a)) rb, _ := getRegionID([]byte(tt.b)) if d := regionDistance(ra, rb); d != tt.d { t.Errorf("%d: d(%s, %s) = %v; want %v", i, tt.a, tt.b, d, tt.d) } }) } }
func TestWordBreaks(t *testing.T) { for _, tt := range breakTest { testtext.Run(t, tt, func(t *testing.T) { parts := strings.Split(tt, "|") want := "" for _, s := range parts { found := false // This algorithm implements title casing given word breaks // as defined in the Unicode standard 3.13 R3. for _, r := range s { title := unicode.ToTitle(r) lower := unicode.ToLower(r) if !found && title != lower { found = true want += string(title) } else { want += string(lower) } } } src := strings.Join(parts, "") got := Title(language.Und).String(src) if got != want { t.Errorf("got %q; want %q", got, want) } }) } }
func TestFormats(t *testing.T) { testCases := []struct { lang string pattern string index []byte }{ {"en", "#,##0.###", tagToDecimal}, {"de", "#,##0.###", tagToDecimal}, {"de-CH", "#,##0.###", tagToDecimal}, {"pa", "#,##,##0.###", tagToDecimal}, {"pa-Arab", "#,##0.###", tagToDecimal}, // Does NOT inherit from pa! {"mr", "#,##,##0.###", tagToDecimal}, {"mr-IN", "#,##,##0.###", tagToDecimal}, // Inherits from mr. {"nl", "#E0", tagToScientific}, {"nl-MX", "#E0", tagToScientific}, // Inherits through Tag.Parent. {"zgh", "#,##0 %", tagToPercent}, } for _, tc := range testCases { testtext.Run(t, tc.lang, func(t *testing.T) { got := formatForLang(language.MustParse(tc.lang), tc.index) want, _ := ParsePattern(tc.pattern) if *got != *want { t.Errorf("\ngot %#v;\nwant %#v", got, want) } }) } }
// doTest performs a single test f(input) and verifies that the output matches // out and that the returned error is expected. The errors string contains // all allowed error codes as categorized in // http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt: // P: Processing // V: Validity // A: to ASCII // B: Bidi // C: Context J func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) { errors = strings.Trim(errors, "[]") test := "ok" if errors != "" { test = "err:" + errors } // Replace some of the escape sequences to make it easier to single out // tests on the command name. in := strings.Trim(strconv.QuoteToASCII(input), `"`) in = strings.Replace(in, `\u`, "#", -1) in = strings.Replace(in, `\U`, "#", -1) name = fmt.Sprintf("%s/%s/%s", name, in, test) testtext.Run(t, name, func(t *testing.T) { got, err := f(input) if err != nil { code := err.(interface { code() string }).code() if strings.Index(errors, code) == -1 { t.Errorf("error %q not in set of expected errors {%v}", code, errors) } } else if errors != "" { t.Errorf("no errors; want error in {%v}", errors) } if want != "" && got != want { t.Errorf(`string: got %+q; want %+q`, got, want) } }) }
func testString(t *testing.T, f func(Transformer, string) (string, int, error)) { for _, tt := range append(testCases, chainTests()...) { if tt.desc == "allowStutter = true" { // We don't have control over the buffer size, so we eliminate tests // that depend on a specific buffer size being set. continue } if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc { // The result string will be different. continue } testtext.Run(t, tt.desc, func(t *testing.T) { got, n, err := f(tt.t, tt.src) if tt.wantErr != err { t.Errorf("error: got %v; want %v", err, tt.wantErr) } // Check that err == nil implies that n == len(tt.src). Note that vice // versa isn't necessarily true. if err == nil && n != len(tt.src) { t.Errorf("err == nil: got %d bytes, want %d", n, err) } if got != tt.wantStr { t.Errorf("string: got %q; want %q", got, tt.wantStr) } }) } }
func TestShortBuffersAndOverflow(t *testing.T) { for i, tt := range bufferTests { testtext.Run(t, tt.desc, func(t *testing.T) { buf := make([]byte, tt.dstSize) got := []byte{} var nSrc, nDst int var err error for p := 0; p < len(tt.src); p += nSrc { q := p + tt.srcSize if q > len(tt.src) { q = len(tt.src) } nDst, nSrc, err = tt.t.Transform(buf, []byte(tt.src[p:q]), q == len(tt.src)) got = append(got, buf[:nDst]...) if p == 0 && err != tt.firstErr { t.Errorf("%d:%s:\n error was %v; want %v", i, tt.desc, err, tt.firstErr) break } } if string(got) != tt.want { t.Errorf("%d:%s:\ngot %+q;\nwant %+q", i, tt.desc, got, tt.want) } testHandover(t, Caser{tt.t}, tt.src) }) } }
// TestAlloc tests that some mapping methods should not cause any allocation. func TestAlloc(t *testing.T) { dst := make([]byte, 256) // big enough to hold any result src := []byte(txtNonASCII) for i, f := range []func() Caser{ func() Caser { return Upper(language.Und) }, func() Caser { return Lower(language.Und) }, func() Caser { return Lower(language.Und, HandleFinalSigma(false)) }, // TODO: use a shared copy for these casers as well, in order of // importance, starting with the most important: // func() Caser { return Title(language.Und) }, // func() Caser { return Title(language.Und, HandleFinalSigma(false)) }, } { testtext.Run(t, "", func(t *testing.T) { var c Caser v := testtext.AllocsPerRun(10, func() { c = f() }) if v > 0 { // TODO: Right now only Upper has 1 allocation. Special-case Lower // and Title as well to have less allocations for the root locale. t.Errorf("%d:init: number of allocs was %f; want 0", i, v) } v = testtext.AllocsPerRun(2, func() { c.Transform(dst, src, true) }) if v > 0 { t.Errorf("%d:transform: number of allocs was %f; want 0", i, v) } }) } }
func doTests(t *testing.T, fn func(t *testing.T, tc ruleTest)) { for rule, cases := range testCases { for i, tc := range cases { name := fmt.Sprintf("%d/%d:%+q:%s", rule, i, tc.in, tc.in) testtext.Run(t, name, func(t *testing.T) { fn(t, tc) }) } } }
func doTests(t *testing.T, fn func(t *testing.T, p *Profile, tc testCase)) { for _, g := range enforceTestCases { for i, tc := range g.cases { name := fmt.Sprintf("%s:%d:%+q", g.name, i, tc.input) testtext.Run(t, name, func(t *testing.T) { fn(t, g.p, tc) }) } } }
func runSpanTests(t *testing.T, name string, f Form, testCases []spanTest) { for i, tc := range testCases { s := fmt.Sprintf("Bytes/%s/%d=%+q/atEOF=%v", name, i, pc(tc.input), tc.atEOF) ok := testtext.Run(t, s, func(t *testing.T) { n, err := f.Span([]byte(tc.input), tc.atEOF) if n != tc.n || err != tc.err { t.Errorf("\n got %d, %v;\nwant %d, %v", n, err, tc.n, tc.err) } }) if !ok { continue // Don't do the String variant if the Bytes variant failed. } s = fmt.Sprintf("String/%s/%d=%+q/atEOF=%v", name, i, pc(tc.input), tc.atEOF) testtext.Run(t, s, func(t *testing.T) { n, err := f.SpanString(tc.input, tc.atEOF) if n != tc.n || err != tc.err { t.Errorf("\n got %d, %v;\nwant %d, %v", n, err, tc.n, tc.err) } }) } }
func TestCompare(t *testing.T) { for _, g := range compareTestCases { for i, tc := range g.cases { name := fmt.Sprintf("%s:%d:%+q", g.name, i, tc.a) testtext.Run(t, name, func(t *testing.T) { if result := g.p.Compare(tc.a, tc.b); result != tc.result { t.Errorf("got %v; want %v", result, tc.result) } }) } } }
func TestReader(t *testing.T) { for _, tc := range testCases { testtext.Run(t, tc.desc, func(t *testing.T) { r := NewReader(strings.NewReader(tc.src), tc.t) // Differently sized dst and src buffers are not part of the // exported API. We override them manually. r.dst = make([]byte, tc.dstSize) r.src = make([]byte, tc.srcSize) got, err := ioutil.ReadAll(r) str := string(got) if str != tc.wantStr || err != tc.wantErr { t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) } }) } }
func testHandover(t *testing.T, c Caser, src string) { want := c.String(src) // Find the common prefix. pSrc := 0 for ; pSrc < len(src) && pSrc < len(want) && want[pSrc] == src[pSrc]; pSrc++ { } // Test handover for each substring of the prefix. for i := 0; i < pSrc; i++ { testtext.Run(t, fmt.Sprint("interleave/", i), func(t *testing.T) { dst := make([]byte, 4*len(src)) c.Reset() nSpan, _ := c.Span([]byte(src[:i]), false) copy(dst, src[:nSpan]) nTransform, _, _ := c.Transform(dst[nSpan:], []byte(src[nSpan:]), true) got := string(dst[:nSpan+nTransform]) if got != want { t.Errorf("full string: got %q; want %q", got, want) } }) } }
func TestWriter(t *testing.T) { tests := append(testCases, chainTests()...) for _, tc := range tests { sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000} if tc.ioSize > 0 { sizes = []int{tc.ioSize} } for _, sz := range sizes { testtext.Run(t, fmt.Sprintf("%s/%d", tc.desc, sz), func(t *testing.T) { bb := &bytes.Buffer{} w := NewWriter(bb, tc.t) // Differently sized dst and src buffers are not part of the // exported API. We override them manually. w.dst = make([]byte, tc.dstSize) w.src = make([]byte, tc.srcSize) src := make([]byte, sz) var err error for b := tc.src; len(b) > 0 && err == nil; { n := copy(src, b) b = b[n:] m := 0 m, err = w.Write(src[:n]) if m != n && err == nil { t.Errorf("did not consume all bytes %d < %d", m, n) } } if err == nil { err = w.Close() } str := bb.String() if str != tc.wantStr || err != tc.wantErr { t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) } }) } } }
func TestConformance(t *testing.T) { testtext.SkipIfNotLong(t) r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt") defer r.Close() section := "main" started := false p := ucd.New(r, ucd.CommentHandler(func(s string) { if started { section = strings.ToLower(strings.Split(s, " ")[0]) } })) for p.Next() { started = true // What to test profiles := []*Profile{} switch p.String(0) { case "T": profiles = append(profiles, Transitional) case "N": profiles = append(profiles, NonTransitional) case "B": profiles = append(profiles, Transitional) profiles = append(profiles, NonTransitional) } src := unescape(p.String(1)) if incorrectTests[src] { continue } wantToUnicode := unescape(p.String(2)) if wantToUnicode == "" { wantToUnicode = src } wantToASCII := unescape(p.String(3)) if wantToASCII == "" { wantToASCII = wantToUnicode } test := "err:" if strings.HasPrefix(wantToUnicode, "[") { test += strings.Replace(strings.Trim(wantToUnicode, "[]"), " ", "", -1) } if strings.HasPrefix(wantToASCII, "[") { test += strings.Replace(strings.Trim(wantToASCII, "[]"), " ", "", -1) } if test == "err:" { test = "ok" } // TODO: also do IDNA tests. // invalidInIDNA2008 := p.String(4) == "NV8" for _, p := range profiles { testtext.Run(t, fmt.Sprintf("%s:%s/%s/%+q", section, test, p, src), func(t *testing.T) { got, err := p.ToUnicode(src) wantErr := strings.HasPrefix(wantToUnicode, "[") gotErr := err != nil if wantErr { if gotErr != wantErr { t.Errorf(`ToUnicode:err got %v; want %v (%s)`, gotErr, wantErr, wantToUnicode) } } else if got != wantToUnicode || gotErr != wantErr { t.Errorf(`ToUnicode: got %+q, %v (%v); want %+q, %v`, got, gotErr, err, wantToUnicode, wantErr) } got, err = p.ToASCII(src) wantErr = strings.HasPrefix(wantToASCII, "[") gotErr = err != nil if wantErr { if gotErr != wantErr { t.Errorf(`ToASCII:err got %v; want %v (%s)`, gotErr, wantErr, wantToASCII) } } else if got != wantToASCII || gotErr != wantErr { t.Errorf(`ToASCII: got %+q, %v (%v); want %+q, %v`, got, gotErr, err, wantToASCII, wantErr) } }) } } }
func TestString(t *testing.T) { testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) }) // Overrun the internal destination buffer. for i, s := range []string{ aaa[:1*initialBufSize-1], aaa[:1*initialBufSize+0], aaa[:1*initialBufSize+1], AAA[:1*initialBufSize-1], AAA[:1*initialBufSize+0], AAA[:1*initialBufSize+1], AAA[:2*initialBufSize-1], AAA[:2*initialBufSize+0], AAA[:2*initialBufSize+1], aaa[:1*initialBufSize-2] + "A", aaa[:1*initialBufSize-1] + "A", aaa[:1*initialBufSize+0] + "A", aaa[:1*initialBufSize+1] + "A", } { testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) { got, _, _ := String(lowerCaseASCII{}, s) if want := strings.ToLower(s); got != want { t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want)) } }) } // Overrun the internal source buffer. for i, s := range []string{ aaa[:1*initialBufSize-1], aaa[:1*initialBufSize+0], aaa[:1*initialBufSize+1], aaa[:2*initialBufSize+1], aaa[:2*initialBufSize+0], aaa[:2*initialBufSize+1], } { testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) { got, _, _ := String(rleEncode{}, s) if want := fmt.Sprintf("%da", len(s)); got != want { t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want)) } }) } // Test allocations for non-changing strings. // Note we still need to allocate a single buffer. for i, s := range []string{ "", "123456789", aaa[:initialBufSize-1], aaa[:initialBufSize+0], aaa[:initialBufSize+1], aaa[:10*initialBufSize], } { testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) { if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 { t.Errorf("#allocs was %f; want 1", n) } }) } }
func TestHandover(t *testing.T) { testCases := []struct { desc string t Caser first, second string }{{ "title/nosigma/single midword", Title(language.Und, HandleFinalSigma(false)), "A.", "a", }, { "title/nosigma/single midword", Title(language.Und, HandleFinalSigma(false)), "A", ".a", }, { "title/nosigma/double midword", Title(language.Und, HandleFinalSigma(false)), "A..", "a", }, { "title/nosigma/double midword", Title(language.Und, HandleFinalSigma(false)), "A.", ".a", }, { "title/nosigma/double midword", Title(language.Und, HandleFinalSigma(false)), "A", "..a", }, { "title/sigma/single midword", Title(language.Und), "ΟΣ.", "a", }, { "title/sigma/single midword", Title(language.Und), "ΟΣ", ".a", }, { "title/sigma/double midword", Title(language.Und), "ΟΣ..", "a", }, { "title/sigma/double midword", Title(language.Und), "ΟΣ.", ".a", }, { "title/sigma/double midword", Title(language.Und), "ΟΣ", "..a", }, { "title/af/leading apostrophe", Title(language.Afrikaans), "'", "n bietje", }} for _, tc := range testCases { testtext.Run(t, tc.desc, func(t *testing.T) { src := tc.first + tc.second want := tc.t.String(src) tc.t.Reset() n, _ := tc.t.Span([]byte(tc.first), false) dst := make([]byte, len(want)) copy(dst, tc.first[:n]) nDst, _, _ := tc.t.Transform(dst[n:], []byte(src[n:]), true) got := string(dst[:n+nDst]) if got != want { t.Errorf("got %q; want %q", got, want) } }) } }
func TestICUConformance(t *testing.T) { // Build test set. input := []string{ "a.a a_a", "a\u05d0a", "\u05d0'a", "a\u03084a", "a\u0308a", "a3\u30a3a", "a\u303aa", "a_\u303a_a", "1_a..a", "1_a.a", "a..a.", "a--a-", "a-a-", "a\u200ba", "a\u200b\u200ba", "a\u00ad\u00ada", // Format "a\u00ada", "a''a", // SingleQuote "a'a", "a::a", // MidLetter "a:a", "a..a", // MidNumLet "a.a", "a;;a", // MidNum "a;a", "a__a", // ExtendNumlet "a_a", "ΟΣ''a", } add := func(x interface{}) { switch v := x.(type) { case string: input = append(input, v) case []string: for _, s := range v { input = append(input, s) } } } for _, tc := range testCases { add(tc.src) add(tc.lower) add(tc.upper) add(tc.title) } for _, tc := range bufferTests { add(tc.src) } for _, tc := range breakTest { add(strings.Replace(tc, "|", "", -1)) } for _, tc := range foldTestCases { add(tc) } // Compare ICU to Go. for _, c := range []string{"lower", "upper", "title", "fold"} { for _, tag := range []string{ "und", "af", "az", "el", "lt", "nl", "tr", } { for _, s := range input { if exclude(c, tag, s) { continue } testtext.Run(t, path.Join(c, tag, s), func(t *testing.T) { want := doICU(tag, c, s) got := doGo(tag, c, s) if norm.NFC.String(got) != norm.NFC.String(want) { t.Errorf("\n in %[3]q (%+[3]q)\n got %[1]q (%+[1]q)\n want %[2]q (%+[2]q)", got, want, s) } }) } } } }
func TestSpan(t *testing.T) { for _, tt := range []struct { desc string src string want string atEOF bool err error t Caser }{{ desc: "und/upper/basic", src: "abcdefg", want: "", atEOF: true, err: transform.ErrEndOfSpan, t: Upper(language.Und), }, { desc: "und/upper/short src", src: "123É"[:4], want: "123", atEOF: false, err: transform.ErrShortSrc, t: Upper(language.Und), }, { desc: "und/upper/no error on short", src: "12", want: "12", atEOF: false, t: Upper(language.Und), }, { desc: "und/lower/basic", src: "ABCDEFG", want: "", atEOF: true, err: transform.ErrEndOfSpan, t: Lower(language.Und), }, { desc: "und/lower/short src num", src: "123é"[:4], want: "123", atEOF: false, err: transform.ErrShortSrc, t: Lower(language.Und), }, { desc: "und/lower/short src greek", src: "αβγé"[:7], want: "αβγ", atEOF: false, err: transform.ErrShortSrc, t: Lower(language.Und), }, { desc: "und/lower/no error on short", src: "12", want: "12", atEOF: false, t: Lower(language.Und), }, { desc: "und/lower/simple (no final sigma)", src: "ος οσσ", want: "οσ οσσ", atEOF: true, t: Lower(language.Und, HandleFinalSigma(false)), }, { desc: "und/title/simple (no final sigma)", src: "Οσ Οσσ", want: "Οσ Οσσ", atEOF: true, t: Title(language.Und, HandleFinalSigma(false)), }, { desc: "und/lower/final sigma: no error", src: "οΣ", // Oς want: "ο", // Oς err: transform.ErrEndOfSpan, t: Lower(language.Und), }, { desc: "und/title/final sigma: no error", src: "ΟΣ", // Oς want: "Ο", // Oς err: transform.ErrEndOfSpan, t: Title(language.Und), }, { desc: "und/title/final sigma: no short source!", src: "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ", want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ", err: transform.ErrEndOfSpan, t: Title(language.Und), }, { desc: "und/title/clipped UTF-8 rune", src: "Σσ" + string([]byte{0xCF}), want: "Σσ", atEOF: false, err: transform.ErrShortSrc, t: Title(language.Und), }, { desc: "und/title/clipped UTF-8 rune atEOF", src: "Σσσ" + string([]byte{0xCF}), want: "Σσσ" + string([]byte{0xCF}), atEOF: true, t: Title(language.Und), }, { // Note: the choice to change the final sigma at the end in case of // too many case ignorables is arbitrary. The main reason for this // choice is that it results in simpler code. desc: "und/title/long string", src: "A" + strings.Repeat("a", maxIgnorable+5), want: "A" + strings.Repeat("a", maxIgnorable+5), t: Title(language.Und), }, { // Note: the choice to change the final sigma at the end in case of // too many case ignorables is arbitrary. The main reason for this // choice is that it results in simpler code. desc: "und/title/cyrillic", src: "При", want: "При", atEOF: true, t: Title(language.Und, HandleFinalSigma(false)), }, { // Note: the choice to change the final sigma at the end in case of // too many case ignorables is arbitrary. The main reason for this // choice is that it results in simpler code. desc: "und/title/final sigma: max ignorables", src: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A", t: Title(language.Und), }, { desc: "el/upper/max ignorables - not implemented", src: "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313", want: "", err: transform.ErrEndOfSpan, t: Upper(language.Greek), }, { desc: "el/upper/too many ignorables - not implemented", src: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313", want: "", err: transform.ErrEndOfSpan, t: Upper(language.Greek), }, { desc: "el/upper/short dst", src: "123ο", want: "", err: transform.ErrEndOfSpan, t: Upper(language.Greek), }, { desc: "lt/lower/max ignorables", src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300", t: Lower(language.Lithuanian), }, { desc: "lt/lower/isLower", src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300", want: "", err: transform.ErrEndOfSpan, t: Lower(language.Lithuanian), }, { desc: "lt/lower/not identical", src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE err: transform.ErrEndOfSpan, want: "aaaaa", t: Lower(language.Lithuanian), }, { desc: "lt/lower/identical", src: "aaaai\u0307\u0300", // U+00CC LATIN CAPITAL LETTER I GRAVE want: "aaaai\u0307\u0300", t: Lower(language.Lithuanian), }, { desc: "lt/upper/not implemented", src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300", want: "", err: transform.ErrEndOfSpan, t: Upper(language.Lithuanian), }, { desc: "lt/upper/not implemented, ascii", src: "AB", want: "", err: transform.ErrEndOfSpan, t: Upper(language.Lithuanian), }, { desc: "nl/title/pre-IJ cutoff", src: " IJ", want: " IJ", t: Title(language.Dutch), }, { desc: "nl/title/mid-IJ cutoff", src: " Ia", want: " Ia", t: Title(language.Dutch), }, { desc: "af/title/apostrophe", src: "'n Bietje", want: "'n Bietje", t: Title(language.Afrikaans), }, { desc: "af/title/apostrophe-incorrect", src: "'N Bietje", // The Single_Quote (a MidWord), needs to be retained as unspanned so // that a successive call to Transform can detect that N should not be // capitalized. want: "", err: transform.ErrEndOfSpan, t: Title(language.Afrikaans), }} { testtext.Run(t, tt.desc, func(t *testing.T) { for p := 0; p < len(tt.want); p += utf8.RuneLen([]rune(tt.src[p:])[0]) { tt.t.Reset() n, err := tt.t.Span([]byte(tt.src[:p]), false) if err != nil && err != transform.ErrShortSrc { t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want)) break } } tt.t.Reset() n, err := tt.t.Span([]byte(tt.src), tt.atEOF) if n != len(tt.want) || err != tt.err { t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err) } testHandover(t, tt.t, tt.src) }) } }
// TestValues tests that for all languages, regions, and scripts in Values, at // least one language has a name defined for it by checking it exists in // English, which is assumed to be the most comprehensive. It is also tested // that a Namer returns "" for unsupported values. func TestValues(t *testing.T) { type testcase struct { kind string n Namer } // checkDefined checks that a value exists in a Namer. checkDefined := func(x interface{}, namers []testcase) { for _, n := range namers { testtext.Run(t, fmt.Sprintf("%s.Name(%s)", n.kind, x), func(t *testing.T) { if n.n.Name(x) == "" { // As of version 28 there is no data for az-Arab in English, // although there is useful data in other languages. if x.(fmt.Stringer).String() == "az-Arab" { return } t.Errorf("supported but no result") } }) } } // checkUnsupported checks that a value does not exist in a Namer. checkUnsupported := func(x interface{}, namers []testcase) { for _, n := range namers { if got := n.n.Name(x); got != "" { t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", n.kind, x, got) } } } tags := map[language.Tag]bool{} namers := []testcase{ {"Languages(en)", Languages(language.English)}, {"Tags(en)", Tags(language.English)}, {"English.Languages()", English.Languages()}, {"English.Tags()", English.Tags()}, } for _, tag := range Values.Tags() { checkDefined(tag, namers) tags[tag] = true } for _, base := range language.Supported.BaseLanguages() { tag, _ := language.All.Compose(base) if !tags[tag] { checkUnsupported(tag, namers) } } regions := map[language.Region]bool{} namers = []testcase{ {"Regions(en)", Regions(language.English)}, {"English.Regions()", English.Regions()}, } for _, r := range Values.Regions() { checkDefined(r, namers) regions[r] = true } for _, r := range language.Supported.Regions() { if r = r.Canonicalize(); !regions[r] { checkUnsupported(r, namers) } } scripts := map[language.Script]bool{} namers = []testcase{ {"Scripts(en)", Scripts(language.English)}, {"English.Scripts()", English.Scripts()}, } for _, s := range Values.Scripts() { checkDefined(s, namers) scripts[s] = true } for _, s := range language.Supported.Scripts() { // Canonicalize the script. tag, _ := language.DeprecatedScript.Compose(s) if _, s, _ = tag.Raw(); !scripts[s] { checkUnsupported(s, namers) } } }