Ejemplo n.º 1
0
// doTest runs tc against tr as a subtest named tc.desc. It checks the output,
// the byte counts and the error of a single Transform call into a buffer of
// tc.nBuf bytes, then the result of String, and finally the result of Span.
func (tc *transformTest) doTest(t *testing.T, tr Transformer) {
	testtext.Run(t, tc.desc, func(t *testing.T) {
		b := make([]byte, tc.nBuf)
		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF)
		// Only the first nDst bytes of the expected output are compared, as
		// the destination buffer may hold just part of it. The bound is
		// clamped so that a transformer writing more than expected results
		// in a test failure instead of a slice-bounds panic.
		want := tc.dst
		if nDst <= len(want) {
			want = want[:nDst]
		}
		if got := string(b[:nDst]); got != want {
			t.Errorf("dst was %+q; want %+q", got, tc.dst)
		}
		if nDst != tc.nDst {
			t.Errorf("nDst was %d; want %d", nDst, tc.nDst)
		}
		if nSrc != tc.nSrc {
			t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc)
		}
		if err != tc.err {
			t.Errorf("error was %v; want %v", err, tc.err)
		}
		// String is expected to produce the complete output.
		if got := tr.String(tc.src); got != tc.dst {
			t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst)
		}
		n, err := tr.Span([]byte(tc.src), tc.atEOF)
		if n != tc.nSpan || err != tc.errSpan {
			t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan)
		}
	})
}
Ejemplo n.º 2
0
// TestLanguage checks the names returned by a Languages dictionary for a
// table of (dictionary language, tag, expected name) triples. Tags of at most
// three bytes are additionally looked up as a base language.
func TestLanguage(t *testing.T) {
	type langCase struct {
		dict, tag, name string
	}
	cases := []langCase{
		{"agq", "sr", ""}, // sr is in Value.Languages(), but is not supported by agq.
		// CLDR 30 dropped Vlaams as the word for nl-BE. It is still called
		// Flemish in English, though. TODO: this is probably incorrect.
		// West-Vlaams (vls) is not Vlaams. West-Vlaams could be considered its
		// own language, whereas Vlaams is generally Dutch. So expect to have
		// to change these tests back.
		{"nl", "nl", "Nederlands"},
		{"nl", "vls", "West-Vlaams"},
		{"nl", "nl-BE", "Nederlands"},
		{"en", "pt", "Portuguese"},
		{"en", "pt-PT", "European Portuguese"},
		{"en", "pt-BR", "Brazilian Portuguese"},
		{"en", "en", "English"},
		{"en", "en-GB", "British English"},
		{"en", "en-US", "American English"}, // American English in CLDR 24+
		{"en", lastLang2zu.String(), "Zulu"},
		{"en", firstLang2aa.String(), "Afar"},
		{"en", lastLang3zza.String(), "Zaza"},
		{"en", firstLang3ace.String(), "Achinese"},
		{"en", firstTagAr001.String(), "Modern Standard Arabic"},
		{"en", lastTagZhHant.String(), "Traditional Chinese"},
		{"en", "aaa", ""},
		{"en", "zzj", ""},
		// If full tag doesn't match, try without script or region.
		{"en", "aa-Hans", "Afar"},
		{"en", "af-Arab", "Afrikaans"},
		{"en", "zu-Cyrl", "Zulu"},
		{"en", "aa-GB", "Afar"},
		{"en", "af-NA", "Afrikaans"},
		{"en", "zu-BR", "Zulu"},
		{"agq", "zh-Hant", ""},
		// Canonical equivalents.
		{"ro", "ro-MD", "moldovenească"},
		{"ro", "mo", "moldovenească"},
		{"en", "sh", "Serbo-Croatian"},
		{"en", "sr-Latn", "Serbo-Croatian"},
		{"en", "sr", "Serbian"},
		{"en", "sr-ME", "Serbian"},
		{"en", "sr-Latn-ME", "Serbo-Croatian"}, // See comments in TestTag.
	}
	for idx, tc := range cases {
		testtext.Run(t, tc.dict+"/"+tc.tag, func(t *testing.T) {
			namer := Languages(language.Raw.MustParse(tc.dict))

			// Look the value up as a full tag first.
			got := namer.Name(language.Raw.MustParse(tc.tag))
			if got != tc.name {
				t.Errorf("%d:%s:%s: was %q; want %q", idx, tc.dict, tc.tag, got, tc.name)
			}

			// Only short tags are also parsed as a base language.
			if len(tc.tag) > 3 {
				return
			}
			got = namer.Name(language.MustParseBase(tc.tag))
			if got != tc.name {
				t.Errorf("%d:%s:base(%s): was %q; want %q", idx, tc.dict, tc.tag, got, tc.name)
			}
		})
	}
}
Ejemplo n.º 3
0
// TestRegionDistance checks regionDistance for a table of region pairs and
// the distance expected between them.
func TestRegionDistance(t *testing.T) {
	type distCase struct {
		a, b string
		d    int
	}
	cases := []distCase{
		{"NL", "NL", 0},
		{"NL", "EU", 1},
		{"EU", "NL", 1},
		{"005", "005", 0},
		{"NL", "BE", 2},
		{"CO", "005", 1},
		{"005", "CO", 1},
		{"CO", "419", 2},
		{"419", "CO", 2},
		{"005", "419", 1},
		{"419", "005", 1},
		{"001", "013", 2},
		{"013", "001", 2},
		{"CO", "CW", 4},
		{"CO", "PW", 6},
		{"CO", "BV", 6},
		{"ZZ", "QQ", 2},
	}
	for idx, tc := range cases {
		testtext.Run(t, tc.a+"/"+tc.b, func(t *testing.T) {
			// The second result of getRegionID is ignored for both lookups.
			from, _ := getRegionID([]byte(tc.a))
			to, _ := getRegionID([]byte(tc.b))
			got := regionDistance(from, to)
			if got == tc.d {
				return
			}
			t.Errorf("%d: d(%s, %s) = %v; want %v", idx, tc.a, tc.b, got, tc.d)
		})
	}
}
Ejemplo n.º 4
0
// TestWordBreaks checks Title casing for the undetermined language against
// each entry of breakTest. An entry marks the expected word boundaries with
// "|"; the input passed to the caser is the entry with the markers removed.
func TestWordBreaks(t *testing.T) {
	for _, entry := range breakTest {
		testtext.Run(t, entry, func(t *testing.T) {
			words := strings.Split(entry, "|")

			// Build the expected output by title casing given word breaks as
			// defined in the Unicode standard 3.13 R3: within each word the
			// first rune whose title and lower case forms differ is title
			// cased and every other rune is lower cased.
			var expected strings.Builder
			for _, word := range words {
				titled := false
				for _, r := range word {
					upper, lower := unicode.ToTitle(r), unicode.ToLower(r)
					if titled || upper == lower {
						expected.WriteRune(lower)
						continue
					}
					titled = true
					expected.WriteRune(upper)
				}
			}

			input := strings.Join(words, "")
			got, want := Title(language.Und).String(input), expected.String()
			if got != want {
				t.Errorf("got %q; want %q", got, want)
			}
		})
	}
}
Ejemplo n.º 5
0
// TestFormats checks that formatForLang, given a language and one of the
// tag-to-format index tables, yields the same value as parsing the expected
// pattern with ParsePattern.
func TestFormats(t *testing.T) {
	testCases := []struct {
		lang    string
		pattern string
		index   []byte
	}{
		{"en", "#,##0.###", tagToDecimal},
		{"de", "#,##0.###", tagToDecimal},
		{"de-CH", "#,##0.###", tagToDecimal},
		{"pa", "#,##,##0.###", tagToDecimal},
		{"pa-Arab", "#,##0.###", tagToDecimal}, // Does NOT inherit from pa!
		{"mr", "#,##,##0.###", tagToDecimal},
		{"mr-IN", "#,##,##0.###", tagToDecimal}, // Inherits from mr.
		{"nl", "#E0", tagToScientific},
		{"nl-MX", "#E0", tagToScientific}, // Inherits through Tag.Parent.
		{"zgh", "#,##0 %", tagToPercent},
	}
	for _, tc := range testCases {
		testtext.Run(t, tc.lang, func(t *testing.T) {
			got := formatForLang(language.MustParse(tc.lang), tc.index)
			want, err := ParsePattern(tc.pattern)
			// Report a broken expectation or a missing format as a failure
			// rather than dereferencing a nil pointer below.
			if err != nil {
				t.Fatalf("ParsePattern(%q): %v", tc.pattern, err)
			}
			if got == nil {
				t.Fatalf("formatForLang(%q) returned nil; want %#v", tc.lang, want)
			}
			if *got != *want {
				t.Errorf("\ngot  %#v;\nwant %#v", got, want)
			}
		})
	}
}
Ejemplo n.º 6
0
// doTest performs a single test f(input) and verifies that the output matches
// want and that the returned error is expected. The errors string contains
// all allowed error codes as categorized in
// http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
// P: Processing
// V: Validity
// A: to ASCII
// B: Bidi
// C: Context J
//
// The output is only compared when want is non-empty. The subtest is named
// name/<escaped input>/<"ok" or "err:" followed by the allowed codes>.
func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
	errors = strings.Trim(errors, "[]")
	test := "ok"
	if errors != "" {
		test = "err:" + errors
	}
	// Replace some of the escape sequences to make it easier to single out
	// tests on the command line.
	in := strings.Trim(strconv.QuoteToASCII(input), `"`)
	in = strings.Replace(in, `\u`, "#", -1)
	in = strings.Replace(in, `\U`, "#", -1)
	name = fmt.Sprintf("%s/%s/%s", name, in, test)

	testtext.Run(t, name, func(t *testing.T) {
		got, err := f(input)

		if err != nil {
			// The error is expected to expose its category through a code
			// method. Use a checked assertion so that an error of another
			// kind fails the test instead of panicking.
			coder, ok := err.(interface {
				code() string
			})
			if !ok {
				t.Errorf("error %v (%T) has no error code; want error in {%v}", err, err, errors)
			} else if code := coder.code(); !strings.Contains(errors, code) {
				t.Errorf("error %q not in set of expected errors {%v}", code, errors)
			}
		} else if errors != "" {
			t.Errorf("no errors; want error in {%v}", errors)
		}

		if want != "" && got != want {
			t.Errorf(`string: got %+q; want %+q`, got, want)
		}
	})
}
Ejemplo n.º 7
0
// testString runs f on every case of testCases and chainTests and checks the
// returned string, byte count and error against the case. Cases that rely on
// a specific buffer size, or that expect ErrShortDst or ErrShortSrc, are
// skipped.
func testString(t *testing.T, f func(Transformer, string) (string, int, error)) {
	for _, tt := range append(testCases, chainTests()...) {
		if tt.desc == "allowStutter = true" {
			// We don't have control over the buffer size, so we eliminate tests
			// that depend on a specific buffer size being set.
			continue
		}
		if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc {
			// The result string will be different.
			continue
		}
		testtext.Run(t, tt.desc, func(t *testing.T) {
			got, n, err := f(tt.t, tt.src)
			if tt.wantErr != err {
				t.Errorf("error: got %v; want %v", err, tt.wantErr)
			}
			// Check that err == nil implies that n == len(tt.src). Note that vice
			// versa isn't necessarily true.
			if err == nil && n != len(tt.src) {
				// Report the expected byte count; the error is known to be
				// nil here and is not a valid operand for %d.
				t.Errorf("err == nil: got %d bytes, want %d", n, len(tt.src))
			}
			if got != tt.wantStr {
				t.Errorf("string: got %q; want %q", got, tt.wantStr)
			}
		})
	}
}
Ejemplo n.º 8
0
// TestShortBuffersAndOverflow feeds each bufferTests source to its
// transformer in chunks of at most srcSize bytes, using a destination buffer
// of dstSize bytes, and checks the concatenated output as well as the error
// of the first call. Each case is also passed through testHandover.
func TestShortBuffersAndOverflow(t *testing.T) {
	for idx, tc := range bufferTests {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			dst := make([]byte, tc.dstSize)
			out := make([]byte, 0)
			var (
				written, consumed int
				err               error
			)
			// Advance by however many source bytes the last call consumed.
			for pos := 0; pos < len(tc.src); pos += consumed {
				end := pos + tc.srcSize
				if end > len(tc.src) {
					end = len(tc.src)
				}
				atEOF := end == len(tc.src)
				written, consumed, err = tc.t.Transform(dst, []byte(tc.src[pos:end]), atEOF)
				out = append(out, dst[:written]...)

				// Only the error of the very first call is verified.
				if pos == 0 && err != tc.firstErr {
					t.Errorf("%d:%s:\n error was %v; want %v", idx, tc.desc, err, tc.firstErr)
					break
				}
			}
			if string(out) != tc.want {
				t.Errorf("%d:%s:\ngot  %+q;\nwant %+q", idx, tc.desc, out, tc.want)
			}
			testHandover(t, Caser{tc.t}, tc.src)
		})
	}
}
Ejemplo n.º 9
0
// TestAlloc verifies that constructing certain casers, and transforming with
// them, does not allocate.
func TestAlloc(t *testing.T) {
	dst := make([]byte, 256) // big enough to hold any result
	src := []byte(txtNonASCII)

	constructors := []func() Caser{
		func() Caser { return Upper(language.Und) },
		func() Caser { return Lower(language.Und) },
		func() Caser { return Lower(language.Und, HandleFinalSigma(false)) },
		// TODO: use a shared copy for these casers as well, in order of
		// importance, starting with the most important:
		// func() Caser { return Title(language.Und) },
		// func() Caser { return Title(language.Und, HandleFinalSigma(false)) },
	}
	for idx, newCaser := range constructors {
		testtext.Run(t, "", func(t *testing.T) {
			var c Caser

			// Constructing the caser should be free of allocations.
			initAllocs := testtext.AllocsPerRun(10, func() {
				c = newCaser()
			})
			if initAllocs > 0 {
				// TODO: Right now only Upper has 1 allocation. Special-case Lower
				// and Title as well to have less allocations for the root locale.
				t.Errorf("%d:init: number of allocs was %f; want 0", idx, initAllocs)
			}

			// So should transforming with the caser constructed last.
			transformAllocs := testtext.AllocsPerRun(2, func() {
				c.Transform(dst, src, true)
			})
			if transformAllocs > 0 {
				t.Errorf("%d:transform: number of allocs was %f; want 0", idx, transformAllocs)
			}
		})
	}
}
Ejemplo n.º 10
0
// doTests calls fn in a separate subtest for every case of every rule in
// testCases. The subtest name holds the rule, the index of the case within
// the rule, and the case input both escaped and verbatim.
func doTests(t *testing.T, fn func(t *testing.T, tc ruleTest)) {
	for ruleID, group := range testCases {
		for idx, tc := range group {
			run := func(t *testing.T) { fn(t, tc) }
			label := fmt.Sprintf("%d/%d:%+q:%s", ruleID, idx, tc.in, tc.in)
			testtext.Run(t, label, run)
		}
	}
}
Ejemplo n.º 11
0
// doTests calls fn in a separate subtest for every case of every group in
// enforceTestCases, passing along the profile of the group. The subtest name
// holds the group name, the index of the case and its escaped input.
func doTests(t *testing.T, fn func(t *testing.T, p *Profile, tc testCase)) {
	for _, group := range enforceTestCases {
		for idx, tc := range group.cases {
			run := func(t *testing.T) { fn(t, group.p, tc) }
			label := fmt.Sprintf("%s:%d:%+q", group.name, idx, tc.input)
			testtext.Run(t, label, run)
		}
	}
}
Ejemplo n.º 12
0
// runSpanTests checks f.Span and f.SpanString against every case in
// testCases. The SpanString variant of a case only runs when its Span variant
// passed.
func runSpanTests(t *testing.T, name string, f Form, testCases []spanTest) {
	for idx, tc := range testCases {
		bytesName := fmt.Sprintf("Bytes/%s/%d=%+q/atEOF=%v", name, idx, pc(tc.input), tc.atEOF)
		passed := testtext.Run(t, bytesName, func(t *testing.T) {
			gotN, gotErr := f.Span([]byte(tc.input), tc.atEOF)
			if gotN == tc.n && gotErr == tc.err {
				return
			}
			t.Errorf("\n got %d, %v;\nwant %d, %v", gotN, gotErr, tc.n, tc.err)
		})
		if !passed {
			// Don't do the String variant if the Bytes variant failed.
			continue
		}

		stringName := fmt.Sprintf("String/%s/%d=%+q/atEOF=%v", name, idx, pc(tc.input), tc.atEOF)
		testtext.Run(t, stringName, func(t *testing.T) {
			gotN, gotErr := f.SpanString(tc.input, tc.atEOF)
			if gotN == tc.n && gotErr == tc.err {
				return
			}
			t.Errorf("\n got %d, %v;\nwant %d, %v", gotN, gotErr, tc.n, tc.err)
		})
	}
}
Ejemplo n.º 13
0
// TestCompare checks the result of Compare on the profile of each group in
// compareTestCases for every pair of strings in that group.
func TestCompare(t *testing.T) {
	for _, group := range compareTestCases {
		for idx, tc := range group.cases {
			label := fmt.Sprintf("%s:%d:%+q", group.name, idx, tc.a)
			testtext.Run(t, label, func(t *testing.T) {
				got := group.p.Compare(tc.a, tc.b)
				if got == tc.result {
					return
				}
				t.Errorf("got %v; want %v", got, tc.result)
			})
		}
	}
}
Ejemplo n.º 14
0
// TestReader reads each test case source through a Reader that wraps the
// transformer of the case and compares the data and the error returned by
// reading it to the end with the expected values.
func TestReader(t *testing.T) {
	for _, tc := range testCases {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			rd := NewReader(strings.NewReader(tc.src), tc.t)
			// The exported API offers no way to choose the sizes of the
			// internal dst and src buffers, so they are replaced directly.
			rd.dst = make([]byte, tc.dstSize)
			rd.src = make([]byte, tc.srcSize)

			data, err := ioutil.ReadAll(rd)
			if got := string(data); got != tc.wantStr || err != tc.wantErr {
				t.Errorf("\ngot  %q, %v\nwant %q, %v", got, err, tc.wantStr, tc.wantErr)
			}
		})
	}
}
Ejemplo n.º 15
0
// testHandover checks that calling Span on a prefix of src and then calling
// Transform on everything Span did not cover yields the same result as
// c.String(src). It does so for every prefix length shorter than the common
// byte prefix of src and its cased form.
func testHandover(t *testing.T, c Caser, src string) {
	want := c.String(src)

	// Determine the length of the common prefix of src and want.
	common := 0
	for common < len(src) && common < len(want) && want[common] == src[common] {
		common++
	}

	// Test handover for each substring of the prefix.
	for cut := 0; cut < common; cut++ {
		testtext.Run(t, fmt.Sprint("interleave/", cut), func(t *testing.T) {
			buf := make([]byte, 4*len(src))
			c.Reset()
			// The spanned bytes are copied verbatim; Transform handles the rest.
			spanned, _ := c.Span([]byte(src[:cut]), false)
			copy(buf, src[:spanned])
			written, _, _ := c.Transform(buf[spanned:], []byte(src[spanned:]), true)
			if got := string(buf[:spanned+written]); got != want {
				t.Errorf("full string: got %q; want %q", got, want)
			}
		})
	}
}
Ejemplo n.º 16
0
// TestWriter writes each test case source through a Writer in chunks of
// several sizes and compares the bytes that reach the underlying buffer, and
// the first error encountered, with the expected values. A case with a
// positive ioSize is run with that chunk size only.
func TestWriter(t *testing.T) {
	for _, tc := range append(testCases, chainTests()...) {
		chunkSizes := []int{1, 2, 3, 4, 5, 10, 100, 1000}
		if tc.ioSize > 0 {
			chunkSizes = []int{tc.ioSize}
		}
		for _, size := range chunkSizes {
			label := fmt.Sprintf("%s/%d", tc.desc, size)
			testtext.Run(t, label, func(t *testing.T) {
				sink := &bytes.Buffer{}
				w := NewWriter(sink, tc.t)
				// The exported API offers no way to choose the sizes of the
				// internal dst and src buffers, so they are replaced directly.
				w.dst = make([]byte, tc.dstSize)
				w.src = make([]byte, tc.srcSize)

				chunk := make([]byte, size)
				var err error
				// Write until the source is exhausted or a write fails.
				remaining := tc.src
				for len(remaining) > 0 && err == nil {
					n := copy(chunk, remaining)
					remaining = remaining[n:]
					var m int
					m, err = w.Write(chunk[:n])
					if err == nil && m != n {
						t.Errorf("did not consume all bytes %d < %d", m, n)
					}
				}
				if err == nil {
					err = w.Close()
				}
				if got := sink.String(); got != tc.wantStr || err != tc.wantErr {
					t.Errorf("\ngot  %q, %v\nwant %q, %v", got, err, tc.wantStr, tc.wantErr)
				}
			})
		}
	}
}
Ejemplo n.º 17
0
// TestConformance runs ToUnicode and ToASCII of the Transitional and
// NonTransitional profiles against the entries of IdnaTest.txt. The first
// field of an entry selects the profiles to test ("T", "N", or "B" for both),
// the second is the source string, and the third and fourth are the expected
// ToUnicode and ToASCII results. An expected result starting with "[" denotes
// a set of error codes, in which case only the presence of an error is
// verified. testtext.SkipIfNotLong is consulted before any work is done.
func TestConformance(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
	defer r.Close()

	// The section name used in subtest names is the lower-cased first word of
	// the most recent comment seen after parsing of entries has started.
	section := "main"
	started := false
	p := ucd.New(r, ucd.CommentHandler(func(s string) {
		if started {
			section = strings.ToLower(strings.Split(s, " ")[0])
		}
	}))
	for p.Next() {
		started = true

		// What to test
		var profiles []*Profile
		switch p.String(0) {
		case "T":
			profiles = append(profiles, Transitional)
		case "N":
			profiles = append(profiles, NonTransitional)
		case "B":
			profiles = append(profiles, Transitional, NonTransitional)
		}

		src := unescape(p.String(1))
		if incorrectTests[src] {
			continue
		}

		// An empty expectation defaults to the preceding column.
		wantToUnicode := unescape(p.String(2))
		if wantToUnicode == "" {
			wantToUnicode = src
		}
		wantToASCII := unescape(p.String(3))
		if wantToASCII == "" {
			wantToASCII = wantToUnicode
		}
		// Build the "ok" or "err:<codes>" component of the subtest name.
		test := "err:"
		if strings.HasPrefix(wantToUnicode, "[") {
			test += strings.Replace(strings.Trim(wantToUnicode, "[]"), " ", "", -1)
		}
		if strings.HasPrefix(wantToASCII, "[") {
			test += strings.Replace(strings.Trim(wantToASCII, "[]"), " ", "", -1)
		}
		if test == "err:" {
			test = "ok"
		}

		// TODO: also do IDNA tests.
		// invalidInIDNA2008 := p.String(4) == "NV8"

		for _, profile := range profiles {
			testtext.Run(t, fmt.Sprintf("%s:%s/%s/%+q", section, test, profile, src), func(t *testing.T) {
				got, err := profile.ToUnicode(src)
				wantErr := strings.HasPrefix(wantToUnicode, "[")
				gotErr := err != nil
				if wantErr {
					if gotErr != wantErr {
						t.Errorf(`ToUnicode:err got %v; want %v (%s)`,
							gotErr, wantErr, wantToUnicode)
					}
				} else if got != wantToUnicode || gotErr != wantErr {
					t.Errorf(`ToUnicode: got %+q, %v (%v); want %+q, %v`,
						got, gotErr, err, wantToUnicode, wantErr)
				}

				got, err = profile.ToASCII(src)
				wantErr = strings.HasPrefix(wantToASCII, "[")
				gotErr = err != nil
				if wantErr {
					if gotErr != wantErr {
						t.Errorf(`ToASCII:err got %v; want %v (%s)`,
							gotErr, wantErr, wantToASCII)
					}
				} else if got != wantToASCII || gotErr != wantErr {
					t.Errorf(`ToASCII: got %+q, %v (%v); want %+q, %v`,
						got, gotErr, err, wantToASCII, wantErr)
				}
			})
		}
	}
	// Next also returns false when parsing fails. Surface such a failure so
	// that a truncated run is not mistaken for a passing one.
	if err := p.Err(); err != nil {
		t.Fatal(err)
	}
}
Ejemplo n.º 18
0
// TestString runs the shared testString cases against String and then
// exercises String with inputs whose lengths lie just below, at and just
// above multiples of initialBufSize, for a transformer producing output of
// the same length (lowerCaseASCII) and one consuming long runs (rleEncode).
// Finally it checks that String performs at most one allocation for inputs
// that lowerCaseASCIILookahead leaves unchanged.
func TestString(t *testing.T) {
	testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) })

	// Overrun the internal destination buffer.
	for i, s := range []string{
		aaa[:1*initialBufSize-1],
		aaa[:1*initialBufSize+0],
		aaa[:1*initialBufSize+1],
		AAA[:1*initialBufSize-1],
		AAA[:1*initialBufSize+0],
		AAA[:1*initialBufSize+1],
		AAA[:2*initialBufSize-1],
		AAA[:2*initialBufSize+0],
		AAA[:2*initialBufSize+1],
		aaa[:1*initialBufSize-2] + "A",
		aaa[:1*initialBufSize-1] + "A",
		aaa[:1*initialBufSize+0] + "A",
		aaa[:1*initialBufSize+1] + "A",
	} {
		testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) {
			got, _, _ := String(lowerCaseASCII{}, s)
			if want := strings.ToLower(s); got != want {
				t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
			}
		})
	}

	// Overrun the internal source buffer. Both sides of each boundary are
	// covered: one byte short, exact, and one byte over.
	for i, s := range []string{
		aaa[:1*initialBufSize-1],
		aaa[:1*initialBufSize+0],
		aaa[:1*initialBufSize+1],
		aaa[:2*initialBufSize-1],
		aaa[:2*initialBufSize+0],
		aaa[:2*initialBufSize+1],
	} {
		testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) {
			got, _, _ := String(rleEncode{}, s)
			if want := fmt.Sprintf("%da", len(s)); got != want {
				t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
			}
		})
	}

	// Test allocations for non-changing strings.
	// Note we still need to allocate a single buffer.
	for i, s := range []string{
		"",
		"123456789",
		aaa[:initialBufSize-1],
		aaa[:initialBufSize+0],
		aaa[:initialBufSize+1],
		aaa[:10*initialBufSize],
	} {
		testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) {
			if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 {
				t.Errorf("#allocs was %f; want 1", n)
			}
		})
	}
}
Ejemplo n.º 19
0
// TestHandover checks, for inputs split into a first and a second part, that
// calling Span on the first part and then Transform on everything Span did
// not cover gives the same result as casing the whole input with String.
func TestHandover(t *testing.T) {
	type handoverCase struct {
		desc          string
		t             Caser
		first, second string
	}
	cases := []handoverCase{{
		"title/nosigma/single midword",
		Title(language.Und, HandleFinalSigma(false)),
		"A.", "a",
	}, {
		"title/nosigma/single midword",
		Title(language.Und, HandleFinalSigma(false)),
		"A", ".a",
	}, {
		"title/nosigma/double midword",
		Title(language.Und, HandleFinalSigma(false)),
		"A..", "a",
	}, {
		"title/nosigma/double midword",
		Title(language.Und, HandleFinalSigma(false)),
		"A.", ".a",
	}, {
		"title/nosigma/double midword",
		Title(language.Und, HandleFinalSigma(false)),
		"A", "..a",
	}, {
		"title/sigma/single midword",
		Title(language.Und),
		"ΟΣ.", "a",
	}, {
		"title/sigma/single midword",
		Title(language.Und),
		"ΟΣ", ".a",
	}, {
		"title/sigma/double midword",
		Title(language.Und),
		"ΟΣ..", "a",
	}, {
		"title/sigma/double midword",
		Title(language.Und),
		"ΟΣ.", ".a",
	}, {
		"title/sigma/double midword",
		Title(language.Und),
		"ΟΣ", "..a",
	}, {
		"title/af/leading apostrophe",
		Title(language.Afrikaans),
		"'", "n bietje",
	}}
	for _, tc := range cases {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			whole := tc.first + tc.second
			want := tc.t.String(whole)

			// Span only sees the first part and is told more input follows.
			tc.t.Reset()
			spanned, _ := tc.t.Span([]byte(tc.first), false)

			// Keep the spanned bytes as they are and transform the rest.
			out := make([]byte, len(want))
			copy(out, tc.first[:spanned])
			written, _, _ := tc.t.Transform(out[spanned:], []byte(whole[spanned:]), true)

			if got := string(out[:spanned+written]); got != want {
				t.Errorf("got %q; want %q", got, want)
			}
		})
	}
}
Ejemplo n.º 20
0
// TestICUConformance compares the results of doGo and doICU, after NFC
// normalization, for every combination of case operation, language tag and
// input string that exclude does not filter out. The inputs are a fixed list
// extended with the strings of the other test tables in this package.
func TestICUConformance(t *testing.T) {
	// Build test set.
	input := []string{
		"a.a a_a",
		"a\u05d0a",
		"\u05d0'a",
		"a\u03084a",
		"a\u0308a",
		"a3\u30a3a",
		"a\u303aa",
		"a_\u303a_a",
		"1_a..a",
		"1_a.a",
		"a..a.",
		"a--a-",
		"a-a-",
		"a\u200ba",
		"a\u200b\u200ba",
		"a\u00ad\u00ada", // Format
		"a\u00ada",
		"a''a", // SingleQuote
		"a'a",
		"a::a", // MidLetter
		"a:a",
		"a..a", // MidNumLet
		"a.a",
		"a;;a", // MidNum
		"a;a",
		"a__a", // ExtendNumlet
		"a_a",
		"ΟΣ''a",
	}
	// add appends a string, or all elements of a string slice, to input.
	// Values of any other type are ignored.
	add := func(x interface{}) {
		switch v := x.(type) {
		case string:
			input = append(input, v)
		case []string:
			input = append(input, v...)
		}
	}
	for _, tc := range testCases {
		for _, field := range []interface{}{tc.src, tc.lower, tc.upper, tc.title} {
			add(field)
		}
	}
	for _, tc := range bufferTests {
		add(tc.src)
	}
	for _, entry := range breakTest {
		add(strings.Replace(entry, "|", "", -1))
	}
	for _, entry := range foldTestCases {
		add(entry)
	}

	// Compare ICU to Go.
	operations := []string{"lower", "upper", "title", "fold"}
	tags := []string{
		"und", "af", "az", "el", "lt", "nl", "tr",
	}
	for _, op := range operations {
		for _, tag := range tags {
			for _, s := range input {
				if exclude(op, tag, s) {
					continue
				}
				testtext.Run(t, path.Join(op, tag, s), func(t *testing.T) {
					want := doICU(tag, op, s)
					got := doGo(tag, op, s)
					if norm.NFC.String(got) == norm.NFC.String(want) {
						return
					}
					t.Errorf("\n    in %[3]q (%+[3]q)\n   got %[1]q (%+[1]q)\n  want %[2]q (%+[2]q)", got, want, s)
				})
			}
		}
	}
}
Ejemplo n.º 21
0
// TestSpan checks Span for a table of casers and inputs. For every case, want
// is the prefix of src that Span is expected to cover and err is the error it
// is expected to return. Before the main check, Span is called on growing
// prefixes of src with atEOF set to false; these calls may only return nil or
// transform.ErrShortSrc. Each case is also passed through testHandover.
func TestSpan(t *testing.T) {
	cases := []struct {
		desc  string
		src   string
		want  string
		atEOF bool
		err   error
		t     Caser
	}{{
		desc:  "und/upper/basic",
		src:   "abcdefg",
		want:  "",
		atEOF: true,
		err:   transform.ErrEndOfSpan,
		t:     Upper(language.Und),
	}, {
		desc:  "und/upper/short src",
		src:   "123É"[:4],
		want:  "123",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Upper(language.Und),
	}, {
		desc:  "und/upper/no error on short",
		src:   "12",
		want:  "12",
		atEOF: false,
		t:     Upper(language.Und),
	}, {
		desc:  "und/lower/basic",
		src:   "ABCDEFG",
		want:  "",
		atEOF: true,
		err:   transform.ErrEndOfSpan,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/short src num",
		src:   "123é"[:4],
		want:  "123",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/short src greek",
		src:   "αβγé"[:7],
		want:  "αβγ",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/no error on short",
		src:   "12",
		want:  "12",
		atEOF: false,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/simple (no final sigma)",
		src:   "ος οσσ",
		want:  "οσ οσσ",
		atEOF: true,
		t:     Lower(language.Und, HandleFinalSigma(false)),
	}, {
		desc:  "und/title/simple (no final sigma)",
		src:   "Οσ Οσσ",
		want:  "Οσ Οσσ",
		atEOF: true,
		t:     Title(language.Und, HandleFinalSigma(false)),
	}, {
		desc: "und/lower/final sigma: no error",
		src:  "οΣ", // Oς
		want: "ο",  // Oς
		err:  transform.ErrEndOfSpan,
		t:    Lower(language.Und),
	}, {
		desc: "und/title/final sigma: no error",
		src:  "ΟΣ", // Oς
		want: "Ο",  // Oς
		err:  transform.ErrEndOfSpan,
		t:    Title(language.Und),
	}, {
		desc: "und/title/final sigma: no short source!",
		src:  "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ",
		want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ",
		err:  transform.ErrEndOfSpan,
		t:    Title(language.Und),
	}, {
		desc:  "und/title/clipped UTF-8 rune",
		src:   "Σσ" + string([]byte{0xCF}),
		want:  "Σσ",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Title(language.Und),
	}, {
		desc:  "und/title/clipped UTF-8 rune atEOF",
		src:   "Σσσ" + string([]byte{0xCF}),
		want:  "Σσσ" + string([]byte{0xCF}),
		atEOF: true,
		t:     Title(language.Und),
	}, {
		desc: "und/title/long string",
		src:  "A" + strings.Repeat("a", maxIgnorable+5),
		want: "A" + strings.Repeat("a", maxIgnorable+5),
		t:    Title(language.Und),
	}, {
		desc:  "und/title/cyrillic",
		src:   "При",
		want:  "При",
		atEOF: true,
		t:     Title(language.Und, HandleFinalSigma(false)),
	}, {
		// Note: the choice to change the final sigma at the end in case of
		// too many case ignorables is arbitrary. The main reason for this
		// choice is that it results in simpler code.
		desc: "und/title/final sigma: max ignorables",
		src:  "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
		want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
		t:    Title(language.Und),
	}, {
		desc: "el/upper/max ignorables - not implemented",
		src:  "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Greek),
	}, {
		desc: "el/upper/too many ignorables - not implemented",
		src:  "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Greek),
	}, {
		desc: "el/upper/short dst",
		src:  "123ο",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Greek),
	}, {
		desc: "lt/lower/max ignorables",
		src:  "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/lower/isLower",
		src:  "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/lower/not identical",
		src:  "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
		err:  transform.ErrEndOfSpan,
		want: "aaaaa",
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/lower/identical",
		src:  "aaaai\u0307\u0300", // i + U+0307 COMBINING DOT ABOVE + U+0300 COMBINING GRAVE ACCENT
		want: "aaaai\u0307\u0300",
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/upper/not implemented",
		src:  "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Lithuanian),
	}, {
		desc: "lt/upper/not implemented, ascii",
		src:  "AB",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Lithuanian),
	}, {
		desc: "nl/title/pre-IJ cutoff",
		src:  "  IJ",
		want: "  IJ",
		t:    Title(language.Dutch),
	}, {
		desc: "nl/title/mid-IJ cutoff",
		src:  "  Ia",
		want: "  Ia",
		t:    Title(language.Dutch),
	}, {
		desc: "af/title/apostrophe",
		src:  "'n Bietje",
		want: "'n Bietje",
		t:    Title(language.Afrikaans),
	}, {
		desc: "af/title/apostrophe-incorrect",
		src:  "'N Bietje",
		// The Single_Quote (a MidWord), needs to be retained as unspanned so
		// that a successive call to Transform can detect that N should not be
		// capitalized.
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Title(language.Afrikaans),
	}}
	for _, tc := range cases {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			// Span each prefix of src that ends on a rune boundary inside the
			// expected span; none of these calls may fail early.
			wantN := len(tc.want)
			for pos := 0; pos < wantN; {
				tc.t.Reset()
				n, err := tc.t.Span([]byte(tc.src[:pos]), false)
				if err != nil && err != transform.ErrShortSrc {
					t.Errorf("early failure:Span(%+q): %v (%d < %d)", tc.src[:pos], err, n, len(tc.want))
					break
				}
				pos += utf8.RuneLen([]rune(tc.src[pos:])[0])
			}

			// The main check: Span over the complete source.
			tc.t.Reset()
			gotN, gotErr := tc.t.Span([]byte(tc.src), tc.atEOF)
			if gotN != wantN || gotErr != tc.err {
				t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tc.src, tc.atEOF, gotN, gotErr, len(tc.want), tc.err)
			}
			testHandover(t, tc.t, tc.src)
		})
	}
}
Ejemplo n.º 22
0
// TestValues tests that every language tag, region and script listed in
// Values has a name in English, which is assumed to be the most comprehensive
// set of names. It also tests that a Namer returns "" for values that are
// supported by package language but are not listed in Values.
func TestValues(t *testing.T) {
	type testcase struct {
		kind string
		n    Namer
	}
	// checkDefined verifies, in one subtest per namer, that x has a name.
	checkDefined := func(x interface{}, namers []testcase) {
		for _, tc := range namers {
			label := fmt.Sprintf("%s.Name(%s)", tc.kind, x)
			testtext.Run(t, label, func(t *testing.T) {
				if tc.n.Name(x) != "" {
					return
				}
				// As of version 28 there is no data for az-Arab in English,
				// although there is useful data in other languages.
				if x.(fmt.Stringer).String() == "az-Arab" {
					return
				}
				t.Errorf("supported but no result")
			})
		}
	}
	// checkUnsupported verifies that no namer has a name for x.
	checkUnsupported := func(x interface{}, namers []testcase) {
		for _, tc := range namers {
			got := tc.n.Name(x)
			if got == "" {
				continue
			}
			t.Fatalf("%s.Name(%s): unsupported tag gave non-empty result: %q", tc.kind, x, got)
		}
	}

	// Language tags.
	tagNamers := []testcase{
		{"Languages(en)", Languages(language.English)},
		{"Tags(en)", Tags(language.English)},
		{"English.Languages()", English.Languages()},
		{"English.Tags()", English.Tags()},
	}
	seenTags := map[language.Tag]bool{}
	for _, tag := range Values.Tags() {
		checkDefined(tag, tagNamers)
		seenTags[tag] = true
	}
	for _, base := range language.Supported.BaseLanguages() {
		tag, _ := language.All.Compose(base)
		if seenTags[tag] {
			continue
		}
		checkUnsupported(tag, tagNamers)
	}

	// Regions.
	regionNamers := []testcase{
		{"Regions(en)", Regions(language.English)},
		{"English.Regions()", English.Regions()},
	}
	seenRegions := map[language.Region]bool{}
	for _, r := range Values.Regions() {
		checkDefined(r, regionNamers)
		seenRegions[r] = true
	}
	for _, r := range language.Supported.Regions() {
		canonical := r.Canonicalize()
		if seenRegions[canonical] {
			continue
		}
		checkUnsupported(canonical, regionNamers)
	}

	// Scripts.
	scriptNamers := []testcase{
		{"Scripts(en)", Scripts(language.English)},
		{"English.Scripts()", English.Scripts()},
	}
	seenScripts := map[language.Script]bool{}
	for _, s := range Values.Scripts() {
		checkDefined(s, scriptNamers)
		seenScripts[s] = true
	}
	for _, s := range language.Supported.Scripts() {
		// Canonicalize the script.
		tag, _ := language.DeprecatedScript.Compose(s)
		_, canonical, _ := tag.Raw()
		if seenScripts[canonical] {
			continue
		}
		checkUnsupported(canonical, scriptNamers)
	}
}