// TestCaseProperties compares the case properties recorded for each rune
// (case-ignorable, cased, upper, lower and break) with the values reported by
// propIgnore, propCased, propUpper, propLower and hasBreakProp. Only runes
// assigned in both the package's Unicode version and the core library's
// version are checked.
func TestCaseProperties(t *testing.T) {
	assigned := rangetable.Assigned(UnicodeVersion)
	coreVersion := rangetable.Assigned(unicode.Version)
	if coreVersion == nil {
		// rangetable.Assigned returns nil when it has no data for the requested
		// version, and unicode.In dereferences every table it is handed. Fall
		// back to this package's own table, as TestMapping does, rather than
		// panicking on the first rune.
		coreVersion = assigned
	}
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
			continue
		}
		c := contextFromRune(r)
		if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want {
			t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info)
		}
		// New letters may change case types, but existing case pairings should
		// not change. See Case Pair Stability in
		// http://unicode.org/policies/stability_policy.html.
		if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) {
			if got, want := c.info.isCased(), propCased(r); got != want {
				t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info)
			}
			if got, want := c.caseType() == cUpper, propUpper(r); got != want {
				t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info)
			}
			if got, want := c.caseType() == cLower, propLower(r); got != want {
				t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info)
			}
		}
		if got, want := c.info.isBreak(), hasBreakProp(r); got != want {
			t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info)
		}
	}
	// TODO: get title case from unicode file.
}
Example #2
0
// TestMapping verifies the lower, title and upper mappings. Runes listed in
// special are checked against their recorded expansions; every other rune that
// has a case pairing is checked against the unicode package's simple mappings.
func TestMapping(t *testing.T) {
	assigned := rangetable.Assigned(UnicodeVersion)
	coreVersion := rangetable.Assigned(unicode.Version)
	if coreVersion == nil {
		coreVersion = assigned
	}

	// mapRune runs fn on a context created for r alone and returns whatever
	// was written to the destination buffer.
	mapRune := func(r rune, fn func(c *context) bool) string {
		ctx := contextFromRune(r)
		fn(ctx)
		return string(ctx.dst[:ctx.pDst])
	}

	// Special-cased runes carry their own expected expansions.
	for r, tt := range special {
		if got, want := mapRune(r, lower), tt.toLower; got != want {
			t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want)
		}
		if got, want := mapRune(r, title), tt.toTitle; got != want {
			t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want)
		}
		if got, want := mapRune(r, upper), tt.toUpper; got != want {
			t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want)
		}
	}

	for r := rune(0); r <= lastRuneForTesting; r++ {
		// Only runes known to both Unicode versions are comparable.
		if !(unicode.In(r, assigned) && unicode.In(r, coreVersion)) {
			continue
		}
		// Skip runes without a case pairing inside the assigned set.
		folded := unicode.SimpleFold(r)
		if folded == r || !unicode.In(folded, assigned) {
			continue
		}
		// Special runes were already handled above.
		if _, isSpecial := special[r]; isSpecial {
			continue
		}

		wantLower := string(unicode.ToLower(r))
		if got := mapRune(r, lower); got != wantLower {
			t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), wantLower, []rune(wantLower))
		}

		wantUpper := string(unicode.ToUpper(r))
		if got := mapRune(r, upper); got != wantUpper {
			t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), wantUpper, []rune(wantUpper))
		}

		wantTitle := string(unicode.ToTitle(r))
		if got := mapRune(r, title); got != wantTitle {
			t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), wantTitle, []rune(wantTitle))
		}
	}
}
Example #3
0
// TestCaseProperties compares the case properties recorded for each rune with
// the values reported by the prop* helpers. The test is skipped when the core
// library's Unicode version differs from this package's UnicodeVersion.
func TestCaseProperties(t *testing.T) {
	if UnicodeVersion != unicode.Version {
		t.Skipf("UnicodeVersion=%s, but unicode.Version=%s", UnicodeVersion, unicode.Version)
	}

	assigned := rangetable.Assigned(UnicodeVersion)
	for r := rune(0); r <= lastRuneForTesting; r++ {
		// Both the rune and its simple fold must be assigned code points.
		if !(unicode.In(r, assigned) && unicode.In(unicode.SimpleFold(r), assigned)) {
			continue
		}

		ctx := contextFromRune(r)

		if got, want := ctx.info.isCaseIgnorable(), propIgnore(r); want != got {
			t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, ctx.info)
		}

		if got, want := ctx.info.isCased(), propCased(r); want != got {
			t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, ctx.info)
		}

		if got, want := ctx.caseType() == cUpper, propUpper(r); want != got {
			t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, ctx.info)
		}

		if got, want := ctx.caseType() == cLower, propLower(r); want != got {
			t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, ctx.info)
		}

		if got, want := ctx.info.isBreak(), hasBreakProp(r); want != got {
			t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, ctx.info)
		}
	}
	// TODO: get title case from unicode file.
}
Example #4
0
// TestTable ensures that certain properties were generated correctly. For each
// test case it visits every assigned rune in the range table, looks the rune up
// in dpTrie and checks that the stored property matches the expected one,
// unless the rune is listed in exceptions.
func TestTable(t *testing.T) {
	tests := []tableTest{
		{
			rangetable.Merge(
				unicode.Lt, unicode.Nl, unicode.No, // Other letter digits
				unicode.Me,             // Modifiers
				unicode.Zs,             // Spaces
				unicode.So,             // Symbols
				unicode.Pi, unicode.Pf, // Punctuation
			),
			idDisOrFreePVal,
		},
		{
			rangetable.New(0x30000, 0x30101, 0xDFFFF),
			unassigned,
		},
	}

	assigned := rangetable.Assigned(UnicodeVersion)

	for _, test := range tests {
		rangetable.Visit(test.rangeTable, func(r rune) {
			if !unicode.In(r, assigned) {
				return
			}
			// A fixed-size array avoids one heap allocation per visited rune.
			var b [utf8.UTFMax]byte
			n := utf8.EncodeRune(b[:], r)
			trieval, _ := dpTrie.lookup(b[:n])
			p := entry(trieval).property()
			if p != test.prop && !exceptions.Contains(r) {
				// "got" is the value found in the trie; "want" is the expectation.
				t.Errorf("%U: got %+x; want %+x", r, p, test.prop)
			}
		})
	}
}
// TestFoldData checks that applying foldFull to each rune produces the full
// case folding recorded by runeFoldData. Only runes assigned in both the
// package's Unicode version and the core library's version are checked.
func TestFoldData(t *testing.T) {
	assigned := rangetable.Assigned(UnicodeVersion)
	coreVersion := rangetable.Assigned(unicode.Version)
	if coreVersion == nil {
		// rangetable.Assigned returns nil when it has no data for the requested
		// version, and unicode.In dereferences every table it is handed. Fall
		// back to this package's own table, as TestMapping does, rather than
		// panicking on the first rune.
		coreVersion = assigned
	}
	// apply runs f on a context created for r alone and returns the bytes
	// written to the destination together with the context's ccc type.
	apply := func(r rune, f func(c *context) bool) (string, info) {
		c := contextFromRune(r)
		f(c)
		return string(c.dst[:c.pDst]), c.info.cccType()
	}
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
			continue
		}
		x := runeFoldData(r)
		// The second result is named ccc so it does not shadow the info type.
		if got, ccc := apply(r, foldFull); got != x.full {
			t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), ccc)
		}
		// TODO: special and simple.
	}
}
Example #6
0
// TestCCC checks that the ccc type stored for each rune agrees with the
// canonical combining class reported by the norm package: class 0 maps to
// cccZero, class above maps to cccAbove and everything else to cccOther.
func TestCCC(t *testing.T) {
	assigned := rangetable.Assigned(UnicodeVersion)
	normVersion := rangetable.Assigned(norm.Version)

	for r := rune(0); r <= lastRuneForTesting; r++ {
		// Restrict the comparison to runes both tables know about.
		if !(unicode.In(r, assigned) && unicode.In(r, normVersion)) {
			continue
		}
		ctx := contextFromRune(r)

		expected := cccOther
		switch norm.NFC.PropertiesString(string(r)).CCC() {
		case 0:
			expected = cccZero
		case above:
			expected = cccAbove
		}

		got := ctx.info.cccType()
		if got == expected {
			continue
		}
		t.Errorf("%U: got %x; want %x", r, got, expected)
	}
}
Example #7
0
// main loads the Unicode data files that determine which runes are disallowed,
// records the set of assigned code points and then writes the tables.
func main() {
	gen.Init()

	// Load data
	var runes []rune
	ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
		if p.String(1) == "Default_Ignorable_Code_Point" {
			runes = append(runes, p.Rune(0))
		}
	})
	// OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9
	// The category covers the conjoining jamo, whose Hangul_Syllable_Type is
	// L, V or T. "LVT" is the type of precomposed syllables and must not be
	// matched here.
	ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
		switch p.String(1) {
		case "L", "V", "T":
			runes = append(runes, p.Rune(0))
		}
	})

	disallowedRunes = rangetable.New(runes...)
	assigned = rangetable.Assigned(unicode.Version)

	writeTables()
	gen.Repackage("gen_trieval.go", "trieval.go", "precis")
}
Example #8
0
// This file contains tests which need to import package collate, which causes
// an import cycle when done within package colltab itself.

import (
	"bytes"
	"testing"
	"unicode"

	"golang.org/x/text/collate"
	"golang.org/x/text/language"
	"golang.org/x/text/unicode/rangetable"
)

// assigned is used to restrict tests to runes that are within the scope of the
// Unicode version used to generate the collation table.
var assigned = rangetable.Assigned(collate.UnicodeVersion)

// TestNonDigits verifies that, under numeric collation, numbers that are not
// decimal digits do not sort inside the range spanned by "0" and "999999".
func TestNonDigits(t *testing.T) {
	c := collate.New(language.English, collate.Loose, collate.Numeric)

	// Scan from the start of the first 16-bit range of unicode.N up to the end
	// of its first 32-bit range.
	first := rune(unicode.N.R16[0].Lo)
	last := rune(unicode.N.R32[0].Hi)

	for r := first; r <= last; r++ {
		// Decimal digits and unassigned runes are out of scope.
		if unicode.In(r, unicode.Nd) || !unicode.In(r, assigned) {
			continue
		}
		s := string(r)
		// Sorting before "0" or after "999999" is the expected outcome.
		if c.CompareString(s, "0") == -1 || c.CompareString(s, "999999") == 1 {
			continue
		}
		t.Errorf("%+q non-digit number is collated as digit", s)
	}
}
Example #9
0
// main loads the Unicode data needed by the generator: the set of disallowed
// runes, the set of assigned code points and the per-rune rule categories. It
// then writes the tables.
func main() {
	gen.Init()

	// Load data
	runes := []rune{}
	// disallow records the rune in field 0 of the current record.
	disallow := func(p *ucd.Parser) {
		runes = append(runes, p.Rune(0))
	}

	// PrecisIgnorableProperties: https://tools.ietf.org/html/rfc7564#section-9.13
	ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
		if p.String(1) == "Default_Ignorable_Code_Point" {
			disallow(p)
		}
	})
	ucd.Parse(gen.OpenUCDFile("PropList.txt"), func(p *ucd.Parser) {
		if p.String(1) == "Noncharacter_Code_Point" {
			disallow(p)
		}
	})
	// OldHangulJamo: https://tools.ietf.org/html/rfc5892#section-2.9
	ucd.Parse(gen.OpenUCDFile("HangulSyllableType.txt"), func(p *ucd.Parser) {
		if kind := p.String(1); kind == "L" || kind == "V" || kind == "T" {
			disallow(p)
		}
	})

	disallowedRunes = rangetable.New(runes...)
	assigned = rangetable.Assigned(unicode.Version)

	// Load category data.
	runeCategory['l'] = latinSmallL
	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
		const cccVirama = 9
		if p.Int(ucd.CanonicalCombiningClass) != cccVirama {
			return
		}
		setCategory(p.Rune(0), viramaModifier)
	})
	ucd.Parse(gen.OpenUCDFile("Scripts.txt"), func(p *ucd.Parser) {
		var script category
		switch p.String(1) {
		case "Greek":
			script = greek
		case "Hebrew":
			script = hebrew
		case "Hiragana", "Katakana", "Han":
			script = japanese
		default:
			// Other scripts have no category of their own.
			return
		}
		setCategory(p.Rune(0), script)
	})

	// Set the rule categories associated with exceptions. This overrides any
	// previously set categories. The original categories are manually
	// reintroduced in the categoryTransitions table.
	for r, e := range exceptions {
		if e.cat == 0 {
			continue
		}
		runeCategory[r] = e.cat
	}

	// Joining types are applied last, after the exception overrides.
	joiningTypes := map[string]category{
		"L": joiningL,
		"D": joiningD,
		"T": joiningT,

		"R": joiningR,
	}
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
		if joining, ok := joiningTypes[p.String(1)]; ok {
			setCategory(p.Rune(0), joining)
		}
	})

	writeTables()
	gen.Repackage("gen_trieval.go", "trieval.go", "precis")
}