Beispiel #1
0
// parse calls f for each entry in the given UCD file.
//
// When *localPath is non-empty the file is opened from that directory;
// otherwise it is fetched with an HTTP GET from *url + "/" + filename.
// opts are passed through to ucd.New. Failures to open, fetch or parse the
// file are reported through logger.Fatal / logger.Fatalf.
func (opts ucdParser) parse(filename string, f func(p *ucd.Parser)) {
	var r io.ReadCloser
	if *localPath != "" {
		// Named "file" rather than "f" so the callback parameter is not shadowed.
		file, err := os.Open(filepath.Join(*localPath, filename))
		if err != nil {
			logger.Fatal(err)
		}
		r = file
	} else {
		target := *url + "/" + filename
		resp, err := http.Get(target)
		if err != nil {
			logger.Fatal(err)
		}
		if resp.StatusCode != http.StatusOK {
			// Report the full URL that was requested, not just the base.
			logger.Fatalf("bad GET status for %s: %v", target, resp.Status)
		}
		r = resp.Body
	}
	defer r.Close()
	p := ucd.New(r, opts...)
	for p.Next() {
		f(p)
	}
	if err := p.Err(); err != nil {
		logger.Fatal(err)
	}
}
// TestBidiCore performs the tests in BidiTest.txt.
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt.
//
// "Levels" and "Reorder" part headers set the expectations for the entries
// that follow them. For each entry, bit i of field 1 selects a run with
// paragraph level i-1. Only the parity of each resolved level is compared,
// and expected levels written as "x" are skipped.
func TestBidiCore(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUCDFile("BidiTest.txt")
	defer r.Close()

	var wantLevels, wantOrder []string
	p := ucd.New(r, ucd.Part(func(p *ucd.Parser) {
		s := strings.Split(p.String(0), ":")
		switch s[0] {
		case "Levels":
			wantLevels = strings.Fields(s[1])
		case "Reorder":
			wantOrder = strings.Fields(s[1])
		default:
			// Fail this test instead of exiting the whole test binary.
			t.Fatalf("Unknown part %q.", s[0])
		}
	}))

	for p.Next() {
		fields := p.Strings(0)
		types := make([]class, 0, len(fields))
		for _, s := range fields {
			types = append(types, bidiClass[s])
		}
		// We ignore the bracketing part of the algorithm.
		pairTypes := make([]bracketType, len(types))
		pairValues := make([]rune, len(types))

		for i := uint(0); i < 3; i++ {
			if p.Uint(1)&(1<<i) == 0 {
				continue
			}
			lev := level(int(i) - 1)
			par := newParagraph(types, pairTypes, pairValues, lev)

			if *testLevels {
				levels := par.resultLevels
				for j, w := range wantLevels {
					if w == "x" {
						continue
					}
					l, err := strconv.ParseUint(w, 10, 8)
					if err != nil {
						// A malformed expectation must not be compared as level 0.
						t.Fatalf("%s:%d:levels: invalid expected level %q: %v", p.String(0), lev, w, err)
					}
					if level(l)&1 != levels[j]&1 {
						t.Errorf("%s:%d:levels: got %v; want %v", p.String(0), lev, levels, wantLevels)
						break
					}
				}
			}

			order := par.getReordering([]int{len(types)})
			gotOrder := filterOrder(types, order)
			if got, want := fmt.Sprint(gotOrder), fmt.Sprint(wantOrder); got != want {
				t.Errorf("%s:%d:order: got %v; want %v\noriginal %v", p.String(0), lev, got, want, order)
			}
		}
	}
	if err := p.Err(); err != nil {
		t.Fatal(err)
	}
}
Beispiel #3
0
// parse invokes f once for every entry the parser yields from the reader
// returned by gen.OpenUCDFile(path). A parser error is passed to log.Fatal.
func parse(path string, f func(p *ucd.Parser)) {
	file := gen.OpenUCDFile(path)
	defer file.Close()

	parser := ucd.New(file)
	for parser.Next() {
		f(parser)
	}

	err := parser.Err()
	if err == nil {
		return
	}
	log.Fatal(err)
}
Beispiel #4
0
// parse walks the UCD file named filename, handing each entry to f.
// The receiver's options are forwarded to ucd.New; a parser error is
// passed to log.Fatal.
func (opts ucdParser) parse(filename string, f func(p *ucd.Parser)) {
	src := gen.OpenUCDFile(filename)
	defer src.Close()

	parser := ucd.New(src, opts...)
	for parser.Next() {
		f(parser)
	}

	err := parser.Err()
	if err == nil {
		return
	}
	log.Fatal(err)
}
Beispiel #5
0
// Use values in DerivedNormalizationProps.txt to compare against the
// values we computed.
// DerivedNormalizationProps.txt has form:
// 00C0..00C5    ; NFD_QC; N # ...
// 0374          ; NFD_QC; N # ...
// See http://unicode.org/reports/tr44/ for full explanation
func testDerived() {
	f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
	defer f.Close()
	p := ucd.New(f)
	for p.Next() {
		r := p.Rune(0)
		c := &chars[r]

		var ftype, mode int
		qt := p.String(1)
		switch qt {
		case "NFC_QC":
			ftype, mode = FCanonical, MComposed
		case "NFD_QC":
			ftype, mode = FCanonical, MDecomposed
		case "NFKC_QC":
			ftype, mode = FCompatibility, MComposed
		case "NFKD_QC":
			ftype, mode = FCompatibility, MDecomposed
		default:
			continue
		}
		var qr QCResult
		switch p.String(2) {
		case "Y":
			qr = QCYes
		case "N":
			qr = QCNo
		case "M":
			qr = QCMaybe
		default:
			log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
		}
		if got := c.forms[ftype].quickCheck[mode]; got != qr {
			log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
		}
		c.forms[ftype].verified[mode] = true
	}
	if err := p.Err(); err != nil {
		log.Fatal(err)
	}
	// Any unspecified value must be QCYes. Verify this.
	for i, c := range chars {
		for j, fd := range c.forms {
			for k, qr := range fd.quickCheck {
				if !fd.verified[k] && qr != QCYes {
					m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
					log.Printf(m, i, j, k, qr, c.name)
				}
			}
		}
	}
}
Beispiel #6
0
// parse hands every entry of the given version's ucd/UnicodeData.txt to f.
// The data is obtained through gen.Open rooted at
// http://www.unicode.org/Public/; a parser error is passed to log.Fatal.
func parse(version string, f func(p *ucd.Parser)) {
	const root = "http://www.unicode.org/Public/"
	data := gen.Open(root, "", version+"/ucd/UnicodeData.txt")
	defer data.Close()

	parser := ucd.New(data)
	for parser.Next() {
		f(parser)
	}

	err := parser.Err()
	if err == nil {
		return
	}
	log.Fatal(err)
}
Beispiel #7
0
// Example shows two ways of driving a ucd.Parser: visiting UnicodeData one
// rune at a time, and visiting Scripts with ranges kept intact.
func Example() {
	// Walk UnicodeData one rune at a time, printing simple lowercase mappings.
	visited := 0
	parser := ucd.New(strings.NewReader(unicodeData))
	for parser.Next() {
		visited++
		lower := parser.Runes(ucd.SimpleLowercaseMapping)
		if lower == nil {
			continue
		}
		fmt.Printf("lower(%U) -> %U\n", parser.Rune(0), lower[0])
	}
	if err := parser.Err(); err != nil {
		fmt.Println(err)
	}
	fmt.Println("Number of runes visited:", visited)

	// Walk Scripts with ucd.KeepRanges so each entry is reported as a range.
	parser = ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
	for parser.Next() {
		lo, hi := parser.Range(0)
		fmt.Printf("%04X..%04X: %s\n", lo, hi, parser.String(1))
	}
	if err := parser.Err(); err != nil {
		fmt.Println(err)
	}

	// Output:
	// lower(U+00C0) -> U+00E0
	// lower(U+00C1) -> U+00E1
	// lower(U+00C2) -> U+00E2
	// lower(U+00C3) -> U+00E3
	// lower(U+00C4) -> U+00E4
	// Number of runes visited: 6594
	// 0000..001F: Common
	// 0020..0020: Common
	// 0021..0023: Common
	// 0024..0024: Common
}
Beispiel #8
0
// loadCompositionExclusions sets excludeInComp on every chars entry listed
// in CompositionExclusions.txt. A code point listed twice, or a parser
// error, is passed to log.Fatal(f).
// CompositionExclusions.txt has form:
// 0958    # ...
// See http://unicode.org/reports/tr44/ for full explanation
func loadCompositionExclusions() {
	in := gen.OpenUCDFile("CompositionExclusions.txt")
	defer in.Close()

	parser := ucd.New(in)
	for parser.Next() {
		entry := &chars[parser.Rune(0)]
		if !entry.excludeInComp {
			entry.excludeInComp = true
			continue
		}
		// Already flagged by an earlier line.
		log.Fatalf("%U: Duplicate entry in exclusions.", entry.codePoint)
	}

	err := parser.Err()
	if err == nil {
		return
	}
	log.Fatal(err)
}
Beispiel #9
0
// loadUnicodeData fills chars from UnicodeData.txt: for every entry it
// records the combining class, name, code point and decomposition.
//
// The decomposition mapping is first parsed with parseDecomposition(decmap,
// false). If that fails and the mapping is non-empty it is parsed again with
// the flag set to true, and the character is marked compatDecomp (the flag
// presumably selects compatibility parsing; confirm against
// parseDecomposition). A mapping that fails both attempts, or a parser
// error, is passed to log.Fatal(f).
func loadUnicodeData() {
	f := gen.OpenUCDFile("UnicodeData.txt")
	defer f.Close()
	p := ucd.New(f)
	for p.Next() {
		r := p.Rune(ucd.CodePoint)
		char := &chars[r]

		char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
		decmap := p.String(ucd.DecompMapping)

		exp, err := parseDecomposition(decmap, false)
		isCompat := false
		// An error on an empty mapping is deliberately ignored, as before.
		if err != nil && len(decmap) > 0 {
			exp, err = parseDecomposition(decmap, true)
			if err != nil {
				log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
			}
			isCompat = true
		}

		char.name = p.String(ucd.Name)
		char.codePoint = r
		// The compatibility form always receives the decomposition; the
		// canonical form only when the first parse attempt was used.
		char.forms[FCompatibility].decomp = exp
		if isCompat {
			char.compatDecomp = true
		} else {
			char.forms[FCanonical].decomp = exp
		}
	}
	if err := p.Err(); err != nil {
		log.Fatal(err)
	}
}
Beispiel #10
0
// TestConformance runs the entries of IdnaTest.txt through doTest for
// ToUnicode and ToASCII.
//
// Field 0 selects the profiles to exercise ("T" transitional, "N"
// non-transitional, "B" both), field 1 is the source, fields 2 and 3 are the
// expected ToUnicode and ToASCII results. An empty expectation falls back to
// the preceding value, and an expectation starting with "[" is treated as an
// expected error. Comments seen after the first entry rename the current
// section used in test names.
func TestConformance(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
	defer r.Close()

	section := "main"
	started := false
	p := ucd.New(r, ucd.CommentHandler(func(s string) {
		if started {
			section = strings.ToLower(strings.Split(s, " ")[0])
		}
	}))
	transitional := New(Transitional(true), VerifyDNSLength(true))
	nonTransitional := New(VerifyDNSLength(true))
	for p.Next() {
		started = true

		// What to test
		var profiles []*Profile
		switch p.String(0) {
		case "T":
			profiles = append(profiles, transitional)
		case "N":
			profiles = append(profiles, nonTransitional)
		case "B":
			profiles = append(profiles, transitional, nonTransitional)
		}

		src := unescape(p.String(1))

		wantToUnicode := unescape(p.String(2))
		if wantToUnicode == "" {
			wantToUnicode = src
		}
		wantToASCII := unescape(p.String(3))
		if wantToASCII == "" {
			wantToASCII = wantToUnicode
		}
		wantErrToUnicode := ""
		if strings.HasPrefix(wantToUnicode, "[") {
			wantErrToUnicode = wantToUnicode
			wantToUnicode = ""
		}
		wantErrToASCII := ""
		if strings.HasPrefix(wantToASCII, "[") {
			wantErrToASCII = wantToASCII
			wantToASCII = ""
		}

		// TODO: also do IDNA tests.
		// invalidInIDNA2008 := p.String(4) == "NV8"

		// Named "profile" so the parser p is not shadowed.
		for _, profile := range profiles {
			name := fmt.Sprintf("%s:%s", section, profile)
			doTest(t, profile.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode)
			doTest(t, profile.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII)
		}
	}
	// A parse failure must not look like a passing run.
	if err := p.Err(); err != nil {
		t.Fatal(err)
	}
}
Beispiel #11
0
// TestConformance runs the entries of IdnaTest.txt as subtests that check
// ToUnicode and ToASCII.
//
// Field 0 selects the profiles to exercise ("T" Transitional, "N"
// NonTransitional, "B" both), field 1 is the source, fields 2 and 3 are the
// expected ToUnicode and ToASCII results. An empty expectation falls back to
// the preceding value, and an expectation starting with "[" means an error
// is expected (only the presence of an error is checked, not its kind).
// Sources listed in incorrectTests are skipped. Comments seen after the
// first entry rename the current section used in subtest names.
func TestConformance(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
	defer r.Close()

	section := "main"
	started := false
	p := ucd.New(r, ucd.CommentHandler(func(s string) {
		if started {
			section = strings.ToLower(strings.Split(s, " ")[0])
		}
	}))
	for p.Next() {
		started = true

		// What to test
		var profiles []*Profile
		switch p.String(0) {
		case "T":
			profiles = append(profiles, Transitional)
		case "N":
			profiles = append(profiles, NonTransitional)
		case "B":
			profiles = append(profiles, Transitional, NonTransitional)
		}

		src := unescape(p.String(1))
		if incorrectTests[src] {
			continue
		}

		wantToUnicode := unescape(p.String(2))
		if wantToUnicode == "" {
			wantToUnicode = src
		}
		wantToASCII := unescape(p.String(3))
		if wantToASCII == "" {
			wantToASCII = wantToUnicode
		}
		// Build the subtest label: "ok", or "err:" followed by the expected
		// error codes with brackets and spaces removed.
		test := "err:"
		if strings.HasPrefix(wantToUnicode, "[") {
			test += strings.Replace(strings.Trim(wantToUnicode, "[]"), " ", "", -1)
		}
		if strings.HasPrefix(wantToASCII, "[") {
			test += strings.Replace(strings.Trim(wantToASCII, "[]"), " ", "", -1)
		}
		if test == "err:" {
			test = "ok"
		}

		// TODO: also do IDNA tests.
		// invalidInIDNA2008 := p.String(4) == "NV8"

		// Named "profile" so the parser p is not shadowed.
		for _, profile := range profiles {
			testtext.Run(t, fmt.Sprintf("%s:%s/%s/%+q", section, test, profile, src), func(t *testing.T) {
				got, err := profile.ToUnicode(src)
				wantErr := strings.HasPrefix(wantToUnicode, "[")
				gotErr := err != nil
				if wantErr {
					if gotErr != wantErr {
						t.Errorf(`ToUnicode:err got %v; want %v (%s)`,
							gotErr, wantErr, wantToUnicode)
					}
				} else if got != wantToUnicode || gotErr != wantErr {
					t.Errorf(`ToUnicode: got %+q, %v (%v); want %+q, %v`,
						got, gotErr, err, wantToUnicode, wantErr)
				}

				got, err = profile.ToASCII(src)
				wantErr = strings.HasPrefix(wantToASCII, "[")
				gotErr = err != nil
				if wantErr {
					if gotErr != wantErr {
						t.Errorf(`ToASCII:err got %v; want %v (%s)`,
							gotErr, wantErr, wantToASCII)
					}
				} else if got != wantToASCII || gotErr != wantErr {
					t.Errorf(`ToASCII: got %+q, %v (%v); want %+q, %v`,
						got, gotErr, err, wantToASCII, wantErr)
				}
			})
		}
	}
	// A parse failure must not look like a passing run.
	if err := p.Err(); err != nil {
		t.Fatal(err)
	}
}