Пример #1
0
// testDerived cross-checks the quick-check values stored in chars against the
// reference values listed in DerivedNormalizationProps.txt and logs every
// mismatch it finds.
//
// DerivedNormalizationProps.txt has lines of the form:
//
//	00C0..00C5    ; NFD_QC; N # ...
//	0374          ; NFD_QC; N # ...
//
// See http://unicode.org/reports/tr44/ for the full explanation.
func testDerived() {
	src := openReader("DerivedNormalizationProps.txt")
	defer src.Close()

	parser := ucd.New(src)
	for parser.Next() {
		r := parser.Rune(0)
		info := &chars[r]

		// Translate the property name into the (form type, mode) pair it
		// describes; any other property is skipped.
		var ftype, mode int
		property := parser.String(1)
		switch property {
		case "NFC_QC":
			ftype, mode = FCanonical, MComposed
		case "NFD_QC":
			ftype, mode = FCanonical, MDecomposed
		case "NFKC_QC":
			ftype, mode = FCompatibility, MComposed
		case "NFKD_QC":
			ftype, mode = FCompatibility, MDecomposed
		default:
			continue
		}

		// Translate the single-letter value into the expected result.
		var want QCResult
		value := parser.String(2)
		switch value {
		case "Y":
			want = QCYes
		case "N":
			want = QCNo
		case "M":
			want = QCMaybe
		default:
			log.Fatalf(`Unexpected quick check value "%s"`, value)
		}

		form := &info.forms[ftype]
		if have := form.quickCheck[mode]; have != want {
			logger.Printf("%U: FAILED %s (was %v need %v)\n", r, property, have, want)
		}
		// Remember that this entry was listed explicitly so the pass below
		// does not examine it again.
		form.verified[mode] = true
	}
	if err := parser.Err(); err != nil {
		logger.Fatal(err)
	}

	// Every entry the file did not mention must be QCYes; report the rest.
	for i := range chars {
		info := &chars[i]
		for j := range info.forms {
			form := &info.forms[j]
			for k, have := range form.quickCheck {
				if form.verified[k] || have == QCYes {
					continue
				}
				logger.Printf("%U: FAIL F:%d M:%d (was %v need Yes) %s\n", i, j, k, have, info.name)
			}
		}
	}
}
Пример #2
0
// Example shows two ways of walking UCD data with the parser: one rune at a
// time over UnicodeData, and one raw range at a time over Scripts.
func Example() {
	// Pass 1: visit UnicodeData rune by rune, printing each simple lowercase
	// mapping that is present and counting the runes seen.
	visited := 0
	runeParser := ucd.New(strings.NewReader(unicodeData))
	for runeParser.Next() {
		visited++
		lower := runeParser.Runes(ucd.SimpleLowercaseMapping)
		if lower == nil {
			continue
		}
		fmt.Printf("lower(%U) -> %U\n", runeParser.Rune(0), lower[0])
	}
	if err := runeParser.Err(); err != nil {
		fmt.Println(err)
	}
	fmt.Println("Number of runes visited:", visited)

	// Pass 2: visit Scripts with ranges kept intact, printing the bounds of
	// each range together with its second field.
	rangeParser := ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
	for rangeParser.Next() {
		lo, hi := rangeParser.Range(0)
		fmt.Printf("%04X..%04X: %s\n", lo, hi, rangeParser.String(1))
	}
	if err := rangeParser.Err(); err != nil {
		fmt.Println(err)
	}

	// Output:
	// lower(U+00C0) -> U+00E0
	// lower(U+00C1) -> U+00E1
	// lower(U+00C2) -> U+00E2
	// lower(U+00C3) -> U+00E3
	// lower(U+00C4) -> U+00E4
	// Number of runes visited: 6594
	// 0000..001F: Common
	// 0020..0020: Common
	// 0021..0023: Common
	// 0024..0024: Common
}
Пример #3
0
// loadCompositionExclusions sets excludeInComp on the chars entry of every
// rune listed in CompositionExclusions.txt. A rune that is already flagged
// when it is encountered is reported through logger.Fatalf as a duplicate.
//
// CompositionExclusions.txt has lines of the form:
//
//	0958    # ...
//
// See http://unicode.org/reports/tr44/ for the full explanation.
func loadCompositionExclusions() {
	src := openReader("CompositionExclusions.txt")
	defer src.Close()

	parser := ucd.New(src)
	for parser.Next() {
		entry := &chars[parser.Rune(0)]
		if entry.excludeInComp {
			logger.Fatalf("%U: Duplicate entry in exclusions.", entry.codePoint)
		}
		entry.excludeInComp = true
	}

	if err := parser.Err(); err != nil {
		logger.Fatal(err)
	}
}
Пример #4
0
// loadUnicodeData fills in chars from UnicodeData.txt. For each rune it
// records the name, code point, canonical combining class and decomposition
// mapping. A mapping that cannot be parsed either way is reported through
// logger.Fatalf.
func loadUnicodeData() {
	src := openReader("UnicodeData.txt")
	defer src.Close()

	parser := ucd.New(src)
	for parser.Next() {
		r := parser.Rune(ucd.CodePoint)
		info := &chars[r]

		info.ccc = uint8(parser.Uint(ucd.CanonicalCombiningClass))
		mapping := parser.String(ucd.DecompMapping)

		// Parse the mapping with the second argument false first. If that
		// fails on a non-empty mapping, parse it again with the argument set
		// to true and treat the result as a compatibility decomposition. A
		// failure on an empty mapping is deliberately left alone.
		decomp, err := parseDecomposition(mapping, false)
		compat := false
		if err != nil && len(mapping) > 0 {
			if decomp, err = parseDecomposition(mapping, true); err != nil {
				logger.Fatalf(`%U: bad decomp |%v|: "%s"`, r, mapping, err)
			}
			compat = true
		}

		info.name = parser.String(ucd.Name)
		info.codePoint = r

		// The compatibility form always receives the parsed decomposition;
		// the canonical form receives it only when it was not parsed as a
		// compatibility mapping.
		info.forms[FCompatibility].decomp = decomp
		if compat {
			info.compatDecomp = true
		} else {
			info.forms[FCanonical].decomp = decomp
		}
	}

	if err := parser.Err(); err != nil {
		logger.Fatal(err)
	}
}