// Use values in DerivedNormalizationProps.txt to compare against the // values we computed. // DerivedNormalizationProps.txt has form: // 00C0..00C5 ; NFD_QC; N # ... // 0374 ; NFD_QC; N # ... // See http://unicode.org/reports/tr44/ for full explanation func testDerived() { f := openReader("DerivedNormalizationProps.txt") defer f.Close() p := ucd.New(f) for p.Next() { r := p.Rune(0) c := &chars[r] var ftype, mode int qt := p.String(1) switch qt { case "NFC_QC": ftype, mode = FCanonical, MComposed case "NFD_QC": ftype, mode = FCanonical, MDecomposed case "NFKC_QC": ftype, mode = FCompatibility, MComposed case "NFKD_QC": ftype, mode = FCompatibility, MDecomposed default: continue } var qr QCResult switch p.String(2) { case "Y": qr = QCYes case "N": qr = QCNo case "M": qr = QCMaybe default: log.Fatalf(`Unexpected quick check value "%s"`, p.String(2)) } if got := c.forms[ftype].quickCheck[mode]; got != qr { logger.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr) } c.forms[ftype].verified[mode] = true } if err := p.Err(); err != nil { logger.Fatal(err) } // Any unspecified value must be QCYes. Verify this. for i, c := range chars { for j, fd := range c.forms { for k, qr := range fd.quickCheck { if !fd.verified[k] && qr != QCYes { m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n" logger.Printf(m, i, j, k, qr, c.name) } } } } }
func Example() { // Read rune-by-rune from UnicodeData. var count int p := ucd.New(strings.NewReader(unicodeData)) for p.Next() { count++ if lower := p.Runes(ucd.SimpleLowercaseMapping); lower != nil { fmt.Printf("lower(%U) -> %U\n", p.Rune(0), lower[0]) } } if err := p.Err(); err != nil { fmt.Println(err) } fmt.Println("Number of runes visited:", count) // Read raw ranges from Scripts. p = ucd.New(strings.NewReader(scripts), ucd.KeepRanges) for p.Next() { start, end := p.Range(0) fmt.Printf("%04X..%04X: %s\n", start, end, p.String(1)) } if err := p.Err(); err != nil { fmt.Println(err) } // Output: // lower(U+00C0) -> U+00E0 // lower(U+00C1) -> U+00E1 // lower(U+00C2) -> U+00E2 // lower(U+00C3) -> U+00E3 // lower(U+00C4) -> U+00E4 // Number of runes visited: 6594 // 0000..001F: Common // 0020..0020: Common // 0021..0023: Common // 0024..0024: Common }
// CompositionExclusions.txt has form: // 0958 # ... // See http://unicode.org/reports/tr44/ for full explanation func loadCompositionExclusions() { f := openReader("CompositionExclusions.txt") defer f.Close() p := ucd.New(f) for p.Next() { c := &chars[p.Rune(0)] if c.excludeInComp { logger.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint) } c.excludeInComp = true } if e := p.Err(); e != nil { logger.Fatal(e) } }
func loadUnicodeData() { f := openReader("UnicodeData.txt") defer f.Close() p := ucd.New(f) for p.Next() { r := p.Rune(ucd.CodePoint) char := &chars[r] char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass)) decmap := p.String(ucd.DecompMapping) exp, err := parseDecomposition(decmap, false) isCompat := false if err != nil { if len(decmap) > 0 { exp, err = parseDecomposition(decmap, true) if err != nil { logger.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err) } isCompat = true } } char.name = p.String(ucd.Name) char.codePoint = r char.forms[FCompatibility].decomp = exp if !isCompat { char.forms[FCanonical].decomp = exp } else { char.compatDecomp = true } if len(decmap) > 0 { char.forms[FCompatibility].decomp = exp } } if err := p.Err(); err != nil { logger.Fatal(err) } }