// parse calls f for each entry in the given UCD file. func (opts ucdParser) parse(filename string, f func(p *ucd.Parser)) { var r io.ReadCloser if *localPath != "" { f, err := os.Open(filepath.Join(*localPath, filename)) if err != nil { logger.Fatal(err) } r = f } else { resp, err := http.Get(*url + "/" + filename) if err != nil { logger.Fatal(err) } if resp.StatusCode != 200 { logger.Fatalf("bad GET status for %s: %v", *url, resp.Status) } r = resp.Body } defer r.Close() p := ucd.New(r, opts...) for p.Next() { f(p) } if err := p.Err(); err != nil { logger.Fatal(err) } }
// TestBidiCore performs the tests in BidiTest.txt. // See http://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt. func TestBidiCore(t *testing.T) { testtext.SkipIfNotLong(t) r := gen.OpenUCDFile("BidiTest.txt") defer r.Close() var wantLevels, wantOrder []string p := ucd.New(r, ucd.Part(func(p *ucd.Parser) { s := strings.Split(p.String(0), ":") switch s[0] { case "Levels": wantLevels = strings.Fields(s[1]) case "Reorder": wantOrder = strings.Fields(s[1]) default: log.Fatalf("Unknown part %q.", s[0]) } })) for p.Next() { types := []class{} for _, s := range p.Strings(0) { types = append(types, bidiClass[s]) } // We ignore the bracketing part of the algorithm. pairTypes := make([]bracketType, len(types)) pairValues := make([]rune, len(types)) for i := uint(0); i < 3; i++ { if p.Uint(1)&(1<<i) == 0 { continue } lev := level(int(i) - 1) par := newParagraph(types, pairTypes, pairValues, lev) if *testLevels { levels := par.resultLevels for i, s := range wantLevels { if s == "x" { continue } l, _ := strconv.ParseUint(s, 10, 8) if level(l)&1 != levels[i]&1 { t.Errorf("%s:%d:levels: got %v; want %v", p.String(0), lev, levels, wantLevels) break } } } order := par.getReordering([]int{len(types)}) gotOrder := filterOrder(types, order) if got, want := fmt.Sprint(gotOrder), fmt.Sprint(wantOrder); got != want { t.Errorf("%s:%d:order: got %v; want %v\noriginal %v", p.String(0), lev, got, want, order) } } } if err := p.Err(); err != nil { log.Fatal(err) } }
func parse(path string, f func(p *ucd.Parser)) { r := gen.OpenUCDFile(path) defer r.Close() p := ucd.New(r) for p.Next() { f(p) } if err := p.Err(); err != nil { log.Fatal(err) } }
// parse calls f for each entry in the given UCD file. func (opts ucdParser) parse(filename string, f func(p *ucd.Parser)) { r := gen.OpenUCDFile(filename) defer r.Close() p := ucd.New(r, opts...) for p.Next() { f(p) } if err := p.Err(); err != nil { log.Fatal(err) } }
// Use values in DerivedNormalizationProps.txt to compare against the // values we computed. // DerivedNormalizationProps.txt has form: // 00C0..00C5 ; NFD_QC; N # ... // 0374 ; NFD_QC; N # ... // See http://unicode.org/reports/tr44/ for full explanation func testDerived() { f := gen.OpenUCDFile("DerivedNormalizationProps.txt") defer f.Close() p := ucd.New(f) for p.Next() { r := p.Rune(0) c := &chars[r] var ftype, mode int qt := p.String(1) switch qt { case "NFC_QC": ftype, mode = FCanonical, MComposed case "NFD_QC": ftype, mode = FCanonical, MDecomposed case "NFKC_QC": ftype, mode = FCompatibility, MComposed case "NFKD_QC": ftype, mode = FCompatibility, MDecomposed default: continue } var qr QCResult switch p.String(2) { case "Y": qr = QCYes case "N": qr = QCNo case "M": qr = QCMaybe default: log.Fatalf(`Unexpected quick check value "%s"`, p.String(2)) } if got := c.forms[ftype].quickCheck[mode]; got != qr { log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr) } c.forms[ftype].verified[mode] = true } if err := p.Err(); err != nil { log.Fatal(err) } // Any unspecified value must be QCYes. Verify this. for i, c := range chars { for j, fd := range c.forms { for k, qr := range fd.quickCheck { if !fd.verified[k] && qr != QCYes { m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n" log.Printf(m, i, j, k, qr, c.name) } } } } }
// parse calls f for each entry in the given UCD file. func parse(version string, f func(p *ucd.Parser)) { r := gen.Open("http://www.unicode.org/Public/", "", version+"/ucd/UnicodeData.txt") defer r.Close() p := ucd.New(r) for p.Next() { f(p) } if err := p.Err(); err != nil { log.Fatal(err) } }
func Example() { // Read rune-by-rune from UnicodeData. var count int p := ucd.New(strings.NewReader(unicodeData)) for p.Next() { count++ if lower := p.Runes(ucd.SimpleLowercaseMapping); lower != nil { fmt.Printf("lower(%U) -> %U\n", p.Rune(0), lower[0]) } } if err := p.Err(); err != nil { fmt.Println(err) } fmt.Println("Number of runes visited:", count) // Read raw ranges from Scripts. p = ucd.New(strings.NewReader(scripts), ucd.KeepRanges) for p.Next() { start, end := p.Range(0) fmt.Printf("%04X..%04X: %s\n", start, end, p.String(1)) } if err := p.Err(); err != nil { fmt.Println(err) } // Output: // lower(U+00C0) -> U+00E0 // lower(U+00C1) -> U+00E1 // lower(U+00C2) -> U+00E2 // lower(U+00C3) -> U+00E3 // lower(U+00C4) -> U+00E4 // Number of runes visited: 6594 // 0000..001F: Common // 0020..0020: Common // 0021..0023: Common // 0024..0024: Common }
// CompositionExclusions.txt has form: // 0958 # ... // See http://unicode.org/reports/tr44/ for full explanation func loadCompositionExclusions() { f := gen.OpenUCDFile("CompositionExclusions.txt") defer f.Close() p := ucd.New(f) for p.Next() { c := &chars[p.Rune(0)] if c.excludeInComp { log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint) } c.excludeInComp = true } if e := p.Err(); e != nil { log.Fatal(e) } }
func loadUnicodeData() { f := gen.OpenUCDFile("UnicodeData.txt") defer f.Close() p := ucd.New(f) for p.Next() { r := p.Rune(ucd.CodePoint) char := &chars[r] char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass)) decmap := p.String(ucd.DecompMapping) exp, err := parseDecomposition(decmap, false) isCompat := false if err != nil { if len(decmap) > 0 { exp, err = parseDecomposition(decmap, true) if err != nil { log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err) } isCompat = true } } char.name = p.String(ucd.Name) char.codePoint = r char.forms[FCompatibility].decomp = exp if !isCompat { char.forms[FCanonical].decomp = exp } else { char.compatDecomp = true } if len(decmap) > 0 { char.forms[FCompatibility].decomp = exp } } if err := p.Err(); err != nil { log.Fatal(err) } }
func TestConformance(t *testing.T) { testtext.SkipIfNotLong(t) r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt") defer r.Close() section := "main" started := false p := ucd.New(r, ucd.CommentHandler(func(s string) { if started { section = strings.ToLower(strings.Split(s, " ")[0]) } })) transitional := New(Transitional(true), VerifyDNSLength(true)) nonTransitional := New(VerifyDNSLength(true)) for p.Next() { started = true // What to test profiles := []*Profile{} switch p.String(0) { case "T": profiles = append(profiles, transitional) case "N": profiles = append(profiles, nonTransitional) case "B": profiles = append(profiles, transitional) profiles = append(profiles, nonTransitional) } src := unescape(p.String(1)) wantToUnicode := unescape(p.String(2)) if wantToUnicode == "" { wantToUnicode = src } wantToASCII := unescape(p.String(3)) if wantToASCII == "" { wantToASCII = wantToUnicode } wantErrToUnicode := "" if strings.HasPrefix(wantToUnicode, "[") { wantErrToUnicode = wantToUnicode wantToUnicode = "" } wantErrToASCII := "" if strings.HasPrefix(wantToASCII, "[") { wantErrToASCII = wantToASCII wantToASCII = "" } // TODO: also do IDNA tests. // invalidInIDNA2008 := p.String(4) == "NV8" for _, p := range profiles { name := fmt.Sprintf("%s:%s", section, p) doTest(t, p.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode) doTest(t, p.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII) } } }
func TestConformance(t *testing.T) { testtext.SkipIfNotLong(t) r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt") defer r.Close() section := "main" started := false p := ucd.New(r, ucd.CommentHandler(func(s string) { if started { section = strings.ToLower(strings.Split(s, " ")[0]) } })) for p.Next() { started = true // What to test profiles := []*Profile{} switch p.String(0) { case "T": profiles = append(profiles, Transitional) case "N": profiles = append(profiles, NonTransitional) case "B": profiles = append(profiles, Transitional) profiles = append(profiles, NonTransitional) } src := unescape(p.String(1)) if incorrectTests[src] { continue } wantToUnicode := unescape(p.String(2)) if wantToUnicode == "" { wantToUnicode = src } wantToASCII := unescape(p.String(3)) if wantToASCII == "" { wantToASCII = wantToUnicode } test := "err:" if strings.HasPrefix(wantToUnicode, "[") { test += strings.Replace(strings.Trim(wantToUnicode, "[]"), " ", "", -1) } if strings.HasPrefix(wantToASCII, "[") { test += strings.Replace(strings.Trim(wantToASCII, "[]"), " ", "", -1) } if test == "err:" { test = "ok" } // TODO: also do IDNA tests. // invalidInIDNA2008 := p.String(4) == "NV8" for _, p := range profiles { testtext.Run(t, fmt.Sprintf("%s:%s/%s/%+q", section, test, p, src), func(t *testing.T) { got, err := p.ToUnicode(src) wantErr := strings.HasPrefix(wantToUnicode, "[") gotErr := err != nil if wantErr { if gotErr != wantErr { t.Errorf(`ToUnicode:err got %v; want %v (%s)`, gotErr, wantErr, wantToUnicode) } } else if got != wantToUnicode || gotErr != wantErr { t.Errorf(`ToUnicode: got %+q, %v (%v); want %+q, %v`, got, gotErr, err, wantToUnicode, wantErr) } got, err = p.ToASCII(src) wantErr = strings.HasPrefix(wantToASCII, "[") gotErr = err != nil if wantErr { if gotErr != wantErr { t.Errorf(`ToASCII:err got %v; want %v (%s)`, gotErr, wantErr, wantToASCII) } } else if got != wantToASCII || gotErr != wantErr { t.Errorf(`ToASCII: got %+q, %v (%v); want %+q, %v`, got, gotErr, err, wantToASCII, wantErr) } }) } } }