func (s *PGNSuite) TestPGNParseInfiniteLoopF4(c *C) {
	pgnstr := `[Event "BKL-Turnier"]
[Site "Leipzig"]
[Date "1984.??.??"]
[Round "5"]
[White "Polgar, Zsuzsa"]
[Black "Moehring, Guenther"]
[Result "1-0"]
[WhiteElo "2275"]
[BlackElo "2395"]
[ECO "A49"]

1.d4 Nf6 2.Nf3 d6 3.b3 g6 4.Bb2 Bg7 5.g3 c5 6.Bg2 cxd4 7.Nxd4 d5 8.O-O O-O 9.Na3 Re8 10.Nf3 Nc6
11.c4 dxc4 12.Nxc4 Be6 13.Rc1 Rc8 14.Nfe5 Nxe5 15.Bxe5 Bxc4 16.Rxc4 Rxc4 17.bxc4 Qa5 18.Bxf6 Bxf6
19.Bxb7 Rd8 20.Qb3 Rb8 21.e3 h5 22.Rb1 h4 23.Qb5 Qc7 24.a4 hxg3 25.hxg3 Be5 26.Kg2 Bd6 27.a5 Bc5
28.a6 Rd8 29.Qc6 Qxc6+ 30.Bxc6 Rd2 31.Kf3 Rc2 32.Rb8+ Kg7 33.Bb5 Kf6 34.Rc8 Bb6 35.Ba4 Ra2
36.Bb5 Rc2 37.Ke4 e6 38.Kd3 Rc1 39.Kd2 Rb1 40.Kc2 Rb4 41.Rb8 Bc5 42.Rc8 Bb6 43.Rc6 Ba5 44.Rd6 g5
45.f4 gxf4 46.gxf4 Kf5 47.Rd7 Bb6 48.Rxf7+ Ke4 49.Rb7 Bc5 50.Kc3 Kxe3 51.Rc7 Bb6 52.Rc6 Ba5
53.Kc2 Kxf4 54.Rxe6 Bd8 55.Kc3 Rb1 56.Kd4 Rd1+ 57.Kc5 Kf5 58.Re8 Bb6+ 59.Kc6 Kf6 60.Kb7 Bg1
61.Ra8 Re1 62.Rf8+ Kg7 63.Rf5 Kg6 64.Rd5 Rc1 65.Ka8 Be3 66.Rd6+ Kf5 67.Rd3 Ke4 68.Rxe3+ Kxe3
69.Kxa7 Kd4 70.Kb6 Rg1 71.a7 Rg8 72.Kb7 Rg7+ 73.Kb6 1-0`

	r := strings.NewReader(pgnstr)
	sc := scanner.Scanner{}
	sc.Init(r)
	game, err := ParseGame(&sc)
	c.Assert(err, IsNil)
	// fmt.Println(game)
	c.Assert(game.Tags["Site"], Equals, "Leipzig")
	c.Assert(len(game.Moves), Equals, 145)
}
func Example() {
	const src = `
	// This is scanned code.
	if a > 10 {
		someParsable = text
	}`

	var s scanner.Scanner
	s.Filename = "example"
	s.Init(strings.NewReader(src))

	var tok rune
	for tok != scanner.EOF {
		tok = s.Scan()
		fmt.Println("At position", s.Pos(), ":", s.TokenText())
	}

	// Output:
	// At position example:3:4 : if
	// At position example:3:6 : a
	// At position example:3:8 : >
	// At position example:3:11 : 10
	// At position example:3:13 : {
	// At position example:4:15 : someParsable
	// At position example:4:17 : =
	// At position example:4:22 : text
	// At position example:5:3 : }
	// At position example:5:3 : }
}
func getLineToken(line string) []string {
	var tokenScanner scanner.Scanner
	tokenScanner.Init(strings.NewReader(line))
	return tokenize(&tokenScanner)
}
// Parse the initial tag
// e.g. `div`
func ParseTagMatcher(selector *CSSSelector, s scanner.Scanner) error {
	tag := bytes.NewBuffer([]byte{})
	defer func() {
		selector.Tag = tag.String()
	}()
	for {
		c := s.Next()
		switch c {
		case scanner.EOF:
			return nil
		case '.':
			return ParseClassMatcher(selector, s)
		case '#':
			return ParseIdMatcher(selector, s)
		case '[':
			return ParseAttrMatcher(selector, s)
		case ':':
			return ParsePseudo(selector, s)
		default:
			if _, err := tag.WriteRune(c); err != nil {
				return err
			}
		}
	}
}
func copyUntilDelim(in *scanner.Scanner, out *bufio.Writer) error {
	var err error
	for err == nil && !isEOF(in) && in.Peek() != lDelim && in.Peek() != rDelim {
		err = copyNext(in, out)
	}
	return err
}
func (this limitedStringSpan) scanString(s *scanner.Scanner) (string, error) {
	tok := s.Scan()
	// scanner.Ident is -2; the magic number made the original EOF check
	// redundant, since EOF is -1 and can never equal Ident.
	if tok == scanner.Ident {
		return s.TokenText(), nil
	}
	return "", NewInvalidTypeError()
}
// ParseKeySet parses a string of comma separated keys
// made of dot separated sections into a KeySet.
func ParseKeySet(s string) (KeySet, error) {
	sc := new(scanner.Scanner).Init(strings.NewReader(s))
	sc.Mode = scanner.ScanIdents | scanner.ScanStrings | scanner.ScanInts
	var k Key
	var ks KeySet
	for {
		// scan section
		switch sc.Scan() {
		case scanner.String:
			u, _ := strconv.Unquote(sc.TokenText())
			if len(u) == 0 {
				return nil, fmt.Errorf("part of key missing in %q", s)
			}
			k = append(k, u)
		case scanner.Ident, scanner.Int:
			k = append(k, sc.TokenText())
		default:
			return nil, fmt.Errorf("bad formatting in %q", s)
		}

		// scan separator
		switch sc.Scan() {
		case '.':
			continue
		case ',':
			ks = append(ks, k)
			k = nil
		case scanner.EOF:
			return append(ks, k), nil
		default:
			return nil, fmt.Errorf("invalid separator in %q", s)
		}
	}
}
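// A hedged usage sketch for ParseKeySet. Key and KeySet are assumed to be
// slice-like ([]string and []Key), which the append calls above imply but
// the snippet does not show; the key string and printed shape are illustrative.
func exampleParseKeySetUsage() {
	ks, err := ParseKeySet(`servers.alpha.ip,servers."beta 2".ip`)
	if err != nil {
		log.Fatal(err)
	}
	// Quoted sections pass through strconv.Unquote, so "beta 2" stays one section.
	fmt.Println(ks) // expected shape: [[servers alpha ip] [servers beta 2 ip]]
}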
// parseKey reads a private key from r. It returns a map[string]string,
// with the key-value pairs, or an error when the file is not correct.
func parseKey(r io.Reader, file string) (map[string]string, error) {
	var s scanner.Scanner
	m := make(map[string]string)
	c := make(chan lex)
	k := ""
	s.Init(r)
	s.Mode = 0
	s.Whitespace = 0

	// Start the lexer
	go klexer(s, c)
	for l := range c {
		// It should alternate
		switch l.value {
		case _KEY:
			k = l.token
		case _VALUE:
			if k == "" {
				return nil, &ParseError{file, "No key seen", l}
			}
			//println("Setting", strings.ToLower(k), "to", l.token, "b")
			m[strings.ToLower(k)] = l.token
			k = ""
		}
	}
	return m, nil
}
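// A hedged usage sketch for parseKey. klexer and the lex type are not shown
// above, so the "Key: Value" line format and the file name are assumptions;
// only the lowercasing of map keys is visible in the snippet itself.
func exampleParseKeyUsage() {
	input := "Private-key-format: v1.2\nAlgorithm: 8 (RSASHA256)\n"
	m, err := parseKey(strings.NewReader(input), "Kexample.+008+12345.private")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(m["algorithm"]) // keys are lowercased by parseKey
}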
func scanArray(s *scanner.Scanner, out chan Part, stop villa.Stop) (toStop bool) {
	if scanRune(s, out, stop, TP_ARRAY_START, '[') {
		return true
	}
	skipWhitespaces(s)
	if s.Peek() != ']' {
		for {
			if scanValue(s, out, stop) {
				return true
			}
			skipWhitespaces(s)
			if s.Peek() != ',' {
				break
			}
			if scanRune(s, out, stop, TP_COMMA, ',') {
				return true
			}
			skipWhitespaces(s)
		}
	}
	return scanRune(s, out, stop, TP_ARRAY_END, ']')
}
func NewDecoder(r io.Reader) *Decoder {
	dec := &Decoder{}
	dec.scan.Init(r)
	// The handler must be set on dec.scan itself; setting it on a local
	// scanner that is then copied into the Decoder would be lost.
	dec.scan.Error = dec.setError
	return dec
}
// Parse an id matcher
// e.g. `#my-picture`
func ParseIdMatcher(selector *CSSSelector, s scanner.Scanner) error {
	var id bytes.Buffer
	defer func() {
		regexpStr := `^` + regexp.QuoteMeta(id.String()) + `$`
		selector.Attrs["id"] = regexp.MustCompile(regexpStr)
	}()
	for {
		c := s.Next()
		switch c {
		case scanner.EOF:
			return nil
		case '.':
			return ParseClassMatcher(selector, s)
		case '#':
			return ParseIdMatcher(selector, s)
		case '[':
			return ParseAttrMatcher(selector, s)
		case ':':
			return ParsePseudo(selector, s)
		default:
			if _, err := id.WriteRune(c); err != nil {
				return err
			}
		}
	}
}
func cmdFind(root *CmdNode, path string, level int) (*CmdNode, error) {
	var s scanner.Scanner
	s.Error = func(s *scanner.Scanner, msg string) {
		log.Printf("command scan error: %s [%s]", msg, path)
	}
	s.Init(strings.NewReader(path))

	parent := root
	for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
		//log.Printf("cmdFind: token: [%s]", s.TokenText())
		label := s.TokenText()
		children := matchChildren(parent.Children, label)
		size := len(children)
		if size < 1 {
			return nil, fmt.Errorf("cmdFind: not found: [%s] under [%s]", label, parent.Path)
		}
		if size > 1 {
			return nil, fmt.Errorf("cmdFind: ambiguous: [%s] under [%s]", label, parent.Path)
		}
		parent = children[0]
	}

	//log.Printf("cmdFind: found [%s] as [%s]", path, parent.Path)
	return parent, nil
}
// Parse the selector after ':'
func ParsePseudo(selector *CSSSelector, s scanner.Scanner) error {
	if selector.Pseudo != nil {
		return fmt.Errorf("Combined multiple pseudo classes")
	}
	var b bytes.Buffer
	for s.Peek() != scanner.EOF {
		if _, err := b.WriteRune(s.Next()); err != nil {
			return err
		}
	}
	cmd := b.String()
	var err error
	switch {
	case cmd == "empty":
		selector.Pseudo = func(n *html.Node) bool {
			return n.FirstChild == nil
		}
	case cmd == "first-child":
		selector.Pseudo = firstChildPseudo
	case cmd == "last-child":
		selector.Pseudo = lastChildPseudo
	case cmd == "only-child":
		selector.Pseudo = func(n *html.Node) bool {
			return firstChildPseudo(n) && lastChildPseudo(n)
		}
	case cmd == "first-of-type":
		selector.Pseudo = firstOfTypePseudo
	case cmd == "last-of-type":
		selector.Pseudo = lastOfTypePseudo
	case cmd == "only-of-type":
		selector.Pseudo = func(n *html.Node) bool {
			return firstOfTypePseudo(n) && lastOfTypePseudo(n)
		}
	case strings.HasPrefix(cmd, "contains("):
		selector.Pseudo, err = parseContainsPseudo(cmd[len("contains("):])
		if err != nil {
			return err
		}
	case strings.HasPrefix(cmd, "nth-child("),
		strings.HasPrefix(cmd, "nth-last-child("),
		strings.HasPrefix(cmd, "nth-last-of-type("),
		strings.HasPrefix(cmd, "nth-of-type("):
		if selector.Pseudo, err = parseNthPseudo(cmd); err != nil {
			return err
		}
	case strings.HasPrefix(cmd, "not("):
		if selector.Pseudo, err = parseNotPseudo(cmd[len("not("):]); err != nil {
			return err
		}
	case strings.HasPrefix(cmd, "parent-of("):
		if selector.Pseudo, err = parseParentOfPseudo(cmd[len("parent-of("):]); err != nil {
			return err
		}
	default:
		return fmt.Errorf("%s not a valid pseudo class", cmd)
	}
	return nil
}
func resolveImportsRec(asts []ast, path string, imported []string, download bool) ([]ast, error) {
	var newAsts []ast
	top := true // Imports are required to be at the top of the file.
	for _, ast := range asts {
		name := parseImport(ast)
		if name == "" {
			newAsts = append(newAsts, ast)
			top = false
			continue
		}

		if !top {
			return nil, errors.New("imports must be at the beginning of the module")
		}

		// Check for any import cycles.
		for _, importedModule := range imported {
			if name == importedModule {
				return nil, fmt.Errorf("import cycle: %s", append(imported, name))
			}
		}

		modulePath := filepath.Join(path, name+".spec")
		var sc scanner.Scanner
		sc.Filename = modulePath
		if _, err := os.Stat(modulePath); os.IsNotExist(err) && download {
			GetSpec(name)
		}

		f, err := util.Open(modulePath)
		if err != nil {
			return nil, fmt.Errorf("unable to open import %s", name)
		}
		defer f.Close()

		sc.Init(bufio.NewReader(f))
		parsed, err := parse(sc)
		if err != nil {
			return nil, err
		}

		// Rename module name to last name in import path
		name = filepath.Base(name)
		parsed, err = resolveImportsRec(parsed, path, append(imported, name), download)
		if err != nil {
			return nil, err
		}

		module := astModule{body: parsed, moduleName: astString(name)}
		newAsts = append(newAsts, module)
	}
	return newAsts, nil
}
// Skip advances the scanner past the next n runes, if possible
// (Next reads individual runes, not whole tokens). It reports
// whether all n runes were consumed before hitting EOF.
func Skip(tokenizer *scanner.Scanner, n int) bool {
	for counter := 0; counter < n; counter++ {
		if tokenizer.Next() == scanner.EOF {
			return false
		}
	}
	return true
}
func scanRune(s *scanner.Scanner, out chan Part, stop villa.Stop, tp int, exp rune) (toStop bool) {
	start := s.Pos()
	if r := s.Next(); r == scanner.EOF {
		return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos())
	} else if r != exp {
		return output(out, stop, TP_ERROR, start, s.Pos())
	}
	return output(out, stop, tp, start, s.Pos())
}
// New returns a new lexer.
func New(r io.Reader) *Lex {
	var s scanner.Scanner
	s.Init(r)
	// Disable char and raw-string scanning; the lexer handles those itself.
	s.Mode &^= scanner.ScanChars | scanner.ScanRawStrings
	return &Lex{
		Scanner: &s,
	}
}
// Consume the next rune from `in` and return an error if it's not `r`.
func match(in *scanner.Scanner, r rune) error {
	var err error
	if c := in.Next(); c == scanner.EOF {
		err = parseErr(in, "Expected '%c', got EOF", r)
	} else if c != r {
		err = parseErr(in, "Expected '%c', got '%c'", r, c)
	}
	return err
}
func newLexer(stream io.Reader, fn string) *lexer {
	var s scanner.Scanner
	s.Init(stream)
	s.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
	s.Filename = fn
	return &lexer{
		scanner: s,
	}
}
func scanTo1(s *scanner.Scanner, target rune) bool {
	for {
		switch s.Next() {
		case scanner.EOF:
			return false
		case target:
			return true
		}
	}
}
func TestMapScanner(t *testing.T) {
	in := make(chan interface{})
	go func() {
		reader := strings.NewReader(mapInput)
		s := scanner.Scanner{}
		s.Init(reader)
		// Scan first, then test for EOF, so the first token is not dropped
		// and no empty EOF token is sent (the original loop did both).
		for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
			in <- s.TokenText()
		}
		close(in)
	}()

	out := Map(in, func(input interface{}) interface{} {
		key := input.(string)
		if len(key) == 0 {
			return ""
		}
		return string(key[0])
	})

	for tok := range out {
		for _, val := range tok.Values {
			if !strings.HasPrefix(val.(string), tok.Key.(string)) {
				t.Error(val.(string) + ` should have "` + tok.Key.(string) + `" prefix`)
			}
		}
	}
}
func TestReduceScanner(t *testing.T) {
	in := make(chan interface{})
	go func() {
		reader := strings.NewReader(mapInput)
		s := scanner.Scanner{}
		s.Init(reader)
		// Scan first, then test for EOF (see TestMapScanner).
		for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
			in <- s.TokenText()
		}
		close(in)
	}()

	mapOut := Map(in, func(input interface{}) interface{} {
		key := input.(string)
		if len(key) == 0 {
			return ""
		}
		return string(key[0])
	})

	redOut := Reduce(mapOut, func(key interface{}, values []interface{}) interface{} {
		return len(values)
	})

	for tok := range redOut {
		if tok.Value.(int) == 0 {
			t.Error("expected reduce count to be positive")
		}
	}
}
func split(expr string) (keys []string, err error) {
	var msgs []string
	var s scanner.Scanner
	s.Init(strings.NewReader(expr))
	s.Mode = scanner.ScanIdents | scanner.ScanInts | scanner.ScanStrings
	s.Error = func(s *scanner.Scanner, msg string) {
		msgs = append(msgs, fmt.Sprintf("%s %s", s.Pos(), msg))
	}

	key := ""
	keys = []string{}
	for err == nil {
		t := s.Peek()
		// fmt.Printf(">>> %s: %s %s\n", s.Pos(), scanner.TokenString(t), s.TokenText())
		switch t {
		case '[':
			key, err = scanBracketedKey(&s)
		case '.':
			s.Scan()
			continue
		case scanner.EOF:
			goto end
		default:
			key, err = scanKey(&s)
		}
		if len(msgs) > 0 {
			err = errors.New(strings.Join(msgs, "\n"))
		}
		if err == nil {
			keys = append(keys, key)
		}
	}
end:
	return
}
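// A hedged usage sketch for split. scanKey and scanBracketedKey are not
// shown above, so the accepted syntax is assumed here to be dotted paths
// with bracketed segments; the expression and output are illustrative.
func exampleSplitUsage() {
	keys, err := split(`metadata.labels["app"]`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(keys) // e.g. [metadata labels app] under the assumed syntax
}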
// gofmtFlags looks for a comment of the form
//
//	//gofmt flags
//
// within the first maxLines lines of the given file,
// and returns the flags string, if any. Otherwise it
// returns the empty string.
func gofmtFlags(filename string, maxLines int) string {
	f, err := os.Open(filename)
	if err != nil {
		return "" // ignore errors - they will be found later
	}
	defer f.Close()

	// initialize scanner
	var s scanner.Scanner
	s.Init(f)
	s.Error = func(*scanner.Scanner, string) {}       // ignore errors
	s.Mode = scanner.GoTokens &^ scanner.SkipComments // want comments

	// look for //gofmt comment
	for s.Line <= maxLines {
		switch s.Scan() {
		case scanner.Comment:
			const prefix = "//gofmt "
			if t := s.TokenText(); strings.HasPrefix(t, prefix) {
				return strings.TrimSpace(t[len(prefix):])
			}
		case scanner.EOF:
			return ""
		}
	}

	return ""
}
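// A hedged usage sketch for gofmtFlags. The file path and flag value are
// illustrative only; the "//gofmt " comment form itself comes from the
// function above.
func exampleGofmtFlagsUsage() {
	// Suppose testdata/rewrite.input begins with the line:
	//
	//	//gofmt -r 'x+x -> 2*x'
	//
	// Then scanning its first 20 lines recovers the flags string.
	flags := gofmtFlags("testdata/rewrite.input", 20)
	fmt.Println(flags) // "-r 'x+x -> 2*x'" under the assumption above
}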
func TestUnwindScanner(t *testing.T) {
	in := make(chan interface{})
	go func() {
		reader := strings.NewReader(unwindInput)
		s := scanner.Scanner{}
		s.Init(reader)
		// Scan first, then test for EOF (see TestMapScanner).
		for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
			in <- s.TokenText()
		}
		close(in)
	}()

	out := Unwind(in, func(input interface{}, output chan<- interface{}) {
		output <- strings.ToLower(input.(string))
		output <- strings.ToUpper(input.(string))
	})

	index := 0
	for tok := range out {
		if index%2 == 0 {
			if strings.ToLower(tok.(string)) != tok.(string) {
				t.Error(tok, " should be lower cased")
			}
		} else {
			if strings.ToUpper(tok.(string)) != tok.(string) {
				t.Error(tok, " should be upper cased")
			}
		}
		index++
	}
}
// Parse updates an inverted index with the file's words
func (index *InvertedIndex) Parse(filename string) {
	if index.alreadyParsed(filename) {
		return
	}
	file, err := os.Open(filename)
	if err != nil {
		cwd, _ := os.Getwd()
		fmt.Printf("The file named %s doesn't exist, the current directory is %s\n", filename, cwd)
		return
	}
	defer file.Close()

	var s scanner.Scanner
	s.Init(file)
	// Scan first, then test for EOF, so the empty token produced at EOF
	// is not added to the index (the original loop indexed it).
	for token := s.Scan(); token != scanner.EOF; token = s.Scan() {
		tokenText := s.TokenText()
		pos := s.Pos()
		result := Result{File: filename, Line: pos.Line, Index: pos.Column - len(tokenText)}
		if _, found := index.indexMap[tokenText]; !found {
			index.indexMap[tokenText] = []Result{result}
		} else {
			index.indexMap[tokenText] = append(index.indexMap[tokenText], result)
		}
	}
	index.files = append(index.files, filename)
}
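// A hedged usage sketch for InvertedIndex. NewInvertedIndex and Lookup are
// hypothetical names; only Parse and the Result fields appear in the
// snippet above.
func exampleInvertedIndexUsage() {
	idx := NewInvertedIndex()
	idx.Parse("main.go")
	for _, res := range idx.Lookup("scanner") {
		fmt.Printf("%s:%d:%d\n", res.File, res.Line, res.Index)
	}
}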
func scanWord(s *scanner.Scanner, out chan Part, stop villa.Stop, word []rune) (toStop bool) {
	start := s.Pos()
	for i := 0; i < len(word); i++ {
		if r := s.Next(); r == scanner.EOF {
			return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos())
		} else if r != word[i] {
			return output(out, stop, TP_ERROR, start, s.Pos())
		}
	}
	return output(out, stop, TP_KEYWORD, start, s.Pos())
}
func copyNext(in *scanner.Scanner, out *bufio.Writer) error {
	c := in.Next()
	if out != nil {
		if n, err := out.WriteRune(c); err != nil {
			return err
		} else if n < 1 {
			return fmt.Errorf("Couldn't write: %c", c)
		}
	}
	return nil
}
// Parse a selector
// e.g. `div#my-button.btn[href^="http"]`
func ParseSelector(cmd string) (selector CSSSelector, err error) {
	selector = CSSSelector{
		Tag:    "",
		Attrs:  map[string]*regexp.Regexp{},
		Pseudo: nil,
	}
	var s scanner.Scanner
	s.Init(strings.NewReader(cmd))
	err = ParseTagMatcher(&selector, s)
	return
}
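// A hedged usage sketch for ParseSelector, built only from the matchers
// shown above (tag and id); class, attribute, and pseudo parsing live in
// helpers that are not shown here.
func exampleParseSelectorUsage() {
	sel, err := ParseSelector("div#my-button")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(sel.Tag)         // expected: "div"
	fmt.Println(sel.Attrs["id"]) // expected: a regexp matching exactly "my-button"
}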
// Get the contents from a URL and return a tokenizer and a ReadCloser
func getWebPageTokenizer(url string) (*scanner.Scanner, io.ReadCloser) {
	var client http.Client
	resp, err := client.Get(url)
	if err != nil {
		log.Println("Could not retrieve " + url)
		return nil, nil
	}
	var tokenizer scanner.Scanner
	tokenizer.Init(resp.Body)
	return &tokenizer, resp.Body
}
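// A hedged usage sketch for getWebPageTokenizer: the caller owns the
// returned body and must close it once scanning is done. The URL is
// illustrative.
func exampleTokenizerUsage() {
	tok, body := getWebPageTokenizer("https://example.com/")
	if tok == nil {
		return
	}
	defer body.Close()
	for t := tok.Scan(); t != scanner.EOF; t = tok.Scan() {
		fmt.Println(tok.TokenText())
	}
}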