// Parse an id matcher // e.g. `#my-picture` func ParseIdMatcher(selector *CSSSelector, s scanner.Scanner) error { var id bytes.Buffer defer func() { regexpStr := `^` + regexp.QuoteMeta(id.String()) + `$` selector.Attrs["id"] = regexp.MustCompile(regexpStr) }() for { c := s.Next() switch c { case scanner.EOF: return nil case '.': return ParseClassMatcher(selector, s) case '#': return ParseIdMatcher(selector, s) case '[': return ParseAttrMatcher(selector, s) case ':': return ParsePseudo(selector, s) default: if _, err := id.WriteRune(c); err != nil { return err } } } }
func (w *wordsStruct) addString(str string) { var scan scanner.Scanner scan.Init(strings.NewReader(str)) for scan.Peek() != scanner.EOF { w.addChar(scan.Next()) } }
// Parse the initial tag // e.g. `div` func ParseTagMatcher(selector *CSSSelector, s scanner.Scanner) error { tag := bytes.NewBuffer([]byte{}) defer func() { selector.Tag = tag.String() }() for { c := s.Next() switch c { case scanner.EOF: return nil case '.': return ParseClassMatcher(selector, s) case '#': return ParseIdMatcher(selector, s) case '[': return ParseAttrMatcher(selector, s) case ':': return ParsePseudo(selector, s) default: if _, err := tag.WriteRune(c); err != nil { return err } } } }
func (d *Dict) expandDirective(in *scanner.Scanner, out *bufio.Writer) error { var err error c := in.Next() switch c { case lDelim: switch in.Peek() { case lDelim: return copyNext(in, out) case condDelim: in.Next() err = d.expandCond(in, out) default: if err = d.expandVar(in, out); err == nil { err = match(in, rDelim) } } case rDelim: if err = writeString(out, "}"); err == nil { err = match(in, rDelim) } case scanner.EOF: err = parseErr(in, "Expected '%c' or '%c', got EOF", lDelim, rDelim) default: err = parseErr(in, "Expected '%c' or '%c', got '%c'", lDelim, rDelim, c) } return err }
// Parse the selector after ':' func ParsePseudo(selector *CSSSelector, s scanner.Scanner) error { if selector.Pseudo != nil { return fmt.Errorf("Combined multiple pseudo classes") } var b bytes.Buffer for s.Peek() != scanner.EOF { if _, err := b.WriteRune(s.Next()); err != nil { return err } } cmd := b.String() var err error switch { case cmd == "empty": selector.Pseudo = func(n *html.Node) bool { return n.FirstChild == nil } case cmd == "first-child": selector.Pseudo = firstChildPseudo case cmd == "last-child": selector.Pseudo = lastChildPseudo case cmd == "only-child": selector.Pseudo = func(n *html.Node) bool { return firstChildPseudo(n) && lastChildPseudo(n) } case cmd == "first-of-type": selector.Pseudo = firstOfTypePseudo case cmd == "last-of-type": selector.Pseudo = lastOfTypePseudo case cmd == "only-of-type": selector.Pseudo = func(n *html.Node) bool { return firstOfTypePseudo(n) && lastOfTypePseudo(n) } case strings.HasPrefix(cmd, "contains("): selector.Pseudo, err = parseContainsPseudo(cmd[len("contains("):]) if err != nil { return err } case strings.HasPrefix(cmd, "nth-child("), strings.HasPrefix(cmd, "nth-last-child("), strings.HasPrefix(cmd, "nth-last-of-type("), strings.HasPrefix(cmd, "nth-of-type("): if selector.Pseudo, err = parseNthPseudo(cmd); err != nil { return err } case strings.HasPrefix(cmd, "not("): if selector.Pseudo, err = parseNotPseudo(cmd[len("not("):]); err != nil { return err } case strings.HasPrefix(cmd, "parent-of("): if selector.Pseudo, err = parseParentOfPseudo(cmd[len("parent-of("):]); err != nil { return err } default: return fmt.Errorf("%s not a valid pseudo class", cmd) } return nil }
func parseText(s *scanner.Scanner, depth int) ([]ast, error) { var slice []ast for { switch s.Scan() { case '+', '-', '/', '%', '*', '=', '<', '>', '!': slice = append(slice, parseIdent(s.TokenText())) case scanner.Ident: ident := s.TokenText() // Periods are allowed in package names. for s.Peek() == '.' { s.Next() ident += "." if s.Scan() != scanner.Ident { return nil, stitchError{pos: s.Pos(), err: fmt.Errorf("bad ident name: %s", ident)} } ident += s.TokenText() } slice = append(slice, parseIdent(ident)) case scanner.Float: x, _ := strconv.ParseFloat(s.TokenText(), 64) slice = append(slice, astFloat(x)) case scanner.Int: x, _ := strconv.Atoi(s.TokenText()) slice = append(slice, astInt(x)) case scanner.String: str := strings.Trim(s.TokenText(), "\"") slice = append(slice, astString(str)) case '(': // We need to save our position before recursing because the // scanner will have moved on by the time the recursive call // returns. pos := s.Pos() sexp, err := parseText(s, depth+1) if err != nil { return nil, err } slice = append(slice, astSexp{sexp: sexp, pos: pos}) case ')': if depth == 0 { return nil, stitchError{s.Pos(), errUnbalancedParens} } return slice, nil case scanner.EOF: if depth != 0 { return nil, stitchError{s.Pos(), errUnbalancedParens} } return slice, nil default: return nil, stitchError{s.Pos(), fmt.Errorf("bad element: %s", s.TokenText())} } } }
// Consume the next rune in `n` and return an error if it's not `r`. func match(in *scanner.Scanner, r rune) error { var err error if c := in.Next(); c == scanner.EOF { err = parseErr(in, "Expected '%c', got EOF", r) } else if c != r { err = parseErr(in, "Expected '%c', got '%c'", r, c) } return err }
// Skip advances tokenizer by n characters (it calls Next, not Scan, so the
// unit is a single rune rather than a lexical token). It returns false as
// soon as EOF is hit and true if all n characters were consumed.
func Skip(tokenizer *scanner.Scanner, n int) bool {
	for remaining := n; remaining > 0; remaining-- {
		if tokenizer.Next() == scanner.EOF {
			return false
		}
	}
	return true
}
func scanRune(s *scanner.Scanner, out chan Part, stop villa.Stop, tp int, exp rune) (toStop bool) { start := s.Pos() if r := s.Next(); r == scanner.EOF { return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) } else if r != exp { return output(out, stop, TP_ERROR, start, s.Pos()) } return output(out, stop, tp, start, s.Pos()) }
// scanTo1 consumes runes from s up to and including the first occurrence of
// target. It returns true if target was found and false if EOF was reached
// first.
func scanTo1(s *scanner.Scanner, target rune) bool {
	for r := s.Next(); r != scanner.EOF; r = s.Next() {
		if r == target {
			return true
		}
	}
	return false
}
func scanWord(s *scanner.Scanner, out chan Part, stop villa.Stop, word []rune) (toStop bool) { start := s.Pos() for i := 0; i < len(word); i++ { if r := s.Next(); r == scanner.EOF { return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) } else if r != word[i] { return output(out, stop, TP_ERROR, start, s.Pos()) } } return output(out, stop, TP_KEYWORD, start, s.Pos()) }
// expandCond expands a conditional block. On entry the scanner is positioned
// at the condition, which is evaluated with d.evalBool. The body is then
// processed until the closing directive (lDelim, condDelim, rDelim) is
// consumed, in which case nil is returned.
//
// Output goes to writer only for the first branch whose condition is true;
// every other branch is processed with a nil writer. copyNext skips writing
// for a nil writer; the other helpers are presumably nil-tolerant too
// (confirm).
//
// NOTE(review): if EOF is reached before the closing directive the loop ends
// and err (nil at that point) is returned, so an unterminated conditional is
// not reported here.
func (d *Dict) expandCond(in *scanner.Scanner, writer *bufio.Writer) error {
	expand, err := d.evalBool(in)
	if err != nil {
		return err
	}
	// out stays nil (output suppressed) until a branch is selected.
	var out *bufio.Writer
	// expanded records that some branch has already been emitted, so that
	// later branches are never enabled even if their condition is true.
	expanded := false
	for err == nil && !isEOF(in) {
		if !expanded && expand {
			out = writer
			expanded = true
		}
		if err = copyUntilDelim(in, out); err != nil {
			break
		}
		if in.Peek() == lDelim {
			in.Next()
			switch in.Peek() {
			case lDelim:
				// Doubled lDelim: copy the second one through.
				err = copyNext(in, out)
			case condDelim:
				in.Next()
				if in.Peek() == rDelim {
					// lDelim condDelim rDelim closes this conditional.
					in.Next()
					return nil
				} else {
					// Nested conditional; it inherits the current writer, so
					// a suppressed branch suppresses everything nested in it.
					err = d.expandCond(in, out)
				}
			case condElsif:
				// Start of the next branch: suppress output until the check
				// at the top of the loop re-enables it.
				in.Next()
				out = nil
				if in.Peek() == rDelim {
					// No condition given: unconditional (else) branch.
					in.Next()
					expand = true
				} else {
					expand, err = d.evalBool(in)
				}
			default:
				err = d.expandVar(in, out)
				// In a suppressed branch (out == nil) an expandVar error is
				// overwritten by the result of match.
				// NOTE(review): presumably intentional so that skipped
				// branches may reference unknown variables — confirm.
				if out == nil || err == nil {
					err = match(in, rDelim)
				}
			}
		} else {
			err = d.expandDirective(in, out)
		}
	}
	return err
}
// copyNext consumes one rune from in and, when out is non-nil, writes it to
// out. A nil out means the rune is consumed and discarded. An error is
// returned if the write fails or reports that no bytes were written.
func copyNext(in *scanner.Scanner, out *bufio.Writer) error {
	c := in.Next()
	if out == nil {
		return nil
	}

	n, err := out.WriteRune(c)
	if err != nil {
		return err
	}
	if n < 1 {
		return fmt.Errorf("Couldn't write: %c", c)
	}
	return nil
}
// scanTo2 consumes runes from s up to and including the first occurrence of
// the two-rune sequence target0 target1. It returns true if the sequence was
// found and false if EOF was reached first.
func scanTo2(s *scanner.Scanner, target0, target1 rune) bool {
	for r := s.Next(); r != scanner.EOF; r = s.Next() {
		if r == target0 && s.Peek() == target1 {
			s.Next() // consume target1 as well
			return true
		}
	}
	return false
}
func parseVariable(sc *scanner.Scanner) (result []byte, err error) { delims := []byte{byte(sc.Next())} if ch := sc.Peek(); ch == '{' { delims = append(delims, byte(sc.Next())) } name, err := parseName(sc) if err == nil && len(delims) > 1 && '}' != byte(sc.Peek()) { err = errInvalidSyntax } if err != nil { name = append(delims, name...) if len(delims) > 1 { name = append(name, byte(sc.Next())) } return name, err } if len(delims) > 1 { sc.Next() } return name, err }
func Tokenize(r io.Reader) []string { var scr1 scanner.Scanner scr1.Init(r) list1 := make([]string, 0, 100) for rune1 := scr1.Next(); rune1 != scanner.EOF; { for rune1 != scanner.EOF && unicode.IsSpace(rune1) { rune1 = scr1.Next() } if rune1 == '(' { list1 = append(list1, "(") rune1 = scr1.Next() } else if rune1 == ')' { list1 = append(list1, ")") rune1 = scr1.Next() } else { var token string token, rune1 = readtoken(&scr1, rune1) if token != "" { list1 = append(list1, token) } } } return list1 }
func readVar(in *scanner.Scanner) (s string, err error) { var buf bytes.Buffer for isVarRune(in.Peek()) { buf.WriteRune(in.Next()) } s = buf.String() if in.Peek() != rDelim { err = parseErr(in, "Unexpected character '%c'", in.Next()) } else if len(s) == 0 { err = parseErr(in, "Empty variable") } return }
func scanKeyword(s *scanner.Scanner, out chan Part, stop villa.Stop) (toStop bool) { start := s.Pos() switch s.Peek() { case scanner.EOF: s.Next() return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) case 't': return scanWord(s, out, stop, []rune("true")) case 'f': return scanWord(s, out, stop, []rune("false")) case 'n': return scanWord(s, out, stop, []rune("null")) } s.Next() return output(out, stop, TP_ERROR, start, s.Pos()) }
func parseName(sc *scanner.Scanner) (result []byte, err error) { if ch := sc.Peek(); scanner.EOF == ch { return result, errInvalidSyntax } for { if ch := sc.Peek(); unicode.IsLetter(ch) || unicode.IsDigit(ch) || '_' == ch { result = append(result, byte(sc.Next())) } else { if len(result) == 0 { err = errInvalidSyntax } return result, err } } }
// readtoken collects one token starting with rune1, which the caller has
// already read, and continuing with runes from r. Outside double quotes the
// token ends at a parenthesis, white space or EOF; inside double quotes only
// EOF ends it. The quote characters are part of the token. It returns the
// token and the first rune that does not belong to it.
func readtoken(r *scanner.Scanner, rune1 rune) (string, rune) {
	var tok bytes.Buffer
	inQuote := false
	for ; rune1 != scanner.EOF; rune1 = r.Next() {
		if !inQuote && (rune1 == '(' || rune1 == ')' || unicode.IsSpace(rune1)) {
			break
		}
		if rune1 == '"' {
			inQuote = !inQuote
		}
		tok.WriteRune(rune1)
	}
	return tok.String(), rune1
}
// Parse a :contains("") selector // expects the input to be everything after the open parenthesis // e.g. for `contains("Help")` the argument would be `"Help")` func parseContainsPseudo(cmd string) (PseudoClass, error) { var s scanner.Scanner s.Init(strings.NewReader(cmd)) switch s.Next() { case '"': default: return nil, fmt.Errorf("Malformed 'contains(\"\")' selector") } textToContain := bytes.NewBuffer([]byte{}) for { r := s.Next() switch r { case '"': // ')' then EOF must follow '"' if s.Next() != ')' { return nil, fmt.Errorf("Malformed 'contains(\"\")' selector") } if s.Next() != scanner.EOF { return nil, fmt.Errorf("'contains(\"\")' must end selector") } text := textToContain.String() contains := func(node *html.Node) bool { for c := node.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.TextNode { if strings.Contains(c.Data, text) { return true } } } return false } return contains, nil case '\\': s.Next() case scanner.EOF: return nil, fmt.Errorf("Malformed 'contains(\"\")' selector") default: if _, err := textToContain.WriteRune(r); err != nil { return nil, err } } } }
func ParseTags(s *scanner.Scanner, g *Game) error { //fmt.Println("starting tags parse") run := s.Peek() for run != scanner.EOF { switch run { case '[', ']', '\n', '\r': run = s.Next() case '1': return nil default: s.Scan() tag := s.TokenText() s.Scan() val := s.TokenText() //fmt.Println("tag:", tag, "; val:", val) g.Tags[tag] = strings.Trim(val, "\"") } run = s.Peek() } return nil }
func scanTo3(s *scanner.Scanner, target0, target1, target2 rune) bool { for { // match 0, 1 if !scanTo2(s, target0, target1) { // EOF return false } switch s.Peek() { case scanner.EOF: s.Next() return false case target2: // matched 2, found s.Next() return true case target1: if target0 == target1 { loop: for { switch s.Next() { case scanner.EOF: return false case target2: return true case target0: // keep scanning default: // not found, go to out loop break loop } } } } } }
// ProcessConfig replaces references of environment varialbes for the given data // Support variable syntax: $varname, ${varname} func ProcessConfig(data []byte, e *env.Env, escapeChar rune) ([]byte, error) { var result []byte var sc scanner.Scanner sc.Init(bytes.NewReader(data)) DONE: for { switch ch := sc.Peek(); ch { default: result = append(result, byte(sc.Next())) case scanner.EOF: break DONE case escapeChar: curr, next := sc.Next(), sc.Peek() if next != '$' { result = append(result, byte(curr)) } if next != scanner.EOF { result = append(result, byte(sc.Next())) } case '$': name, err := parseVariable(&sc) if err != nil { pos := sc.Pos() return result, fmt.Errorf(`parseError:%d:%d: %v %q`, pos.Line, pos.Offset, err, name) } result = append(result, e.Get(string(name))...) } } return result, nil }
func scanString(s *scanner.Scanner, out chan Part, stop villa.Stop) (toStop bool) { start := s.Pos() // start quote if r := s.Next(); r == scanner.EOF { return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) } else if r != '"' { return output(out, stop, TP_ERROR, start, s.Pos()) } // body for s.Peek() != '"' { if r := s.Next(); r == scanner.EOF { return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) } else if r == '\\' { switch s.Next() { case scanner.EOF: return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': // just ok case 'u': for i := 0; i < 4; i++ { r := s.Next() if r == scanner.EOF { return output(out, stop, TP_EOF_UNEXPECTED, start, s.Pos()) } if !isHexadecimal(r) { return output(out, stop, TP_ERROR, start, s.Pos()) } } default: return output(out, stop, TP_ERROR, start, s.Pos()) } } } // end quote s.Next() return output(out, stop, TP_STRING, start, s.Pos()) }
func parseXML(sc *scanner.Scanner) (*Token, bool) { var entity = new(bytes.Buffer) token := new(Token) // Skip the '<' sc.Scan() switch sc.Peek() { case '/': token.Type = XMLEndToken sc.Next() case '!': log.Tracef("parseXML skipping comment") next := sc.Next() for next != '>' { next = sc.Next() } return nil, false default: token.Type = XMLStartToken } log.Tracef("parseXML creating %s element", token.Type) for { tok := sc.Scan() log.Tracef("parseXML found %s. Token is %v. Entity is: '%s'", sc.TokenText(), tok, entity.String()) switch { case tok == '>': token.Text = entity.String() return token, true case unicode.IsSpace(tok): return nil, false default: log.Tracef("parseXML appending %s to string", sc.TokenText()) entity.WriteString(sc.TokenText()) } } }
// regexpMatch reports whether path matches pattern. It follows the rules of
// filepath.Match but is implemented by translating the pattern to a regular
// expression, which lets the wildcard set be extended, e.g. with "**" for
// any number of directories. The result should be backwards compatible with
// filepath.Match; regexp syntax that filepath.Match does not know passes
// through and is therefore supported as well, which does no harm.
//
// Pattern syntax errors are those reported by filepath.Match, and a regexp
// compilation failure is mapped to filepath.ErrBadPattern.
func regexpMatch(pattern, path string) (bool, error) {
	// filepath.Match has some odd, inconsistent syntax rules. Rather than
	// duplicating them, call it purely for its error state. This could be
	// dropped if it ever becomes a problem, since it only matters for
	// malformed patterns.
	if _, err := filepath.Match(pattern, path); err != nil {
		return false, err
	}

	sep := string(os.PathSeparator)
	// Separator as it has to appear inside the regexp: a backslash
	// separator needs escaping.
	sepExpr := sep
	if sep == `\` {
		sepExpr += `\`
	}

	// A scanner is used so that utf-8 characters in the pattern are handled.
	var sc scanner.Scanner
	sc.Init(strings.NewReader(pattern))

	expr := "^"
	for sc.Peek() != scanner.EOF {
		switch ch := sc.Next(); {
		case ch == '*' && sc.Peek() == '*':
			// Some flavor of "**".
			sc.Next()
			if sc.Peek() == scanner.EOF {
				// "**" at the very end: accept everything, as .gitignore does.
				expr += ".*"
			} else {
				expr += "((.*" + sepExpr + ")|([^" + sepExpr + "]*))"
			}
			// Treat "**/" as "**", so swallow the separator.
			if string(sc.Peek()) == sep {
				sc.Next()
			}
		case ch == '*':
			// Anything but a separator.
			expr += "[^" + sepExpr + "]*"
		case ch == '?':
			// Any single character but a separator.
			expr += "[^" + sepExpr + "]"
		case ch == '.' || ch == '$':
			// Regexp specials that carry no meaning in filepath.Match.
			expr += `\` + string(ch)
		case ch == '\\' && sep == `\`:
			// On Windows filepath.Match has no escaping at all, so a
			// backslash is always a literal (escaped) separator.
			expr += sepExpr
		case ch == '\\':
			// Escape the next character; a trailing backslash is kept as is.
			if sc.Peek() != scanner.EOF {
				expr += `\` + string(sc.Next())
			} else {
				expr += `\`
			}
		default:
			expr += string(ch)
		}
	}
	expr += "$"

	matched, err := regexp.MatchString(expr, path)
	if err != nil {
		// Hide the fact that regexp is used instead of filepath.
		err = filepath.ErrBadPattern
	}
	return matched, err
}
// skipWhiteSpace advances s past every leading rune for which isWhiteSpace
// reports true, leaving the first other rune unconsumed.
func skipWhiteSpace(s *scanner.Scanner) {
	for {
		if !isWhiteSpace(s.Peek()) {
			return
		}
		s.Next()
	}
}
// scanBlock consumes one markup block from s and classifies it. It returns
// one of the TP_* block types and, for start and end tags, the tag name.
//
//   - text up to the next '<' or EOF:      TP_FINAL, ""
//   - <? ... ?>  (processing instruction): TP_FINAL, ""
//   - <![ ... ]]> (CDATA):                 TP_FINAL, ""
//   - <!- ... --> (comment):               TP_COMMENT, ""
//   - <! ... >   (other declaration):      TP_FINAL, ""
//   - </name ...>:                         TP_END, name (TP_FINAL, "" if the
//     name is empty)
//   - <name ... />:                        TP_FINAL, name
//   - <name ...>:                          TP_START, name
//   - <>:                                  TP_FINAL, ""
//
// The results of scanTo1/scanTo2/scanTo3 are ignored, so a construct that is
// cut off by EOF is classified the same way as a complete one. Attribute
// values are not parsed: any '>' or "/>" after the tag name ends the tag.
//
// NOTE(review): TP_FINAL presumably marks a block that needs no matching end
// tag — confirm against the caller.
func scanBlock(s *scanner.Scanner) (blockType int, name string) {
	if s.Peek() != '<' {
		// Plain text: skip ahead to the next tag or EOF.
		for s.Peek() != scanner.EOF && s.Peek() != '<' {
			s.Next()
		}
		return TP_FINAL, ""
	}
	// '<'
	s.Next()
	// NOTE(review): if the input ends right after '<', tp is scanner.EOF and
	// falls into the default (start tag) case below, returning TP_START with
	// a name built from that EOF rune.
	switch tp := s.Next(); tp {
	case '?':
		// PI
		// to find ?>
		scanTo2(s, '?', '>')
		return TP_FINAL, ""
	case '!':
		switch s.Next() {
		case scanner.EOF:
			// malformed
		case '[':
			// <![CDATA
			// find ]]>
			scanTo3(s, ']', ']', '>')
		case '-':
			// comments
			// find -->
			// Only one '-' has been consumed here; the second '-' of "<!--"
			// is left for scanTo3 to read.
			scanTo3(s, '-', '-', '>')
			return TP_COMMENT, ""
		default:
			// Attribute-List
			// find >
			scanTo1(s, '>')
		}
		return TP_FINAL, ""
	case '/':
		// end tag
		name := make([]rune, 0, 8)
		for {
			r := s.Next()
			if r == scanner.EOF || r == '>' {
				break
			}
			if isWhiteSpace(r) {
				// The name ends at the first white space; skip the rest of
				// the tag.
				scanTo1(s, '>')
				break
			}
			name = append(name, r)
		}
		if len(name) == 0 {
			// malformed
			return TP_FINAL, ""
		}
		return TP_END, string(name)
	case '>':
		// malformed
		return TP_FINAL, ""
	default:
		// start tag
		name := []rune{tp}
		for {
			r := s.Next()
			if r == scanner.EOF || r == '>' {
				break
			}
			if r == '/' {
				// "/>" directly after the name: self-closing tag.
				if s.Peek() == '>' {
					s.Next()
					return TP_FINAL, string(name)
				}
				// A '/' not followed by '>' falls through and becomes part
				// of the name.
			}
			if isWhiteSpace(r) {
				// The name is complete; scan the attribute area for the end
				// of the tag.
			loop:
				for {
					switch s.Next() {
					case scanner.EOF:
						return TP_FINAL, string(name)
					case '/':
						if s.Peek() == '>' {
							s.Next()
							return TP_FINAL, string(name)
						}
					case '>':
						break loop
					}
				}
				break
			}
			name = append(name, r)
		}
		return TP_START, string(name)
	}
}
// Parse an attribute matcher // e.g. `[attr^="http"]` func ParseAttrMatcher(selector *CSSSelector, s scanner.Scanner) error { var attrKey bytes.Buffer var attrVal bytes.Buffer hasMatchVal := false matchType := '=' defer func() { if hasMatchVal { var regexpStr string switch matchType { case '=': regexpStr = `^` + regexp.QuoteMeta(attrVal.String()) + `$` case '*': regexpStr = regexp.QuoteMeta(attrVal.String()) case '$': regexpStr = regexp.QuoteMeta(attrVal.String()) + `$` case '^': regexpStr = `^` + regexp.QuoteMeta(attrVal.String()) case '~': regexpStr = `(\A|\s)` + regexp.QuoteMeta(attrVal.String()) + `(\s|\z)` } selector.Attrs[attrKey.String()] = regexp.MustCompile(regexpStr) } else { selector.Attrs[attrKey.String()] = regexp.MustCompile(`^.*$`) } }() // After reaching ']' proceed proceed := func() error { switch s.Next() { case scanner.EOF: return nil case '.': return ParseClassMatcher(selector, s) case '#': return ParseIdMatcher(selector, s) case '[': return ParseAttrMatcher(selector, s) case ':': return ParsePseudo(selector, s) default: return fmt.Errorf("Expected selector indicator after ']'") } } // Parse the attribute key matcher for !hasMatchVal { c := s.Next() switch c { case scanner.EOF: return fmt.Errorf("Unmatched open brace '['") case ']': // No attribute value matcher, proceed! 
return proceed() case '$', '^', '~', '*': matchType = c hasMatchVal = true if s.Next() != '=' { return fmt.Errorf("'%c' must be followed by a '='", matchType) } case '=': matchType = c hasMatchVal = true default: if _, err := attrKey.WriteRune(c); err != nil { return err } } } // figure out if the value is quoted c := s.Next() inQuote := false switch c { case scanner.EOF: return fmt.Errorf("Unmatched open brace '['") case ']': return proceed() case '"': inQuote = true default: if _, err := attrVal.WriteRune(c); err != nil { return err } } if inQuote { for { c := s.Next() switch c { case '\\': // consume another character if c = s.Next(); c == scanner.EOF { return fmt.Errorf("Unmatched open brace '['") } case '"': switch s.Next() { case ']': return proceed() default: return fmt.Errorf("Quote must end at ']'") } } if _, err := attrVal.WriteRune(c); err != nil { return err } } } else { for { c := s.Next() switch c { case scanner.EOF: return fmt.Errorf("Unmatched open brace '['") case ']': // No attribute value matcher, proceed! return proceed() } if _, err := attrVal.WriteRune(c); err != nil { return err } } } }