func parseCombinator(rdr io.ByteScanner, p *Link) error { rdr.UnreadByte() for c, err := rdr.ReadByte(); err != io.EOF; c, err = rdr.ReadByte() { if err != nil { return err } switch c { case '{': rdr.UnreadByte() return EOS case ',': return fmt.Errorf("Encountered ',' after combinator") case ' ', '\t', '\n', '\r', '\f': case '>', '+', '~': if p.Combinator == Descendant { p.Combinator = combinatorMap[c] } else { return fmt.Errorf("Can't combine multiple combinators") } default: rdr.UnreadByte() return nil } } return nil }
func (t *Terminal) Read(r io.ByteScanner) error { c, err := r.ReadByte() if err != nil { return err } switch { case c == 0x7: // bell // ignore case c == 0x8: // backspace if t.Col > 0 { t.Col-- } case c == 0x1b: return t.readEscape(r) case c == '\r': t.Col = 0 case c == '\n': t.Col = 0 t.Row++ t.fixPosition() case c == '\t': t.Col += 8 - (t.Col % 8) t.fixPosition() case c >= ' ' && c <= '~': t.writeRune(rune(c), t.Attr) default: r.UnreadByte() return t.readUTF8(r) } return nil }
func readString(buf io.ByteScanner) Literal { strbuf := []byte{} loop: for { b, err := buf.ReadByte() if err != nil { break } switch b { case '"': return String(string(strbuf)) case '\\': b, err := buf.ReadByte() if err != nil { break loop } switch b { case 't': strbuf = append(strbuf, '\t') case 'n': strbuf = append(strbuf, '\n') case '\\': strbuf = append(strbuf, '\\') case '"': strbuf = append(strbuf, '"') default: panic("read: unknown escape") } default: strbuf = append(strbuf, b) } } panic("read: unterminated string") return String("") }
func read(buf io.ByteScanner) Literal { skipws(buf) b, err := buf.ReadByte() if err == io.EOF { panic("read: premature end of file") } if err != nil { panic("read: input error: " + err.Error()) } switch b { case ')': panic("read: unmatched close-parenthesis") case '"': return readString(buf) case '\'': return readQuote(buf) case '`': return readQuasi(buf) case ',': return readComma(buf) case '&': return readAmpersand(buf) case '(': return readList(buf) } buf.UnreadByte() return readAtom(buf) }
func readAtom(buf io.ByteScanner) Literal { atomBuf := []byte{} loop: for { b, err := buf.ReadByte() if err != nil { break } switch b { case '(', ')', '\'', '"', ' ', '\t', '\n': buf.UnreadByte() break loop } atomBuf = append(atomBuf, b) } if len(atomBuf) == 0 { panic("read: empty atom") } atom := string(atomBuf) n, err := strconv.ParseFloat(atom, 64) if err == nil { return Number(n) } return Intern(atom) }
// nextLine discards bytes from buf up to and including the next '\n'.
// It also stops at the first read error (for example end of input).
func nextLine(buf io.ByteScanner) {
	b, err := buf.ReadByte()
	for err == nil && b != '\n' {
		b, err = buf.ReadByte()
	}
}
func TestMockByteScanner(t *testing.T) { var s io.ByteScanner m := &ByteScannerMock{} s = m m.On("ReadByte").Return(byte('_'), nil) b, err := s.ReadByte() assert.Equal(t, byte('_'), b) assert.Equal(t, nil, err) }
// readTerminator reads one byte from r and reports whether it equals term.
//
// A matching byte stays consumed. A non-matching byte is pushed back and the
// result of UnreadByte is returned as the error. A read error is returned
// unchanged together with false.
func readTerminator(r io.ByteScanner, term byte) (bool, error) {
	next, err := r.ReadByte()
	switch {
	case err != nil:
		return false, err
	case next == term:
		return true, nil
	default:
		return false, r.UnreadByte()
	}
}
func (t *Terminal) expect(r io.ByteScanner, exp byte) (bool, error) { c, err := r.ReadByte() if err != nil { return false, err } ok := c == exp if !ok { log.Printf("expect %s failed, got %s", showChar(exp), showChar(c)) } return ok, nil }
func readAmpersand(buf io.ByteScanner) Literal { skipws(buf) b, err := buf.ReadByte() if err != nil { panic("read: incomplete input") } if b != '(' { panic("read: ill-formed ampersand") } x := readList(buf) return newListLiteral(Intern("ampersand"), x) }
func parseSequence(rdr io.ByteScanner) (Sequence, error) { seq := []SimpleSelector{} rdr.UnreadByte() for c, err := rdr.ReadByte(); err != io.EOF; c, err = rdr.ReadByte() { if err != nil { return nil, err } switch c { case '*': seq = append(seq, SimpleSelector{Type: Universal}) case '#': sel := SimpleSelector{Type: Id, AttrName: "id"} if err := parseSimpleSelector(rdr, &sel); err != nil { return nil, err } seq = append(seq, sel) case '.': sel := SimpleSelector{Type: Class, AttrName: "class"} if err := parseSimpleSelector(rdr, &sel); err != nil { return nil, err } seq = append(seq, sel) case ':': sel := SimpleSelector{Type: PseudoClass} if err := parseSimpleSelector(rdr, &sel); err != nil { return nil, err } seq = append(seq, sel) case '[': sel := SimpleSelector{Type: Attr} if err := parseSimpleAttr(rdr, &sel); err != nil { return nil, err } seq = append(seq, sel) case '{': rdr.UnreadByte() return seq, EOS case ' ', '\t', '\n', '\r', '\f', '>', '+', '~': rdr.UnreadByte() return seq, nil default: sel := SimpleSelector{Type: Tag, Tag: string(c)} if err := parseSimpleTag(rdr, &sel); err != nil { return nil, err } seq = append(seq, sel) } } return seq, nil }
func (t *Terminal) readTo(r io.ByteScanner, end byte) ([]byte, error) { var buf []byte for i := 0; i < 1000; i++ { c, err := r.ReadByte() if err != nil { return nil, err } if c == end { return buf, nil } buf = append(buf, c) } return nil, fmt.Errorf("term: readTo(%s) overlong", showChar(end)) }
func readComma(buf io.ByteScanner) Literal { b, err := buf.ReadByte() if err != nil { panic("read: incomplete comma") } tag := Intern("unquote") if b == '@' { tag = Intern("unquotesplicing") } else { buf.UnreadByte() } x := read(buf) return newListLiteral(tag, x) }
// scanSign reads an optional sign from r. It consumes a leading '-' (neg is
// true) or '+' (neg is false); any other byte is pushed back and neg is false.
// A read error is returned unchanged with neg false.
func scanSign(r io.ByteScanner) (neg bool, err error) {
	ch, err := r.ReadByte()
	if err != nil {
		return false, err
	}
	if ch == '-' {
		return true, nil
	}
	if ch != '+' {
		r.UnreadByte()
	}
	return false, nil
}
func (t *Terminal) readInt(r io.ByteScanner) (int, error) { n := 0 for i := 0; i < 20; i++ { c, err := r.ReadByte() if err != nil { return -1, err } if c >= '0' && c <= '9' { n = n*10 + int(c) - '0' } else { r.UnreadByte() return n, err } } return -1, fmt.Errorf("term: readInt overlong") }
func skipws(buf io.ByteScanner) { for { b, err := buf.ReadByte() if err != nil { break } switch b { case ' ', '\t', '\n': continue case ';': nextLine(buf) continue } buf.UnreadByte() break } }
func readList(buf io.ByteScanner) Literal { skipws(buf) items := []Literal{} for { b, err := buf.ReadByte() if err != nil { panic("read: premature end of file") } if b == ')' { break } buf.UnreadByte() items = append(items, read(buf)) skipws(buf) } return newListLiteral(items...) }
// parseExampleLine parses an example line. It does not parse any wiki syntax. func (p *Parser) parseExampleLine(r io.ByteScanner) { p.startElement("Example") defer p.endElement() p.startElement("Text") defer p.endElement() for { c, err := r.ReadByte() if err != nil { return } if c == '\n' { return } p.current.Text += string(c) p.nextColumn() } }
func parseSimpleSelector(rdr io.ByteScanner, sel *SimpleSelector) error { b, err := rdr.ReadByte() if err != nil && err != EOS { return err } bs, err := consumeValue(rdr) if err != nil && err != EOS { return err } bs = append([]byte{b}, bs...) if sel.Type == PseudoClass && bs[0] == ':' { sel.Type = PseudoElement bs = bs[1:] } sel.Value = string(bs) return err }
func (t *Terminal) readUTF8(r io.ByteScanner) error { c, err := r.ReadByte() if err != nil { return err } attr := t.Attr var uc rune n := 0 switch { case c&0xE0 == 0xB0: uc = rune(c & 0x1F) n = 2 case c&0xF0 == 0xE0: uc = rune(c & 0x0F) n = 3 default: if c&0xF0 == 0xF0 { log.Printf("term: not yet implemented: utf8 start %#v", c) } attr.SetInverse(true) t.writeRune('@', attr) return nil } for i := 1; i < n; i++ { c, err := r.ReadByte() if err != nil { return err } if c&0xC0 != 0x80 { log.Printf("term: not yet implemented: utf8 continuation %#v", c) attr.SetInverse(true) uc = '@' break } uc = uc<<6 | rune(c&0x3F) } t.writeRune(uc, attr) return nil }
func parseChain(rdr io.ByteScanner, chn *Chain) error { for c, err := rdr.ReadByte(); err != io.EOF; c, err = rdr.ReadByte() { if err != nil { return err } switch c { case ',': return fmt.Errorf("Parser does not handle groups") case ' ', '\t', '\n', '\r', '\f', '>', '+', '~': if chn.Head == nil { return fmt.Errorf("Starting selector chain with combinator %c", c) } part := Link{} if err := parseCombinator(rdr, &part); err != nil { return err } chn.Tail = append(chn.Tail, part) default: if chn.Head == nil { chn.Head, err = parseSequence(rdr) if err != nil && err != io.EOF { return err } } else { last := last(chn.Tail) if last != nil { last.Sequence, err = parseSequence(rdr) if err != nil && err != io.EOF { return err } } else { return fmt.Errorf( "Attempt to add tail seqence without combinator char: %c", c) } } } } return nil }
// parseHeaderBullet parses a bullet of the header. func (p *Parser) parseHeaderBullet(r io.ByteScanner) { level := 0 for { c, err := r.ReadByte() if err != nil { p.addError("ParseHeaderBullet", "unexpected EOF") return } if c == '*' { level += 1 p.nextColumn() } else if c == ' ' { p.nextColumn() break } else { p.nextColumn() p.addError("ParseHeaderBullet", "* or space expected") return } } p.current.Attr["level"] = fmt.Sprint(level) }
// parseDocument parses a document. func (p *Parser) parseDocument(r io.ByteScanner) { _, err := r.ReadByte() if err != nil { return } r.UnreadByte() p.root = addElement(nil, "Document") p.current = p.root for { _, err := r.ReadByte() if err != nil { break } r.UnreadByte() p.parseLine(r) } }
func consumeValue(rdr io.ByteScanner) ([]byte, error) { bs := []byte{} for c, err := rdr.ReadByte(); err != io.EOF; c, err = rdr.ReadByte() { if err != nil { return nil, err } switch c { case '{': rdr.UnreadByte() return bs, EOS case '>', '+', '~', ' ', '\t', '\n', '\f', ',', '.', '#', '[', ':': rdr.UnreadByte() return bs, nil default: bs = append(bs, c) } } return bs, nil }
// ReadGrid reads a grid from s using the same format as WriteTo. func ReadGrid(s io.ByteScanner) (Grid, error) { g := NewGrid() for r := 0; r < GridSize; r++ { for c := 0; c < GridSize; c++ { // read number b, err := s.ReadByte() if err != nil { return g, fmt.Errorf("failed to read sudoku grid row %d: %s", r+1, err) } if b == '_' { g[r][c] = Cell{} } else if b >= '1' && b <= '9' { g[r][c].resolve(b - '0') } else { return g, fmt.Errorf("fot a number %c at row %d", b, r+1) } if c != GridSize-1 { // read space b, err = s.ReadByte() if err != nil { return g, fmt.Errorf("failed to read sudoku grid row %d: %s", r+1, err) } if b != ' ' { return g, fmt.Errorf("unexpected character '%c' at row %d", b, r+1) } } else { // read newline b, err = s.ReadByte() if r == GridSize-1 && err == io.EOF { break // TODO: return EOF here? } if err != nil { return g, fmt.Errorf("failed to read sudoku grid row %d: %s", r+1, err) } if b != '\n' { // TODO: support Windows and MAC new lines return g, fmt.Errorf("unexpected character '%c' at row %d", b, r+1) } } } } return g, nil }
// readWhile reads bytes from reader for as long as f returns true for them
// and returns the accepted bytes as a string. The first rejected byte is
// pushed back. Reading also stops at the first read error.
//
// If reset is true, UnreadByte is additionally called once per accepted byte
// before returning, so a reader that supports multiple unreads (for example
// strings.Reader or bytes.Reader) ends up where it started.
//
// Each byte is appended as the code point of the same value, so bytes >= 0x80
// appear in the result as their two-byte UTF-8 encoding.
func readWhile(reader io.ByteScanner, f func(b byte) bool, reset bool) string {
	var accepted []rune
	for {
		b, err := reader.ReadByte()
		if err != nil {
			break
		}
		if !f(b) {
			reader.UnreadByte()
			break
		}
		accepted = append(accepted, rune(b))
	}
	if reset {
		// Rewind once per accepted byte. A counted loop replaces the earlier
		// defer-per-iteration, which queued one deferred call per byte.
		for range accepted {
			reader.UnreadByte()
		}
	}
	return string(accepted)
}
func parseSimpleAttr(rdr io.ByteScanner, sel *SimpleSelector) error { var name []byte var value []byte var c1 byte = 0 for c2, err := rdr.ReadByte(); err != io.EOF; c2, err = rdr.ReadByte() { if err != nil { return err } switch c2 { case ']': sel.AttrName = string(name) sel.Value = string(value) return nil case '=': if c1 == '~' { sel.AttrMatch = Contains } else if c1 == '|' { sel.AttrMatch = DashPrefix } else { sel.AttrMatch = Exactly } case '{': rdr.UnreadByte() return EOS case '~': case '|': // TODO(jwall): Substring matchers default: if sel.AttrMatch == Presence { name = append(name, c2) } else { value = append(value, c2) } } c1 = c2 } return fmt.Errorf("Didn't close Attribute Matcher") }
// Match provides a simple sequential ASCII text matcher.
// It is specialized for processing well formed, structured printable ASCII.
// s yields input bytes and p yields pattern bytes.
// n and o correspond to the number of bytes read from s and p, respectively;
// n is decremented again whenever a byte is successfully pushed back to s.
// If m is non-nil, len(m) should be greater than or equal to the number of
// capture-bytes in the pattern.
// NOTE(review): m[v] is written without a nil or length check, so a pattern
// with a capturing group and a nil/short m will panic.
//
// Patterns:
//   * Printable ASCII bytes will be matched literally.
//   * All groups (specified by non-printable bytes in the pattern stream)
//     are non-greedy, and match zero or more characters.
//   * \x00: ASCII whitespace bytes.
//   * \x01: non-whitespace ASCII bytes.
//   * \xa0: lowercase letters.
//   * \xa1: uppercase letters.
//   * \xaf: letters.
//   * \xd0: decimal digits.
//   * \xd6: hexadecimal digits.
//   * \xd8: octal digits.
//   * \xd3: base-36 digits.
//   * \xfe: printable ASCII bytes.
//   * \xff: 8-bit bytes.
//   * All groups are capturing beside \x00.
//   * Use of other non-printable or non-ASCII bytes is undefined.
//
// Errors: the end of the pattern (io.EOF from p) returns a nil error. A
// literal pattern byte that differs from the input byte returns
// ErrByteMismatch after the input byte has been pushed back. Read errors
// from s (including io.EOF) and non-EOF errors from p are returned as is.
func Match(s, p io.ByteScanner, m []string) (n, o int, err error) {
	var (
		quit = false                 // set when s fails while a group is being matched
		buf  = make([]byte, 0, 1024) // bytes captured for the current group
		a, b byte                    // current pattern byte (later its tab entry) and the one after it
		c    byte                    // current input byte
		v    int                     // index of the next capture slot in m
	)
	for !quit {
		a, err = p.ReadByte()
		switch {
		case err == io.EOF:
			// pattern exhausted: report success
			err = nil
			fallthrough
		case err != nil:
			return
		}
		o++
		// presumably tab[a]&prg == 0 marks a as a literal byte rather than a
		// group — TODO confirm against the definition of tab/prg
		if tab[a]&prg == 0 {
			c, err = s.ReadByte()
			if err != nil {
				return
			}
			n++
			if a == c {
				continue
			} else if err = s.UnreadByte(); err == nil {
				// mismatch: the input byte was pushed back, so uncount it
				err = ErrByteMismatch
				n--
			}
			return
		}
		a = tab[a]
		// Peek at the pattern byte following the group; it is used below to
		// decide where the group ends. nop stands in for it at end of pattern.
		b, err = p.ReadByte()
		if err == io.EOF {
			b = nop
		} else if err != nil {
			return
		} else if err = p.UnreadByte(); err != nil {
			o++
			return
		}
		for {
			c, err = s.ReadByte()
			if err != nil {
				// input failed mid-group: store what was captured, then leave
				// the outer loop with this error
				quit = true
				goto fill
			}
			n++
			// Stop when c matches the following pattern byte (literally, or
			// by class when that byte is itself a group), or when c is not in
			// the current group's class (a == any skips that last check).
			if tab[b]&prg == 0 && c == b || tab[b]&prg != 0 && tab[c]&tab[b] != 0 || a != any && tab[c]&a == 0 {
				break
			} else if a&^prg != ws {
				buf = append(buf, c)
			}
		}
		// Push back the byte that ended the group.
		// NOTE(review): the io.EOF stored in err here is overwritten by
		// p.ReadByte at the top of the next iteration.
		if err = s.UnreadByte(); err == nil {
			err = io.EOF
			n--
		}
	fill:
		// Every group except the whitespace group stores its capture in m.
		if a&^prg != ws {
			if _, ok := s.(*rb); ok {
				// s is an *rb: the captured bytes are reversed before storing
				revbytes(buf)
			}
			m[v] = string(buf)
			v++
			buf = buf[:0]
		}
	}
	return
}
// scanExponent scans the longest possible prefix of r representing a decimal // ('e', 'E') or binary ('p') exponent, if any. It returns the exponent, the // exponent base (10 or 2), or a read or syntax error, if any. // // exponent = ( "E" | "e" | "p" ) [ sign ] digits . // sign = "+" | "-" . // digits = digit { digit } . // digit = "0" ... "9" . // // A binary exponent is only permitted if binExpOk is set. func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err error) { base = 10 var ch byte if ch, err = r.ReadByte(); err != nil { if err == io.EOF { err = nil // no exponent; same as e0 } return } switch ch { case 'e', 'E': // ok case 'p': if binExpOk { base = 2 break // ok } fallthrough // binary exponent not permitted default: r.UnreadByte() return // no exponent; same as e0 } var neg bool if neg, err = scanSign(r); err != nil { return } var digits []byte if neg { digits = append(digits, '-') } // no need to use nat.scan for exponent digits // since we only care about int64 values - the // from-scratch scan is easy enough and faster for i := 0; ; i++ { if ch, err = r.ReadByte(); err != nil { if err != io.EOF || i == 0 { return } err = nil break // i > 0 } if ch < '0' || '9' < ch { if i == 0 { r.UnreadByte() err = fmt.Errorf("invalid exponent (missing digits)") return } break // i > 0 } digits = append(digits, byte(ch)) } // i > 0 => we have at least one digit exp, err = strconv.ParseInt(string(digits), 10, 64) return }
// scan scans the number corresponding to the longest possible prefix
// from r representing an unsigned number in a given conversion base.
// It returns the corresponding natural number res, the actual base b,
// a digit count, and a read or syntax error err, if any.
//
//	number   = [ prefix ] mantissa .
//	prefix   = "0" [ "x" | "X" | "b" | "B" ] .
//	mantissa = digits | digits "." [ digits ] | "." digits .
//	digits   = digit { digit } .
//	digit    = "0" ... "9" | "a" ... "z" | "A" ... "Z" .
//
// Unless fracOk is set, the base argument must be 0 or a value between
// 2 and MaxBase. If fracOk is set, the base argument must be one of
// 0, 2, 10, or 16. Providing an invalid base argument leads to a run-
// time panic.
//
// For base 0, the number prefix determines the actual base: A prefix of
// ``0x'' or ``0X'' selects base 16; if fracOk is not set, the ``0'' prefix
// selects base 8, and a ``0b'' or ``0B'' prefix selects base 2. Otherwise
// the selected base is 10 and no prefix is accepted.
//
// If fracOk is set, an octal prefix is ignored (a leading ``0'' simply
// stands for a zero digit), and a period followed by a fractional part
// is permitted. The result value is computed as if there were no period
// present; and the count value is used to determine the fractional part.
//
// A result digit count > 0 corresponds to the number of (non-prefix) digits
// parsed. A digit count <= 0 indicates the presence of a period (if fracOk
// is set, only), and -count is the number of fractional digits found.
// In this case, the actual value of the scanned number is res * b**count.
//
// The receiver z is truncated and reused as storage for the result. The
// first byte that does not belong to the number is pushed back to r.
func (z nat) scan(r io.ByteScanner, base int, fracOk bool) (res nat, b, count int, err error) {
	// reject illegal bases
	baseOk := base == 0 ||
		!fracOk && 2 <= base && base <= MaxBase ||
		fracOk && (base == 2 || base == 10 || base == 16)
	if !baseOk {
		panic(fmt.Sprintf("illegal number base %d", base))
	}

	// one char look-ahead
	ch, err := r.ReadByte()
	if err != nil {
		return
	}

	// determine actual base
	b = base
	if base == 0 {
		// actual base is 10 unless there's a base prefix
		b = 10
		if ch == '0' {
			// tentatively count the leading 0 as a digit; reset below if it
			// turns out to be part of a prefix
			count = 1
			switch ch, err = r.ReadByte(); err {
			case nil:
				// possibly one of 0x, 0X, 0b, 0B
				if !fracOk {
					b = 8
				}
				switch ch {
				case 'x', 'X':
					b = 16
				case 'b', 'B':
					b = 2
				}
				switch b {
				case 16, 2:
					count = 0 // prefix is not counted
					if ch, err = r.ReadByte(); err != nil {
						// io.EOF is also an error in this case
						return
					}
				case 8:
					count = 0 // prefix is not counted
				}
			case io.EOF:
				// input is "0"
				res = z[:0]
				err = nil
				return
			default:
				// read error
				return
			}
		}
	}

	// convert string
	// Algorithm: Collect digits in groups of at most n digits in di
	// and then use mulAddWW for every such group to add them to the
	// result.
	z = z[:0]
	b1 := Word(b)
	bn, n := maxPow(b1) // at most n digits in base b1 fit into Word
	di := Word(0)       // 0 <= di < b1**i < bn
	i := 0              // 0 <= i < n
	dp := -1            // position of decimal point
	for {
		if fracOk && ch == '.' {
			// accept at most one period: clearing fracOk rejects a second one
			fracOk = false
			dp = count
			// advance
			if ch, err = r.ReadByte(); err != nil {
				if err == io.EOF {
					err = nil
					break
				}
				return
			}
		}

		// convert rune into digit value d1
		var d1 Word
		switch {
		case '0' <= ch && ch <= '9':
			d1 = Word(ch - '0')
		case 'a' <= ch && ch <= 'z':
			d1 = Word(ch - 'a' + 10)
		case 'A' <= ch && ch <= 'Z':
			d1 = Word(ch - 'A' + 10)
		default:
			d1 = MaxBase + 1 // not a digit in any base
		}
		if d1 >= b1 {
			r.UnreadByte() // ch does not belong to number anymore
			break
		}
		count++

		// collect d1 in di
		di = di*b1 + d1
		i++

		// if di is "full", add it to the result
		if i == n {
			z = z.mulAddWW(z, bn, di)
			di = 0
			i = 0
		}

		// advance
		if ch, err = r.ReadByte(); err != nil {
			if err == io.EOF {
				err = nil
				break
			}
			return
		}
	}

	if count == 0 {
		// no digits found
		switch {
		case base == 0 && b == 8:
			// there was only the octal prefix 0 (possibly followed by digits > 7);
			// count as one digit and return base 10, not 8
			count = 1
			b = 10
		case base != 0 || b != 8:
			// there was neither a mantissa digit nor the octal prefix 0
			err = errors.New("syntax error scanning number")
		}
		return
	}
	// count > 0

	// add remaining digits to result
	if i > 0 {
		z = z.mulAddWW(z, pow(b1, i), di)
	}
	res = z.norm()

	// adjust for fraction, if any
	if dp >= 0 {
		// 0 <= dp <= count > 0
		count = dp - count
	}

	return
}