// process reads a nex lexer specification from input and writes a generated
// Go lexer to output. The specification is a tree of delimited regexes with
// brace-enclosed Go actions, optionally nested via '<' ... '>', followed by
// free-form user Go code after an empty pattern. This version emits a
// goroutine-based Lexer that reports line/column positions via 'frame'.
func process(output io.Writer, input io.Reader) {
	lineno := 1
	in := bufio.NewReader(input)
	out := bufio.NewWriter(output)
	var r rune
	// read advances the cursor rune r by one; it returns true on EOF and
	// panics on any other read error. Line accounting happens here.
	read := func() bool {
		var err error
		r, _, err = in.ReadRune()
		if err == io.EOF {
			return true
		}
		if err != nil {
			panic(err)
		}
		if r == '\n' {
			lineno++
		}
		return false
	}
	// skipws advances past whitespace; returns true if EOF was hit first.
	skipws := func() bool {
		for !read() {
			if strings.IndexRune(" \n\t\r", r) == -1 {
				return false
			}
		}
		return true
	}
	// panicIf runs f (read or skipws) and panics with err if it reports EOF.
	panicIf := func(f func() bool, err Error) {
		if f() {
			panic(err)
		}
	}
	var buf []rune
	// readCode consumes a brace-balanced Go action starting at the current
	// '{' and returns it verbatim, braces included.
	readCode := func() string {
		if '{' != r {
			panic(ErrExpectedLBrace)
		}
		buf = []rune{r}
		nesting := 1
		for {
			if read() {
				panic(ErrUnmatchedLBrace)
			}
			buf = append(buf, r)
			if '{' == r {
				nesting++
			} else if '}' == r {
				nesting--
				if 0 == nesting {
					break
				}
			}
		}
		return string(buf)
	}
	var root rule
	needRootRAngle := false
	// parse builds the rule tree rooted at node. '<' opens a start-code
	// section (root only, before any rules); '>' closes with end code.
	var parse func(*rule)
	parse = func(node *rule) {
		for {
			panicIf(skipws, ErrUnexpectedEOF)
			if '<' == r {
				if node != &root || len(node.kid) > 0 {
					panic(ErrUnexpectedLAngle)
				}
				panicIf(skipws, ErrUnexpectedEOF)
				node.startCode = readCode()
				needRootRAngle = true
				continue
			} else if '>' == r {
				if node == &root {
					if !needRootRAngle {
						panic(ErrUnmatchedRAngle)
					}
				}
				panicIf(skipws, ErrUnexpectedEOF)
				node.endCode = readCode()
				return
			}
			// Any other rune is the pattern delimiter; read the regex up to
			// the next unescaped occurrence of that delimiter.
			delim := r
			panicIf(read, ErrUnexpectedEOF)
			var regex []rune
			for {
				if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') {
					break
				}
				if '\n' == r {
					panic(ErrUnexpectedNewline)
				}
				regex = append(regex, r)
				panicIf(read, ErrUnexpectedEOF)
			}
			// An empty pattern ends the rules section; user code follows.
			if "" == string(regex) {
				break
			}
			panicIf(skipws, ErrUnexpectedEOF)
			x := new(rule)
			// Rule ids are the source line numbers, used in generated names.
			x.id = fmt.Sprintf("%d", lineno)
			node.kid = append(node.kid, x)
			x.regex = make([]rune, len(regex))
			copy(x.regex, regex)
			if '<' == r {
				// Nested rule set: start code, then recurse.
				panicIf(skipws, ErrUnexpectedEOF)
				x.startCode = readCode()
				parse(x)
			} else {
				x.code = readCode()
			}
		}
	}
	parse(&root)
	// Collect the remaining user code verbatim.
	buf = nil
	for done := skipws(); !done; done = read() {
		buf = append(buf, r)
	}
	fs := token.NewFileSet()
	// Append a blank line to make things easier when there are only package and
	// import declarations.
	t, err := parser.ParseFile(fs, "", string(buf)+"\n", parser.ImportsOnly)
	if err != nil {
		panic(err)
	}
	printer.Fprint(out, fs, t)
	var file *token.File
	fs.Iterate(func(f *token.File) bool {
		file = f
		return true
	})
	// Skip over package and import declarations. This is why we appended a blank
	// line above.
	for m := file.LineCount(); m > 1; m-- {
		i := 0
		for '\n' != buf[i] {
			i++
		}
		buf = buf[i+1:]
	}
	// Emit the fixed runtime preamble of the generated lexer.
	out.WriteString(`import ("bufio";"io";"strings")
type frame struct {
  i int
  s string
  line, column int
}
type Lexer struct {
  // The lexer runs in its own goroutine, and communicates via channel 'ch'.
  ch chan frame
  // We record the level of nesting because the action could return, and a
  // subsequent call expects to pick up where it left off. In other words,
  // we're simulating a coroutine.
  // TODO: Support a channel-based variant that compatible with Go's yacc.
  stack []frame
  stale bool

  // The 'l' and 'c' fields were added for
  // https://github.com/wagerlabs/docker/blob/65694e801a7b80930961d70c69cba9f2465459be/buildfile.nex
  // Since then, I introduced the built-in Line() and Column() functions.
  l, c int

  parseResult interface{}

  // The following line makes it easy for scripts to insert fields in the
  // generated code.
  // [NEX_END_OF_LEXER_STRUCT]
}

// NewLexerWithInit creates a new Lexer object, runs the given callback on it,
// then returns it.
func NewLexerWithInit(in io.Reader, initFun func(*Lexer)) *Lexer {
  type dfa struct {
    acc []bool  // Accepting states.
    f []func(rune) int  // Transitions.
    startf, endf []int  // Transitions at start and end of input.
    nest []dfa
  }
  yylex := new(Lexer)
  if initFun != nil {
    initFun(yylex)
  }
  yylex.ch = make(chan frame)
  var scan func(in *bufio.Reader, ch chan frame, family []dfa, line, column int)
  scan = func(in *bufio.Reader, ch chan frame, family []dfa, line, column int) {
    // Index of DFA and length of highest-precedence match so far.
    matchi, matchn := 0, -1
    var buf []rune
    n := 0
    checkAccept := func(i int, st int) bool {
      // Higher precedence match? DFAs are run in parallel, so matchn is at most len(buf), hence we may omit the length equality check.
      if family[i].acc[st] && (matchn < n || matchi > i) {
        matchi, matchn = i, n
        return true
      }
      return false
    }
    var state [][2]int
    for i := 0; i < len(family); i++ {
      mark := make([]bool, len(family[i].startf))
      // Every DFA starts at state 0.
      st := 0
      for {
        state = append(state, [2]int{i, st})
        mark[st] = true
        // As we're at the start of input, follow all ^ transitions and append to our list of start states.
        st = family[i].startf[st]
        if -1 == st || mark[st] {
          break
        }
        // We only check for a match after at least one transition.
        checkAccept(i, st)
      }
    }
    atEOF := false
    for {
      if n == len(buf) && !atEOF {
        r,_,err := in.ReadRune()
        switch err {
        case io.EOF: atEOF = true
        case nil:    buf = append(buf, r)
        default:     panic(err)
        }
      }
      if !atEOF {
        r := buf[n]
        n++
        var nextState [][2]int
        for _, x := range state {
          x[1] = family[x[0]].f[x[1]](r)
          if -1 == x[1] {
            continue
          }
          nextState = append(nextState, x)
          checkAccept(x[0], x[1])
        }
        state = nextState
      } else {
dollar:  // Handle $.
        for _, x := range state {
          mark := make([]bool, len(family[x[0]].endf))
          for {
            mark[x[1]] = true
            x[1] = family[x[0]].endf[x[1]]
            if -1 == x[1] || mark[x[1]] {
              break
            }
            if checkAccept(x[0], x[1]) {
              // Unlike before, we can break off the search. Now that we're at the end, there's no need to maintain the state of each DFA.
              break dollar
            }
          }
        }
        state = nil
      }
      if state == nil {
        lcUpdate := func(r rune) {
          if r == '\n' {
            line++
            column = 0
          } else {
            column++
          }
        }
        // All DFAs stuck. Return last match if it exists, otherwise advance by one rune and restart all DFAs.
        if matchn == -1 {
          if len(buf) == 0 {  // This can only happen at the end of input.
            break
          }
          lcUpdate(buf[0])
          buf = buf[1:]
        } else {
          text := string(buf[:matchn])
          buf = buf[matchn:]
          matchn = -1
          ch <- frame{matchi, text, line, column}
          if len(family[matchi].nest) > 0 {
            scan(bufio.NewReader(strings.NewReader(text)), ch, family[matchi].nest, line, column)
          }
          if atEOF {
            break
          }
          for _, r := range text {
            lcUpdate(r)
          }
        }
        n = 0
        for i := 0; i < len(family); i++ {
          state = append(state, [2]int{i, 0})
        }
      }
    }
    ch <- frame{-1, "", line, column}
  }
  go scan(bufio.NewReader(in), yylex.ch, []dfa{`)
	// Generated DFA tables for each top-level rule go between the preamble
	// and the epilogue.
	for _, kid := range root.kid {
		gen(out, kid)
	}
	out.WriteString(`}, 0, 0)
  return yylex
}

func NewLexer(in io.Reader) *Lexer {
  return NewLexerWithInit(in, nil)
}

// Text returns the matched text.
func (yylex *Lexer) Text() string {
  return yylex.stack[len(yylex.stack) - 1].s
}

// Line returns the current line number.
// The first line is 0.
func (yylex *Lexer) Line() int {
  return yylex.stack[len(yylex.stack) - 1].line
}

// Column returns the current column number.
// The first column is 0.
func (yylex *Lexer) Column() int {
  return yylex.stack[len(yylex.stack) - 1].column
}

func (yylex *Lexer) next(lvl int) int {
  if lvl == len(yylex.stack) {
    l, c := 0, 0
    if lvl > 0 {
      l, c = yylex.stack[lvl - 1].line, yylex.stack[lvl - 1].column
    }
    yylex.stack = append(yylex.stack, frame{0, "", l, c})
  }
  if lvl == len(yylex.stack) - 1 {
    p := &yylex.stack[lvl]
    *p = <-yylex.ch
    yylex.stale = false
  } else {
    yylex.stale = true
  }
  return yylex.stack[lvl].i
}
func (yylex *Lexer) pop() {
  yylex.stack = yylex.stack[:len(yylex.stack) - 1]
}
`)
	if !*standalone {
		// Non-standalone mode: emit the yyLex glue plus the user code.
		writeLex(out, root)
		out.WriteString(string(buf))
		out.Flush()
		return
	}
	// Standalone mode: scan the user code for the NN_FUN macro and replace
	// each occurrence with the generated match function.
	m := 0
	const funmac = "NN_FUN"
	for m < len(buf) {
		m++
		if funmac[:m] != string(buf[:m]) {
			out.WriteString(string(buf[:m]))
			buf = buf[m:]
			m = 0
		} else if funmac == string(buf[:m]) {
			writeNNFun(out, root)
			buf = buf[m:]
			m = 0
		}
	}
	out.WriteString(string(buf))
	out.Flush()
}
func process(output io.Writer, input io.Reader) error { lineno := 1 in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune read := func() bool { var err error r, _, err = in.ReadRune() if err == io.EOF { return true } if err != nil { panic(err) } if r == '\n' { lineno++ } return false } skipws := func() bool { for !read() { if strings.IndexRune(" \n\t\r", r) == -1 { return false } } return true } var buf []rune readCode := func() string { if '{' != r { panic(ErrExpectedLBrace) } buf = []rune{r} nesting := 1 for { if read() { panic(ErrUnmatchedLBrace) } buf = append(buf, r) if '{' == r { nesting++ } else if '}' == r { nesting-- if 0 == nesting { break } } } return string(buf) } var root rule needRootRAngle := false var parse func(*rule) error parse = func(node *rule) error { for { panicIf(skipws, ErrUnexpectedEOF) if '<' == r { if node != &root || len(node.kid) > 0 { panic(ErrUnexpectedLAngle) } panicIf(skipws, ErrUnexpectedEOF) node.startCode = readCode() needRootRAngle = true continue } else if '>' == r { if node == &root { if !needRootRAngle { panic(ErrUnmatchedRAngle) } } if skipws() { return ErrUnexpectedEOF } node.endCode = readCode() return nil } delim := r panicIf(read, ErrUnexpectedEOF) var regex []rune for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { return ErrUnexpectedNewline } regex = append(regex, r) panicIf(read, ErrUnexpectedEOF) } if "" == string(regex) { break } panicIf(skipws, ErrUnexpectedEOF) x := new(rule) x.id = fmt.Sprintf("%d", lineno) node.kid = append(node.kid, x) x.regex = make([]rune, len(regex)) copy(x.regex, regex) if '<' == r { panicIf(skipws, ErrUnexpectedEOF) x.startCode = readCode() parse(x) } else { x.code = readCode() } } return nil } err := parse(&root) if err != nil { return err } buf = nil for done := skipws(); !done; done = read() { buf = append(buf, r) } fs := token.NewFileSet() // Append a blank line to make things easier when there are only package and // 
import declarations. t, err := parser.ParseFile(fs, "", string(buf)+"\n", parser.ImportsOnly) if err != nil { panic(err) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) // Skip over package and import declarations. This is why we appended a blank // line above. for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } prefixReplacer.WriteString(out, lexertext) for _, kid := range root.kid { gen(out, kid) } prefixReplacer.WriteString(out, lexeroutro) if !standalone { writeLex(out, root) out.WriteString(string(buf)) out.Flush() if len(outFilename) > 0 { gofmt() } return nil } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, root) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() if len(outFilename) > 0 { gofmt() } return nil }
func process(output io.Writer, input io.Reader) { lineno := 1 in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune read := func() bool { var err error r, _, err = in.ReadRune() if err == io.EOF { return true } if err != nil { panic(err) } if r == '\n' { lineno++ } return false } skipws := func() bool { for !read() { if strings.IndexRune(" \n\t\r", r) == -1 { return false } } return true } panicIf := func(f func() bool, err Error) { if f() { panic(err) } } var buf []rune readCode := func() string { if '{' != r { panic(ErrExpectedLBrace) } buf = []rune{r} nesting := 1 for { if read() { panic(ErrUnmatchedLBrace) } buf = append(buf, r) if '{' == r { nesting++ } else if '}' == r { nesting-- if 0 == nesting { break } } } return string(buf) } var root rule needRootRAngle := false var parse func(*rule) parse = func(node *rule) { for { panicIf(skipws, ErrUnexpectedEOF) if '<' == r { if node != &root || len(node.kid) > 0 { panic(ErrUnexpectedLAngle) } panicIf(skipws, ErrUnexpectedEOF) node.startCode = readCode() needRootRAngle = true continue } else if '>' == r { if node == &root { if !needRootRAngle { panic(ErrUnmatchedRAngle) } } panicIf(skipws, ErrUnexpectedEOF) node.endCode = readCode() return } delim := r panicIf(read, ErrUnexpectedEOF) var regex []rune for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { panic(ErrUnexpectedNewline) } regex = append(regex, r) panicIf(read, ErrUnexpectedEOF) } if "" == string(regex) { break } panicIf(skipws, ErrUnexpectedEOF) x := new(rule) x.id = fmt.Sprintf("%d", lineno) node.kid = append(node.kid, x) x.regex = make([]rune, len(regex)) copy(x.regex, regex) if '<' == r { panicIf(skipws, ErrUnexpectedEOF) x.startCode = readCode() parse(x) } else { x.code = readCode() } } } parse(&root) buf = nil for done := skipws(); !done; done = read() { buf = append(buf, r) } fs := token.NewFileSet() t, err := parser.ParseFile(fs, "", string(buf), parser.ImportsOnly) if err != nil { 
panic(err) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } ExtraType := *yyExtraName out.WriteString(`import ("bufio";"io";"strings") type intstring struct { i int s string } type Lexer struct { // The lexer runs in its own goroutine, and communicates via channel 'ch'. ch chan intstring // We record the level of nesting because the action could return, and a // subsequent call expects to pick up where it left off. In other words, // we're simulating a coroutine. // TODO: Support a channel-based variant that compatible with Go's yacc. stack []intstring stale bool // TODO: The following fields were added for // https://github.com/wagerlabs/docker/blob/65694e801a7b80930961d70c69cba9f2465459be/buildfile.nex // In general, there are times when it would be convenient to maintain // state in the Lexer object itself. Rather than adding fields for every // possible nex application, it should be configurable, like the 'yyextra' // field in Flex. l, c int // line number and character position `) if len(ExtraType) > 0 { out.WriteString(` yyextra ` + ExtraType + ` } func NewLexer(in io.Reader, yyextra ` + ExtraType + `) *Lexer { `) } else { out.WriteString(` } func NewLexer(in io.Reader) *Lexer { `) } out.WriteString(` type dfa struct { acc []bool // Accepting states. f []func(rune) int // Transitions. startf, endf []int // Transitions at start and end of input. nest []dfa } yylex := new(Lexer) yylex.ch = make(chan intstring)`) if len(ExtraType) > 0 { out.WriteString(` yylex.yyextra = yyextra `) } out.WriteString(` var scan func(in *bufio.Reader, ch chan intstring, family []dfa) scan = func(in *bufio.Reader, ch chan intstring, family []dfa) { // Index of DFA and length of highest-precedence match so far. matchi, matchn := 0, -1 var buf []rune n := 0 checkAccept := func(i int, st int) bool { // Higher precedence match? 
DFAs are run in parallel, so matchn is at most len(buf), hence we may omit the length equality check. if family[i].acc[st] && (matchn < n || matchi > i) { matchi, matchn = i, n return true } return false } var state [][2]int for i := 0; i < len(family); i++ { mark := make([]bool, len(family[i].startf)) // Every DFA starts at state 0. st := 0 for { state = append(state, [2]int{i, st}) mark[st] = true // As we're at the start of input, follow all ^ transitions and append to our list of start states. st = family[i].startf[st] if -1 == st || mark[st] { break } // We only check for a match after at least one transition. checkAccept(i, st) } } atEOF := false for { if n == len(buf) && !atEOF { r,_,err := in.ReadRune() switch err { case io.EOF: atEOF = true case nil: buf = append(buf, r) default: panic(err) } } if !atEOF { r := buf[n] n++ var nextState [][2]int for _, x := range state { x[1] = family[x[0]].f[x[1]](r) if -1 == x[1] { continue } nextState = append(nextState, x) checkAccept(x[0], x[1]) } state = nextState } else { dollar: // Handle $. for _, x := range state { mark := make([]bool, len(family[x[0]].endf)) for { mark[x[1]] = true x[1] = family[x[0]].endf[x[1]] if -1 == x[1] || mark[x[1]] { break } if checkAccept(x[0], x[1]) { // Unlike before, we can break off the search. Now that we're at the end, there's no need to maintain the state of each DFA. break dollar } } } state = nil } if state == nil { // All DFAs stuck. Return last match if it exists, otherwise advance by one rune and restart all DFAs. if matchn == -1 { if len(buf) == 0 { // This can only happen at the end of input. 
break } buf = buf[1:] } else { text := string(buf[:matchn]) buf = buf[matchn:] matchn = -1 ch <- intstring{matchi, text} if len(family[matchi].nest) > 0 { scan(bufio.NewReader(strings.NewReader(text)), ch, family[matchi].nest) } if atEOF { break } } n = 0 for i := 0; i < len(family); i++ { state = append(state, [2]int{i, 0}) } } } ch <- intstring{-1, ""} } go scan(bufio.NewReader(in), yylex.ch, []dfa{`) for _, kid := range root.kid { gen(out, kid) } out.WriteString(`}) return yylex } func (yylex *Lexer) Text() string { return yylex.stack[len(yylex.stack) - 1].s } func (yylex *Lexer) next(lvl int) int { if lvl == len(yylex.stack) { yylex.stack = append(yylex.stack, intstring{0, ""}) } if lvl == len(yylex.stack) - 1 { p := &yylex.stack[lvl] *p = <-yylex.ch yylex.stale = false } else { yylex.stale = true } return yylex.stack[lvl].i } func (yylex *Lexer) pop() { yylex.stack = yylex.stack[:len(yylex.stack) - 1] } `) if !*standalone { writeLex(out, root) out.WriteString(string(buf)) out.Flush() return } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, root) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() }
func process(output io.Writer, input io.Reader) { in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune done := false regex := make([]rune, 0, 8) read := func() { var er error r, _, er = in.ReadRune() if er == io.EOF { done = true } else if er != nil { panic(er.Error()) } } skipws := func() { for { read() if done { break } if strings.IndexRune(" \n\t\r", r) == -1 { break } } } var rules []*rule usercode := false familyn := 1 id := 0 newRule := func(family, index int) *rule { x := new(rule) rules = append(rules, x) x.family = family x.id = id x.index = index id++ return x } buf := make([]rune, 0, 8) readCode := func() string { if '{' != r { panic("expected {") } buf = buf[:0] nesting := 1 for { buf = append(buf, r) read() if done { panic("unmatched {") } if '{' == r { nesting++ } if '}' == r { nesting-- if 0 == nesting { break } } } buf = append(buf, r) return string(buf) } var decls string var parse func(int) parse = func(family int) { rulen := 0 declvar := func() { decls += fmt.Sprintf("var a%d [%d]dfa\n", family, rulen) } for !done { skipws() if done { break } regex = regex[:0] if '>' == r { if 0 == family { panic("unmatched >") } x := newRule(family, -1) x.code = "yylex = yylex.pop()\n" declvar() skipws() x.code += readCode() return } delim := r read() if done { panic("unterminated pattern") } for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { panic("regex interrupted by newline") } regex = append(regex, r) read() if done { panic("unterminated pattern") } } if "" == string(regex) { usercode = true break } skipws() if done { panic("last pattern lacks action") } x := newRule(family, rulen) rulen++ x.regex = make([]rune, len(regex)) copy(x.regex, regex) nested := false if '<' == r { skipws() if done { panic("'<' lacks action") } x.code = fmt.Sprintf("yylex = yylex.push(%d)\n", familyn) nested = true } x.code += readCode() if nested { familyn++ parse(familyn - 1) } } if 0 != family { panic("unmatched <") 
} x := newRule(family, -1) x.code = "// [END]\n" declvar() } parse(0) if !usercode { return } skipws() buf = buf[:0] for !done { buf = append(buf, r) read() } fs := token.NewFileSet() t, err := parser.ParseFile(fs, "", string(buf), parser.ImportsOnly) if err != nil { panic(err.Error()) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } fmt.Fprintf(out, `import ("bufio";"io";"strings") type dfa struct { acc []bool f []func(rune) int id int } type family struct { a []dfa endcase int } `) out.WriteString(decls) out.WriteString("var a []family\n") out.WriteString("func init() {\n") fmt.Fprintf(out, "a = make([]family, %d)\n", familyn) for _, x := range rules { gen(out, x) } for i := 0; i < familyn; i++ { fmt.Fprintf(out, "a[%d].a = a%d[:]\n", i, i) } out.WriteString(`} func getAction(c *frame) int { if -1 == c.match { return -1 } c.action = c.fam.a[c.match].id c.match = -1 return c.action } type frame struct { atEOF bool action, match, matchn, n int buf []rune text string in *bufio.Reader state []int fam family } func newFrame(in *bufio.Reader, index int) *frame { f := new(frame) f.buf = make([]rune, 0, 128) f.in = in f.match = -1 f.fam = a[index] f.state = make([]int, len(f.fam.a)) return f } type Lexer []*frame func NewLexer(in io.Reader) Lexer { stack := make([]*frame, 0, 4) stack = append(stack, newFrame(bufio.NewReader(in), 0)) return stack } func (stack Lexer) isDone() bool { return 1 == len(stack) && stack[0].atEOF } func (stack Lexer) nextAction() int { c := stack[len(stack) - 1] for { if c.atEOF { return c.fam.endcase } if c.n == len(c.buf) { r,_,er := c.in.ReadRune() switch er { case nil: c.buf = append(c.buf, r) case io.EOF: c.atEOF = true if c.n > 0 { c.text = string(c.buf) return getAction(c) } return c.fam.endcase default: panic(er.Error()) } } jammed := true r := c.buf[c.n] for i, x := range c.fam.a { if 
-1 == c.state[i] { continue } c.state[i] = x.f[c.state[i]](r) if -1 == c.state[i] { continue } jammed = false if x.acc[c.state[i]] { if -1 == c.match || c.matchn < c.n+1 || c.match > i { c.match = i c.matchn = c.n+1 } } } if jammed { a := getAction(c) if -1 == a { c.matchn = c.n + 1 } c.n = 0 for i, _ := range c.state { c.state[i] = 0 } c.text = string(c.buf[:c.matchn]) copy(c.buf, c.buf[c.matchn:]) c.buf = c.buf[:len(c.buf) - c.matchn] return a } c.n++ } panic("unreachable") } func (stack Lexer) push(index int) Lexer { c := stack[len(stack) - 1] return append(stack, newFrame(bufio.NewReader(strings.NewReader(c.text)), index)) } func (stack Lexer) pop() Lexer { return stack[:len(stack) - 1] } func (stack Lexer) Text() string { c := stack[len(stack) - 1] return c.text } `) if !*standalone { writeLex(out, rules) out.WriteString(string(buf)) out.Flush() return } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, rules) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() }