func fileTokens(tf *token.File) (toks []string, err error) {
	src, err := ioutil.ReadFile(tf.Name())
	if err != nil {
		return nil, err
	}
	s := &scanner.Scanner{}
	s.Init(tf, src, nil, 0)
	tokmap := make(TokenSet)
	for {
		_, tok, lit := s.Scan()
		if tok == token.EOF {
			break
		}
		if tok == token.STRING {
			// XXX: what if strings are misspelled?
			lit = lit[1 : len(lit)-1]
		}
		tokmap[lit] = struct{}{}
	}
	for k := range tokmap {
		toks = append(toks, k)
	}
	return toks, nil
}
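A minimal usage sketch (not from the original source): fileTokens needs the *token.File that backs a parsed file, which can be looked up in the token.FileSet. The file name and the fmt/log/go/parser imports are assumptions for illustration.

// Hypothetical caller for fileTokens; assumes fileTokens and TokenSet
// from the example above are in scope.
fset := token.NewFileSet()
astFile, err := parser.ParseFile(fset, "main.go", nil, 0)
if err != nil {
	log.Fatal(err)
}
tf := fset.File(astFile.Pos()) // the *token.File backing the parsed file
toks, err := fileTokens(tf)
if err != nil {
	log.Fatal(err)
}
fmt.Println(toks)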
func (d *DIBuilder) getFile(file *token.File) llvm.Metadata {
	if diFile := d.files[file]; diFile.C != nil {
		return diFile
	}
	diFile := d.builder.CreateFile(d.remapFilePath(file.Name()), "")
	d.files[file] = diFile
	return diFile
}
func (d *DIBuilder) getFile(file *token.File) llvm.Value {
	if diFile := d.files[file]; !diFile.IsNil() {
		return diFile
	}
	diFile := d.builder.CreateFile(d.remapFilePath(file.Name()), "")
	d.files[file] = diFile
	return diFile
}
// findQueryPos searches fset for filename and translates the
// specified file-relative byte offsets into token.Pos form. It
// returns an error if the file was not found or the offsets were out
// of bounds.
//
func findQueryPos(fset *token.FileSet, filename string, startOffset, endOffset int) (start, end token.Pos, err error) {
	var file *token.File
	fset.Iterate(func(f *token.File) bool {
		if sameFile(filename, f.Name()) {
			// (f.Name() is absolute)
			file = f
			return false // done
		}
		return true // continue
	})
	if file == nil {
		err = fmt.Errorf("couldn't find file containing position")
		return
	}

	// Range check [start..end], inclusive of both end-points.
	if 0 <= startOffset && startOffset <= file.Size() {
		start = file.Pos(int(startOffset))
	} else {
		err = fmt.Errorf("start position is beyond end of file")
		return
	}
	if 0 <= endOffset && endOffset <= file.Size() {
		end = file.Pos(int(endOffset))
	} else {
		err = fmt.Errorf("end position is beyond end of file")
		return
	}
	return
}
// parseQueryPos parses a string of the form "file:pos" or
// "file:start,end" where pos, start, end match #%d and represent byte
// offsets, and returns the extent to which it refers.
//
// (Numbers without a '#' prefix are reserved for future use,
// e.g. to indicate line/column positions.)
//
func parseQueryPos(fset *token.FileSet, queryPos string) (start, end token.Pos, err error) {
	if queryPos == "" {
		err = fmt.Errorf("no source position specified (-pos flag)")
		return
	}

	colon := strings.LastIndex(queryPos, ":")
	if colon < 0 {
		err = fmt.Errorf("invalid source position -pos=%q", queryPos)
		return
	}
	filename, offset := queryPos[:colon], queryPos[colon+1:]
	startOffset := -1
	endOffset := -1
	if hyphen := strings.Index(offset, ","); hyphen < 0 {
		// e.g. "foo.go:#123"
		startOffset = parseOctothorpDecimal(offset)
		endOffset = startOffset
	} else {
		// e.g. "foo.go:#123,#456"
		startOffset = parseOctothorpDecimal(offset[:hyphen])
		endOffset = parseOctothorpDecimal(offset[hyphen+1:])
	}
	if startOffset < 0 || endOffset < 0 {
		err = fmt.Errorf("invalid -pos offset %q", offset)
		return
	}

	var file *token.File
	fset.Iterate(func(f *token.File) bool {
		if sameFile(filename, f.Name()) {
			// (f.Name() is absolute)
			file = f
			return false // done
		}
		return true // continue
	})
	if file == nil {
		err = fmt.Errorf("couldn't find file containing position -pos=%q", queryPos)
		return
	}

	// Range check [start..end], inclusive of both end-points.
	if 0 <= startOffset && startOffset <= file.Size() {
		start = file.Pos(int(startOffset))
	} else {
		err = fmt.Errorf("start position is beyond end of file -pos=%q", queryPos)
		return
	}
	if 0 <= endOffset && endOffset <= file.Size() {
		end = file.Pos(int(endOffset))
	} else {
		err = fmt.Errorf("end position is beyond end of file -pos=%q", queryPos)
		return
	}

	return
}
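A short usage sketch (hypothetical; assumes parseQueryPos is in scope and fset already contains the named file; the file name and offsets are illustrative only):

// Translate a -pos flag value into a token.Pos range and print it.
start, end, err := parseQueryPos(fset, "foo.go:#123,#456")
if err != nil {
	log.Fatal(err)
}
fmt.Println(fset.Position(start), "to", fset.Position(end))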
// createCompileUnit creates and returns debug metadata for the compile
// unit as a whole, using the first file in the file set as a representative
// (the choice of file is arbitrary).
func (d *DIBuilder) createCompileUnit() llvm.Metadata {
	var file *token.File
	d.fset.Iterate(func(f *token.File) bool {
		file = f
		return false
	})
	dir, err := os.Getwd()
	if err != nil {
		panic("could not get current directory: " + err.Error())
	}
	return d.builder.CreateCompileUnit(llvm.DICompileUnit{
		Language: llvm.DW_LANG_Go,
		File:     d.remapFilePath(file.Name()),
		Dir:      dir,
		Producer: "llgo",
	})
}
// Init prepares the scanner S to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same file as
// line information which is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will use the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments, illegal characters, and semicolons are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (S *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode uint) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic("file size does not match src len")
	}
	S.file = file
	S.dir, _ = filepath.Split(file.Name())
	S.src = src
	S.err = err
	S.mode = mode

	S.ch = ' '
	S.offset = 0
	S.rdOffset = 0
	S.lineOffset = 0
	S.insertSemi = false
	S.ErrorCount = 0

	S.next()
}
// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same file as
// line information which is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will invoke the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic("file size does not match src len")
	}
	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.src = src
	s.err = err
	s.mode = mode

	s.ch = ' '
	s.offset = 0
	s.rdOffset = 0
	s.lineOffset = 0
	s.insertSemi = false
	s.ErrorCount = 0

	s.next()
}
func (d *debugInfo) getCompileUnit(file *token.File) *debug.CompileUnitDescriptor {
	if d.cu == nil {
		d.cu = make(map[*token.File]*debug.CompileUnitDescriptor)
	}
	cu := d.cu[file]
	if cu == nil {
		var path string
		if file != nil {
			path = d.Fset.File(file.Pos(0)).Name()
		}
		cu = &debug.CompileUnitDescriptor{
			Language: debug.DW_LANG_Go,
			Path:     debug.FileDescriptor(path),
			Producer: "llgo",
			Runtime:  LLGORuntimeVersion,
		}
		d.cu[file] = cu
	}
	return cu
}
// fileOffsetToPos translates the specified file-relative byte offsets
// into token.Pos form. It returns an error if the file was not found
// or the offsets were out of bounds.
//
func fileOffsetToPos(file *token.File, startOffset, endOffset int) (start, end token.Pos, err error) {
	// Range check [start..end], inclusive of both end-points.
	if 0 <= startOffset && startOffset <= file.Size() {
		start = file.Pos(int(startOffset))
	} else {
		err = fmt.Errorf("start position is beyond end of file")
		return
	}
	if 0 <= endOffset && endOffset <= file.Size() {
		end = file.Pos(int(endOffset))
	} else {
		err = fmt.Errorf("end position is beyond end of file")
		return
	}
	return
}
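A small sketch of how fileOffsetToPos might be driven (hypothetical; tf is the *token.File of an already-parsed file and the offsets are illustrative):

start, end, err := fileOffsetToPos(tf, 10, 42)
if err != nil {
	log.Fatal(err)
}
// token.Pos values and byte offsets round-trip through the same *token.File.
fmt.Println(tf.Offset(start) == 10, tf.Offset(end) == 42) // true true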
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
	}
	s.tokScanner = scan.Scanner{Matcher: getTokenMatcher()}
	s.errScanner = scan.Scanner{Matcher: getErrorMatcher()}
	s.src = skipBOM(src)
	s.tokScanner.SetSource(s.src)
	s.errScanner.SetSource(s.src)

	s.file = file
	s.fileBase = s.file.Base()
	s.dir, _ = filepath.Split(file.Name())
	s.err = err
	s.mode = mode
	s.ErrorCount = 0

	s.preSemi = false
	s.semiPos = 0
}
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	//fmt.Println("Init src", strconv.Quote(string(src)), mode)
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
	}
	s.gombiScanner = newGombiScanner()
	s.SetSource(skipBOM(src))

	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.err = err
	s.mode = mode
	s.ErrorCount = 0

	s.lastIsPreSemi = false
	s.commentAfterPreSemi = false
	s.endOfLinePos = 0
	s.endOfLine = 0
	s.commentQueue.reset()
}
// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same file as
// line information which is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will invoke the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
	}
	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.src = src
	s.err = err
	s.mode = mode

	s.ch = ' '
	s.offset = 0
	s.rdOffset = 0
	s.lineOffset = 0
	s.insertSemi = false
	s.ErrorCount = 0

	s.next()
	if s.ch == bom {
		s.next() // ignore BOM at file beginning
	}
}
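To show how the standard library's Init above is called in practice, here is a small, self-contained sketch using only go/token and go/scanner (the file name and source text are made up for the example):

package main

import (
	"fmt"
	"go/scanner"
	"go/token"
)

func main() {
	src := []byte("package main\n\nfunc add(a, b int) int { return a + b }\n")

	// Register the file in a FileSet so Init can record line information.
	fset := token.NewFileSet()
	file := fset.AddFile("add.go", fset.Base(), len(src))

	var s scanner.Scanner
	s.Init(file, src, nil /* no error handler */, scanner.ScanComments)

	// Scan until EOF, printing position, token kind, and literal.
	for {
		pos, tok, lit := s.Scan()
		if tok == token.EOF {
			break
		}
		fmt.Printf("%s\t%s\t%q\n", fset.Position(pos), tok, lit)
	}
}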
func linenum(f *token.File, p token.Pos) int32 {
	return int32(f.Line(p))
}
// TODO(adonovan): make this a method: func (*token.File) Contains(token.Pos)
func tokenFileContainsPos(f *token.File, pos token.Pos) bool {
	p := int(pos)
	base := f.Base()
	return base <= p && p < base+f.Size()
}
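A tiny check of the helper above (hypothetical; assumes tokenFileContainsPos is in scope, standard library otherwise): a file of size 20 registered in a fresh FileSet spans [base, base+size), so the position one past the last byte is excluded.

fset := token.NewFileSet()
f := fset.AddFile("demo.go", fset.Base(), 20)
fmt.Println(tokenFileContainsPos(f, f.Pos(0)))  // true
fmt.Println(tokenFileContainsPos(f, f.Pos(19))) // true
fmt.Println(tokenFileContainsPos(f, f.Pos(20))) // false: offset == Size() is one past the last byte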
func process(output io.Writer, input io.Reader) { in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune done := false regex := make([]rune, 0, 8) read := func() { var er error r, _, er = in.ReadRune() if er == io.EOF { done = true } else if er != nil { panic(er.Error()) } } skipws := func() { for { read() if done { break } if strings.IndexRune(" \n\t\r", r) == -1 { break } } } var rules []*rule usercode := false familyn := 1 id := 0 newRule := func(family, index int) *rule { x := new(rule) rules = append(rules, x) x.family = family x.id = id x.index = index id++ return x } buf := make([]rune, 0, 8) readCode := func() string { if '{' != r { panic("expected {") } buf = buf[:0] nesting := 1 for { buf = append(buf, r) read() if done { panic("unmatched {") } if '{' == r { nesting++ } if '}' == r { nesting-- if 0 == nesting { break } } } buf = append(buf, r) return string(buf) } var decls string var parse func(int) parse = func(family int) { rulen := 0 declvar := func() { decls += fmt.Sprintf("var a%d [%d]dfa\n", family, rulen) } for !done { skipws() if done { break } regex = regex[:0] if '>' == r { if 0 == family { panic("unmatched >") } x := newRule(family, -1) x.code = "yylex = yylex.pop()\n" declvar() skipws() x.code += readCode() return } delim := r read() if done { panic("unterminated pattern") } for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { panic("regex interrupted by newline") } regex = append(regex, r) read() if done { panic("unterminated pattern") } } if "" == string(regex) { usercode = true break } skipws() if done { panic("last pattern lacks action") } x := newRule(family, rulen) rulen++ x.regex = make([]rune, len(regex)) copy(x.regex, regex) nested := false if '<' == r { skipws() if done { panic("'<' lacks action") } x.code = fmt.Sprintf("yylex = yylex.push(%d)\n", familyn) nested = true } x.code += readCode() if nested { familyn++ parse(familyn - 1) } } if 0 != family { panic("unmatched <") } x := newRule(family, -1) x.code = "// [END]\n" declvar() } parse(0) if !usercode { return } skipws() buf = buf[:0] for !done { buf = append(buf, r) read() } fs := token.NewFileSet() t, err := parser.ParseFile(fs, "", string(buf), parser.ImportsOnly) if err != nil { panic(err.Error()) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } fmt.Fprintf(out, `import ("bufio";"io";"strings") type dfa struct { acc []bool f []func(rune) int id int } type family struct { a []dfa endcase int } `) out.WriteString(decls) out.WriteString("var a []family\n") out.WriteString("func init() {\n") fmt.Fprintf(out, "a = make([]family, %d)\n", familyn) for _, x := range rules { gen(out, x) } for i := 0; i < familyn; i++ { fmt.Fprintf(out, "a[%d].a = a%d[:]\n", i, i) } out.WriteString(`} func getAction(c *frame) int { if -1 == c.match { return -1 } c.action = c.fam.a[c.match].id c.match = -1 return c.action } type frame struct { atEOF bool action, match, matchn, n int buf []rune text string in *bufio.Reader state []int fam family } func newFrame(in *bufio.Reader, index int) *frame { f := new(frame) f.buf = make([]rune, 0, 128) f.in = in f.match = -1 f.fam = a[index] f.state = make([]int, len(f.fam.a)) return f } type Lexer []*frame func NewLexer(in io.Reader) Lexer { stack := make([]*frame, 0, 4) stack = append(stack, newFrame(bufio.NewReader(in), 0)) return stack } func (stack Lexer) isDone() bool { 
return 1 == len(stack) && stack[0].atEOF } func (stack Lexer) nextAction() int { c := stack[len(stack) - 1] for { if c.atEOF { return c.fam.endcase } if c.n == len(c.buf) { r,_,er := c.in.ReadRune() switch er { case nil: c.buf = append(c.buf, r) case io.EOF: c.atEOF = true if c.n > 0 { c.text = string(c.buf) return getAction(c) } return c.fam.endcase default: panic(er.Error()) } } jammed := true r := c.buf[c.n] for i, x := range c.fam.a { if -1 == c.state[i] { continue } c.state[i] = x.f[c.state[i]](r) if -1 == c.state[i] { continue } jammed = false if x.acc[c.state[i]] { if -1 == c.match || c.matchn < c.n+1 || c.match > i { c.match = i c.matchn = c.n+1 } } } if jammed { a := getAction(c) if -1 == a { c.matchn = c.n + 1 } c.n = 0 for i, _ := range c.state { c.state[i] = 0 } c.text = string(c.buf[:c.matchn]) copy(c.buf, c.buf[c.matchn:]) c.buf = c.buf[:len(c.buf) - c.matchn] return a } c.n++ } panic("unreachable") } func (stack Lexer) push(index int) Lexer { c := stack[len(stack) - 1] return append(stack, newFrame(bufio.NewReader(strings.NewReader(c.text)), index)) } func (stack Lexer) pop() Lexer { return stack[:len(stack) - 1] } func (stack Lexer) Text() string { c := stack[len(stack) - 1] return c.text } `) if !*standalone { writeLex(out, rules) out.WriteString(string(buf)) out.Flush() return } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, rules) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() }
func getRangeLinesAtLeastOne(f *token.File, Pos, End token.Pos, fileSize int) (lines []int, firstLineNum int) {
	lines = []int{}
	firstLineNum = -1

	l := f.Line(Pos)
	for p := Pos; p <= End; p++ {
		if f.Line(p) > l {
			l = f.Line(p)
			if firstLineNum == -1 {
				firstLineNum = l
			}
			lines = append(lines, f.Offset(p))
		}
	}

	if (int(End) == fileSize+f.Base()-1) || f.Line(End+1) > l {
		lines = append(lines, f.Offset(End+1))
		if firstLineNum == -1 {
			firstLineNum = f.Line(End + 1)
		}
	}

	if firstLineNum < 0 {
		for p := End; ; p++ {
			if f.Line(p) > l {
				firstLineNum = l
				lines = append(lines, f.Offset(p))
				break
			}
		}
	}
	return
}
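A hypothetical call site (assumes getRangeLinesAtLeastOne is in scope; d is an ast.Decl from a file parsed out of src, and tf is that file's *token.File):

// Collect the line-start offsets covered by one declaration's extent.
offsets, first := getRangeLinesAtLeastOne(tf, d.Pos(), d.End(), len(src))
fmt.Println("first line:", first, "line-start offsets:", offsets)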
func process(output io.Writer, input io.Reader) error { lineno := 1 in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune read := func() bool { var err error r, _, err = in.ReadRune() if err == io.EOF { return true } if err != nil { panic(err) } if r == '\n' { lineno++ } return false } skipws := func() bool { for !read() { if strings.IndexRune(" \n\t\r", r) == -1 { return false } } return true } var buf []rune readCode := func() string { if '{' != r { panic(ErrExpectedLBrace) } buf = []rune{r} nesting := 1 for { if read() { panic(ErrUnmatchedLBrace) } buf = append(buf, r) if '{' == r { nesting++ } else if '}' == r { nesting-- if 0 == nesting { break } } } return string(buf) } var root rule needRootRAngle := false var parse func(*rule) error parse = func(node *rule) error { for { panicIf(skipws, ErrUnexpectedEOF) if '<' == r { if node != &root || len(node.kid) > 0 { panic(ErrUnexpectedLAngle) } panicIf(skipws, ErrUnexpectedEOF) node.startCode = readCode() needRootRAngle = true continue } else if '>' == r { if node == &root { if !needRootRAngle { panic(ErrUnmatchedRAngle) } } if skipws() { return ErrUnexpectedEOF } node.endCode = readCode() return nil } delim := r panicIf(read, ErrUnexpectedEOF) var regex []rune for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { return ErrUnexpectedNewline } regex = append(regex, r) panicIf(read, ErrUnexpectedEOF) } if "" == string(regex) { break } panicIf(skipws, ErrUnexpectedEOF) x := new(rule) x.id = fmt.Sprintf("%d", lineno) node.kid = append(node.kid, x) x.regex = make([]rune, len(regex)) copy(x.regex, regex) if '<' == r { panicIf(skipws, ErrUnexpectedEOF) x.startCode = readCode() parse(x) } else { x.code = readCode() } } return nil } err := parse(&root) if err != nil { return err } buf = nil for done := skipws(); !done; done = read() { buf = append(buf, r) } fs := token.NewFileSet() // Append a blank line to make things easier when there are only package and // import declarations. t, err := parser.ParseFile(fs, "", string(buf)+"\n", parser.ImportsOnly) if err != nil { panic(err) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) // Skip over package and import declarations. This is why we appended a blank // line above. for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } prefixReplacer.WriteString(out, lexertext) for _, kid := range root.kid { gen(out, kid) } prefixReplacer.WriteString(out, lexeroutro) if !standalone { writeLex(out, root) out.WriteString(string(buf)) out.Flush() if len(outFilename) > 0 { gofmt() } return nil } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, root) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() if len(outFilename) > 0 { gofmt() } return nil }
func process(output io.Writer, input io.Reader) { lineno := 1 in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune read := func() bool { var err error r, _, err = in.ReadRune() if err == io.EOF { return true } if err != nil { panic(err) } if r == '\n' { lineno++ } return false } skipws := func() bool { for !read() { if strings.IndexRune(" \n\t\r", r) == -1 { return false } } return true } panicIf := func(f func() bool, err Error) { if f() { panic(err) } } var buf []rune readCode := func() string { if '{' != r { panic(ErrExpectedLBrace) } buf = []rune{r} nesting := 1 for { if read() { panic(ErrUnmatchedLBrace) } buf = append(buf, r) if '{' == r { nesting++ } else if '}' == r { nesting-- if 0 == nesting { break } } } return string(buf) } var root rule needRootRAngle := false var parse func(*rule) parse = func(node *rule) { for { panicIf(skipws, ErrUnexpectedEOF) if '<' == r { if node != &root || len(node.kid) > 0 { panic(ErrUnexpectedLAngle) } panicIf(skipws, ErrUnexpectedEOF) node.startCode = readCode() needRootRAngle = true continue } else if '>' == r { if node == &root { if !needRootRAngle { panic(ErrUnmatchedRAngle) } } panicIf(skipws, ErrUnexpectedEOF) node.endCode = readCode() return } delim := r panicIf(read, ErrUnexpectedEOF) var regex []rune for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { panic(ErrUnexpectedNewline) } regex = append(regex, r) panicIf(read, ErrUnexpectedEOF) } if "" == string(regex) { break } panicIf(skipws, ErrUnexpectedEOF) x := new(rule) x.id = fmt.Sprintf("%d", lineno) node.kid = append(node.kid, x) x.regex = make([]rune, len(regex)) copy(x.regex, regex) if '<' == r { panicIf(skipws, ErrUnexpectedEOF) x.startCode = readCode() parse(x) } else { x.code = readCode() } } } parse(&root) buf = nil for done := skipws(); !done; done = read() { buf = append(buf, r) } fs := token.NewFileSet() // Append a blank line to make things easier when there are only package and // import declarations. t, err := parser.ParseFile(fs, "", string(buf)+"\n", parser.ImportsOnly) if err != nil { panic(err) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) // Skip over package and import declarations. This is why we appended a blank // line above. for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } out.WriteString(`import ("bufio";"io";"strings") type frame struct { i int s string line, column int } type Lexer struct { // The lexer runs in its own goroutine, and communicates via channel 'ch'. ch chan frame // We record the level of nesting because the action could return, and a // subsequent call expects to pick up where it left off. In other words, // we're simulating a coroutine. // TODO: Support a channel-based variant that compatible with Go's yacc. stack []frame stale bool // The 'l' and 'c' fields were added for // https://github.com/wagerlabs/docker/blob/65694e801a7b80930961d70c69cba9f2465459be/buildfile.nex // Since then, I introduced the built-in Line() and Column() functions. l, c int parseResult interface{} // The following line makes it easy for scripts to insert fields in the // generated code. // [NEX_END_OF_LEXER_STRUCT] } // NewLexerWithInit creates a new Lexer object, runs the given callback on it, // then returns it. func NewLexerWithInit(in io.Reader, initFun func(*Lexer)) *Lexer { type dfa struct { acc []bool // Accepting states. f []func(rune) int // Transitions. 
startf, endf []int // Transitions at start and end of input. nest []dfa } yylex := new(Lexer) if initFun != nil { initFun(yylex) } yylex.ch = make(chan frame) var scan func(in *bufio.Reader, ch chan frame, family []dfa, line, column int) scan = func(in *bufio.Reader, ch chan frame, family []dfa, line, column int) { // Index of DFA and length of highest-precedence match so far. matchi, matchn := 0, -1 var buf []rune n := 0 checkAccept := func(i int, st int) bool { // Higher precedence match? DFAs are run in parallel, so matchn is at most len(buf), hence we may omit the length equality check. if family[i].acc[st] && (matchn < n || matchi > i) { matchi, matchn = i, n return true } return false } var state [][2]int for i := 0; i < len(family); i++ { mark := make([]bool, len(family[i].startf)) // Every DFA starts at state 0. st := 0 for { state = append(state, [2]int{i, st}) mark[st] = true // As we're at the start of input, follow all ^ transitions and append to our list of start states. st = family[i].startf[st] if -1 == st || mark[st] { break } // We only check for a match after at least one transition. checkAccept(i, st) } } atEOF := false for { if n == len(buf) && !atEOF { r,_,err := in.ReadRune() switch err { case io.EOF: atEOF = true case nil: buf = append(buf, r) default: panic(err) } } if !atEOF { r := buf[n] n++ var nextState [][2]int for _, x := range state { x[1] = family[x[0]].f[x[1]](r) if -1 == x[1] { continue } nextState = append(nextState, x) checkAccept(x[0], x[1]) } state = nextState } else { dollar: // Handle $. for _, x := range state { mark := make([]bool, len(family[x[0]].endf)) for { mark[x[1]] = true x[1] = family[x[0]].endf[x[1]] if -1 == x[1] || mark[x[1]] { break } if checkAccept(x[0], x[1]) { // Unlike before, we can break off the search. Now that we're at the end, there's no need to maintain the state of each DFA. break dollar } } } state = nil } if state == nil { lcUpdate := func(r rune) { if r == '\n' { line++ column = 0 } else { column++ } } // All DFAs stuck. Return last match if it exists, otherwise advance by one rune and restart all DFAs. if matchn == -1 { if len(buf) == 0 { // This can only happen at the end of input. break } lcUpdate(buf[0]) buf = buf[1:] } else { text := string(buf[:matchn]) buf = buf[matchn:] matchn = -1 ch <- frame{matchi, text, line, column} if len(family[matchi].nest) > 0 { scan(bufio.NewReader(strings.NewReader(text)), ch, family[matchi].nest, line, column) } if atEOF { break } for _, r := range text { lcUpdate(r) } } n = 0 for i := 0; i < len(family); i++ { state = append(state, [2]int{i, 0}) } } } ch <- frame{-1, "", line, column} } go scan(bufio.NewReader(in), yylex.ch, []dfa{`) for _, kid := range root.kid { gen(out, kid) } out.WriteString(`}, 0, 0) return yylex } func NewLexer(in io.Reader) *Lexer { return NewLexerWithInit(in, nil) } // Text returns the matched text. func (yylex *Lexer) Text() string { return yylex.stack[len(yylex.stack) - 1].s } // Line returns the current line number. // The first line is 0. func (yylex *Lexer) Line() int { return yylex.stack[len(yylex.stack) - 1].line } // Column returns the current column number. // The first column is 0. 
func (yylex *Lexer) Column() int { return yylex.stack[len(yylex.stack) - 1].column } func (yylex *Lexer) next(lvl int) int { if lvl == len(yylex.stack) { l, c := 0, 0 if lvl > 0 { l, c = yylex.stack[lvl - 1].line, yylex.stack[lvl - 1].column } yylex.stack = append(yylex.stack, frame{0, "", l, c}) } if lvl == len(yylex.stack) - 1 { p := &yylex.stack[lvl] *p = <-yylex.ch yylex.stale = false } else { yylex.stale = true } return yylex.stack[lvl].i } func (yylex *Lexer) pop() { yylex.stack = yylex.stack[:len(yylex.stack) - 1] } `) if !*standalone { writeLex(out, root) out.WriteString(string(buf)) out.Flush() return } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, root) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() }
func GetRangeLines(f *token.File, Pos, End token.Pos, fileSize int) (lines []int, firstLineNum int) {
	lines = []int{}
	firstLineNum = -1

	l := f.Line(Pos)
	for p := Pos; p <= End; p++ {
		if f.Line(p) > l {
			l = f.Line(p)
			if firstLineNum == -1 {
				firstLineNum = l
			}
			lines = append(lines, f.Offset(p))
		}
	}

	// Leftover debug output: the end position and the last valid position in the file.
	print(End)
	print(" -> ")
	println(fileSize + f.Base() - 1)

	if (int(End) == fileSize+f.Base()-1) || f.Line(End+1) > l {
		lines = append(lines, f.Offset(End+1))
		if firstLineNum == -1 {
			firstLineNum = f.Line(End + 1)
		}
	}
	return
}
func printDecls(tf *token.File, f *ast.File) {
	for _, d := range f.Decls {
		fmt.Printf("> %d %d\n", tf.Offset(d.Pos()), tf.Offset(d.End()))
	}
}
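A self-contained sketch of how printDecls might be called (hypothetical file name and source; assumes printDecls is in scope along with fmt, log, go/parser, and go/token):

src := "package p\n\nvar x = 1\n\nfunc f() {}\n"
fset := token.NewFileSet()
astFile, err := parser.ParseFile(fset, "p.go", src, 0)
if err != nil {
	log.Fatal(err)
}
// Prints the byte-offset extent of each top-level declaration, e.g.:
// > 11 20
// > 22 33
printDecls(fset.File(astFile.Pos()), astFile)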
func GetLines(f *token.File) []int {
	lines := make([]int, 0, 20)
	l := -1
	for i := f.Base(); i < f.Base()+f.Size(); i++ {
		if f.Line(token.Pos(i)) > l {
			l = f.Line(token.Pos(i))
			lines = append(lines, f.Offset(token.Pos(i)))
		}
	}
	return lines
}
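The per-byte scan above can usually be avoided by reading the file's own line table; a sketch of an alternative with the same intent, using the standard (*token.File).LineCount and (*token.File).LineStart methods:

// getLineStarts returns the byte offset of the start of each line in f.
func getLineStarts(f *token.File) []int {
	lines := make([]int, 0, f.LineCount())
	for line := 1; line <= f.LineCount(); line++ {
		lines = append(lines, f.Offset(f.LineStart(line)))
	}
	return lines
}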
func process(output io.Writer, input io.Reader) { lineno := 1 in := bufio.NewReader(input) out := bufio.NewWriter(output) var r rune read := func() bool { var err error r, _, err = in.ReadRune() if err == io.EOF { return true } if err != nil { panic(err) } if r == '\n' { lineno++ } return false } skipws := func() bool { for !read() { if strings.IndexRune(" \n\t\r", r) == -1 { return false } } return true } panicIf := func(f func() bool, err Error) { if f() { panic(err) } } var buf []rune readCode := func() string { if '{' != r { panic(ErrExpectedLBrace) } buf = []rune{r} nesting := 1 for { if read() { panic(ErrUnmatchedLBrace) } buf = append(buf, r) if '{' == r { nesting++ } else if '}' == r { nesting-- if 0 == nesting { break } } } return string(buf) } var root rule needRootRAngle := false var parse func(*rule) parse = func(node *rule) { for { panicIf(skipws, ErrUnexpectedEOF) if '<' == r { if node != &root || len(node.kid) > 0 { panic(ErrUnexpectedLAngle) } panicIf(skipws, ErrUnexpectedEOF) node.startCode = readCode() needRootRAngle = true continue } else if '>' == r { if node == &root { if !needRootRAngle { panic(ErrUnmatchedRAngle) } } panicIf(skipws, ErrUnexpectedEOF) node.endCode = readCode() return } delim := r panicIf(read, ErrUnexpectedEOF) var regex []rune for { if r == delim && (len(regex) == 0 || regex[len(regex)-1] != '\\') { break } if '\n' == r { panic(ErrUnexpectedNewline) } regex = append(regex, r) panicIf(read, ErrUnexpectedEOF) } if "" == string(regex) { break } panicIf(skipws, ErrUnexpectedEOF) x := new(rule) x.id = fmt.Sprintf("%d", lineno) node.kid = append(node.kid, x) x.regex = make([]rune, len(regex)) copy(x.regex, regex) if '<' == r { panicIf(skipws, ErrUnexpectedEOF) x.startCode = readCode() parse(x) } else { x.code = readCode() } } } parse(&root) buf = nil for done := skipws(); !done; done = read() { buf = append(buf, r) } fs := token.NewFileSet() t, err := parser.ParseFile(fs, "", string(buf), parser.ImportsOnly) if err != nil { panic(err) } printer.Fprint(out, fs, t) var file *token.File fs.Iterate(func(f *token.File) bool { file = f return true }) for m := file.LineCount(); m > 1; m-- { i := 0 for '\n' != buf[i] { i++ } buf = buf[i+1:] } ExtraType := *yyExtraName out.WriteString(`import ("bufio";"io";"strings") type intstring struct { i int s string } type Lexer struct { // The lexer runs in its own goroutine, and communicates via channel 'ch'. ch chan intstring // We record the level of nesting because the action could return, and a // subsequent call expects to pick up where it left off. In other words, // we're simulating a coroutine. // TODO: Support a channel-based variant that compatible with Go's yacc. stack []intstring stale bool // TODO: The following fields were added for // https://github.com/wagerlabs/docker/blob/65694e801a7b80930961d70c69cba9f2465459be/buildfile.nex // In general, there are times when it would be convenient to maintain // state in the Lexer object itself. Rather than adding fields for every // possible nex application, it should be configurable, like the 'yyextra' // field in Flex. l, c int // line number and character position `) if len(ExtraType) > 0 { out.WriteString(` yyextra ` + ExtraType + ` } func NewLexer(in io.Reader, yyextra ` + ExtraType + `) *Lexer { `) } else { out.WriteString(` } func NewLexer(in io.Reader) *Lexer { `) } out.WriteString(` type dfa struct { acc []bool // Accepting states. f []func(rune) int // Transitions. startf, endf []int // Transitions at start and end of input. 
nest []dfa } yylex := new(Lexer) yylex.ch = make(chan intstring)`) if len(ExtraType) > 0 { out.WriteString(` yylex.yyextra = yyextra `) } out.WriteString(` var scan func(in *bufio.Reader, ch chan intstring, family []dfa) scan = func(in *bufio.Reader, ch chan intstring, family []dfa) { // Index of DFA and length of highest-precedence match so far. matchi, matchn := 0, -1 var buf []rune n := 0 checkAccept := func(i int, st int) bool { // Higher precedence match? DFAs are run in parallel, so matchn is at most len(buf), hence we may omit the length equality check. if family[i].acc[st] && (matchn < n || matchi > i) { matchi, matchn = i, n return true } return false } var state [][2]int for i := 0; i < len(family); i++ { mark := make([]bool, len(family[i].startf)) // Every DFA starts at state 0. st := 0 for { state = append(state, [2]int{i, st}) mark[st] = true // As we're at the start of input, follow all ^ transitions and append to our list of start states. st = family[i].startf[st] if -1 == st || mark[st] { break } // We only check for a match after at least one transition. checkAccept(i, st) } } atEOF := false for { if n == len(buf) && !atEOF { r,_,err := in.ReadRune() switch err { case io.EOF: atEOF = true case nil: buf = append(buf, r) default: panic(err) } } if !atEOF { r := buf[n] n++ var nextState [][2]int for _, x := range state { x[1] = family[x[0]].f[x[1]](r) if -1 == x[1] { continue } nextState = append(nextState, x) checkAccept(x[0], x[1]) } state = nextState } else { dollar: // Handle $. for _, x := range state { mark := make([]bool, len(family[x[0]].endf)) for { mark[x[1]] = true x[1] = family[x[0]].endf[x[1]] if -1 == x[1] || mark[x[1]] { break } if checkAccept(x[0], x[1]) { // Unlike before, we can break off the search. Now that we're at the end, there's no need to maintain the state of each DFA. break dollar } } } state = nil } if state == nil { // All DFAs stuck. Return last match if it exists, otherwise advance by one rune and restart all DFAs. if matchn == -1 { if len(buf) == 0 { // This can only happen at the end of input. break } buf = buf[1:] } else { text := string(buf[:matchn]) buf = buf[matchn:] matchn = -1 ch <- intstring{matchi, text} if len(family[matchi].nest) > 0 { scan(bufio.NewReader(strings.NewReader(text)), ch, family[matchi].nest) } if atEOF { break } } n = 0 for i := 0; i < len(family); i++ { state = append(state, [2]int{i, 0}) } } } ch <- intstring{-1, ""} } go scan(bufio.NewReader(in), yylex.ch, []dfa{`) for _, kid := range root.kid { gen(out, kid) } out.WriteString(`}) return yylex } func (yylex *Lexer) Text() string { return yylex.stack[len(yylex.stack) - 1].s } func (yylex *Lexer) next(lvl int) int { if lvl == len(yylex.stack) { yylex.stack = append(yylex.stack, intstring{0, ""}) } if lvl == len(yylex.stack) - 1 { p := &yylex.stack[lvl] *p = <-yylex.ch yylex.stale = false } else { yylex.stale = true } return yylex.stack[lvl].i } func (yylex *Lexer) pop() { yylex.stack = yylex.stack[:len(yylex.stack) - 1] } `) if !*standalone { writeLex(out, root) out.WriteString(string(buf)) out.Flush() return } m := 0 const funmac = "NN_FUN" for m < len(buf) { m++ if funmac[:m] != string(buf[:m]) { out.WriteString(string(buf[:m])) buf = buf[m:] m = 0 } else if funmac == string(buf[:m]) { writeNNFun(out, root) buf = buf[m:] m = 0 } } out.WriteString(string(buf)) out.Flush() }