func verify(name, start string, r io.Reader) error { if r == nil { f, err := os.Open(name) if err != nil { return err } defer f.Close() r = f } src, err := ioutil.ReadAll(r) if err != nil { return err } if filepath.Ext(name) == ".html" || bytes.Index(src, open) >= 0 { src = extractEBNF(src) } grammar, err := ebnf.Parse(name, bytes.NewBuffer(src)) if err != nil { return err } return ebnf.Verify(grammar, start) }
// Parse parses an EBNF grammar from the file with the given filename // and an optional io.Reader. If the reader is not provided, the code // will attempt to open the file specified by name. In either case, the // filename will be used in error output and debug messages. // // The logic in Parse is extracted from golang.org/x/exp/ebnflint. // Unfortunately, it's not exported there, so we duplicate it here and // export it. It is modified a bit, though, to be more generic. func Parse(filename string, r io.Reader) (ebnf.Grammar, error) { if r == nil { f, err := os.Open(filename) if err != nil { return nil, err } defer f.Close() r = f } src, err := ioutil.ReadAll(r) if err != nil { return nil, err } src = CheckExtract(filename, src) grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src)) if err != nil { return nil, err } return grammar, err }
// CompileLexer builds a Lexer from a set of token definitions.
//
// tokdefs maps each token's definition to its numeric id. A definition
// may be a regular expression (as recognized by isRE), a plain literal
// (quoted with regexp.QuoteMeta), or — when grammar is non-empty — an
// identifier naming a production of the EBNF grammar, which is compiled
// to a regexp via ebnf2RE. start names the synthetic start production
// added to the grammar.
//
// starts lists the token-id sets for each lexer start state; when nil,
// a single default set containing every token id is used.
//
// Errors raised internally via panic are recovered and returned in err,
// with lexer set to nil.
//
// TODO: full docs
func CompileLexer(starts [][]int, tokdefs map[string]int, grammar, start string) (lexer *Lexer, err error) {
	lexer = &Lexer{}
	// Internal helpers report failure by panicking with an error;
	// convert that into the (nil, err) return. NOTE(review): a panic
	// with a non-error value would re-panic on the type assertion.
	defer func() {
		if e := recover(); e != nil {
			lexer = nil
			err = e.(error)
		}
	}()
	var prodnames string
	// res maps token id -> regexp source; xref maps token id -> a
	// human-readable name used in diagnostics.
	res, xref := map[int]string{}, map[int]string{}
	for tokdef, id := range tokdefs {
		if _, ok := res[id]; ok {
			panic(fmt.Errorf("duplicate id %d for token %q", id, tokdef))
		}
		xref[id] = fmt.Sprintf("id-%d", id)
		// Definition is already a regexp: use it as-is.
		if re, ok := isRE(tokdef); ok {
			res[id] = re
			continue
		}
		// Plain literal (or no grammar to resolve identifiers against):
		// escape it so it matches verbatim.
		if grammar == "" || !isIdent(tokdef) {
			res[id] = regexp.QuoteMeta(tokdef)
			continue
		}
		// Identifier referring to a grammar production; collect it into
		// the alternatives of the synthetic start production. The regexp
		// is filled in after the grammar is parsed below.
		if prodnames != "" {
			prodnames += " | "
		}
		prodnames += tokdef
		res[id] = ""
	}
	if prodnames != "" {
		// Append a start production "start = a | b | ... ." so the
		// grammar can be verified from a single entry point.
		var g ebnf.Grammar
		ebnfSrc := grammar + fmt.Sprintf("\n%s = %s .", start, prodnames)
		if g, err = ebnf.Parse(start, bytes.NewBufferString(ebnfSrc)); err != nil {
			panic(err)
		}
		if err = ebnf.Verify(g, start); err != nil {
			panic(err)
		}
		// Compile each identifier-defined token to a regexp, caching
		// per-production results in grammarREs.
		grammarREs := map[*ebnf.Production]string{}
		for tokdef, id := range tokdefs {
			if isIdent(tokdef) {
				res[id], xref[id] = ebnf2RE(g, tokdef, grammarREs), tokdef
			}
		}
	}
	if starts == nil { // create the default, all inclusive start set
		starts = [][]int{{}}
		for id := range res {
			starts[0] = append(starts[0], id)
		}
	}
	// Build the NFA: one shared accepting state, one start state per
	// start set, and for each token an epsilon edge (tagged with the
	// token id) into that token's sub-NFA, whose exit feeds the shared
	// accepting state.
	lexer.accept = lexer.nfa.NewState()
	lexer.starts = make([]*NfaState, len(starts))
	for i, set := range starts {
		state := lexer.nfa.NewState()
		lexer.starts[i] = state
		for _, id := range set {
			var in, out *NfaState
			re, ok := res[int(id)]
			if !ok {
				panic(fmt.Errorf("unknown token id %d in set %d", id, i))
			}
			if in, out, err = lexer.nfa.ParseRE(fmt.Sprintf("%s-%s", start, xref[int(id)]), re); err != nil {
				panic(err)
			}
			state.AddNonConsuming(&EpsilonEdge{int(id), in})
			out.AddNonConsuming(&EpsilonEdge{0, lexer.accept})
		}
	}
	// Minimize the NFA before handing it back.
	lexer.nfa.reduce()
	return
}