func (t *Tree) Compile(out io.Writer, optiFlags string) { counts := [TypeLast]uint{} nvar := 0 O := parseOptiFlags(optiFlags) for element := t.Front(); element != nil; element = element.Next() { node := element.Value.(Node) switch node.GetType() { case TypeRule: rule := node.(*rule) t.rules[rule.String()] = rule nvar += len(rule.variables) } } for name, r := range t.rules { if r.name == "" { r := &rule{name: name, id: t.ruleId} t.ruleId++ t.rules[name] = r t.PushBack(r) } } join([]func(){ func() { var countTypes func(node Node) countTypes = func(node Node) { t := node.GetType() counts[t]++ switch t { case TypeRule: countTypes(node.(Rule).GetExpression()) case TypeAlternate, TypeUnorderedAlternate, TypeSequence: for element := node.(List).Front(); element != nil; element = element.Next() { countTypes(element.Value.(Node)) } case TypePeekFor, TypePeekNot, TypeQuery, TypeStar, TypePlus: countTypes(node.(List).Front().Value.(Node)) } } for _, rule := range t.rules { countTypes(rule) } }, func() { var countRules func(node Node) ruleReached := make([]bool, len(t.rules)) countRules = func(node Node) { switch node.GetType() { case TypeRule: rule := node.(Rule) name, id := rule.String(), rule.GetId() if count, ok := t.rulesCount[name]; ok { t.rulesCount[name] = count + 1 } else { t.rulesCount[name] = 1 } if ruleReached[id] { return } ruleReached[id] = true countRules(rule.GetExpression()) case TypeName: countRules(t.rules[node.String()]) case TypeAlternate, TypeUnorderedAlternate, TypeSequence: for element := node.(List).Front(); element != nil; element = element.Next() { countRules(element.Value.(Node)) } case TypePeekFor, TypePeekNot, TypeQuery, TypeStar, TypePlus: countRules(node.(List).Front().Value.(Node)) } } for element := t.Front(); element != nil; element = element.Next() { node := element.Value.(Node) if node.GetType() == TypeRule { countRules(node.(*rule)) break } } }, func() { var checkRecursion func(node Node) bool ruleReached := make([]bool, len(t.rules)) checkRecursion = func(node Node) bool { switch node.GetType() { case TypeRule: rule := node.(Rule) id := rule.GetId() if ruleReached[id] { fmt.Fprintf(os.Stderr, "possible infinite left recursion in rule '%v'\n", node) return false } ruleReached[id] = true consumes := checkRecursion(rule.GetExpression()) ruleReached[id] = false return consumes case TypeAlternate: for element := node.(List).Front(); element != nil; element = element.Next() { if !checkRecursion(element.Value.(Node)) { return false } } return true case TypeSequence: for element := node.(List).Front(); element != nil; element = element.Next() { if checkRecursion(element.Value.(Node)) { return true } } case TypeName: return checkRecursion(t.rules[node.String()]) case TypePlus: return checkRecursion(node.(List).Front().Value.(Node)) case TypeCharacter, TypeString: return len(node.String()) > 0 case TypeDot, TypeClass: return true } return false } for _, rule := range t.rules { checkRecursion(rule) } }}) var inlineLeafes func(node Node) Node inlineLeafes = func(node Node) (ret Node) { ret = node switch node.GetType() { case TypeRule: rule := node.(Rule) switch x := rule.GetExpression(); x.GetType() { case TypeCharacter, TypeDot, TypeClass, TypeString: ret = x case TypePlus, TypeStar, TypeQuery, TypePeekNot, TypePeekFor: switch x.(List).Front().Value.(Node).GetType() { case TypeCharacter, TypeDot, TypeClass, TypeString: ret = x } } case TypeName: r := t.rules[node.String()] x := inlineLeafes(r) if r != x { stats.inlineLeafs++ ret = x } case TypeSequence, TypeAlternate: for el := node.(List).Front(); el != nil; el = el.Next() { el.Value = inlineLeafes(el.Value.(Node)) } case TypePlus, TypeStar, TypeQuery, TypePeekNot, TypePeekFor: v := &node.(List).Front().Value *v = inlineLeafes((*v).(Node)) } return } if O.inlineLeafs { for _, rule := range t.rules { inlineLeafes(rule.GetExpression()) } } if t._switch { var optimizeAlternates func(node Node) (consumes, eof, peek bool, class *characterClass) cache := make([]struct { reached, consumes, eof, peek bool class *characterClass }, len(t.rules)) optimizeAlternates = func(node Node) (consumes, eof, peek bool, class *characterClass) { switch node.GetType() { case TypeRule: rule := node.(Rule) if t.switchExcl != nil && t.switchExcl[rule.String()] { return } cache := &cache[rule.GetId()] if cache.reached { consumes, eof, peek, class = cache.consumes, cache.eof, cache.peek, cache.class if class == nil { class = anyChar } return } cache.reached = true consumes, eof, peek, class = optimizeAlternates(rule.GetExpression()) cache.consumes, cache.eof, cache.peek, cache.class = consumes, eof, peek, class case TypeName: consumes, eof, peek, class = optimizeAlternates(t.rules[node.String()]) case TypeDot: consumes, class = true, new(characterClass) for index, _ := range *class { class[index] = 0xff } case TypeString, TypeCharacter: if node.String() == "" { consumes, class = true, anyChar return } consumes, class = true, new(characterClass) b := node.String()[0] if b == '\\' { b = node.String()[1] switch b { case 'a': b = '\a' /* bel */ case 'b': b = '\b' /* bs */ case 'f': b = '\f' /* ff */ case 'n': b = '\n' /* nl */ case 'r': b = '\r' /* cr */ case 't': b = '\t' /* ht */ case 'v': b = '\v' /* vt */ default: if s := node.String(); len(s) == 4 { b = (s[1]-'0')*64 + (s[2]-'0')*8 + s[3] - '0' } } } class.add(b) case TypeClass: consumes, class = true, t.Classes[node.String()].Class case TypeAlternate: consumes, peek, class = true, true, new(characterClass) alternate := node.(List) mconsumes, meof, mpeek, properties, c := consumes, eof, peek, make([]struct { intersects bool class *characterClass }, alternate.Len()), 0 empty := false for element := alternate.Front(); element != nil; element = element.Next() { mconsumes, meof, mpeek, properties[c].class = optimizeAlternates(element.Value.(Node)) consumes, eof, peek = consumes && mconsumes, eof || meof, peek && mpeek if properties[c].class != nil { class.union(properties[c].class) if properties[c].class.len() == 0 { empty = true } } c++ } if eof { break } intersections := 0 compare: for ai, a := range properties[0 : len(properties)-1] { for _, b := range properties[ai+1:] { for i, v := range *a.class { if (b.class[i] & v) != 0 { intersections++ properties[ai].intersects = true continue compare } } } } if empty { class = new(characterClass) consumes = false break } if intersections < len(properties) && len(properties) >= 2 { c, unordered, ordered, max := 0, &nodeList{Type: TypeUnorderedAlternate}, &nodeList{Type: TypeAlternate}, 0 for element := alternate.Front(); element != nil; element = element.Next() { if properties[c].intersects { ordered.PushBack(element.Value) } else { class := &token{Type: TypeClass, string: properties[c].class.String(), class: properties[c].class} sequence, predicate, length := &nodeList{Type: TypeSequence}, &nodeList{Type: TypePeekFor}, properties[c].class.len() predicate.PushBack(class) sequence.PushBack(predicate) sequence.PushBack(element.Value) if element.Value.(Node).GetType() == TypeString && element.Value.(Node).String() == "" { unordered.PushBack(sequence) } else if element.Value.(Node).GetType() == TypeNil { unordered.PushBack(sequence) } else if length > max { unordered.PushBack(sequence) max = length } else { unordered.PushFront(sequence) } } c++ } alternate.Init() if ordered.Len() == 0 { alternate.SetType(TypeUnorderedAlternate) for element := unordered.Front(); element != nil; element = element.Next() { alternate.PushBack(element.Value) } } else { for element := ordered.Front(); element != nil; element = element.Next() { alternate.PushBack(element.Value) } if unordered.Len() == 1 { alternate.PushBack(unordered.Front().Value.(List).Front().Next().Value) } else { alternate.PushBack(unordered) } } } case TypeSequence: sequence := node.(List) meof, classes, c, element := eof, make([]struct { peek bool class *characterClass }, sequence.Len()), 0, sequence.Front() for ; !consumes && element != nil; element, c = element.Next(), c+1 { consumes, meof, classes[c].peek, classes[c].class = optimizeAlternates(element.Value.(Node)) eof, peek = eof || meof, peek || classes[c].peek } eof, peek, class = !consumes && eof, !consumes && peek, new(characterClass) for c--; c >= 0; c-- { if classes[c].class != nil { if classes[c].peek { class.intersection(classes[c].class) } else { class.union(classes[c].class) } } } for ; element != nil; element = element.Next() { optimizeAlternates(element.Value.(Node)) } case TypePeekNot: peek = true // might be buggy _, eof, _, _ = optimizeAlternates(node.(List).Front().Value.(Node)) class = new(characterClass) eof = !eof class = class.copy() class.complement() case TypePeekFor: peek = true fallthrough case TypeQuery, TypeStar: _, eof, _, class = optimizeAlternates(node.(List).Front().Value.(Node)) case TypePlus: consumes, eof, peek, class = optimizeAlternates(node.(List).Front().Value.(Node)) case TypeAction, TypeNil: class = new(characterClass) } return } for element := t.Front(); element != nil; element = element.Next() { node := element.Value.(Node) if node.GetType() == TypeRule { optimizeAlternates(node.(*rule)) break } } } w := newWriter(out) w.elimRestore = O.elimRestore print := func(format string, a ...interface{}) { if !w.dryRun { fmt.Fprintf(w, format, a...) } } var printRule func(node Node) var compile func(expression Node, ko *label) (chgFlags, chgFlags) printRule = func(node Node) { switch node.GetType() { case TypeRule: print("%v <- ", node) expression := node.(Rule).GetExpression() if expression != nilNode { printRule(expression) } case TypeDot: print(".") case TypeName: print("%v", node) case TypeCharacter, TypeString: print("'%v'", node) case TypeClass: print("[%v]", node) case TypePredicate: print("&{%v}", node) case TypeAction: print("{%v}", node) case TypeCommit: print("commit") case TypeBegin: print("<") case TypeEnd: print(">") case TypeAlternate: print("(") list := node.(List) element := list.Front() printRule(element.Value.(Node)) for element = element.Next(); element != nil; element = element.Next() { print(" / ") printRule(element.Value.(Node)) } print(")") case TypeUnorderedAlternate: print("(") element := node.(List).Front() printRule(element.Value.(Node)) for element = element.Next(); element != nil; element = element.Next() { print(" | ") printRule(element.Value.(Node)) } print(")") case TypeSequence: print("(") element := node.(List).Front() printRule(element.Value.(Node)) for element = element.Next(); element != nil; element = element.Next() { print(" ") printRule(element.Value.(Node)) } print(")") case TypePeekFor: print("&") printRule(node.(List).Front().Value.(Node)) case TypePeekNot: print("!") printRule(node.(List).Front().Value.(Node)) case TypeQuery: printRule(node.(List).Front().Value.(Node)) print("?") case TypeStar: printRule(node.(List).Front().Value.(Node)) print("*") case TypePlus: printRule(node.(List).Front().Value.(Node)) print("+") default: fmt.Fprintf(os.Stderr, "illegal node type: %v\n", node.GetType()) } } compileExpression := func(rule *rule, ko *label) (cko, c*k chgFlags) { nvar := len(rule.variables) if nvar > 0 { w.lnPrint("doarg(yyPush, %d)", nvar) } cko, c*k = compile(rule.GetExpression(), ko) if nvar > 0 { w.lnPrint("doarg(yyPop, %d)", nvar) cko.thPos = true c*k.thPos = true } return } canCompilePeek := func(node Node, jumpIfTrue bool, label *label) bool { if !O.peek { return false } switch node.GetType() { case TypeDot: label.cJump(jumpIfTrue, "(position < len(p.Buffer))") stats.Peek.Dot++ case TypeCharacter: label.cJump(jumpIfTrue, "peekChar('%v')", node) stats.Peek.Char++ case TypeClass: label.cJump(jumpIfTrue, "peekClass(%d)", t.Classes[node.String()].Index) stats.Peek.Class++ case TypePredicate: label.cJump(jumpIfTrue, "(%v)", node) default: return false } return true } compile = func(node Node, ko *label) (chgko, chgok chgFlags) { updateFlags := func(cko, c*k chgFlags) (chgFlags, chgFlags) { chgko, chgok = updateChgFlags(chgko, chgok, cko, c*k) return chgko, chgok } switch node.GetType() { case TypeRule: fmt.Fprintf(os.Stderr, "internal error #1 (%v)\n", node) case TypeDot: ko.cJump(false, "matchDot()") stats.Match.Dot++ chgok.pos = true case TypeName: varp := node.(*name).varp name := node.String() rule := t.rules[name] if t.inline && t.rulesCount[name] == 1 { chgko, chgok = compileExpression(rule, ko) } else { ko.cJump(false, "p.rules[rule%s]()", rule.GoString()) if len(rule.variables) != 0 || rule.hasActions { chgok.thPos = true } chgok.pos = true // safe guess } if varp != nil { w.lnPrint("doarg(yySet, %d)", varp.offset) chgok.thPos = true } case TypeCharacter: ko.cJump(false, "matchChar('%v')", node) stats.Match.Char++ chgok.pos = true case TypeString: if s := node.String(); s != "" { ko.cJump(false, "matchString(\"%s\")", s) stats.Match.String++ chgok.pos = true } case TypeClass: ko.cJump(false, "matchClass(%d)", t.Classes[node.String()].Index) chgok.pos = true case TypePredicate: ko.cJump(false, "(%v)", node) case TypeAction: w.lnPrint("do(%d)", node.(Action).GetId()) chgok.thPos = true case TypeCommit: ko.cJump(false, "(commit(thunkPosition0))") chgko.thPos = true case TypeBegin: if t.Actions != nil { w.lnPrint("begin = position") } case TypeEnd: if t.Actions != nil { w.lnPrint("end = position") } case TypeAlternate: list := node.(List) ok := w.newLabel() element := list.Front() if ok.unsafe() { w.begin() ok.save() } var next *label for element.Next() != nil { next = w.newLabel() cko, _ := updateFlags(compile(element.Value.(Node), next)) ok.jump() if next.used { ok.lrestore(next, cko.pos, cko.thPos) } element = element.Next() } if next == nil || next.used { updateFlags(compile(element.Value.(Node), ko)) } if ok.unsafe() { w.end() } if ok.used { ok.label() } case TypeUnorderedAlternate: list := node.(List) done, ok := ko, w.newLabel() w.begin() done.cJump(true, "position == len(p.Buffer)") w.lnPrint("switch p.Buffer[position] {") element := list.Front() for ; element != nil; element = element.Next() { sequence := element.Value.(List).Front() class := sequence.Value.(List).Front().Value.(Node).(Token).GetClass() node := sequence.Next().Value.(Node) if element.Next() == nil { if class.len() > 2 { w.lnPrint("default:") w.indent++ updateFlags(compile(node, done)) w.indent-- break } } w.lnPrint("case") comma := false for d := 0; d < 256; d++ { if class.has(uint8(d)) { if comma { print(",") } s := "" switch uint8(d) { case '\a': s = `\a` /* bel */ case '\b': s = `\b` /* bs */ case '\f': s = `\f` /* ff */ case '\n': s = `\n` /* nl */ case '\r': s = `\r` /* cr */ case '\t': s = `\t` /* ht */ case '\v': s = `\v` /* vt */ case '\\': s = `\\` /* \ */ case '\'': s = `\'` /* ' */ default: switch { case d >= 0 && d < 32 || d >= 0x80: s = fmt.Sprintf("\\%03o", d) default: s = fmt.Sprintf("%c", d) } } print(" '%s'", s) comma = true } } print(":") w.indent++ if O.unorderedFirstItem { updateFlags(compileOptFirst(w, node, done, compile)) } else { updateFlags(compile(node, done)) } w.lnPrint("break") w.indent-- if element.Next() == nil { w.lnPrint("default:") w.indent++ done.jump() w.indent-- } } w.lnPrint("}") w.end() if ok.used { ok.label() } case TypeSequence: var cs []string var peek Type var element0 = node.(List).Front() if O.seqPeekNot { for el := element0; el != nil; el = el.Next() { sub := el.Value.(Node) switch typ := sub.GetType(); typ { case TypePeekNot: switch child := sub.(List).Front().Value.(Node); child.GetType() { case TypeCharacter: cs = append(cs, "'"+child.String()+"'") continue } case TypeDot: if len(cs) > 0 { peek = typ element0 = el.Next() } default: if len(cs) > 1 { peek = typ element0 = el } } break } } if peek != 0 { stats.seqIfNot++ ko.cJump(true, "position == len(p.Buffer)") w.lnPrint("switch p.Buffer[position] {") w.lnPrint("case %s:", strings.Join(cs, ", ")) w.indent++ ko.jump() w.indent-- w.lnPrint("default:") w.indent++ if peek == TypeDot { w.lnPrint("position++") chgok.pos = true } } for element := element0; element != nil; element = element.Next() { cko, c*k := compile(element.Value.(Node), ko) if element.Next() == nil { if chgok.pos { cko.pos = true } if chgok.thPos { cko.thPos = true } } updateFlags(cko, c*k) } if peek != 0 { w.indent-- w.lnPrint("}") } case TypePeekFor: sub := node.(List).Front().Value.(Node) if canCompilePeek(sub, false, ko) { return } l := w.newLabel() l.saveBlock() cko, c*k := compile(sub, ko) l.lrestore(nil, c*k.pos, c*k.thPos) chgko = cko case TypePeekNot: sub := node.(List).Front().Value.(Node) if canCompilePeek(sub, true, ko) { return } ok := w.newLabel() ok.saveBlock() cko, c*k := compile(sub, ok) ko.jump() if ok.used { ok.restore(cko.pos, cko.thPos) } chgko = c*k case TypeQuery: sub := node.(List).Front().Value.(Node) switch sub.GetType() { case TypeCharacter: w.lnPrint("matchChar('%v')", sub) chgok.pos = true return case TypeDot: w.lnPrint("matchDot()") chgok.pos = true return } qko := w.newLabel() qok := w.newLabel() qko.saveBlock() cko, c*k := compile(sub, qko) if qko.unsafe() { qok.jump() } if qko.used { qko.restore(cko.pos, cko.thPos) } if qko.unsafe() { qok.label() } chgok = c*k case TypeStar: again := w.newLabel() out := w.newLabel() again.label() out.saveBlock() cko, c*k := compile(node.(List).Front().Value.(Node), out) again.jump() out.restore(cko.pos, cko.thPos) chgok = c*k case TypePlus: again := w.newLabel() out := w.newLabel() updateFlags(compile(node.(List).Front().Value.(Node), ko)) again.label() out.saveBlock() cko, _ := compile(node.(List).Front().Value.(Node), out) again.jump() if out.used { out.restore(cko.pos, cko.thPos) } case TypeNil: default: fmt.Fprintf(os.Stderr, "illegal node type: %v\n", node.GetType()) } return } // dry compilation // figure out which items need to restore position resp. thunkPosition, // storing into w.saveFlags w.setDry(true) for element := t.Front(); element != nil; element = element.Next() { node := element.Value.(Node) if node.GetType() != TypeRule { continue } rule := node.(*rule) expression := rule.GetExpression() if expression == nilNode { continue } ko := w.newLabel() ko.sid = 0 if count, ok := t.rulesCount[rule.String()]; !ok { } else if t.inline && count == 1 && ko.id != 0 { continue } ko.save() cko, _ := compileExpression(rule, ko) if ko.used { ko.restore(cko.pos, cko.thPos) } } w.setDry(false) if Verbose { log.Printf("%+v\n", stats) } tpl := template.New("parser") tpl.Funcs(template.FuncMap{ "len": itemLength, "def": func(key string) string { return t.defines[key] }, "id": func(identifier string) string { if t.defines["noexport"] != "" { return identifier } return strings.Title(identifier) }, "stats": func() *statValues { return &stats }, "nvar": func() int { return nvar }, "numRules": func() int { return len(t.rules) }, "sortedRules": func() (r []*rule) { for el := t.Front(); el != nil; el = el.Next() { node := el.Value.(Node) if node.GetType() != TypeRule { continue } r = append(r, node.(*rule)) } return }, "hasCommit": func() bool { return counts[TypeCommit] > 0 }, "actionBits": func() (bits int) { for n := len(t.Actions); n != 0; n >>= 1 { bits++ } switch { case bits < 8: bits = 8 case bits < 16: bits = 16 case bits < 32: bits = 32 case bits < 64: bits = 64 } return }, }) if _, err := tpl.Parse(parserTemplate); err != nil { log.Fatal(err) } if err := tpl.Execute(w, t); err != nil { log.Fatal(err) } /* now for the real compile pass */ for element := t.Front(); element != nil; element = element.Next() { node := element.Value.(Node) if node.GetType() != TypeRule { continue } rule := node.(*rule) expression := rule.GetExpression() if expression == nilNode { fmt.Fprintf(os.Stderr, "rule '%v' used but not defined\n", rule) w.lnPrint("nil,") continue } ko := w.newLabel() ko.sid = 0 w.lnPrint("/* %v ", rule.GetId()) printRule(rule) print(" */") if count, ok := t.rulesCount[rule.String()]; !ok { fmt.Fprintf(os.Stderr, "rule '%v' defined but not used\n", rule) } else if t.inline && count == 1 && ko.id != 0 { w.lnPrint("nil,") continue } w.lnPrint("func() bool {") w.indent++ ko.save() cko, _ := compileExpression(rule, ko) w.lnPrint("return true") if ko.used { ko.restore(cko.pos, cko.thPos) w.lnPrint("return false") } w.indent-- w.lnPrint("},") } print("\n\t}") print("\n}\n") for _, s := range t.trailers { print("%s", s) } }