func (p *Parser) branch(jmp, target *obj.Prog) {
	jmp.To = obj.Addr{
		Type:  obj.TYPE_BRANCH,
		Index: 0,
	}
	jmp.To.Val = target
}

// zerorange zeroes the stack range [lo, hi) relative to frame, choosing
// among inline stores, a DUFFZERO call, and an explicit loop by size.
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if cnt < int64(4*gc.Widthptr) {
		// Small range: one store per word.
		for i := int64(0); i < cnt; i += int64(gc.Widthptr) {
			p = appendpp(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, 8+frame+lo+i)
		}
	} else if cnt <= int64(128*gc.Widthptr) {
		// Medium range: jump into duffzero at the offset that leaves
		// exactly cnt/Widthptr stores to execute.
		p = appendpp(p, ppc64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGRT1, 0)
		p.Reg = ppc64.REGSP
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
		f := gc.Sysfunc("duffzero")
		p.To = gc.Naddr(f)
		gc.Afunclit(&p.To, f)
		p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr))
	} else {
		// Large range: emit an explicit store loop.
		p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGTMP, 0)
		p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT1, 0)
		p.Reg = ppc64.REGSP
		p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, ppc64.REGTMP, 0)
		p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT2, 0)
		p.Reg = ppc64.REGRT1
		p = appendpp(p, ppc64.AMOVDU, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGRT1, int64(gc.Widthptr))
		p1 := p
		p = appendpp(p, ppc64.ACMP, obj.TYPE_REG, ppc64.REGRT1, 0, obj.TYPE_REG, ppc64.REGRT2, 0)
		p = appendpp(p, ppc64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
		gc.Patch(p, p1)
	}

	return p
}

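// A worked example of the DUFFZERO offset computed above, assuming (as
// on ppc64) that runtime·duffzero is a run of 128 four-byte MOVDU
// instructions, each clearing one pointer-sized word:
//
//	cnt = 64 bytes, Widthptr = 8  =>  64/8 = 8 words to clear
//	p.To.Offset = 4 * (128 - 8) = 480
//
// Entering duffzero 480 bytes in skips the first 120 stores, so exactly
// the last 8 execute before the routine returns.
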
func outcode(a int, g2 *Addr2) {
	var p *obj.Prog
	var pl *obj.Plist

	if asm.Pass == 1 {
		goto out
	}

	p = new(obj.Prog)
	*p = obj.Prog{}
	p.Ctxt = asm.Ctxt
	p.As = int16(a)
	p.Lineno = stmtline
	p.From = g2.from
	p.To = g2.to
	p.Pc = int64(asm.PC)

	if lastpc == nil {
		pl = obj.Linknewplist(asm.Ctxt)
		pl.Firstpc = p
	} else {
		lastpc.Link = p
	}
	lastpc = p

out:
	if a != obj.AGLOBL && a != obj.ADATA {
		asm.PC++
	}
}

func outgcode(a int, g1 *obj.Addr, reg int, g2, g3 *obj.Addr) {
	var p *obj.Prog
	var pl *obj.Plist

	if asm.Pass == 1 {
		goto out
	}

	p = asm.Ctxt.NewProg()
	p.As = int16(a)
	p.Lineno = stmtline
	if nosched != 0 {
		p.Mark |= ppc64.NOSCHED
	}
	p.From = *g1
	p.Reg = int16(reg)
	p.From3 = *g2
	p.To = *g3
	p.Pc = int64(asm.PC)

	if lastpc == nil {
		pl = obj.Linknewplist(asm.Ctxt)
		pl.Firstpc = p
	} else {
		lastpc.Link = p
	}
	lastpc = p

out:
	if a != obj.AGLOBL && a != obj.ADATA {
		asm.PC++
	}
}

func outcode(a int, g1 *obj.Addr, reg int, g2 *obj.Addr) {
	var p *obj.Prog
	var pl *obj.Plist

	if asm.Pass == 1 {
		goto out
	}

	if g1.Scale != 0 {
		if reg != 0 || g2.Scale != 0 {
			yyerror("bad addressing modes")
		}
		reg = int(g1.Scale)
	} else if g2.Scale != 0 {
		if reg != 0 {
			yyerror("bad addressing modes")
		}
		reg = int(g2.Scale)
	}

	p = asm.Ctxt.NewProg()
	p.As = int16(a)
	p.Lineno = stmtline
	if nosched != 0 {
		p.Mark |= ppc64.NOSCHED
	}
	p.From = *g1
	p.Reg = int16(reg)
	p.To = *g2
	p.Pc = int64(asm.PC)

	if lastpc == nil {
		pl = obj.Linknewplist(asm.Ctxt)
		pl.Firstpc = p
	} else {
		lastpc.Link = p
	}
	lastpc = p

out:
	if a != obj.AGLOBL && a != obj.ADATA {
		asm.PC++
	}
}

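// Note on the Scale checks above: the grammar appears to smuggle a second
// register operand through an Addr's Scale field, so exactly one of
// g1.Scale, g2.Scale, or the explicit reg argument may supply the middle
// register; any other combination is rejected as "bad addressing modes".
// For a hypothetical three-operand form such as ADD R3, R4, R5, the
// middle register would reach outcode through Scale and be moved into
// p.Reg here (an illustrative reading of the code, not a documented API).
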
func outcode(a, scond int32, g1 *obj.Addr, reg int32, g2 *obj.Addr) {
	var p *obj.Prog
	var pl *obj.Plist

	/* hack to make B.NE etc. work: turn it into the corresponding conditional */
	if a == arm.AB {
		a = int32(bcode[(scond^arm.C_SCOND_XOR)&0xf])
		scond = (scond &^ 0xf) | Always
	}

	if asm.Pass == 1 {
		goto out
	}

	p = new(obj.Prog)
	*p = obj.Prog{}
	p.Ctxt = asm.Ctxt
	p.As = int16(a)
	p.Lineno = stmtline
	p.Scond = uint8(scond)
	p.From = *g1
	p.Reg = int16(reg)
	p.To = *g2
	p.Pc = int64(asm.PC)

	if lastpc == nil {
		pl = obj.Linknewplist(asm.Ctxt)
		pl.Firstpc = p
	} else {
		lastpc.Link = p
	}
	lastpc = p

out:
	if a != obj.AGLOBL && a != obj.ADATA {
		asm.PC++
	}
}

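// Worked example of the conditional-branch hack above: B.NE label arrives
// as a == arm.AB with the NE condition encoded in the low four bits of
// scond. The bcode table lookup rewrites the opcode to the dedicated
// arm.ABNE, and the condition bits are reset to Always so the branch
// itself is emitted unconditionally (the condition now lives in the
// opcode).
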
// zerorange is the ARM analogue: zero the stack range [lo, hi) relative
// to frame. r0 records whether R0 has already been zeroed, so the MOVW
// $0, R0 is emitted at most once per frame.
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, r0 *uint32) *obj.Prog {
	cnt := hi - lo
	if cnt == 0 {
		return p
	}
	if *r0 == 0 {
		p = appendpp(p, arm.AMOVW, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, arm.REG_R0, 0)
		*r0 = 1
	}

	if cnt < int64(4*gc.Widthptr) {
		// Small range: one store per word.
		for i := int64(0); i < cnt; i += int64(gc.Widthptr) {
			p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REGSP, int32(4+frame+lo+i))
		}
	} else if !gc.Nacl && (cnt <= int64(128*gc.Widthptr)) {
		// Medium range (not on NaCl): jump into duffzero.
		p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0)
		p.Reg = arm.REGSP
		p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
		f := gc.Sysfunc("duffzero")
		p.To = gc.Naddr(f)
		gc.Afunclit(&p.To, f)
		p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr))
	} else {
		// Large range: explicit store loop with a post-incrementing MOVW.
		p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0)
		p.Reg = arm.REGSP
		p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(cnt), obj.TYPE_REG, arm.REG_R2, 0)
		p.Reg = arm.REG_R1
		p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REG_R1, 4)
		p1 := p
		p.Scond |= arm.C_PBIT
		p = appendpp(p, arm.ACMP, obj.TYPE_REG, arm.REG_R1, 0, obj.TYPE_NONE, 0, 0)
		p.Reg = arm.REG_R2
		p = appendpp(p, arm.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
		gc.Patch(p, p1)
	}

	return p
}

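// Rough shape of the sequence emitted by the final else branch above,
// written as ARM assembly (R1 is the store cursor, R2 the end pointer):
//
//	ADD    $(4+frame+lo), SP, R1
//	ADD    $cnt, R1, R2
//	loop:
//	MOVW.P R0, 4(R1)   // *R1 = 0, then R1 += 4 (post-increment)
//	CMP    R1, R2
//	BNE    loop
//
// The BNE is patched back to the MOVW.P, so the loop runs until the
// cursor reaches the end pointer.
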
/*
 * generate:
 *	res = n;
 * simplifies and calls gmove.
 */
func cgen(n *gc.Node, res *gc.Node) {
	//print("cgen %N(%d) -> %N(%d)\n", n, n->addable, res, res->addable);
	if gc.Debug['g'] != 0 {
		gc.Dump("\ncgen-n", n)
		gc.Dump("cgen-res", res)
	}

	if n == nil || n.Type == nil {
		return
	}

	if res == nil || res.Type == nil {
		gc.Fatal("cgen: res nil")
	}

	for n.Op == gc.OCONVNOP {
		n = n.Left
	}

	switch n.Op {
	case gc.OSLICE, gc.OSLICEARR, gc.OSLICESTR, gc.OSLICE3, gc.OSLICE3ARR:
		if res.Op != gc.ONAME || res.Addable == 0 {
			var n1 gc.Node
			gc.Tempname(&n1, n.Type)
			gc.Cgen_slice(n, &n1)
			cgen(&n1, res)
		} else {
			gc.Cgen_slice(n, res)
		}
		return

	case gc.OEFACE:
		if res.Op != gc.ONAME || res.Addable == 0 {
			var n1 gc.Node
			gc.Tempname(&n1, n.Type)
			gc.Cgen_eface(n, &n1)
			cgen(&n1, res)
		} else {
			gc.Cgen_eface(n, res)
		}
		return
	}

	if n.Ullman >= gc.UINF {
		if n.Op == gc.OINDREG {
			gc.Fatal("cgen: this is going to miscompile")
		}

		if res.Ullman >= gc.UINF {
			var n1 gc.Node
			gc.Tempname(&n1, n.Type)
			cgen(n, &n1)
			cgen(&n1, res)
			return
		}
	}

	if gc.Isfat(n.Type) {
		if n.Type.Width < 0 {
			gc.Fatal("forgot to compute width for %v", gc.Tconv(n.Type, 0))
		}
		sgen(n, res, n.Type.Width)
		return
	}

	if res.Addable == 0 {
		if n.Ullman > res.Ullman {
			var n1 gc.Node
			regalloc(&n1, n.Type, res)
			cgen(n, &n1)
			if n1.Ullman > res.Ullman {
				gc.Dump("n1", &n1)
				gc.Dump("res", res)
				gc.Fatal("loop in cgen")
			}

			cgen(&n1, res)
			regfree(&n1)
			return
		}

		var f int
		if res.Ullman >= gc.UINF {
			goto gen
		}

		if gc.Complexop(n, res) {
			gc.Complexgen(n, res)
			return
		}

		f = 1 // gen thru register
		switch n.Op {
		case gc.OLITERAL:
			if gc.Smallintconst(n) {
				f = 0
			}

		case gc.OREGISTER:
			f = 0
		}

		if !gc.Iscomplex[n.Type.Etype] {
			a := optoas(gc.OAS, res.Type)
			var addr obj.Addr
			if sudoaddable(a, res, &addr) {
				var p1 *obj.Prog
				if f != 0 {
					var n2 gc.Node
					regalloc(&n2, res.Type, nil)
					cgen(n, &n2)
					p1 = gins(a, &n2, nil)
					regfree(&n2)
				} else {
					p1 = gins(a, n, nil)
				}
				p1.To = addr
				if gc.Debug['g'] != 0 {
					fmt.Printf("%v [ignore previous line]\n", p1)
				}
				sudoclean()
				return
			}
		}

	gen:
		var n1 gc.Node
		igen(res, &n1, nil)
		cgen(n, &n1)
		regfree(&n1)
		return
	}

	// update addressability for string, slice
	// can't do in walk because n->left->addable
	// changes if n->left is an escaping local variable.
	switch n.Op {
	case gc.OSPTR, gc.OLEN:
		if gc.Isslice(n.Left.Type) || gc.Istype(n.Left.Type, gc.TSTRING) {
			n.Addable = n.Left.Addable
		}

	case gc.OCAP:
		if gc.Isslice(n.Left.Type) {
			n.Addable = n.Left.Addable
		}

	case gc.OITAB:
		n.Addable = n.Left.Addable
	}

	if gc.Complexop(n, res) {
		gc.Complexgen(n, res)
		return
	}

	// if both are addressable, move
	if n.Addable != 0 {
		if n.Op == gc.OREGISTER || res.Op == gc.OREGISTER {
			gmove(n, res)
		} else {
			var n1 gc.Node
			regalloc(&n1, n.Type, nil)
			gmove(n, &n1)
			cgen(&n1, res)
			regfree(&n1)
		}

		return
	}

	nl := n.Left
	nr := n.Right

	if nl != nil && nl.Ullman >= gc.UINF {
		if nr != nil && nr.Ullman >= gc.UINF {
			var n1 gc.Node
			gc.Tempname(&n1, nl.Type)
			cgen(nl, &n1)
			n2 := *n
			n2.Left = &n1
			cgen(&n2, res)
			return
		}
	}

	if !gc.Iscomplex[n.Type.Etype] {
		a := optoas(gc.OAS, n.Type)
		var addr obj.Addr
		if sudoaddable(a, n, &addr) {
			if res.Op == gc.OREGISTER {
				p1 := gins(a, nil, res)
				p1.From = addr
			} else {
				var n2 gc.Node
				regalloc(&n2, n.Type, nil)
				p1 := gins(a, nil, &n2)
				p1.From = addr
				gins(a, &n2, res)
				regfree(&n2)
			}

			sudoclean()
			return
		}
	}

	// TODO(minux): we shouldn't reverse FP comparisons, but then we need to synthesize
	// OGE, OLE, and ONE ourselves.
	// if(nl != N && isfloat[n->type->etype] && isfloat[nl->type->etype]) goto flt;

	var a int
	switch n.Op {
	default:
		gc.Dump("cgen", n)
		gc.Fatal("cgen: unknown op %v", gc.Nconv(n, obj.FmtShort|obj.FmtSign))

		// these call bgen to get a bool value
	case gc.OOROR, gc.OANDAND,
		gc.OEQ, gc.ONE,
		gc.OLT, gc.OLE,
		gc.OGE, gc.OGT,
		gc.ONOT:
		p1 := gc.Gbranch(ppc64.ABR, nil, 0)
		p2 := gc.Pc
		gmove(gc.Nodbool(true), res)
		p3 := gc.Gbranch(ppc64.ABR, nil, 0)
		gc.Patch(p1, gc.Pc)
		bgen(n, true, 0, p2)
		gmove(gc.Nodbool(false), res)
		gc.Patch(p3, gc.Pc)
		return

	case gc.OPLUS:
		cgen(nl, res)
		return

		// unary
	case gc.OCOM:
		a := optoas(gc.OXOR, nl.Type)

		var n1 gc.Node
		regalloc(&n1, nl.Type, nil)
		cgen(nl, &n1)
		var n2 gc.Node
		gc.Nodconst(&n2, nl.Type, -1)
		gins(a, &n2, &n1)
		gmove(&n1, res)
		regfree(&n1)
		return

	case gc.OMINUS:
		if gc.Isfloat[nl.Type.Etype] {
			nr = gc.Nodintconst(-1)
			gc.Convlit(&nr, n.Type)
			a = optoas(gc.OMUL, nl.Type)
			goto sbop
		}

		a := optoas(int(n.Op), nl.Type)

		// unary
		var n1 gc.Node
		regalloc(&n1, nl.Type, res)

		cgen(nl, &n1)
		gins(a, nil, &n1)
		gmove(&n1, res)
		regfree(&n1)
		return

		// symmetric binary
	case gc.OAND, gc.OOR, gc.OXOR, gc.OADD, gc.OMUL:
		a = optoas(int(n.Op), nl.Type)
		goto sbop

		// asymmetric binary
	case gc.OSUB:
		a = optoas(int(n.Op), nl.Type)
		goto abop

	case gc.OHMUL:
		cgen_hmul(nl, nr, res)

	case gc.OCONV:
		if n.Type.Width > nl.Type.Width {
			// If loading from memory, do conversion during load,
			// so as to avoid use of 8-bit register in, say, int(*byteptr).
			switch nl.Op {
			case gc.ODOT, gc.ODOTPTR, gc.OINDEX, gc.OIND, gc.ONAME:
				var n1 gc.Node
				igen(nl, &n1, res)

				var n2 gc.Node
				regalloc(&n2, n.Type, res)
				gmove(&n1, &n2)
				gmove(&n2, res)
				regfree(&n2)
				regfree(&n1)
				return
			}
		}

		var n1 gc.Node
		regalloc(&n1, nl.Type, res)

		var n2 gc.Node
		regalloc(&n2, n.Type, &n1)
		cgen(nl, &n1)

		// if we do the conversion n1 -> n2 here
		// reusing the register, then gmove won't
		// have to allocate its own register.
		gmove(&n1, &n2)

		gmove(&n2, res)
		regfree(&n2)
		regfree(&n1)

	case gc.ODOT,
		gc.ODOTPTR,
		gc.OINDEX,
		gc.OIND,
		gc.ONAME: // PHEAP or PPARAMREF var
		var n1 gc.Node
		igen(n, &n1, res)

		gmove(&n1, res)
		regfree(&n1)

		// interface table is first word of interface value
	case gc.OITAB:
		var n1 gc.Node
		igen(nl, &n1, res)

		n1.Type = n.Type
		gmove(&n1, res)
		regfree(&n1)

		// pointer is the first word of string or slice.
	case gc.OSPTR:
		if gc.Isconst(nl, gc.CTSTR) {
			var n1 gc.Node
			regalloc(&n1, gc.Types[gc.Tptr], res)
			p1 := gins(ppc64.AMOVD, nil, &n1)
			gc.Datastring(nl.Val.U.Sval, &p1.From)
			gmove(&n1, res)
			regfree(&n1)
			break
		}

		var n1 gc.Node
		igen(nl, &n1, res)
		n1.Type = n.Type
		gmove(&n1, res)
		regfree(&n1)

	case gc.OLEN:
		if gc.Istype(nl.Type, gc.TMAP) || gc.Istype(nl.Type, gc.TCHAN) {
			// map and chan have len in the first int-sized word.
			// a zero pointer means zero length
			var n1 gc.Node
			regalloc(&n1, gc.Types[gc.Tptr], res)

			cgen(nl, &n1)

			var n2 gc.Node
			gc.Nodconst(&n2, gc.Types[gc.Tptr], 0)
			gins(optoas(gc.OCMP, gc.Types[gc.Tptr]), &n1, &n2)
			p1 := gc.Gbranch(optoas(gc.OEQ, gc.Types[gc.Tptr]), nil, 0)

			n2 = n1
			n2.Op = gc.OINDREG
			n2.Type = gc.Types[gc.Simtype[gc.TINT]]
			gmove(&n2, &n1)

			gc.Patch(p1, gc.Pc)

			gmove(&n1, res)
			regfree(&n1)
			break
		}

		if gc.Istype(nl.Type, gc.TSTRING) || gc.Isslice(nl.Type) {
			// both slice and string have len one pointer into the struct.
			// a zero pointer means zero length
			var n1 gc.Node
			igen(nl, &n1, res)

			n1.Type = gc.Types[gc.Simtype[gc.TUINT]]
			n1.Xoffset += int64(gc.Array_nel)
			gmove(&n1, res)
			regfree(&n1)
			break
		}

		gc.Fatal("cgen: OLEN: unknown type %v", gc.Tconv(nl.Type, obj.FmtLong))

	case gc.OCAP:
		if gc.Istype(nl.Type, gc.TCHAN) {
			// chan has cap in the second int-sized word.
			// a zero pointer means zero length
			var n1 gc.Node
			regalloc(&n1, gc.Types[gc.Tptr], res)

			cgen(nl, &n1)

			var n2 gc.Node
			gc.Nodconst(&n2, gc.Types[gc.Tptr], 0)
			gins(optoas(gc.OCMP, gc.Types[gc.Tptr]), &n1, &n2)
			p1 := gc.Gbranch(optoas(gc.OEQ, gc.Types[gc.Tptr]), nil, 0)

			n2 = n1
			n2.Op = gc.OINDREG
			n2.Xoffset = int64(gc.Widthint)
			n2.Type = gc.Types[gc.Simtype[gc.TINT]]
			gmove(&n2, &n1)

			gc.Patch(p1, gc.Pc)

			gmove(&n1, res)
			regfree(&n1)
			break
		}

		if gc.Isslice(nl.Type) {
			var n1 gc.Node
			igen(nl, &n1, res)
			n1.Type = gc.Types[gc.Simtype[gc.TUINT]]
			n1.Xoffset += int64(gc.Array_cap)
			gmove(&n1, res)
			regfree(&n1)
			break
		}

		gc.Fatal("cgen: OCAP: unknown type %v", gc.Tconv(nl.Type, obj.FmtLong))

	case gc.OADDR:
		if n.Bounded { // let race detector avoid nil checks
			gc.Disable_checknil++
		}
		agen(nl, res)
		if n.Bounded {
			gc.Disable_checknil--
		}

	case gc.OCALLMETH:
		gc.Cgen_callmeth(n, 0)
		cgen_callret(n, res)

	case gc.OCALLINTER:
		cgen_callinter(n, res, 0)
		cgen_callret(n, res)

	case gc.OCALLFUNC:
		cgen_call(n, 0)
		cgen_callret(n, res)

	case gc.OMOD, gc.ODIV:
		if gc.Isfloat[n.Type.Etype] {
			a = optoas(int(n.Op), nl.Type)
			goto abop
		}

		if nl.Ullman >= nr.Ullman {
			var n1 gc.Node
			regalloc(&n1, nl.Type, res)
			cgen(nl, &n1)
			cgen_div(int(n.Op), &n1, nr, res)
			regfree(&n1)
		} else {
			var n2 gc.Node
			if !gc.Smallintconst(nr) {
				regalloc(&n2, nr.Type, res)
				cgen(nr, &n2)
			} else {
				n2 = *nr
			}

			cgen_div(int(n.Op), nl, &n2, res)
			if n2.Op != gc.OLITERAL {
				regfree(&n2)
			}
		}

	case gc.OLSH, gc.ORSH, gc.OLROT:
		cgen_shift(int(n.Op), n.Bounded, nl, nr, res)
	}

	return

	/*
	 * put simplest on right - we'll generate into left
	 * and then adjust it using the computation of right.
	 * constants and variables have the same ullman
	 * count, so look for constants specially.
	 *
	 * an integer constant we can use as an immediate
	 * is simpler than a variable - we can use the immediate
	 * in the adjustment instruction directly - so it goes
	 * on the right.
	 *
	 * other constants, like big integers or floating point
	 * constants, require a mov into a register, so those
	 * might as well go on the left, so we can reuse that
	 * register for the computation.
	 */
sbop: // symmetric binary
	if nl.Ullman < nr.Ullman || (nl.Ullman == nr.Ullman && (gc.Smallintconst(nl) || (nr.Op == gc.OLITERAL && !gc.Smallintconst(nr)))) {
		r := nl
		nl = nr
		nr = r
	}

abop: // asymmetric binary
	var n1 gc.Node
	var n2 gc.Node
	if nl.Ullman >= nr.Ullman {
		regalloc(&n1, nl.Type, res)
		cgen(nl, &n1)

		/*
		 * This generates smaller code - it avoids a MOV - but it's
		 * easily 10% slower due to not being able to
		 * optimize/manipulate the move.
		 * To see, run: go test -bench . crypto/md5
		 * with and without.
		 *
		if(sudoaddable(a, nr, &addr)) {
			p1 = gins(a, N, &n1);
			p1->from = addr;
			gmove(&n1, res);
			sudoclean();
			regfree(&n1);
			goto ret;
		}
		*/
		// TODO(minux): enable using constants directly in certain instructions.
		//if(smallintconst(nr))
		//	n2 = *nr;
		//else {
		regalloc(&n2, nr.Type, nil)
		cgen(nr, &n2)
		//}
	} else {
		//if(smallintconst(nr))
		//	n2 = *nr;
		//else {
		regalloc(&n2, nr.Type, res)
		cgen(nr, &n2)
		//}
		regalloc(&n1, nl.Type, nil)
		cgen(nl, &n1)
	}

	gins(a, &n2, &n1)

	// Normalize result for types smaller than word.
	if n.Type.Width < int64(gc.Widthreg) {
		switch n.Op {
		case gc.OADD, gc.OSUB, gc.OMUL, gc.OLSH:
			gins(optoas(gc.OAS, n.Type), &n1, &n1)
		}
	}

	gmove(&n1, res)
	regfree(&n1)
	if n2.Op != gc.OLITERAL {
		regfree(&n2)
	}
	return
}

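// Concrete illustration of the sbop operand swap above: for x + 3 the
// small integer constant stays on the right so it can be folded into the
// instruction as an immediate, while for x + 3.5 the float constant must
// be loaded into a register anyway, so it goes on the left and that
// register is reused as the destination of the computation.
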
func mergetemp(firstp *obj.Prog) {
	const (
		debugmerge = 1
	)

	g := Flowstart(firstp, nil)
	if g == nil {
		return
	}

	// Build list of all mergeable variables.
	nvar := 0
	for l := Curfn.Dcl; l != nil; l = l.Next {
		if canmerge(l.N) {
			nvar++
		}
	}

	var_ := make([]TempVar, nvar)
	nvar = 0
	var n *Node
	var v *TempVar
	for l := Curfn.Dcl; l != nil; l = l.Next {
		n = l.N
		if canmerge(n) {
			v = &var_[nvar]
			nvar++
			n.Opt = v
			v.node = n
		}
	}

	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
	var p *obj.Prog
	var info ProgInfo
	for f := g.Start; f != nil; f = f.Link {
		p = f.Prog
		info = Thearch.Proginfo(p)

		if p.From.Node != nil && ((p.From.Node).(*Node)).Opt != nil && p.To.Node != nil && ((p.To.Node).(*Node)).Opt != nil {
			Fatal("double node %v", p)
		}

		v = nil
		n, _ = p.From.Node.(*Node)
		if n != nil {
			v, _ = n.Opt.(*TempVar)
		}
		if v == nil {
			n, _ = p.To.Node.(*Node)
			if n != nil {
				v, _ = n.Opt.(*TempVar)
			}
		}

		if v != nil {
			if v.def == nil {
				v.def = f
			}
			f.Data = v.use
			v.use = f
			if n == p.From.Node && (info.Flags&LeftAddr != 0) {
				v.addr = 1
			}
		}
	}

	if debugmerge > 1 && Debug['v'] != 0 {
		Dumpit("before", g.Start, 0)
	}

	nkill := 0

	// Special case.
	var p1 *obj.Prog
	var info1 ProgInfo
	var f *Flow
	for i := 0; i < len(var_); i++ {
		v = &var_[i]
		if v.addr != 0 {
			continue
		}

		// Used in only one instruction, which had better be a write.
		f = v.use
		if f != nil && f.Data.(*Flow) == nil {
			p = f.Prog
			info = Thearch.Proginfo(p)
			if p.To.Node == v.node && (info.Flags&RightWrite != 0) && info.Flags&RightRead == 0 {
				p.As = obj.ANOP
				p.To = obj.Addr{}
				v.removed = 1
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop write-only %v\n", Sconv(v.node.Sym, 0))
				}
			} else {
				Fatal("temp used and not set: %v", p)
			}
			nkill++
			continue
		}

		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
		f = v.use
		if f != nil && f.Link == f.Data.(*Flow) && (f.Data.(*Flow)).Data.(*Flow) == nil && Uniqp(f.Link) == f {
			p = f.Prog
			info = Thearch.Proginfo(p)
			p1 = f.Link.Prog
			info1 = Thearch.Proginfo(p1)
			const (
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD
			)
			if p.From.Node == v.node && p1.To.Node == v.node && (info.Flags&Move != 0) && (info.Flags|info1.Flags)&(LeftAddr|RightAddr) == 0 && info.Flags&SizeAny == info1.Flags&SizeAny {
				p1.From = p.From
				Thearch.Excise(f)
				v.removed = 1
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop immediate-use %v\n", Sconv(v.node.Sym, 0))
				}
			}

			nkill++
			continue
		}
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
	// to clear all the r->active words after each variable.
	gen := int32(0)

	for i := 0; i < len(var_); i++ {
		v = &var_[i]
		gen++
		for f = v.use; f != nil; f = f.Data.(*Flow) {
			mergewalk(v, f, uint32(gen))
		}
		if v.addr != 0 {
			gen++
			for f = v.use; f != nil; f = f.Data.(*Flow) {
				varkillwalk(v, f, uint32(gen))
			}
		}
	}

	// Sort variables by start.
	bystart := make([]*TempVar, len(var_))

	for i := 0; i < len(var_); i++ {
		bystart[i] = &var_[i]
	}
	sort.Sort(startcmp(bystart[:len(var_)]))

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse := make([]*TempVar, len(var_))

	ninuse := 0
	nfree := len(var_)
	var t *Type
	var v1 *TempVar
	var j int
	for i := 0; i < len(var_); i++ {
		v = bystart[i]
		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%d\n", Nconv(v.node, obj.FmtSharp), v.removed)
		}

		if v.removed != 0 {
			continue
		}

		// Expire no longer in use.
		for ninuse > 0 && inuse[ninuse-1].end < v.start {
			ninuse--
			v1 = inuse[ninuse]
			nfree--
			inuse[nfree] = v1
		}

		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%d nfree=%d nvar=%d\n", Nconv(v.node, obj.FmtSharp), v.removed, nfree, len(var_))
		}

		// Find old temp to reuse if possible.
		t = v.node.Type

		for j = nfree; j < len(var_); j++ {
			v1 = inuse[j]
			if debugmerge > 0 && Debug['v'] != 0 {
				fmt.Printf("consider %v: maybe %v: type=%v,%v addrtaken=%v,%v\n", Nconv(v.node, obj.FmtSharp), Nconv(v1.node, obj.FmtSharp), Tconv(t, 0), Tconv(v1.node.Type, 0), v.node.Addrtaken, v1.node.Addrtaken)
			}

			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if Eqtype(t, v1.node.Type) && v.node.Addrtaken == v1.node.Addrtaken {
				inuse[j] = inuse[nfree]
				nfree++
				if v1.merge != nil {
					v.merge = v1.merge
				} else {
					v.merge = v1
				}
				nkill++
				break
			}
		}

		// Sort v into inuse.
		j = ninuse

		ninuse++
		for j > 0 && inuse[j-1].end < v.end {
			inuse[j] = inuse[j-1]
			j--
		}

		inuse[j] = v
	}

	if debugmerge > 0 && Debug['v'] != 0 {
		fmt.Printf("%v [%d - %d]\n", Sconv(Curfn.Nname.Sym, 0), len(var_), nkill)
		var v *TempVar
		for i := 0; i < len(var_); i++ {
			v = &var_[i]
			fmt.Printf("var %v %v %d-%d", Nconv(v.node, obj.FmtSharp), Tconv(v.node.Type, 0), v.start, v.end)
			if v.addr != 0 {
				fmt.Printf(" addr=1")
			}
			if v.removed != 0 {
				fmt.Printf(" dead=1")
			}
			if v.merge != nil {
				fmt.Printf(" merge %v", Nconv(v.merge.node, obj.FmtSharp))
			}
			if v.start == v.end && v.def != nil {
				fmt.Printf(" %v", v.def.Prog)
			}
			fmt.Printf("\n")
		}

		if debugmerge > 1 && Debug['v'] != 0 {
			Dumpit("after", g.Start, 0)
		}
	}

	// Update node references to use merged temporaries.
	for f := g.Start; f != nil; f = f.Link {
		p = f.Prog
		n, _ = p.From.Node.(*Node)
		if n != nil {
			v, _ = n.Opt.(*TempVar)
			if v != nil && v.merge != nil {
				p.From.Node = v.merge.node
			}
		}
		n, _ = p.To.Node.(*Node)
		if n != nil {
			v, _ = n.Opt.(*TempVar)
			if v != nil && v.merge != nil {
				p.To.Node = v.merge.node
			}
		}
	}

	// Delete merged nodes from declaration list.
	var l *NodeList
	for lp := &Curfn.Dcl; ; {
		l = *lp
		if l == nil {
			break
		}

		Curfn.Dcl.End = l
		n = l.N
		v, _ = n.Opt.(*TempVar)
		if v != nil && (v.merge != nil || v.removed != 0) {
			*lp = l.Next
			continue
		}

		lp = &l.Next
	}

	// Clear aux structures.
	for i := 0; i < len(var_); i++ {
		var_[i].node.Opt = nil
	}

	Flowend(g)
}

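// The inuse loop above is a greedy linear scan over [start, end] live
// ranges. A minimal, self-contained sketch of the same reuse strategy
// (hypothetical types, not part of this package; "slots" stand in for
// merged temporaries, and the type/addrtaken matching is omitted):

package main

import (
	"fmt"
	"sort"
)

// interval is a hypothetical stand-in for a TempVar's live range.
type interval struct {
	start, end int
	slot       int
}

// assign visits intervals in order of start, expiring dead ones and
// reusing their slots; it returns how many distinct slots were needed.
func assign(vars []interval) int {
	sort.Slice(vars, func(i, j int) bool { return vars[i].start < vars[j].start })
	var free []int       // slots whose intervals have expired
	var live []*interval // intervals still in use
	next := 0
	for i := range vars {
		v := &vars[i]
		// Expire intervals that end before v starts
		// (cf. "Expire no longer in use" above).
		kept := live[:0]
		for _, l := range live {
			if l.end < v.start {
				free = append(free, l.slot)
			} else {
				kept = append(kept, l)
			}
		}
		live = kept
		// Reuse an expired slot if possible (cf. "Find old temp to
		// reuse"); mergetemp additionally requires matching types
		// and addrtaken bits before merging.
		if n := len(free); n > 0 {
			v.slot = free[n-1]
			free = free[:n-1]
		} else {
			v.slot = next
			next++
		}
		live = append(live, v)
	}
	return next
}

func main() {
	vars := []interval{{start: 0, end: 3}, {start: 1, end: 2}, {start: 4, end: 5}}
	fmt.Println(assign(vars)) // 2: the third temp reuses an expired slot
}
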
func peep(firstp *obj.Prog) {
	g := (*gc.Graph)(gc.Flowstart(firstp, nil))
	if g == nil {
		return
	}
	gactive = 0

	var p *obj.Prog
	if false {
		// constant propagation
		// find MOV $con,R followed by
		// another MOV $con,R without
		// setting R in the interim
		for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
			p = r.Prog
			switch p.As {
			case s390x.AMOVB,
				s390x.AMOVW,
				s390x.AMOVD:
				if regtyp(&p.To) {
					if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
						conprop(r)
					}
				}
			}
		}
	}

	var r *gc.Flow
	var t int
loop1:
	// if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
	//	gc.Dumpit("loop1", g.Start, 0)
	// }
	t = 0

	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog

		// TODO(austin) Handle smaller moves. arm and amd64
		// distinguish between moves that *must* sign/zero
		// extend and moves that don't care, so they can
		// eliminate moves that don't care without breaking
		// moves that do care. This might let us simplify or
		// remove the next peep loop, too.
		if p.As == s390x.AMOVD || p.As == s390x.AFMOVD {
			if regtyp(&p.To) {
				// Try to eliminate reg->reg moves
				if regtyp(&p.From) {
					if p.From.Type == p.To.Type {
						if copyprop(r) {
							excise(r)
							t++
						} else if subprop(r) && copyprop(r) {
							excise(r)
							t++
						}
					}
				}

				// Convert uses of $0 to uses of R0 and
				// propagate R0
				if regzer(&p.From) != 0 {
					if p.To.Type == obj.TYPE_REG {
						p.From.Type = obj.TYPE_REG
						p.From.Reg = s390x.REGZERO
						if copyprop(r) {
							excise(r)
							t++
						} else if subprop(r) && copyprop(r) {
							excise(r)
							t++
						}
					}
				}
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("pass7 copyprop", g.Start, 0)
	}

	/*
	 * look for MOVB x,R; MOVB R,R (for small MOVs not handled above)
	 */
	var p1 *obj.Prog
	var r1 *gc.Flow
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		default:
			continue

		case s390x.AMOVH,
			s390x.AMOVHZ,
			s390x.AMOVB,
			s390x.AMOVBZ,
			s390x.AMOVW,
			s390x.AMOVWZ:
			if p.To.Type != obj.TYPE_REG {
				continue
			}
		}

		r1 = r.Link
		if r1 == nil {
			continue
		}
		p1 = r1.Prog
		if p1.As != p.As {
			continue
		}
		if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg {
			continue
		}
		if p1.To.Type != obj.TYPE_REG || p1.To.Reg != p.To.Reg {
			continue
		}
		excise(r1)
	}

	if gc.Debug['P'] > 1 {
		goto ret /* allow following code improvement to be suppressed */
	}

	if gc.Debug['p'] == 0 {
		// load pipelining
		// push any load from memory as early as possible
		// to give it time to complete before use.
		for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
			p = r.Prog
			switch p.As {
			case s390x.AMOVB,
				s390x.AMOVW,
				s390x.AMOVD:
				if regtyp(&p.To) && !regconsttyp(&p.From) {
					pushback(r)
				}
			}
		}
		if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
			gc.Dumpit("pass8 push load as early as possible", g.Start, 0)
		}
	}

	/*
	 * look for OP a, b, c; MOV c, d; -> OP a, b, d;
	 */
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		if (gc.Debugmergeopmv != -1) && (mergeopmv_cnt >= gc.Debugmergeopmv) {
			break
		}
		p = r.Prog
		switch p.As {
		case s390x.AADD, s390x.AADDC, s390x.AADDME, s390x.AADDE, s390x.AADDZE,
			s390x.AAND, s390x.AANDN,
			s390x.ADIVW, s390x.ADIVWU, s390x.ADIVD, s390x.ADIVDU,
			s390x.AMULLW, s390x.AMULHD, s390x.AMULHDU, s390x.AMULLD,
			s390x.ANAND, s390x.ANOR, s390x.AOR, s390x.AORN,
			s390x.AREM, s390x.AREMU, s390x.AREMD, s390x.AREMDU,
			s390x.ARLWMI, s390x.ARLWNM,
			s390x.ASLW, s390x.ASRAW, s390x.ASRW, s390x.ASLD, s390x.ASRAD, s390x.ASRD,
			s390x.ASUB, s390x.ASUBC, s390x.ASUBME, s390x.ASUBE, s390x.ASUBZE,
			s390x.AXOR:
			if p.To.Type != obj.TYPE_REG {
				continue
			}
			if p.Reg == 0 { // Only for 3 ops instruction
				continue
			}

		default:
			continue
		}

		r1 := r.Link
		for ; r1 != nil; r1 = r1.Link {
			if r1.Prog.As != obj.ANOP {
				break
			}
		}
		if r1 == nil {
			continue
		}

		p1 := r1.Prog
		switch p1.As {
		case s390x.AMOVD, s390x.AMOVW, s390x.AMOVWZ, s390x.AMOVH, s390x.AMOVHZ, s390x.AMOVB, s390x.AMOVBZ:
			if p1.To.Type != obj.TYPE_REG {
				continue
			}

		default:
			continue
		}

		if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg {
			continue
		}
		if trymergeopmv(r1) {
			p.To = p1.To
			excise(r1)
			mergeopmv_cnt++
		}
	}
	if gc.Debug['v'] != 0 {
		gc.Dumpit("Merge operation and move", g.Start, 0)
	}

	/*
	 * look for CMP x, y; Branch -> Compare and branch
	 */
	if gc.Debugcnb == 0 {
		goto ret
	}
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		if (gc.Debugcnb != -1) && (cnb_cnt >= gc.Debugcnb) {
			break
		}
		p = r.Prog
		r1 = gc.Uniqs(r)
		if r1 == nil {
			continue
		}
		p1 = r1.Prog

		switch p.As {
		case s390x.ACMP:
			switch p1.As {
			case s390x.ABCL, s390x.ABC:
				continue
			case s390x.ABEQ:
				t = s390x.ACMPBEQ
			case s390x.ABGE:
				t = s390x.ACMPBGE
			case s390x.ABGT:
				t = s390x.ACMPBGT
			case s390x.ABLE:
				t = s390x.ACMPBLE
			case s390x.ABLT:
				t = s390x.ACMPBLT
			case s390x.ABNE:
				t = s390x.ACMPBNE
			default:
				continue
			}

		case s390x.ACMPU:
			switch p1.As {
			case s390x.ABCL, s390x.ABC:
				continue
			case s390x.ABEQ:
				t = s390x.ACMPUBEQ
			case s390x.ABGE:
				t = s390x.ACMPUBGE
			case s390x.ABGT:
				t = s390x.ACMPUBGT
			case s390x.ABLE:
				t = s390x.ACMPUBLE
			case s390x.ABLT:
				t = s390x.ACMPUBLT
			case s390x.ABNE:
				t = s390x.ACMPUBNE
			default:
				continue
			}

		case s390x.ACMPW, s390x.ACMPWU:
			continue

		default:
			continue
		}

		if gc.Debug['D'] != 0 {
			fmt.Printf("cnb %v; %v -> ", p, p1)
		}

		if p1.To.Sym != nil {
			continue
		}

		if p.To.Type == obj.TYPE_REG {
			p1.As = int16(t)
			p1.From = p.From
			p1.Reg = p.To.Reg
			p1.From3 = nil
		} else if p.To.Type == obj.TYPE_CONST {
			switch p.As {
			case s390x.ACMP, s390x.ACMPW:
				if (p.To.Offset < -(1 << 7)) || (p.To.Offset >= ((1 << 7) - 1)) {
					continue
				}
			case s390x.ACMPU, s390x.ACMPWU:
				if p.To.Offset >= (1 << 8) {
					continue
				}
			default:
			}
			p1.As = int16(t)
			p1.From = p.From
			p1.Reg = 0
			p1.From3 = new(obj.Addr)
			*(p1.From3) = p.To
		} else {
			continue
		}

		if gc.Debug['D'] != 0 {
			fmt.Printf("%v\n", p1)
		}
		cnb_cnt++
		excise(r)
	}
	if gc.Debug['v'] != 0 {
		gc.Dumpit("compare and branch", g.Start, 0)
	}

ret:
	gc.Flowend(g)
}
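// Illustration of the fusion performed by the final loop above
// (schematic s390x assembly, register names arbitrary):
//
//	CMP R1, R2
//	BEQ label      =>   CMPBEQ R1, R2, label
//
// The CMP is excised and the branch rewritten into the fused
// compare-and-branch opcode. For immediate comparands the constant must
// fit the fused instruction's small immediate field, which is what the
// signed (CMP/CMPW) and unsigned (CMPU/CMPWU) 8-bit range checks on
// p.To.Offset enforce.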