func outgcode(a int, g1 *obj.Addr, reg int, g2, g3 *obj.Addr) { var p *obj.Prog var pl *obj.Plist if asm.Pass == 1 { goto out } p = asm.Ctxt.NewProg() p.As = int16(a) p.Lineno = stmtline if nosched != 0 { p.Mark |= ppc64.NOSCHED } p.From = *g1 p.Reg = int16(reg) p.From3 = *g2 p.To = *g3 p.Pc = int64(asm.PC) if lastpc == nil { pl = obj.Linknewplist(asm.Ctxt) pl.Firstpc = p } else { lastpc.Link = p } lastpc = p out: if a != obj.AGLOBL && a != obj.ADATA { asm.PC++ } }
func outcode(a int, g2 *Addr2) { var p *obj.Prog var pl *obj.Plist if asm.Pass == 1 { goto out } p = new(obj.Prog) *p = obj.Prog{} p.Ctxt = asm.Ctxt p.As = int16(a) p.Lineno = stmtline p.From = g2.from p.To = g2.to p.Pc = int64(asm.PC) if lastpc == nil { pl = obj.Linknewplist(asm.Ctxt) pl.Firstpc = p } else { lastpc.Link = p } lastpc = p out: if a != obj.AGLOBL && a != obj.ADATA { asm.PC++ } }
func outcode(a int, g1 *obj.Addr, reg int, g2 *obj.Addr) { var p *obj.Prog var pl *obj.Plist if asm.Pass == 1 { goto out } if g1.Scale != 0 { if reg != 0 || g2.Scale != 0 { yyerror("bad addressing modes") } reg = int(g1.Scale) } else if g2.Scale != 0 { if reg != 0 { yyerror("bad addressing modes") } reg = int(g2.Scale) } p = asm.Ctxt.NewProg() p.As = int16(a) p.Lineno = stmtline if nosched != 0 { p.Mark |= ppc64.NOSCHED } p.From = *g1 p.Reg = int16(reg) p.To = *g2 p.Pc = int64(asm.PC) if lastpc == nil { pl = obj.Linknewplist(asm.Ctxt) pl.Firstpc = p } else { lastpc.Link = p } lastpc = p out: if a != obj.AGLOBL && a != obj.ADATA { asm.PC++ } }
func outcode(a, scond int32, g1 *obj.Addr, reg int32, g2 *obj.Addr) { var p *obj.Prog var pl *obj.Plist /* hack to make B.NE etc. work: turn it into the corresponding conditional */ if a == arm.AB { a = int32(bcode[(scond^arm.C_SCOND_XOR)&0xf]) scond = (scond &^ 0xf) | Always } if asm.Pass == 1 { goto out } p = new(obj.Prog) *p = obj.Prog{} p.Ctxt = asm.Ctxt p.As = int16(a) p.Lineno = stmtline p.Scond = uint8(scond) p.From = *g1 p.Reg = int16(reg) p.To = *g2 p.Pc = int64(asm.PC) if lastpc == nil { pl = obj.Linknewplist(asm.Ctxt) pl.Firstpc = p } else { lastpc.Link = p } lastpc = p out: if a != obj.AGLOBL && a != obj.ADATA { asm.PC++ } }
/*
 * The idea is to remove redundant constants.
 *	$c1->v1
 *	($c1->v2 s/$c1/v1)*
 *	set v1 return
 * The v1->v2 should be eliminated by copy propagation.
 */
// constprop walks forward from r along the straight-line successor chain
// (r.S1), replacing later loads of the same constant c1 with a register
// copy from v1, which already holds it. The walk stops at any merge point
// (a node with more than one predecessor) or as soon as v1 is clobbered.
// Side branches (r.S2) are followed recursively.
func constprop(c1 *obj.Addr, v1 *obj.Addr, r *gc.Flow) {
	if gc.Debug['P'] != 0 {
		fmt.Printf("constprop %v->%v\n", gc.Ctxt.Dconv(c1), gc.Ctxt.Dconv(v1))
	}
	var p *obj.Prog
	for ; r != nil; r = r.S1 {
		p = r.Prog
		if gc.Debug['P'] != 0 {
			fmt.Printf("%v", p)
		}
		// A merge point may be reached from a path on which v1 does not
		// hold c1, so substitution past it would be unsound.
		if gc.Uniqp(r) == nil {
			if gc.Debug['P'] != 0 {
				fmt.Printf("; merge; return\n")
			}
			return
		}
		if p.As == arm.AMOVW && copyas(&p.From, c1) {
			// Same constant loaded again: read it from v1 instead.
			// (The resulting v1->dst move is left for copy propagation.)
			if gc.Debug['P'] != 0 {
				fmt.Printf("; sub%v/%v", gc.Ctxt.Dconv(&p.From), gc.Ctxt.Dconv(v1))
			}
			p.From = *v1
		} else if copyu(p, v1, nil) > 1 {
			// v1 is written (copyu > 1 covers all "set" usages);
			// it no longer holds c1, so stop.
			if gc.Debug['P'] != 0 {
				fmt.Printf("; %vset; return\n", gc.Ctxt.Dconv(v1))
			}
			return
		}
		if gc.Debug['P'] != 0 {
			fmt.Printf("\n")
		}
		// Follow the branch successor too; the loop continues on S1.
		if r.S2 != nil {
			constprop(c1, v1, r.S2)
		}
	}
}
// If s==nil, copyu returns the set/use of v in p; otherwise, it // modifies p to replace reads of v with reads of s and returns 0 for // success or non-zero for failure. // // If s==nil, copy returns one of the following values: // _Read if v only used // _ReadWriteSame if v is set and used in one address (read-alter-rewrite; // can't substitute) // _Write if v is only set // _ReadWriteDiff if v is set in one address and used in another (so addresses // can be rewritten independently) // _None otherwise (not touched) func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) usage { if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST { // Currently we never generate a From3 with anything other than a constant in it. fmt.Printf("copyu: From3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3)) } switch p.As { default: fmt.Printf("copyu: can't find %v\n", p.As) return _ReadWriteSame case // read p.From, write p.To s390x.AMOVH, s390x.AMOVHZ, s390x.AMOVB, s390x.AMOVBZ, s390x.AMOVW, s390x.AMOVWZ, s390x.AMOVD, s390x.ANEG, s390x.AADDME, s390x.AADDZE, s390x.ASUBME, s390x.ASUBZE, s390x.AFMOVS, s390x.AFMOVD, s390x.ALEDBR, s390x.AFNEG, s390x.ALDEBR, s390x.ACLFEBR, s390x.ACLGEBR, s390x.ACLFDBR, s390x.ACLGDBR, s390x.ACFEBRA, s390x.ACGEBRA, s390x.ACFDBRA, s390x.ACGDBRA, s390x.ACELFBR, s390x.ACELGBR, s390x.ACDLFBR, s390x.ACDLGBR, s390x.ACEFBRA, s390x.ACEGBRA, s390x.ACDFBRA, s390x.ACDGBRA, s390x.AFSQRT: if s != nil { copysub(&p.From, v, s) // Update only indirect uses of v in p.To if !copyas(&p.To, v) { copysub(&p.To, v, s) } return _None } if copyas(&p.To, v) { // Fix up implicit from if p.From.Type == obj.TYPE_NONE { p.From = p.To } if copyau(&p.From, v) { return _ReadWriteDiff } return _Write } if copyau(&p.From, v) { return _Read } if copyau(&p.To, v) { // p.To only indirectly uses v return _Read } return _None // read p.From, read p.Reg, write p.To case s390x.AADD, s390x.AADDC, s390x.AADDE, s390x.ASUB, s390x.ASLW, s390x.ASRW, s390x.ASRAW, s390x.ASLD, s390x.ASRD, 
s390x.ASRAD, s390x.ARLL, s390x.ARLLG, s390x.AOR, s390x.AORN, s390x.AAND, s390x.AANDN, s390x.ANAND, s390x.ANOR, s390x.AXOR, s390x.AMULLW, s390x.AMULLD, s390x.AMULHD, s390x.AMULHDU, s390x.ADIVW, s390x.ADIVD, s390x.ADIVWU, s390x.ADIVDU, s390x.AFADDS, s390x.AFADD, s390x.AFSUBS, s390x.AFSUB, s390x.AFMULS, s390x.AFMUL, s390x.AFDIVS, s390x.AFDIV: if s != nil { copysub(&p.From, v, s) copysub1(p, v, s) // Update only indirect uses of v in p.To if !copyas(&p.To, v) { copysub(&p.To, v, s) } } if copyas(&p.To, v) { if p.Reg == 0 { p.Reg = p.To.Reg } if copyau(&p.From, v) || copyau1(p, v) { return _ReadWriteDiff } return _Write } if copyau(&p.From, v) { return _Read } if copyau1(p, v) { return _Read } if copyau(&p.To, v) { return _Read } return _None case s390x.ABEQ, s390x.ABGT, s390x.ABGE, s390x.ABLT, s390x.ABLE, s390x.ABNE, s390x.ABVC, s390x.ABVS: return _None case obj.ACHECKNIL, // read p.From s390x.ACMP, // read p.From, read p.To s390x.ACMPU, s390x.ACMPW, s390x.ACMPWU, s390x.AFCMPO, s390x.AFCMPU, s390x.ACEBR, s390x.AMVC, s390x.ACLC, s390x.AXC, s390x.AOC, s390x.ANC: if s != nil { copysub(&p.From, v, s) copysub(&p.To, v, s) return _None } if copyau(&p.From, v) { return _Read } if copyau(&p.To, v) { return _Read } return _None case s390x.ACMPBNE, s390x.ACMPBEQ, s390x.ACMPBLT, s390x.ACMPBLE, s390x.ACMPBGT, s390x.ACMPBGE, s390x.ACMPUBNE, s390x.ACMPUBEQ, s390x.ACMPUBLT, s390x.ACMPUBLE, s390x.ACMPUBGT, s390x.ACMPUBGE: if s != nil { copysub(&p.From, v, s) copysub1(p, v, s) return _None } if copyau(&p.From, v) { return _Read } if copyau1(p, v) { return _Read } return _None case s390x.ACLEAR: if s != nil { copysub(&p.To, v, s) return _None } if copyau(&p.To, v) { return _Read } return _None // go never generates a branch to a GPR // read p.To case s390x.ABR: if s != nil { copysub(&p.To, v, s) return _None } if copyau(&p.To, v) { return _Read } return _None case obj.ARET, obj.AUNDEF: if s != nil { return _None } // All registers die at this point, so claim // everything is set (and 
not used). return _Write case s390x.ABL: if v.Type == obj.TYPE_REG { if s390x.REGARG != -1 && v.Reg == s390x.REGARG { return _ReadWriteSame } if p.From.Type == obj.TYPE_REG && p.From.Reg == v.Reg { return _ReadWriteSame } if v.Reg == s390x.REGZERO { // Deliberately inserted nops set R0. return _ReadWriteSame } if v.Reg == s390x.REGCTXT { // Context register for closures. // TODO(mundaym): not sure if we need to exclude this. return _ReadWriteSame } } if s != nil { copysub(&p.To, v, s) return _None } if copyau(&p.To, v) { return _ReadWriteDiff } return _Write case obj.ATEXT: if v.Type == obj.TYPE_REG { if v.Reg == s390x.REGARG { return _Write } } return _None case obj.APCDATA, obj.AFUNCDATA, obj.AVARDEF, obj.AVARKILL, obj.AVARLIVE, obj.AUSEFIELD, obj.ANOP: return _None } }
// peep is the amd64 peephole-optimization driver. It builds the flow graph
// for the function starting at firstp, then applies, in order: short-mov
// elimination, constant propagation, iterated copy propagation and
// sign/zero-extension merging, ADD/SUB±1 -> INC/DEC rewriting, redundant
// MOVLQZX removal, MOVSD->MOVAPD strengthening, and load pipelining.
func peep(firstp *obj.Prog) {
	g := (*gc.Graph)(gc.Flowstart(firstp, nil))
	if g == nil {
		return
	}
	gactive = 0

	// byte, word arithmetic elimination.
	elimshortmov(g)

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	var p *obj.Prog
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.ALEAL,
			x86.ALEAQ:
			// LEA of a symbol with no index register is effectively a
			// constant address load.
			if regtyp(&p.To) {
				if p.From.Sym != nil {
					if p.From.Index == x86.REG_NONE {
						conprop(r)
					}
				}
			}

		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
					conprop(r)
				}
			}
		}
	}

	var r *gc.Flow
	var r1 *gc.Flow
	var p1 *obj.Prog
	var t int

	// Iterate copy propagation to a fixed point: t counts the changes
	// made in one pass; repeat while any were made.
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVSS,
			x86.AMOVSD:
			// Register-to-register move: try to eliminate it outright,
			// or enable elimination by back-substituting (subprop).
			if regtyp(&p.To) {
				if regtyp(&p.From) {
					if copyprop(g, r) {
						excise(r)
						t++
					} else if subprop(r) && copyprop(g, r) {
						excise(r)
						t++
					}
				}
			}

		case x86.AMOVBLZX,
			x86.AMOVWLZX,
			x86.AMOVBLSX,
			x86.AMOVWLSX:
			// Two identical extensions in a row: the second can be a
			// plain 32-bit move.
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVL
						t++
					}
				}
			}

		case x86.AMOVBQSX,
			x86.AMOVBQZX,
			x86.AMOVWQSX,
			x86.AMOVWQZX,
			x86.AMOVLQSX,
			x86.AMOVLQZX,
			x86.AMOVQL:
			// Same idea for 64-bit extensions.
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVQ
						t++
					}
				}
			}

		case x86.AADDL,
			x86.AADDQ,
			x86.AADDW:
			// ADD $±1 -> INC/DEC, but only when the carry flag set by
			// ADD is not needed downstream (needc).
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.AADDQ {
					p.As = x86.ADECQ
				} else if p.As == x86.AADDL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.AADDQ {
					p.As = x86.AINCQ
				} else if p.As == x86.AADDL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

		case x86.ASUBL,
			x86.ASUBQ,
			x86.ASUBW:
			// SUB $±1 -> DEC/INC, same carry-flag caveat as above.
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.ASUBQ {
					p.As = x86.AINCQ
				} else if p.As == x86.ASUBL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.ASUBQ {
					p.As = x86.ADECQ
				} else if p.As == x86.ASUBL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	// MOVLQZX removal.
	// The MOVLQZX exists to avoid being confused for a
	// MOVL that is just copying 32-bit data around during
	// copyprop. Now that copyprop is done, remove MOVLQZX R1, R2
	// if it is dominated by an earlier ADDL/MOVL/etc into R1 that
	// will have already cleared the high bits.
	//
	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one. We only use the lower one, but
	// the processor can do better if we do moves using both.
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		p = r.Prog
		if p.As == x86.AMOVLQZX {
			if regtyp(&p.From) {
				if p.From.Type == p.To.Type && p.From.Reg == p.To.Reg {
					if prevl(r, int(p.From.Reg)) {
						excise(r)
					}
				}
			}
		}

		if p.As == x86.AMOVSD {
			if regtyp(&p.From) {
				if regtyp(&p.To) {
					p.As = x86.AMOVAPD
				}
			}
		}
	}

	// load pipelining
	// push any load from memory as early as possible
	// to give it time to complete before use.
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVLQZX:
			if regtyp(&p.To) && !regconsttyp(&p.From) {
				pushback(r)
			}
		}
	}

	gc.Flowend(g)
}
// preprocess rewrites the instruction list of cursym for this back end:
// it classifies instructions (leaf detection, labels, scheduling barriers),
// strips NOPs, then expands the TEXT prologue (stack-split check, frame
// allocation, LR save, wrapper panic-argp fixup) and expands RET into the
// matching epilogue sequence. Spadj is maintained on every SP adjustment.
func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
	// TODO(minux): add morestack short-cuts with small fixed frame-size.
	ctxt.Cursym = cursym

	// A function without a body (external/assembly-declared) needs no work.
	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	p := cursym.Text
	textstksiz := p.To.Offset

	cursym.Args = p.To.Val.(int32)
	cursym.Locals = int32(textstksiz)

	/*
	 * find leaf subroutines
	 * strip NOPs
	 * expand RET
	 * expand BECOME pseudo
	 */
	if ctxt.Debugvlog != 0 {
		fmt.Fprintf(ctxt.Bso, "%5.2f noops\n", obj.Cputime())
	}
	ctxt.Bso.Flush()

	// q tracks the most recent non-NOP instruction so NOPs can be
	// unlinked in the obj.ANOP case below.
	var q *obj.Prog
	var q1 *obj.Prog
	for p := cursym.Text; p != nil; p = p.Link {
		switch p.As {
		/* too hard, just leave alone */
		case obj.ATEXT:
			q = p
			// Start optimistically: assume leaf until a call is seen.
			p.Mark |= LABEL | LEAF | SYNC
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}

		case ANOR:
			q = p
			if p.To.Type == obj.TYPE_REG {
				if p.To.Reg == REGZERO {
					p.Mark |= LABEL | SYNC
				}
			}

		case ASYNC,
			AWORD:
			q = p
			p.Mark |= LABEL | SYNC
			continue

		case AMOVW, AMOVWZ, AMOVD:
			q = p
			// Moves touching reserved registers act as barriers.
			if p.From.Reg >= REG_RESERVED || p.To.Reg >= REG_RESERVED {
				p.Mark |= LABEL | SYNC
			}
			continue

		case AFABS,
			AFADD,
			AFDIV,
			AFMADD,
			AFMOVD,
			/* case AFMOVDS: */
			AFMOVS,
			/* case AFMOVSD: */
			AFMSUB,
			AFMUL,
			AFNABS,
			AFNEG,
			AFNMADD,
			AFNMSUB,
			AFRSP,
			AFSUB:
			q = p
			p.Mark |= FLOAT
			continue

		case ABL,
			ABCL,
			obj.ADUFFZERO,
			obj.ADUFFCOPY:
			// A call: the function is not a leaf after all.
			cursym.Text.Mark &^= LEAF
			fallthrough

		case ABC,
			ABEQ,
			ABGE,
			ABGT,
			ABLE,
			ABLT,
			ABNE,
			ABR,
			ABVC,
			ABVS,
			ACMPBEQ,
			ACMPBGE,
			ACMPBGT,
			ACMPBLE,
			ACMPBLT,
			ACMPBNE,
			ACMPUBEQ,
			ACMPUBGE,
			ACMPUBGT,
			ACMPUBLE,
			ACMPUBLT,
			ACMPUBNE:
			p.Mark |= BRANCH
			q = p
			q1 = p.Pcond
			if q1 != nil {
				// Retarget the branch past any NOPs at its destination.
				for q1.As == obj.ANOP {
					q1 = q1.Link
					p.Pcond = q1
				}

				if q1.Mark&LEAF == 0 {
					q1.Mark |= LABEL
				}
			} else {
				p.Mark |= LABEL
			}
			q1 = p.Link
			if q1 != nil {
				q1.Mark |= LABEL
			}
			continue

		case AFCMPO, AFCMPU:
			q = p
			p.Mark |= FCMP | FLOAT
			continue

		case obj.ARET:
			q = p
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}
			continue

		case obj.ANOP:
			// Unlink the NOP, folding its marks into the successor.
			q1 = p.Link
			q.Link = q1 /* q is non-nop */
			q1.Mark |= p.Mark
			continue

		default:
			q = p
			continue
		}
	}

	autosize := int32(0)
	var o int
	var p1 *obj.Prog
	var p2 *obj.Prog
	for p := cursym.Text; p != nil; p = p.Link {
		o = int(p.As)
		switch o {
		case obj.ATEXT:
			// Frame size: declared locals plus 8 bytes for the saved LR.
			autosize = int32(textstksiz + 8)

			if (p.Mark&LEAF != 0) && autosize <= 8 {
				// A leaf with no locals needs no frame at all.
				autosize = 0
			} else if autosize&4 != 0 {
				// Keep the frame 8-byte aligned.
				autosize += 4
			}
			p.To.Offset = int64(autosize) - 8

			if p.From3.Offset&obj.NOSPLIT == 0 {
				p = stacksplit(ctxt, p, autosize) // emit split check
			}

			q = p

			if autosize != 0 {
				// Allocate the frame: SP -= autosize.
				q = obj.Appendp(ctxt, p)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(-autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = autosize
			} else if cursym.Text.Mark&LEAF == 0 {
				// A frameless function that makes no calls can still be
				// treated as a leaf (no LR save needed).
				if ctxt.Debugvlog != 0 {
					fmt.Fprintf(ctxt.Bso, "save suppressed in: %s\n", cursym.Name)
					ctxt.Bso.Flush()
				}

				cursym.Text.Mark |= LEAF
			}

			if cursym.Text.Mark&LEAF != 0 {
				cursym.Leaf = 1
				break
			}

			// Non-leaf: save LR at the bottom of the new frame.
			q = obj.Appendp(ctxt, q)
			q.As = AMOVD
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_LR
			q.To.Type = obj.TYPE_MEM
			q.To.Reg = REGSP
			q.To.Offset = 0

			if cursym.Text.From3.Offset&obj.WRAPPER != 0 {
				// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
				//
				//	MOVD	g_panic(g), R3
				//	CMP	R0, R3
				//	BEQ	end
				//	MOVD	panic_argp(R3), R4
				//	ADD	$(autosize+8), R1, R5
				//	CMP	R4, R5
				//	BNE	end
				//	ADD	$8, R1, R6
				//	MOVD	R6, panic_argp(R3)
				// end:
				//	NOP
				//
				// The NOP is needed to give the jumps somewhere to land.
				// It is a liblink NOP, not a s390x NOP: it encodes to 0 instruction bytes.

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REGG
				q.From.Offset = 4 * int64(ctxt.Arch.Ptrsize) // G.panic
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(ctxt, q)
				q.As = ACMP
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R0
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(ctxt, q)
				q.As = ABEQ
				q.To.Type = obj.TYPE_BRANCH
				p1 = q // branch target patched to the trailing NOP below

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REG_R3
				q.From.Offset = 0 // Panic.argp
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R4

				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize) + 8
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R5

				q = obj.Appendp(ctxt, q)
				q.As = ACMP
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R4
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R5

				q = obj.Appendp(ctxt, q)
				q.As = ABNE
				q.To.Type = obj.TYPE_BRANCH
				p2 = q // branch target patched to the trailing NOP below

				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = 8
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R6

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R6
				q.To.Type = obj.TYPE_MEM
				q.To.Reg = REG_R3
				q.To.Offset = 0 // Panic.argp

				q = obj.Appendp(ctxt, q)
				q.As = obj.ANOP
				p1.Pcond = q
				p2.Pcond = q
			}

		case obj.ARET:
			if p.From.Type == obj.TYPE_CONST {
				ctxt.Diag("using BECOME (%v) is not supported!", p)
				break
			}

			if p.To.Sym != nil { // retjmp
				// Tail call: jump straight to the target symbol.
				p.As = ABR
				p.To.Type = obj.TYPE_BRANCH
				break
			}

			if cursym.Text.Mark&LEAF != 0 {
				// Leaf epilogue: LR was never saved.
				if autosize == 0 {
					p.As = ABR
					p.From = obj.Addr{}
					p.To.Type = obj.TYPE_REG
					p.To.Reg = REG_LR
					p.Mark |= BRANCH
					break
				}

				// Pop the frame, then branch to LR.
				p.As = AADD
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(autosize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGSP
				p.Spadj = -autosize

				q = obj.Appendp(ctxt, p)
				q.As = ABR
				q.From = obj.Addr{}
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_LR
				q.Mark |= BRANCH
				q.Spadj = autosize
				break
			}

			// Non-leaf epilogue: restore LR from the frame, pop the
			// frame, branch to LR.
			p.As = AMOVD
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REGSP
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_LR

			q = p

			if autosize != 0 {
				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = -autosize
			}

			q = obj.Appendp(ctxt, q)
			q.As = ABR
			q.From = obj.Addr{}
			q.To.Type = obj.TYPE_REG
			q.To.Reg = REG_LR
			q.Mark |= BRANCH
			q.Spadj = autosize

		case AADD:
			// Track explicit SP adjustments for unwinding.
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
				p.Spadj = int32(-p.From.Offset)
			}
		}
	}
}
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copy returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3Type() != obj.TYPE_NONE {
		// never generates a from3
		fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}

	switch p.As {
	default:
		// Unknown instruction: conservatively claim read-alter-rewrite
		// so no substitution is attempted.
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(int(p.As)))
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		mips.AMOVV,
		mips.AMOVF,
		mips.AMOVD,
		mips.AMOVH,
		mips.AMOVHU,
		mips.AMOVB,
		mips.AMOVBU,
		mips.AMOVW,
		mips.AMOVWU,
		mips.AMOVFD,
		mips.AMOVDF,
		mips.AMOVDW,
		mips.AMOVWD,
		mips.AMOVFW,
		mips.AMOVWF,
		mips.AMOVDV,
		mips.AMOVVD,
		mips.AMOVFV,
		mips.AMOVVF,
		mips.ATRUNCFV,
		mips.ATRUNCDV,
		mips.ATRUNCFW,
		mips.ATRUNCDW:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, 1) != 0 {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			// Fix up implicit from
			if p.From.Type == obj.TYPE_NONE {
				p.From = p.To
			}
			if copyau(&p.From, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			// p->to only indirectly uses v
			return 1
		}

		return 0

	case mips.ASGT, /* read p->from, read p->reg, write p->to */
		mips.ASGTU,
		mips.AADD,
		mips.AADDU,
		mips.ASUB,
		mips.ASUBU,
		mips.ASLL,
		mips.ASRL,
		mips.ASRA,
		mips.AOR,
		mips.ANOR,
		mips.AAND,
		mips.AXOR,
		mips.AADDV,
		mips.AADDVU,
		mips.ASUBV,
		mips.ASUBVU,
		mips.ASLLV,
		mips.ASRLV,
		mips.ASRAV,
		mips.AADDF,
		mips.AADDD,
		mips.ASUBF,
		mips.ASUBD,
		mips.AMULF,
		mips.AMULD,
		mips.ADIVF,
		mips.ADIVD:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}
			if copysub1(p, v, s, 1) != 0 {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, 1) != 0 {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ACHECKNIL, /* read p->from */
		mips.ABEQ, /* read p->from, read p->reg */
		mips.ABNE,
		mips.ABGTZ,
		mips.ABGEZ,
		mips.ABLTZ,
		mips.ABLEZ,
		mips.ACMPEQD,
		mips.ACMPEQF,
		mips.ACMPGED,
		mips.ACMPGEF,
		mips.ACMPGTD,
		mips.ACMPGTF,
		mips.ABFPF,
		mips.ABFPT,
		mips.AMUL,
		mips.AMULU,
		mips.ADIV,
		mips.ADIVU,
		mips.AMULV,
		mips.AMULVU,
		mips.ADIVV,
		mips.ADIVVU:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}
			return copysub1(p, v, s, 1)
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		return 0

	case mips.AJMP: /* read p->to */
		if s != nil {
			if copysub(&p.To, v, s, 1) != 0 {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case mips.ARET: /* funny */
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case mips.AJAL: /* funny */
		if v.Type == obj.TYPE_REG {
			// TODO(rsc): REG_R0 and REG_F0 used to be
			// (when register numbers started at 0) exregoffset and exfregoffset,
			// which are unset entirely.
			// It's strange that this handles R0 and F0 differently from the other
			// registers. Possible failure to optimize?

			if mips.REG_R0 < v.Reg && v.Reg <= mips.REG_R31 {
				return 2
			}
			if v.Reg == mips.REGARG {
				return 2
			}
			if mips.REG_F0 < v.Reg && v.Reg <= mips.REG_F31 {
				return 2
			}
		}

		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, 1) != 0 {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	// R0 is zero, used by DUFFZERO, cannot be substituted.
	// R1 is ptr to memory, used and set, cannot be substituted.
	case obj.ADUFFZERO:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 0 {
				return 1
			}
			if v.Reg == 1 {
				return 2
			}
		}

		return 0

	// R1, R2 are ptr to src, dst, used and set, cannot be substituted.
	// R3 is scratch, set by DUFFCOPY, cannot be substituted.
	case obj.ADUFFCOPY:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 1 || v.Reg == 2 {
				return 2
			}
			if v.Reg == 3 {
				return 3
			}
		}

		return 0

	case obj.ATEXT: /* funny */
		if v.Type == obj.TYPE_REG {
			if v.Reg == mips.REGARG {
				return 3
			}
		}
		return 0

	case obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL,
		obj.AVARLIVE,
		obj.AUSEFIELD:
		return 0
	}
}
// progedit performs per-instruction rewriting for the x86 back end as each
// Prog is read: it tracks the 16/32/64 code-generation mode, canonicalizes
// TLS accesses, normalizes CMPPS/CALL/JMP/RET operand forms, rewrites
// MOV $addr to LEA, applies NaCl address sandboxing, and materializes
// floating-point constants as memory symbols.
func progedit(ctxt *obj.Link, p *obj.Prog) {
	// Maintain information about code generation mode.
	if ctxt.Mode == 0 {
		ctxt.Mode = ctxt.Arch.Regsize * 8
	}
	p.Mode = int8(ctxt.Mode)

	switch p.As {
	case AMODE:
		// MODE is a pseudo-op that switches the assembler's code
		// generation mode; it emits no machine code (nopped out below).
		if p.From.Type == obj.TYPE_CONST || (p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_NONE) {
			switch int(p.From.Offset) {
			case 16, 32, 64:
				ctxt.Mode = int(p.From.Offset)
			}
		}
		obj.Nopout(p)
	}

	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only does because
	// we only support a single TLS variable (g).
	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != obj.Hsolaris {
			obj.Nopout(p)
		}
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			q := obj.Appendp(ctxt, p)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	if ctxt.Headtype == obj.Hwindows && p.Mode == 64 || ctxt.Headtype == obj.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Thechar == '6' || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	// NaCl sandboxing: rewrite addresses into the sandboxed form.
	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
		if p.From3 != nil {
			nacladdr(ctxt, p, p.From3)
		}
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		// Non-zero float32 constant: intern it as an $f32.xxxxxxxx
		// symbol and load it from memory.
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			i32 := math.Float32bits(f32)
			literal := fmt.Sprintf("$f32.%08x", i32)
			s := obj.Linklookup(ctxt, literal, 0)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Sym.Local = true
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			//  f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		// Non-zero float64 constant: intern it as an $f64.xxxxxxxxxxxxxxxx
		// symbol and load it from memory.
		if p.From.Type == obj.TYPE_FCONST {
			i64 := math.Float64bits(p.From.Val.(float64))
			literal := fmt.Sprintf("$f64.%016x", i64)
			s := obj.Linklookup(ctxt, literal, 0)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Sym.Local = true
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p)
	}

	if ctxt.Flag_shared != 0 && p.Mode == 32 {
		rewriteToPcrel(ctxt, p)
	}
}
func peep(firstp *obj.Prog) { g := gc.Flowstart(firstp, nil) if g == nil { return } gactive = 0 var p *obj.Prog var r *gc.Flow var t int loop1: if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 { gc.Dumpit("loop1", g.Start, 0) } t = 0 for r = g.Start; r != nil; r = r.Link { p = r.Prog // TODO(minux) Handle smaller moves. arm and amd64 // distinguish between moves that *must* sign/zero // extend and moves that don't care so they // can eliminate moves that don't care without // breaking moves that do care. This might let us // simplify or remove the next peep loop, too. if p.As == arm64.AMOVD || p.As == arm64.AFMOVD { if regtyp(&p.To) { // Try to eliminate reg->reg moves if regtyp(&p.From) { if p.From.Type == p.To.Type { if copyprop(r) { excise(r) t++ } else if subprop(r) && copyprop(r) { excise(r) t++ } } } } } } if t != 0 { goto loop1 } /* * look for MOVB x,R; MOVB R,R (for small MOVs not handled above) */ var p1 *obj.Prog var r1 *gc.Flow for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { default: continue case arm64.AMOVH, arm64.AMOVHU, arm64.AMOVB, arm64.AMOVBU, arm64.AMOVW, arm64.AMOVWU: if p.To.Type != obj.TYPE_REG { continue } } r1 = r.Link if r1 == nil { continue } p1 = r1.Prog if p1.As != p.As { continue } if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg { continue } if p1.To.Type != obj.TYPE_REG || p1.To.Reg != p.To.Reg { continue } excise(r1) } if gc.Debug['D'] > 1 { goto ret /* allow following code improvement to be suppressed */ } // MOVD $c, R'; ADD R', R (R' unused) -> ADD $c, R for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { default: continue case arm64.AMOVD: if p.To.Type != obj.TYPE_REG { continue } if p.From.Type != obj.TYPE_CONST { continue } if p.From.Offset < 0 || 4096 <= p.From.Offset { continue } } r1 = r.Link if r1 == nil { continue } p1 = r1.Prog if p1.As != arm64.AADD && p1.As != arm64.ASUB { // TODO(aram): also logical after we have bimm. 
continue } if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg { continue } if p1.To.Type != obj.TYPE_REG { continue } if gc.Debug['P'] != 0 { fmt.Printf("encoding $%d directly into %v in:\n%v\n%v\n", p.From.Offset, obj.Aconv(p1.As), p, p1) } p1.From.Type = obj.TYPE_CONST p1.From = p.From excise(r) } /* TODO(minux): * look for OP x,y,R; CMP R, $0 -> OP.S x,y,R * when OP can set condition codes correctly */ ret: gc.Flowend(g) }
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copy returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3.Type != obj.TYPE_NONE {
		// 7g never generates a from3
		fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(&p.From3))
	}
	if p.To2.Type != obj.TYPE_NONE {
		// 7g never generates a to2
		fmt.Printf("copyu: to2 (%v) not implemented\n", gc.Ctxt.Dconv(&p.To2))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(int(p.As)))
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		arm64.ANEG,
		arm64.AFNEGD,
		arm64.AFNEGS,
		arm64.AFSQRTD,
		arm64.AFCVTZSD,
		arm64.AFCVTZSS,
		arm64.AFCVTZSDW,
		arm64.AFCVTZSSW,
		arm64.AFCVTZUD,
		arm64.AFCVTZUS,
		arm64.AFCVTZUDW,
		arm64.AFCVTZUSW,
		arm64.AFCVTSD,
		arm64.AFCVTDS,
		arm64.ASCVTFD,
		arm64.ASCVTFS,
		arm64.ASCVTFWD,
		arm64.ASCVTFWS,
		arm64.AUCVTFD,
		arm64.AUCVTFS,
		arm64.AUCVTFWD,
		arm64.AUCVTFWS,
		arm64.AMOVB,
		arm64.AMOVBU,
		arm64.AMOVH,
		arm64.AMOVHU,
		arm64.AMOVW,
		arm64.AMOVWU,
		arm64.AMOVD,
		arm64.AFMOVS,
		arm64.AFMOVD:
		// Scond == 0 is the plain (non pre/post-indexed) form.
		if p.Scond == 0 {
			if s != nil {
				if copysub(&p.From, v, s, 1) != 0 {
					return 1
				}

				// Update only indirect uses of v in p->to
				if !copyas(&p.To, v) {
					if copysub(&p.To, v, s, 1) != 0 {
						return 1
					}
				}
				return 0
			}

			if copyas(&p.To, v) {
				// Fix up implicit from
				if p.From.Type == obj.TYPE_NONE {
					p.From = p.To
				}
				if copyau(&p.From, v) {
					return 4
				}
				return 3
			}

			if copyau(&p.From, v) {
				return 1
			}
			if copyau(&p.To, v) {
				// p->to only indirectly uses v
				return 1
			}

			return 0
		}

		/* rar p->from, write p->to or read p->from, rar p->to */
		if p.From.Type == obj.TYPE_MEM {
			if copyas(&p.From, v) {
				// No s!=nil check; need to fail
				// anyway in that case
				return 2
			}

			if s != nil {
				if copysub(&p.To, v, s, 1) != 0 {
					return 1
				}
				return 0
			}

			if copyas(&p.To, v) {
				return 3
			}
		} else if p.To.Type == obj.TYPE_MEM {
			if copyas(&p.To, v) {
				return 2
			}
			if s != nil {
				if copysub(&p.From, v, s, 1) != 0 {
					return 1
				}
				return 0
			}

			if copyau(&p.From, v) {
				return 1
			}
		} else {
			fmt.Printf("copyu: bad %v\n", p)
		}

		return 0

	case arm64.AADD, /* read p->from, read p->reg, write p->to */
		arm64.ASUB,
		arm64.AAND,
		arm64.AORR,
		arm64.AEOR,
		arm64.AMUL,
		arm64.ASMULL,
		arm64.AUMULL,
		arm64.ASMULH,
		arm64.AUMULH,
		arm64.ASDIV,
		arm64.AUDIV,
		arm64.ALSL,
		arm64.ALSR,
		arm64.AASR,
		arm64.AFADDD,
		arm64.AFADDS,
		arm64.AFSUBD,
		arm64.AFSUBS,
		arm64.AFMULD,
		arm64.AFMULS,
		arm64.AFDIVD,
		arm64.AFDIVS:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}
			if copysub1(p, v, s, 1) != 0 {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, 1) != 0 {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case arm64.ABEQ,
		arm64.ABNE,
		arm64.ABGE,
		arm64.ABLT,
		arm64.ABGT,
		arm64.ABLE,
		arm64.ABLO,
		arm64.ABLS,
		arm64.ABHI,
		arm64.ABHS:
		return 0

	case obj.ACHECKNIL, /* read p->from */
		arm64.ACMP, /* read p->from, read p->reg */
		arm64.AFCMPD,
		arm64.AFCMPS:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}
			return copysub1(p, v, s, 1)
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		return 0

	case arm64.AB: /* read p->to */
		if s != nil {
			if copysub(&p.To, v, s, 1) != 0 {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ARET: /* funny */
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case arm64.ABL: /* funny */
		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, 1) != 0 {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	// R31 is zero, used by DUFFZERO, cannot be substituted.
	// R16 is ptr to memory, used and set, cannot be substituted.
	case obj.ADUFFZERO:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 31 {
				return 1
			}
			if v.Reg == 16 {
				return 2
			}
		}

		return 0

	// R16, R17 are ptr to src, dst, used and set, cannot be substituted.
	// R27 is scratch, set by DUFFCOPY, cannot be substituted.
	case obj.ADUFFCOPY:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 16 || v.Reg == 17 {
				return 2
			}
			if v.Reg == 27 {
				return 3
			}
		}

		return 0

	case arm64.AHINT,
		obj.ATEXT,
		obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL:
		return 0
	}
}
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copy returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST {
		// Currently we never generate a From3 with anything other than a constant in it.
		fmt.Printf("copyu: From3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(int(p.As)))
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		s390x.AMOVH,
		s390x.AMOVHZ,
		s390x.AMOVB,
		s390x.AMOVBZ,
		s390x.AMOVW,
		s390x.AMOVWZ,
		s390x.AMOVD,
		s390x.ANEG,
		s390x.AADDME,
		s390x.AADDZE,
		s390x.ASUBME,
		s390x.ASUBZE,
		s390x.AFMOVS,
		s390x.AFMOVD,
		s390x.AFRSP,
		s390x.AFNEG,
		s390x.ALDEBR,
		s390x.ACLFEBR,
		s390x.ACLGEBR,
		s390x.ACLFDBR,
		s390x.ACLGDBR,
		s390x.ACFEBRA,
		s390x.ACGEBRA,
		s390x.ACFDBRA,
		s390x.ACGDBRA,
		s390x.ACELFBR,
		s390x.ACELGBR,
		s390x.ACDLFBR,
		s390x.ACDLGBR,
		s390x.ACEFBRA,
		s390x.ACEGBRA,
		s390x.ACDFBRA,
		s390x.ACDGBRA,
		s390x.AFSQRT:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, 1) != 0 {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			// Fix up implicit from
			if p.From.Type == obj.TYPE_NONE {
				p.From = p.To
			}
			if copyau(&p.From, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			// p->to only indirectly uses v
			return 1
		}

		return 0

	// read p->from, read p->reg, rar p->to
	case s390x.ARLWMI:
		if copyas(&p.To, v) {
			return 2
		}
		fallthrough

	// read p->from, read p->reg, write p->to
	case s390x.AADD,
		s390x.AADDC,
		s390x.AADDE,
		s390x.ASUB,
		s390x.ASLW,
		s390x.ASRW,
		s390x.ASRAW,
		s390x.ASLD,
		s390x.ASRD,
		s390x.ASRAD,
		s390x.AOR,
		s390x.AORN,
		s390x.AAND,
		s390x.AANDN,
		s390x.ANAND,
		s390x.ANOR,
		s390x.AXOR,
		s390x.AMULLW,
		s390x.AMULLD,
		s390x.ADIVW,
		s390x.ADIVD,
		s390x.ADIVWU,
		s390x.ADIVDU,
		s390x.AREM,
		s390x.AREMU,
		s390x.AREMD,
		s390x.AREMDU,
		s390x.ARLWNM,
		s390x.AFADDS,
		s390x.AFADD,
		s390x.AFSUBS,
		s390x.AFSUB,
		s390x.AFMULS,
		s390x.AFMUL,
		s390x.AFDIVS,
		s390x.AFDIV:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}
			if copysub1(p, v, s, 1) != 0 {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, 1) != 0 {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case s390x.ABEQ,
		s390x.ABGT,
		s390x.ABGE,
		s390x.ABLT,
		s390x.ABLE,
		s390x.ABNE,
		s390x.ABVC,
		s390x.ABVS:
		return 0

	case obj.ACHECKNIL, /* read p->from */
		s390x.ACMP, /* read p->from, read p->to */
		s390x.ACMPU,
		s390x.ACMPW,
		s390x.ACMPWU,
		s390x.AFCMPO,
		s390x.AFCMPU,
		s390x.ACEBR,
		s390x.AMVC,
		s390x.ACLC,
		s390x.AXC,
		s390x.AOC,
		s390x.ANC:
		if s != nil {
			if copysub(&p.From, v, s, 1) != 0 {
				return 1
			}
			return copysub(&p.To, v, s, 1)
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	// go never generates a branch to a GPR
	// read p->to
	case s390x.ABR:
		if s != nil {
			if copysub(&p.To, v, s, 1) != 0 {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ARET:
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case s390x.ABL:
		if v.Type == obj.TYPE_REG {
			// TODO(rsc): REG_R0 and REG_F0 used to be
			// (when register numbers started at 0) exregoffset and exfregoffset,
			// which are unset entirely.
			// It's strange that this handles R0 and F0 differently from the other
			// registers. Possible failure to optimize?
			if s390x.REG_R0 < v.Reg && v.Reg <= s390x.REGEXT {
				return 2
			}
			if v.Reg == s390x.REGARG {
				return 2
			}
			if s390x.REG_F0 < v.Reg && v.Reg <= s390x.FREGEXT {
				return 2
			}
		}

		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, 1) != 0 {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	case obj.ATEXT:
		if v.Type == obj.TYPE_REG {
			if v.Reg == s390x.REGARG {
				return 3
			}
		}
		return 0

	case obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL:
		return 0
	}
}
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copy returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3Type() != obj.TYPE_NONE {
		// 9g never generates a from3
		fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", p.As)
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		ppc64.AMOVH,
		ppc64.AMOVHZ,
		ppc64.AMOVB,
		ppc64.AMOVBZ,
		ppc64.AMOVW,
		ppc64.AMOVWZ,
		ppc64.AMOVD,
		ppc64.ANEG,
		ppc64.ANEGCC,
		ppc64.AADDME,
		ppc64.AADDMECC,
		ppc64.AADDZE,
		ppc64.AADDZECC,
		ppc64.ASUBME,
		ppc64.ASUBMECC,
		ppc64.ASUBZE,
		ppc64.ASUBZECC,
		ppc64.AFCTIW,
		ppc64.AFCTIWZ,
		ppc64.AFCTID,
		ppc64.AFCTIDZ,
		ppc64.AFCFID,
		ppc64.AFCFIDCC,
		ppc64.AFCFIDU,
		ppc64.AFCFIDUCC,
		ppc64.AFMOVS,
		ppc64.AFMOVD,
		ppc64.AFRSP,
		ppc64.AFNEG,
		ppc64.AFNEGCC,
		ppc64.AFSQRT:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			// Fix up implicit from
			if p.From.Type == obj.TYPE_NONE {
				p.From = p.To
			}
			if copyau(&p.From, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			// p->to only indirectly uses v
			return 1
		}

		return 0

	case ppc64.AMOVBU, /* rar p->from, write p->to or read p->from, rar p->to */
		ppc64.AMOVBZU,
		ppc64.AMOVHU,
		ppc64.AMOVHZU,
		ppc64.AMOVWZU,
		ppc64.AMOVDU:
		if p.From.Type == obj.TYPE_MEM {
			if copyas(&p.From, v) {
				// No s!=nil check; need to fail
				// anyway in that case
				return 2
			}

			if s != nil {
				if copysub(&p.To, v, s, true) {
					return 1
				}
				return 0
			}

			if copyas(&p.To, v) {
				return 3
			}
		} else if p.To.Type == obj.TYPE_MEM {
			if copyas(&p.To, v) {
				return 2
			}
			if s != nil {
				if copysub(&p.From, v, s, true) {
					return 1
				}
				return 0
			}

			if copyau(&p.From, v) {
				return 1
			}
		} else {
			fmt.Printf("copyu: bad %v\n", p)
		}

		return 0

	case ppc64.ARLWMI, /* read p->from, read p->reg, rar p->to */
		ppc64.ARLWMICC:
		if copyas(&p.To, v) {
			return 2
		}
		fallthrough

		/* fall through */
	case ppc64.AADD, /* read p->from, read p->reg, write p->to */
		ppc64.AADDC,
		ppc64.AADDE,
		ppc64.ASUB,
		ppc64.ASLW,
		ppc64.ASRW,
		ppc64.ASRAW,
		ppc64.ASLD,
		ppc64.ASRD,
		ppc64.ASRAD,
		ppc64.AOR,
		ppc64.AORCC,
		ppc64.AORN,
		ppc64.AORNCC,
		ppc64.AAND,
		ppc64.AANDCC,
		ppc64.AANDN,
		ppc64.AANDNCC,
		ppc64.ANAND,
		ppc64.ANANDCC,
		ppc64.ANOR,
		ppc64.ANORCC,
		ppc64.AXOR,
		ppc64.AMULHW,
		ppc64.AMULHWU,
		ppc64.AMULLW,
		ppc64.AMULLD,
		ppc64.ADIVW,
		ppc64.ADIVD,
		ppc64.ADIVWU,
		ppc64.ADIVDU,
		ppc64.AREM,
		ppc64.AREMU,
		ppc64.AREMD,
		ppc64.AREMDU,
		ppc64.ARLWNM,
		ppc64.ARLWNMCC,
		ppc64.AFADDS,
		ppc64.AFADD,
		ppc64.AFSUBS,
		ppc64.AFSUB,
		ppc64.AFMULS,
		ppc64.AFMUL,
		ppc64.AFDIVS,
		ppc64.AFDIV:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub1(p, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case ppc64.ABEQ,
		ppc64.ABGT,
		ppc64.ABGE,
		ppc64.ABLT,
		ppc64.ABLE,
		ppc64.ABNE,
		ppc64.ABVC,
		ppc64.ABVS:
		return 0

	case obj.ACHECKNIL, /* read p->from */
		ppc64.ACMP, /* read p->from, read p->to */
		ppc64.ACMPU,
		ppc64.ACMPW,
		ppc64.ACMPWU,
		ppc64.AFCMPO,
		ppc64.AFCMPU:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	// 9g never generates a branch to a GPR (this isn't
	// even a normal instruction; liblink turns it in to a
	// mov and a branch).
	case ppc64.ABR: /* read p->to */
		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ARET: /* funny */
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case ppc64.ABL: /* funny */
		if v.Type == obj.TYPE_REG {
			// TODO(rsc): REG_R0 and REG_F0 used to be
			// (when register numbers started at 0) exregoffset and exfregoffset,
			// which are unset entirely.
			// It's strange that this handles R0 and F0 differently from the other
			// registers. Possible failure to optimize?
			if ppc64.REG_R0 < v.Reg && v.Reg <= ppc64.REGEXT {
				return 2
			}
			if v.Reg == ppc64.REGARG {
				return 2
			}
			if ppc64.REG_F0 < v.Reg && v.Reg <= ppc64.FREGEXT {
				return 2
			}
		}

		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	// R0 is zero, used by DUFFZERO, cannot be substituted.
	// R3 is ptr to memory, used and set, cannot be substituted.
	case obj.ADUFFZERO:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 0 {
				return 1
			}
			if v.Reg == 3 {
				return 2
			}
		}

		return 0

	// R3, R4 are ptr to src, dst, used and set, cannot be substituted.
	// R5 is scratch, set by DUFFCOPY, cannot be substituted.
	case obj.ADUFFCOPY:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 3 || v.Reg == 4 {
				return 2
			}
			if v.Reg == 5 {
				return 3
			}
		}

		return 0

	case obj.ATEXT: /* funny */
		if v.Type == obj.TYPE_REG {
			if v.Reg == ppc64.REGARG {
				return 3
			}
		}
		return 0

	case obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL,
		obj.AVARLIVE,
		obj.AUSEFIELD:
		return 0
	}
}
// preprocess rewrites cursym's instruction stream for the s390x backend:
// it classifies instructions (leaf detection, branch/label marking), strips
// NOPs, inserts the stack-split check and frame setup/teardown, emits the
// WRAPPER panic-argp fixup, and expands RET into the appropriate epilogue.
func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
	// TODO(minux): add morestack short-cuts with small fixed frame-size.
	ctxt.Cursym = cursym

	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	p := cursym.Text
	textstksiz := p.To.Offset
	if textstksiz == -8 {
		// Compatibility hack.
		p.From3.Offset |= obj.NOFRAME
		textstksiz = 0
	}
	if textstksiz%8 != 0 {
		ctxt.Diag("frame size %d not a multiple of 8", textstksiz)
	}
	if p.From3.Offset&obj.NOFRAME != 0 {
		if textstksiz != 0 {
			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
		}
	}

	cursym.Args = p.To.Val.(int32)
	cursym.Locals = int32(textstksiz)

	/*
	 * find leaf subroutines
	 * strip NOPs
	 * expand RET
	 * expand BECOME pseudo
	 */
	if ctxt.Debugvlog != 0 {
		ctxt.Logf("%5.2f noops\n", obj.Cputime())
	}

	var q *obj.Prog
	var q1 *obj.Prog
	for p := cursym.Text; p != nil; p = p.Link {
		switch p.As {
		/* too hard, just leave alone */
		case obj.ATEXT:
			q = p

			p.Mark |= LABEL | LEAF | SYNC
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}

		case ASYNC,
			AWORD:
			q = p
			p.Mark |= LABEL | SYNC
			continue

		case AMOVW, AMOVWZ, AMOVD:
			q = p
			if p.From.Reg >= REG_RESERVED || p.To.Reg >= REG_RESERVED {
				p.Mark |= LABEL | SYNC
			}
			continue

		case AFABS,
			AFADD,
			AFDIV,
			AFMADD,
			AFMOVD,
			AFMOVS,
			AFMSUB,
			AFMUL,
			AFNABS,
			AFNEG,
			AFNMADD,
			AFNMSUB,
			ALEDBR,
			ALDEBR,
			AFSUB:
			q = p

			p.Mark |= FLOAT
			continue

		case ABL,
			ABCL,
			obj.ADUFFZERO,
			obj.ADUFFCOPY:
			// Any call means the function is not a leaf.
			cursym.Text.Mark &^= LEAF
			fallthrough

		case ABC,
			ABEQ,
			ABGE,
			ABGT,
			ABLE,
			ABLT,
			ABLEU,
			ABLTU,
			ABNE,
			ABR,
			ABVC,
			ABVS,
			ACMPBEQ,
			ACMPBGE,
			ACMPBGT,
			ACMPBLE,
			ACMPBLT,
			ACMPBNE,
			ACMPUBEQ,
			ACMPUBGE,
			ACMPUBGT,
			ACMPUBLE,
			ACMPUBLT,
			ACMPUBNE:
			p.Mark |= BRANCH
			q = p
			q1 = p.Pcond
			if q1 != nil {
				// Skip NOPs at the branch target and retarget
				// the branch past them.
				for q1.As == obj.ANOP {
					q1 = q1.Link
					p.Pcond = q1
				}

				if q1.Mark&LEAF == 0 {
					q1.Mark |= LABEL
				}
			} else {
				p.Mark |= LABEL
			}
			q1 = p.Link
			if q1 != nil {
				q1.Mark |= LABEL
			}
			continue

		case AFCMPO, AFCMPU:
			q = p
			p.Mark |= FCMP | FLOAT
			continue

		case obj.ARET:
			q = p
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}
			continue

		case obj.ANOP:
			// Unlink the NOP, folding its marks into its successor.
			q1 = p.Link
			q.Link = q1 /* q is non-nop */
			q1.Mark |= p.Mark
			continue

		default:
			q = p
			continue
		}
	}

	autosize := int32(0)
	var p1 *obj.Prog
	var p2 *obj.Prog
	var pLast *obj.Prog
	var pPre *obj.Prog
	var pPreempt *obj.Prog
	wasSplit := false
	for p := cursym.Text; p != nil; p = p.Link {
		pLast = p
		switch p.As {
		case obj.ATEXT:
			autosize = int32(textstksiz)

			if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 {
				// A leaf function with no locals has no frame.
				p.From3.Offset |= obj.NOFRAME
			}

			if p.From3.Offset&obj.NOFRAME == 0 {
				// If there is a stack frame at all, it includes
				// space to save the LR.
				autosize += int32(ctxt.FixedFrameSize())
			}

			p.To.Offset = int64(autosize)

			q = p

			if p.From3.Offset&obj.NOSPLIT == 0 && p.From3.Offset&obj.NOFRAME == 0 {
				p, pPreempt = stacksplitPre(ctxt, p, autosize) // emit pre part of split check
				pPre = p
				wasSplit = true //need post part of split
			}

			if autosize != 0 {
				// Allocate the frame: SP -= autosize.
				q = obj.Appendp(ctxt, p)
				q.As = AMOVD
				q.From.Type = obj.TYPE_ADDR
				q.From.Offset = int64(-autosize)
				q.From.Reg = REGSP // not actually needed - REGSP is assumed if no reg is provided
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = autosize
			} else if cursym.Text.Mark&LEAF == 0 {
				// A very few functions that do not return to their caller
				// (e.g. gogo) are not identified as leaves but still have
				// no frame.
				cursym.Text.Mark |= LEAF
			}

			if cursym.Text.Mark&LEAF != 0 {
				cursym.Leaf = true
				break
			}

			// Save the link register at the bottom of the new frame.
			q = obj.Appendp(ctxt, q)
			q.As = AMOVD
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_LR
			q.To.Type = obj.TYPE_MEM
			q.To.Reg = REGSP
			q.To.Offset = 0

			if cursym.Text.From3.Offset&obj.WRAPPER != 0 {
				// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
				//
				//	MOVD g_panic(g), R3
				//	CMP R3, $0
				//	BEQ end
				//	MOVD panic_argp(R3), R4
				//	ADD $(autosize+8), R1, R5
				//	CMP R4, R5
				//	BNE end
				//	ADD $8, R1, R6
				//	MOVD R6, panic_argp(R3)
				// end:
				//	NOP
				//
				// The NOP is needed to give the jumps somewhere to land.
				// It is a liblink NOP, not a s390x NOP: it encodes to 0 instruction bytes.

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REGG
				q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(ctxt, q)
				q.As = ACMP
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R3
				q.To.Type = obj.TYPE_CONST
				q.To.Offset = 0

				q = obj.Appendp(ctxt, q)
				q.As = ABEQ
				q.To.Type = obj.TYPE_BRANCH
				p1 = q

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REG_R3
				q.From.Offset = 0 // Panic.argp
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R4

				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize) + ctxt.FixedFrameSize()
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R5

				q = obj.Appendp(ctxt, q)
				q.As = ACMP
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R4
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R5

				q = obj.Appendp(ctxt, q)
				q.As = ABNE
				q.To.Type = obj.TYPE_BRANCH
				p2 = q

				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = ctxt.FixedFrameSize()
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R6

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R6
				q.To.Type = obj.TYPE_MEM
				q.To.Reg = REG_R3
				q.To.Offset = 0 // Panic.argp

				q = obj.Appendp(ctxt, q)

				q.As = obj.ANOP
				p1.Pcond = q
				p2.Pcond = q
			}

		case obj.ARET:
			if p.From.Type == obj.TYPE_CONST {
				ctxt.Diag("using BECOME (%v) is not supported!", p)
				break
			}

			retTarget := p.To.Sym

			if cursym.Text.Mark&LEAF != 0 {
				// Leaf: no saved LR to restore; just pop the
				// frame (if any) and branch.
				if autosize == 0 {
					p.As = ABR
					p.From = obj.Addr{}
					if retTarget == nil {
						p.To.Type = obj.TYPE_REG
						p.To.Reg = REG_LR
					} else {
						p.To.Type = obj.TYPE_BRANCH
						p.To.Sym = retTarget
					}
					p.Mark |= BRANCH
					break
				}

				p.As = AADD
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(autosize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGSP
				p.Spadj = -autosize

				q = obj.Appendp(ctxt, p)
				q.As = ABR
				q.From = obj.Addr{}
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_LR
				q.Mark |= BRANCH
				q.Spadj = autosize
				break
			}

			// Non-leaf: reload LR from the frame, pop the frame,
			// then branch.
			p.As = AMOVD
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REGSP
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_LR

			q = p

			if autosize != 0 {
				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = -autosize
			}

			q = obj.Appendp(ctxt, q)
			q.As = ABR
			q.From = obj.Addr{}
			if retTarget == nil {
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_LR
			} else {
				q.To.Type = obj.TYPE_BRANCH
				q.To.Sym = retTarget
			}
			q.Mark |= BRANCH
			q.Spadj = autosize

		case AADD:
			// Track explicit SP adjustments for unwinding.
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
				p.Spadj = int32(-p.From.Offset)
			}
		}
	}

	if wasSplit {
		pLast = stacksplitPost(ctxt, pLast, pPre, pPreempt, autosize) // emit post part of split check
	}
}
// progedit performs per-instruction rewrites for the x86 backends:
// it tracks the code generation mode (AMODE), canonicalizes TLS
// references for the target OS, normalizes CMPPS-family third
// operands, rewrites CALL/JMP/RET to symbols as branches, applies
// NaCl address sandboxing, and moves floating-point constants into
// memory (or converts zero loads into XORPS).
func progedit(ctxt *obj.Link, p *obj.Prog) {
	// Maintain information about code generation mode.
	if ctxt.Mode == 0 {
		ctxt.Mode = ctxt.Arch.Regsize * 8
	}
	p.Mode = int8(ctxt.Mode)

	switch p.As {
	case AMODE:
		if p.From.Type == obj.TYPE_CONST || (p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_NONE) {
			switch int(p.From.Offset) {
			case 16, 32, 64:
				ctxt.Mode = int(p.From.Offset)
			}
		}
		obj.Nopout(p)
	}

	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 8(AX)(TLS*1), CX // load m into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 8(TLS), CX // load m into CX
	//
	// The 2-instruction and 1-instruction forms correspond roughly to
	// ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system (and probably
	// the -shared flag, eventually), not the link mode. If some link modes
	// on a particular operating system require the 2-instruction form,
	// then all builds for that operating system will use the 2-instruction
	// form, so that the link mode decision can be delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	if canuselocaltls(ctxt) {
		// Reduce TLS initial exec model to TLS local exec model.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != obj.Hsolaris {
			obj.Nopout(p)
		}
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
		// The instruction
		//	MOVQ off(TLS), BX
		// becomes the sequence
		//	MOVQ TLS, BX
		//	MOVQ off(BX)(TLS*1), BX
		// This allows the C compilers to emit references to m and g using the direct off(TLS) form.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			q := obj.Appendp(ctxt, p)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	if ctxt.Headtype == obj.Hwindows && p.Mode == 64 || ctxt.Headtype == obj.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
		nacladdr(ctxt, p, &p.From3)
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			if p.From.U.Dval == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		// Materialize the float32 constant as a named read-only
		// symbol ($f32.xxxxxxxx) and reference it from memory.
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.U.Dval)
			i32 := math.Float32bits(f32)
			literal := fmt.Sprintf("$f32.%08x", i32)
			s := obj.Linklookup(ctxt, literal, 0)
			if s.Type == 0 {
				s.Type = obj.SRODATA
				obj.Adduint32(ctxt, s, i32)
				s.Reachable = 0
			}

			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			if p.From.U.Dval == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		// Same as above, for float64 ($f64.xxxxxxxxxxxxxxxx).
		if p.From.Type == obj.TYPE_FCONST {
			i64 := math.Float64bits(p.From.U.Dval)
			literal := fmt.Sprintf("$f64.%016x", i64)
			s := obj.Linklookup(ctxt, literal, 0)
			if s.Type == 0 {
				s.Type = obj.SRODATA
				obj.Adduint64(ctxt, s, i64)
				s.Reachable = 0
			}

			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Offset = 0
		}
	}
}
// mergetemp coalesces compiler-generated temporaries whose live ranges
// do not overlap, so that they share stack slots. It also deletes
// write-only temporaries and temporaries that are written by one
// instruction and read only by the next.
func mergetemp(firstp *obj.Prog) {
	const (
		debugmerge = 1
	)

	g := Flowstart(firstp, nil)
	if g == nil {
		return
	}

	// Build list of all mergeable variables.
	nvar := 0
	for l := Curfn.Dcl; l != nil; l = l.Next {
		if canmerge(l.N) {
			nvar++
		}
	}

	var_ := make([]TempVar, nvar)
	nvar = 0
	var n *Node
	var v *TempVar
	for l := Curfn.Dcl; l != nil; l = l.Next {
		n = l.N
		if canmerge(n) {
			v = &var_[nvar]
			nvar++
			n.Opt = v
			v.node = n
		}
	}

	// Build list of uses.
	// We assume that the earliest reference to a temporary is its definition.
	// This is not true of variables in general but our temporaries are all
	// single-use (that's why we have so many!).
	var p *obj.Prog
	var info ProgInfo
	for f := g.Start; f != nil; f = f.Link {
		p = f.Prog
		info = Thearch.Proginfo(p)

		if p.From.Node != nil && ((p.From.Node).(*Node)).Opt != nil && p.To.Node != nil && ((p.To.Node).(*Node)).Opt != nil {
			Fatal("double node %v", p)
		}
		v = nil
		n, _ = p.From.Node.(*Node)
		if n != nil {
			v, _ = n.Opt.(*TempVar)
		}
		if v == nil {
			n, _ = p.To.Node.(*Node)
			if n != nil {
				v, _ = n.Opt.(*TempVar)
			}
		}

		if v != nil {
			if v.def == nil {
				v.def = f
			}
			// Thread this use onto v's use list via f.Data.
			f.Data = v.use
			v.use = f
			if n == p.From.Node && (info.Flags&LeftAddr != 0) {
				v.addr = 1
			}
		}
	}

	if debugmerge > 1 && Debug['v'] != 0 {
		Dumpit("before", g.Start, 0)
	}

	nkill := 0

	// Special case.
	var p1 *obj.Prog
	var info1 ProgInfo
	var f *Flow
	for i := 0; i < len(var_); i++ {
		v = &var_[i]
		if v.addr != 0 {
			continue
		}

		// Used in only one instruction, which had better be a write.
		f = v.use
		if f != nil && f.Data.(*Flow) == nil {
			p = f.Prog
			info = Thearch.Proginfo(p)
			if p.To.Node == v.node && (info.Flags&RightWrite != 0) && info.Flags&RightRead == 0 {
				p.As = obj.ANOP
				p.To = obj.Addr{}
				v.removed = 1
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop write-only %v\n", Sconv(v.node.Sym, 0))
				}
			} else {
				Fatal("temp used and not set: %v", p)
			}
			nkill++
			continue
		}

		// Written in one instruction, read in the next, otherwise unused,
		// no jumps to the next instruction. Happens mainly in 386 compiler.
		f = v.use
		if f != nil && f.Link == f.Data.(*Flow) && (f.Data.(*Flow)).Data.(*Flow) == nil && Uniqp(f.Link) == f {
			p = f.Prog
			info = Thearch.Proginfo(p)
			p1 = f.Link.Prog
			info1 = Thearch.Proginfo(p1)
			const (
				SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD
			)
			if p.From.Node == v.node && p1.To.Node == v.node && (info.Flags&Move != 0) && (info.Flags|info1.Flags)&(LeftAddr|RightAddr) == 0 && info.Flags&SizeAny == info1.Flags&SizeAny {
				p1.From = p.From
				Thearch.Excise(f)
				v.removed = 1
				if debugmerge > 0 && Debug['v'] != 0 {
					fmt.Printf("drop immediate-use %v\n", Sconv(v.node.Sym, 0))
				}
			}

			nkill++
			continue
		}
	}

	// Traverse live range of each variable to set start, end.
	// Each flood uses a new value of gen so that we don't have
	// to clear all the r->active words after each variable.
	gen := int32(0)

	for i := 0; i < len(var_); i++ {
		v = &var_[i]
		gen++
		for f = v.use; f != nil; f = f.Data.(*Flow) {
			mergewalk(v, f, uint32(gen))
		}
		if v.addr != 0 {
			gen++
			for f = v.use; f != nil; f = f.Data.(*Flow) {
				varkillwalk(v, f, uint32(gen))
			}
		}
	}

	// Sort variables by start.
	bystart := make([]*TempVar, len(var_))

	for i := 0; i < len(var_); i++ {
		bystart[i] = &var_[i]
	}
	sort.Sort(startcmp(bystart[:len(var_)]))

	// List of in-use variables, sorted by end, so that the ones that
	// will last the longest are the earliest ones in the array.
	// The tail inuse[nfree:] holds no-longer-used variables.
	// In theory we should use a sorted tree so that insertions are
	// guaranteed O(log n) and then the loop is guaranteed O(n log n).
	// In practice, it doesn't really matter.
	inuse := make([]*TempVar, len(var_))

	ninuse := 0
	nfree := len(var_)
	var t *Type
	var v1 *TempVar
	var j int
	for i := 0; i < len(var_); i++ {
		v = bystart[i]
		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%d\n", Nconv(v.node, obj.FmtSharp), v.removed)
		}

		if v.removed != 0 {
			continue
		}

		// Expire no longer in use.
		for ninuse > 0 && inuse[ninuse-1].end < v.start {
			ninuse--
			v1 = inuse[ninuse]
			nfree--
			inuse[nfree] = v1
		}

		if debugmerge > 0 && Debug['v'] != 0 {
			fmt.Printf("consider %v: removed=%d nfree=%d nvar=%d\n", Nconv(v.node, obj.FmtSharp), v.removed, nfree, len(var_))
		}

		// Find old temp to reuse if possible.
		t = v.node.Type

		for j = nfree; j < len(var_); j++ {
			v1 = inuse[j]
			if debugmerge > 0 && Debug['v'] != 0 {
				fmt.Printf("consider %v: maybe %v: type=%v,%v addrtaken=%v,%v\n", Nconv(v.node, obj.FmtSharp), Nconv(v1.node, obj.FmtSharp), Tconv(t, 0), Tconv(v1.node.Type, 0), v.node.Addrtaken, v1.node.Addrtaken)
			}

			// Require the types to match but also require the addrtaken bits to match.
			// If a variable's address is taken, that disables registerization for the individual
			// words of the variable (for example, the base,len,cap of a slice).
			// We don't want to merge a non-addressed var with an addressed one and
			// inhibit registerization of the former.
			if Eqtype(t, v1.node.Type) && v.node.Addrtaken == v1.node.Addrtaken {
				inuse[j] = inuse[nfree]
				nfree++
				if v1.merge != nil {
					v.merge = v1.merge
				} else {
					v.merge = v1
				}
				nkill++
				break
			}
		}

		// Sort v into inuse.
		j = ninuse

		ninuse++
		for j > 0 && inuse[j-1].end < v.end {
			inuse[j] = inuse[j-1]
			j--
		}

		inuse[j] = v
	}

	if debugmerge > 0 && Debug['v'] != 0 {
		fmt.Printf("%v [%d - %d]\n", Sconv(Curfn.Nname.Sym, 0), len(var_), nkill)
		var v *TempVar
		for i := 0; i < len(var_); i++ {
			v = &var_[i]
			fmt.Printf("var %v %v %d-%d", Nconv(v.node, obj.FmtSharp), Tconv(v.node.Type, 0), v.start, v.end)
			if v.addr != 0 {
				fmt.Printf(" addr=1")
			}
			if v.removed != 0 {
				fmt.Printf(" dead=1")
			}
			if v.merge != nil {
				fmt.Printf(" merge %v", Nconv(v.merge.node, obj.FmtSharp))
			}
			if v.start == v.end && v.def != nil {
				fmt.Printf(" %v", v.def.Prog)
			}
			fmt.Printf("\n")
		}

		if debugmerge > 1 && Debug['v'] != 0 {
			Dumpit("after", g.Start, 0)
		}
	}

	// Update node references to use merged temporaries.
	for f := g.Start; f != nil; f = f.Link {
		p = f.Prog
		n, _ = p.From.Node.(*Node)
		if n != nil {
			v, _ = n.Opt.(*TempVar)
			if v != nil && v.merge != nil {
				p.From.Node = v.merge.node
			}
		}
		n, _ = p.To.Node.(*Node)
		if n != nil {
			v, _ = n.Opt.(*TempVar)
			if v != nil && v.merge != nil {
				p.To.Node = v.merge.node
			}
		}
	}

	// Delete merged nodes from declaration list.
	var l *NodeList
	for lp := &Curfn.Dcl; ; {
		l = *lp
		if l == nil {
			break
		}

		// NOTE(review): the purpose of updating Curfn.Dcl.End on every
		// iteration is not evident from this function alone — presumably
		// it keeps the list's cached tail pointer valid while entries are
		// unlinked; confirm against the NodeList definition.
		Curfn.Dcl.End = l
		n = l.N
		v, _ = n.Opt.(*TempVar)
		if v != nil && (v.merge != nil || v.removed != 0) {
			*lp = l.Next
			continue
		}

		lp = &l.Next
	}

	// Clear aux structures.
	for i := 0; i < len(var_); i++ {
		var_[i].node.Opt = nil
	}

	Flowend(g)
}
// peep runs the 386 peephole optimization passes over the function whose
// instruction list starts at firstp. It builds a flow graph, applies local
// rewrites (short-arithmetic elimination, constant propagation, copy
// propagation, redundant-extension removal, ADD/SUB $±1 -> INC/DEC, and
// reg-reg MOVSD -> MOVAPD), mutating the Prog list in place, and then
// tears the graph down again.
func peep(firstp *obj.Prog) {
	g := gc.Flowstart(firstp, nil)
	if g == nil {
		// No flow graph could be built; nothing to optimize.
		return
	}
	gactive = 0

	// byte, word arithmetic elimination.
	elimshortmov(g)

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	var p *obj.Prog
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		// LEAL of a symbol with no index register behaves like loading a
		// constant address, so it is also a constant-propagation candidate.
		case x86.ALEAL:
			if regtyp(&p.To) {
				if p.From.Sym != nil {
					if p.From.Index == x86.REG_NONE {
						conprop(r)
					}
				}
			}

		// Plain moves of integer or floating-point constants into registers.
		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
					conprop(r)
				}
			}
		}
	}

	var r1 *gc.Flow
	var p1 *obj.Prog
	var r *gc.Flow
	var t int // count of rewrites made in one pass; iterate until it stays 0

	// Fixed-point loop: keep re-scanning while the previous scan changed
	// anything, since one rewrite can expose further opportunities.
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		// Register-to-register moves: try to remove them outright via copy
		// propagation, or via operand substitution followed by copyprop.
		case x86.AMOVL,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if regtyp(&p.From) {
					if copyprop(g, r) {
						excise(r)
						t++
					} else if subprop(r) && copyprop(g, r) {
						excise(r)
						t++
					}
				}
			}

		// Two identical extensions in a row (e.g. MOVBLZX R1,R2 followed
		// immediately by MOVBLZX R2,R3): the value is already extended, so
		// the second instruction can become a plain MOVL.
		case x86.AMOVBLZX,
			x86.AMOVWLZX,
			x86.AMOVBLSX,
			x86.AMOVWLSX:
			if regtyp(&p.To) {
				// rnops skips NOPs; Uniqs requires a unique successor.
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVL
						t++
					}
				}
			}

		// ADD $1 -> INC, ADD $-1 -> DEC (and word-sized forms). Guarded by
		// needc(p.Link) — presumably skipping the rewrite when a following
		// instruction consumes the carry flag, which INC/DEC do not set on
		// x86 — TODO confirm needc semantics against its definition.
		case x86.AADDL,
			x86.AADDW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.AADDL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{} // drop the now-unused constant operand
				break
			}
			if p.From.Offset == 1 {
				if p.As == x86.AADDL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

		// SUB $1 -> DEC, SUB $-1 -> INC; mirror of the ADD case above,
		// with the same carry-flag guard.
		case x86.ASUBL,
			x86.ASUBW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.ASUBL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}
			if p.From.Offset == 1 {
				if p.As == x86.ASUBL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one. We only use the lower one, but
	// the processor can do better if we do moves using both.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if p.As == x86.AMOVSD {
			if regtyp(&p.From) {
				if regtyp(&p.To) {
					p.As = x86.AMOVAPD
				}
			}
		}
	}

	gc.Flowend(g)
}
// peep runs the s390x peephole optimization passes over the function whose
// instruction list starts at firstp. Passes applied, in order: copy
// propagation / $0 -> R0 rewriting (to a fixed point), redundant small-MOV
// elimination, load pipelining (hoisting loads earlier), OP+MOV merging,
// and CMP+branch -> compare-and-branch fusion. The Prog list is mutated in
// place. Several passes are gated or bounded by debug knobs
// (gc.Debug['P'], gc.Debug['p'], gc.Debugmergeopmv, gc.Debugcnb).
func peep(firstp *obj.Prog) {
	g := (*gc.Graph)(gc.Flowstart(firstp, nil))
	if g == nil {
		// No flow graph could be built; nothing to optimize.
		return
	}
	gactive = 0

	var p *obj.Prog

	// Disabled pass, kept for reference.
	if false {
		// constant propagation
		// find MOV $con,R followed by
		// another MOV $con,R without
		// setting R in the interim
		for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
			p = r.Prog
			switch p.As {
			case s390x.AMOVB, s390x.AMOVW, s390x.AMOVD:
				if regtyp(&p.To) {
					if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
						conprop(r)
					}
				}
			}
		}
	}

	var r *gc.Flow
	var t int // rewrite count per pass here; later reused as a fused opcode

	// Fixed-point loop: re-scan while the previous scan changed anything.
loop1:
	//	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
	//		gc.Dumpit("loop1", g.Start, 0)
	//	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog

		// TODO(austin) Handle smaller moves.  arm and amd64
		// distinguish between moves that moves that *must*
		// sign/zero extend and moves that don't care so they
		// can eliminate moves that don't care without
		// breaking moves that do care.  This might let us
		// simplify or remove the next peep loop, too.
		if p.As == s390x.AMOVD || p.As == s390x.AFMOVD {
			if regtyp(&p.To) {
				// Try to eliminate reg->reg moves
				if regtyp(&p.From) {
					if p.From.Type == p.To.Type {
						if copyprop(r) {
							excise(r)
							t++
						} else if subprop(r) && copyprop(r) {
							excise(r)
							t++
						}
					}
				}

				// Convert uses to $0 to uses of R0 and
				// propagate R0
				if regzer(&p.From) != 0 {
					if p.To.Type == obj.TYPE_REG {
						p.From.Type = obj.TYPE_REG
						p.From.Reg = s390x.REGZERO
						if copyprop(r) {
							excise(r)
							t++
						} else if subprop(r) && copyprop(r) {
							excise(r)
							t++
						}
					}
				}
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("pass7 copyprop", g.Start, 0)
	}

	/*
	 * look for MOVB x,R; MOVB R,R (for small MOVs not handled above)
	 */
	var p1 *obj.Prog
	var r1 *gc.Flow
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		default:
			continue

		case s390x.AMOVH,
			s390x.AMOVHZ,
			s390x.AMOVB,
			s390x.AMOVBZ,
			s390x.AMOVW,
			s390x.AMOVWZ:
			if p.To.Type != obj.TYPE_REG {
				continue
			}
		}

		// The immediately following instruction must be the same small MOV
		// from and to the very same register — a pure no-op re-extension.
		r1 = r.Link
		if r1 == nil {
			continue
		}
		p1 = r1.Prog
		if p1.As != p.As {
			continue
		}
		if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg {
			continue
		}
		if p1.To.Type != obj.TYPE_REG || p1.To.Reg != p.To.Reg {
			continue
		}
		excise(r1)
	}

	if gc.Debug['P'] > 1 {
		goto ret /* allow following code improvement to be suppressed */
	}

	if gc.Debug['p'] == 0 {
		// load pipelining
		// push any load from memory as early as possible
		// to give it time to complete before use.
		for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
			p = r.Prog
			switch p.As {
			case s390x.AMOVB, s390x.AMOVW, s390x.AMOVD:
				if regtyp(&p.To) && !regconsttyp(&p.From) {
					pushback(r)
				}
			}
		}
		if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
			gc.Dumpit("pass8 push load as early as possible", g.Start, 0)
		}
	}

	/*
	 * look for OP a, b, c; MOV c, d; -> OP a, b, d;
	 */
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		// gc.Debugmergeopmv bounds how many merges are performed (-1 = unbounded).
		if (gc.Debugmergeopmv != -1) && (mergeopmv_cnt >= gc.Debugmergeopmv) {
			break
		}
		p = r.Prog
		switch p.As {
		// Three-operand arithmetic/logical ops whose result register can be
		// redirected into the MOV's destination.
		case s390x.AADD,
			s390x.AADDC,
			s390x.AADDME,
			s390x.AADDE,
			s390x.AADDZE,
			s390x.AAND,
			s390x.AANDN,
			s390x.ADIVW,
			s390x.ADIVWU,
			s390x.ADIVD,
			s390x.ADIVDU,
			s390x.AMULLW,
			s390x.AMULHD,
			s390x.AMULHDU,
			s390x.AMULLD,
			s390x.ANAND,
			s390x.ANOR,
			s390x.AOR,
			s390x.AORN,
			s390x.AREM,
			s390x.AREMU,
			s390x.AREMD,
			s390x.AREMDU,
			s390x.ARLWMI,
			s390x.ARLWNM,
			s390x.ASLW,
			s390x.ASRAW,
			s390x.ASRW,
			s390x.ASLD,
			s390x.ASRAD,
			s390x.ASRD,
			s390x.ASUB,
			s390x.ASUBC,
			s390x.ASUBME,
			s390x.ASUBE,
			s390x.ASUBZE,
			s390x.AXOR:
			if p.To.Type != obj.TYPE_REG {
				continue
			}
			if p.Reg == 0 {
				// Only for 3 ops instruction
				continue
			}

		default:
			continue
		}

		// Find the next real instruction, skipping NOPs.
		r1 := r.Link
		for ; r1 != nil; r1 = r1.Link {
			if r1.Prog.As != obj.ANOP {
				break
			}
		}
		if r1 == nil {
			continue
		}
		p1 := r1.Prog
		switch p1.As {
		case s390x.AMOVD,
			s390x.AMOVW,
			s390x.AMOVWZ,
			s390x.AMOVH,
			s390x.AMOVHZ,
			s390x.AMOVB,
			s390x.AMOVBZ:
			if p1.To.Type != obj.TYPE_REG {
				continue
			}

		default:
			continue
		}
		// The MOV must read exactly the op's result register.
		if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg {
			continue
		}
		if trymergeopmv(r1) {
			p.To = p1.To
			excise(r1)
			mergeopmv_cnt += 1
		}
	}
	if gc.Debug['v'] != 0 {
		gc.Dumpit("Merge operation and move", g.Start, 0)
	}

	/*
	 * look for CMP x, y; Branch -> Compare and branch
	 */
	if gc.Debugcnb == 0 {
		goto ret
	}
	for r := (*gc.Flow)(g.Start); r != nil; r = r.Link {
		// gc.Debugcnb bounds how many fusions are performed (-1 = unbounded).
		if (gc.Debugcnb != -1) && (cnb_cnt >= gc.Debugcnb) {
			break
		}
		p = r.Prog
		r1 = gc.Uniqs(r)
		if r1 == nil {
			continue
		}
		p1 = r1.Prog

		// Map (compare kind, branch condition) to the fused opcode in t.
		// Note: t is reused here to hold an opcode, not a counter.
		switch p.As {
		case s390x.ACMP:
			switch p1.As {
			case s390x.ABCL, s390x.ABC:
				continue

			case s390x.ABEQ:
				t = s390x.ACMPBEQ

			case s390x.ABGE:
				t = s390x.ACMPBGE

			case s390x.ABGT:
				t = s390x.ACMPBGT

			case s390x.ABLE:
				t = s390x.ACMPBLE

			case s390x.ABLT:
				t = s390x.ACMPBLT

			case s390x.ABNE:
				t = s390x.ACMPBNE

			default:
				continue
			}

		case s390x.ACMPU:
			switch p1.As {
			case s390x.ABCL, s390x.ABC:
				continue

			case s390x.ABEQ:
				t = s390x.ACMPUBEQ

			case s390x.ABGE:
				t = s390x.ACMPUBGE

			case s390x.ABGT:
				t = s390x.ACMPUBGT

			case s390x.ABLE:
				t = s390x.ACMPUBLE

			case s390x.ABLT:
				t = s390x.ACMPUBLT

			case s390x.ABNE:
				t = s390x.ACMPUBNE

			default:
				continue
			}

		case s390x.ACMPW, s390x.ACMPWU:
			// 32-bit compares are not fused here.
			continue

		default:
			continue
		}

		if gc.Debug['D'] != 0 {
			fmt.Printf("cnb %v; %v -> ", p, p1)
		}

		// Branches to a symbol (external target) cannot be fused.
		if p1.To.Sym != nil {
			continue
		}

		if p.To.Type == obj.TYPE_REG {
			// Register-register compare-and-branch.
			p1.As = int16(t)
			p1.From = p.From
			p1.Reg = p.To.Reg
			p1.From3 = nil
		} else if p.To.Type == obj.TYPE_CONST {
			// Immediate compare-and-branch: the immediate must fit the
			// instruction's 8-bit field (signed for CMP, unsigned for CMPU).
			switch p.As {
			case s390x.ACMP, s390x.ACMPW:
				// NOTE(review): `>= (1<<7)-1` rejects 127, which is within
				// signed 8-bit range [-128,127] — looks off-by-one
				// (conservative, not incorrect); confirm against the
				// instruction's immediate field definition.
				if (p.To.Offset < -(1 << 7)) || (p.To.Offset >= ((1 << 7) - 1)) {
					continue
				}

			case s390x.ACMPU, s390x.ACMPWU:
				if p.To.Offset >= (1 << 8) {
					continue
				}

			default:
			}

			p1.As = int16(t)
			p1.From = p.From
			p1.Reg = 0
			p1.From3 = new(obj.Addr)
			*(p1.From3) = p.To
		} else {
			continue
		}

		if gc.Debug['D'] != 0 {
			fmt.Printf("%v\n", p1)
		}
		cnb_cnt += 1
		excise(r) // the standalone compare is no longer needed
	}
	if gc.Debug['v'] != 0 {
		gc.Dumpit("compare and branch", g.Start, 0)
	}

ret:
	gc.Flowend(g)
}