// stacksplitPost emits the tail of the stack-split check: saving LR,
// calling the appropriate morestack variant and branching back to the
// start of the function. The branches emitted by stacksplitPre (pPre,
// pPreempt) are pointed at the code emitted here.
func stacksplitPost(ctxt *obj.Link, p *obj.Prog, pPre *obj.Prog, pPreempt *obj.Prog) *obj.Prog {
	// MOVD	LR, R5
	p = obj.Appendp(ctxt, p)
	pPre.Pcond = p
	p.As = AMOVD
	p.From.Type = obj.TYPE_REG
	p.From.Reg = REG_LR
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R5
	if pPreempt != nil {
		pPreempt.Pcond = p
	}

	// BL	runtime.morestack(SB)
	p = obj.Appendp(ctxt, p)

	p.As = ABL
	p.To.Type = obj.TYPE_BRANCH
	if ctxt.Cursym.Cfunc {
		p.To.Sym = obj.Linklookup(ctxt, "runtime.morestackc", 0)
	} else if ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0 {
		p.To.Sym = obj.Linklookup(ctxt, "runtime.morestack_noctxt", 0)
	} else {
		p.To.Sym = obj.Linklookup(ctxt, "runtime.morestack", 0)
	}

	// BR	start
	p = obj.Appendp(ctxt, p)

	p.As = ABR
	p.To.Type = obj.TYPE_BRANCH
	p.Pcond = ctxt.Cursym.Text.Link
	return p
}
// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}

		// check is
		//	CMP arg, $0
		//	JNE 2(PC) (likely)
		//	MOV AX, 0
		p1 = gc.Ctxt.NewProg()
		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = cmpptr
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = 0
		p1.As = x86.AJNE
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = 1 // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		// if possible, since we know arg is 0, use 0(arg),
		// which will be shorter to encode than plain 0.
		p2.As = x86.AMOVL
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = x86.REG_AX
		if regtyp(&p.From) {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = p.From.Reg
		} else {
			p2.To.Type = obj.TYPE_MEM
			p2.To.Reg = x86.REG_NONE
		}
		p2.To.Offset = 0
	}
}
// Prog allocates a new instruction of kind as, links it onto the current
// output list (or onto the pending data list for AGLOBL) and returns it.
func Prog(as obj.As) *obj.Prog {
	var p *obj.Prog

	if as == obj.AGLOBL {
		if ddumped {
			Fatalf("already dumped data")
		}
		if dpc == nil {
			dpc = Ctxt.NewProg()
			dfirst = dpc
		}

		p = dpc
		dpc = Ctxt.NewProg()
		p.Link = dpc
	} else {
		p = Pc
		Pc = Ctxt.NewProg()
		Clearp(Pc)
		p.Link = Pc
	}

	if lineno == 0 && Debug['K'] != 0 {
		Warn("prog: line 0")
	}

	p.As = as
	p.Lineno = lineno
	return p
}
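// A minimal usage sketch (hypothetical caller, not part of this file): the
// arm constants below stand in for whatever backend package the caller
// actually imports; any architecture's opcode and register constants work
// the same way.
//
//	p := Prog(arm.AMOVW) // allocate and link a MOVW
//	p.From.Type = obj.TYPE_CONST
//	p.From.Offset = 0
//	p.To.Type = obj.TYPE_REG
//	p.To.Reg = arm.REG_R1 // MOVW $0, R1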
// rewriteToPcrel rewrites p, when assembling 386 code with -shared, so that
// symbol addresses are computed PC-relatively via a call to
// __x86.get_pc_thunk.cx.
func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog) {
	// RegTo2 is set on the instructions we insert here so they don't get
	// processed twice.
	if p.RegTo2 != 0 {
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	// Any Prog (aside from the above special cases) with an Addr with Name ==
	// NAME_EXTERN, NAME_STATIC or NAME_GOTREF has a CALL __x86.get_pc_thunk.cx
	// inserted before it.
	isName := func(a *obj.Addr) bool {
		if a.Sym == nil || (a.Type != obj.TYPE_MEM && a.Type != obj.TYPE_ADDR) || a.Reg != 0 {
			return false
		}
		if a.Sym.Type == obj.STLSBSS {
			return false
		}
		return a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_STATIC || a.Name == obj.NAME_GOTREF
	}

	if isName(&p.From) && p.From.Type == obj.TYPE_ADDR {
		// Handle things like "MOVL $sym, (SP)" or "PUSHL $sym" by rewriting
		// to "MOVL $sym, CX; MOVL CX, (SP)" or "MOVL $sym, CX; PUSHL CX"
		// respectively.
		if p.To.Type != obj.TYPE_REG {
			q := obj.Appendp(ctxt, p)
			q.As = p.As
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
			q.To = p.To
			p.As = AMOVL
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
	}

	if !isName(&p.From) && !isName(&p.To) && (p.From3 == nil || !isName(p.From3)) {
		return
	}
	q := obj.Appendp(ctxt, p)
	q.RegTo2 = 1
	r := obj.Appendp(ctxt, q)
	r.RegTo2 = 1
	q.As = obj.ACALL
	q.To.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
	q.To.Type = obj.TYPE_MEM
	q.To.Name = obj.NAME_EXTERN
	q.To.Sym.Local = true
	r.As = p.As
	r.Scond = p.Scond
	r.From = p.From
	r.From3 = p.From3
	r.Reg = p.Reg
	r.To = p.To
	obj.Nopout(p)
}
// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var reg int
	var p1 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}
		if p.From.Type != obj.TYPE_REG {
			gc.Fatalf("invalid nil check %v", p)
		}
		reg = int(p.From.Reg)

		// check is
		//	CMP arg, $0
		//	MOV.EQ arg, 0(arg)
		p1 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		p1.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p1.Pc = 9999
		p1.As = arm.AMOVW
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = int16(reg)
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = int16(reg)
		p1.To.Offset = 0
		p1.Scond = arm.C_SCOND_EQ
		p.As = arm.ACMP
		p.From.Type = obj.TYPE_CONST
		p.From.Reg = 0
		p.From.Offset = 0
		p.Reg = int16(reg)
	}
}
// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if gc.Debug_checknil != 0 && gc.Ctxt.Debugvlog != 0 {
			fmt.Printf("expandchecks: %v\n", p)
		}
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}
		if p.From.Type != obj.TYPE_REG {
			gc.Fatalf("invalid nil check %v\n", p)
		}

		// check is
		//	CBNZ arg, 2(PC)
		//	MOVD ZR, 0(arg)
		p1 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		p1.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p1.Pc = 9999
		p.As = arm64.ACBNZ
		p.To.Type = obj.TYPE_BRANCH
		p.To.Val = p1.Link

		// crash by write to memory address 0.
		p1.As = arm64.AMOVD
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = arm64.REGZERO
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = p.From.Reg
		p1.To.Offset = 0
	}
}
/*
// instruction scheduling
	if(debug['Q'] == 0)
		return;

	curtext = nil;
	q = nil;	// p - 1
	q1 = firstp;	// top of block
	o = 0;		// count of instructions
	for(p = firstp; p != nil; p = p1) {
		p1 = p->link;
		o++;
		if(p->mark & NOSCHED){
			if(q1 != p){
				sched(q1, q);
			}
			for(; p != nil; p = p->link){
				if(!(p->mark & NOSCHED))
					break;
				q = p;
			}
			p1 = p;
			q1 = p;
			o = 0;
			continue;
		}
		if(p->mark & (LABEL|SYNC)) {
			if(q1 != p)
				sched(q1, q);
			q1 = p;
			o = 1;
		}
		if(p->mark & (BRANCH|SYNC)) {
			sched(q1, p);
			q1 = p1;
			o = 0;
		}
		if(o >= NSCHED) {
			sched(q1, p);
			q1 = p1;
			o = 0;
		}
		q = p;
	}
*/

// stacksplitPre emits the stack-guard comparison for a frame of the given
// size. It returns the last Prog of the check and, for very large frames,
// the conditional branch that must later be pointed at the morestack call
// (both are wired up by stacksplitPost).
func stacksplitPre(ctxt *obj.Link, p *obj.Prog, framesize int32) (*obj.Prog, *obj.Prog) {
	var q *obj.Prog

	// MOVD	g_stackguard(g), R3
	p = obj.Appendp(ctxt, p)

	p.As = AMOVD
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
	if ctxt.Cursym.Cfunc {
		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R3

	q = nil
	if framesize <= obj.StackSmall {
		// small stack: SP < stackguard
		//	CMP	stackguard, SP

		//p.To.Type = obj.TYPE_REG
		//p.To.Reg = REGSP

		// q1: BLT	done

		p = obj.Appendp(ctxt, p)
		//q1 = p
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R3
		p.Reg = REGSP
		p.As = ACMPUBGE
		p.To.Type = obj.TYPE_BRANCH
		//p = obj.Appendp(ctxt, p)

		//p.As = ACMPU
		//p.From.Type = obj.TYPE_REG
		//p.From.Reg = REG_R3
		//p.To.Type = obj.TYPE_REG
		//p.To.Reg = REGSP

		//p = obj.Appendp(ctxt, p)
		//p.As = ABGE
		//p.To.Type = obj.TYPE_BRANCH

	} else if framesize <= obj.StackBig {
		// large stack: SP-framesize < stackguard-StackSmall
		//	ADD $-framesize, SP, R4
		//	CMP stackguard, R4
		p = obj.Appendp(ctxt, p)

		p.As = AADD
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-framesize)
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R4

		p = obj.Appendp(ctxt, p)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R3
		p.Reg = REG_R4
		p.As = ACMPUBGE
		p.To.Type = obj.TYPE_BRANCH

	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	// stackguard is R3
		//	CMP	R3, $StackPreempt
		//	BEQ	label-of-call-to-morestack
		//	ADD	$StackGuard, SP, R4
		//	SUB	R3, R4
		//	MOVD	$(framesize+(StackGuard-StackSmall)), TEMP
		//	CMPUBGE	TEMP, R4
		p = obj.Appendp(ctxt, p)

		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R3
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = obj.StackPreempt

		p = obj.Appendp(ctxt, p)
		q = p
		p.As = ABEQ
		p.To.Type = obj.TYPE_BRANCH

		p = obj.Appendp(ctxt, p)
		p.As = AADD
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = obj.StackGuard
		p.Reg = REGSP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R4

		p = obj.Appendp(ctxt, p)
		p.As = ASUB
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R3
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R4

		p = obj.Appendp(ctxt, p)
		p.As = AMOVD
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(framesize) + obj.StackGuard - obj.StackSmall
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REGTMP

		p = obj.Appendp(ctxt, p)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REGTMP
		p.Reg = REG_R4
		p.As = ACMPUBGE
		p.To.Type = obj.TYPE_BRANCH
	}

	return p, q
}
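// Worked example of the wraparound check above (illustrative numbers only;
// the real constants live in cmd/internal/obj/stack.go and may differ).
// Assume StackGuard = 880, StackSmall = 128 and framesize = 65536. The
// morestack branch is taken when
//	framesize + (StackGuard - StackSmall) >= SP - stackguard + StackGuard
//	65536 + 752 >= SP - stackguard + 880
// Without the +StackGuard on both sides, an SP slightly below stackguard
// would make the right side negative, and in the unsigned CMPUBGE a small
// negative difference wraps around to a huge value, wrongly skipping the
// call to morestack.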
// Called after regopt and peep have run.
// Expand CHECKNIL pseudo-op into actual nil pointer check.
func expandchecks(firstp *obj.Prog) {
	var p1 *obj.Prog
	var p2 *obj.Prog

	for p := firstp; p != nil; p = p.Link {
		if gc.Debug_checknil != 0 && gc.Ctxt.Debugvlog != 0 {
			fmt.Printf("expandchecks: %v\n", p)
		}
		if p.As != obj.ACHECKNIL {
			continue
		}
		if gc.Debug_checknil != 0 && p.Lineno > 1 { // p->lineno==1 in generated wrappers
			gc.Warnl(p.Lineno, "generated nil check")
		}
		if p.From.Type != obj.TYPE_REG {
			gc.Fatalf("invalid nil check %v\n", p)
		}

		/*
			// check is
			//	TD $4, R0, arg (R0 is always zero)
			// eqv. to:
			// 	tdeq r0, arg
			// NOTE: this needs special runtime support to make SIGTRAP recoverable.
			reg = p->from.reg;
			p->as = ATD;
			p->from = p->to = p->from3 = zprog.from;
			p->from.type = TYPE_CONST;
			p->from.offset = 4;
			p->from.reg = 0;
			p->reg = REGZERO;
			p->to.type = TYPE_REG;
			p->to.reg = reg;
		*/
		// check is
		//	CMP arg, R0
		//	BNE 2(PC) [likely]
		//	MOVD R0, 0(R0)
		p1 = gc.Ctxt.NewProg()
		p2 = gc.Ctxt.NewProg()
		gc.Clearp(p1)
		gc.Clearp(p2)
		p1.Link = p2
		p2.Link = p.Link
		p.Link = p1
		p1.Lineno = p.Lineno
		p2.Lineno = p.Lineno
		p1.Pc = 9999
		p2.Pc = 9999
		p.As = ppc64.ACMP
		p.To.Type = obj.TYPE_REG
		p.To.Reg = ppc64.REGZERO
		p1.As = ppc64.ABNE

		//p1->from.type = TYPE_CONST;
		//p1->from.offset = 1; // likely
		p1.To.Type = obj.TYPE_BRANCH
		p1.To.Val = p2.Link

		// crash by write to memory address 0.
		p2.As = ppc64.AMOVD
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = ppc64.REGZERO
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = ppc64.REGZERO
		p2.To.Offset = 0
	}
}
func Clearp(p *obj.Prog) {
	obj.Nopout(p)
	p.As = obj.AEND
	p.Pc = int64(pcloc)
	pcloc++
}
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog {
	// MOVW g_stackguard(g), R1
	p = obj.Appendp(ctxt, p)

	p.As = AMOVW
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REGG
	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
	if ctxt.Cursym.Cfunc {
		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
	}
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_R1

	if framesize <= obj.StackSmall {
		// small stack: SP < stackguard
		//	CMP	stackguard, SP
		p = obj.Appendp(ctxt, p)

		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R1
		p.Reg = REGSP
	} else if framesize <= obj.StackBig {
		// large stack: SP-framesize < stackguard-StackSmall
		//	MOVW $-framesize(SP), R2
		//	CMP stackguard, R2
		p = obj.Appendp(ctxt, p)

		p.As = AMOVW
		p.From.Type = obj.TYPE_ADDR
		p.From.Reg = REGSP
		p.From.Offset = int64(-framesize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R2

		p = obj.Appendp(ctxt, p)
		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R1
		p.Reg = REG_R2
	} else {
		// Such a large stack we need to protect against wraparound
		// if SP is close to zero.
		//	SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//	CMP $StackPreempt, R1
		//	MOVW.NE $StackGuard(SP), R2
		//	SUB.NE R1, R2
		//	MOVW.NE $(framesize+(StackGuard-StackSmall)), R3
		//	CMP.NE R3, R2
		p = obj.Appendp(ctxt, p)

		p.As = ACMP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(uint32(obj.StackPreempt & (1<<32 - 1)))
		p.Reg = REG_R1

		p = obj.Appendp(ctxt, p)
		p.As = AMOVW
		p.From.Type = obj.TYPE_ADDR
		p.From.Reg = REGSP
		p.From.Offset = obj.StackGuard
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R2
		p.Scond = C_SCOND_NE

		p = obj.Appendp(ctxt, p)
		p.As = ASUB
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R2
		p.Scond = C_SCOND_NE

		p = obj.Appendp(ctxt, p)
		p.As = AMOVW
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_R3
		p.Scond = C_SCOND_NE

		p = obj.Appendp(ctxt, p)
		p.As = ACMP
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_R3
		p.Reg = REG_R2
		p.Scond = C_SCOND_NE
	}

	// BLS call-to-morestack
	bls := obj.Appendp(ctxt, p)
	bls.As = ABLS
	bls.To.Type = obj.TYPE_BRANCH

	var last *obj.Prog
	for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link {
	}

	spfix := obj.Appendp(ctxt, last)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	// MOVW	LR, R3
	movw := obj.Appendp(ctxt, spfix)
	movw.As = AMOVW
	movw.From.Type = obj.TYPE_REG
	movw.From.Reg = REGLINK
	movw.To.Type = obj.TYPE_REG
	movw.To.Reg = REG_R3

	bls.Pcond = movw

	// BL runtime.morestack
	call := obj.Appendp(ctxt, movw)
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	morestack := "runtime.morestack"
	switch {
	case ctxt.Cursym.Cfunc:
		morestack = "runtime.morestackc"
	case ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0:
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = obj.Linklookup(ctxt, morestack, 0)

	// B start
	b := obj.Appendp(ctxt, call)
	b.As = obj.AJMP
	b.To.Type = obj.TYPE_BRANCH
	b.Pcond = ctxt.Cursym.Text.Link
	b.Spadj = +framesize

	return bls
}
func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
	autosize := int32(0)

	ctxt.Cursym = cursym

	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	softfloat(ctxt, cursym)

	p := cursym.Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}
	cursym.Locals = autoffset
	cursym.Args = p.To.Val.(int32)

	/*
	 * find leaf subroutines
	 * strip NOPs
	 * expand RET
	 * expand BECOME pseudo
	 */
	var q1 *obj.Prog
	var q *obj.Prog
	for p := cursym.Text; p != nil; p = p.Link {
		switch p.As {
		case obj.ATEXT:
			p.Mark |= LEAF

		case obj.ARET:
			break

		case ADIV, ADIVU, AMOD, AMODU:
			q = p
			if ctxt.Sym_div == nil {
				initdiv(ctxt)
			}
			cursym.Text.Mark &^= LEAF
			continue

		case obj.ANOP:
			q1 = p.Link
			q.Link = q1 /* q is non-nop */
			if q1 != nil {
				q1.Mark |= p.Mark
			}
			continue

		case ABL,
			ABX,
			obj.ADUFFZERO,
			obj.ADUFFCOPY:
			cursym.Text.Mark &^= LEAF
			fallthrough

		case AB,
			ABEQ,
			ABNE,
			ABCS,
			ABHS,
			ABCC,
			ABLO,
			ABMI,
			ABPL,
			ABVS,
			ABVC,
			ABHI,
			ABLS,
			ABGE,
			ABLT,
			ABGT,
			ABLE:
			q1 = p.Pcond
			if q1 != nil {
				for q1.As == obj.ANOP {
					q1 = q1.Link
					p.Pcond = q1
				}
			}
		}

		q = p
	}

	var p1 *obj.Prog
	var p2 *obj.Prog
	var q2 *obj.Prog
	for p := cursym.Text; p != nil; p = p.Link {
		o := p.As
		switch o {
		case obj.ATEXT:
			autosize = int32(p.To.Offset + 4)
			if autosize <= 4 {
				if cursym.Text.Mark&LEAF != 0 {
					p.To.Offset = -4
					autosize = 0
				}
			}

			if autosize == 0 && cursym.Text.Mark&LEAF == 0 {
				if ctxt.Debugvlog != 0 {
					fmt.Fprintf(ctxt.Bso, "save suppressed in: %s\n", cursym.Name)
					ctxt.Bso.Flush()
				}

				cursym.Text.Mark |= LEAF
			}

			if cursym.Text.Mark&LEAF != 0 {
				cursym.Leaf = true
				if autosize == 0 {
					break
				}
			}

			if p.From3.Offset&obj.NOSPLIT == 0 {
				p = stacksplit(ctxt, p, autosize) // emit split check
			}

			// MOVW.W		R14,$-autosize(SP)
			p = obj.Appendp(ctxt, p)

			p.As = AMOVW
			p.Scond |= C_WBIT
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGLINK
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(-autosize)
			p.To.Reg = REGSP
			p.Spadj = autosize

			if cursym.Text.From3.Offset&obj.WRAPPER != 0 {
				// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
				//
				//	MOVW g_panic(g), R1
				//	CMP $0, R1
				//	B.EQ end
				//	MOVW panic_argp(R1), R2
				//	ADD $(autosize+4), R13, R3
				//	CMP R2, R3
				//	B.NE end
				//	ADD $4, R13, R4
				//	MOVW R4, panic_argp(R1)
				// end:
				//	NOP
				//
				// The NOP is needed to give the jumps somewhere to land.
				// It is a liblink NOP, not an ARM NOP: it encodes to 0 instruction bytes.

				p = obj.Appendp(ctxt, p)
				p.As = AMOVW
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REGG
				p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_R1

				p = obj.Appendp(ctxt, p)
				p.As = ACMP
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 0
				p.Reg = REG_R1

				p = obj.Appendp(ctxt, p)
				p.As = ABEQ
				p.To.Type = obj.TYPE_BRANCH
				p1 = p

				p = obj.Appendp(ctxt, p)
				p.As = AMOVW
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_R1
				p.From.Offset = 0 // Panic.argp
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_R2

				p = obj.Appendp(ctxt, p)
				p.As = AADD
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(autosize) + 4
				p.Reg = REG_R13
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_R3

				p = obj.Appendp(ctxt, p)
				p.As = ACMP
				p.From.Type = obj.TYPE_REG
				p.From.Reg = REG_R2
				p.Reg = REG_R3

				p = obj.Appendp(ctxt, p)
				p.As = ABNE
				p.To.Type = obj.TYPE_BRANCH
				p2 = p

				p = obj.Appendp(ctxt, p)
				p.As = AADD
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = 4
				p.Reg = REG_R13
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_R4

				p = obj.Appendp(ctxt, p)
				p.As = AMOVW
				p.From.Type = obj.TYPE_REG
				p.From.Reg = REG_R4
				p.To.Type = obj.TYPE_MEM
				p.To.Reg = REG_R1
				p.To.Offset = 0 // Panic.argp

				p = obj.Appendp(ctxt, p)

				p.As = obj.ANOP
				p1.Pcond = p
				p2.Pcond = p
			}

		case obj.ARET:
			nocache(p)
			if cursym.Text.Mark&LEAF != 0 {
				if autosize == 0 {
					p.As = AB
					p.From = obj.Addr{}
					if p.To.Sym != nil { // retjmp
						p.To.Type = obj.TYPE_BRANCH
					} else {
						p.To.Type = obj.TYPE_MEM
						p.To.Offset = 0
						p.To.Reg = REGLINK
					}

					break
				}
			}

			p.As = AMOVW
			p.Scond |= C_PBIT
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(autosize)
			p.From.Reg = REGSP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGPC

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so no spadj.
			if p.To.Sym != nil { // retjmp
				p.To.Reg = REGLINK
				q2 = obj.Appendp(ctxt, p)
				q2.As = AB
				q2.To.Type = obj.TYPE_BRANCH
				q2.To.Sym = p.To.Sym
				p.To.Sym = nil
				p = q2
			}

		case AADD:
			if p.From.Type == obj.TYPE_CONST && p.From.Reg == 0 && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP {
				p.Spadj = int32(-p.From.Offset)
			}

		case ASUB:
			if p.From.Type == obj.TYPE_CONST && p.From.Reg == 0 && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP {
				p.Spadj = int32(p.From.Offset)
			}

		case ADIV, ADIVU, AMOD, AMODU:
			if cursym.Text.From3.Offset&obj.NOSPLIT != 0 {
				ctxt.Diag("cannot divide in NOSPLIT function")
			}
			if ctxt.Debugdivmod != 0 {
				break
			}
			if p.From.Type != obj.TYPE_REG {
				break
			}
			if p.To.Type != obj.TYPE_REG {
				break
			}

			// Make copy because we overwrite p below.
			q1 := *p
			if q1.Reg == REGTMP || q1.Reg == 0 && q1.To.Reg == REGTMP {
				ctxt.Diag("div already using REGTMP: %v", p)
			}

			/* MOV m(g),REGTMP */
			p.As = AMOVW
			p.Lineno = q1.Lineno
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REGG
			p.From.Offset = 6 * 4 // offset of g.m
			p.Reg = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGTMP

			/* MOV a,m_divmod(REGTMP) */
			p = obj.Appendp(ctxt, p)
			p.As = AMOVW
			p.Lineno = q1.Lineno
			p.From.Type = obj.TYPE_REG
			p.From.Reg = q1.From.Reg
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = REGTMP
			p.To.Offset = 8 * 4 // offset of m.divmod

			/* MOV b,REGTMP */
			p = obj.Appendp(ctxt, p)
			p.As = AMOVW
			p.Lineno = q1.Lineno
			p.From.Type = obj.TYPE_REG
			p.From.Reg = q1.Reg
			if q1.Reg == 0 {
				p.From.Reg = q1.To.Reg
			}
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGTMP
			p.To.Offset = 0

			/* CALL appropriate */
			p = obj.Appendp(ctxt, p)
			p.As = ABL
			p.Lineno = q1.Lineno
			p.To.Type = obj.TYPE_BRANCH
			switch o {
			case ADIV:
				p.To.Sym = ctxt.Sym_div

			case ADIVU:
				p.To.Sym = ctxt.Sym_divu

			case AMOD:
				p.To.Sym = ctxt.Sym_mod

			case AMODU:
				p.To.Sym = ctxt.Sym_modu
			}

			/* MOV REGTMP, b */
			p = obj.Appendp(ctxt, p)
			p.As = AMOVW
			p.Lineno = q1.Lineno
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REGTMP
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = q1.To.Reg

		case AMOVW:
			if (p.Scond&C_WBIT != 0) && p.To.Type == obj.TYPE_MEM && p.To.Reg == REGSP {
				p.Spadj = int32(-p.To.Offset)
			}
			if (p.Scond&C_PBIT != 0) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REGSP && p.To.Reg != REGPC {
				p.Spadj = int32(-p.From.Offset)
			}
			if p.From.Type == obj.TYPE_ADDR && p.From.Reg == REGSP && p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP {
				p.Spadj = int32(-p.From.Offset)
			}
		}
	}
}
func peep(firstp *obj.Prog) {
	g := gc.Flowstart(firstp, nil)
	if g == nil {
		return
	}
	gactive = 0

	// byte, word arithmetic elimination.
	elimshortmov(g)

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	var p *obj.Prog
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.ALEAL:
			if regtyp(&p.To) {
				if p.From.Sym != nil {
					if p.From.Index == x86.REG_NONE {
						conprop(r)
					}
				}
			}

		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
					conprop(r)
				}
			}
		}
	}

	var r1 *gc.Flow
	var p1 *obj.Prog
	var r *gc.Flow
	var t int
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVL,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if regtyp(&p.From) {
					if copyprop(g, r) {
						excise(r)
						t++
					} else if subprop(r) && copyprop(g, r) {
						excise(r)
						t++
					}
				}
			}

		case x86.AMOVBLZX,
			x86.AMOVWLZX,
			x86.AMOVBLSX,
			x86.AMOVWLSX:
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVL
						t++
					}
				}
			}

		case x86.AADDL,
			x86.AADDW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.AADDL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.AADDL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

		case x86.ASUBL,
			x86.ASUBW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.ASUBL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.ASUBL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one. We only use the lower one, but
	// the processor can do better if we do moves using both.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if p.As == x86.AMOVSD {
			if regtyp(&p.From) {
				if regtyp(&p.To) {
					p.As = x86.AMOVAPD
				}
			}
		}
	}

	gc.Flowend(g)
}
// xfol lays out the code reachable from p in fall-through order, copying
// short blocks and inverting conditional branches (via relinv) so that the
// expected path falls through rather than jumping.
func xfol(ctxt *obj.Link, p *obj.Prog, last **obj.Prog) {
	var q *obj.Prog
	var r *obj.Prog
	var b obj.As

	for p != nil {
		a := p.As
		if a == ABR {
			q = p.Pcond
			if (p.Mark&NOSCHED != 0) || q != nil && (q.Mark&NOSCHED != 0) {
				p.Mark |= FOLL
				(*last).Link = p
				*last = p
				(*last).Pc = pc_cnt
				pc_cnt += 1
				p = p.Link
				xfol(ctxt, p, last)
				p = q
				if p != nil && p.Mark&FOLL == 0 {
					continue
				}
				return
			}

			if q != nil {
				p.Mark |= FOLL
				p = q
				if p.Mark&FOLL == 0 {
					continue
				}
			}
		}

		if p.Mark&FOLL != 0 {
			q = p
			for i := 0; i < 4; i, q = i+1, q.Link {
				if q == *last || (q.Mark&NOSCHED != 0) {
					break
				}
				b = 0 /* set */
				a = q.As
				if a == obj.ANOP {
					i--
					continue
				}
				if a != ABR && a != obj.ARET {
					if q.Pcond == nil || (q.Pcond.Mark&FOLL != 0) {
						continue
					}
					b = relinv(a)
					if b == 0 {
						continue
					}
				}

				for {
					r = ctxt.NewProg()
					*r = *p
					if r.Mark&FOLL == 0 {
						fmt.Printf("can't happen 1\n")
					}
					r.Mark |= FOLL
					if p != q {
						p = p.Link
						(*last).Link = r
						*last = r
						(*last).Pc = pc_cnt
						pc_cnt += 1
						continue
					}

					(*last).Link = r
					*last = r
					(*last).Pc = pc_cnt
					pc_cnt += 1
					if a == ABR || a == obj.ARET {
						return
					}
					r.As = b
					r.Pcond = p.Link
					r.Link = p.Pcond
					if r.Link.Mark&FOLL == 0 {
						xfol(ctxt, r.Link, last)
					}
					if r.Pcond.Mark&FOLL == 0 {
						fmt.Printf("can't happen 2\n")
					}
					return
				}
			}

			a = ABR
			q = ctxt.NewProg()
			q.As = a
			q.Lineno = p.Lineno
			q.To.Type = obj.TYPE_BRANCH
			q.To.Offset = p.Pc
			q.Pcond = p
			p = q
		}

		p.Mark |= FOLL
		(*last).Link = p
		*last = p
		(*last).Pc = pc_cnt
		pc_cnt += 1

		if a == ABR || a == obj.ARET {
			if p.Mark&NOSCHED != 0 {
				p = p.Link
				continue
			}

			return
		}

		if p.Pcond != nil {
			if a != ABL && p.Link != nil {
				xfol(ctxt, p.Link, last)
				p = p.Pcond
				if p == nil || (p.Mark&FOLL != 0) {
					return
				}
				continue
			}
		}

		p = p.Link
	}
}
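// Sketch of the inversion that relinv enables above (illustrative only): if
// a block ends in
//	BEQ  target
// and its fall-through code has already been laid out elsewhere, xfol can
// emit the copied block ending in the inverted
//	BNE  old-fall-through
// and lay target out immediately after it, trading an unconditional BR for
// straight-line fall-through on the expected path.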
func progedit(ctxt *obj.Link, p *obj.Prog) {
	// Maintain information about code generation mode.
	if ctxt.Mode == 0 {
		ctxt.Mode = ctxt.Arch.RegSize * 8
	}
	p.Mode = int8(ctxt.Mode)

	switch p.As {
	case AMODE:
		if p.From.Type == obj.TYPE_CONST || (p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_NONE) {
			switch int(p.From.Offset) {
			case 16, 32, 64:
				ctxt.Mode = int(p.From.Offset)
			}
		}
		obj.Nopout(p)
	}

	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//         MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//         MOVQ TLS, AX
	//         MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//         MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != obj.Hsolaris {
			obj.Nopout(p)
		}
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			q := obj.Appendp(ctxt, p)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	if ctxt.Headtype == obj.Hwindows && p.Mode == 64 || ctxt.Headtype == obj.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
		if p.From3 != nil {
			nacladdr(ctxt, p, p.From3)
		}
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			i32 := math.Float32bits(f32)
			literal := fmt.Sprintf("$f32.%08x", i32)
			s := obj.Linklookup(ctxt, literal, 0)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Sym.Local = true
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			i64 := math.Float64bits(p.From.Val.(float64))
			literal := fmt.Sprintf("$f64.%016x", i64)
			s := obj.Linklookup(ctxt, literal, 0)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Sym.Local = true
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p)
	}

	if ctxt.Flag_shared && p.Mode == 32 {
		rewriteToPcrel(ctxt, p)
	}
}
func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
	if ctxt.Headtype == obj.Hplan9 && ctxt.Plan9privates == nil {
		ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
	}

	ctxt.Cursym = cursym

	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	p := cursym.Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	var bpsize int
	if p.Mode == 64 && ctxt.Framepointer_enabled && autoffset > 0 && p.From3.Offset&obj.NOFRAME == 0 {
		// Make room to save a base pointer. If autoffset == 0,
		// this might do something special like a tail jump to
		// another function, so in that case we omit this.
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	textarg := int64(p.To.Val.(int32))
	cursym.Args = int32(textarg)
	cursym.Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if p.Mode == 32 && cursym.Locals < 0 {
		cursym.Locals = 0
	}

	// TODO(rsc): Remove 'p.Mode == 64 &&'.
	if p.Mode == 64 && autoffset < obj.StackSmall && p.From3Offset()&obj.NOSPLIT == 0 {
		for q := p; q != nil; q = q.Link {
			if q.As == obj.ACALL {
				goto noleaf
			}
			if (q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO) && autoffset >= obj.StackSmall-8 {
				goto noleaf
			}
		}

		p.From3.Offset |= obj.NOSPLIT
	noleaf:
	}

	if p.From3Offset()&obj.NOSPLIT == 0 || p.From3Offset()&obj.WRAPPER != 0 {
		p = obj.Appendp(ctxt, p)
		p = load_g_cx(ctxt, p) // load g into CX
	}

	if cursym.Text.From3Offset()&obj.NOSPLIT == 0 {
		p = stacksplit(ctxt, p, autoffset, int32(textarg)) // emit split check
	}

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		p = obj.Appendp(ctxt, p)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
	} else {
		// zero-byte stack adjustment.
		// Insert a fake non-zero adjustment so that stkcheck can
		// recognize the end of the stack-splitting prolog.
		p = obj.Appendp(ctxt, p)

		p.As = obj.ANOP
		p.Spadj = int32(-ctxt.Arch.PtrSize)
		p = obj.Appendp(ctxt, p)
		p.As = obj.ANOP
		p.Spadj = int32(ctxt.Arch.PtrSize)
	}

	deltasp := autoffset

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(ctxt, p)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)

		// Move current frame to BP
		p = obj.Appendp(ctxt, p)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Text.From3Offset()&obj.WRAPPER != 0 {
		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JEQ end
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//	MOVQ SP, panic_argp(BX)
		// end:
		//	NOP
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.

		p = obj.Appendp(ctxt, p)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_CX
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
			p.As = AMOVL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_CX
		}
		if p.Mode == 32 {
			p.As = AMOVL
		}

		p = obj.Appendp(ctxt, p)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == obj.Hnacl || p.Mode == 32 {
			p.As = ATESTL
		}

		p = obj.Appendp(ctxt, p)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		p1 := p

		p = obj.Appendp(ctxt, p)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == obj.Hnacl || p.Mode == 32 {
			p.As = ALEAL
		}

		p = obj.Appendp(ctxt, p)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_BX
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
			p.As = ACMPL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_BX
		}
		if p.Mode == 32 {
			p.As = ACMPL
		}

		p = obj.Appendp(ctxt, p)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p2 := p

		p = obj.Appendp(ctxt, p)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_BX
		p.To.Offset = 0 // Panic.argp
		if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
			p.As = AMOVL
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = REG_R15
			p.To.Scale = 1
			p.To.Index = REG_BX
		}
		if p.Mode == 32 {
			p.As = AMOVL
		}

		p = obj.Appendp(ctxt, p)
		p.As = obj.ANOP
		p1.Pcond = p
		p2.Pcond = p
	}

	var a int
	var pcsize int
	for ; p != nil; p = p.Link {
		pcsize = int(p.Mode) / 8
		a = int(p.From.Name)
		if a == obj.NAME_AUTO {
			p.From.Offset += int64(deltasp) - int64(bpsize)
		}
		if a == obj.NAME_PARAM {
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.From3 != nil {
			a = int(p.From3.Name)
			if a == obj.NAME_AUTO {
				p.From3.Offset += int64(deltasp) - int64(bpsize)
			}
			if a == obj.NAME_PARAM {
				p.From3.Offset += int64(deltasp) + int64(pcsize)
			}
		}
		a = int(p.To.Name)
		if a == obj.NAME_AUTO {
			p.To.Offset += int64(deltasp) - int64(bpsize)
		}
		if a == obj.NAME_PARAM {
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case obj.ARET:
			break
		}

		if autoffset != deltasp {
			ctxt.Diag("unbalanced PUSH/POP")
		}

		if autoffset != 0 {
			if bpsize > 0 {
				// Restore caller's BP
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(ctxt, p)
			}

			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(ctxt, p)
			p.As = obj.ARET

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}

func indir_cx(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
		a.Type = obj.TYPE_MEM
		a.Reg = REG_R15
		a.Index = REG_CX
		a.Scale = 1
		return
	}

	a.Type = obj.TYPE_MEM
	a.Reg = REG_CX
}

// Append code to p to load g into cx.
// Overwrites p with the first instruction (no first appendp).
// Overwriting p is unusual but it lets us use this in both the
// prologue (caller must call appendp first) and in the epilogue.
// Returns last new instruction.
func load_g_cx(ctxt *obj.Link, p *obj.Prog) *obj.Prog {
	p.As = AMOVQ
	if ctxt.Arch.PtrSize == 4 {
		p.As = AMOVL
	}
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REG_TLS
	p.From.Offset = 0
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_CX

	next := p.Link
	progedit(ctxt, p)
	for p.Link != next {
		p = p.Link
	}

	if p.From.Index == REG_TLS {
		p.From.Scale = 2
	}

	return p
}

// Append code to p to check for stack split.
// Appends to (does not overwrite) p.
// Assumes g is in CX.
// Returns last new instruction.
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32, textarg int32) *obj.Prog {
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ

	if ctxt.Headtype == obj.Hnacl || p.Mode == 32 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
	}

	var q1 *obj.Prog
	if framesize <= obj.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(ctxt, p)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if ctxt.Cursym.Cfunc {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else if framesize <= obj.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = obj.Appendp(ctxt, p)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - obj.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(ctxt, p)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if ctxt.Cursym.Cfunc {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	MOVQ	stackguard, CX
		//	CMPQ	CX, $StackPreempt
		//	JEQ	label-of-call-to-morestack
		//	LEAQ	StackGuard(SP), AX
		//	SUBQ	CX, AX
		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))

		p = obj.Appendp(ctxt, p)

		p.As = mov
		indir_cx(ctxt, p, &p.From)
		p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if ctxt.Cursym.Cfunc {
			p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_SI

		p = obj.Appendp(ctxt, p)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = obj.StackPreempt
		if p.Mode == 32 {
			p.To.Offset = int64(uint32(obj.StackPreempt & (1<<32 - 1)))
		}

		p = obj.Appendp(ctxt, p)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(ctxt, p)
		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = obj.StackGuard
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(ctxt, p)
		p.As = sub
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(ctxt, p)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall)
	}

	// common
	jls := obj.Appendp(ctxt, p)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	var last *obj.Prog
	for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link {
	}

	spfix := obj.Appendp(ctxt, last)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	call := obj.Appendp(ctxt, spfix)
	call.Lineno = ctxt.Cursym.Text.Lineno
	call.Mode = ctxt.Cursym.Text.Mode
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	morestack := "runtime.morestack"
	switch {
	case ctxt.Cursym.Cfunc:
		morestack = "runtime.morestackc"
	case ctxt.Cursym.Text.From3Offset()&obj.NEEDCTXT == 0:
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = obj.Linklookup(ctxt, morestack, 0)

	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link)
	}

	jmp := obj.Appendp(ctxt, callend)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.Pcond = ctxt.Cursym.Text.Link
	jmp.Spadj = +framesize

	jls.Pcond = call
	if q1 != nil {
		q1.Pcond = call
	}

	return jls
}

func follow(ctxt *obj.Link, s *obj.LSym) {
	ctxt.Cursym = s

	firstp := ctxt.NewProg()
	lastp := firstp
	xfol(ctxt, s.Text, &lastp)
	lastp.Link = nil
	s.Text = firstp.Link
}

func nofollow(a obj.As) bool {
	switch a {
	case obj.AJMP,
		obj.ARET,
		AIRETL,
		AIRETQ,
		AIRETW,
		ARETFL,
		ARETFQ,
		ARETFW,
		obj.AUNDEF:
		return true
	}

	return false
}

func pushpop(a obj.As) bool {
	switch a {
	case APUSHL,
		APUSHFL,
		APUSHQ,
		APUSHFQ,
		APUSHW,
		APUSHFW,
		APOPL,
		APOPFL,
		APOPQ,
		APOPFQ,
		APOPW,
		APOPFW:
		return true
	}

	return false
}

func relinv(a obj.As) obj.As {
	switch a {
	case AJEQ:
		return AJNE
	case AJNE:
		return AJEQ
	case AJLE:
		return AJGT
	case AJLS:
		return AJHI
	case AJLT:
		return AJGE
	case AJMI:
		return AJPL
	case AJGE:
		return AJLT
	case AJPL:
		return AJMI
	case AJGT:
		return AJLE
	case AJHI:
		return AJLS
	case AJCS:
		return AJCC
	case AJCC:
		return AJCS
	case AJPS:
		return AJPC
	case AJPC:
		return AJPS
	case AJOS:
		return AJOC
	case AJOC:
		return AJOS
	}

	log.Fatalf("unknown relation: %s", obj.Aconv(a))
	return 0
}

func xfol(ctxt *obj.Link, p *obj.Prog, last **obj.Prog) {
	var q *obj.Prog
	var i int
	var a obj.As

loop:
	if p == nil {
		return
	}
	if p.As == obj.AJMP {
		q = p.Pcond
		if q != nil && q.As != obj.ATEXT {
			/* mark instruction as done and continue layout at target of jump */
			p.Mark |= DONE

			p = q
			if p.Mark&DONE == 0 {
				goto loop
			}
		}
	}

	if p.Mark&DONE != 0 {
		/*
		 * p goes here, but already used it elsewhere.
		 * copy up to 4 instructions or else branch to other copy.
		 */
		i = 0
		q = p
		for ; i < 4; i, q = i+1, q.Link {
			if q == nil {
				break
			}
			if q == *last {
				break
			}
			a = q.As
			if a == obj.ANOP {
				i--
				continue
			}

			if nofollow(a) || pushpop(a) {
				break // NOTE(rsc): arm does goto copy
			}
			if q.Pcond == nil || q.Pcond.Mark&DONE != 0 {
				continue
			}
			if a == obj.ACALL || a == ALOOP {
				continue
			}
			for {
				if p.As == obj.ANOP {
					p = p.Link
					continue
				}

				q = obj.Copyp(ctxt, p)
				p = p.Link
				q.Mark |= DONE
				(*last).Link = q
				*last = q
				if q.As != a || q.Pcond == nil || q.Pcond.Mark&DONE != 0 {
					continue
				}

				q.As = relinv(q.As)
				p = q.Pcond
				q.Pcond = q.Link
				q.Link = p
				xfol(ctxt, q.Link, last)
				p = q.Link
				if p.Mark&DONE != 0 {
					return
				}
				goto loop
				/* */
			}
		}

		q = ctxt.NewProg()
		q.As = obj.AJMP
		q.Lineno = p.Lineno
		q.To.Type = obj.TYPE_BRANCH
		q.To.Offset = p.Pc
		q.Pcond = p
		p = q
	}

	/* emit p */
	p.Mark |= DONE

	(*last).Link = p
	*last = p
	a = p.As

	/* continue loop with what comes after p */
	if nofollow(a) {
		return
	}
	if p.Pcond != nil && a != obj.ACALL {
		/*
		 * some kind of conditional branch.
		 * recurse to follow one path.
		 * continue loop on the other.
		 */
		q = obj.Brchain(ctxt, p.Pcond)
		if q != nil {
			p.Pcond = q
		}

		q = obj.Brchain(ctxt, p.Link)
		if q != nil {
			p.Link = q
		}

		if p.From.Type == obj.TYPE_CONST {
			if p.From.Offset == 1 {
				/*
				 * expect conditional jump to be taken.
				 * rewrite so that's the fall-through case.
				 */
				p.As = relinv(a)

				q = p.Link
				p.Link = p.Pcond
				p.Pcond = q
			}
		} else {
			q = p.Link
			if q.Mark&DONE != 0 {
				if a != ALOOP {
					p.As = relinv(a)
					p.Link = p.Pcond
					p.Pcond = q
				}
			}
		}

		xfol(ctxt, p.Link, last)
		if p.Pcond.Mark&DONE != 0 {
			return
		}
		p = p.Pcond
		goto loop
	}

	p = p.Link
	goto loop
}

var unaryDst = map[obj.As]bool{
	ABSWAPL:    true,
	ABSWAPQ:    true,
	ACMPXCHG8B: true,
	ADECB:      true,
	ADECL:      true,
	ADECQ:      true,
	ADECW:      true,
	AINCB:      true,
	AINCL:      true,
	AINCQ:      true,
	AINCW:      true,
	ANEGB:      true,
	ANEGL:      true,
	ANEGQ:      true,
	ANEGW:      true,
	ANOTB:      true,
	ANOTL:      true,
	ANOTQ:      true,
	ANOTW:      true,
	APOPL:      true,
	APOPQ:      true,
	APOPW:      true,
	ASETCC:     true,
	ASETCS:     true,
	ASETEQ:     true,
	ASETGE:     true,
	ASETGT:     true,
	ASETHI:     true,
	ASETLE:     true,
	ASETLS:     true,
	ASETLT:     true,
	ASETMI:     true,
	ASETNE:     true,
	ASETOC:     true,
	ASETOS:     true,
	ASETPC:     true,
	ASETPL:     true,
	ASETPS:     true,
	AFFREE:     true,
	AFLDENV:    true,
	AFSAVE:     true,
	AFSTCW:     true,
	AFSTENV:    true,
	AFSTSW:     true,
	AFXSAVE:    true,
	AFXSAVE64:  true,
	ASTMXCSR:   true,
}

var Linkamd64 = obj.LinkArch{
	Arch:       sys.ArchAMD64,
	Preprocess: preprocess,
	Assemble:   span6,
	Follow:     follow,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

var Linkamd64p32 = obj.LinkArch{
	Arch:       sys.ArchAMD64P32,
	Preprocess: preprocess,
	Assemble:   span6,
	Follow:     follow,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

var Link386 = obj.LinkArch{
	Arch:       sys.Arch386,
	Preprocess: preprocess,
	Assemble:   span6,
	Follow:     follow,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}
// Rewrite p, if necessary, to access global data via the global offset table.
func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) {
	var add, lea, mov obj.As
	var reg int16
	if p.Mode == 64 {
		add = AADDQ
		lea = ALEAQ
		mov = AMOVQ
		reg = REG_R15
	} else {
		add = AADDL
		lea = ALEAL
		mov = AMOVL
		reg = REG_CX
	}

	if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO {
		//     ADUFFxxx $offset
		// becomes
		//     $MOV runtime.duffxxx@GOT, $reg
		//     $ADD $offset, $reg
		//     CALL $reg
		var sym *obj.LSym
		if p.As == obj.ADUFFZERO {
			sym = obj.Linklookup(ctxt, "runtime.duffzero", 0)
		} else {
			sym = obj.Linklookup(ctxt, "runtime.duffcopy", 0)
		}
		offset := p.To.Offset
		p.As = mov
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		p.From.Sym = sym
		p.To.Type = obj.TYPE_REG
		p.To.Reg = reg
		p.To.Offset = 0
		p.To.Sym = nil
		p1 := obj.Appendp(ctxt, p)
		p1.As = add
		p1.From.Type = obj.TYPE_CONST
		p1.From.Offset = offset
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = reg
		p2 := obj.Appendp(ctxt, p1)
		p2.As = obj.ACALL
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = reg
	}

	// We only care about global data: NAME_EXTERN means a global
	// symbol in the Go sense, and p.Sym.Local is true for a few
	// internally defined symbols.
	if p.As == lea && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local {
		// $LEA sym, Rx becomes $MOV $sym, Rx which will be rewritten below
		p.As = mov
		p.From.Type = obj.TYPE_ADDR
	}
	if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local {
		// $MOV $sym, Rx becomes $MOV sym@GOT, Rx
		// $MOV $sym+<off>, Rx becomes $MOV sym@GOT, Rx; $LEA <off>(Rx), Rx
		// On 386 only, more complicated things like PUSHL $sym become $MOV sym@GOT, CX; PUSHL CX
		cmplxdest := false
		pAs := p.As
		var dest obj.Addr
		if p.To.Type != obj.TYPE_REG || pAs != mov {
			if p.Mode == 64 {
				ctxt.Diag("do not know how to handle LEA-type insn to non-register in %v with -dynlink", p)
			}
			cmplxdest = true
			dest = p.To
			p.As = mov
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_CX
			p.To.Sym = nil
			p.To.Name = obj.NAME_NONE
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_GOTREF
		q := p
		if p.From.Offset != 0 {
			q = obj.Appendp(ctxt, p)
			q.As = lea
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Offset = p.From.Offset
			q.To = p.To
			p.From.Offset = 0
		}
		if cmplxdest {
			q = obj.Appendp(ctxt, q)
			q.As = pAs
			q.To = dest
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_CX
		}
	}
	if p.From3 != nil && p.From3.Name == obj.NAME_EXTERN {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	var source *obj.Addr
	// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
	// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
	// An addition may be inserted between the two MOVs if there is an offset.
	if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local {
		if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local {
			ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
		}
		source = &p.From
	} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local {
		source = &p.To
	} else {
		return
	}
	if p.As == obj.ACALL {
		// When dynlinking on 386, almost any call might end up being a call
		// to a PLT, so make sure the GOT pointer is loaded into BX.
		// RegTo2 is set on the replacement call insn to stop it being
		// processed when it is in turn passed to progedit.
		if p.Mode == 64 || (p.To.Sym != nil && p.To.Sym.Local) || p.RegTo2 != 0 {
			return
		}
		p1 := obj.Appendp(ctxt, p)
		p2 := obj.Appendp(ctxt, p1)

		p1.As = ALEAL
		p1.From.Type = obj.TYPE_MEM
		p1.From.Name = obj.NAME_STATIC
		p1.From.Sym = obj.Linklookup(ctxt, "_GLOBAL_OFFSET_TABLE_", 0)
		p1.To.Type = obj.TYPE_REG
		p1.To.Reg = REG_BX

		p2.As = p.As
		p2.Scond = p.Scond
		p2.From = p.From
		p2.From3 = p.From3
		p2.Reg = p.Reg
		p2.To = p.To
		// p.To.Type was set to TYPE_BRANCH above, but that makes checkaddr
		// in ../pass.go complain, so set it back to TYPE_MEM here, until p2
		// itself gets passed to progedit.
		p2.To.Type = obj.TYPE_MEM
		p2.RegTo2 = 1

		obj.Nopout(p)
		return
	}
	if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ARET || p.As == obj.AJMP {
		return
	}
	if source.Type != obj.TYPE_MEM {
		ctxt.Diag("don't know how to handle %v with -dynlink", p)
	}
	p1 := obj.Appendp(ctxt, p)
	p2 := obj.Appendp(ctxt, p1)

	p1.As = mov
	p1.From.Type = obj.TYPE_MEM
	p1.From.Sym = source.Sym
	p1.From.Name = obj.NAME_GOTREF
	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = reg

	p2.As = p.As
	p2.From = p.From
	p2.To = p.To
	if p.From.Name == obj.NAME_EXTERN {
		p2.From.Reg = reg
		p2.From.Name = obj.NAME_NONE
		p2.From.Sym = nil
	} else if p.To.Name == obj.NAME_EXTERN {
		p2.To.Reg = reg
		p2.To.Name = obj.NAME_NONE
		p2.To.Sym = nil
	} else {
		return
	}

	obj.Nopout(p)
}
func peep(firstp *obj.Prog) {
	g := gc.Flowstart(firstp, nil)
	if g == nil {
		return
	}
	gactive = 0

	// byte, word arithmetic elimination.
	elimshortmov(g)

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	var p *obj.Prog
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.ALEAL,
			x86.ALEAQ:
			if regtyp(&p.To) {
				if p.From.Sym != nil {
					if p.From.Index == x86.REG_NONE {
						conprop(r)
					}
				}
			}

		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
					conprop(r)
				}
			}
		}
	}

	var r *gc.Flow
	var r1 *gc.Flow
	var p1 *obj.Prog
	var t int
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if regtyp(&p.From) {
					if copyprop(g, r) {
						excise(r)
						t++
					} else if subprop(r) && copyprop(g, r) {
						excise(r)
						t++
					}
				}
			}

		case x86.AMOVBLZX,
			x86.AMOVWLZX,
			x86.AMOVBLSX,
			x86.AMOVWLSX:
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVL
						t++
					}
				}
			}

		case x86.AMOVBQSX,
			x86.AMOVBQZX,
			x86.AMOVWQSX,
			x86.AMOVWQZX,
			x86.AMOVLQSX,
			x86.AMOVLQZX,
			x86.AMOVQL:
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVQ
						t++
					}
				}
			}

		case x86.AADDL,
			x86.AADDQ,
			x86.AADDW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.AADDQ {
					p.As = x86.ADECQ
				} else if p.As == x86.AADDL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.AADDQ {
					p.As = x86.AINCQ
				} else if p.As == x86.AADDL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

		case x86.ASUBL,
			x86.ASUBQ,
			x86.ASUBW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.ASUBQ {
					p.As = x86.AINCQ
				} else if p.As == x86.ASUBL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.ASUBQ {
					p.As = x86.ADECQ
				} else if p.As == x86.ASUBL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	// MOVLQZX removal.
	// The MOVLQZX exists to avoid being confused for a
	// MOVL that is just copying 32-bit data around during
	// copyprop. Now that copyprop is done, remove MOVLQZX R1, R2
	// if it is dominated by an earlier ADDL/MOVL/etc into R1 that
	// will have already cleared the high bits.
	//
	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one. We only use the lower one, but
	// the processor can do better if we do moves using both.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if p.As == x86.AMOVLQZX {
			if regtyp(&p.From) {
				if p.From.Type == p.To.Type && p.From.Reg == p.To.Reg {
					if prevl(r, p.From.Reg) {
						excise(r)
					}
				}
			}
		}

		if p.As == x86.AMOVSD {
			if regtyp(&p.From) {
				if regtyp(&p.To) {
					p.As = x86.AMOVAPD
				}
			}
		}
	}

	// load pipelining
	// push any load from memory as early as possible
	// to give it time to complete before use.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVLQZX:
			if regtyp(&p.To) && !regconsttyp(&p.From) {
				pushback(r)
			}
		}
	}

	gc.Flowend(g)
}
// movb elimination.
// movb is simulated by the linker
// when a register other than ax, bx, cx, dx
// is used, so rewrite to other instructions
// when possible. a movb into a register
// can smash the entire 32-bit register without
// causing any trouble.
//
// TODO: Using the Q forms here instead of the L forms
// seems unnecessary, and it makes the instructions longer.
func elimshortmov(g *gc.Graph) {
	var p *obj.Prog

	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if regtyp(&p.To) {
			switch p.As {
			case x86.AINCB, x86.AINCW:
				p.As = x86.AINCQ

			case x86.ADECB, x86.ADECW:
				p.As = x86.ADECQ

			case x86.ANEGB, x86.ANEGW:
				p.As = x86.ANEGQ

			case x86.ANOTB, x86.ANOTW:
				p.As = x86.ANOTQ
			}

			if regtyp(&p.From) || p.From.Type == obj.TYPE_CONST {
				// move or arithmetic into partial register.
				// from another register or constant can be movl.
				// we don't switch to 64-bit arithmetic if it can
				// change how the carry bit is set (and the carry bit is needed).
				switch p.As {
				case x86.AMOVB, x86.AMOVW:
					p.As = x86.AMOVQ

				case x86.AADDB, x86.AADDW:
					if !needc(p.Link) {
						p.As = x86.AADDQ
					}

				case x86.ASUBB, x86.ASUBW:
					if !needc(p.Link) {
						p.As = x86.ASUBQ
					}

				case x86.AMULB, x86.AMULW:
					p.As = x86.AMULQ

				case x86.AIMULB, x86.AIMULW:
					p.As = x86.AIMULQ

				case x86.AANDB, x86.AANDW:
					p.As = x86.AANDQ

				case x86.AORB, x86.AORW:
					p.As = x86.AORQ

				case x86.AXORB, x86.AXORW:
					p.As = x86.AXORQ

				case x86.ASHLB, x86.ASHLW:
					p.As = x86.ASHLQ
				}
			} else if p.From.Type != obj.TYPE_REG {
				// explicit zero extension, but don't
				// do that if source is a byte register
				// (only AH can occur and it's forbidden).
				switch p.As {
				case x86.AMOVB:
					p.As = x86.AMOVBQZX

				case x86.AMOVW:
					p.As = x86.AMOVWQZX
				}
			}
		}
	}
}
// movb elimination.
// movb is simulated by the linker
// when a register other than ax, bx, cx, dx
// is used, so rewrite to other instructions
// when possible. a movb into a register
// can smash the entire 32-bit register without
// causing any trouble.
func elimshortmov(g *gc.Graph) {
	var p *obj.Prog

	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if regtyp(&p.To) {
			switch p.As {
			case x86.AINCB, x86.AINCW:
				p.As = x86.AINCL

			case x86.ADECB, x86.ADECW:
				p.As = x86.ADECL

			case x86.ANEGB, x86.ANEGW:
				p.As = x86.ANEGL

			case x86.ANOTB, x86.ANOTW:
				p.As = x86.ANOTL
			}

			if regtyp(&p.From) || p.From.Type == obj.TYPE_CONST {
				// move or arithmetic into partial register.
				// from another register or constant can be movl.
				// we don't switch to 32-bit arithmetic if it can
				// change how the carry bit is set (and the carry bit is needed).
				switch p.As {
				case x86.AMOVB, x86.AMOVW:
					p.As = x86.AMOVL

				case x86.AADDB, x86.AADDW:
					if !needc(p.Link) {
						p.As = x86.AADDL
					}

				case x86.ASUBB, x86.ASUBW:
					if !needc(p.Link) {
						p.As = x86.ASUBL
					}

				case x86.AMULB, x86.AMULW:
					p.As = x86.AMULL

				case x86.AIMULB, x86.AIMULW:
					p.As = x86.AIMULL

				case x86.AANDB, x86.AANDW:
					p.As = x86.AANDL

				case x86.AORB, x86.AORW:
					p.As = x86.AORL

				case x86.AXORB, x86.AXORW:
					p.As = x86.AXORL

				case x86.ASHLB, x86.ASHLW:
					p.As = x86.ASHLL
				}
			} else {
				// explicit zero extension
				switch p.As {
				case x86.AMOVB:
					p.As = x86.AMOVBLZX

				case x86.AMOVW:
					p.As = x86.AMOVWLZX
				}
			}
		}
	}
}
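// Illustrative before/after for the rewrites above (sketch only; BX stands
// in for any non-ax/bx/cx/dx-addressable destination):
//	MOVB $9, BX     =>  MOVL $9, BX        // constant into register
//	INCB BX         =>  INCL BX            // byte op widened in place
//	MOVB 4(SP), BX  =>  MOVBLZX 4(SP), BX  // load becomes explicit zero extension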
// fuseMultiple merges memory loads and stores into load multiple and // store multiple operations. // // Looks for this pattern (sequence of loads or stores): // MOVD R1, 0(R15) // MOVD R2, 8(R15) // MOVD R3, 16(R15) // Replaces with: // STMG R1, R3, 0(R15) func fuseMultiple(r *gc.Flow) int { n := 0 var fused *obj.Prog for ; r != nil; r = r.Link { // If there is a branch into the instruction stream then // we can't fuse into previous instructions. if gc.Uniqp(r) == nil { fused = nil } p := r.Prog isStore := isGPR(&p.From) && isBDMem(&p.To) isLoad := isGPR(&p.To) && isBDMem(&p.From) // are we a candidate? size := int64(0) switch p.As { default: fused = nil continue case obj.ANOP: // skip over nops continue case s390x.AMOVW, s390x.AMOVWZ: size = 4 // TODO(mundaym): 32-bit load multiple is currently not supported // as it requires sign/zero extension. if !isStore { fused = nil continue } case s390x.AMOVD: size = 8 if !isLoad && !isStore { fused = nil continue } } // If we merge two loads/stores with different source/destination Nodes // then we will lose a reference to the second Node, which means that the // compiler might mark the Node as unused and free its slot on the stack. // TODO(mundaym): allow this by adding a dummy reference to the Node. if fused == nil || fused.From.Node != p.From.Node || fused.From.Type != p.From.Type || fused.To.Node != p.To.Node || fused.To.Type != p.To.Type { fused = p continue } // check two addresses ca := func(a, b *obj.Addr, offset int64) bool { return a.Reg == b.Reg && a.Offset+offset == b.Offset && a.Sym == b.Sym && a.Name == b.Name } switch fused.As { default: fused = p case s390x.AMOVW, s390x.AMOVWZ: if size == 4 && fused.From.Reg+1 == p.From.Reg && ca(&fused.To, &p.To, 4) { fused.As = s390x.ASTMY fused.Reg = p.From.Reg excise(r) n++ } else { fused = p } case s390x.AMOVD: if size == 8 && fused.From.Reg+1 == p.From.Reg && ca(&fused.To, &p.To, 8) { fused.As = s390x.ASTMG fused.Reg = p.From.Reg excise(r) n++ } else if size == 8 && fused.To.Reg+1 == p.To.Reg && ca(&fused.From, &p.From, 8) { fused.As = s390x.ALMG fused.Reg = fused.To.Reg fused.To.Reg = p.To.Reg excise(r) n++ } else { fused = p } case s390x.ASTMG, s390x.ASTMY: if (fused.As == s390x.ASTMY && size != 4) || (fused.As == s390x.ASTMG && size != 8) { fused = p continue } offset := size * int64(fused.Reg-fused.From.Reg+1) if fused.Reg+1 == p.From.Reg && ca(&fused.To, &p.To, offset) { fused.Reg = p.From.Reg excise(r) n++ } else { fused = p } case s390x.ALMG: offset := 8 * int64(fused.To.Reg-fused.Reg+1) if size == 8 && fused.To.Reg+1 == p.To.Reg && ca(&fused.From, &p.From, offset) { fused.To.Reg = p.To.Reg excise(r) n++ } else { fused = p } } } return n }
// Rewrite p, if necessary, to access global data via the global offset table. func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) { if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { // ADUFFxxx $offset // becomes // MOVW runtime.duffxxx@GOT, R9 // ADD $offset, R9 // CALL (R9) var sym *obj.LSym if p.As == obj.ADUFFZERO { sym = obj.Linklookup(ctxt, "runtime.duffzero", 0) } else { sym = obj.Linklookup(ctxt, "runtime.duffcopy", 0) } offset := p.To.Offset p.As = AMOVW p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_GOTREF p.From.Sym = sym p.To.Type = obj.TYPE_REG p.To.Reg = REG_R9 p.To.Name = obj.NAME_NONE p.To.Offset = 0 p.To.Sym = nil p1 := obj.Appendp(ctxt, p) p1.As = AADD p1.From.Type = obj.TYPE_CONST p1.From.Offset = offset p1.To.Type = obj.TYPE_REG p1.To.Reg = REG_R9 p2 := obj.Appendp(ctxt, p1) p2.As = obj.ACALL p2.To.Type = obj.TYPE_MEM p2.To.Reg = REG_R9 return } // We only care about global data: NAME_EXTERN means a global // symbol in the Go sense, and p.Sym.Local is true for a few // internally defined symbols. if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local { // MOVW $sym, Rx becomes MOVW sym@GOT, Rx // MOVW $sym+<off>, Rx becomes MOVW sym@GOT, Rx; ADD <off>, Rx if p.As != AMOVW { ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p) } if p.To.Type != obj.TYPE_REG { ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p) } p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_GOTREF if p.From.Offset != 0 { q := obj.Appendp(ctxt, p) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = p.From.Offset q.To = p.To p.From.Offset = 0 } } if p.From3 != nil && p.From3.Name == obj.NAME_EXTERN { ctxt.Diag("don't know how to handle %v with -dynlink", p) } var source *obj.Addr // MOVx sym, Ry becomes MOVW sym@GOT, R9; MOVx (R9), Ry // MOVx Ry, sym becomes MOVW sym@GOT, R9; MOVx Ry, (R9) // An addition may be inserted between the two MOVs if there is an offset. if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local { if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local { ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) } source = &p.From } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local { source = &p.To } else { return } if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { return } if source.Sym.Type == obj.STLSBSS { return } if source.Type != obj.TYPE_MEM { ctxt.Diag("don't know how to handle %v with -dynlink", p) } p1 := obj.Appendp(ctxt, p) p2 := obj.Appendp(ctxt, p1) p1.As = AMOVW p1.From.Type = obj.TYPE_MEM p1.From.Sym = source.Sym p1.From.Name = obj.NAME_GOTREF p1.To.Type = obj.TYPE_REG p1.To.Reg = REG_R9 p2.As = p.As p2.From = p.From p2.To = p.To if p.From.Name == obj.NAME_EXTERN { p2.From.Reg = REG_R9 p2.From.Name = obj.NAME_NONE p2.From.Sym = nil } else if p.To.Name == obj.NAME_EXTERN { p2.To.Reg = REG_R9 p2.To.Name = obj.NAME_NONE p2.To.Sym = nil } else { return } obj.Nopout(p) }
func progedit(ctxt *obj.Link, p *obj.Prog) { p.From.Class = 0 p.To.Class = 0 // $0 results in C_ZCON, which matches both C_REG and various // C_xCON, however the C_REG cases in asmout don't expect a // constant, so they will use the register fields and assemble // a R0. To prevent that, rewrite $0 as ZR. if p.From.Type == obj.TYPE_CONST && p.From.Offset == 0 { p.From.Type = obj.TYPE_REG p.From.Reg = REGZERO } if p.To.Type == obj.TYPE_CONST && p.To.Offset == 0 { p.To.Type = obj.TYPE_REG p.To.Reg = REGZERO } // Rewrite BR/BL to symbol as TYPE_BRANCH. switch p.As { case AB, ABL, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY: if p.To.Sym != nil { p.To.Type = obj.TYPE_BRANCH } break } // Rewrite float constants to values stored in memory. switch p.As { case AFMOVS: if p.From.Type == obj.TYPE_FCONST { f32 := float32(p.From.Val.(float64)) i32 := math.Float32bits(f32) literal := fmt.Sprintf("$f32.%08x", i32) s := obj.Linklookup(ctxt, literal, 0) s.Size = 4 p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Sym.Local = true p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } case AFMOVD: if p.From.Type == obj.TYPE_FCONST { i64 := math.Float64bits(p.From.Val.(float64)) literal := fmt.Sprintf("$f64.%016x", i64) s := obj.Linklookup(ctxt, literal, 0) s.Size = 8 p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Sym.Local = true p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } break } // Rewrite negative immediates as positive immediates with // complementary instruction. switch p.As { case AADD, AADDW, ASUB, ASUBW, ACMP, ACMPW, ACMN, ACMNW: if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 { p.From.Offset = -p.From.Offset p.As = complements[p.As] } break } if ctxt.Flag_dynlink { rewriteToUseGot(ctxt, p) } }
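// Hedged sketch of the complements table consulted above (the real table
// is defined elsewhere in this package): each op maps to the op computing
// the same result with the immediate negated, for example
//   ADD $-8, R3  =>  SUB $8, R3
//   CMP $-1, R2  =>  CMN $1, R2
var complementsSketch = map[obj.As]obj.As{
	AADD: ASUB, AADDW: ASUBW,
	ASUB: AADD, ASUBW: AADDW,
	ACMP: ACMN, ACMPW: ACMNW,
	ACMN: ACMP, ACMNW: ACMPW,
}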
func progedit(ctxt *obj.Link, p *obj.Prog) { p.From.Class = 0 p.To.Class = 0 // Rewrite B/BL to symbol as TYPE_BRANCH. switch p.As { case AB, ABL, obj.ADUFFZERO, obj.ADUFFCOPY: if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil { p.To.Type = obj.TYPE_BRANCH } } // Replace TLS register fetches on older ARM processors. switch p.As { // Treat MRC 15, 0, <reg>, C13, C0, 3 specially. case AMRC: if p.To.Offset&0xffff0fff == 0xee1d0f70 { // Because the instruction might be rewritten to a BL which returns in R0 // the register must be zero. if p.To.Offset&0xf000 != 0 { ctxt.Diag("%v: TLS MRC instruction must write to R0 as it might get translated into a BL instruction", p.Line()) } if ctxt.Goarm < 7 { // Replace it with BL runtime.read_tls_fallback(SB) for ARM CPUs that lack the tls extension. if progedit_tlsfallback == nil { progedit_tlsfallback = obj.Linklookup(ctxt, "runtime.read_tls_fallback", 0) } // MOVW LR, R11 p.As = AMOVW p.From.Type = obj.TYPE_REG p.From.Reg = REGLINK p.To.Type = obj.TYPE_REG p.To.Reg = REGTMP // BL runtime.read_tls_fallback(SB) p = obj.Appendp(ctxt, p) p.As = ABL p.To.Type = obj.TYPE_BRANCH p.To.Sym = progedit_tlsfallback p.To.Offset = 0 // MOVW R11, LR p = obj.Appendp(ctxt, p) p.As = AMOVW p.From.Type = obj.TYPE_REG p.From.Reg = REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = REGLINK break } } // Otherwise, MRC/MCR instructions need no further treatment. p.As = AWORD } // Rewrite float constants to values stored in memory. switch p.As { case AMOVF: if p.From.Type == obj.TYPE_FCONST && chipfloat5(ctxt, p.From.Val.(float64)) < 0 && (chipzero5(ctxt, p.From.Val.(float64)) < 0 || p.Scond&C_SCOND != C_SCOND_NONE) { f32 := float32(p.From.Val.(float64)) i32 := math.Float32bits(f32) literal := fmt.Sprintf("$f32.%08x", i32) s := obj.Linklookup(ctxt, literal, 0) p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } case AMOVD: if p.From.Type == obj.TYPE_FCONST && chipfloat5(ctxt, p.From.Val.(float64)) < 0 && (chipzero5(ctxt, p.From.Val.(float64)) < 0 || p.Scond&C_SCOND != C_SCOND_NONE) { i64 := math.Float64bits(p.From.Val.(float64)) literal := fmt.Sprintf("$f64.%016x", i64) s := obj.Linklookup(ctxt, literal, 0) p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } } if ctxt.Flag_dynlink { rewriteToUseGot(ctxt, p) } }
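// Illustrative decoding of the magic constants tested above: the mask
// comparison insn&0xffff0fff == 0xee1d0f70 matches MRC 15, 0, <Rt>, C13,
// C0, 3 (the TLS pointer fetch) for any destination register Rt, which
// sits in bits 12-15 of the encoding; the p.To.Offset&0xf000 != 0 check
// above therefore requires Rt to be R0. isTLSFetch is a made-up helper,
// not part of the assembler.
func isTLSFetch(insn uint32) (rt uint32, ok bool) {
	if insn&0xffff0fff != 0xee1d0f70 {
		return 0, false
	}
	return (insn >> 12) & 0xf, true
}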
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog { // MOV g_stackguard(g), R1 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 q := (*obj.Prog)(nil) if framesize <= obj.StackSmall { // small stack: SP < stackguard // MOV SP, R2 // CMP stackguard, R2 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REG_R2 } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // SUB $framesize, SP, R2 // CMP stackguard, R2 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REG_R2 } else { // Such a large stack we need to protect against wraparound // if SP is close to zero. // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // CMP $StackPreempt, R1 // BEQ label_of_call_to_morestack // ADD $StackGuard, SP, R2 // SUB R1, R2 // MOV $(framesize+(StackGuard-StackSmall)), R3 // CMP R3, R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackPreempt p.Reg = REG_R1 p = obj.Appendp(ctxt, p) q = p p.As = ABEQ p.To.Type = obj.TYPE_BRANCH p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackGuard p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall) p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.Reg = REG_R2 } // BLS do-morestack bls := obj.Appendp(ctxt, p) bls.As = ABLS bls.To.Type = obj.TYPE_BRANCH var last *obj.Prog for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link { } spfix := obj.Appendp(ctxt, last) spfix.As = obj.ANOP spfix.Spadj = -framesize // MOV LR, R3 movlr := obj.Appendp(ctxt, spfix) movlr.As = AMOVD movlr.From.Type = obj.TYPE_REG movlr.From.Reg = REGLINK movlr.To.Type = obj.TYPE_REG movlr.To.Reg = REG_R3 if q != nil { q.Pcond = movlr } bls.Pcond = movlr debug := movlr if false { debug = obj.Appendp(ctxt, debug) debug.As = AMOVD debug.From.Type = obj.TYPE_CONST debug.From.Offset = int64(framesize) debug.To.Type = obj.TYPE_REG debug.To.Reg = REGTMP } // BL runtime.morestack(SB) call := obj.Appendp(ctxt, debug) call.As = ABL call.To.Type = obj.TYPE_BRANCH morestack := "runtime.morestack" switch { case ctxt.Cursym.Cfunc: morestack = "runtime.morestackc" case ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0: morestack = "runtime.morestack_noctxt" } call.To.Sym = obj.Linklookup(ctxt, morestack, 0) // B start jmp := obj.Appendp(ctxt, call) jmp.As = AB jmp.To.Type = obj.TYPE_BRANCH jmp.Pcond = ctxt.Cursym.Text.Link jmp.Spadj = +framesize // placeholder for bls's jump target // p = 
// obj.Appendp(ctxt, p)
// p.As = obj.ANOP
return bls }
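// A hedged restatement (illustrative, not toolchain code) of the three
// guard checks assembled by stacksplit above; needsMorestack is a made-up
// name and all comparisons are unsigned. For huge frames, the +StackGuard
// on both sides keeps the left side positive even when SP has dipped
// slightly below stackguard; the StackPreempt sentinel is checked
// separately in the real code.
func needsMorestack(sp, stackguard uint64, framesize int32) bool {
	switch {
	case framesize <= obj.StackSmall:
		// small stack: SP < stackguard
		return sp < stackguard
	case framesize <= obj.StackBig:
		// large stack: SP-framesize < stackguard-StackSmall
		return sp-uint64(framesize) < stackguard-obj.StackSmall
	default:
		// huge stack, wraparound-safe form:
		// SP-stackguard+StackGuard < framesize+(StackGuard-StackSmall)
		return sp-stackguard+obj.StackGuard < uint64(framesize)+(obj.StackGuard-obj.StackSmall)
	}
}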
func xfol(ctxt *obj.Link, p *obj.Prog, last **obj.Prog) { var q *obj.Prog var r *obj.Prog var i int loop: if p == nil { return } a := p.As if a == AB { q = p.Pcond if q != nil && q.As != obj.ATEXT { p.Mark |= FOLL p = q if p.Mark&FOLL == 0 { goto loop } } } if p.Mark&FOLL != 0 { i = 0 q = p for ; i < 4; i, q = i+1, q.Link { if q == *last || q == nil { break } a = q.As if a == obj.ANOP { i-- continue } if a == AB || (a == obj.ARET && q.Scond == C_SCOND_NONE) || a == ARFE || a == obj.AUNDEF { goto copy } if q.Pcond == nil || (q.Pcond.Mark&FOLL != 0) { continue } if a != ABEQ && a != ABNE { continue } copy: for { r = ctxt.NewProg() *r = *p if r.Mark&FOLL == 0 { fmt.Printf("can't happen 1\n") } r.Mark |= FOLL if p != q { p = p.Link (*last).Link = r *last = r continue } (*last).Link = r *last = r if a == AB || (a == obj.ARET && q.Scond == C_SCOND_NONE) || a == ARFE || a == obj.AUNDEF { return } r.As = ABNE if a == ABNE { r.As = ABEQ } r.Pcond = p.Link r.Link = p.Pcond if r.Link.Mark&FOLL == 0 { xfol(ctxt, r.Link, last) } if r.Pcond.Mark&FOLL == 0 { fmt.Printf("can't happen 2\n") } return } } a = AB q = ctxt.NewProg() q.As = a q.Lineno = p.Lineno q.To.Type = obj.TYPE_BRANCH q.To.Offset = p.Pc q.Pcond = p p = q } p.Mark |= FOLL (*last).Link = p *last = p if a == AB || (a == obj.ARET && p.Scond == C_SCOND_NONE) || a == ARFE || a == obj.AUNDEF { return } if p.Pcond != nil { if a != ABL && a != ABX && p.Link != nil { q = obj.Brchain(ctxt, p.Link) if a != obj.ATEXT { if q != nil && (q.Mark&FOLL != 0) { p.As = relinv(a) p.Link = p.Pcond p.Pcond = q } } xfol(ctxt, p.Link, last) q = obj.Brchain(ctxt, p.Pcond) if q == nil { q = p.Pcond } if q.Mark&FOLL != 0 { p.Pcond = q return } p = q goto loop } } p = p.Link goto loop }
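// Hedged mini-version of the relinv helper used above; the real table
// covers every conditional branch. Inverting the condition lets xfol lay
// out the preferred successor as the fall-through path and redirect the
// branch to the other edge. Only a few cases are shown here.
func relinvSketch(a obj.As) obj.As {
	switch a {
	case ABEQ:
		return ABNE
	case ABNE:
		return ABEQ
	case ABMI:
		return ABPL
	case ABPL:
		return ABMI
	}
	return obj.AXXX // unhandled in this sketch
}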
func preprocess(ctxt *obj.Link, cursym *obj.LSym) { ctxt.Cursym = cursym if cursym.Text == nil || cursym.Text.Link == nil { return } p := cursym.Text textstksiz := p.To.Offset aoffset := int32(textstksiz) cursym.Args = p.To.Val.(int32) cursym.Locals = int32(textstksiz) /* * find leaf subroutines * strip NOPs * expand RET */ ctxt.Bso.Flush() q := (*obj.Prog)(nil) var q1 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { switch p.As { case obj.ATEXT: p.Mark |= LEAF case obj.ARET: break case obj.ANOP: q1 = p.Link q.Link = q1 /* q is non-nop */ q1.Mark |= p.Mark continue case ABL, obj.ADUFFZERO, obj.ADUFFCOPY: cursym.Text.Mark &^= LEAF fallthrough case ACBNZ, ACBZ, ACBNZW, ACBZW, ATBZ, ATBNZ, AB, ABEQ, ABNE, ABCS, ABHS, ABCC, ABLO, ABMI, ABPL, ABVS, ABVC, ABHI, ABLS, ABGE, ABLT, ABGT, ABLE, AADR, /* strange */ AADRP: q1 = p.Pcond if q1 != nil { for q1.As == obj.ANOP { q1 = q1.Link p.Pcond = q1 } } break } q = p } var q2 *obj.Prog var retjmp *obj.LSym for p := cursym.Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: cursym.Text = p if textstksiz < 0 { ctxt.Autosize = 0 } else { ctxt.Autosize = int32(textstksiz + 8) } if (cursym.Text.Mark&LEAF != 0) && ctxt.Autosize <= 8 { ctxt.Autosize = 0 } else if ctxt.Autosize&(16-1) != 0 { // The frame includes an LR. // If the frame size is 8, it's only an LR, // so there's no potential for breaking references to // local variables by growing the frame size, // because there are no local variables. // But otherwise, if there is a non-empty locals section, // the author of the code is responsible for making sure // that the frame size is 8 mod 16. if ctxt.Autosize == 8 { ctxt.Autosize += 8 cursym.Locals += 8 } else { ctxt.Diag("%v: unaligned frame size %d - must be 8 mod 16 (or 0)", p, ctxt.Autosize-8) } } p.To.Offset = int64(ctxt.Autosize) - 8 if ctxt.Autosize == 0 && cursym.Text.Mark&LEAF == 0 { if ctxt.Debugvlog != 0 { fmt.Fprintf(ctxt.Bso, "save suppressed in: %s\n", cursym.Text.From.Sym.Name) } ctxt.Bso.Flush() cursym.Text.Mark |= LEAF } if p.From3.Offset&obj.NOSPLIT == 0 { p = stacksplit(ctxt, p, ctxt.Autosize) // emit split check } aoffset = ctxt.Autosize if aoffset > 0xF0 { aoffset = 0xF0 } if cursym.Text.Mark&LEAF != 0 { cursym.Leaf = true if ctxt.Autosize == 0 { break } aoffset = 0 } q = p if ctxt.Autosize > aoffset { q = ctxt.NewProg() q.As = ASUB q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = int64(ctxt.Autosize) - int64(aoffset) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = int32(q.From.Offset) q.Link = p.Link p.Link = q if cursym.Text.Mark&LEAF != 0 { break } } q1 = ctxt.NewProg() q1.As = AMOVD q1.Lineno = p.Lineno q1.From.Type = obj.TYPE_REG q1.From.Reg = REGLINK q1.To.Type = obj.TYPE_MEM q1.Scond = C_XPRE q1.To.Offset = int64(-aoffset) q1.To.Reg = REGSP q1.Link = q.Link q1.Spadj = aoffset q.Link = q1 if cursym.Text.From3.Offset&obj.WRAPPER != 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOV g_panic(g), R1 // CMP ZR, R1 // BEQ end // MOV panic_argp(R1), R2 // ADD $(autosize+8), RSP, R3 // CMP R2, R3 // BNE end // ADD $8, RSP, R4 // MOVD R4, panic_argp(R1) // end: // NOP // // The NOP is needed to give the jumps somewhere to land. // It is a liblink NOP, not an ARM64 NOP: it encodes to 0 instruction bytes.
q = q1 q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REGG q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic q.To.Type = obj.TYPE_REG q.To.Reg = REG_R1 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REGZERO q.Reg = REG_R1 q = obj.Appendp(ctxt, q) q.As = ABEQ q.To.Type = obj.TYPE_BRANCH q1 = q q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REG_R1 q.From.Offset = 0 // Panic.argp q.To.Type = obj.TYPE_REG q.To.Reg = REG_R2 q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(ctxt.Autosize) + 8 q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R2 q.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ABNE q.To.Type = obj.TYPE_BRANCH q2 = q q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = 8 q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R4 q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_REG q.From.Reg = REG_R4 q.To.Type = obj.TYPE_MEM q.To.Reg = REG_R1 q.To.Offset = 0 // Panic.argp q = obj.Appendp(ctxt, q) q.As = obj.ANOP q1.Pcond = q q2.Pcond = q } case obj.ARET: nocache(p) if p.From.Type == obj.TYPE_CONST { ctxt.Diag("using BECOME (%v) is not supported!", p) break } retjmp = p.To.Sym p.To = obj.Addr{} if cursym.Text.Mark&LEAF != 0 { if ctxt.Autosize != 0 { p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(ctxt.Autosize) p.To.Type = obj.TYPE_REG p.To.Reg = REGSP p.Spadj = -ctxt.Autosize } } else { /* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/ aoffset = ctxt.Autosize if aoffset > 0xF0 { aoffset = 0xF0 } p.As = AMOVD p.From.Type = obj.TYPE_MEM p.Scond = C_XPOST p.From.Offset = int64(aoffset) p.From.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REGLINK p.Spadj = -aoffset if ctxt.Autosize > aoffset { q = ctxt.NewProg() q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(ctxt.Autosize) - int64(aoffset) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Link = p.Link q.Spadj = int32(-q.From.Offset) q.Lineno = p.Lineno p.Link = q p = q } } if p.As != obj.ARET { q = ctxt.NewProg() q.Lineno = p.Lineno q.Link = p.Link p.Link = q p = q } if retjmp != nil { // retjmp p.As = AB p.To.Type = obj.TYPE_BRANCH p.To.Sym = retjmp p.Spadj = +ctxt.Autosize break } p.As = obj.ARET p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = REGLINK p.Spadj = +ctxt.Autosize case AADD, ASUB: if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { if p.As == AADD { p.Spadj = int32(-p.From.Offset) } else { p.Spadj = int32(+p.From.Offset) } } break } } }
func preprocess(ctxt *obj.Link, cursym *obj.LSym) { // TODO(minux): add morestack short-cuts with small fixed frame-size. ctxt.Cursym = cursym if cursym.Text == nil || cursym.Text.Link == nil { return } p := cursym.Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. p.From3.Offset |= obj.NOFRAME textstksiz = 0 } if textstksiz%8 != 0 { ctxt.Diag("frame size %d not a multiple of 8", textstksiz) } if p.From3.Offset&obj.NOFRAME != 0 { if textstksiz != 0 { ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) } } cursym.Args = p.To.Val.(int32) cursym.Locals = int32(textstksiz) /* * find leaf subroutines * strip NOPs * expand RET * expand BECOME pseudo */ if ctxt.Debugvlog != 0 { fmt.Fprintf(ctxt.Bso, "%5.2f noops\n", obj.Cputime()) } ctxt.Bso.Flush() var q *obj.Prog var q1 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: q = p p.Mark |= LABEL | LEAF | SYNC if p.Link != nil { p.Link.Mark |= LABEL } case ANOR: q = p if p.To.Type == obj.TYPE_REG { if p.To.Reg == REGZERO { p.Mark |= LABEL | SYNC } } case ASYNC, AWORD: q = p p.Mark |= LABEL | SYNC continue case AMOVW, AMOVWZ, AMOVD: q = p if p.From.Reg >= REG_RESERVED || p.To.Reg >= REG_RESERVED { p.Mark |= LABEL | SYNC } continue case AFABS, AFADD, AFDIV, AFMADD, AFMOVD, AFMOVS, AFMSUB, AFMUL, AFNABS, AFNEG, AFNMADD, AFNMSUB, ALEDBR, ALDEBR, AFSUB: q = p p.Mark |= FLOAT continue case ABL, ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: cursym.Text.Mark &^= LEAF fallthrough case ABC, ABEQ, ABGE, ABGT, ABLE, ABLT, ABNE, ABR, ABVC, ABVS, ACMPBEQ, ACMPBGE, ACMPBGT, ACMPBLE, ACMPBLT, ACMPBNE, ACMPUBEQ, ACMPUBGE, ACMPUBGT, ACMPUBLE, ACMPUBLT, ACMPUBNE: p.Mark |= BRANCH q = p q1 = p.Pcond if q1 != nil { for q1.As == obj.ANOP { q1 = q1.Link p.Pcond = q1 } if q1.Mark&LEAF == 0 { q1.Mark |= LABEL } } else { p.Mark |= LABEL } q1 = p.Link if q1 != nil { q1.Mark |= LABEL } continue case AFCMPO, AFCMPU: q = p p.Mark |= FCMP | FLOAT continue case obj.ARET: q = p if p.Link != nil { p.Link.Mark |= LABEL } continue case obj.ANOP: q1 = p.Link q.Link = q1 /* q is non-nop */ q1.Mark |= p.Mark continue default: q = p continue } } autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog var pLast *obj.Prog var pPre *obj.Prog var pPreempt *obj.Prog wasSplit := false for p := cursym.Text; p != nil; p = p.Link { pLast = p switch p.As { case obj.ATEXT: autosize = int32(textstksiz) if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 { // A leaf function with no locals has no frame. p.From3.Offset |= obj.NOFRAME } if p.From3.Offset&obj.NOFRAME == 0 { // If there is a stack frame at all, it includes // space to save the LR. autosize += int32(ctxt.FixedFrameSize()) } p.To.Offset = int64(autosize) q = p if p.From3.Offset&obj.NOSPLIT == 0 { p, pPreempt = stacksplitPre(ctxt, p, autosize) // emit pre part of split check pPre = p wasSplit = true //need post part of split } if autosize != 0 { q = obj.Appendp(ctxt, p) q.As = AMOVD q.From.Type = obj.TYPE_ADDR q.From.Offset = int64(-autosize) q.From.Reg = REGSP // not actually needed - REGSP is assumed if no reg is provided q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = autosize } else if cursym.Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. 
cursym.Text.Mark |= LEAF } if cursym.Text.Mark&LEAF != 0 { cursym.Leaf = true break } q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_REG q.From.Reg = REG_LR q.To.Type = obj.TYPE_MEM q.To.Reg = REGSP q.To.Offset = 0 if cursym.Text.From3.Offset&obj.WRAPPER != 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 // CMP R0, R3 // BEQ end // MOVD panic_argp(R3), R4 // ADD $(autosize+8), R1, R5 // CMP R4, R5 // BNE end // ADD $8, R1, R6 // MOVD R6, panic_argp(R3) // end: // NOP // // The NOP is needed to give the jumps somewhere to land. // It is a liblink NOP, not a s390x NOP: it encodes to 0 instruction bytes. q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REGG q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R0 q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ABEQ q.To.Type = obj.TYPE_BRANCH p1 = q q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REG_R3 q.From.Offset = 0 // Panic.argp q.To.Type = obj.TYPE_REG q.To.Reg = REG_R4 q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) + ctxt.FixedFrameSize() q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R5 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R4 q.To.Type = obj.TYPE_REG q.To.Reg = REG_R5 q = obj.Appendp(ctxt, q) q.As = ABNE q.To.Type = obj.TYPE_BRANCH p2 = q q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = ctxt.FixedFrameSize() q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R6 q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_REG q.From.Reg = REG_R6 q.To.Type = obj.TYPE_MEM q.To.Reg = REG_R3 q.To.Offset = 0 // Panic.argp q = obj.Appendp(ctxt, q) q.As = obj.ANOP p1.Pcond = q p2.Pcond = q } case obj.ARET: if p.From.Type == obj.TYPE_CONST { ctxt.Diag("using BECOME (%v) is not supported!", p) break } retTarget := p.To.Sym if cursym.Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} if retTarget == nil { p.To.Type = obj.TYPE_REG p.To.Reg = REG_LR } else { p.To.Type = obj.TYPE_BRANCH p.To.Sym = retTarget } p.Mark |= BRANCH break } p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(autosize) p.To.Type = obj.TYPE_REG p.To.Reg = REGSP p.Spadj = -autosize q = obj.Appendp(ctxt, p) q.As = ABR q.From = obj.Addr{} q.To.Type = obj.TYPE_REG q.To.Reg = REG_LR q.Mark |= BRANCH q.Spadj = autosize break } p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = REG_LR q = p if autosize != 0 { q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = -autosize } q = obj.Appendp(ctxt, q) q.As = ABR q.From = obj.Addr{} if retTarget == nil { q.To.Type = obj.TYPE_REG q.To.Reg = REG_LR } else { q.To.Type = obj.TYPE_BRANCH q.To.Sym = retTarget } q.Mark |= BRANCH q.Spadj = autosize case AADD: if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { p.Spadj = int32(-p.From.Offset) } } } if wasSplit { pLast = stacksplitPost(ctxt, pLast, pPre, pPreempt) // emit post part of split check } }
func progedit(ctxt *obj.Link, p *obj.Prog) { p.From.Class = 0 p.To.Class = 0 // Rewrite BR/BL to symbol as TYPE_BRANCH. switch p.As { case ABR, ABL, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY: if p.To.Sym != nil { p.To.Type = obj.TYPE_BRANCH } } // Rewrite float constants to values stored in memory unless they are +0. switch p.As { case AFMOVS: if p.From.Type == obj.TYPE_FCONST { f32 := float32(p.From.Val.(float64)) i32 := math.Float32bits(f32) if i32 == 0 { // +0 break } literal := fmt.Sprintf("$f32.%08x", i32) s := obj.Linklookup(ctxt, literal, 0) s.Size = 4 p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Sym.Local = true p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } case AFMOVD: if p.From.Type == obj.TYPE_FCONST { i64 := math.Float64bits(p.From.Val.(float64)) if i64 == 0 { // +0 break } literal := fmt.Sprintf("$f64.%016x", i64) s := obj.Linklookup(ctxt, literal, 0) s.Size = 8 p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Sym.Local = true p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } // put constants not loadable by LOAD IMMEDIATE into memory case AMOVD: if p.From.Type == obj.TYPE_CONST { val := p.From.Offset if int64(int32(val)) != val && int64(uint32(val)) != val && int64(uint64(val)&(0xffffffff<<32)) != val { literal := fmt.Sprintf("$i64.%016x", uint64(p.From.Offset)) s := obj.Linklookup(ctxt, literal, 0) s.Size = 8 p.From.Type = obj.TYPE_MEM p.From.Sym = s p.From.Sym.Local = true p.From.Name = obj.NAME_EXTERN p.From.Offset = 0 } } } // Rewrite SUB constants into ADD. switch p.As { case ASUBC: if p.From.Type == obj.TYPE_CONST { p.From.Offset = -p.From.Offset p.As = AADDC } case ASUB: if p.From.Type == obj.TYPE_CONST { p.From.Offset = -p.From.Offset p.As = AADD } } if ctxt.Flag_dynlink { rewriteToUseGot(ctxt, p) } }
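// Hedged restatement of the loadability test in the AMOVD case above
// (isLoadableConst is a made-up name): a 64-bit constant needs no memory
// literal when it is a sign-extended 32-bit value, a zero-extended 32-bit
// value, or has all of its low 32 bits clear, the shapes that s390x
// load-immediate style instructions can materialize directly.
func isLoadableConst(val int64) bool {
	return int64(int32(val)) == val || // sign-extended 32-bit
		int64(uint32(val)) == val || // zero-extended 32-bit
		val&0xffffffff == 0 // low 32 bits all zero
}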
func peep(firstp *obj.Prog) { g := gc.Flowstart(firstp, nil) if g == nil { return } gactive = 0 var p *obj.Prog var r *gc.Flow var t obj.As loop1: if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 { gc.Dumpit("loop1", g.Start, 0) } t = 0 for r = g.Start; r != nil; r = r.Link { p = r.Prog // TODO(austin) Handle smaller moves. arm and amd64 // distinguish between moves that *must* // sign/zero extend and moves that don't care so they // can eliminate moves that don't care without // breaking moves that do care. This might let us // simplify or remove the next peep loop, too. if p.As == ppc64.AMOVD || p.As == ppc64.AFMOVD { if regtyp(&p.To) { // Try to eliminate reg->reg moves if regtyp(&p.From) { if p.From.Type == p.To.Type { if copyprop(r) { excise(r) t++ } else if subprop(r) && copyprop(r) { excise(r) t++ } } } // Convert uses of $0 to uses of R0 and // propagate R0 if regzer(&p.From) { if p.To.Type == obj.TYPE_REG { p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REGZERO if copyprop(r) { excise(r) t++ } else if subprop(r) && copyprop(r) { excise(r) t++ } } } } } } if t != 0 { goto loop1 } /* * look for MOVB x,R; MOVB R,R (for small MOVs not handled above) */ var p1 *obj.Prog var r1 *gc.Flow for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { default: continue case ppc64.AMOVH, ppc64.AMOVHZ, ppc64.AMOVB, ppc64.AMOVBZ, ppc64.AMOVW, ppc64.AMOVWZ: if p.To.Type != obj.TYPE_REG { continue } } r1 = r.Link if r1 == nil { continue } p1 = r1.Prog if p1.As != p.As { continue } if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg { continue } if p1.To.Type != obj.TYPE_REG || p1.To.Reg != p.To.Reg { continue } excise(r1) } if gc.Debug['D'] > 1 { goto ret /* allow following code improvement to be suppressed */ } /* * look for OP x,y,R; CMP R, $0 -> OPCC x,y,R * when OP can set condition codes correctly */ for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { case ppc64.ACMP, ppc64.ACMPW: /* always safe? 
*/ if !regzer(&p.To) { continue } r1 = r.S1 if r1 == nil { continue } switch r1.Prog.As { default: continue /* the conditions can be complex and these are currently little used */ case ppc64.ABCL, ppc64.ABC: continue case ppc64.ABEQ, ppc64.ABGE, ppc64.ABGT, ppc64.ABLE, ppc64.ABLT, ppc64.ABNE, ppc64.ABVC, ppc64.ABVS: break } r1 = r for { r1 = gc.Uniqp(r1) if r1 == nil || r1.Prog.As != obj.ANOP { break } } if r1 == nil { continue } p1 = r1.Prog if p1.To.Type != obj.TYPE_REG || p1.To.Reg != p.From.Reg { continue } switch p1.As { /* irregular instructions */ case ppc64.ASUB, ppc64.AADD, ppc64.AXOR, ppc64.AOR: if p1.From.Type == obj.TYPE_CONST || p1.From.Type == obj.TYPE_ADDR { continue } } switch p1.As { default: continue case ppc64.AMOVW, ppc64.AMOVD: if p1.From.Type != obj.TYPE_REG { continue } continue case ppc64.AANDCC, ppc64.AANDNCC, ppc64.AORCC, ppc64.AORNCC, ppc64.AXORCC, ppc64.ASUBCC, ppc64.ASUBECC, ppc64.ASUBMECC, ppc64.ASUBZECC, ppc64.AADDCC, ppc64.AADDCCC, ppc64.AADDECC, ppc64.AADDMECC, ppc64.AADDZECC, ppc64.ARLWMICC, ppc64.ARLWNMCC, /* don't deal with floating point instructions for now */ /* case AFABS: case AFADD: case AFADDS: case AFCTIW: case AFCTIWZ: case AFDIV: case AFDIVS: case AFMADD: case AFMADDS: case AFMOVD: case AFMSUB: case AFMSUBS: case AFMUL: case AFMULS: case AFNABS: case AFNEG: case AFNMADD: case AFNMADDS: case AFNMSUB: case AFNMSUBS: case AFRSP: case AFSUB: case AFSUBS: case ACNTLZW: case AMTFSB0: case AMTFSB1: */ ppc64.AADD, ppc64.AADDV, ppc64.AADDC, ppc64.AADDCV, ppc64.AADDME, ppc64.AADDMEV, ppc64.AADDE, ppc64.AADDEV, ppc64.AADDZE, ppc64.AADDZEV, ppc64.AAND, ppc64.AANDN, ppc64.ADIVW, ppc64.ADIVWV, ppc64.ADIVWU, ppc64.ADIVWUV, ppc64.ADIVD, ppc64.ADIVDV, ppc64.ADIVDU, ppc64.ADIVDUV, ppc64.AEQV, ppc64.AEXTSB, ppc64.AEXTSH, ppc64.AEXTSW, ppc64.AMULHW, ppc64.AMULHWU, ppc64.AMULLW, ppc64.AMULLWV, ppc64.AMULHD, ppc64.AMULHDU, ppc64.AMULLD, ppc64.AMULLDV, ppc64.ANAND, ppc64.ANEG, ppc64.ANEGV, ppc64.ANOR, ppc64.AOR, ppc64.AORN, ppc64.AREM, ppc64.AREMV, ppc64.AREMU, ppc64.AREMUV, ppc64.AREMD, ppc64.AREMDV, ppc64.AREMDU, ppc64.AREMDUV, ppc64.ARLWMI, ppc64.ARLWNM, ppc64.ASLW, ppc64.ASRAW, ppc64.ASRW, ppc64.ASLD, ppc64.ASRAD, ppc64.ASRD, ppc64.ASUB, ppc64.ASUBV, ppc64.ASUBC, ppc64.ASUBCV, ppc64.ASUBME, ppc64.ASUBMEV, ppc64.ASUBE, ppc64.ASUBEV, ppc64.ASUBZE, ppc64.ASUBZEV, ppc64.AXOR: t = variant2as(p1.As, as2variant(p1.As)|V_CC) } if gc.Debug['D'] != 0 { fmt.Printf("cmp %v; %v -> ", p1, p) } p1.As = t if gc.Debug['D'] != 0 { fmt.Printf("%v\n", p1) } excise(r) continue } } ret: gc.Flowend(g) }
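// Worked example of the compare elimination above (illustrative):
//   ADD R4, R5, R6         ADDCC R4, R5, R6
//   CMP R6, $0       ==>   (compare excised)
//   BEQ done               BEQ   done
// The record (CC) form sets condition register field CR0 as a side
// effect, so the explicit compare of the result against zero is
// redundant and variant2as picks the CC-setting opcode instead.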
/* // instruction scheduling if(debug['Q'] == 0) return; curtext = nil; q = nil; // p - 1 q1 = firstp; // top of block o = 0; // count of instructions for(p = firstp; p != nil; p = p1) { p1 = p->link; o++; if(p->mark & NOSCHED){ if(q1 != p){ sched(q1, q); } for(; p != nil; p = p->link){ if(!(p->mark & NOSCHED)) break; q = p; } p1 = p; q1 = p; o = 0; continue; } if(p->mark & (LABEL|SYNC)) { if(q1 != p) sched(q1, q); q1 = p; o = 1; } if(p->mark & (BRANCH|SYNC)) { sched(q1, p); q1 = p1; o = 0; } if(o >= NSCHED) { sched(q1, p); q1 = p1; o = 0; } q = p; } */ func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog { // MOVD g_stackguard(g), R3 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 var q *obj.Prog if framesize <= obj.StackSmall { // small stack: SP < stackguard // CMP stackguard, SP p = obj.Appendp(ctxt, p) p.As = ACMPU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REGSP } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // ADD $-framesize, SP, R4 // CMP stackguard, R4 p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-framesize) p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = ACMPU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 } else { // Such a large stack we need to protect against wraparound. // If SP is close to zero: // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // // Preemption sets stackguard to StackPreempt, a very large value. // That breaks the math above, so we have to check for that explicitly. 
// // stackguard is R3 // CMP R3, $StackPreempt // BEQ label-of-call-to-morestack // ADD $StackGuard, SP, R4 // SUB R3, R4 // MOVD $(framesize+(StackGuard-StackSmall)), R31 // CMPU R31, R4 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_CONST p.To.Offset = obj.StackPreempt p = obj.Appendp(ctxt, p) q = p p.As = ABEQ p.To.Type = obj.TYPE_BRANCH p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackGuard p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) + obj.StackGuard - obj.StackSmall p.To.Type = obj.TYPE_REG p.To.Reg = REGTMP p = obj.Appendp(ctxt, p) p.As = ACMPU p.From.Type = obj.TYPE_REG p.From.Reg = REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 } // q1: BLT done p = obj.Appendp(ctxt, p) q1 := p p.As = ABLT p.To.Type = obj.TYPE_BRANCH // MOVD LR, R5 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REG_LR p.To.Type = obj.TYPE_REG p.To.Reg = REG_R5 if q != nil { q.Pcond = p } var morestacksym *obj.LSym if ctxt.Cursym.Cfunc { morestacksym = obj.Linklookup(ctxt, "runtime.morestackc", 0) } else if ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0 { morestacksym = obj.Linklookup(ctxt, "runtime.morestack_noctxt", 0) } else { morestacksym = obj.Linklookup(ctxt, "runtime.morestack", 0) } if ctxt.Flag_dynlink { // Avoid calling morestack via a PLT when dynamically linking. The // PLT stubs generated by the system linker on ppc64le include "std r2, // 24(r1)" to save the TOC pointer in their caller's stack // frame. Unfortunately (and necessarily) morestack is called before // the function that calls it sets up its frame and so the PLT ends // up smashing the saved TOC pointer for its caller's caller. // // According to the ABI documentation there is a mechanism to avoid // the TOC save that the PLT stub does (put a R_PPC64_TOCSAVE // relocation on the nop after the call to morestack) but at the time // of writing it is not supported at all by gold and my attempt to // use it with ld.bfd caused an internal linker error. So this hack // seems preferable. // MOVD $runtime.morestack(SB), R12 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Sym = morestacksym p.From.Name = obj.NAME_GOTREF p.To.Type = obj.TYPE_REG p.To.Reg = REG_R12 // MOVD R12, CTR p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REG_R12 p.To.Type = obj.TYPE_REG p.To.Reg = REG_CTR // BL CTR p = obj.Appendp(ctxt, p) p.As = obj.ACALL p.From.Type = obj.TYPE_REG p.From.Reg = REG_R12 p.To.Type = obj.TYPE_REG p.To.Reg = REG_CTR } else { // BL runtime.morestack(SB) p = obj.Appendp(ctxt, p) p.As = ABL p.To.Type = obj.TYPE_BRANCH p.To.Sym = morestacksym } // BR start p = obj.Appendp(ctxt, p) p.As = ABR p.To.Type = obj.TYPE_BRANCH p.Pcond = ctxt.Cursym.Text.Link // placeholder for q1's jump target p = obj.Appendp(ctxt, p) p.As = obj.ANOP // zero-width placeholder q1.Pcond = p return p }
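// Illustrative note on the StackPreempt special case above: preemption
// stores a sentinel into stackguard whose unsigned value is close to
// 2^64, so the unsigned huge-frame arithmetic would wrap around and the
// BLT could wrongly conclude the stack is large enough; the explicit
// CMP/BEQ therefore routes a preempted goroutine straight to the
// morestack call before that math runs.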