func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, gc.Ctxt.FixedFrameSize()+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) { p = appendpp(p, ppc64.AADD, obj.TYPE_CONST, 0, gc.Ctxt.FixedFrameSize()+frame+lo-8, obj.TYPE_REG, ppc64.REGRT1, 0) p.Reg = ppc64.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, gc.Ctxt.FixedFrameSize()+frame+lo-8, obj.TYPE_REG, ppc64.REGTMP, 0) p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT1, 0) p.Reg = ppc64.REGSP p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, ppc64.REGTMP, 0) p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT2, 0) p.Reg = ppc64.REGRT1 p = appendpp(p, ppc64.AMOVDU, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGRT1, int64(gc.Widthptr)) p1 := p p = appendpp(p, ppc64.ACMP, obj.TYPE_REG, ppc64.REGRT1, 0, obj.TYPE_REG, ppc64.REGRT2, 0) p = appendpp(p, ppc64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
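// Editorial sketch (not part of the original source): the DUFFZERO branch
// above enters runtime.duffzero partway through. On ppc64 the routine is a
// run of 128 pointer-sized stores, each encoded in 4 bytes, so the entry
// offset 4*(128 - cnt/Widthptr) skips exactly the stores that are not
// needed. The helper below only mirrors that arithmetic; the name and the
// standalone form are illustrative.
func duffzeroEntryOffset(cnt, widthptr int64) int64 {
	// 128 stores in total; keep the last cnt/widthptr of them.
	return 4 * (128 - cnt/widthptr)
}

// For example, with widthptr = 8 and cnt = 128 bytes (16 words), the offset
// is 4*(128-16) = 448, so only the final 16 stores run.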
/*
 * insert n into reg slot of p
 */
func raddr(n *gc.Node, p *obj.Prog) {
	var a obj.Addr
	gc.Naddr(&a, n)
	if a.Type != obj.TYPE_REG {
		if n != nil {
			gc.Fatalf("bad in raddr: %v", n.Op)
		} else {
			gc.Fatalf("bad in raddr: <null>")
		}
		p.Reg = 0
	} else {
		p.Reg = a.Reg
	}
}
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, mips.AMOVV, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGSP, 8+frame+lo+i) } // TODO(dfc): https://golang.org/issue/12108 // If DUFFZERO is used inside a tail call (see genwrapper) it will // overwrite the link register. } else if false && cnt <= int64(128*gc.Widthptr) { p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr)) } else { // ADDV $(8+frame+lo-8), SP, r1 // ADDV $cnt, r1, r2 // loop: // MOVV R0, (Widthptr)r1 // ADDV $Widthptr, r1 // BNE r1, r2, loop p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, mips.REGRT2, 0) p.Reg = mips.REGRT1 p = appendpp(p, mips.AMOVV, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGRT1, int64(gc.Widthptr)) p1 := p p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, int64(gc.Widthptr), obj.TYPE_REG, mips.REGRT1, 0) p = appendpp(p, mips.ABNE, obj.TYPE_REG, mips.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) p.Reg = mips.REGRT2 gc.Patch(p, p1) } return p }
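// Editorial sketch (not part of the original source): the fallback path
// above emits an explicit store loop (ADDV/MOVV/BNE). The helper below is a
// plain-Go rendering of what that loop does, with the region viewed as
// pointer-sized words; names are illustrative only. (The store and the
// increment are swapped relative to the emitted code, which does not change
// the effect.)
func zeroRangeLoop(words []uint64) {
	r1 := -1             // one word before the region, like REGRT1
	r2 := len(words) - 1 // end marker, like REGRT2 = REGRT1 + cnt
	for r1 != r2 {
		r1++          // ADDV $Widthptr, r1
		words[r1] = 0 // MOVV R0, (Widthptr)(r1)
	}
}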
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, r0 *uint32) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if *r0 == 0 { p = appendpp(p, arm.AMOVW, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, arm.REG_R0, 0) *r0 = 1 } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REGSP, int32(4+frame+lo+i)) } } else if !gc.Nacl && (cnt <= int64(128*gc.Widthptr)) { p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0) p.Reg = arm.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0) p.Reg = arm.REGSP p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(cnt), obj.TYPE_REG, arm.REG_R2, 0) p.Reg = arm.REG_R1 p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REG_R1, 4) p1 := p p.Scond |= arm.C_PBIT p = appendpp(p, arm.ACMP, obj.TYPE_REG, arm.REG_R1, 0, obj.TYPE_NONE, 0, 0) p.Reg = arm.REG_R2 p = appendpp(p, arm.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p = appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, arm64.REGRT1, 0) p.Reg = arm64.REGRT1 p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, arm64.REGTMP, 0) p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p = appendpp(p, arm64.AADD, obj.TYPE_REG, arm64.REGTMP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p.Reg = arm64.REGRT1 p = appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, arm64.REGTMP, 0) p = appendpp(p, arm64.AADD, obj.TYPE_REG, arm64.REGTMP, 0, obj.TYPE_REG, arm64.REGRT2, 0) p.Reg = arm64.REGRT1 p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGRT1, int64(gc.Widthptr)) p.Scond = arm64.C_XPRE p1 := p p = appendpp(p, arm64.ACMP, obj.TYPE_REG, arm64.REGRT1, 0, obj.TYPE_NONE, 0, 0) p.Reg = arm64.REGRT2 p = appendpp(p, arm64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
/* * generate high multiply * res = (nl * nr) >> wordsize */ func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) { if nl.Ullman < nr.Ullman { nl, nr = nr, nl } t := nl.Type w := t.Width * 8 var n1 gc.Node gc.Regalloc(&n1, t, res) gc.Cgen(nl, &n1) var n2 gc.Node gc.Regalloc(&n2, t, nil) gc.Cgen(nr, &n2) switch gc.Simtype[t.Etype] { case gc.TINT8, gc.TINT16: gins(optoas(gc.OMUL, t), &n2, &n1) gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(w), &n1) case gc.TUINT8, gc.TUINT16: gins(optoas(gc.OMUL, t), &n2, &n1) gshift(arm.AMOVW, &n1, arm.SHIFT_LR, int32(w), &n1) // perform a long multiplication. case gc.TINT32, gc.TUINT32: var p *obj.Prog if t.IsSigned() { p = gins(arm.AMULL, &n2, nil) } else { p = gins(arm.AMULLU, &n2, nil) } // n2 * n1 -> (n1 n2) p.Reg = n1.Reg p.To.Type = obj.TYPE_REGREG p.To.Reg = n1.Reg p.To.Offset = int64(n2.Reg) default: gc.Fatalf("cgen_hmul %v", t) } gc.Cgen(&n1, res) gc.Regfree(&n1) gc.Regfree(&n2) }
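// Editorial sketch (not part of the original source): for 32-bit operands
// cgen_hmul above emits AMULL/AMULLU, which produce the full 64-bit product
// in a register pair and keep the high word as the result. The helpers
// below state that computation in plain Go; names are illustrative only.
func hmul32(a, b int32) int32 {
	// signed path (AMULL): upper 32 bits of the 64-bit product
	return int32((int64(a) * int64(b)) >> 32)
}

func hmul32u(a, b uint32) uint32 {
	// unsigned path (AMULLU)
	return uint32((uint64(a) * uint64(b)) >> 32)
}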
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog { // MOVW g_stackguard(g), R1 p = obj.Appendp(ctxt, p) p.As = AMOVW p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 if framesize <= obj.StackSmall { // small stack: SP < stackguard // CMP stackguard, SP p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REGSP } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // MOVW $-framesize(SP), R2 // CMP stackguard, R2 p = obj.Appendp(ctxt, p) p.As = AMOVW p.From.Type = obj.TYPE_ADDR p.From.Reg = REGSP p.From.Offset = int64(-framesize) p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REG_R2 } else { // Such a large stack we need to protect against wraparound // if SP is close to zero. // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // CMP $StackPreempt, R1 // MOVW.NE $StackGuard(SP), R2 // SUB.NE R1, R2 // MOVW.NE $(framesize+(StackGuard-StackSmall)), R3 // CMP.NE R3, R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_CONST p.From.Offset = int64(uint32(obj.StackPreempt & (1<<32 - 1))) p.Reg = REG_R1 p = obj.Appendp(ctxt, p) p.As = AMOVW p.From.Type = obj.TYPE_ADDR p.From.Reg = REGSP p.From.Offset = obj.StackGuard p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p.Scond = C_SCOND_NE p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p.Scond = C_SCOND_NE p = obj.Appendp(ctxt, p) p.As = AMOVW p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall) p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 p.Scond = C_SCOND_NE p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.Reg = REG_R2 p.Scond = C_SCOND_NE } // BLS call-to-morestack bls := obj.Appendp(ctxt, p) bls.As = ABLS bls.To.Type = obj.TYPE_BRANCH var last *obj.Prog for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link { } spfix := obj.Appendp(ctxt, last) spfix.As = obj.ANOP spfix.Spadj = -framesize // MOVW LR, R3 movw := obj.Appendp(ctxt, spfix) movw.As = AMOVW movw.From.Type = obj.TYPE_REG movw.From.Reg = REGLINK movw.To.Type = obj.TYPE_REG movw.To.Reg = REG_R3 bls.Pcond = movw // BL runtime.morestack call := obj.Appendp(ctxt, movw) call.As = obj.ACALL call.To.Type = obj.TYPE_BRANCH morestack := "runtime.morestack" switch { case ctxt.Cursym.Cfunc: morestack = "runtime.morestackc" case ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0: morestack = "runtime.morestack_noctxt" } call.To.Sym = obj.Linklookup(ctxt, morestack, 0) // B start b := obj.Appendp(ctxt, call) b.As = obj.AJMP b.To.Type = obj.TYPE_BRANCH b.Pcond = ctxt.Cursym.Text.Link b.Spadj = +framesize return bls }
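// Editorial sketch (not part of the original source): the three framesize
// regimes above each reduce to a single comparison before the BLS to
// morestack. The helper below restates that decision in plain Go, following
// the conditions written in the comments; the parameter names and the
// unsigned uintptr arithmetic are illustrative, not the real obj/runtime
// definitions.
func needsMorestack(sp, stackguard, framesize, stackSmall, stackBig, stackGuard, stackPreempt uintptr) bool {
	if framesize <= stackSmall {
		// small stack: SP < stackguard
		return sp < stackguard
	}
	if framesize <= stackBig {
		// large stack: SP-framesize < stackguard-StackSmall
		return sp-framesize < stackguard-stackSmall
	}
	// Very large frame: stackguard may hold the preempt sentinel, and
	// SP-framesize could wrap below zero, so compare shifted values.
	if stackguard == stackPreempt {
		return true
	}
	// SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
	return sp-stackguard+stackGuard < framesize+(stackGuard-stackSmall)
}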
/* // instruction scheduling if(debug['Q'] == 0) return; curtext = nil; q = nil; // p - 1 q1 = firstp; // top of block o = 0; // count of instructions for(p = firstp; p != nil; p = p1) { p1 = p->link; o++; if(p->mark & NOSCHED){ if(q1 != p){ sched(q1, q); } for(; p != nil; p = p->link){ if(!(p->mark & NOSCHED)) break; q = p; } p1 = p; q1 = p; o = 0; continue; } if(p->mark & (LABEL|SYNC)) { if(q1 != p) sched(q1, q); q1 = p; o = 1; } if(p->mark & (BRANCH|SYNC)) { sched(q1, p); q1 = p1; o = 0; } if(o >= NSCHED) { sched(q1, p); q1 = p1; o = 0; } q = p; } */ func stacksplitPre(ctxt *obj.Link, p *obj.Prog, framesize int32) (*obj.Prog, *obj.Prog) { var q *obj.Prog // MOVD g_stackguard(g), R3 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 q = nil if framesize <= obj.StackSmall { // small stack: SP < stackguard // CMP stackguard, SP //p.To.Type = obj.TYPE_REG //p.To.Reg = REGSP // q1: BLT done p = obj.Appendp(ctxt, p) //q1 = p p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.Reg = REGSP p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH //p = obj.Appendp(ctxt, p) //p.As = ACMPU //p.From.Type = obj.TYPE_REG //p.From.Reg = REG_R3 //p.To.Type = obj.TYPE_REG //p.To.Reg = REGSP //p = obj.Appendp(ctxt, p) //p.As = ABGE //p.To.Type = obj.TYPE_BRANCH } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // ADD $-framesize, SP, R4 // CMP stackguard, R4 p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-framesize) p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.Reg = REG_R4 p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH } else { // Such a large stack we need to protect against wraparound. // If SP is close to zero: // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // // Preemption sets stackguard to StackPreempt, a very large value. // That breaks the math above, so we have to check for that explicitly. // // stackguard is R3 // CMP R3, $StackPreempt // BEQ label-of-call-to-morestack // ADD $StackGuard, SP, R4 // SUB R3, R4 // MOVD $(framesize+(StackGuard-StackSmall)), TEMP // CMPUBGE TEMP, R4 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_CONST p.To.Offset = obj.StackPreempt p = obj.Appendp(ctxt, p) q = p p.As = ABEQ p.To.Type = obj.TYPE_BRANCH p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackGuard p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) + obj.StackGuard - obj.StackSmall p.To.Type = obj.TYPE_REG p.To.Reg = REGTMP p = obj.Appendp(ctxt, p) p.From.Type = obj.TYPE_REG p.From.Reg = REGTMP p.Reg = REG_R4 p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH } return p, q }
func preprocess(ctxt *obj.Link, cursym *obj.LSym) { // TODO(minux): add morestack short-cuts with small fixed frame-size. ctxt.Cursym = cursym if cursym.Text == nil || cursym.Text.Link == nil { return } p := cursym.Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. p.From3.Offset |= obj.NOFRAME textstksiz = 0 } if textstksiz%8 != 0 { ctxt.Diag("frame size %d not a multiple of 8", textstksiz) } if p.From3.Offset&obj.NOFRAME != 0 { if textstksiz != 0 { ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) } } cursym.Args = p.To.Val.(int32) cursym.Locals = int32(textstksiz) /* * find leaf subroutines * strip NOPs * expand RET * expand BECOME pseudo */ if ctxt.Debugvlog != 0 { fmt.Fprintf(ctxt.Bso, "%5.2f noops\n", obj.Cputime()) } ctxt.Bso.Flush() var q *obj.Prog var q1 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: q = p p.Mark |= LABEL | LEAF | SYNC if p.Link != nil { p.Link.Mark |= LABEL } case ANOR: q = p if p.To.Type == obj.TYPE_REG { if p.To.Reg == REGZERO { p.Mark |= LABEL | SYNC } } case ASYNC, AWORD: q = p p.Mark |= LABEL | SYNC continue case AMOVW, AMOVWZ, AMOVD: q = p if p.From.Reg >= REG_RESERVED || p.To.Reg >= REG_RESERVED { p.Mark |= LABEL | SYNC } continue case AFABS, AFADD, AFDIV, AFMADD, AFMOVD, AFMOVS, AFMSUB, AFMUL, AFNABS, AFNEG, AFNMADD, AFNMSUB, ALEDBR, ALDEBR, AFSUB: q = p p.Mark |= FLOAT continue case ABL, ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: cursym.Text.Mark &^= LEAF fallthrough case ABC, ABEQ, ABGE, ABGT, ABLE, ABLT, ABNE, ABR, ABVC, ABVS, ACMPBEQ, ACMPBGE, ACMPBGT, ACMPBLE, ACMPBLT, ACMPBNE, ACMPUBEQ, ACMPUBGE, ACMPUBGT, ACMPUBLE, ACMPUBLT, ACMPUBNE: p.Mark |= BRANCH q = p q1 = p.Pcond if q1 != nil { for q1.As == obj.ANOP { q1 = q1.Link p.Pcond = q1 } if q1.Mark&LEAF == 0 { q1.Mark |= LABEL } } else { p.Mark |= LABEL } q1 = p.Link if q1 != nil { q1.Mark |= LABEL } continue case AFCMPO, AFCMPU: q = p p.Mark |= FCMP | FLOAT continue case obj.ARET: q = p if p.Link != nil { p.Link.Mark |= LABEL } continue case obj.ANOP: q1 = p.Link q.Link = q1 /* q is non-nop */ q1.Mark |= p.Mark continue default: q = p continue } } autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog var pLast *obj.Prog var pPre *obj.Prog var pPreempt *obj.Prog wasSplit := false for p := cursym.Text; p != nil; p = p.Link { pLast = p switch p.As { case obj.ATEXT: autosize = int32(textstksiz) if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 { // A leaf function with no locals has no frame. p.From3.Offset |= obj.NOFRAME } if p.From3.Offset&obj.NOFRAME == 0 { // If there is a stack frame at all, it includes // space to save the LR. autosize += int32(ctxt.FixedFrameSize()) } p.To.Offset = int64(autosize) q = p if p.From3.Offset&obj.NOSPLIT == 0 { p, pPreempt = stacksplitPre(ctxt, p, autosize) // emit pre part of split check pPre = p wasSplit = true //need post part of split } if autosize != 0 { q = obj.Appendp(ctxt, p) q.As = AMOVD q.From.Type = obj.TYPE_ADDR q.From.Offset = int64(-autosize) q.From.Reg = REGSP // not actually needed - REGSP is assumed if no reg is provided q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = autosize } else if cursym.Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. 
cursym.Text.Mark |= LEAF } if cursym.Text.Mark&LEAF != 0 { cursym.Leaf = true break } q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_REG q.From.Reg = REG_LR q.To.Type = obj.TYPE_MEM q.To.Reg = REGSP q.To.Offset = 0 if cursym.Text.From3.Offset&obj.WRAPPER != 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 // CMP R0, R3 // BEQ end // MOVD panic_argp(R3), R4 // ADD $(autosize+8), R1, R5 // CMP R4, R5 // BNE end // ADD $8, R1, R6 // MOVD R6, panic_argp(R3) // end: // NOP // // The NOP is needed to give the jumps somewhere to land. // It is a liblink NOP, not a s390x NOP: it encodes to 0 instruction bytes. q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REGG q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R0 q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ABEQ q.To.Type = obj.TYPE_BRANCH p1 = q q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REG_R3 q.From.Offset = 0 // Panic.argp q.To.Type = obj.TYPE_REG q.To.Reg = REG_R4 q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) + ctxt.FixedFrameSize() q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R5 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R4 q.To.Type = obj.TYPE_REG q.To.Reg = REG_R5 q = obj.Appendp(ctxt, q) q.As = ABNE q.To.Type = obj.TYPE_BRANCH p2 = q q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = ctxt.FixedFrameSize() q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R6 q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_REG q.From.Reg = REG_R6 q.To.Type = obj.TYPE_MEM q.To.Reg = REG_R3 q.To.Offset = 0 // Panic.argp q = obj.Appendp(ctxt, q) q.As = obj.ANOP p1.Pcond = q p2.Pcond = q } case obj.ARET: if p.From.Type == obj.TYPE_CONST { ctxt.Diag("using BECOME (%v) is not supported!", p) break } retTarget := p.To.Sym if cursym.Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} if retTarget == nil { p.To.Type = obj.TYPE_REG p.To.Reg = REG_LR } else { p.To.Type = obj.TYPE_BRANCH p.To.Sym = retTarget } p.Mark |= BRANCH break } p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(autosize) p.To.Type = obj.TYPE_REG p.To.Reg = REGSP p.Spadj = -autosize q = obj.Appendp(ctxt, p) q.As = ABR q.From = obj.Addr{} q.To.Type = obj.TYPE_REG q.To.Reg = REG_LR q.Mark |= BRANCH q.Spadj = autosize break } p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = REG_LR q = p if autosize != 0 { q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = -autosize } q = obj.Appendp(ctxt, q) q.As = ABR q.From = obj.Addr{} if retTarget == nil { q.To.Type = obj.TYPE_REG q.To.Reg = REG_LR } else { q.To.Type = obj.TYPE_BRANCH q.To.Sym = retTarget } q.Mark |= BRANCH q.Spadj = autosize case AADD: if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { p.Spadj = int32(-p.From.Offset) } } } if wasSplit { pLast = stacksplitPost(ctxt, pLast, pPre, pPreempt) // emit post part of split check } }
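// Editorial sketch (not part of the original source): the WRAPPER prologue
// above implements the check spelled out in its comment. The helper below
// restates it in plain Go: if the innermost panic's argp points at this
// wrapper's argument area (one fixed-frame slot above the frame just
// allocated), it is moved to the bottom of the current frame. All names
// (panicArgp, fixedFrameSize) are illustrative stand-ins for g.panic.argp
// and ctxt.FixedFrameSize().
func wrapperAdjustPanicArgp(panicArgp *uintptr, sp, autosize, fixedFrameSize uintptr) {
	if panicArgp == nil { // g.panic == nil check (CMP R0, R3; BEQ end)
		return
	}
	if *panicArgp == sp+autosize+fixedFrameSize { // panic_argp == FP?
		*panicArgp = sp + fixedFrameSize // point it at the new frame bottom
	}
}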
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog { // MOV g_stackguard(g), R1 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 q := (*obj.Prog)(nil) if framesize <= obj.StackSmall { // small stack: SP < stackguard // MOV SP, R2 // CMP stackguard, R2 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REG_R2 } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // SUB $framesize, SP, R2 // CMP stackguard, R2 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REG_R2 } else { // Such a large stack we need to protect against wraparound // if SP is close to zero. // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // CMP $StackPreempt, R1 // BEQ label_of_call_to_morestack // ADD $StackGuard, SP, R2 // SUB R1, R2 // MOV $(framesize+(StackGuard-StackSmall)), R3 // CMP R3, R2 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackPreempt p.Reg = REG_R1 p = obj.Appendp(ctxt, p) q = p p.As = ABEQ p.To.Type = obj.TYPE_BRANCH p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackGuard p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall) p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.Reg = REG_R2 } // BLS do-morestack bls := obj.Appendp(ctxt, p) bls.As = ABLS bls.To.Type = obj.TYPE_BRANCH var last *obj.Prog for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link { } spfix := obj.Appendp(ctxt, last) spfix.As = obj.ANOP spfix.Spadj = -framesize // MOV LR, R3 movlr := obj.Appendp(ctxt, spfix) movlr.As = AMOVD movlr.From.Type = obj.TYPE_REG movlr.From.Reg = REGLINK movlr.To.Type = obj.TYPE_REG movlr.To.Reg = REG_R3 if q != nil { q.Pcond = movlr } bls.Pcond = movlr debug := movlr if false { debug = obj.Appendp(ctxt, debug) debug.As = AMOVD debug.From.Type = obj.TYPE_CONST debug.From.Offset = int64(framesize) debug.To.Type = obj.TYPE_REG debug.To.Reg = REGTMP } // BL runtime.morestack(SB) call := obj.Appendp(ctxt, debug) call.As = ABL call.To.Type = obj.TYPE_BRANCH morestack := "runtime.morestack" switch { case ctxt.Cursym.Cfunc: morestack = "runtime.morestackc" case ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0: morestack = "runtime.morestack_noctxt" } call.To.Sym = obj.Linklookup(ctxt, morestack, 0) // B start jmp := obj.Appendp(ctxt, call) jmp.As = AB jmp.To.Type = obj.TYPE_BRANCH jmp.Pcond = ctxt.Cursym.Text.Link jmp.Spadj = +framesize // placeholder for bls's jump target // p = 
// p = obj.Appendp(ctxt, p)
// p.As = obj.ANOP

return bls
}
// If s==nil, copyu returns the set/use of v in p; otherwise, it // modifies p to replace reads of v with reads of s and returns 0 for // success or non-zero for failure. // // If s==nil, copy returns one of the following values: // 1 if v only used // 2 if v is set and used in one address (read-alter-rewrite; // can't substitute) // 3 if v is only set // 4 if v is set in one address and used in another (so addresses // can be rewritten independently) // 0 otherwise (not touched) func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int { if p.From3Type() != obj.TYPE_NONE { // 7g never generates a from3 fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3)) } if p.RegTo2 != obj.REG_NONE { // 7g never generates a to2 fmt.Printf("copyu: RegTo2 (%v) not implemented\n", obj.Rconv(int(p.RegTo2))) } switch p.As { default: fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As)) return 2 case obj.ANOP, /* read p->from, write p->to */ arm64.ANEG, arm64.AFNEGD, arm64.AFNEGS, arm64.AFSQRTD, arm64.AFCVTZSD, arm64.AFCVTZSS, arm64.AFCVTZSDW, arm64.AFCVTZSSW, arm64.AFCVTZUD, arm64.AFCVTZUS, arm64.AFCVTZUDW, arm64.AFCVTZUSW, arm64.AFCVTSD, arm64.AFCVTDS, arm64.ASCVTFD, arm64.ASCVTFS, arm64.ASCVTFWD, arm64.ASCVTFWS, arm64.AUCVTFD, arm64.AUCVTFS, arm64.AUCVTFWD, arm64.AUCVTFWS, arm64.AMOVB, arm64.AMOVBU, arm64.AMOVH, arm64.AMOVHU, arm64.AMOVW, arm64.AMOVWU, arm64.AMOVD, arm64.AFMOVS, arm64.AFMOVD: if p.Scond == 0 { if s != nil { if copysub(&p.From, v, s, true) { return 1 } // Update only indirect uses of v in p->to if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { // Fix up implicit from if p.From.Type == obj.TYPE_NONE { p.From = p.To } if copyau(&p.From, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau(&p.To, v) { // p->to only indirectly uses v return 1 } return 0 } /* rar p->from, write p->to or read p->from, rar p->to */ if p.From.Type == obj.TYPE_MEM { if copyas(&p.From, v) { // No s!=nil check; need to fail // anyway in that case return 2 } if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyas(&p.To, v) { return 3 } } else if p.To.Type == obj.TYPE_MEM { if copyas(&p.To, v) { return 2 } if s != nil { if copysub(&p.From, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { return 1 } } else { fmt.Printf("copyu: bad %v\n", p) } return 0 case arm64.AADD, /* read p->from, read p->reg, write p->to */ arm64.AADDS, arm64.ASUB, arm64.AADC, arm64.AAND, arm64.AORR, arm64.AEOR, arm64.AROR, arm64.AMUL, arm64.ASMULL, arm64.AUMULL, arm64.ASMULH, arm64.AUMULH, arm64.ASDIV, arm64.AUDIV, arm64.ALSL, arm64.ALSR, arm64.AASR, arm64.AFADDD, arm64.AFADDS, arm64.AFSUBD, arm64.AFSUBS, arm64.AFMULD, arm64.AFMULS, arm64.AFDIVD, arm64.AFDIVS: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } // Update only indirect uses of v in p->to if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { if p.Reg == 0 { // Fix up implicit reg (e.g., ADD // R3,R4 -> ADD R3,R4,R4) so we can // update reg and to separately. 
p.Reg = p.To.Reg } if copyau(&p.From, v) { return 4 } if copyau1(p, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } if copyau(&p.To, v) { return 1 } return 0 case arm64.ABEQ, arm64.ABNE, arm64.ABGE, arm64.ABLT, arm64.ABGT, arm64.ABLE, arm64.ABLO, arm64.ABLS, arm64.ABHI, arm64.ABHS: return 0 case obj.ACHECKNIL, /* read p->from */ arm64.ACMP, /* read p->from, read p->reg */ arm64.AFCMPD, arm64.AFCMPS: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } return 0 case arm64.AB: /* read p->to */ if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 1 } return 0 case obj.ARET: /* funny */ if s != nil { return 0 } // All registers die at this point, so claim // everything is set (and not used). return 3 case arm64.ABL: /* funny */ if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg { return 2 } if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 4 } return 3 // R31 is zero, used by DUFFZERO, cannot be substituted. // R16 is ptr to memory, used and set, cannot be substituted. case obj.ADUFFZERO: if v.Type == obj.TYPE_REG { if v.Reg == 31 { return 1 } if v.Reg == 16 { return 2 } } return 0 // R16, R17 are ptr to src, dst, used and set, cannot be substituted. // R27 is scratch, set by DUFFCOPY, cannot be substituted. case obj.ADUFFCOPY: if v.Type == obj.TYPE_REG { if v.Reg == 16 || v.Reg == 17 { return 2 } if v.Reg == 27 { return 3 } } return 0 case arm64.AHINT, obj.ATEXT, obj.APCDATA, obj.AFUNCDATA, obj.AVARDEF, obj.AVARKILL, obj.AVARLIVE, obj.AUSEFIELD: return 0 } }
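// Editorial sketch (not part of the original source): the return codes 0-4
// documented above drive copy propagation. The helper below shows one
// plausible way a driver loop could interpret them while scanning forward
// from a register copy; it is a hypothetical illustration, and the real
// copyprop/subprop drivers are more involved.
func interpretCopyu(code int) (stop, ok bool) {
	switch code {
	case 0, 1: // not touched / only read: keep scanning
		return false, false
	case 3: // fully overwritten: the scanned window was safe
		return true, true
	default: // 2 or 4: read-alter-rewrite or mixed set and use
		return true, false
	}
}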
// copysub1 replaces p.Reg with s.Reg if f==true and p.Reg and v.Reg are
// direct references to the same register.
// It returns true on failure to substitute (it never fails on arm64).
func copysub1(p *obj.Prog, v, s *obj.Addr, f bool) bool {
	if f && copyau1(p, v) {
		p.Reg = s.Reg
	}
	return false
}
func preprocess(ctxt *obj.Link, cursym *obj.LSym) { // TODO(minux): add morestack short-cuts with small fixed frame-size. ctxt.Cursym = cursym if cursym.Text == nil || cursym.Text.Link == nil { return } p := cursym.Text textstksiz := p.To.Offset if textstksiz == -8 { // Compatibility hack. p.From3.Offset |= obj.NOFRAME textstksiz = 0 } if textstksiz%8 != 0 { ctxt.Diag("frame size %d not a multiple of 8", textstksiz) } if p.From3.Offset&obj.NOFRAME != 0 { if textstksiz != 0 { ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz) } } cursym.Args = p.To.Val.(int32) cursym.Locals = int32(textstksiz) /* * find leaf subroutines * strip NOPs * expand RET * expand BECOME pseudo */ if ctxt.Debugvlog != 0 { fmt.Fprintf(ctxt.Bso, "%5.2f noops\n", obj.Cputime()) } ctxt.Bso.Flush() var q *obj.Prog var q1 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: q = p p.Mark |= LABEL | LEAF | SYNC if p.Link != nil { p.Link.Mark |= LABEL } case ANOR: q = p if p.To.Type == obj.TYPE_REG { if p.To.Reg == REGZERO { p.Mark |= LABEL | SYNC } } case ALWAR, ALBAR, ASTBCCC, ASTWCCC, AECIWX, AECOWX, AEIEIO, AICBI, AISYNC, ATLBIE, ATLBIEL, ASLBIA, ASLBIE, ASLBMFEE, ASLBMFEV, ASLBMTE, ADCBF, ADCBI, ADCBST, ADCBT, ADCBTST, ADCBZ, ASYNC, ATLBSYNC, APTESYNC, ALWSYNC, ATW, AWORD, ARFI, ARFCI, ARFID, AHRFID: q = p p.Mark |= LABEL | SYNC continue case AMOVW, AMOVWZ, AMOVD: q = p if p.From.Reg >= REG_SPECIAL || p.To.Reg >= REG_SPECIAL { p.Mark |= LABEL | SYNC } continue case AFABS, AFABSCC, AFADD, AFADDCC, AFCTIW, AFCTIWCC, AFCTIWZ, AFCTIWZCC, AFDIV, AFDIVCC, AFMADD, AFMADDCC, AFMOVD, AFMOVDU, /* case AFMOVDS: */ AFMOVS, AFMOVSU, /* case AFMOVSD: */ AFMSUB, AFMSUBCC, AFMUL, AFMULCC, AFNABS, AFNABSCC, AFNEG, AFNEGCC, AFNMADD, AFNMADDCC, AFNMSUB, AFNMSUBCC, AFRSP, AFRSPCC, AFSUB, AFSUBCC: q = p p.Mark |= FLOAT continue case ABL, ABCL, obj.ADUFFZERO, obj.ADUFFCOPY: cursym.Text.Mark &^= LEAF fallthrough case ABC, ABEQ, ABGE, ABGT, ABLE, ABLT, ABNE, ABR, ABVC, ABVS: p.Mark |= BRANCH q = p q1 = p.Pcond if q1 != nil { for q1.As == obj.ANOP { q1 = q1.Link p.Pcond = q1 } if q1.Mark&LEAF == 0 { q1.Mark |= LABEL } } else { p.Mark |= LABEL } q1 = p.Link if q1 != nil { q1.Mark |= LABEL } continue case AFCMPO, AFCMPU: q = p p.Mark |= FCMP | FLOAT continue case obj.ARET: q = p if p.Link != nil { p.Link.Mark |= LABEL } continue case obj.ANOP: q1 = p.Link q.Link = q1 /* q is non-nop */ q1.Mark |= p.Mark continue default: q = p continue } } autosize := int32(0) var aoffset int var mov obj.As var p1 *obj.Prog var p2 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: mov = AMOVD aoffset = 0 autosize = int32(textstksiz) if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 { // A leaf function with no locals has no frame. p.From3.Offset |= obj.NOFRAME } if p.From3.Offset&obj.NOFRAME == 0 { // If there is a stack frame at all, it includes // space to save the LR. autosize += int32(ctxt.FixedFrameSize()) } p.To.Offset = int64(autosize) q = p if ctxt.Flag_shared && cursym.Name != "runtime.duffzero" && cursym.Name != "runtime.duffcopy" && cursym.Name != "runtime.stackBarrier" { // When compiling Go into PIC, all functions must start // with instructions to load the TOC pointer into r2: // // addis r2, r12, .TOC.-func@ha // addi r2, r2, .TOC.-func@l+4 // // We could probably skip this prologue in some situations // but it's a bit subtle. 
However, it is both safe and // necessary to leave the prologue off duffzero and // duffcopy as we rely on being able to jump to a specific // instruction offset for them, and stackBarrier is only // ever called from an overwritten LR-save slot on the // stack (when r12 will not be remotely the right thing) // but fortunately does not access global data. // // These are AWORDS because there is no (afaict) way to // generate the addis instruction except as part of the // load of a large constant, and in that case there is no // way to use r12 as the source. q = obj.Appendp(ctxt, q) q.As = AWORD q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = 0x3c4c0000 q = obj.Appendp(ctxt, q) q.As = AWORD q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = 0x38420000 rel := obj.Addrel(ctxt.Cursym) rel.Off = 0 rel.Siz = 8 rel.Sym = obj.Linklookup(ctxt, ".TOC.", 0) rel.Type = obj.R_ADDRPOWER_PCREL } if cursym.Text.From3.Offset&obj.NOSPLIT == 0 { q = stacksplit(ctxt, q, autosize) // emit split check } if autosize != 0 { /* use MOVDU to adjust R1 when saving R31, if autosize is small */ if cursym.Text.Mark&LEAF == 0 && autosize >= -BIG && autosize <= BIG { mov = AMOVDU aoffset = int(-autosize) } else { q = obj.Appendp(ctxt, q) q.As = AADD q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = int64(-autosize) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = +autosize } } else if cursym.Text.Mark&LEAF == 0 { // A very few functions that do not return to their caller // (e.g. gogo) are not identified as leaves but still have // no frame. cursym.Text.Mark |= LEAF } if cursym.Text.Mark&LEAF != 0 { cursym.Leaf = true break } q = obj.Appendp(ctxt, q) q.As = AMOVD q.Lineno = p.Lineno q.From.Type = obj.TYPE_REG q.From.Reg = REG_LR q.To.Type = obj.TYPE_REG q.To.Reg = REGTMP q = obj.Appendp(ctxt, q) q.As = mov q.Lineno = p.Lineno q.From.Type = obj.TYPE_REG q.From.Reg = REGTMP q.To.Type = obj.TYPE_MEM q.To.Offset = int64(aoffset) q.To.Reg = REGSP if q.As == AMOVDU { q.Spadj = int32(-aoffset) } if ctxt.Flag_shared { q = obj.Appendp(ctxt, q) q.As = AMOVD q.Lineno = p.Lineno q.From.Type = obj.TYPE_REG q.From.Reg = REG_R2 q.To.Type = obj.TYPE_MEM q.To.Reg = REGSP q.To.Offset = 24 } if cursym.Text.From3.Offset&obj.WRAPPER != 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVD g_panic(g), R3 // CMP R0, R3 // BEQ end // MOVD panic_argp(R3), R4 // ADD $(autosize+8), R1, R5 // CMP R4, R5 // BNE end // ADD $8, R1, R6 // MOVD R6, panic_argp(R3) // end: // NOP // // The NOP is needed to give the jumps somewhere to land. // It is a liblink NOP, not a ppc64 NOP: it encodes to 0 instruction bytes. 
q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REGG q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R0 q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ABEQ q.To.Type = obj.TYPE_BRANCH p1 = q q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_MEM q.From.Reg = REG_R3 q.From.Offset = 0 // Panic.argp q.To.Type = obj.TYPE_REG q.To.Reg = REG_R4 q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) + ctxt.FixedFrameSize() q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R5 q = obj.Appendp(ctxt, q) q.As = ACMP q.From.Type = obj.TYPE_REG q.From.Reg = REG_R4 q.To.Type = obj.TYPE_REG q.To.Reg = REG_R5 q = obj.Appendp(ctxt, q) q.As = ABNE q.To.Type = obj.TYPE_BRANCH p2 = q q = obj.Appendp(ctxt, q) q.As = AADD q.From.Type = obj.TYPE_CONST q.From.Offset = ctxt.FixedFrameSize() q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R6 q = obj.Appendp(ctxt, q) q.As = AMOVD q.From.Type = obj.TYPE_REG q.From.Reg = REG_R6 q.To.Type = obj.TYPE_MEM q.To.Reg = REG_R3 q.To.Offset = 0 // Panic.argp q = obj.Appendp(ctxt, q) q.As = obj.ANOP p1.Pcond = q p2.Pcond = q } case obj.ARET: if p.From.Type == obj.TYPE_CONST { ctxt.Diag("using BECOME (%v) is not supported!", p) break } retTarget := p.To.Sym if cursym.Text.Mark&LEAF != 0 { if autosize == 0 { p.As = ABR p.From = obj.Addr{} if retTarget == nil { p.To.Type = obj.TYPE_REG p.To.Reg = REG_LR } else { p.To.Type = obj.TYPE_BRANCH p.To.Sym = retTarget } p.Mark |= BRANCH break } p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(autosize) p.To.Type = obj.TYPE_REG p.To.Reg = REGSP p.Spadj = -autosize q = ctxt.NewProg() q.As = ABR q.Lineno = p.Lineno q.To.Type = obj.TYPE_REG q.To.Reg = REG_LR q.Mark |= BRANCH q.Spadj = +autosize q.Link = p.Link p.Link = q break } p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Offset = 0 p.From.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REGTMP q = ctxt.NewProg() q.As = AMOVD q.Lineno = p.Lineno q.From.Type = obj.TYPE_REG q.From.Reg = REGTMP q.To.Type = obj.TYPE_REG q.To.Reg = REG_LR q.Link = p.Link p.Link = q p = q if false { // Debug bad returns q = ctxt.NewProg() q.As = AMOVD q.Lineno = p.Lineno q.From.Type = obj.TYPE_MEM q.From.Offset = 0 q.From.Reg = REGTMP q.To.Type = obj.TYPE_REG q.To.Reg = REGTMP q.Link = p.Link p.Link = q p = q } if autosize != 0 { q = ctxt.NewProg() q.As = AADD q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = -autosize q.Link = p.Link p.Link = q } q1 = ctxt.NewProg() q1.As = ABR q1.Lineno = p.Lineno if retTarget == nil { q1.To.Type = obj.TYPE_REG q1.To.Reg = REG_LR } else { q1.To.Type = obj.TYPE_BRANCH q1.To.Sym = retTarget } q1.Mark |= BRANCH q1.Spadj = +autosize q1.Link = q.Link q.Link = q1 case AADD: if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { p.Spadj = int32(-p.From.Offset) } } } }
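// Editorial sketch (not part of the original source): the two AWORD values
// emitted in the TOC prologue above are the fixed parts of the addis/addi
// pair. The helper below decodes the standard PowerPC D-form fields to show
// why those constants are used; it is illustrative only.
func decodeDForm(insn uint32) (opcode, rt, ra uint32, si int16) {
	opcode = insn >> 26       // primary opcode: 15 = addis, 14 = addi
	rt = (insn >> 21) & 0x1f  // destination register
	ra = (insn >> 16) & 0x1f  // source register
	si = int16(insn & 0xffff) // immediate, zero here and patched by the reloc
	return
}

// decodeDForm(0x3c4c0000) -> 15, 2, 12, 0  (addis r2, r12, 0)
// decodeDForm(0x38420000) -> 14, 2, 2, 0   (addi  r2, r2, 0)
// The zero immediates are filled in by the R_ADDRPOWER_PCREL relocation.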
// copysub1 replaces v with s in p1->reg if f==true, or only reports whether
// it could if f==false.
// It returns true on failure to substitute (it always succeeds on mips).
// TODO(dfc) remove unused return value, remove calls with f=false as they do nothing.
func copysub1(p1 *obj.Prog, v *obj.Addr, s *obj.Addr, f bool) bool {
	if f && copyau1(p1, v) {
		p1.Reg = s.Reg
	}
	return false
}
// If s==nil, copyu returns the set/use of v in p; otherwise, it // modifies p to replace reads of v with reads of s and returns 0 for // success or non-zero for failure. // // If s==nil, copy returns one of the following values: // 1 if v only used // 2 if v is set and used in one address (read-alter-rewrite; // can't substitute) // 3 if v is only set // 4 if v is set in one address and used in another (so addresses // can be rewritten independently) // 0 otherwise (not touched) func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int { if p.From3Type() != obj.TYPE_NONE { // 9g never generates a from3 fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3)) } switch p.As { default: fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As)) return 2 case obj.ANOP, /* read p->from, write p->to */ ppc64.AMOVH, ppc64.AMOVHZ, ppc64.AMOVB, ppc64.AMOVBZ, ppc64.AMOVW, ppc64.AMOVWZ, ppc64.AMOVD, ppc64.ANEG, ppc64.ANEGCC, ppc64.AADDME, ppc64.AADDMECC, ppc64.AADDZE, ppc64.AADDZECC, ppc64.ASUBME, ppc64.ASUBMECC, ppc64.ASUBZE, ppc64.ASUBZECC, ppc64.AFCTIW, ppc64.AFCTIWZ, ppc64.AFCTID, ppc64.AFCTIDZ, ppc64.AFCFID, ppc64.AFCFIDCC, ppc64.AFCFIDU, ppc64.AFCFIDUCC, ppc64.AFMOVS, ppc64.AFMOVD, ppc64.AFRSP, ppc64.AFNEG, ppc64.AFNEGCC, ppc64.AFSQRT: if s != nil { if copysub(&p.From, v, s, true) { return 1 } // Update only indirect uses of v in p->to if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { // Fix up implicit from if p.From.Type == obj.TYPE_NONE { p.From = p.To } if copyau(&p.From, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau(&p.To, v) { // p->to only indirectly uses v return 1 } return 0 case ppc64.AMOVBU, /* rar p->from, write p->to or read p->from, rar p->to */ ppc64.AMOVBZU, ppc64.AMOVHU, ppc64.AMOVHZU, ppc64.AMOVWZU, ppc64.AMOVDU: if p.From.Type == obj.TYPE_MEM { if copyas(&p.From, v) { // No s!=nil check; need to fail // anyway in that case return 2 } if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyas(&p.To, v) { return 3 } } else if p.To.Type == obj.TYPE_MEM { if copyas(&p.To, v) { return 2 } if s != nil { if copysub(&p.From, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { return 1 } } else { fmt.Printf("copyu: bad %v\n", p) } return 0 case ppc64.ARLWMI, /* read p->from, read p->reg, rar p->to */ ppc64.ARLWMICC: if copyas(&p.To, v) { return 2 } fallthrough /* fall through */ case ppc64.AADD, /* read p->from, read p->reg, write p->to */ ppc64.AADDC, ppc64.AADDE, ppc64.ASUB, ppc64.ASLW, ppc64.ASRW, ppc64.ASRAW, ppc64.ASLD, ppc64.ASRD, ppc64.ASRAD, ppc64.AOR, ppc64.AORCC, ppc64.AORN, ppc64.AORNCC, ppc64.AAND, ppc64.AANDCC, ppc64.AANDN, ppc64.AANDNCC, ppc64.ANAND, ppc64.ANANDCC, ppc64.ANOR, ppc64.ANORCC, ppc64.AXOR, ppc64.AMULHW, ppc64.AMULHWU, ppc64.AMULLW, ppc64.AMULLD, ppc64.ADIVW, ppc64.ADIVD, ppc64.ADIVWU, ppc64.ADIVDU, ppc64.AREM, ppc64.AREMU, ppc64.AREMD, ppc64.AREMDU, ppc64.ARLWNM, ppc64.ARLWNMCC, ppc64.AFADDS, ppc64.AFADD, ppc64.AFSUBS, ppc64.AFSUB, ppc64.AFMULS, ppc64.AFMUL, ppc64.AFDIVS, ppc64.AFDIV: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } // Update only indirect uses of v in p->to if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { if p.Reg == 0 { // Fix up implicit reg (e.g., ADD // R3,R4 -> ADD R3,R4,R4) so we can // update reg and to separately. 
p.Reg = p.To.Reg } if copyau(&p.From, v) { return 4 } if copyau1(p, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } if copyau(&p.To, v) { return 1 } return 0 case ppc64.ABEQ, ppc64.ABGT, ppc64.ABGE, ppc64.ABLT, ppc64.ABLE, ppc64.ABNE, ppc64.ABVC, ppc64.ABVS: return 0 case obj.ACHECKNIL, /* read p->from */ ppc64.ACMP, /* read p->from, read p->to */ ppc64.ACMPU, ppc64.ACMPW, ppc64.ACMPWU, ppc64.AFCMPO, ppc64.AFCMPU: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { return 1 } if copyau(&p.To, v) { return 1 } return 0 // 9g never generates a branch to a GPR (this isn't // even a normal instruction; liblink turns it in to a // mov and a branch). case ppc64.ABR: /* read p->to */ if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 1 } return 0 case obj.ARET: /* funny */ if s != nil { return 0 } // All registers die at this point, so claim // everything is set (and not used). return 3 case ppc64.ABL: /* funny */ if v.Type == obj.TYPE_REG { // TODO(rsc): REG_R0 and REG_F0 used to be // (when register numbers started at 0) exregoffset and exfregoffset, // which are unset entirely. // It's strange that this handles R0 and F0 differently from the other // registers. Possible failure to optimize? if ppc64.REG_R0 < v.Reg && v.Reg <= ppc64.REGEXT { return 2 } if v.Reg == ppc64.REGARG { return 2 } if ppc64.REG_F0 < v.Reg && v.Reg <= ppc64.FREGEXT { return 2 } } if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg { return 2 } if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 4 } return 3 // R0 is zero, used by DUFFZERO, cannot be substituted. // R3 is ptr to memory, used and set, cannot be substituted. case obj.ADUFFZERO: if v.Type == obj.TYPE_REG { if v.Reg == 0 { return 1 } if v.Reg == 3 { return 2 } } return 0 // R3, R4 are ptr to src, dst, used and set, cannot be substituted. // R5 is scratch, set by DUFFCOPY, cannot be substituted. case obj.ADUFFCOPY: if v.Type == obj.TYPE_REG { if v.Reg == 3 || v.Reg == 4 { return 2 } if v.Reg == 5 { return 3 } } return 0 case obj.ATEXT: /* funny */ if v.Type == obj.TYPE_REG { if v.Reg == ppc64.REGARG { return 3 } } return 0 case obj.APCDATA, obj.AFUNCDATA, obj.AVARDEF, obj.AVARKILL, obj.AVARLIVE, obj.AUSEFIELD: return 0 } }
/* * return * 1 if v only used (and substitute), * 2 if read-alter-rewrite * 3 if set * 4 if set and used * 0 otherwise (not touched) */ func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int { switch p.As { default: fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As)) return 2 case arm.AMOVM: if v.Type != obj.TYPE_REG { return 0 } if p.From.Type == obj.TYPE_CONST { /* read reglist, read/rar */ if s != nil { if p.From.Offset&(1<<uint(v.Reg)) != 0 { return 1 } if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { if p.Scond&arm.C_WBIT != 0 { return 2 } return 1 } if p.From.Offset&(1<<uint(v.Reg)) != 0 { return 1 /* read/rar, write reglist */ } } else { if s != nil { if p.To.Offset&(1<<uint(v.Reg)) != 0 { return 1 } if copysub(&p.From, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { if p.Scond&arm.C_WBIT != 0 { return 2 } if p.To.Offset&(1<<uint(v.Reg)) != 0 { return 4 } return 1 } if p.To.Offset&(1<<uint(v.Reg)) != 0 { return 3 } } return 0 case obj.ANOP, /* read,, write */ arm.ASQRTD, arm.AMOVW, arm.AMOVF, arm.AMOVD, arm.AMOVH, arm.AMOVHS, arm.AMOVHU, arm.AMOVB, arm.AMOVBS, arm.AMOVBU, arm.AMOVFW, arm.AMOVWF, arm.AMOVDW, arm.AMOVWD, arm.AMOVFD, arm.AMOVDF: if p.Scond&(arm.C_WBIT|arm.C_PBIT) != 0 { if v.Type == obj.TYPE_REG { if p.From.Type == obj.TYPE_MEM || p.From.Type == obj.TYPE_SHIFT { if p.From.Reg == v.Reg { return 2 } } else { if p.To.Reg == v.Reg { return 2 } } } } if s != nil { if copysub(&p.From, v, s, true) { return 1 } if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { if p.Scond != arm.C_SCOND_NONE { return 2 } if copyau(&p.From, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau(&p.To, v) { return 1 } return 0 case arm.AMULLU, /* read, read, write, write */ arm.AMULL, arm.AMULA, arm.AMVN: return 2 case arm.AADD, /* read, read, write */ arm.AADC, arm.ASUB, arm.ASBC, arm.ARSB, arm.ASLL, arm.ASRL, arm.ASRA, arm.AORR, arm.AAND, arm.AEOR, arm.AMUL, arm.AMULU, arm.ADIV, arm.ADIVU, arm.AMOD, arm.AMODU, arm.AADDF, arm.AADDD, arm.ASUBF, arm.ASUBD, arm.AMULF, arm.AMULD, arm.ADIVF, arm.ADIVD, obj.ACHECKNIL, /* read */ arm.ACMPF, /* read, read, */ arm.ACMPD, arm.ACMP, arm.ACMN, arm.ATST: /* read,, */ if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { if p.Scond != arm.C_SCOND_NONE { return 2 } if p.Reg == 0 { p.Reg = p.To.Reg } if copyau(&p.From, v) { return 4 } if copyau1(p, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } if copyau(&p.To, v) { return 1 } return 0 case arm.ABEQ, /* read, read */ arm.ABNE, arm.ABCS, arm.ABHS, arm.ABCC, arm.ABLO, arm.ABMI, arm.ABPL, arm.ABVS, arm.ABVC, arm.ABHI, arm.ABLS, arm.ABGE, arm.ABLT, arm.ABGT, arm.ABLE: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } return 0 case arm.AB: /* funny */ if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 1 } return 0 case obj.ARET: /* funny */ if s != nil { return 1 } return 3 case arm.ABL: /* funny */ if v.Type == obj.TYPE_REG { // TODO(rsc): REG_R0 and REG_F0 used to be // (when register numbers started at 0) exregoffset and exfregoffset, // which are unset entirely. // It's strange that this handles R0 and F0 differently from the other // registers. 
Possible failure to optimize? if arm.REG_R0 < v.Reg && v.Reg <= arm.REGEXT { return 2 } if v.Reg == arm.REGARG { return 2 } if arm.REG_F0 < v.Reg && v.Reg <= arm.FREGEXT { return 2 } } if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg { return 2 } if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 4 } return 3 // R0 is zero, used by DUFFZERO, cannot be substituted. // R1 is ptr to memory, used and set, cannot be substituted. case obj.ADUFFZERO: if v.Type == obj.TYPE_REG { if v.Reg == arm.REG_R0 { return 1 } if v.Reg == arm.REG_R0+1 { return 2 } } return 0 // R0 is scratch, set by DUFFCOPY, cannot be substituted. // R1, R2 are ptr to src, dst, used and set, cannot be substituted. case obj.ADUFFCOPY: if v.Type == obj.TYPE_REG { if v.Reg == arm.REG_R0 { return 3 } if v.Reg == arm.REG_R0+1 || v.Reg == arm.REG_R0+2 { return 2 } } return 0 case obj.ATEXT: /* funny */ if v.Type == obj.TYPE_REG { if v.Reg == arm.REGARG { return 3 } } return 0 case obj.APCDATA, obj.AFUNCDATA, obj.AVARDEF, obj.AVARKILL, obj.AVARLIVE, obj.AUSEFIELD: return 0 } }
// UNUSED func peep(firstp *obj.Prog) { g := gc.Flowstart(firstp, nil) if g == nil { return } gactive = 0 var p *obj.Prog var t int loop1: if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 { gc.Dumpit("loop1", g.Start, 0) } t = 0 for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { /* * elide shift into TYPE_SHIFT operand of subsequent instruction */ // if(shiftprop(r)) { // excise(r); // t++; // break; // } case arm.ASLL, arm.ASRL, arm.ASRA: break case arm.AMOVB, arm.AMOVH, arm.AMOVW, arm.AMOVF, arm.AMOVD: if regtyp(&p.From) { if p.From.Type == p.To.Type && isfloatreg(&p.From) == isfloatreg(&p.To) { if p.Scond == arm.C_SCOND_NONE { if copyprop(g, r) { excise(r) t++ break } if subprop(r) && copyprop(g, r) { excise(r) t++ break } } } } case arm.AMOVHS, arm.AMOVHU, arm.AMOVBS, arm.AMOVBU: if p.From.Type == obj.TYPE_REG { if shortprop(r) { t++ } } } } /* if(p->scond == C_SCOND_NONE) if(regtyp(&p->to)) if(isdconst(&p->from)) { constprop(&p->from, &p->to, r->s1); } break; */ if t != 0 { goto loop1 } for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { /* * EOR -1,x,y => MVN x,y */ case arm.AEOR: if isdconst(&p.From) && p.From.Offset == -1 { p.As = arm.AMVN p.From.Type = obj.TYPE_REG if p.Reg != 0 { p.From.Reg = p.Reg } else { p.From.Reg = p.To.Reg } p.Reg = 0 } } } for r := g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { case arm.AMOVW, arm.AMOVB, arm.AMOVBS, arm.AMOVBU: if p.From.Type == obj.TYPE_MEM && p.From.Offset == 0 { xtramodes(g, r, &p.From) } else if p.To.Type == obj.TYPE_MEM && p.To.Offset == 0 { xtramodes(g, r, &p.To) } else { continue } } } // case ACMP: // /* // * elide CMP $0,x if calculation of x can set condition codes // */ // if(isdconst(&p->from) || p->from.offset != 0) // continue; // r2 = r->s1; // if(r2 == nil) // continue; // t = r2->prog->as; // switch(t) { // default: // continue; // case ABEQ: // case ABNE: // case ABMI: // case ABPL: // break; // case ABGE: // t = ABPL; // break; // case ABLT: // t = ABMI; // break; // case ABHI: // t = ABNE; // break; // case ABLS: // t = ABEQ; // break; // } // r1 = r; // do // r1 = uniqp(r1); // while (r1 != nil && r1->prog->as == ANOP); // if(r1 == nil) // continue; // p1 = r1->prog; // if(p1->to.type != TYPE_REG) // continue; // if(p1->to.reg != p->reg) // if(!(p1->as == AMOVW && p1->from.type == TYPE_REG && p1->from.reg == p->reg)) // continue; // // switch(p1->as) { // default: // continue; // case AMOVW: // if(p1->from.type != TYPE_REG) // continue; // case AAND: // case AEOR: // case AORR: // case ABIC: // case AMVN: // case ASUB: // case ARSB: // case AADD: // case AADC: // case ASBC: // case ARSC: // break; // } // p1->scond |= C_SBIT; // r2->prog->as = t; // excise(r); // continue; // predicate(g); gc.Flowend(g) }
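// Editorial sketch (not part of the original source): the EOR rewrite in
// the second pass above relies on the identity that XOR with an all-ones
// value is bitwise complement, so EOR $-1, x, y can become MVN x, y. The
// helper below just states that identity; it always returns true.
func eorAllOnesIsMVN(x uint32) bool {
	return x^0xFFFFFFFF == ^x
}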
/* // instruction scheduling if(debug['Q'] == 0) return; curtext = nil; q = nil; // p - 1 q1 = firstp; // top of block o = 0; // count of instructions for(p = firstp; p != nil; p = p1) { p1 = p->link; o++; if(p->mark & NOSCHED){ if(q1 != p){ sched(q1, q); } for(; p != nil; p = p->link){ if(!(p->mark & NOSCHED)) break; q = p; } p1 = p; q1 = p; o = 0; continue; } if(p->mark & (LABEL|SYNC)) { if(q1 != p) sched(q1, q); q1 = p; o = 1; } if(p->mark & (BRANCH|SYNC)) { sched(q1, p); q1 = p1; o = 0; } if(o >= NSCHED) { sched(q1, p); q1 = p1; o = 0; } q = p; } */ func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog { // MOVD g_stackguard(g), R3 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 var q *obj.Prog if framesize <= obj.StackSmall { // small stack: SP < stackguard // CMP stackguard, SP p = obj.Appendp(ctxt, p) p.As = ACMPU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REGSP } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // ADD $-framesize, SP, R4 // CMP stackguard, R4 p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-framesize) p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = ACMPU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 } else { // Such a large stack we need to protect against wraparound. // If SP is close to zero: // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // // Preemption sets stackguard to StackPreempt, a very large value. // That breaks the math above, so we have to check for that explicitly. 
// // stackguard is R3 // CMP R3, $StackPreempt // BEQ label-of-call-to-morestack // ADD $StackGuard, SP, R4 // SUB R3, R4 // MOVD $(framesize+(StackGuard-StackSmall)), R31 // CMPU R31, R4 p = obj.Appendp(ctxt, p) p.As = ACMP p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_CONST p.To.Offset = obj.StackPreempt p = obj.Appendp(ctxt, p) q = p p.As = ABEQ p.To.Type = obj.TYPE_BRANCH p = obj.Appendp(ctxt, p) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackGuard p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = ASUB p.From.Type = obj.TYPE_REG p.From.Reg = REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) + obj.StackGuard - obj.StackSmall p.To.Type = obj.TYPE_REG p.To.Reg = REGTMP p = obj.Appendp(ctxt, p) p.As = ACMPU p.From.Type = obj.TYPE_REG p.From.Reg = REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 } // q1: BLT done p = obj.Appendp(ctxt, p) q1 := p p.As = ABLT p.To.Type = obj.TYPE_BRANCH // MOVD LR, R5 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REG_LR p.To.Type = obj.TYPE_REG p.To.Reg = REG_R5 if q != nil { q.Pcond = p } var morestacksym *obj.LSym if ctxt.Cursym.Cfunc { morestacksym = obj.Linklookup(ctxt, "runtime.morestackc", 0) } else if ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0 { morestacksym = obj.Linklookup(ctxt, "runtime.morestack_noctxt", 0) } else { morestacksym = obj.Linklookup(ctxt, "runtime.morestack", 0) } if ctxt.Flag_dynlink { // Avoid calling morestack via a PLT when dynamically linking. The // PLT stubs generated by the system linker on ppc64le when "std r2, // 24(r1)" to save the TOC pointer in their callers stack // frame. Unfortunately (and necessarily) morestack is called before // the function that calls it sets up its frame and so the PLT ends // up smashing the saved TOC pointer for its caller's caller. // // According to the ABI documentation there is a mechanism to avoid // the TOC save that the PLT stub does (put a R_PPC64_TOCSAVE // relocation on the nop after the call to morestack) but at the time // of writing it is not supported at all by gold and my attempt to // use it with ld.bfd caused an internal linker error. So this hack // seems preferable. // MOVD $runtime.morestack(SB), R12 p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Sym = morestacksym p.From.Name = obj.NAME_GOTREF p.To.Type = obj.TYPE_REG p.To.Reg = REG_R12 // MOVD R12, CTR p = obj.Appendp(ctxt, p) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REG_R12 p.To.Type = obj.TYPE_REG p.To.Reg = REG_CTR // BL CTR p = obj.Appendp(ctxt, p) p.As = obj.ACALL p.From.Type = obj.TYPE_REG p.From.Reg = REG_R12 p.To.Type = obj.TYPE_REG p.To.Reg = REG_CTR } else { // BL runtime.morestack(SB) p = obj.Appendp(ctxt, p) p.As = ABL p.To.Type = obj.TYPE_BRANCH p.To.Sym = morestacksym } // BR start p = obj.Appendp(ctxt, p) p.As = ABR p.To.Type = obj.TYPE_BRANCH p.Pcond = ctxt.Cursym.Text.Link // placeholder for q1's jump target p = obj.Appendp(ctxt, p) p.As = obj.ANOP // zero-width place holder q1.Pcond = p return p }
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog { // MOVV g_stackguard(g), R1 p = obj.Appendp(ctxt, p) p.As = AMOVV p.From.Type = obj.TYPE_MEM p.From.Reg = REGG p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0 if ctxt.Cursym.Cfunc { p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 var q *obj.Prog if framesize <= obj.StackSmall { // small stack: SP < stackguard // AGTU SP, stackguard, R1 p = obj.Appendp(ctxt, p) p.As = ASGTU p.From.Type = obj.TYPE_REG p.From.Reg = REGSP p.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 } else if framesize <= obj.StackBig { // large stack: SP-framesize < stackguard-StackSmall // ADDV $-framesize, SP, R2 // SGTU R2, stackguard, R1 p = obj.Appendp(ctxt, p) p.As = AADDV p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-framesize) p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ASGTU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R2 p.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 } else { // Such a large stack we need to protect against wraparound. // If SP is close to zero: // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) // The +StackGuard on both sides is required to keep the left side positive: // SP is allowed to be slightly below stackguard. See stack.h. // // Preemption sets stackguard to StackPreempt, a very large value. // That breaks the math above, so we have to check for that explicitly. // // stackguard is R1 // MOVV $StackPreempt, R2 // BEQ R1, R2, label-of-call-to-morestack // ADDV $StackGuard, SP, R2 // SUBVU R1, R2 // MOVV $(framesize+(StackGuard-StackSmall)), R1 // SGTU R2, R1, R1 p = obj.Appendp(ctxt, p) p.As = AMOVV p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackPreempt p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) q = p p.As = ABEQ p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.Reg = REG_R2 p.To.Type = obj.TYPE_BRANCH p.Mark |= BRANCH p = obj.Appendp(ctxt, p) p.As = AADDV p.From.Type = obj.TYPE_CONST p.From.Offset = obj.StackGuard p.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = ASUBVU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R2 p = obj.Appendp(ctxt, p) p.As = AMOVV p.From.Type = obj.TYPE_CONST p.From.Offset = int64(framesize) + obj.StackGuard - obj.StackSmall p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 p = obj.Appendp(ctxt, p) p.As = ASGTU p.From.Type = obj.TYPE_REG p.From.Reg = REG_R2 p.Reg = REG_R1 p.To.Type = obj.TYPE_REG p.To.Reg = REG_R1 } // q1: BNE R1, done p = obj.Appendp(ctxt, p) q1 := p p.As = ABNE p.From.Type = obj.TYPE_REG p.From.Reg = REG_R1 p.To.Type = obj.TYPE_BRANCH p.Mark |= BRANCH // MOVV LINK, R3 p = obj.Appendp(ctxt, p) p.As = AMOVV p.From.Type = obj.TYPE_REG p.From.Reg = REGLINK p.To.Type = obj.TYPE_REG p.To.Reg = REG_R3 if q != nil { q.Pcond = p p.Mark |= LABEL } // JAL runtime.morestack(SB) p = obj.Appendp(ctxt, p) p.As = AJAL p.To.Type = obj.TYPE_BRANCH if ctxt.Cursym.Cfunc { p.To.Sym = obj.Linklookup(ctxt, "runtime.morestackc", 0) } else if ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0 { p.To.Sym = obj.Linklookup(ctxt, "runtime.morestack_noctxt", 0) } else { p.To.Sym = obj.Linklookup(ctxt, "runtime.morestack", 0) } p.Mark |= BRANCH // JMP start p = obj.Appendp(ctxt, p) p.As = AJMP p.To.Type = obj.TYPE_BRANCH p.Pcond = ctxt.Cursym.Text.Link p.Mark |= BRANCH // placeholder for q1's jump target p = obj.Appendp(ctxt, p) p.As = obj.ANOP 
// zero-width placeholder q1.Pcond = p return p }
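// Illustrative sketch (not part of the original sources): unlike ppc64, MIPS has
// no condition register, so the stacksplit above materializes each comparison
// into R1 with SGTU ("set on greater than, unsigned") and then branches with
// BNE R1. sgtu models that instruction: it yields 1 if a > b (unsigned), else 0.
func sgtu(a, b uint64) uint64 {
	if a > b {
		return 1
	}
	return 0
}

// For the small-stack case the emitted sequence computes R1 = sgtu(SP, stackguard),
// and "BNE R1, done" skips the call to morestack when SP > stackguard.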
// fuseMultiple merges memory loads and stores into load multiple and // store multiple operations. // // Looks for this pattern (sequence of loads or stores): // MOVD R1, 0(R15) // MOVD R2, 8(R15) // MOVD R3, 16(R15) // Replaces with: // STMG R1, R3, 0(R15) func fuseMultiple(r *gc.Flow) int { n := 0 var fused *obj.Prog for ; r != nil; r = r.Link { // If there is a branch into the instruction stream then // we can't fuse into previous instructions. if gc.Uniqp(r) == nil { fused = nil } p := r.Prog isStore := isGPR(&p.From) && isBDMem(&p.To) isLoad := isGPR(&p.To) && isBDMem(&p.From) // are we a candidate? size := int64(0) switch p.As { default: fused = nil continue case obj.ANOP: // skip over nops continue case s390x.AMOVW, s390x.AMOVWZ: size = 4 // TODO(mundaym): 32-bit load multiple is currently not supported // as it requires sign/zero extension. if !isStore { fused = nil continue } case s390x.AMOVD: size = 8 if !isLoad && !isStore { fused = nil continue } } // If we merge two loads/stores with different source/destination Nodes // then we will lose a reference the second Node which means that the // compiler might mark the Node as unused and free its slot on the stack. // TODO(mundaym): allow this by adding a dummy reference to the Node. if fused == nil || fused.From.Node != p.From.Node || fused.From.Type != p.From.Type || fused.To.Node != p.To.Node || fused.To.Type != p.To.Type { fused = p continue } // check two addresses ca := func(a, b *obj.Addr, offset int64) bool { return a.Reg == b.Reg && a.Offset+offset == b.Offset && a.Sym == b.Sym && a.Name == b.Name } switch fused.As { default: fused = p case s390x.AMOVW, s390x.AMOVWZ: if size == 4 && fused.From.Reg+1 == p.From.Reg && ca(&fused.To, &p.To, 4) { fused.As = s390x.ASTMY fused.Reg = p.From.Reg excise(r) n++ } else { fused = p } case s390x.AMOVD: if size == 8 && fused.From.Reg+1 == p.From.Reg && ca(&fused.To, &p.To, 8) { fused.As = s390x.ASTMG fused.Reg = p.From.Reg excise(r) n++ } else if size == 8 && fused.To.Reg+1 == p.To.Reg && ca(&fused.From, &p.From, 8) { fused.As = s390x.ALMG fused.Reg = fused.To.Reg fused.To.Reg = p.To.Reg excise(r) n++ } else { fused = p } case s390x.ASTMG, s390x.ASTMY: if (fused.As == s390x.ASTMY && size != 4) || (fused.As == s390x.ASTMG && size != 8) { fused = p continue } offset := size * int64(fused.Reg-fused.From.Reg+1) if fused.Reg+1 == p.From.Reg && ca(&fused.To, &p.To, offset) { fused.Reg = p.From.Reg excise(r) n++ } else { fused = p } case s390x.ALMG: offset := 8 * int64(fused.To.Reg-fused.Reg+1) if size == 8 && fused.To.Reg+1 == p.To.Reg && ca(&fused.From, &p.From, offset) { fused.To.Reg = p.To.Reg excise(r) n++ } else { fused = p } } } return n }
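// Illustrative sketch (not part of the original sources): the core condition
// fuseMultiple checks before folding a second store into a pending one. Two
// stores are candidates only when their source registers are consecutive and
// their memory operands use the same base (and symbol) with offsets exactly one
// element apart, which is what the ca closure above verifies. memOperand and
// fusableStores are hypothetical, simplified stand-ins for the obj.Addr fields
// involved.
type memOperand struct {
	base   int16 // base register, e.g. R15
	offset int64 // displacement from the base
}

// fusableStores reports whether a store of reg2 to m2 can be merged into a
// pending store of reg1 to m1, for elements of the given size in bytes.
func fusableStores(reg1, reg2 int16, m1, m2 memOperand, size int64) bool {
	return reg2 == reg1+1 && // consecutive registers, e.g. R1 then R2
		m1.base == m2.base && // same base register
		m1.offset+size == m2.offset // adjacent slots, e.g. 0(R15) then 8(R15)
}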
/* * ASLL x,y,w * .. (not use w, not set x y w) * AXXX w,a,b (a != w) * .. (not use w) * (set w) * ----------- changed to * .. * AXXX (x<<y),a,b * .. */ func shiftprop(r *gc.Flow) bool { p := r.Prog if p.To.Type != obj.TYPE_REG { if gc.Debug['P'] != 0 { fmt.Printf("\tBOTCH: result not reg; FAILURE\n") } return false } n := p.To.Reg var a obj.Addr if p.Reg != 0 && p.Reg != p.To.Reg { a.Type = obj.TYPE_REG a.Reg = p.Reg } if gc.Debug['P'] != 0 { fmt.Printf("shiftprop\n%v", p) } r1 := r var p1 *obj.Prog for { /* find first use of shift result; abort if shift operands or result are changed */ r1 = gc.Uniqs(r1) if r1 == nil { if gc.Debug['P'] != 0 { fmt.Printf("\tbranch; FAILURE\n") } return false } if gc.Uniqp(r1) == nil { if gc.Debug['P'] != 0 { fmt.Printf("\tmerge; FAILURE\n") } return false } p1 = r1.Prog if gc.Debug['P'] != 0 { fmt.Printf("\n%v", p1) } switch copyu(p1, &p.To, nil) { case 0: /* not used or set */ if (p.From.Type == obj.TYPE_REG && copyu(p1, &p.From, nil) > 1) || (a.Type == obj.TYPE_REG && copyu(p1, &a, nil) > 1) { if gc.Debug['P'] != 0 { fmt.Printf("\targs modified; FAILURE\n") } return false } continue case 3: /* set, not used */ { if gc.Debug['P'] != 0 { fmt.Printf("\tBOTCH: noref; FAILURE\n") } return false } } break } /* check whether substitution can be done */ switch p1.As { default: if gc.Debug['P'] != 0 { fmt.Printf("\tnon-dpi; FAILURE\n") } return false case arm.AAND, arm.AEOR, arm.AADD, arm.AADC, arm.AORR, arm.ASUB, arm.ASBC, arm.ARSB, arm.ARSC: if p1.Reg == n || (p1.Reg == 0 && p1.To.Type == obj.TYPE_REG && p1.To.Reg == n) { if p1.From.Type != obj.TYPE_REG { if gc.Debug['P'] != 0 { fmt.Printf("\tcan't swap; FAILURE\n") } return false } p1.Reg = p1.From.Reg p1.From.Reg = n switch p1.As { case arm.ASUB: p1.As = arm.ARSB case arm.ARSB: p1.As = arm.ASUB case arm.ASBC: p1.As = arm.ARSC case arm.ARSC: p1.As = arm.ASBC } if gc.Debug['P'] != 0 { fmt.Printf("\t=>%v", p1) } } fallthrough case arm.ABIC, arm.ATST, arm.ACMP, arm.ACMN: if p1.Reg == n { if gc.Debug['P'] != 0 { fmt.Printf("\tcan't swap; FAILURE\n") } return false } if p1.Reg == 0 && p1.To.Reg == n { if gc.Debug['P'] != 0 { fmt.Printf("\tshift result used twice; FAILURE\n") } return false } // case AMVN: if p1.From.Type == obj.TYPE_SHIFT { if gc.Debug['P'] != 0 { fmt.Printf("\tshift result used in shift; FAILURE\n") } return false } if p1.From.Type != obj.TYPE_REG || p1.From.Reg != n { if gc.Debug['P'] != 0 { fmt.Printf("\tBOTCH: where is it used?; FAILURE\n") } return false } } /* check whether shift result is used subsequently */ p2 := p1 if p1.To.Reg != n { var p1 *obj.Prog for { r1 = gc.Uniqs(r1) if r1 == nil { if gc.Debug['P'] != 0 { fmt.Printf("\tinconclusive; FAILURE\n") } return false } p1 = r1.Prog if gc.Debug['P'] != 0 { fmt.Printf("\n%v", p1) } switch copyu(p1, &p.To, nil) { case 0: /* not used or set */ continue case 3: /* set, not used */ break default: /* used */ if gc.Debug['P'] != 0 { fmt.Printf("\treused; FAILURE\n") } return false } break } } /* make the substitution */ p2.From.Reg = 0 o := p.Reg if o == 0 { o = p.To.Reg } o &= 15 switch p.From.Type { case obj.TYPE_CONST: o |= int16(p.From.Offset&0x1f) << 7 case obj.TYPE_REG: o |= 1<<4 | (p.From.Reg&15)<<8 } switch p.As { case arm.ASLL: o |= 0 << 5 case arm.ASRL: o |= 1 << 5 case arm.ASRA: o |= 2 << 5 } p2.From = obj.Addr{} p2.From.Type = obj.TYPE_SHIFT p2.From.Offset = int64(o) if gc.Debug['P'] != 0 { fmt.Printf("\t=>%v\tSUCCEED\n", p2) } return true }
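// Illustrative sketch (not part of the original sources): the shifted-operand
// word that shiftprop packs into p2.From.Offset follows the ARM data-processing
// operand layout visible in the code above: bits 0-3 hold the register being
// shifted, bits 5-6 the shift type (LSL=0, LSR=1, ASR=2), and either bits 7-11
// hold an immediate shift amount or bit 4 is set and bits 8-11 name the
// shift-count register. The two helpers below are hypothetical and only restate
// that packing.
func armImmShiftOperand(reg int16, shiftType, amount uint8) int64 {
	o := int64(reg & 15)         // register being shifted
	o |= int64(shiftType&3) << 5 // LSL/LSR/ASR
	o |= int64(amount&0x1f) << 7 // immediate shift count (0-31)
	return o
}

func armRegShiftOperand(reg int16, shiftType uint8, countReg int16) int64 {
	o := int64(reg & 15)         // register being shifted
	o |= 1 << 4                  // shift count comes from a register
	o |= int64(shiftType&3) << 5 // LSL/LSR/ASR
	o |= int64(countReg&15) << 8 // register holding the shift count
	return o
}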
// If s==nil, copyu returns the set/use of v in p; otherwise, it // modifies p to replace reads of v with reads of s and returns 0 for // success or non-zero for failure. // // If s==nil, copy returns one of the following values: // 1 if v only used // 2 if v is set and used in one address (read-alter-rewrite; // can't substitute) // 3 if v is only set // 4 if v is set in one address and used in another (so addresses // can be rewritten independently) // 0 otherwise (not touched) func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int { if p.From3Type() != obj.TYPE_NONE { // never generates a from3 fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3)) } switch p.As { default: fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As)) return 2 case obj.ANOP, /* read p->from, write p->to */ mips.AMOVV, mips.AMOVF, mips.AMOVD, mips.AMOVH, mips.AMOVHU, mips.AMOVB, mips.AMOVBU, mips.AMOVW, mips.AMOVWU, mips.AMOVFD, mips.AMOVDF, mips.AMOVDW, mips.AMOVWD, mips.AMOVFW, mips.AMOVWF, mips.AMOVDV, mips.AMOVVD, mips.AMOVFV, mips.AMOVVF, mips.ATRUNCFV, mips.ATRUNCDV, mips.ATRUNCFW, mips.ATRUNCDW: if s != nil { if copysub(&p.From, v, s, true) { return 1 } // Update only indirect uses of v in p->to if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { // Fix up implicit from if p.From.Type == obj.TYPE_NONE { p.From = p.To } if copyau(&p.From, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau(&p.To, v) { // p->to only indirectly uses v return 1 } return 0 case mips.ASGT, /* read p->from, read p->reg, write p->to */ mips.ASGTU, mips.AADD, mips.AADDU, mips.ASUB, mips.ASUBU, mips.ASLL, mips.ASRL, mips.ASRA, mips.AOR, mips.ANOR, mips.AAND, mips.AXOR, mips.AADDV, mips.AADDVU, mips.ASUBV, mips.ASUBVU, mips.ASLLV, mips.ASRLV, mips.ASRAV, mips.AADDF, mips.AADDD, mips.ASUBF, mips.ASUBD, mips.AMULF, mips.AMULD, mips.ADIVF, mips.ADIVD: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } // Update only indirect uses of v in p->to if !copyas(&p.To, v) { if copysub(&p.To, v, s, true) { return 1 } } return 0 } if copyas(&p.To, v) { if p.Reg == 0 { // Fix up implicit reg (e.g., ADD // R3,R4 -> ADD R3,R4,R4) so we can // update reg and to separately. p.Reg = p.To.Reg } if copyau(&p.From, v) { return 4 } if copyau1(p, v) { return 4 } return 3 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } if copyau(&p.To, v) { return 1 } return 0 case obj.ACHECKNIL, /* read p->from */ mips.ABEQ, /* read p->from, read p->reg */ mips.ABNE, mips.ABGTZ, mips.ABGEZ, mips.ABLTZ, mips.ABLEZ, mips.ACMPEQD, mips.ACMPEQF, mips.ACMPGED, mips.ACMPGEF, mips.ACMPGTD, mips.ACMPGTF, mips.ABFPF, mips.ABFPT, mips.AMUL, mips.AMULU, mips.ADIV, mips.ADIVU, mips.AMULV, mips.AMULVU, mips.ADIVV, mips.ADIVVU: if s != nil { if copysub(&p.From, v, s, true) { return 1 } if copysub1(p, v, s, true) { return 1 } return 0 } if copyau(&p.From, v) { return 1 } if copyau1(p, v) { return 1 } return 0 case mips.AJMP: /* read p->to */ if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 1 } return 0 case mips.ARET: /* funny */ if s != nil { return 0 } // All registers die at this point, so claim // everything is set (and not used). return 3 case mips.AJAL: /* funny */ if v.Type == obj.TYPE_REG { // TODO(rsc): REG_R0 and REG_F0 used to be // (when register numbers started at 0) exregoffset and exfregoffset, // which are unset entirely. 
// It's strange that this handles R0 and F0 differently from the other // registers. Possible failure to optimize? if mips.REG_R0 < v.Reg && v.Reg <= mips.REG_R31 { return 2 } if v.Reg == mips.REGARG { return 2 } if mips.REG_F0 < v.Reg && v.Reg <= mips.REG_F31 { return 2 } } if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg { return 2 } if s != nil { if copysub(&p.To, v, s, true) { return 1 } return 0 } if copyau(&p.To, v) { return 4 } return 3 // R0 is zero, used by DUFFZERO, cannot be substituted. // R1 is ptr to memory, used and set, cannot be substituted. case obj.ADUFFZERO: if v.Type == obj.TYPE_REG { if v.Reg == 0 { return 1 } if v.Reg == 1 { return 2 } } return 0 // R1, R2 are ptr to src, dst, used and set, cannot be substituted. // R3 is scratch, set by DUFFCOPY, cannot be substituted. case obj.ADUFFCOPY: if v.Type == obj.TYPE_REG { if v.Reg == 1 || v.Reg == 2 { return 2 } if v.Reg == 3 { return 3 } } return 0 case obj.ATEXT: /* funny */ if v.Type == obj.TYPE_REG { if v.Reg == mips.REGARG { return 3 } } return 0 case obj.APCDATA, obj.AFUNCDATA, obj.AVARDEF, obj.AVARKILL, obj.AVARLIVE, obj.AUSEFIELD: return 0 } }
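// Illustrative sketch (not part of the original sources): how a peephole pass
// typically consumes copyu's numeric results when scanning forward from a
// register copy. The switch below only restates the meaning of the return
// values documented above; scanForward is hypothetical and ignores the
// substitution (s != nil) mode entirely.
func scanForward(codes []int) bool {
	for _, c := range codes {
		switch c {
		case 0: // v not touched: keep scanning
			continue
		case 1, 4: // v is read (and possibly set in another address): keep going
			continue
		case 3: // v is overwritten before any further use: scan ends successfully
			return true
		default: // 2: read-alter-rewrite in a single operand: give up
			return false
		}
	}
	return false // ran off the end without v dying: inconclusive
}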
// If s==nil, copyu returns the set/use of v in p; otherwise, it // modifies p to replace reads of v with reads of s and returns 0 for // success or non-zero for failure. // // If s==nil, copy returns one of the following values: // _Read if v only used // _ReadWriteSame if v is set and used in one address (read-alter-rewrite; // can't substitute) // _Write if v is only set // _ReadWriteDiff if v is set in one address and used in another (so addresses // can be rewritten independently) // _None otherwise (not touched) func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) usage { if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST { // Currently we never generate a From3 with anything other than a constant in it. fmt.Printf("copyu: From3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3)) } switch p.As { default: fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As)) return _ReadWriteSame case // read p.From, write p.To s390x.AMOVH, s390x.AMOVHZ, s390x.AMOVB, s390x.AMOVBZ, s390x.AMOVW, s390x.AMOVWZ, s390x.AMOVD, s390x.ANEG, s390x.AADDME, s390x.AADDZE, s390x.ASUBME, s390x.ASUBZE, s390x.AFMOVS, s390x.AFMOVD, s390x.ALEDBR, s390x.AFNEG, s390x.ALDEBR, s390x.ACLFEBR, s390x.ACLGEBR, s390x.ACLFDBR, s390x.ACLGDBR, s390x.ACFEBRA, s390x.ACGEBRA, s390x.ACFDBRA, s390x.ACGDBRA, s390x.ACELFBR, s390x.ACELGBR, s390x.ACDLFBR, s390x.ACDLGBR, s390x.ACEFBRA, s390x.ACEGBRA, s390x.ACDFBRA, s390x.ACDGBRA, s390x.AFSQRT: if s != nil { copysub(&p.From, v, s) // Update only indirect uses of v in p.To if !copyas(&p.To, v) { copysub(&p.To, v, s) } return _None } if copyas(&p.To, v) { // Fix up implicit from if p.From.Type == obj.TYPE_NONE { p.From = p.To } if copyau(&p.From, v) { return _ReadWriteDiff } return _Write } if copyau(&p.From, v) { return _Read } if copyau(&p.To, v) { // p.To only indirectly uses v return _Read } return _None // read p.From, read p.Reg, write p.To case s390x.AADD, s390x.AADDC, s390x.AADDE, s390x.ASUB, s390x.ASLW, s390x.ASRW, s390x.ASRAW, s390x.ASLD, s390x.ASRD, s390x.ASRAD, s390x.ARLL, s390x.ARLLG, s390x.AOR, s390x.AORN, s390x.AAND, s390x.AANDN, s390x.ANAND, s390x.ANOR, s390x.AXOR, s390x.AMULLW, s390x.AMULLD, s390x.AMULHD, s390x.AMULHDU, s390x.ADIVW, s390x.ADIVD, s390x.ADIVWU, s390x.ADIVDU, s390x.AFADDS, s390x.AFADD, s390x.AFSUBS, s390x.AFSUB, s390x.AFMULS, s390x.AFMUL, s390x.AFDIVS, s390x.AFDIV: if s != nil { copysub(&p.From, v, s) copysub1(p, v, s) // Update only indirect uses of v in p.To if !copyas(&p.To, v) { copysub(&p.To, v, s) } } if copyas(&p.To, v) { if p.Reg == 0 { p.Reg = p.To.Reg } if copyau(&p.From, v) || copyau1(p, v) { return _ReadWriteDiff } return _Write } if copyau(&p.From, v) { return _Read } if copyau1(p, v) { return _Read } if copyau(&p.To, v) { return _Read } return _None case s390x.ABEQ, s390x.ABGT, s390x.ABGE, s390x.ABLT, s390x.ABLE, s390x.ABNE, s390x.ABVC, s390x.ABVS: return _None case obj.ACHECKNIL, // read p.From s390x.ACMP, // read p.From, read p.To s390x.ACMPU, s390x.ACMPW, s390x.ACMPWU, s390x.AFCMPO, s390x.AFCMPU, s390x.ACEBR, s390x.AMVC, s390x.ACLC, s390x.AXC, s390x.AOC, s390x.ANC: if s != nil { copysub(&p.From, v, s) copysub(&p.To, v, s) return _None } if copyau(&p.From, v) { return _Read } if copyau(&p.To, v) { return _Read } return _None case s390x.ACMPBNE, s390x.ACMPBEQ, s390x.ACMPBLT, s390x.ACMPBLE, s390x.ACMPBGT, s390x.ACMPBGE, s390x.ACMPUBNE, s390x.ACMPUBEQ, s390x.ACMPUBLT, s390x.ACMPUBLE, s390x.ACMPUBGT, s390x.ACMPUBGE: if s != nil { copysub(&p.From, v, s) copysub1(p, v, s) return _None } if copyau(&p.From, v) { return _Read } if copyau1(p, v) 
{ return _Read } return _None case s390x.ACLEAR: if s != nil { copysub(&p.To, v, s) return _None } if copyau(&p.To, v) { return _Read } return _None // go never generates a branch to a GPR // read p.To case s390x.ABR: if s != nil { copysub(&p.To, v, s) return _None } if copyau(&p.To, v) { return _Read } return _None case obj.ARET, obj.AUNDEF: if s != nil { return _None } // All registers die at this point, so claim // everything is set (and not used). return _Write case s390x.ABL: if v.Type == obj.TYPE_REG { if s390x.REGARG != -1 && v.Reg == s390x.REGARG { return _ReadWriteSame } if p.From.Type == obj.TYPE_REG && p.From.Reg == v.Reg { return _ReadWriteSame } if v.Reg == s390x.REGZERO { // Deliberately inserted nops set R0. return _ReadWriteSame } if v.Reg == s390x.REGCTXT { // Context register for closures. // TODO(mundaym): not sure if we need to exclude this. return _ReadWriteSame } } if s != nil { copysub(&p.To, v, s) return _None } if copyau(&p.To, v) { return _ReadWriteDiff } return _Write case obj.ATEXT: if v.Type == obj.TYPE_REG { if v.Reg == s390x.REGARG { return _Write } } return _None case obj.APCDATA, obj.AFUNCDATA, obj.AVARDEF, obj.AVARKILL, obj.AVARLIVE, obj.AUSEFIELD, obj.ANOP: return _None } }
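// Illustrative sketch (not part of the original sources): the s390x copyu above
// returns named usage values instead of the bare integers used by the other
// back ends. The declaration is not shown in this excerpt; a minimal form
// consistent with the doc comment would look like the following (the names
// match the excerpt, the underlying values and ordering are an assumption).
type usage int

const (
	_None          usage = iota // v not touched
	_Read                       // v only read
	_ReadWriteSame              // v read and written in a single operand (read-alter-rewrite)
	_Write                      // v only written
	_ReadWriteDiff              // v written in one operand and read in another
)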
// zerorange clears the stack in the given range. func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } // Adjust the frame to account for LR. frame += gc.Ctxt.FixedFrameSize() offset := frame + lo reg := int16(s390x.REGSP) // If the offset cannot fit in a 12-bit unsigned displacement then we // need to create a copy of the stack pointer that we can adjust. // We also need to do this if we are going to loop. if offset < 0 || offset > 4096-clearLoopCutoff || cnt > clearLoopCutoff { p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset, obj.TYPE_REG, s390x.REGRT1, 0) p.Reg = int16(s390x.REGSP) reg = s390x.REGRT1 offset = 0 } // Generate a loop of large clears. if cnt > clearLoopCutoff { n := cnt - (cnt % 256) end := int16(s390x.REGRT2) p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset+n, obj.TYPE_REG, end, 0) p.Reg = reg p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset) p.From3 = new(obj.Addr) p.From3.Type = obj.TYPE_CONST p.From3.Offset = 256 pl := p p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0) p = appendpp(p, s390x.ACMP, obj.TYPE_REG, reg, 0, obj.TYPE_REG, end, 0) p = appendpp(p, s390x.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, pl) cnt -= n } // Generate remaining clear instructions without a loop. for cnt > 0 { n := cnt // Can clear at most 256 bytes per instruction. if n > 256 { n = 256 } switch n { // Handle very small clears with move instructions. case 8, 4, 2, 1: ins := s390x.AMOVB switch n { case 8: ins = s390x.AMOVD case 4: ins = s390x.AMOVW case 2: ins = s390x.AMOVH } p = appendpp(p, ins, obj.TYPE_CONST, 0, 0, obj.TYPE_MEM, reg, offset) // Handle clears that would require multiple move instructions with XC. default: p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset) p.From3 = new(obj.Addr) p.From3.Type = obj.TYPE_CONST p.From3.Offset = n } cnt -= n offset += n } return p }
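// Illustrative sketch (not part of the original sources): how zerorange above
// decomposes a clear of cnt bytes. Amounts above the loop cutoff are handled by
// a loop of 256-byte XC instructions; the remainder is cleared with at most one
// more XC per 256 bytes, except that 1, 2, 4 and 8 byte tails use a single
// store of constant zero. clearPlan is hypothetical and only mirrors that
// decomposition; the cutoff value is an assumption standing in for
// clearLoopCutoff.
func clearPlan(cnt int64) (loopBytes int64, chunks []int64) {
	const cutoff = 1024 // assumed stand-in for clearLoopCutoff
	if cnt > cutoff {
		loopBytes = cnt - cnt%256 // cleared 256 bytes per loop iteration
		cnt -= loopBytes
	}
	for cnt > 0 {
		n := cnt
		if n > 256 {
			n = 256 // XC clears at most 256 bytes per instruction
		}
		chunks = append(chunks, n)
		cnt -= n
	}
	return loopBytes, chunks
}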
func preprocess(ctxt *obj.Link, cursym *obj.LSym) { // TODO(minux): add morestack short-cuts with small fixed frame-size. ctxt.Cursym = cursym // a switch for enabling/disabling instruction scheduling nosched := true if cursym.Text == nil || cursym.Text.Link == nil { return } p := cursym.Text textstksiz := p.To.Offset cursym.Args = p.To.Val.(int32) cursym.Locals = int32(textstksiz) /* * find leaf subroutines * strip NOPs * expand RET * expand BECOME pseudo */ if ctxt.Debugvlog != 0 { fmt.Fprintf(ctxt.Bso, "%5.2f noops\n", obj.Cputime()) } ctxt.Bso.Flush() var q *obj.Prog var q1 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { switch p.As { /* too hard, just leave alone */ case obj.ATEXT: q = p p.Mark |= LABEL | LEAF | SYNC if p.Link != nil { p.Link.Mark |= LABEL } /* too hard, just leave alone */ case AMOVW, AMOVV: q = p if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL { p.Mark |= LABEL | SYNC break } if p.From.Type == obj.TYPE_REG && p.From.Reg >= REG_SPECIAL { p.Mark |= LABEL | SYNC } /* too hard, just leave alone */ case ASYSCALL, AWORD, ATLBWR, ATLBWI, ATLBP, ATLBR: q = p p.Mark |= LABEL | SYNC case ANOR: q = p if p.To.Type == obj.TYPE_REG { if p.To.Reg == REGZERO { p.Mark |= LABEL | SYNC } } case ABGEZAL, ABLTZAL, AJAL, obj.ADUFFZERO, obj.ADUFFCOPY: cursym.Text.Mark &^= LEAF fallthrough case AJMP, ABEQ, ABGEZ, ABGTZ, ABLEZ, ABLTZ, ABNE, ABFPT, ABFPF: if p.As == ABFPT || p.As == ABFPF { // We don't treat ABFPT and ABFPF as branches here, // so that we will always fill nop (0x0) in their // delay slot during assembly. // This is to workaround a kernel FPU emulator bug // where it uses the user stack to simulate the // instruction in the delay slot if it's not 0x0, // and somehow that leads to SIGSEGV when the kernel // jump to the stack. 
p.Mark |= SYNC } else { p.Mark |= BRANCH } q = p q1 = p.Pcond if q1 != nil { for q1.As == obj.ANOP { q1 = q1.Link p.Pcond = q1 } if q1.Mark&LEAF == 0 { q1.Mark |= LABEL } } //else { // p.Mark |= LABEL //} q1 = p.Link if q1 != nil { q1.Mark |= LABEL } continue case ARET: q = p if p.Link != nil { p.Link.Mark |= LABEL } continue case obj.ANOP: q1 = p.Link q.Link = q1 /* q is non-nop */ q1.Mark |= p.Mark continue default: q = p continue } } autosize := int32(0) var p1 *obj.Prog var p2 *obj.Prog for p := cursym.Text; p != nil; p = p.Link { o := p.As switch o { case obj.ATEXT: autosize = int32(textstksiz + 8) if (p.Mark&LEAF != 0) && autosize <= 8 { autosize = 0 } else if autosize&4 != 0 { autosize += 4 } p.To.Offset = int64(autosize) - 8 if p.From3.Offset&obj.NOSPLIT == 0 { p = stacksplit(ctxt, p, autosize) // emit split check } q = p if autosize != 0 { q = obj.Appendp(ctxt, p) q.As = AADDV q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = int64(-autosize) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = +autosize } else if cursym.Text.Mark&LEAF == 0 { if cursym.Text.From3.Offset&obj.NOSPLIT != 0 { if ctxt.Debugvlog != 0 { fmt.Fprintf(ctxt.Bso, "save suppressed in: %s\n", cursym.Name) ctxt.Bso.Flush() } cursym.Text.Mark |= LEAF } } if cursym.Text.Mark&LEAF != 0 { cursym.Leaf = true break } q = obj.Appendp(ctxt, q) q.As = AMOVV q.Lineno = p.Lineno q.From.Type = obj.TYPE_REG q.From.Reg = REGLINK q.To.Type = obj.TYPE_MEM q.To.Offset = int64(0) q.To.Reg = REGSP if cursym.Text.From3.Offset&obj.WRAPPER != 0 { // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame // // MOVV g_panic(g), R1 // BEQ R1, end // MOVV panic_argp(R1), R2 // ADDV $(autosize+8), R29, R3 // BNE R2, R3, end // ADDV $8, R29, R2 // MOVV R2, panic_argp(R1) // end: // NOP // // The NOP is needed to give the jumps somewhere to land. // It is a liblink NOP, not an mips NOP: it encodes to 0 instruction bytes. 
q = obj.Appendp(ctxt, q) q.As = AMOVV q.From.Type = obj.TYPE_MEM q.From.Reg = REGG q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic q.To.Type = obj.TYPE_REG q.To.Reg = REG_R1 q = obj.Appendp(ctxt, q) q.As = ABEQ q.From.Type = obj.TYPE_REG q.From.Reg = REG_R1 q.To.Type = obj.TYPE_BRANCH q.Mark |= BRANCH p1 = q q = obj.Appendp(ctxt, q) q.As = AMOVV q.From.Type = obj.TYPE_MEM q.From.Reg = REG_R1 q.From.Offset = 0 // Panic.argp q.To.Type = obj.TYPE_REG q.To.Reg = REG_R2 q = obj.Appendp(ctxt, q) q.As = AADDV q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) + 8 q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R3 q = obj.Appendp(ctxt, q) q.As = ABNE q.From.Type = obj.TYPE_REG q.From.Reg = REG_R2 q.Reg = REG_R3 q.To.Type = obj.TYPE_BRANCH q.Mark |= BRANCH p2 = q q = obj.Appendp(ctxt, q) q.As = AADDV q.From.Type = obj.TYPE_CONST q.From.Offset = 8 q.Reg = REGSP q.To.Type = obj.TYPE_REG q.To.Reg = REG_R2 q = obj.Appendp(ctxt, q) q.As = AMOVV q.From.Type = obj.TYPE_REG q.From.Reg = REG_R2 q.To.Type = obj.TYPE_MEM q.To.Reg = REG_R1 q.To.Offset = 0 // Panic.argp q = obj.Appendp(ctxt, q) q.As = obj.ANOP p1.Pcond = q p2.Pcond = q } case ARET: if p.From.Type == obj.TYPE_CONST { ctxt.Diag("using BECOME (%v) is not supported!", p) break } if p.To.Sym != nil { // retjmp p.As = AJMP p.To.Type = obj.TYPE_BRANCH break } if cursym.Text.Mark&LEAF != 0 { if autosize == 0 { p.As = AJMP p.From = obj.Addr{} p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = REGLINK p.Mark |= BRANCH break } p.As = AADDV p.From.Type = obj.TYPE_CONST p.From.Offset = int64(autosize) p.To.Type = obj.TYPE_REG p.To.Reg = REGSP p.Spadj = -autosize q = ctxt.NewProg() q.As = AJMP q.Lineno = p.Lineno q.To.Type = obj.TYPE_MEM q.To.Offset = 0 q.To.Reg = REGLINK q.Mark |= BRANCH q.Spadj = +autosize q.Link = p.Link p.Link = q break } p.As = AMOVV p.From.Type = obj.TYPE_MEM p.From.Offset = 0 p.From.Reg = REGSP p.To.Type = obj.TYPE_REG p.To.Reg = REG_R4 if false { // Debug bad returns q = ctxt.NewProg() q.As = AMOVV q.Lineno = p.Lineno q.From.Type = obj.TYPE_MEM q.From.Offset = 0 q.From.Reg = REG_R4 q.To.Type = obj.TYPE_REG q.To.Reg = REGTMP q.Link = p.Link p.Link = q p = q } if autosize != 0 { q = ctxt.NewProg() q.As = AADDV q.Lineno = p.Lineno q.From.Type = obj.TYPE_CONST q.From.Offset = int64(autosize) q.To.Type = obj.TYPE_REG q.To.Reg = REGSP q.Spadj = -autosize q.Link = p.Link p.Link = q } q1 = ctxt.NewProg() q1.As = AJMP q1.Lineno = p.Lineno q1.To.Type = obj.TYPE_MEM q1.To.Offset = 0 q1.To.Reg = REG_R4 q1.Mark |= BRANCH q1.Spadj = +autosize q1.Link = q.Link q.Link = q1 case AADDV, AADDVU: if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST { p.Spadj = int32(-p.From.Offset) } } } if nosched { // if we don't do instruction scheduling, simply add // NOP after each branch instruction. for p = cursym.Text; p != nil; p = p.Link { if p.Mark&BRANCH != 0 { addnop(ctxt, p) } } return } // instruction scheduling q = nil // p - 1 q1 = cursym.Text // top of block o := 0 // count of instructions for p = cursym.Text; p != nil; p = p1 { p1 = p.Link o++ if p.Mark&NOSCHED != 0 { if q1 != p { sched(ctxt, q1, q) } for ; p != nil; p = p.Link { if p.Mark&NOSCHED == 0 { break } q = p } p1 = p q1 = p o = 0 continue } if p.Mark&(LABEL|SYNC) != 0 { if q1 != p { sched(ctxt, q1, q) } q1 = p o = 1 } if p.Mark&(BRANCH|SYNC) != 0 { sched(ctxt, q1, p) q1 = p1 o = 0 } if o >= NSCHED { sched(ctxt, q1, p) q1 = p1 o = 0 } q = p } }
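// Illustrative sketch (not part of the original sources): the frame-size
// computation at the top of the ATEXT case above. The declared stack size is
// grown by 8 bytes for the saved link register, leaf functions whose frame
// would fit entirely in that slot keep no frame at all, and any other frame is
// rounded up so the total stays 8-byte aligned. frameSize is hypothetical and
// only restates that arithmetic.
func frameSize(textstksiz int32, leaf bool) int32 {
	autosize := textstksiz + 8 // room for the saved LR
	if leaf && autosize <= 8 {
		return 0 // leaf with no locals: no frame needed
	}
	if autosize&4 != 0 {
		autosize += 4 // round up to an 8-byte boundary
	}
	return autosize
}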