/*
 * The idea is to remove redundant constants.
 *	$c1->v1
 *	($c1->v2 s/$c1/v1)*
 *	set v1  return
 * The v1->v2 should be eliminated by copy propagation.
 */
func constprop(c1 *obj.Addr, v1 *obj.Addr, r *gc.Flow) {
	if gc.Debug['P'] != 0 {
		fmt.Printf("constprop %v->%v\n", gc.Ctxt.Dconv(c1), gc.Ctxt.Dconv(v1))
	}
	var p *obj.Prog
	for ; r != nil; r = r.S1 {
		p = r.Prog
		if gc.Debug['P'] != 0 {
			fmt.Printf("%v", p)
		}
		if gc.Uniqp(r) == nil {
			if gc.Debug['P'] != 0 {
				fmt.Printf("; merge; return\n")
			}
			return
		}

		if p.As == arm.AMOVW && copyas(&p.From, c1) {
			if gc.Debug['P'] != 0 {
				fmt.Printf("; sub%v/%v", gc.Ctxt.Dconv(&p.From), gc.Ctxt.Dconv(v1))
			}
			p.From = *v1
		} else if copyu(p, v1, nil) > 1 {
			if gc.Debug['P'] != 0 {
				fmt.Printf("; %vset; return\n", gc.Ctxt.Dconv(v1))
			}
			return
		}

		if gc.Debug['P'] != 0 {
			fmt.Printf("\n")
		}
		if r.S2 != nil {
			constprop(c1, v1, r.S2)
		}
	}
}
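// Illustrative only (not from the compiler source): the pattern constprop
// targets, written as pseudo assembly. Given
//
//	MOVW $42, R1
//	MOVW $42, R2	// same constant c1; R1 not reassigned in between
//
// the second instruction's source is rewritten from $42 to R1, giving
//
//	MOVW R1, R2
//
// which the copy-propagation pass can then eliminate.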
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copyu returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3Type() != obj.TYPE_NONE {
		// never generates a from3
		fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		mips.AMOVV,
		mips.AMOVF,
		mips.AMOVD,
		mips.AMOVH,
		mips.AMOVHU,
		mips.AMOVB,
		mips.AMOVBU,
		mips.AMOVW,
		mips.AMOVWU,
		mips.AMOVFD,
		mips.AMOVDF,
		mips.AMOVDW,
		mips.AMOVWD,
		mips.AMOVFW,
		mips.AMOVWF,
		mips.AMOVDV,
		mips.AMOVVD,
		mips.AMOVFV,
		mips.AMOVVF,
		mips.ATRUNCFV,
		mips.ATRUNCDV,
		mips.ATRUNCFW,
		mips.ATRUNCDW:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			// Fix up implicit from
			if p.From.Type == obj.TYPE_NONE {
				p.From = p.To
			}
			if copyau(&p.From, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			// p->to only indirectly uses v
			return 1
		}
		return 0

	case mips.ASGT, /* read p->from, read p->reg, write p->to */
		mips.ASGTU,
		mips.AADD,
		mips.AADDU,
		mips.ASUB,
		mips.ASUBU,
		mips.ASLL,
		mips.ASRL,
		mips.ASRA,
		mips.AOR,
		mips.ANOR,
		mips.AAND,
		mips.AXOR,
		mips.AADDV,
		mips.AADDVU,
		mips.ASUBV,
		mips.ASUBVU,
		mips.ASLLV,
		mips.ASRLV,
		mips.ASRAV,
		mips.AADDF,
		mips.AADDD,
		mips.ASUBF,
		mips.ASUBD,
		mips.AMULF,
		mips.AMULD,
		mips.ADIVF,
		mips.ADIVD:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub1(p, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ACHECKNIL, /* read p->from */
		mips.ABEQ, /* read p->from, read p->reg */
		mips.ABNE,
		mips.ABGTZ,
		mips.ABGEZ,
		mips.ABLTZ,
		mips.ABLEZ,
		mips.ACMPEQD,
		mips.ACMPEQF,
		mips.ACMPGED,
		mips.ACMPGEF,
		mips.ACMPGTD,
		mips.ACMPGTF,
		mips.ABFPF,
		mips.ABFPT,
		mips.AMUL,
		mips.AMULU,
		mips.ADIV,
		mips.ADIVU,
		mips.AMULV,
		mips.AMULVU,
		mips.ADIVV,
		mips.ADIVVU:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub1(p, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		return 0

	case mips.AJMP: /* read p->to */
		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case mips.ARET: /* funny */
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case mips.AJAL: /* funny */
		if v.Type == obj.TYPE_REG {
			// TODO(rsc): REG_R0 and REG_F0 used to be
			// (when register numbers started at 0) exregoffset and exfregoffset,
			// which are unset entirely.
			// It's strange that this handles R0 and F0 differently from the other
			// registers. Possible failure to optimize?
			if mips.REG_R0 < v.Reg && v.Reg <= mips.REG_R31 {
				return 2
			}
			if v.Reg == mips.REGARG {
				return 2
			}
			if mips.REG_F0 < v.Reg && v.Reg <= mips.REG_F31 {
				return 2
			}
		}

		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	// R0 is zero, used by DUFFZERO, cannot be substituted.
	// R1 is ptr to memory, used and set, cannot be substituted.
	case obj.ADUFFZERO:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 0 {
				return 1
			}
			if v.Reg == 1 {
				return 2
			}
		}
		return 0

	// R1, R2 are ptr to src, dst, used and set, cannot be substituted.
	// R3 is scratch, set by DUFFCOPY, cannot be substituted.
	case obj.ADUFFCOPY:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 1 || v.Reg == 2 {
				return 2
			}
			if v.Reg == 3 {
				return 3
			}
		}
		return 0

	case obj.ATEXT: /* funny */
		if v.Type == obj.TYPE_REG {
			if v.Reg == mips.REGARG {
				return 3
			}
		}
		return 0

	case obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL,
		obj.AVARLIVE,
		obj.AUSEFIELD:
		return 0
	}
}
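// Hedged sketch (a hypothetical helper, not part of the compiler): one way
// a caller such as copyprop can consume the 0-4 codes returned by copyu
// above when asking whether the value in v survives past p unchanged.
func valueSurvives(p *obj.Prog, v *obj.Addr) bool {
	switch copyu(p, v, nil) {
	case 2, 3, 4:
		// v is set by p (possibly also read): the old value is dead.
		return false
	default:
		// 0 (untouched) or 1 (only read): v flows through unchanged.
		return true
	}
}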
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copyu returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3Type() != obj.TYPE_NONE {
		// 9g never generates a from3
		fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		ppc64.AMOVH,
		ppc64.AMOVHZ,
		ppc64.AMOVB,
		ppc64.AMOVBZ,
		ppc64.AMOVW,
		ppc64.AMOVWZ,
		ppc64.AMOVD,
		ppc64.ANEG,
		ppc64.ANEGCC,
		ppc64.AADDME,
		ppc64.AADDMECC,
		ppc64.AADDZE,
		ppc64.AADDZECC,
		ppc64.ASUBME,
		ppc64.ASUBMECC,
		ppc64.ASUBZE,
		ppc64.ASUBZECC,
		ppc64.AFCTIW,
		ppc64.AFCTIWZ,
		ppc64.AFCTID,
		ppc64.AFCTIDZ,
		ppc64.AFCFID,
		ppc64.AFCFIDCC,
		ppc64.AFCFIDU,
		ppc64.AFCFIDUCC,
		ppc64.AFMOVS,
		ppc64.AFMOVD,
		ppc64.AFRSP,
		ppc64.AFNEG,
		ppc64.AFNEGCC,
		ppc64.AFSQRT:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			// Fix up implicit from
			if p.From.Type == obj.TYPE_NONE {
				p.From = p.To
			}
			if copyau(&p.From, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			// p->to only indirectly uses v
			return 1
		}
		return 0

	case ppc64.AMOVBU, /* rar p->from, write p->to or read p->from, rar p->to */
		ppc64.AMOVBZU,
		ppc64.AMOVHU,
		ppc64.AMOVHZU,
		ppc64.AMOVWZU,
		ppc64.AMOVDU:
		if p.From.Type == obj.TYPE_MEM {
			if copyas(&p.From, v) {
				// No s!=nil check; need to fail
				// anyway in that case
				return 2
			}

			if s != nil {
				if copysub(&p.To, v, s, true) {
					return 1
				}
				return 0
			}

			if copyas(&p.To, v) {
				return 3
			}
		} else if p.To.Type == obj.TYPE_MEM {
			if copyas(&p.To, v) {
				return 2
			}
			if s != nil {
				if copysub(&p.From, v, s, true) {
					return 1
				}
				return 0
			}

			if copyau(&p.From, v) {
				return 1
			}
		} else {
			fmt.Printf("copyu: bad %v\n", p)
		}

		return 0

	case ppc64.ARLWMI, /* read p->from, read p->reg, rar p->to */
		ppc64.ARLWMICC:
		if copyas(&p.To, v) {
			return 2
		}
		fallthrough

	case ppc64.AADD, /* read p->from, read p->reg, write p->to */
		ppc64.AADDC,
		ppc64.AADDE,
		ppc64.ASUB,
		ppc64.ASLW,
		ppc64.ASRW,
		ppc64.ASRAW,
		ppc64.ASLD,
		ppc64.ASRD,
		ppc64.ASRAD,
		ppc64.AOR,
		ppc64.AORCC,
		ppc64.AORN,
		ppc64.AORNCC,
		ppc64.AAND,
		ppc64.AANDCC,
		ppc64.AANDN,
		ppc64.AANDNCC,
		ppc64.ANAND,
		ppc64.ANANDCC,
		ppc64.ANOR,
		ppc64.ANORCC,
		ppc64.AXOR,
		ppc64.AMULHW,
		ppc64.AMULHWU,
		ppc64.AMULLW,
		ppc64.AMULLD,
		ppc64.ADIVW,
		ppc64.ADIVD,
		ppc64.ADIVWU,
		ppc64.ADIVDU,
		ppc64.AREM,
		ppc64.AREMU,
		ppc64.AREMD,
		ppc64.AREMDU,
		ppc64.ARLWNM,
		ppc64.ARLWNMCC,
		ppc64.AFADDS,
		ppc64.AFADD,
		ppc64.AFSUBS,
		ppc64.AFSUB,
		ppc64.AFMULS,
		ppc64.AFMUL,
		ppc64.AFDIVS,
		ppc64.AFDIV:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub1(p, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case ppc64.ABEQ,
		ppc64.ABGT,
		ppc64.ABGE,
		ppc64.ABLT,
		ppc64.ABLE,
		ppc64.ABNE,
		ppc64.ABVC,
		ppc64.ABVS:
		return 0

	case obj.ACHECKNIL, /* read p->from */
		ppc64.ACMP, /* read p->from, read p->to */
		ppc64.ACMPU,
		ppc64.ACMPW,
		ppc64.ACMPWU,
		ppc64.AFCMPO,
		ppc64.AFCMPU:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	// 9g never generates a branch to a GPR (this isn't
	// even a normal instruction; liblink turns it into a
	// mov and a branch).
	case ppc64.ABR: /* read p->to */
		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ARET: /* funny */
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case ppc64.ABL: /* funny */
		if v.Type == obj.TYPE_REG {
			// TODO(rsc): REG_R0 and REG_F0 used to be
			// (when register numbers started at 0) exregoffset and exfregoffset,
			// which are unset entirely.
			// It's strange that this handles R0 and F0 differently from the other
			// registers. Possible failure to optimize?
			if ppc64.REG_R0 < v.Reg && v.Reg <= ppc64.REGEXT {
				return 2
			}
			if v.Reg == ppc64.REGARG {
				return 2
			}
			if ppc64.REG_F0 < v.Reg && v.Reg <= ppc64.FREGEXT {
				return 2
			}
		}

		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	// R0 is zero, used by DUFFZERO, cannot be substituted.
	// R3 is ptr to memory, used and set, cannot be substituted.
	case obj.ADUFFZERO:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 0 {
				return 1
			}
			if v.Reg == 3 {
				return 2
			}
		}
		return 0

	// R3, R4 are ptr to src, dst, used and set, cannot be substituted.
	// R5 is scratch, set by DUFFCOPY, cannot be substituted.
	case obj.ADUFFCOPY:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 3 || v.Reg == 4 {
				return 2
			}
			if v.Reg == 5 {
				return 3
			}
		}
		return 0

	case obj.ATEXT: /* funny */
		if v.Type == obj.TYPE_REG {
			if v.Reg == ppc64.REGARG {
				return 3
			}
		}
		return 0

	case obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL,
		obj.AVARLIVE,
		obj.AUSEFIELD:
		return 0
	}
}
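// Illustrative only: the "fix up implicit reg" step in the arithmetic case
// above rewrites the two-operand form so the source and destination can be
// renamed independently later. For example
//
//	ADD R3, R4	// p.Reg == 0; R4 is both second source and destination
//
// is normalized to the explicit three-operand form
//
//	ADD R3, R4, R4	// after p.Reg = p.To.Reg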
func peep(firstp *obj.Prog) {
	g := gc.Flowstart(firstp, nil)
	if g == nil {
		return
	}
	gactive = 0

	// byte, word arithmetic elimination.
	elimshortmov(g)

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	var p *obj.Prog
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.ALEAL:
			if regtyp(&p.To) {
				if p.From.Sym != nil {
					if p.From.Index == x86.REG_NONE {
						conprop(r)
					}
				}
			}

		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
					conprop(r)
				}
			}
		}
	}

	var r1 *gc.Flow
	var p1 *obj.Prog
	var r *gc.Flow
	var t int
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVL,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if regtyp(&p.From) {
					if copyprop(g, r) {
						excise(r)
						t++
					} else if subprop(r) && copyprop(g, r) {
						excise(r)
						t++
					}
				}
			}

		case x86.AMOVBLZX,
			x86.AMOVWLZX,
			x86.AMOVBLSX,
			x86.AMOVWLSX:
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVL
						t++
					}
				}
			}

		case x86.AADDL,
			x86.AADDW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.AADDL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.AADDL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

		case x86.ASUBL,
			x86.ASUBW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.ASUBL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.ASUBL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one. We only use the lower one, but
	// the processor can do better if we do moves using both.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if p.As == x86.AMOVSD {
			if regtyp(&p.From) {
				if regtyp(&p.To) {
					p.As = x86.AMOVAPD
				}
			}
		}
	}

	gc.Flowend(g)
}
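// A minimal sketch (not part of the compiler) of the ADD/SUB rewrite in the
// loop above, restricted to the 32-bit cases: adding or subtracting the
// constant 1 becomes INCL/DECL when the carry flag is dead. needc is the
// same carry-liveness guard used by peep.
func incdecSketch(p *obj.Prog) {
	if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
		return
	}
	c := p.From.Offset
	switch {
	case p.As == x86.AADDL && c == 1, p.As == x86.ASUBL && c == -1:
		p.As = x86.AINCL
	case p.As == x86.AADDL && c == -1, p.As == x86.ASUBL && c == 1:
		p.As = x86.ADECL
	default:
		return
	}
	p.From = obj.Addr{} // INC/DEC take no explicit source operand
}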
func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
	// TODO(minux): add morestack short-cuts with small fixed frame-size.
	ctxt.Cursym = cursym

	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	p := cursym.Text
	textstksiz := p.To.Offset
	if textstksiz == -8 {
		// Compatibility hack.
		p.From3.Offset |= obj.NOFRAME
		textstksiz = 0
	}
	if textstksiz%8 != 0 {
		ctxt.Diag("frame size %d not a multiple of 8", textstksiz)
	}
	if p.From3.Offset&obj.NOFRAME != 0 {
		if textstksiz != 0 {
			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
		}
	}

	cursym.Args = p.To.Val.(int32)
	cursym.Locals = int32(textstksiz)

	/*
	 * find leaf subroutines
	 * strip NOPs
	 * expand RET
	 * expand BECOME pseudo
	 */
	if ctxt.Debugvlog != 0 {
		fmt.Fprintf(ctxt.Bso, "%5.2f noops\n", obj.Cputime())
	}
	ctxt.Bso.Flush()

	var q *obj.Prog
	var q1 *obj.Prog
	for p := cursym.Text; p != nil; p = p.Link {
		switch p.As {
		/* too hard, just leave alone */
		case obj.ATEXT:
			q = p
			p.Mark |= LABEL | LEAF | SYNC
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}

		case ANOR:
			q = p
			if p.To.Type == obj.TYPE_REG {
				if p.To.Reg == REGZERO {
					p.Mark |= LABEL | SYNC
				}
			}

		case ASYNC,
			AWORD:
			q = p
			p.Mark |= LABEL | SYNC
			continue

		case AMOVW, AMOVWZ, AMOVD:
			q = p
			if p.From.Reg >= REG_RESERVED || p.To.Reg >= REG_RESERVED {
				p.Mark |= LABEL | SYNC
			}
			continue

		case AFABS,
			AFADD,
			AFDIV,
			AFMADD,
			AFMOVD,
			AFMOVS,
			AFMSUB,
			AFMUL,
			AFNABS,
			AFNEG,
			AFNMADD,
			AFNMSUB,
			ALEDBR,
			ALDEBR,
			AFSUB:
			q = p
			p.Mark |= FLOAT
			continue

		case ABL,
			ABCL,
			obj.ADUFFZERO,
			obj.ADUFFCOPY:
			cursym.Text.Mark &^= LEAF
			fallthrough

		case ABC,
			ABEQ,
			ABGE,
			ABGT,
			ABLE,
			ABLT,
			ABNE,
			ABR,
			ABVC,
			ABVS,
			ACMPBEQ,
			ACMPBGE,
			ACMPBGT,
			ACMPBLE,
			ACMPBLT,
			ACMPBNE,
			ACMPUBEQ,
			ACMPUBGE,
			ACMPUBGT,
			ACMPUBLE,
			ACMPUBLT,
			ACMPUBNE:
			p.Mark |= BRANCH
			q = p
			q1 = p.Pcond
			if q1 != nil {
				for q1.As == obj.ANOP {
					q1 = q1.Link
					p.Pcond = q1
				}

				if q1.Mark&LEAF == 0 {
					q1.Mark |= LABEL
				}
			} else {
				p.Mark |= LABEL
			}
			q1 = p.Link
			if q1 != nil {
				q1.Mark |= LABEL
			}
			continue

		case AFCMPO, AFCMPU:
			q = p
			p.Mark |= FCMP | FLOAT
			continue

		case obj.ARET:
			q = p
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}
			continue

		case obj.ANOP:
			q1 = p.Link
			q.Link = q1 /* q is non-nop */
			q1.Mark |= p.Mark
			continue

		default:
			q = p
			continue
		}
	}

	autosize := int32(0)
	var p1 *obj.Prog
	var p2 *obj.Prog
	var pLast *obj.Prog
	var pPre *obj.Prog
	var pPreempt *obj.Prog
	wasSplit := false
	for p := cursym.Text; p != nil; p = p.Link {
		pLast = p
		switch p.As {
		case obj.ATEXT:
			autosize = int32(textstksiz)

			if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 {
				// A leaf function with no locals has no frame.
				p.From3.Offset |= obj.NOFRAME
			}

			if p.From3.Offset&obj.NOFRAME == 0 {
				// If there is a stack frame at all, it includes
				// space to save the LR.
				autosize += int32(ctxt.FixedFrameSize())
			}

			p.To.Offset = int64(autosize)

			q = p

			if p.From3.Offset&obj.NOSPLIT == 0 {
				p, pPreempt = stacksplitPre(ctxt, p, autosize) // emit pre part of split check
				pPre = p
				wasSplit = true // need post part of split
			}

			if autosize != 0 {
				q = obj.Appendp(ctxt, p)
				q.As = AMOVD
				q.From.Type = obj.TYPE_ADDR
				q.From.Offset = int64(-autosize)
				q.From.Reg = REGSP // not actually needed - REGSP is assumed if no reg is provided
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = autosize
			} else if cursym.Text.Mark&LEAF == 0 {
				// A very few functions that do not return to their caller
				// (e.g. gogo) are not identified as leaves but still have
				// no frame.
				cursym.Text.Mark |= LEAF
			}

			if cursym.Text.Mark&LEAF != 0 {
				cursym.Leaf = true
				break
			}

			q = obj.Appendp(ctxt, q)
			q.As = AMOVD
			q.From.Type = obj.TYPE_REG
			q.From.Reg = REG_LR
			q.To.Type = obj.TYPE_MEM
			q.To.Reg = REGSP
			q.To.Offset = 0

			if cursym.Text.From3.Offset&obj.WRAPPER != 0 {
				// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
				//
				//	MOVD g_panic(g), R3
				//	CMP R0, R3
				//	BEQ end
				//	MOVD panic_argp(R3), R4
				//	ADD $(autosize+8), R1, R5
				//	CMP R4, R5
				//	BNE end
				//	ADD $8, R1, R6
				//	MOVD R6, panic_argp(R3)
				// end:
				//	NOP
				//
				// The NOP is needed to give the jumps somewhere to land.
				// It is a liblink NOP, not an s390x NOP: it encodes to 0 instruction bytes.

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REGG
				q.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(ctxt, q)
				q.As = ACMP
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R0
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(ctxt, q)
				q.As = ABEQ
				q.To.Type = obj.TYPE_BRANCH
				p1 = q

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REG_R3
				q.From.Offset = 0 // Panic.argp
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R4

				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize) + ctxt.FixedFrameSize()
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R5

				q = obj.Appendp(ctxt, q)
				q.As = ACMP
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R4
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R5

				q = obj.Appendp(ctxt, q)
				q.As = ABNE
				q.To.Type = obj.TYPE_BRANCH
				p2 = q

				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = ctxt.FixedFrameSize()
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R6

				q = obj.Appendp(ctxt, q)
				q.As = AMOVD
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R6
				q.To.Type = obj.TYPE_MEM
				q.To.Reg = REG_R3
				q.To.Offset = 0 // Panic.argp

				q = obj.Appendp(ctxt, q)
				q.As = obj.ANOP
				p1.Pcond = q
				p2.Pcond = q
			}

		case obj.ARET:
			if p.From.Type == obj.TYPE_CONST {
				ctxt.Diag("using BECOME (%v) is not supported!", p)
				break
			}

			retTarget := p.To.Sym

			if cursym.Text.Mark&LEAF != 0 {
				if autosize == 0 {
					p.As = ABR
					p.From = obj.Addr{}
					if retTarget == nil {
						p.To.Type = obj.TYPE_REG
						p.To.Reg = REG_LR
					} else {
						p.To.Type = obj.TYPE_BRANCH
						p.To.Sym = retTarget
					}
					p.Mark |= BRANCH
					break
				}

				p.As = AADD
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(autosize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGSP
				p.Spadj = -autosize

				q = obj.Appendp(ctxt, p)
				q.As = ABR
				q.From = obj.Addr{}
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_LR
				q.Mark |= BRANCH
				q.Spadj = autosize
				break
			}

			p.As = AMOVD
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REGSP
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_LR

			q = p

			if autosize != 0 {
				q = obj.Appendp(ctxt, q)
				q.As = AADD
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = -autosize
			}

			q = obj.Appendp(ctxt, q)
			q.As = ABR
			q.From = obj.Addr{}
			if retTarget == nil {
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_LR
			} else {
				q.To.Type = obj.TYPE_BRANCH
				q.To.Sym = retTarget
			}
			q.Mark |= BRANCH
			q.Spadj = autosize

		case AADD:
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
				p.Spadj = int32(-p.From.Offset)
			}
		}
	}
	if wasSplit {
		pLast = stacksplitPost(ctxt, pLast, pPre, pPreempt) // emit post part of split check
	}
}
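// Illustrative only (register numbers are assumptions based on the s390x
// back end, where REGSP is R15 and FixedFrameSize covers the LR save slot):
// for a non-leaf function with 8 bytes of locals, the prologue emitted above
// is roughly
//
//	TEXT f(SB), $8-0
//	MOVD $-16(R15), R15	// autosize = 8 (locals) + 8 (LR slot); Spadj = 16
//	MOVD LR, 0(R15)		// spill the link register at the bottom of the frame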
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copyu returns one of the following values:
//	1 if v only used
//	2 if v is set and used in one address (read-alter-rewrite;
//	  can't substitute)
//	3 if v is only set
//	4 if v is set in one address and used in another (so addresses
//	  can be rewritten independently)
//	0 otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) int {
	if p.From3Type() != obj.TYPE_NONE {
		// 7g never generates a from3
		fmt.Printf("copyu: from3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}
	if p.RegTo2 != obj.REG_NONE {
		// 7g never generates a to2
		fmt.Printf("copyu: RegTo2 (%v) not implemented\n", obj.Rconv(int(p.RegTo2)))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
		return 2

	case obj.ANOP, /* read p->from, write p->to */
		arm64.ANEG,
		arm64.AFNEGD,
		arm64.AFNEGS,
		arm64.AFSQRTD,
		arm64.AFCVTZSD,
		arm64.AFCVTZSS,
		arm64.AFCVTZSDW,
		arm64.AFCVTZSSW,
		arm64.AFCVTZUD,
		arm64.AFCVTZUS,
		arm64.AFCVTZUDW,
		arm64.AFCVTZUSW,
		arm64.AFCVTSD,
		arm64.AFCVTDS,
		arm64.ASCVTFD,
		arm64.ASCVTFS,
		arm64.ASCVTFWD,
		arm64.ASCVTFWS,
		arm64.AUCVTFD,
		arm64.AUCVTFS,
		arm64.AUCVTFWD,
		arm64.AUCVTFWS,
		arm64.AMOVB,
		arm64.AMOVBU,
		arm64.AMOVH,
		arm64.AMOVHU,
		arm64.AMOVW,
		arm64.AMOVWU,
		arm64.AMOVD,
		arm64.AFMOVS,
		arm64.AFMOVD:
		if p.Scond == 0 {
			if s != nil {
				if copysub(&p.From, v, s, true) {
					return 1
				}

				// Update only indirect uses of v in p->to
				if !copyas(&p.To, v) {
					if copysub(&p.To, v, s, true) {
						return 1
					}
				}
				return 0
			}

			if copyas(&p.To, v) {
				// Fix up implicit from
				if p.From.Type == obj.TYPE_NONE {
					p.From = p.To
				}
				if copyau(&p.From, v) {
					return 4
				}
				return 3
			}

			if copyau(&p.From, v) {
				return 1
			}
			if copyau(&p.To, v) {
				// p->to only indirectly uses v
				return 1
			}
			return 0
		}

		/* rar p->from, write p->to or read p->from, rar p->to */
		if p.From.Type == obj.TYPE_MEM {
			if copyas(&p.From, v) {
				// No s!=nil check; need to fail
				// anyway in that case
				return 2
			}

			if s != nil {
				if copysub(&p.To, v, s, true) {
					return 1
				}
				return 0
			}

			if copyas(&p.To, v) {
				return 3
			}
		} else if p.To.Type == obj.TYPE_MEM {
			if copyas(&p.To, v) {
				return 2
			}
			if s != nil {
				if copysub(&p.From, v, s, true) {
					return 1
				}
				return 0
			}

			if copyau(&p.From, v) {
				return 1
			}
		} else {
			fmt.Printf("copyu: bad %v\n", p)
		}

		return 0

	case arm64.AADD, /* read p->from, read p->reg, write p->to */
		arm64.AADDS,
		arm64.ASUB,
		arm64.AADC,
		arm64.AAND,
		arm64.AORR,
		arm64.AEOR,
		arm64.AROR,
		arm64.AMUL,
		arm64.ASMULL,
		arm64.AUMULL,
		arm64.ASMULH,
		arm64.AUMULH,
		arm64.ASDIV,
		arm64.AUDIV,
		arm64.ALSL,
		arm64.ALSR,
		arm64.AASR,
		arm64.AFADDD,
		arm64.AFADDS,
		arm64.AFSUBD,
		arm64.AFSUBS,
		arm64.AFMULD,
		arm64.AFMULS,
		arm64.AFDIVD,
		arm64.AFDIVS:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub1(p, v, s, true) {
				return 1
			}

			// Update only indirect uses of v in p->to
			if !copyas(&p.To, v) {
				if copysub(&p.To, v, s, true) {
					return 1
				}
			}
			return 0
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				// Fix up implicit reg (e.g., ADD
				// R3,R4 -> ADD R3,R4,R4) so we can
				// update reg and to separately.
				p.Reg = p.To.Reg
			}

			if copyau(&p.From, v) {
				return 4
			}
			if copyau1(p, v) {
				return 4
			}
			return 3
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case arm64.ABEQ,
		arm64.ABNE,
		arm64.ABGE,
		arm64.ABLT,
		arm64.ABGT,
		arm64.ABLE,
		arm64.ABLO,
		arm64.ABLS,
		arm64.ABHI,
		arm64.ABHS:
		return 0

	case obj.ACHECKNIL, /* read p->from */
		arm64.ACMP, /* read p->from, read p->reg */
		arm64.AFCMPD,
		arm64.AFCMPS:
		if s != nil {
			if copysub(&p.From, v, s, true) {
				return 1
			}
			if copysub1(p, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.From, v) {
			return 1
		}
		if copyau1(p, v) {
			return 1
		}
		return 0

	case arm64.AB: /* read p->to */
		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 1
		}
		return 0

	case obj.ARET: /* funny */
		if s != nil {
			return 0
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return 3

	case arm64.ABL: /* funny */
		if p.From.Type == obj.TYPE_REG && v.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
			return 2
		}

		if s != nil {
			if copysub(&p.To, v, s, true) {
				return 1
			}
			return 0
		}

		if copyau(&p.To, v) {
			return 4
		}
		return 3

	// R31 is zero, used by DUFFZERO, cannot be substituted.
	// R16 is ptr to memory, used and set, cannot be substituted.
	case obj.ADUFFZERO:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 31 {
				return 1
			}
			if v.Reg == 16 {
				return 2
			}
		}
		return 0

	// R16, R17 are ptr to src, dst, used and set, cannot be substituted.
	// R27 is scratch, set by DUFFCOPY, cannot be substituted.
	case obj.ADUFFCOPY:
		if v.Type == obj.TYPE_REG {
			if v.Reg == 16 || v.Reg == 17 {
				return 2
			}
			if v.Reg == 27 {
				return 3
			}
		}
		return 0

	case arm64.AHINT,
		obj.ATEXT,
		obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL,
		obj.AVARLIVE,
		obj.AUSEFIELD:
		return 0
	}
}
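// Illustrative only: the register contract behind the ADUFFZERO/ADUFFCOPY
// cases above. On arm64, runtime·duffzero zeroes through the pointer in R16
// using the zero register R31, and runtime·duffcopy advances the source
// pointer in R16 and the destination pointer in R17, clobbering R27 as
// scratch. Those registers are pinned at Duff call sites, so the peephole
// pass must refuse to rename them.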
func peep(firstp *obj.Prog) {
	g := gc.Flowstart(firstp, nil)
	if g == nil {
		return
	}
	gactive = 0

	var p *obj.Prog
	var r *gc.Flow
	var t int
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog

		// TODO(minux) Handle smaller moves. arm and amd64
		// distinguish between moves that *must* sign/zero
		// extend and moves that don't care so they
		// can eliminate moves that don't care without
		// breaking moves that do care. This might let us
		// simplify or remove the next peep loop, too.
		if p.As == arm64.AMOVD || p.As == arm64.AFMOVD {
			if regtyp(&p.To) {
				// Try to eliminate reg->reg moves
				if regtyp(&p.From) {
					if p.From.Type == p.To.Type {
						if copyprop(r) {
							excise(r)
							t++
						} else if subprop(r) && copyprop(r) {
							excise(r)
							t++
						}
					}
				}
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	/*
	 * look for MOVB x,R; MOVB R,R (for small MOVs not handled above)
	 */
	var p1 *obj.Prog
	var r1 *gc.Flow
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		default:
			continue

		case arm64.AMOVH,
			arm64.AMOVHU,
			arm64.AMOVB,
			arm64.AMOVBU,
			arm64.AMOVW,
			arm64.AMOVWU:
			if p.To.Type != obj.TYPE_REG {
				continue
			}
		}

		r1 = r.Link
		if r1 == nil {
			continue
		}
		p1 = r1.Prog
		if p1.As != p.As {
			continue
		}
		if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg {
			continue
		}
		if p1.To.Type != obj.TYPE_REG || p1.To.Reg != p.To.Reg {
			continue
		}
		excise(r1)
	}

	if gc.Debug['D'] > 1 {
		goto ret /* allow following code improvement to be suppressed */
	}

	// MOVD $c, R'; ADD R', R (R' unused) -> ADD $c, R
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		default:
			continue

		case arm64.AMOVD:
			if p.To.Type != obj.TYPE_REG {
				continue
			}
			if p.From.Type != obj.TYPE_CONST {
				continue
			}
			if p.From.Offset < 0 || 4096 <= p.From.Offset {
				continue
			}
		}

		r1 = r.Link
		if r1 == nil {
			continue
		}
		p1 = r1.Prog
		if p1.As != arm64.AADD && p1.As != arm64.ASUB {
			// TODO(aram): also logical after we have bimm.
			continue
		}
		if p1.From.Type != obj.TYPE_REG || p1.From.Reg != p.To.Reg {
			continue
		}
		if p1.To.Type != obj.TYPE_REG {
			continue
		}
		if gc.Debug['P'] != 0 {
			fmt.Printf("encoding $%d directly into %v in:\n%v\n%v\n", p.From.Offset, obj.Aconv(p1.As), p, p1)
		}
		p1.From.Type = obj.TYPE_CONST
		p1.From = p.From
		excise(r)
	}

	/* TODO(minux):
	 * look for OP x,y,R; CMP R, $0 -> OP.S x,y,R
	 * when OP can set condition codes correctly
	 */

ret:
	gc.Flowend(g)
}
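// Illustrative only: the rewrite performed by the last loop above. Given
//
//	MOVD $256, R27
//	ADD  R27, R3
//
// the constant is folded into the arithmetic instruction,
//
//	ADD $256, R3
//
// and the now-dead MOVD is excised. The 0 <= c < 4096 guard keeps the
// constant within the 12-bit immediate field of ADD/SUB.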
// If s==nil, copyu returns the set/use of v in p; otherwise, it
// modifies p to replace reads of v with reads of s and returns 0 for
// success or non-zero for failure.
//
// If s==nil, copyu returns one of the following values:
//	_Read if v only used
//	_ReadWriteSame if v is set and used in one address (read-alter-rewrite;
//	can't substitute)
//	_Write if v is only set
//	_ReadWriteDiff if v is set in one address and used in another (so addresses
//	can be rewritten independently)
//	_None otherwise (not touched)
func copyu(p *obj.Prog, v *obj.Addr, s *obj.Addr) usage {
	if p.From3Type() != obj.TYPE_NONE && p.From3Type() != obj.TYPE_CONST {
		// Currently we never generate a From3 with anything other than a constant in it.
		fmt.Printf("copyu: From3 (%v) not implemented\n", gc.Ctxt.Dconv(p.From3))
	}

	switch p.As {
	default:
		fmt.Printf("copyu: can't find %v\n", obj.Aconv(p.As))
		return _ReadWriteSame

	case // read p.From, write p.To
		s390x.AMOVH,
		s390x.AMOVHZ,
		s390x.AMOVB,
		s390x.AMOVBZ,
		s390x.AMOVW,
		s390x.AMOVWZ,
		s390x.AMOVD,
		s390x.ANEG,
		s390x.AADDME,
		s390x.AADDZE,
		s390x.ASUBME,
		s390x.ASUBZE,
		s390x.AFMOVS,
		s390x.AFMOVD,
		s390x.ALEDBR,
		s390x.AFNEG,
		s390x.ALDEBR,
		s390x.ACLFEBR,
		s390x.ACLGEBR,
		s390x.ACLFDBR,
		s390x.ACLGDBR,
		s390x.ACFEBRA,
		s390x.ACGEBRA,
		s390x.ACFDBRA,
		s390x.ACGDBRA,
		s390x.ACELFBR,
		s390x.ACELGBR,
		s390x.ACDLFBR,
		s390x.ACDLGBR,
		s390x.ACEFBRA,
		s390x.ACEGBRA,
		s390x.ACDFBRA,
		s390x.ACDGBRA,
		s390x.AFSQRT:
		if s != nil {
			copysub(&p.From, v, s)

			// Update only indirect uses of v in p.To
			if !copyas(&p.To, v) {
				copysub(&p.To, v, s)
			}
			return _None
		}

		if copyas(&p.To, v) {
			// Fix up implicit from
			if p.From.Type == obj.TYPE_NONE {
				p.From = p.To
			}
			if copyau(&p.From, v) {
				return _ReadWriteDiff
			}
			return _Write
		}

		if copyau(&p.From, v) {
			return _Read
		}
		if copyau(&p.To, v) {
			// p.To only indirectly uses v
			return _Read
		}
		return _None

	// read p.From, read p.Reg, write p.To
	case s390x.AADD,
		s390x.AADDC,
		s390x.AADDE,
		s390x.ASUB,
		s390x.ASLW,
		s390x.ASRW,
		s390x.ASRAW,
		s390x.ASLD,
		s390x.ASRD,
		s390x.ASRAD,
		s390x.ARLL,
		s390x.ARLLG,
		s390x.AOR,
		s390x.AORN,
		s390x.AAND,
		s390x.AANDN,
		s390x.ANAND,
		s390x.ANOR,
		s390x.AXOR,
		s390x.AMULLW,
		s390x.AMULLD,
		s390x.AMULHD,
		s390x.AMULHDU,
		s390x.ADIVW,
		s390x.ADIVD,
		s390x.ADIVWU,
		s390x.ADIVDU,
		s390x.AFADDS,
		s390x.AFADD,
		s390x.AFSUBS,
		s390x.AFSUB,
		s390x.AFMULS,
		s390x.AFMUL,
		s390x.AFDIVS,
		s390x.AFDIV:
		if s != nil {
			copysub(&p.From, v, s)
			copysub1(p, v, s)

			// Update only indirect uses of v in p.To
			if !copyas(&p.To, v) {
				copysub(&p.To, v, s)
			}
		}

		if copyas(&p.To, v) {
			if p.Reg == 0 {
				p.Reg = p.To.Reg
			}
			if copyau(&p.From, v) || copyau1(p, v) {
				return _ReadWriteDiff
			}
			return _Write
		}

		if copyau(&p.From, v) {
			return _Read
		}
		if copyau1(p, v) {
			return _Read
		}
		if copyau(&p.To, v) {
			return _Read
		}
		return _None

	case s390x.ABEQ,
		s390x.ABGT,
		s390x.ABGE,
		s390x.ABLT,
		s390x.ABLE,
		s390x.ABNE,
		s390x.ABVC,
		s390x.ABVS:
		return _None

	case obj.ACHECKNIL, // read p.From
		s390x.ACMP, // read p.From, read p.To
		s390x.ACMPU,
		s390x.ACMPW,
		s390x.ACMPWU,
		s390x.AFCMPO,
		s390x.AFCMPU,
		s390x.ACEBR,
		s390x.AMVC,
		s390x.ACLC,
		s390x.AXC,
		s390x.AOC,
		s390x.ANC:
		if s != nil {
			copysub(&p.From, v, s)
			copysub(&p.To, v, s)
			return _None
		}

		if copyau(&p.From, v) {
			return _Read
		}
		if copyau(&p.To, v) {
			return _Read
		}
		return _None

	case s390x.ACMPBNE,
		s390x.ACMPBEQ,
		s390x.ACMPBLT,
		s390x.ACMPBLE,
		s390x.ACMPBGT,
		s390x.ACMPBGE,
		s390x.ACMPUBNE,
		s390x.ACMPUBEQ,
		s390x.ACMPUBLT,
		s390x.ACMPUBLE,
		s390x.ACMPUBGT,
		s390x.ACMPUBGE:
		if s != nil {
			copysub(&p.From, v, s)
			copysub1(p, v, s)
			return _None
		}

		if copyau(&p.From, v) {
			return _Read
		}
		if copyau1(p, v) {
			return _Read
		}
		return _None

	case s390x.ACLEAR:
		if s != nil {
			copysub(&p.To, v, s)
			return _None
		}

		if copyau(&p.To, v) {
			return _Read
		}
		return _None

	// go never generates a branch to a GPR
	// read p.To
	case s390x.ABR:
		if s != nil {
			copysub(&p.To, v, s)
			return _None
		}

		if copyau(&p.To, v) {
			return _Read
		}
		return _None

	case obj.ARET, obj.AUNDEF:
		if s != nil {
			return _None
		}

		// All registers die at this point, so claim
		// everything is set (and not used).
		return _Write

	case s390x.ABL:
		if v.Type == obj.TYPE_REG {
			if s390x.REGARG != -1 && v.Reg == s390x.REGARG {
				return _ReadWriteSame
			}
			if p.From.Type == obj.TYPE_REG && p.From.Reg == v.Reg {
				return _ReadWriteSame
			}
			if v.Reg == s390x.REGZERO {
				// Deliberately inserted nops set R0.
				return _ReadWriteSame
			}
			if v.Reg == s390x.REGCTXT {
				// Context register for closures.
				// TODO(mundaym): not sure if we need to exclude this.
				return _ReadWriteSame
			}
		}

		if s != nil {
			copysub(&p.To, v, s)
			return _None
		}

		if copyau(&p.To, v) {
			return _ReadWriteDiff
		}
		return _Write

	case obj.ATEXT:
		if v.Type == obj.TYPE_REG {
			if v.Reg == s390x.REGARG {
				return _Write
			}
		}
		return _None

	case obj.APCDATA,
		obj.AFUNCDATA,
		obj.AVARDEF,
		obj.AVARKILL,
		obj.AVARLIVE,
		obj.AUSEFIELD,
		obj.ANOP:
		return _None
	}
}
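// A minimal sketch (assumed; the real definitions live elsewhere in this
// package) of the usage type returned by the s390x copyu above. The iota
// order mirrors the numeric codes 0-4 returned by the other back ends.
type usage int

const (
	_None          usage = iota // v not touched by p
	_Read                       // v only read
	_ReadWriteSame              // v read and written at one address (read-alter-rewrite)
	_Write                      // v only written
	_ReadWriteDiff              // v written in one address and read in another
)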
func progedit(ctxt *obj.Link, p *obj.Prog) {
	// Maintain information about code generation mode.
	if ctxt.Mode == 0 {
		ctxt.Mode = ctxt.Arch.RegSize * 8
	}
	p.Mode = int8(ctxt.Mode)

	switch p.As {
	case AMODE:
		if p.From.Type == obj.TYPE_CONST || (p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_NONE) {
			switch int(p.From.Offset) {
			case 16, 32, 64:
				ctxt.Mode = int(p.From.Offset)
			}
		}
		obj.Nopout(p)
	}

	// Thread-local storage references use the TLS pseudo-register.
	// As a register, TLS refers to the thread-local storage base, and it
	// can only be loaded into another register:
	//
	//	MOVQ TLS, AX
	//
	// An offset from the thread-local storage base is written off(reg)(TLS*1).
	// Semantically it is off(reg), but the (TLS*1) annotation marks this as
	// indexing from the loaded TLS base. This emits a relocation so that
	// if the linker needs to adjust the offset, it can. For example:
	//
	//	MOVQ TLS, AX
	//	MOVQ 0(AX)(TLS*1), CX // load g into CX
	//
	// On systems that support direct access to the TLS memory, this
	// pair of instructions can be reduced to a direct TLS memory reference:
	//
	//	MOVQ 0(TLS), CX // load g into CX
	//
	// The 2-instruction and 1-instruction forms correspond to the two code
	// sequences for loading a TLS variable in the local exec model given in "ELF
	// Handling For Thread-Local Storage".
	//
	// We apply this rewrite on systems that support the 1-instruction form.
	// The decision is made using only the operating system and the -shared flag,
	// not the link mode. If some link modes on a particular operating system
	// require the 2-instruction form, then all builds for that operating system
	// will use the 2-instruction form, so that the link mode decision can be
	// delayed to link time.
	//
	// In this way, all supported systems use identical instructions to
	// access TLS, and they are rewritten appropriately first here in
	// liblink and then finally using relocations in the linker.
	//
	// When -shared is passed, we leave the code in the 2-instruction form but
	// assemble (and relocate) them in different ways to generate the initial
	// exec code sequence. It's a bit of a fluke that this is possible without
	// rewriting the instructions more comprehensively, and it only works because
	// we only support a single TLS variable (g).

	if CanUse1InsnTLS(ctxt) {
		// Reduce 2-instruction sequence to 1-instruction sequence.
		// Sequences like
		//	MOVQ TLS, BX
		//	... off(BX)(TLS*1) ...
		// become
		//	NOP
		//	... off(TLS) ...
		//
		// TODO(rsc): Remove the Hsolaris special case. It exists only to
		// guarantee we are producing byte-identical binaries as before this code.
		// But it should be unnecessary.
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != obj.Hsolaris {
			obj.Nopout(p)
		}
		if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 {
			p.From.Reg = REG_TLS
			p.From.Scale = 0
			p.From.Index = REG_NONE
		}

		if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			p.To.Reg = REG_TLS
			p.To.Scale = 0
			p.To.Index = REG_NONE
		}
	} else {
		// load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it
		// as the 2-instruction sequence if necessary.
		//	MOVQ 0(TLS), BX
		// becomes
		//	MOVQ TLS, BX
		//	MOVQ 0(BX)(TLS*1), BX
		if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
			q := obj.Appendp(ctxt, p)
			q.As = p.As
			q.From = p.From
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = p.To.Reg
			q.From.Index = REG_TLS
			q.From.Scale = 2 // TODO: use 1
			q.To = p.To
			p.From.Type = obj.TYPE_REG
			p.From.Reg = REG_TLS
			p.From.Index = REG_NONE
			p.From.Offset = 0
		}
	}

	// TODO: Remove.
	if ctxt.Headtype == obj.Hwindows && p.Mode == 64 || ctxt.Headtype == obj.Hplan9 {
		if p.From.Scale == 1 && p.From.Index == REG_TLS {
			p.From.Scale = 2
		}
		if p.To.Scale == 1 && p.To.Index == REG_TLS {
			p.To.Scale = 2
		}
	}

	// Rewrite 0 to $0 in 3rd argument to CMPPS etc.
	// That's what the tables expect.
	switch p.As {
	case ACMPPD, ACMPPS, ACMPSD, ACMPSS:
		if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil {
			p.To.Type = obj.TYPE_CONST
		}
	}

	// Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH.
	switch p.As {
	case obj.ACALL, obj.AJMP, obj.ARET:
		if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil {
			p.To.Type = obj.TYPE_BRANCH
		}
	}

	// Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ.
	if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Family == sys.AMD64 || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) {
		switch p.As {
		case AMOVL:
			p.As = ALEAL
			p.From.Type = obj.TYPE_MEM
		case AMOVQ:
			p.As = ALEAQ
			p.From.Type = obj.TYPE_MEM
		}
	}

	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
		if p.From3 != nil {
			nacladdr(ctxt, p, p.From3)
		}
		nacladdr(ctxt, p, &p.From)
		nacladdr(ctxt, p, &p.To)
	}

	// Rewrite float constants to values stored in memory.
	switch p.As {
	// Convert AMOVSS $(0), Xx to AXORPS Xx, Xx
	case AMOVSS:
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVF,
		AFADDF,
		AFSUBF,
		AFSUBRF,
		AFMULF,
		AFDIVF,
		AFDIVRF,
		AFCOMF,
		AFCOMFP,
		AADDSS,
		ASUBSS,
		AMULSS,
		ADIVSS,
		ACOMISS,
		AUCOMISS:
		if p.From.Type == obj.TYPE_FCONST {
			f32 := float32(p.From.Val.(float64))
			i32 := math.Float32bits(f32)
			literal := fmt.Sprintf("$f32.%08x", i32)
			s := obj.Linklookup(ctxt, literal, 0)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Sym.Local = true
			p.From.Offset = 0
		}

	case AMOVSD:
		// Convert AMOVSD $(0), Xx to AXORPS Xx, Xx
		if p.From.Type == obj.TYPE_FCONST {
			// f == 0 can't be used here due to -0, so use Float64bits
			if f := p.From.Val.(float64); math.Float64bits(f) == 0 {
				if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 {
					p.As = AXORPS
					p.From = p.To
					break
				}
			}
		}
		fallthrough

	case AFMOVD,
		AFADDD,
		AFSUBD,
		AFSUBRD,
		AFMULD,
		AFDIVD,
		AFDIVRD,
		AFCOMD,
		AFCOMDP,
		AADDSD,
		ASUBSD,
		AMULSD,
		ADIVSD,
		ACOMISD,
		AUCOMISD:
		if p.From.Type == obj.TYPE_FCONST {
			i64 := math.Float64bits(p.From.Val.(float64))
			literal := fmt.Sprintf("$f64.%016x", i64)
			s := obj.Linklookup(ctxt, literal, 0)
			p.From.Type = obj.TYPE_MEM
			p.From.Name = obj.NAME_EXTERN
			p.From.Sym = s
			p.From.Sym.Local = true
			p.From.Offset = 0
		}
	}

	if ctxt.Flag_dynlink {
		rewriteToUseGot(ctxt, p)
	}

	if ctxt.Flag_shared && p.Mode == 32 {
		rewriteToPcrel(ctxt, p)
	}
}
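// Illustrative only: the effect of the float-constant rewrite above.
//
//	MOVSD $(0.5), X0
//
// becomes a load from a pooled, linker-materialized symbol,
//
//	MOVSD $f64.3fe0000000000000(SB), X0
//
// while MOVSD $(0.0), X0 becomes XORPS X0, X0: zero is cheaper to
// synthesize than to load, and the Float64bits check keeps -0 (whose bit
// pattern is nonzero) out of this fast path.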
func peep(firstp *obj.Prog) {
	g := gc.Flowstart(firstp, nil)
	if g == nil {
		return
	}
	gactive = 0

	// byte, word arithmetic elimination.
	elimshortmov(g)

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	var p *obj.Prog
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.ALEAL,
			x86.ALEAQ:
			if regtyp(&p.To) {
				if p.From.Sym != nil {
					if p.From.Index == x86.REG_NONE {
						conprop(r)
					}
				}
			}

		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST {
					conprop(r)
				}
			}
		}
	}

	var r *gc.Flow
	var r1 *gc.Flow
	var p1 *obj.Prog
	var t int
loop1:
	if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 {
		gc.Dumpit("loop1", g.Start, 0)
	}

	t = 0
	for r = g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVSS,
			x86.AMOVSD:
			if regtyp(&p.To) {
				if regtyp(&p.From) {
					if copyprop(g, r) {
						excise(r)
						t++
					} else if subprop(r) && copyprop(g, r) {
						excise(r)
						t++
					}
				}
			}

		case x86.AMOVBLZX,
			x86.AMOVWLZX,
			x86.AMOVBLSX,
			x86.AMOVWLSX:
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVL
						t++
					}
				}
			}

		case x86.AMOVBQSX,
			x86.AMOVBQZX,
			x86.AMOVWQSX,
			x86.AMOVWQZX,
			x86.AMOVLQSX,
			x86.AMOVLQZX,
			x86.AMOVQL:
			if regtyp(&p.To) {
				r1 = rnops(gc.Uniqs(r))
				if r1 != nil {
					p1 = r1.Prog
					if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg {
						p1.As = x86.AMOVQ
						t++
					}
				}
			}

		case x86.AADDL,
			x86.AADDQ,
			x86.AADDW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.AADDQ {
					p.As = x86.ADECQ
				} else if p.As == x86.AADDL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.AADDQ {
					p.As = x86.AINCQ
				} else if p.As == x86.AADDL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

		case x86.ASUBL,
			x86.ASUBQ,
			x86.ASUBW:
			if p.From.Type != obj.TYPE_CONST || needc(p.Link) {
				break
			}
			if p.From.Offset == -1 {
				if p.As == x86.ASUBQ {
					p.As = x86.AINCQ
				} else if p.As == x86.ASUBL {
					p.As = x86.AINCL
				} else {
					p.As = x86.AINCW
				}
				p.From = obj.Addr{}
				break
			}

			if p.From.Offset == 1 {
				if p.As == x86.ASUBQ {
					p.As = x86.ADECQ
				} else if p.As == x86.ASUBL {
					p.As = x86.ADECL
				} else {
					p.As = x86.ADECW
				}
				p.From = obj.Addr{}
				break
			}
		}
	}

	if t != 0 {
		goto loop1
	}

	// MOVLQZX removal.
	// The MOVLQZX exists to avoid being confused for a
	// MOVL that is just copying 32-bit data around during
	// copyprop. Now that copyprop is done, remove MOVLQZX R1, R2
	// if it is dominated by an earlier ADDL/MOVL/etc into R1 that
	// will have already cleared the high bits.
	//
	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one. We only use the lower one, but
	// the processor can do better if we do moves using both.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		if p.As == x86.AMOVLQZX {
			if regtyp(&p.From) {
				if p.From.Type == p.To.Type && p.From.Reg == p.To.Reg {
					if prevl(r, p.From.Reg) {
						excise(r)
					}
				}
			}
		}

		if p.As == x86.AMOVSD {
			if regtyp(&p.From) {
				if regtyp(&p.To) {
					p.As = x86.AMOVAPD
				}
			}
		}
	}

	// load pipelining
	// push any load from memory as early as possible
	// to give it time to complete before use.
	for r := g.Start; r != nil; r = r.Link {
		p = r.Prog
		switch p.As {
		case x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ,
			x86.AMOVLQZX:
			if regtyp(&p.To) && !regconsttyp(&p.From) {
				pushback(r)
			}
		}
	}

	gc.Flowend(g)
}
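// Illustrative only: what the load-pipelining loop above tries to achieve.
// In a block such as
//
//	ADDQ CX, DX
//	MOVQ x+0(SP), AX	// load immediately before its use
//	ADDQ AX, BX
//
// pushback attempts to hoist the MOVQ toward the start of the block,
// widening the gap between the load and its first use so the memory access
// can overlap with independent work.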