func progedit(ctxt *obj.Link, p *obj.Prog) { // Maintain information about code generation mode. if ctxt.Mode == 0 { ctxt.Mode = ctxt.Arch.Regsize * 8 } p.Mode = int8(ctxt.Mode) switch p.As { case AMODE: if p.From.Type == obj.TYPE_CONST || (p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_NONE) { switch int(p.From.Offset) { case 16, 32, 64: ctxt.Mode = int(p.From.Offset) } } obj.Nopout(p) } // Thread-local storage references use the TLS pseudo-register. // As a register, TLS refers to the thread-local storage base, and it // can only be loaded into another register: // // MOVQ TLS, AX // // An offset from the thread-local storage base is written off(reg)(TLS*1). // Semantically it is off(reg), but the (TLS*1) annotation marks this as // indexing from the loaded TLS base. This emits a relocation so that // if the linker needs to adjust the offset, it can. For example: // // MOVQ TLS, AX // MOVQ 0(AX)(TLS*1), CX // load g into CX // // On systems that support direct access to the TLS memory, this // pair of instructions can be reduced to a direct TLS memory reference: // // MOVQ 0(TLS), CX // load g into CX // // The 2-instruction and 1-instruction forms correspond to the two code // sequences for loading a TLS variable in the local exec model given in "ELF // Handling For Thread-Local Storage". // // We apply this rewrite on systems that support the 1-instruction form. // The decision is made using only the operating system and the -shared flag, // not the link mode. If some link modes on a particular operating system // require the 2-instruction form, then all builds for that operating system // will use the 2-instruction form, so that the link mode decision can be // delayed to link time. // // In this way, all supported systems use identical instructions to // access TLS, and they are rewritten appropriately first here in // liblink and then finally using relocations in the linker. // // When -shared is passed, we leave the code in the 2-instruction form but // assemble (and relocate) them in different ways to generate the initial // exec code sequence. It's a bit of a fluke that this is possible without // rewriting the instructions more comprehensively, and it only does because // we only support a single TLS variable (g). if canuse1insntls(ctxt) { // Reduce 2-instruction sequence to 1-instruction sequence. // Sequences like // MOVQ TLS, BX // ... off(BX)(TLS*1) ... // become // NOP // ... off(TLS) ... // // TODO(rsc): Remove the Hsolaris special case. It exists only to // guarantee we are producing byte-identical binaries as before this code. // But it should be unnecessary. if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 && ctxt.Headtype != obj.Hsolaris { obj.Nopout(p) } if p.From.Type == obj.TYPE_MEM && p.From.Index == REG_TLS && REG_AX <= p.From.Reg && p.From.Reg <= REG_R15 { p.From.Reg = REG_TLS p.From.Scale = 0 p.From.Index = REG_NONE } if p.To.Type == obj.TYPE_MEM && p.To.Index == REG_TLS && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { p.To.Reg = REG_TLS p.To.Scale = 0 p.To.Index = REG_NONE } } else { // load_g_cx, below, always inserts the 1-instruction sequence. Rewrite it // as the 2-instruction sequence if necessary. // MOVQ 0(TLS), BX // becomes // MOVQ TLS, BX // MOVQ 0(BX)(TLS*1), BX if (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { q := obj.Appendp(ctxt, p) q.As = p.As q.From = p.From q.From.Type = obj.TYPE_MEM q.From.Reg = p.To.Reg q.From.Index = REG_TLS q.From.Scale = 2 // TODO: use 1 q.To = p.To p.From.Type = obj.TYPE_REG p.From.Reg = REG_TLS p.From.Index = REG_NONE p.From.Offset = 0 } } // TODO: Remove. if ctxt.Headtype == obj.Hwindows && p.Mode == 64 || ctxt.Headtype == obj.Hplan9 { if p.From.Scale == 1 && p.From.Index == REG_TLS { p.From.Scale = 2 } if p.To.Scale == 1 && p.To.Index == REG_TLS { p.To.Scale = 2 } } // Rewrite 0 to $0 in 3rd argment to CMPPS etc. // That's what the tables expect. switch p.As { case ACMPPD, ACMPPS, ACMPSD, ACMPSS: if p.To.Type == obj.TYPE_MEM && p.To.Name == obj.NAME_NONE && p.To.Reg == REG_NONE && p.To.Index == REG_NONE && p.To.Sym == nil { p.To.Type = obj.TYPE_CONST } } // Rewrite CALL/JMP/RET to symbol as TYPE_BRANCH. switch p.As { case obj.ACALL, obj.AJMP, obj.ARET: if p.To.Type == obj.TYPE_MEM && (p.To.Name == obj.NAME_EXTERN || p.To.Name == obj.NAME_STATIC) && p.To.Sym != nil { p.To.Type = obj.TYPE_BRANCH } } // Rewrite MOVL/MOVQ $XXX(FP/SP) as LEAL/LEAQ. if p.From.Type == obj.TYPE_ADDR && (ctxt.Arch.Thechar == '6' || p.From.Name != obj.NAME_EXTERN && p.From.Name != obj.NAME_STATIC) { switch p.As { case AMOVL: p.As = ALEAL p.From.Type = obj.TYPE_MEM case AMOVQ: p.As = ALEAQ p.From.Type = obj.TYPE_MEM } } if ctxt.Headtype == obj.Hnacl && p.Mode == 64 { nacladdr(ctxt, p, &p.From3) nacladdr(ctxt, p, &p.From) nacladdr(ctxt, p, &p.To) } // Rewrite float constants to values stored in memory. switch p.As { // Convert AMOVSS $(0), Xx to AXORPS Xx, Xx case AMOVSS: if p.From.Type == obj.TYPE_FCONST { if p.From.Val.(float64) == 0 { if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { p.As = AXORPS p.From = p.To break } } } fallthrough case AFMOVF, AFADDF, AFSUBF, AFSUBRF, AFMULF, AFDIVF, AFDIVRF, AFCOMF, AFCOMFP, AADDSS, ASUBSS, AMULSS, ADIVSS, ACOMISS, AUCOMISS: if p.From.Type == obj.TYPE_FCONST { f32 := float32(p.From.Val.(float64)) i32 := math.Float32bits(f32) literal := fmt.Sprintf("$f32.%08x", i32) s := obj.Linklookup(ctxt, literal, 0) if s.Type == 0 { s.Type = obj.SRODATA obj.Adduint32(ctxt, s, i32) } p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN p.From.Sym = s p.From.Sym.Local = true p.From.Offset = 0 } case AMOVSD: // Convert AMOVSD $(0), Xx to AXORPS Xx, Xx if p.From.Type == obj.TYPE_FCONST { if p.From.Val.(float64) == 0 { if p.To.Type == obj.TYPE_REG && REG_X0 <= p.To.Reg && p.To.Reg <= REG_X15 { p.As = AXORPS p.From = p.To break } } } fallthrough case AFMOVD, AFADDD, AFSUBD, AFSUBRD, AFMULD, AFDIVD, AFDIVRD, AFCOMD, AFCOMDP, AADDSD, ASUBSD, AMULSD, ADIVSD, ACOMISD, AUCOMISD: if p.From.Type == obj.TYPE_FCONST { i64 := math.Float64bits(p.From.Val.(float64)) literal := fmt.Sprintf("$f64.%016x", i64) s := obj.Linklookup(ctxt, literal, 0) if s.Type == 0 { s.Type = obj.SRODATA obj.Adduint64(ctxt, s, i64) } p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN p.From.Sym = s p.From.Sym.Local = true p.From.Offset = 0 } } if ctxt.Flag_dynlink && (p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO) { var sym *obj.LSym if p.As == obj.ADUFFZERO { sym = obj.Linklookup(ctxt, "runtime.duffzero", 0) } else { sym = obj.Linklookup(ctxt, "runtime.duffcopy", 0) } offset := p.To.Offset p.As = AMOVQ p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_GOTREF p.From.Sym = sym p.To.Type = obj.TYPE_REG p.To.Reg = REG_R15 p.To.Offset = 0 p.To.Sym = nil p1 := obj.Appendp(ctxt, p) p1.As = AADDQ p1.From.Type = obj.TYPE_CONST p1.From.Offset = offset p1.To.Type = obj.TYPE_REG p1.To.Reg = REG_R15 p2 := obj.Appendp(ctxt, p1) p2.As = obj.ACALL p2.To.Type = obj.TYPE_REG p2.To.Reg = REG_R15 } if ctxt.Flag_dynlink { if p.As == ALEAQ && p.From.Type == obj.TYPE_MEM && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local { p.As = AMOVQ p.From.Type = obj.TYPE_ADDR } if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local { if p.As != AMOVQ { ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -dynlink", p) } if p.To.Type != obj.TYPE_REG { ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -dynlink", p) } p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_GOTREF if p.From.Offset != 0 { q := obj.Appendp(ctxt, p) q.As = AADDQ q.From.Type = obj.TYPE_CONST q.From.Offset = p.From.Offset q.To = p.To p.From.Offset = 0 } } if p.From3.Name == obj.NAME_EXTERN { ctxt.Diag("don't know how to handle %v with -dynlink", p) } if p.To2.Name == obj.NAME_EXTERN { ctxt.Diag("don't know how to handle %v with -dynlink", p) } var source *obj.Addr if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local { if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local { ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p) } source = &p.From } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local { source = &p.To } else { return } if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { return } if source.Type != obj.TYPE_MEM { ctxt.Diag("don't know how to handle %v with -dynlink", p) } p1 := obj.Appendp(ctxt, p) p2 := obj.Appendp(ctxt, p1) p1.As = AMOVQ p1.From.Type = obj.TYPE_MEM p1.From.Sym = source.Sym p1.From.Name = obj.NAME_GOTREF p1.To.Type = obj.TYPE_REG p1.To.Reg = REG_R15 p2.As = p.As p2.From = p.From p2.To = p.To if p.From.Name == obj.NAME_EXTERN { p2.From.Reg = REG_R15 p2.From.Name = obj.NAME_NONE p2.From.Sym = nil } else if p.To.Name == obj.NAME_EXTERN { p2.To.Reg = REG_R15 p2.To.Name = obj.NAME_NONE p2.To.Sym = nil } else { return } l := p.Link l2 := p2.Link *p = *p1 *p1 = *p2 p.Link = l p1.Link = l2 } }
func peep(firstp *obj.Prog) { g := (*gc.Graph)(gc.Flowstart(firstp, nil)) if g == nil { return } gactive = 0 // byte, word arithmetic elimination. elimshortmov(g) // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim var p *obj.Prog for r := (*gc.Flow)(g.Start); r != nil; r = r.Link { p = r.Prog switch p.As { case x86.ALEAL, x86.ALEAQ: if regtyp(&p.To) { if p.From.Sym != nil { if p.From.Index == x86.REG_NONE { conprop(r) } } } case x86.AMOVB, x86.AMOVW, x86.AMOVL, x86.AMOVQ, x86.AMOVSS, x86.AMOVSD: if regtyp(&p.To) { if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_FCONST { conprop(r) } } } } var r *gc.Flow var r1 *gc.Flow var p1 *obj.Prog var t int loop1: if gc.Debug['P'] != 0 && gc.Debug['v'] != 0 { gc.Dumpit("loop1", g.Start, 0) } t = 0 for r = g.Start; r != nil; r = r.Link { p = r.Prog switch p.As { case x86.AMOVL, x86.AMOVQ, x86.AMOVSS, x86.AMOVSD: if regtyp(&p.To) { if regtyp(&p.From) { if copyprop(g, r) { excise(r) t++ } else if subprop(r) && copyprop(g, r) { excise(r) t++ } } } case x86.AMOVBLZX, x86.AMOVWLZX, x86.AMOVBLSX, x86.AMOVWLSX: if regtyp(&p.To) { r1 = rnops(gc.Uniqs(r)) if r1 != nil { p1 = r1.Prog if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg { p1.As = x86.AMOVL t++ } } } case x86.AMOVBQSX, x86.AMOVBQZX, x86.AMOVWQSX, x86.AMOVWQZX, x86.AMOVLQSX, x86.AMOVLQZX, x86.AMOVQL: if regtyp(&p.To) { r1 = rnops(gc.Uniqs(r)) if r1 != nil { p1 = r1.Prog if p.As == p1.As && p.To.Type == p1.From.Type && p.To.Reg == p1.From.Reg { p1.As = x86.AMOVQ t++ } } } case x86.AADDL, x86.AADDQ, x86.AADDW: if p.From.Type != obj.TYPE_CONST || needc(p.Link) { break } if p.From.Offset == -1 { if p.As == x86.AADDQ { p.As = x86.ADECQ } else if p.As == x86.AADDL { p.As = x86.ADECL } else { p.As = x86.ADECW } p.From = obj.Addr{} break } if p.From.Offset == 1 { if p.As == x86.AADDQ { p.As = x86.AINCQ } else if p.As == x86.AADDL { p.As = x86.AINCL } else { p.As = x86.AINCW } p.From = obj.Addr{} break } case x86.ASUBL, x86.ASUBQ, x86.ASUBW: if p.From.Type != obj.TYPE_CONST || needc(p.Link) { break } if p.From.Offset == -1 { if p.As == x86.ASUBQ { p.As = x86.AINCQ } else if p.As == x86.ASUBL { p.As = x86.AINCL } else { p.As = x86.AINCW } p.From = obj.Addr{} break } if p.From.Offset == 1 { if p.As == x86.ASUBQ { p.As = x86.ADECQ } else if p.As == x86.ASUBL { p.As = x86.ADECL } else { p.As = x86.ADECW } p.From = obj.Addr{} break } } } if t != 0 { goto loop1 } // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for r := (*gc.Flow)(g.Start); r != nil; r = r.Link { p = r.Prog if p.As == x86.AMOVLQZX { if regtyp(&p.From) { if p.From.Type == p.To.Type && p.From.Reg == p.To.Reg { if prevl(r, int(p.From.Reg)) { excise(r) } } } } if p.As == x86.AMOVSD { if regtyp(&p.From) { if regtyp(&p.To) { p.As = x86.AMOVAPD } } } } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for r := (*gc.Flow)(g.Start); r != nil; r = r.Link { p = r.Prog switch p.As { case x86.AMOVB, x86.AMOVW, x86.AMOVL, x86.AMOVQ, x86.AMOVLQZX: if regtyp(&p.To) && !regconsttyp(&p.From) { pushback(r) } } } gc.Flowend(g) }