// scratchFpMem initializes an Addr (field of a Prog) // to reference the scratchpad memory for movement between // F and G registers for FP conversions. func scratchFpMem(s *gc.SSAGenState, a *obj.Addr) { a.Type = obj.TYPE_MEM a.Name = obj.NAME_AUTO a.Node = s.ScratchFpMem a.Sym = gc.Linksym(s.ScratchFpMem.Sym) a.Reg = ppc64.REGSP }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if *ax == 0 { p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) *ax = 1 } if cnt <= int64(4*gc.Widthreg) { for i := int64(0); i < cnt; i += int64(gc.Widthreg) { p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i) } } else if !gc.Nacl && cnt <= int64(128*gc.Widthreg) { p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0) p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, 1*(128-cnt/int64(gc.Widthreg))) p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) } else { p = appendpp(p, x86.AMOVL, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0) p = appendpp(p, x86.ALEAL, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0) p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) p = appendpp(p, x86.ASTOSL, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if *ax == 0 { p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) *ax = 1 } if cnt%int64(gc.Widthreg) != 0 { // should only happen with nacl if cnt%int64(gc.Widthptr) != 0 { gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt) } p = appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo) lo += int64(gc.Widthptr) cnt -= int64(gc.Widthptr) } if cnt <= int64(4*gc.Widthreg) { for i := int64(0); i < cnt; i += int64(gc.Widthreg) { p = appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i) } } else if !gc.Nacl && (cnt <= int64(128*gc.Widthreg)) { q := cnt / int64(gc.Widthreg) p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(q), obj.TYPE_REG, x86.REG_DI, 0) p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(q)) p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) } else { p = appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0) p = appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0) p = appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) p = appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) } return p }
func blockcopy(n, ns *gc.Node, osrc, odst, w int64) { var noddi gc.Node gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI) var nodsi gc.Node gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI) var nodl gc.Node var nodr gc.Node if n.Ullman >= ns.Ullman { gc.Agenr(n, &nodr, &nodsi) if ns.Op == gc.ONAME { gc.Gvardef(ns) } gc.Agenr(ns, &nodl, &noddi) } else { if ns.Op == gc.ONAME { gc.Gvardef(ns) } gc.Agenr(ns, &nodl, &noddi) gc.Agenr(n, &nodr, &nodsi) } if nodl.Reg != x86.REG_DI { gmove(&nodl, &noddi) } if nodr.Reg != x86.REG_SI { gmove(&nodr, &nodsi) } gc.Regfree(&nodl) gc.Regfree(&nodr) c := w % 8 // bytes q := w / 8 // quads var oldcx gc.Node var cx gc.Node savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64]) // if we are copying forward on the stack and // the src and dst overlap, then reverse direction if osrc < odst && odst < osrc+w { // reverse direction gins(x86.ASTD, nil, nil) // set direction flag if c > 0 { gconreg(addptr, w-1, x86.REG_SI) gconreg(addptr, w-1, x86.REG_DI) gconreg(movptr, c, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)- } if q > 0 { if c > 0 { gconreg(addptr, -7, x86.REG_SI) gconreg(addptr, -7, x86.REG_DI) } else { gconreg(addptr, w-8, x86.REG_SI) gconreg(addptr, w-8, x86.REG_DI) } gconreg(movptr, q, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)- } // we leave with the flag clear gins(x86.ACLD, nil, nil) } else { // normal direction if q > 128 || (gc.Nacl && q >= 4) { gconreg(movptr, q, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+ } else if q >= 4 { var oldx0 gc.Node var x0 gc.Node savex(x86.REG_X0, &x0, &oldx0, nil, gc.Types[gc.TFLOAT64]) p := gins(obj.ADUFFCOPY, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) // 64 blocks taking 14 bytes each // see ../../../../runtime/mkduff.go p.To.Offset = 14 * (64 - q/2) restx(&x0, &oldx0) if q%2 != 0 { gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+ } } else if !gc.Nacl && c == 0 { // We don't need the MOVSQ side-effect of updating SI and DI, // and issuing a sequence of MOVQs directly is faster. nodsi.Op = gc.OINDREG noddi.Op = gc.OINDREG for q > 0 { gmove(&nodsi, &cx) // MOVQ x+(SI),CX gmove(&cx, &noddi) // MOVQ CX,x+(DI) nodsi.Xoffset += 8 noddi.Xoffset += 8 q-- } } else { for q > 0 { gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+ q-- } } // copy the remaining c bytes if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) { for c > 0 { gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+ c-- } } else if w < 8 || c <= 4 { nodsi.Op = gc.OINDREG noddi.Op = gc.OINDREG cx.Type = gc.Types[gc.TINT32] nodsi.Type = gc.Types[gc.TINT32] noddi.Type = gc.Types[gc.TINT32] if c > 4 { nodsi.Xoffset = 0 noddi.Xoffset = 0 gmove(&nodsi, &cx) gmove(&cx, &noddi) } nodsi.Xoffset = c - 4 noddi.Xoffset = c - 4 gmove(&nodsi, &cx) gmove(&cx, &noddi) } else { nodsi.Op = gc.OINDREG noddi.Op = gc.OINDREG cx.Type = gc.Types[gc.TINT64] nodsi.Type = gc.Types[gc.TINT64] noddi.Type = gc.Types[gc.TINT64] nodsi.Xoffset = c - 8 noddi.Xoffset = c - 8 gmove(&nodsi, &cx) gmove(&cx, &noddi) } } restx(&cx, &oldcx) }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpARM64MOVDconvert, ssa.OpARM64MOVDreg: if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x == y { return } as := arm64.AMOVD if v.Type.IsFloat() { switch v.Type.Size() { case 4: as = arm64.AFMOVS case 8: as = arm64.AFMOVD default: panic("bad float size") } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y case ssa.OpARM64MOVDnop: if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpARM64ADD, ssa.OpARM64SUB, ssa.OpARM64AND, ssa.OpARM64OR, ssa.OpARM64XOR, ssa.OpARM64BIC, ssa.OpARM64MUL, ssa.OpARM64MULW, ssa.OpARM64MULH, ssa.OpARM64UMULH, ssa.OpARM64MULL, ssa.OpARM64UMULL, ssa.OpARM64DIV, ssa.OpARM64UDIV, ssa.OpARM64DIVW, ssa.OpARM64UDIVW, ssa.OpARM64MOD, ssa.OpARM64UMOD, ssa.OpARM64MODW, ssa.OpARM64UMODW, ssa.OpARM64SLL, ssa.OpARM64SRL, ssa.OpARM64SRA, ssa.OpARM64FADDS, ssa.OpARM64FADDD, ssa.OpARM64FSUBS, ssa.OpARM64FSUBD, ssa.OpARM64FMULS, ssa.OpARM64FMULD, ssa.OpARM64FDIVS, ssa.OpARM64FDIVD: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARM64ADDconst, ssa.OpARM64SUBconst, ssa.OpARM64ANDconst, ssa.OpARM64ORconst, ssa.OpARM64XORconst, ssa.OpARM64BICconst, ssa.OpARM64SLLconst, ssa.OpARM64SRLconst, ssa.OpARM64SRAconst, ssa.OpARM64RORconst, ssa.OpARM64RORWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARM64ADDshiftLL, ssa.OpARM64SUBshiftLL, ssa.OpARM64ANDshiftLL, ssa.OpARM64ORshiftLL, ssa.OpARM64XORshiftLL, ssa.OpARM64BICshiftLL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt) case ssa.OpARM64ADDshiftRL, ssa.OpARM64SUBshiftRL, ssa.OpARM64ANDshiftRL, ssa.OpARM64ORshiftRL, ssa.OpARM64XORshiftRL, ssa.OpARM64BICshiftRL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt) case ssa.OpARM64ADDshiftRA, ssa.OpARM64SUBshiftRA, ssa.OpARM64ANDshiftRA, ssa.OpARM64ORshiftRA, ssa.OpARM64XORshiftRA, ssa.OpARM64BICshiftRA: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt) case ssa.OpARM64MOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARM64FMOVSconst, ssa.OpARM64FMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARM64CMP, ssa.OpARM64CMPW, ssa.OpARM64CMN, ssa.OpARM64CMNW, ssa.OpARM64FCMPS, ssa.OpARM64FCMPD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARM64CMPconst, ssa.OpARM64CMPWconst, ssa.OpARM64CMNconst, ssa.OpARM64CMNWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARM64CMPshiftLL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt) case ssa.OpARM64CMPshiftRL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt) case ssa.OpARM64CMPshiftRA: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt) case ssa.OpARM64MOVDaddr: p := gc.Prog(arm64.AMOVD) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) var wantreg string // MOVD $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R13) // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVD $off(SP), R wantreg = "SP" p.From.Reg = arm64.REGSP p.From.Offset = v.AuxInt } if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg) } case ssa.OpARM64MOVBload, ssa.OpARM64MOVBUload, ssa.OpARM64MOVHload, ssa.OpARM64MOVHUload, ssa.OpARM64MOVWload, ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload, ssa.OpARM64FMOVSload, ssa.OpARM64FMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARM64MOVBstore, ssa.OpARM64MOVHstore, ssa.OpARM64MOVWstore, ssa.OpARM64MOVDstore, ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpARM64MOVBstorezero, ssa.OpARM64MOVHstorezero, ssa.OpARM64MOVWstorezero, ssa.OpARM64MOVDstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = arm64.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpARM64MOVBreg, ssa.OpARM64MOVBUreg, ssa.OpARM64MOVHreg, ssa.OpARM64MOVHUreg, ssa.OpARM64MOVWreg, ssa.OpARM64MOVWUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(), v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(), v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if gc.SSARegNum(v) == gc.SSARegNum(v.Args[0]) { return } p := gc.Prog(arm64.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) return default: } } fallthrough case ssa.OpARM64MVN, ssa.OpARM64NEG, ssa.OpARM64FNEGS, ssa.OpARM64FNEGD, ssa.OpARM64FSQRTD, ssa.OpARM64FCVTZSSW, ssa.OpARM64FCVTZSDW, ssa.OpARM64FCVTZUSW, ssa.OpARM64FCVTZUDW, ssa.OpARM64FCVTZSS, ssa.OpARM64FCVTZSD, ssa.OpARM64FCVTZUS, ssa.OpARM64FCVTZUD, ssa.OpARM64SCVTFWS, ssa.OpARM64SCVTFWD, ssa.OpARM64SCVTFS, ssa.OpARM64SCVTFD, ssa.OpARM64UCVTFWS, ssa.OpARM64UCVTFWD, ssa.OpARM64UCVTFS, ssa.OpARM64UCVTFD, ssa.OpARM64FCVTSD, ssa.OpARM64FCVTDS, ssa.OpARM64REV, ssa.OpARM64REVW, ssa.OpARM64REV16W: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARM64CSELULT, ssa.OpARM64CSELULT0: r1 := int16(arm64.REGZERO) if v.Op == ssa.OpARM64CSELULT { r1 = gc.SSARegNum(v.Args[1]) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg p.From.Reg = arm64.COND_LO p.Reg = gc.SSARegNum(v.Args[0]) p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1} p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARM64DUFFZERO: // runtime.duffzero expects start address - 8 in R16 p := gc.Prog(arm64.ASUB) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REG_R16 p = gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARM64LoweredZero: // MOVD.P ZR, 8(R16) // CMP Rarg1, R16 // BLE -2(PC) // arg1 is the address of the last element to zero p := gc.Prog(arm64.AMOVD) p.Scond = arm64.C_XPOST p.From.Type = obj.TYPE_REG p.From.Reg = arm64.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = arm64.REG_R16 p.To.Offset = 8 p2 := gc.Prog(arm64.ACMP) p2.From.Type = obj.TYPE_REG p2.From.Reg = gc.SSARegNum(v.Args[1]) p2.Reg = arm64.REG_R16 p3 := gc.Prog(arm64.ABLE) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARM64LoweredMove: // MOVD.P 8(R16), Rtmp // MOVD.P Rtmp, 8(R17) // CMP Rarg2, R16 // BLE -3(PC) // arg2 is the address of the last element of src p := gc.Prog(arm64.AMOVD) p.Scond = arm64.C_XPOST p.From.Type = obj.TYPE_MEM p.From.Reg = arm64.REG_R16 p.From.Offset = 8 p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REGTMP p2 := gc.Prog(arm64.AMOVD) p2.Scond = arm64.C_XPOST p2.From.Type = obj.TYPE_REG p2.From.Reg = arm64.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = arm64.REG_R17 p2.To.Offset = 8 p3 := gc.Prog(arm64.ACMP) p3.From.Type = obj.TYPE_REG p3.From.Reg = gc.SSARegNum(v.Args[2]) p3.Reg = arm64.REG_R16 p4 := gc.Prog(arm64.ABLE) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpARM64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64LoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. mem := v.Args[1] for _, w := range v.Block.Succs[0].Block().Values { if w.Op == ssa.OpPhi { if w.Type.IsMemory() { mem = w } continue } if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // w doesn't use a store - can't be a memory op. continue } if w.Args[len(w.Args)-1] != mem { v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) } switch w.Op { case ssa.OpARM64MOVBload, ssa.OpARM64MOVBUload, ssa.OpARM64MOVHload, ssa.OpARM64MOVHUload, ssa.OpARM64MOVWload, ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload, ssa.OpARM64FMOVSload, ssa.OpARM64FMOVDload, ssa.OpARM64MOVBstore, ssa.OpARM64MOVHstore, ssa.OpARM64MOVWstore, ssa.OpARM64MOVDstore, ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore: // arg0 is ptr, auxint is offset if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpARM64DUFFZERO, ssa.OpARM64LoweredZero: // arg0 is ptr if w.Args[0] == v.Args[0] { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpARM64LoweredMove: // arg0 is dst ptr, arg1 is src ptr if w.Args[0] == v.Args[0] || w.Args[1] == v.Args[0] { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } default: } if w.Type.IsMemory() { if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // these ops are OK mem = w continue } // We can't delay the nil check past the next store. break } } // Issue a load which will fault if arg is nil. p := gc.Prog(arm64.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: if !v.Args[0].Type.IsPtrShaped() { v.Fatalf("keeping non-pointer alive %v", v.Args[0]) } n, off := gc.AutoVar(v.Args[0]) if n == nil { v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0]) } if off != 0 { v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off) } gc.Gvarlive(n) case ssa.OpARM64Equal, ssa.OpARM64NotEqual, ssa.OpARM64LessThan, ssa.OpARM64LessEqual, ssa.OpARM64GreaterThan, ssa.OpARM64GreaterEqual, ssa.OpARM64LessThanU, ssa.OpARM64LessEqualU, ssa.OpARM64GreaterThanU, ssa.OpARM64GreaterEqualU: // generate boolean values using CSET p := gc.Prog(arm64.ACSET) p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg p.From.Reg = condBits[v.Op] p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpARM64LoweredGetClosurePtr: // Closure pointer is R26 (arm64.REGCTXT). gc.CheckLoweredGetClosurePtr(v) case ssa.OpARM64FlagEQ, ssa.OpARM64FlagLT_ULT, ssa.OpARM64FlagLT_UGT, ssa.OpARM64FlagGT_ULT, ssa.OpARM64FlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpARM64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := nl.Type.Width // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 8 // bytes q := w / 8 // quads if q < 4 { // Write sequence of MOV 0, off(base) instead of using STOSQ. // The hope is that although the code will be slightly longer, // the MOVs will have no dependencies and pipeline better // than the unrolled STOSQ loop. // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Agenr(nl, &n1, nil) n1.Op = gc.OINDREG var z gc.Node gc.Nodconst(&z, gc.Types[gc.TUINT64], 0) for { tmp14 := q q-- if tmp14 <= 0 { break } n1.Type = z.Type gins(x86.AMOVQ, &z, &n1) n1.Xoffset += 8 } if c >= 4 { gc.Nodconst(&z, gc.Types[gc.TUINT32], 0) n1.Type = z.Type gins(x86.AMOVL, &z, &n1) n1.Xoffset += 4 c -= 4 } gc.Nodconst(&z, gc.Types[gc.TUINT8], 0) for { tmp15 := c c-- if tmp15 <= 0 { break } n1.Type = z.Type gins(x86.AMOVB, &z, &n1) n1.Xoffset++ } gc.Regfree(&n1) return } var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var ax gc.Node var oldax gc.Node savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr]) gconreg(x86.AMOVL, 0, x86.REG_AX) if q > 128 || gc.Nacl { gconreg(movptr, q, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+ } else { if di := dzDI(q); di != 0 { gconreg(addptr, di, x86.REG_DI) } p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = dzOff(q) } z := ax di := n1 if w >= 8 && c >= 4 { di.Op = gc.OINDREG z.Type = gc.Types[gc.TINT64] di.Type = z.Type p := gins(x86.AMOVQ, &z, &di) p.To.Scale = 1 p.To.Offset = c - 8 } else if c >= 4 { di.Op = gc.OINDREG z.Type = gc.Types[gc.TINT32] di.Type = z.Type gins(x86.AMOVL, &z, &di) if c > 4 { p := gins(x86.AMOVL, &z, &di) p.To.Scale = 1 p.To.Offset = c - 4 } } else { for c > 0 { gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+ c-- } } restx(&n1, &oldn1) restx(&ax, &oldax) }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL, ssa.OpAMD64ADDW: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) switch { case r == r1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case r == r2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r default: var asm obj.As switch v.Op { case ssa.OpAMD64ADDQ: asm = x86.ALEAQ case ssa.OpAMD64ADDL: asm = x86.ALEAL case ssa.OpAMD64ADDW: asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = r1 p.From.Scale = 1 p.From.Index = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r } // 2-address opcode arithmetic, symmetric case ssa.OpAMD64ADDB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB, ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64ORW, ssa.OpAMD64ORB, ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64XORW, ssa.OpAMD64XORB, ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64MULW, ssa.OpAMD64MULB, ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64PXOR: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) if x != r && y != r { opregreg(moveByType(v.Type), r, x) x = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = r if x == r { p.From.Reg = y } else { p.From.Reg = x } // 2-address opcode arithmetic, not symmetric case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64SUBW, ssa.OpAMD64SUBB: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) var neg bool if y == r { // compute -(y-x) instead x, y = y, x neg = true } if x != r { opregreg(moveByType(v.Type), r, x) } opregreg(v.Op.Asm(), r, y) if neg { if v.Op == ssa.OpAMD64SUBQ { p := gc.Prog(x86.ANEGQ) p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // Avoids partial registers write p := gc.Prog(x86.ANEGL) p.To.Type = obj.TYPE_REG p.To.Reg = r } } case ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) if y == r && x != r { // r/y := x op r/y, need to preserve x and rewrite to // r/y := r/y op x15 x15 := int16(x86.REG_X15) // register move y to x15 // register move x to y // rename y with x15 opregreg(moveByType(v.Type), x15, y) opregreg(moveByType(v.Type), r, x) y = x15 } else if x != r { opregreg(moveByType(v.Type), r, x) } opregreg(v.Op.Asm(), r, y) case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW, ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU, ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW, ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU: // Arg[0] is already in AX as it's the only register we allow // and AX is the only output x := gc.SSARegNum(v.Args[1]) // CPU faults upon signed overflow, which occurs when most // negative int is divided by -1. var j *obj.Prog if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL || v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ || v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW { var c *obj.Prog switch v.Op { case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ: c = gc.Prog(x86.ACMPQ) j = gc.Prog(x86.AJEQ) // go ahead and sign extend to save doing it later gc.Prog(x86.ACQO) case ssa.OpAMD64DIVL, ssa.OpAMD64MODL: c = gc.Prog(x86.ACMPL) j = gc.Prog(x86.AJEQ) gc.Prog(x86.ACDQ) case ssa.OpAMD64DIVW, ssa.OpAMD64MODW: c = gc.Prog(x86.ACMPW) j = gc.Prog(x86.AJEQ) gc.Prog(x86.ACWD) } c.From.Type = obj.TYPE_REG c.From.Reg = x c.To.Type = obj.TYPE_CONST c.To.Offset = -1 j.To.Type = obj.TYPE_BRANCH } // for unsigned ints, we sign extend by setting DX = 0 // signed ints were sign extended above if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU || v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU || v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU { c := gc.Prog(x86.AXORQ) c.From.Type = obj.TYPE_REG c.From.Reg = x86.REG_DX c.To.Type = obj.TYPE_REG c.To.Reg = x86.REG_DX } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x // signed division, rest of the check for -1 case if j != nil { j2 := gc.Prog(obj.AJMP) j2.To.Type = obj.TYPE_BRANCH var n *obj.Prog if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL || v.Op == ssa.OpAMD64DIVW { // n * -1 = -n n = gc.Prog(x86.ANEGQ) n.To.Type = obj.TYPE_REG n.To.Reg = x86.REG_AX } else { // n % -1 == 0 n = gc.Prog(x86.AXORQ) n.From.Type = obj.TYPE_REG n.From.Reg = x86.REG_DX n.To.Type = obj.TYPE_REG n.To.Reg = x86.REG_DX } j.To.Val = n j2.To.Val = s.Pc() } case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU: // the frontend rewrites constant division by 8/16/32 bit integers into // HMUL by a constant // SSA rewrites generate the 64 bit versions // Arg[0] is already in AX as it's the only register we allow // and DX is the only output we care about (the high bits) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) // IMULB puts the high portion in AH instead of DL, // so move it to DL for consistency if v.Type.Size() == 1 { m := gc.Prog(x86.AMOVB) m.From.Type = obj.TYPE_REG m.From.Reg = x86.REG_AH m.To.Type = obj.TYPE_REG m.To.Reg = x86.REG_DX } case ssa.OpAMD64AVGQU: // compute (x+y)/2 unsigned. // Do a 64-bit add, the overflow goes into the carry. // Shift right once and pull the carry back into the 63rd bit. r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) if x != r && y != r { opregreg(moveByType(v.Type), r, x) x = r } p := gc.Prog(x86.AADDQ) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = r if x == r { p.From.Reg = y } else { p.From.Reg = x } p = gc.Prog(x86.ARCRQ) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB, ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) if x != r { if r == x86.REG_CX { v.Fatalf("can't implement %s, target and shift both in CX", v.LongString()) } p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) // should be CX p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst: r := gc.SSARegNum(v) a := gc.SSARegNum(v.Args[0]) if r == a { if v.AuxInt2Int64() == 1 { var asm obj.As switch v.Op { // Software optimization manual recommends add $1,reg. // But inc/dec is 1 byte smaller. ICC always uses inc // Clang/GCC choose depending on flags, but prefer add. // Experiments show that inc/dec is both a little faster // and make a binary a little smaller. case ssa.OpAMD64ADDQconst: asm = x86.AINCQ case ssa.OpAMD64ADDLconst: asm = x86.AINCL case ssa.OpAMD64ADDWconst: asm = x86.AINCL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return } else if v.AuxInt2Int64() == -1 { var asm obj.As switch v.Op { case ssa.OpAMD64ADDQconst: asm = x86.ADECQ case ssa.OpAMD64ADDLconst: asm = x86.ADECL case ssa.OpAMD64ADDWconst: asm = x86.ADECL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return } else { p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r return } } var asm obj.As switch v.Op { case ssa.OpAMD64ADDQconst: asm = x86.ALEAQ case ssa.OpAMD64ADDLconst: asm = x86.ALEAL case ssa.OpAMD64ADDWconst: asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = a p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst, ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) // Arg0 is in/out, move in to out if not already same if r != x { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } // Constant into AX, after arg0 movement in case arg0 is in AX p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) if r != x { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r // TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2 // instead of using the MOVQ above. //p.From3 = new(obj.Addr) //p.From3.Type = obj.TYPE_REG //p.From3.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) // We have 3-op add (lea), so transforming a = b - const into // a = b + (- const), saves us 1 instruction. We can't fit // - (-1 << 31) into 4 bytes offset in lea. // We handle 2-address just fine below. if v.AuxInt2Int64() == -1<<31 || x == r { if x != r { // This code compensates for the fact that the register allocator // doesn't understand 2-address instructions yet. TODO: fix that. p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r } else if x == r && v.AuxInt2Int64() == -1 { var asm obj.As // x = x - (-1) is the same as x++ // See OpAMD64ADDQconst comments about inc vs add $1,reg switch v.Op { case ssa.OpAMD64SUBQconst: asm = x86.AINCQ case ssa.OpAMD64SUBLconst: asm = x86.AINCL case ssa.OpAMD64SUBWconst: asm = x86.AINCL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r } else if x == r && v.AuxInt2Int64() == 1 { var asm obj.As switch v.Op { case ssa.OpAMD64SUBQconst: asm = x86.ADECQ case ssa.OpAMD64SUBLconst: asm = x86.ADECL case ssa.OpAMD64SUBWconst: asm = x86.ADECL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r } else { var asm obj.As switch v.Op { case ssa.OpAMD64SUBQconst: asm = x86.ALEAQ case ssa.OpAMD64SUBLconst: asm = x86.ALEAL case ssa.OpAMD64SUBWconst: asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = x p.From.Offset = -v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r } case ssa.OpAMD64ADDBconst, ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDWconst, ssa.OpAMD64ANDBconst, ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORWconst, ssa.OpAMD64ORBconst, ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORWconst, ssa.OpAMD64XORBconst, ssa.OpAMD64SUBBconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst, ssa.OpAMD64SHLBconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst: // This code compensates for the fact that the register allocator // doesn't understand 2-address instructions yet. TODO: fix that. x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) if x != r { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask: r := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8: p := gc.Prog(x86.ALEAQ) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) switch v.Op { case ssa.OpAMD64LEAQ1: p.From.Scale = 1 case ssa.OpAMD64LEAQ2: p.From.Scale = 2 case ssa.OpAMD64LEAQ4: p.From.Scale = 4 case ssa.OpAMD64LEAQ8: p.From.Scale = 8 } p.From.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64LEAQ: p := gc.Prog(x86.ALEAQ) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB: opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0])) case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD: // Go assembler has swapped operands for UCOMISx relative to CMP, // must account for that right here. opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1])) case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt2Int64() case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpAMD64MOVBconst, ssa.OpAMD64MOVWconst, ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = x // If flags are live at this instruction, suppress the // MOV $0,AX -> XOR AX,AX optimization. if v.Aux != nil { p.Mark |= x86.PRESERVEFLAGS } case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 8 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVLloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 4 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVWloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVWloadidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 2 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVBloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 8 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 4 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVWstoreidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 2 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVBstoreidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 1 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() i := sc.Val() switch v.Op { case ssa.OpAMD64MOVBstoreconst: i = int64(int8(i)) case ssa.OpAMD64MOVWstoreconst: i = int64(int16(i)) case ssa.OpAMD64MOVLstoreconst: i = int64(int32(i)) case ssa.OpAMD64MOVQstoreconst: } p.From.Offset = i p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() switch v.Op { case ssa.OpAMD64MOVBstoreconstidx1: p.From.Offset = int64(int8(sc.Val())) p.To.Scale = 1 case ssa.OpAMD64MOVWstoreconstidx2: p.From.Offset = int64(int16(sc.Val())) p.To.Scale = 2 case ssa.OpAMD64MOVLstoreconstidx4: p.From.Offset = int64(int32(sc.Val())) p.To.Scale = 4 case ssa.OpAMD64MOVQstoreconstidx8: p.From.Offset = sc.Val() p.To.Scale = 8 } p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX, ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ, ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS: opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0])) case ssa.OpAMD64DUFFZERO: p := gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpAMD64MOVOconst: if v.AuxInt != 0 { v.Unimplementedf("MOVOconst can only do constant=0") } r := gc.SSARegNum(v) opregreg(x86.AXORPS, r, r) case ssa.OpAMD64DUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x != y { opregreg(moveByType(v.Type), y, x) } case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpPhi: // just check to make sure regalloc and stackalloc did it right if v.Type.IsMemory() { return } f := v.Block.Func loc := f.RegAlloc[v.ID] for _, a := range v.Args { if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead? v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func) } } case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpAMD64LoweredGetClosurePtr: // Output is hardwired to DX only, // and DX contains the closure pointer on // closure entry, and this "instruction" // is scheduled to the very beginning // of the entry block. case ssa.OpAMD64LoweredGetG: r := gc.SSARegNum(v) // See the comments in cmd/internal/obj/x86/obj6.go // near CanUse1InsnTLS for a detailed explanation of these instructions. if x86.CanUse1InsnTLS(gc.Ctxt) { // MOVQ (TLS), r p := gc.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_MEM p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // MOVQ TLS, r // MOVQ (r)(TLS*1), r p := gc.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r q := gc.Prog(x86.AMOVQ) q.From.Type = obj.TYPE_MEM q.From.Reg = r q.From.Index = x86.REG_TLS q.From.Scale = 1 q.To.Type = obj.TYPE_REG q.To.Reg = r } case ssa.OpAMD64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB, ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) if x != r { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW, ssa.OpAMD64SQRTSD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpSP, ssa.OpSB: // nothing to do case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE, ssa.OpAMD64SETL, ssa.OpAMD64SETLE, ssa.OpAMD64SETG, ssa.OpAMD64SETGE, ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF, ssa.OpAMD64SETB, ssa.OpAMD64SETBE, ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN, ssa.OpAMD64SETA, ssa.OpAMD64SETAE: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64SETNEF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPS) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX // ORL avoids partial register write and is smaller than ORQ, used by old compiler opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64SETEQF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPC) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX // ANDL avoids partial register write and is smaller than ANDQ, used by old compiler opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpAMD64REPSTOSQ: gc.Prog(x86.AREP) gc.Prog(x86.ASTOSQ) case ssa.OpAMD64REPMOVSQ: gc.Prog(x86.AREP) gc.Prog(x86.AMOVSQ) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpAMD64LoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. mem := v.Args[1] for _, w := range v.Block.Succs[0].Values { if w.Op == ssa.OpPhi { if w.Type.IsMemory() { mem = w } continue } if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // w doesn't use a store - can't be a memory op. continue } if w.Args[len(w.Args)-1] != mem { v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) } switch w.Op { case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore: if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: off := ssa.ValAndOff(v.AuxInt).Off() if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } } if w.Type.IsMemory() { if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // these ops are OK mem = w continue } // We can't delay the nil check past the next store. break } } // Issue a load which will fault if the input is nil. // TODO: We currently use the 2-byte instruction TESTB AX, (reg). // Should we use the 3-byte TESTB $0, (reg) instead? It is larger // but it doesn't have false dependency on AX. // Or maybe allocate an output register and use MOVL (reg),reg2 ? // That trades clobbering flags for clobbering a register. p := gc.Prog(x86.ATESTB) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { s.SetLineno(b.Line) switch b.Kind { case ssa.BlockPlain: if b.Succs[0].Block() != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockDefer: // defer returns in R0: // 0 if we should continue executing // 1 if we should jump to deferreturn call p := gc.Prog(arm.ACMP) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.Reg = arm.REG_R0 p = gc.Prog(arm.ABNE) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) if b.Succs[0].Block() != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockExit: gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here case ssa.BlockRet: gc.Prog(obj.ARET) case ssa.BlockRetJmp: p := gc.Prog(obj.ARET) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym)) case ssa.BlockARMEQ, ssa.BlockARMNE, ssa.BlockARMLT, ssa.BlockARMGE, ssa.BlockARMLE, ssa.BlockARMGT, ssa.BlockARMULT, ssa.BlockARMUGT, ssa.BlockARMULE, ssa.BlockARMUGE: jmp := blockJump[b.Kind] var p *obj.Prog switch next { case b.Succs[0].Block(): p = gc.Prog(jmp.invasm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) case b.Succs[1].Block(): p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) default: p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) q := gc.Prog(obj.AJMP) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()}) } default: b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString()) } }
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { s.SetLineno(b.Line) switch b.Kind { case ssa.BlockPlain: if b.Succs[0].Block() != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockDefer: // defer returns in R1: // 0 if we should continue executing // 1 if we should jump to deferreturn call p := gc.Prog(mips.ABNE) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.Reg = mips.REG_R1 p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) if b.Succs[0].Block() != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockExit: gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here case ssa.BlockRet: gc.Prog(obj.ARET) case ssa.BlockRetJmp: p := gc.Prog(obj.ARET) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym)) case ssa.BlockMIPSEQ, ssa.BlockMIPSNE, ssa.BlockMIPSLTZ, ssa.BlockMIPSGEZ, ssa.BlockMIPSLEZ, ssa.BlockMIPSGTZ, ssa.BlockMIPSFPT, ssa.BlockMIPSFPF: jmp := blockJump[b.Kind] var p *obj.Prog switch next { case b.Succs[0].Block(): p = gc.Prog(jmp.invasm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) case b.Succs[1].Block(): p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) default: p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) q := gc.Prog(obj.AJMP) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()}) } if !b.Control.Type.IsFlags() { p.From.Type = obj.TYPE_REG p.From.Reg = b.Control.Reg() } default: b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString()) } }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, ax *uint32, x0 *uint32) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt%int64(gc.Widthreg) != 0 { // should only happen with nacl if cnt%int64(gc.Widthptr) != 0 { gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt) } if *ax == 0 { p = gc.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) *ax = 1 } p = gc.Appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo) lo += int64(gc.Widthptr) cnt -= int64(gc.Widthptr) } if cnt == 8 { if *ax == 0 { p = gc.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) *ax = 1 } p = gc.Appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo) } else if !isPlan9 && cnt <= int64(8*gc.Widthreg) { if *x0 == 0 { p = gc.Appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0) *x0 = 1 } for i := int64(0); i < cnt/16; i++ { p = gc.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+i*16) } if cnt%16 != 0 { p = gc.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, frame+lo+cnt-int64(16)) } } else if !gc.Nacl && !isPlan9 && (cnt <= int64(128*gc.Widthreg)) { if *x0 == 0 { p = gc.Appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0) *x0 = 1 } p = gc.Appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0) p = gc.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt)) p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) if cnt%16 != 0 { p = gc.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8)) } } else { if *ax == 0 { p = gc.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) *ax = 1 } p = gc.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0) p = gc.Appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, frame+lo, obj.TYPE_REG, x86.REG_DI, 0) p = gc.Appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) p = gc.Appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) } return p }
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { s.SetLineno(b.Line) switch b.Kind { case ssa.BlockDefer: // defer returns in R3: // 0 if we should continue executing // 1 if we should jump to deferreturn call p := gc.Prog(ppc64.ACMP) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REG_R3 p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REG_R0 p = gc.Prog(ppc64.ABNE) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) if b.Succs[0].Block() != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck: if b.Succs[0].Block() != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) } case ssa.BlockExit: gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here case ssa.BlockRet: gc.Prog(obj.ARET) case ssa.BlockRetJmp: p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym)) case ssa.BlockPPC64EQ, ssa.BlockPPC64NE, ssa.BlockPPC64LT, ssa.BlockPPC64GE, ssa.BlockPPC64LE, ssa.BlockPPC64GT, ssa.BlockPPC64FLT, ssa.BlockPPC64FGE, ssa.BlockPPC64FLE, ssa.BlockPPC64FGT: jmp := blockJump[b.Kind] likely := b.Likely var p *obj.Prog switch next { case b.Succs[0].Block(): p = gc.Prog(jmp.invasm) likely *= -1 p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()}) if jmp.invasmun { // TODO: The second branch is probably predict-not-taken since it is for FP unordered q := gc.Prog(ppc64.ABVS) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()}) } case b.Succs[1].Block(): p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) if jmp.asmeq { q := gc.Prog(ppc64.ABEQ) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[0].Block()}) } default: p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()}) if jmp.asmeq { q := gc.Prog(ppc64.ABEQ) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[0].Block()}) } q := gc.Prog(obj.AJMP) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()}) } // liblink reorders the instruction stream as it sees fit. // Pass along what we know so liblink can make use of it. // TODO: Once we've fully switched to SSA, // make liblink leave our output alone. //switch likely { //case ssa.BranchUnlikely: // p.From.Type = obj.TYPE_CONST // p.From.Offset = 0 //case ssa.BranchLikely: // p.From.Type = obj.TYPE_CONST // p.From.Offset = 1 //} default: b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpPPC64MOVDconvert: t := v.Type if t.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x != y { rt := obj.TYPE_REG op := ppc64.AMOVD if t.IsFloat() { op = ppc64.AFMOVD } p := gc.Prog(op) p.From.Type = rt p.From.Reg = x p.To.Type = rt p.To.Reg = y } case ssa.OpPPC64Xf2i64: { x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) p := gc.Prog(ppc64.AFMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = x scratchFpMem(s, &p.To) p = gc.Prog(ppc64.AMOVD) p.To.Type = obj.TYPE_REG p.To.Reg = y scratchFpMem(s, &p.From) } case ssa.OpPPC64Xi2f64: { x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = x scratchFpMem(s, &p.To) p = gc.Prog(ppc64.AFMOVD) p.To.Type = obj.TYPE_REG p.To.Reg = y scratchFpMem(s, &p.From) } case ssa.OpPPC64LoweredGetClosurePtr: // Closure pointer is R11 (already) gc.CheckLoweredGetClosurePtr(v) case ssa.OpLoadReg: loadOp := loadByType(v.Type) n, off := gc.AutoVar(v.Args[0]) p := gc.Prog(loadOp) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: storeOp := storeByType(v.Type) n, off := gc.AutoVar(v) p := gc.Prog(storeOp) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpPPC64DIVD: // For now, // // cmp arg1, -1 // be ahead // v = arg0 / arg1 // b over // ahead: v = - arg0 // over: nop r := gc.SSARegNum(v) r0 := gc.SSARegNum(v.Args[0]) r1 := gc.SSARegNum(v.Args[1]) p := gc.Prog(ppc64.ACMP) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_CONST p.To.Offset = -1 pbahead := gc.Prog(ppc64.ABEQ) pbahead.To.Type = obj.TYPE_BRANCH p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = r pbover := gc.Prog(obj.AJMP) pbover.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.ANEG) p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Type = obj.TYPE_REG p.From.Reg = r0 gc.Patch(pbahead, p) p = gc.Prog(obj.ANOP) gc.Patch(pbover, p) case ssa.OpPPC64DIVW: // word-width version of above r := gc.SSARegNum(v) r0 := gc.SSARegNum(v.Args[0]) r1 := gc.SSARegNum(v.Args[1]) p := gc.Prog(ppc64.ACMPW) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_CONST p.To.Offset = -1 pbahead := gc.Prog(ppc64.ABEQ) pbahead.To.Type = obj.TYPE_BRANCH p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = r pbover := gc.Prog(obj.AJMP) pbover.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.ANEG) p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Type = obj.TYPE_REG p.From.Reg = r0 gc.Patch(pbahead, p) p = gc.Prog(obj.ANOP) gc.Patch(pbover, p) case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64XOR, ssa.OpPPC64EQV: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpPPC64MaskIfNotCarry: r := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpPPC64ADDconstForCarry: r1 := gc.SSARegNum(v.Args[0]) p := gc.Prog(v.Op.Asm()) p.Reg = r1 p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect. case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP: r := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst: p := gc.Prog(v.Op.Asm()) p.Reg = gc.SSARegNum(v.Args[0]) if v.Aux != nil { p.From.Type = obj.TYPE_CONST p.From.Offset = gc.AuxOffset(v) } else { p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPPC64MOVDaddr: p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) var wantreg string // Suspect comment, copied from ARM code // MOVD $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVD $off(SP), R wantreg = "SP" p.From.Reg = ppc64.REGSP p.From.Offset = v.AuxInt } if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg) } case ssa.OpPPC64MOVDconst, ssa.OpPPC64MOVWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[1]) case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg: // Shift in register to required size p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Reg = gc.SSARegNum(v) p.To.Type = obj.TYPE_REG case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload, ssa.OpPPC64MOVBload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan, ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual: // On Power7 or later, can use isel instruction: // for a < b, a > b, a = b: // rt := 1 // isel rt,rt,r0,cond // for a >= b, a <= b, a != b: // rt := 1 // isel rt,0,rt,!cond // However, PPCbe support is for older machines than that, // and isel (which looks a lot like fsel) isn't recognized // yet by the Go assembler. So for now, use the old instruction // sequence, which we'll need anyway. // TODO: add support for isel on PPCle and use it. // generate boolean values // use conditional move p := gc.Prog(ppc64.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) pb := gc.Prog(condOps[v.Op]) pb.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) p = gc.Prog(obj.ANOP) gc.Patch(pb, p) case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion ssa.OpPPC64FGreaterEqual: p := gc.Prog(ppc64.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) pb0 := gc.Prog(condOps[v.Op]) pb0.To.Type = obj.TYPE_BRANCH pb1 := gc.Prog(ppc64.ABEQ) pb1.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) p = gc.Prog(obj.ANOP) gc.Patch(pb0, p) gc.Patch(pb1, p) case ssa.OpPPC64LoweredZero: // Similar to how this is done on ARM, // except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off // not store-and-increment. // Therefore R3 should be dest-align // and arg1 should be dest+size-align // HOWEVER, the input dest address cannot be dest-align because // that does not necessarily address valid memory and it's not // known how that might be optimized. Therefore, correct it in // in the expansion: // // ADD -8,R3,R3 // MOVDU R0, 8(R3) // CMP R3, Rarg1 // BL -2(PC) // arg1 is the address of the last element to zero // auxint is alignment var sz int64 var movu obj.As switch { case v.AuxInt%8 == 0: sz = 8 movu = ppc64.AMOVDU case v.AuxInt%4 == 0: sz = 4 movu = ppc64.AMOVWZU // MOVWU instruction not implemented case v.AuxInt%2 == 0: sz = 2 movu = ppc64.AMOVHU default: sz = 1 movu = ppc64.AMOVBU } p := gc.Prog(ppc64.AADD) p.Reg = gc.SSARegNum(v.Args[0]) p.From.Type = obj.TYPE_CONST p.From.Offset = -sz p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) p = gc.Prog(movu) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REG_R0 p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Offset = sz p2 := gc.Prog(ppc64.ACMPU) p2.From.Type = obj.TYPE_REG p2.From.Reg = gc.SSARegNum(v.Args[0]) p2.To.Reg = gc.SSARegNum(v.Args[1]) p2.To.Type = obj.TYPE_REG p3 := gc.Prog(ppc64.ABLT) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpPPC64LoweredMove: // Similar to how this is done on ARM, // except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off, // not store-and-increment. // Inputs must be valid pointers to memory, // so adjust arg0 and arg1 as part of the expansion. // arg2 should be src+size-align, // // ADD -8,R3,R3 // ADD -8,R4,R4 // MOVDU 8(R4), Rtmp // MOVDU Rtmp, 8(R3) // CMP R4, Rarg2 // BL -3(PC) // arg2 is the address of the last element of src // auxint is alignment var sz int64 var movu obj.As switch { case v.AuxInt%8 == 0: sz = 8 movu = ppc64.AMOVDU case v.AuxInt%4 == 0: sz = 4 movu = ppc64.AMOVWZU // MOVWU instruction not implemented case v.AuxInt%2 == 0: sz = 2 movu = ppc64.AMOVHU default: sz = 1 movu = ppc64.AMOVBU } p := gc.Prog(ppc64.AADD) p.Reg = gc.SSARegNum(v.Args[0]) p.From.Type = obj.TYPE_CONST p.From.Offset = -sz p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) p = gc.Prog(ppc64.AADD) p.Reg = gc.SSARegNum(v.Args[1]) p.From.Type = obj.TYPE_CONST p.From.Offset = -sz p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[1]) p = gc.Prog(movu) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[1]) p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP p2 := gc.Prog(movu) p2.From.Type = obj.TYPE_REG p2.From.Reg = ppc64.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = gc.SSARegNum(v.Args[0]) p2.To.Offset = sz p3 := gc.Prog(ppc64.ACMPU) p3.From.Reg = gc.SSARegNum(v.Args[1]) p3.From.Type = obj.TYPE_REG p3.To.Reg = gc.SSARegNum(v.Args[2]) p3.To.Type = obj.TYPE_REG p4 := gc.Prog(ppc64.ABLT) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpPPC64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert two actual hardware NOPs that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. // PPC64 is unusual because TWO nops are required // (see gc/cgen.go, gc/plive.go -- copy of comment below) // // On ppc64, when compiling Go into position // independent code on ppc64le we insert an // instruction to reload the TOC pointer from the // stack as well. See the long comment near // jmpdefer in runtime/asm_ppc64.s for why. // If the MOVD is not needed, insert a hardware NOP // so that the same number of instructions are used // on ppc64 in both shared and non-shared modes. ginsnop() if gc.Ctxt.Flag_shared { p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_MEM p.From.Offset = 24 p.From.Reg = ppc64.REGSP p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REG_R2 } else { ginsnop() } } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter: p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REG_CTR if gc.Ctxt.Flag_shared && p.From.Reg != ppc64.REG_R12 { // Make sure function pointer is in R12 as well when // compiling Go into PIC. // TODO(mwhudson): it would obviously be better to // change the register allocation to put the value in // R12 already, but I don't know how to do that. // TODO: We have the technology now to implement TODO above. q := gc.Prog(ppc64.AMOVD) q.From = p.From q.To.Type = obj.TYPE_REG q.To.Reg = ppc64.REG_R12 } pp := gc.Prog(obj.ACALL) pp.To.Type = obj.TYPE_REG pp.To.Reg = ppc64.REG_CTR if gc.Ctxt.Flag_shared { // When compiling Go into PIC, the function we just // called via pointer might have been implemented in // a separate module and so overwritten the TOC // pointer in R2; reload it. q := gc.Prog(ppc64.AMOVD) q.From.Type = obj.TYPE_MEM q.From.Offset = 24 q.From.Reg = ppc64.REGSP q.To.Type = obj.TYPE_REG q.To.Reg = ppc64.REG_R2 } if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpPPC64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpPPC64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: if !v.Args[0].Type.IsPtrShaped() { v.Fatalf("keeping non-pointer alive %v", v.Args[0]) } n, off := gc.AutoVar(v.Args[0]) if n == nil { v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0]) } if off != 0 { v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off) } gc.Gvarlive(n) case ssa.OpPhi: // just check to make sure regalloc and stackalloc did it right if v.Type.IsMemory() { return } f := v.Block.Func loc := f.RegAlloc[v.ID] for _, a := range v.Args { if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead? v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func) } } case ssa.OpPPC64LoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. // mem := v.Args[1] // for _, w := range v.Block.Succs[0].Block().Values { // if w.Op == ssa.OpPhi { // if w.Type.IsMemory() { // mem = w // } // continue // } // if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // // w doesn't use a store - can't be a memory op. // continue // } // if w.Args[len(w.Args)-1] != mem { // v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) // } // switch w.Op { // case ssa.OpPPC64MOVBload, ssa.OpPPC64MOVBUload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVHUload, // ssa.OpPPC64MOVWload, ssa.OpPPC64MOVFload, ssa.OpPPC64MOVDload, // ssa.OpPPC64MOVBstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVWstore, // ssa.OpPPC64MOVFstore, ssa.OpPPC64MOVDstore: // // arg0 is ptr, auxint is offset // if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { // if gc.Debug_checknil != 0 && int(v.Line) > 1 { // gc.Warnl(v.Line, "removed nil check") // } // return // } // case ssa.OpPPC64DUFFZERO, ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroU: // // arg0 is ptr // if w.Args[0] == v.Args[0] { // if gc.Debug_checknil != 0 && int(v.Line) > 1 { // gc.Warnl(v.Line, "removed nil check") // } // return // } // case ssa.OpPPC64DUFFCOPY, ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveU: // // arg0 is dst ptr, arg1 is src ptr // if w.Args[0] == v.Args[0] || w.Args[1] == v.Args[0] { // if gc.Debug_checknil != 0 && int(v.Line) > 1 { // gc.Warnl(v.Line, "removed nil check") // } // return // } // default: // } // if w.Type.IsMemory() { // if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // // these ops are OK // mem = w // continue // } // // We can't delay the nil check past the next store. // break // } // } // Issue a load which will fault if arg is nil. p := gc.Prog(ppc64.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpPPC64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpARM64MOVDconvert, ssa.OpARM64MOVDreg: if v.Type.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x == y { return } as := arm64.AMOVD if v.Type.IsFloat() { switch v.Type.Size() { case 4: as = arm64.AFMOVS case 8: as = arm64.AFMOVD default: panic("bad float size") } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y case ssa.OpARM64MOVDnop: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() gc.AddrAuto(&p.To, v) case ssa.OpARM64ADD, ssa.OpARM64SUB, ssa.OpARM64AND, ssa.OpARM64OR, ssa.OpARM64XOR, ssa.OpARM64BIC, ssa.OpARM64MUL, ssa.OpARM64MULW, ssa.OpARM64MULH, ssa.OpARM64UMULH, ssa.OpARM64MULL, ssa.OpARM64UMULL, ssa.OpARM64DIV, ssa.OpARM64UDIV, ssa.OpARM64DIVW, ssa.OpARM64UDIVW, ssa.OpARM64MOD, ssa.OpARM64UMOD, ssa.OpARM64MODW, ssa.OpARM64UMODW, ssa.OpARM64SLL, ssa.OpARM64SRL, ssa.OpARM64SRA, ssa.OpARM64FADDS, ssa.OpARM64FADDD, ssa.OpARM64FSUBS, ssa.OpARM64FSUBD, ssa.OpARM64FMULS, ssa.OpARM64FMULD, ssa.OpARM64FDIVS, ssa.OpARM64FDIVD: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARM64ADDconst, ssa.OpARM64SUBconst, ssa.OpARM64ANDconst, ssa.OpARM64ORconst, ssa.OpARM64XORconst, ssa.OpARM64BICconst, ssa.OpARM64SLLconst, ssa.OpARM64SRLconst, ssa.OpARM64SRAconst, ssa.OpARM64RORconst, ssa.OpARM64RORWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64ADDshiftLL, ssa.OpARM64SUBshiftLL, ssa.OpARM64ANDshiftLL, ssa.OpARM64ORshiftLL, ssa.OpARM64XORshiftLL, ssa.OpARM64BICshiftLL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt) case ssa.OpARM64ADDshiftRL, ssa.OpARM64SUBshiftRL, ssa.OpARM64ANDshiftRL, ssa.OpARM64ORshiftRL, ssa.OpARM64XORshiftRL, ssa.OpARM64BICshiftRL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt) case ssa.OpARM64ADDshiftRA, ssa.OpARM64SUBshiftRA, ssa.OpARM64ANDshiftRA, ssa.OpARM64ORshiftRA, ssa.OpARM64XORshiftRA, ssa.OpARM64BICshiftRA: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt) case ssa.OpARM64MOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64FMOVSconst, ssa.OpARM64FMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64CMP, ssa.OpARM64CMPW, ssa.OpARM64CMN, ssa.OpARM64CMNW, ssa.OpARM64FCMPS, ssa.OpARM64FCMPD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() case ssa.OpARM64CMPconst, ssa.OpARM64CMPWconst, ssa.OpARM64CMNconst, ssa.OpARM64CMNWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() case ssa.OpARM64CMPshiftLL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt) case ssa.OpARM64CMPshiftRL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt) case ssa.OpARM64CMPshiftRA: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt) case ssa.OpARM64MOVDaddr: p := gc.Prog(arm64.AMOVD) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() var wantreg string // MOVD $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R13) // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVD $off(SP), R wantreg = "SP" p.From.Reg = arm64.REGSP p.From.Offset = v.AuxInt } if reg := v.Args[0].RegName(); reg != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) } case ssa.OpARM64MOVBload, ssa.OpARM64MOVBUload, ssa.OpARM64MOVHload, ssa.OpARM64MOVHUload, ssa.OpARM64MOVWload, ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload, ssa.OpARM64FMOVSload, ssa.OpARM64FMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64LDAR, ssa.OpARM64LDARW: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpARM64MOVBstore, ssa.OpARM64MOVHstore, ssa.OpARM64MOVWstore, ssa.OpARM64MOVDstore, ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore, ssa.OpARM64STLR, ssa.OpARM64STLRW: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpARM64MOVBstorezero, ssa.OpARM64MOVHstorezero, ssa.OpARM64MOVWstorezero, ssa.OpARM64MOVDstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = arm64.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpARM64LoweredAtomicExchange64, ssa.OpARM64LoweredAtomicExchange32: // LDAXR (Rarg0), Rout // STLXR Rarg1, (Rarg0), Rtmp // CBNZ Rtmp, -2(PC) ld := arm64.ALDAXR st := arm64.ASTLXR if v.Op == ssa.OpARM64LoweredAtomicExchange32 { ld = arm64.ALDAXRW st = arm64.ASTLXRW } r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() out := v.Reg0() p := gc.Prog(ld) p.From.Type = obj.TYPE_MEM p.From.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = out p1 := gc.Prog(st) p1.From.Type = obj.TYPE_REG p1.From.Reg = r1 p1.To.Type = obj.TYPE_MEM p1.To.Reg = r0 p1.RegTo2 = arm64.REGTMP p2 := gc.Prog(arm64.ACBNZ) p2.From.Type = obj.TYPE_REG p2.From.Reg = arm64.REGTMP p2.To.Type = obj.TYPE_BRANCH gc.Patch(p2, p) case ssa.OpARM64LoweredAtomicAdd64, ssa.OpARM64LoweredAtomicAdd32: // LDAXR (Rarg0), Rout // ADD Rarg1, Rout // STLXR Rout, (Rarg0), Rtmp // CBNZ Rtmp, -3(PC) ld := arm64.ALDAXR st := arm64.ASTLXR if v.Op == ssa.OpARM64LoweredAtomicAdd32 { ld = arm64.ALDAXRW st = arm64.ASTLXRW } r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() out := v.Reg0() p := gc.Prog(ld) p.From.Type = obj.TYPE_MEM p.From.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = out p1 := gc.Prog(arm64.AADD) p1.From.Type = obj.TYPE_REG p1.From.Reg = r1 p1.To.Type = obj.TYPE_REG p1.To.Reg = out p2 := gc.Prog(st) p2.From.Type = obj.TYPE_REG p2.From.Reg = out p2.To.Type = obj.TYPE_MEM p2.To.Reg = r0 p2.RegTo2 = arm64.REGTMP p3 := gc.Prog(arm64.ACBNZ) p3.From.Type = obj.TYPE_REG p3.From.Reg = arm64.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARM64LoweredAtomicCas64, ssa.OpARM64LoweredAtomicCas32: // LDAXR (Rarg0), Rtmp // CMP Rarg1, Rtmp // BNE 3(PC) // STLXR Rarg2, (Rarg0), Rtmp // CBNZ Rtmp, -4(PC) // CSET EQ, Rout ld := arm64.ALDAXR st := arm64.ASTLXR cmp := arm64.ACMP if v.Op == ssa.OpARM64LoweredAtomicCas32 { ld = arm64.ALDAXRW st = arm64.ASTLXRW cmp = arm64.ACMPW } r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() r2 := v.Args[2].Reg() out := v.Reg0() p := gc.Prog(ld) p.From.Type = obj.TYPE_MEM p.From.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REGTMP p1 := gc.Prog(cmp) p1.From.Type = obj.TYPE_REG p1.From.Reg = r1 p1.Reg = arm64.REGTMP p2 := gc.Prog(arm64.ABNE) p2.To.Type = obj.TYPE_BRANCH p3 := gc.Prog(st) p3.From.Type = obj.TYPE_REG p3.From.Reg = r2 p3.To.Type = obj.TYPE_MEM p3.To.Reg = r0 p3.RegTo2 = arm64.REGTMP p4 := gc.Prog(arm64.ACBNZ) p4.From.Type = obj.TYPE_REG p4.From.Reg = arm64.REGTMP p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) p5 := gc.Prog(arm64.ACSET) p5.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg p5.From.Reg = arm64.COND_EQ p5.To.Type = obj.TYPE_REG p5.To.Reg = out gc.Patch(p2, p5) case ssa.OpARM64LoweredAtomicAnd8, ssa.OpARM64LoweredAtomicOr8: // LDAXRB (Rarg0), Rtmp // AND/OR Rarg1, Rtmp // STLXRB Rtmp, (Rarg0), Rtmp // CBNZ Rtmp, -3(PC) r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() p := gc.Prog(arm64.ALDAXRB) p.From.Type = obj.TYPE_MEM p.From.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REGTMP p1 := gc.Prog(v.Op.Asm()) p1.From.Type = obj.TYPE_REG p1.From.Reg = r1 p1.To.Type = obj.TYPE_REG p1.To.Reg = arm64.REGTMP p2 := gc.Prog(arm64.ASTLXRB) p2.From.Type = obj.TYPE_REG p2.From.Reg = arm64.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = r0 p2.RegTo2 = arm64.REGTMP p3 := gc.Prog(arm64.ACBNZ) p3.From.Type = obj.TYPE_REG p3.From.Reg = arm64.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARM64MOVBreg, ssa.OpARM64MOVBUreg, ssa.OpARM64MOVHreg, ssa.OpARM64MOVHUreg, ssa.OpARM64MOVWreg, ssa.OpARM64MOVWUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(), v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(), v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if v.Reg() == v.Args[0].Reg() { return } p := gc.Prog(arm64.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() return default: } } fallthrough case ssa.OpARM64MVN, ssa.OpARM64NEG, ssa.OpARM64FNEGS, ssa.OpARM64FNEGD, ssa.OpARM64FSQRTD, ssa.OpARM64FCVTZSSW, ssa.OpARM64FCVTZSDW, ssa.OpARM64FCVTZUSW, ssa.OpARM64FCVTZUDW, ssa.OpARM64FCVTZSS, ssa.OpARM64FCVTZSD, ssa.OpARM64FCVTZUS, ssa.OpARM64FCVTZUD, ssa.OpARM64SCVTFWS, ssa.OpARM64SCVTFWD, ssa.OpARM64SCVTFS, ssa.OpARM64SCVTFD, ssa.OpARM64UCVTFWS, ssa.OpARM64UCVTFWD, ssa.OpARM64UCVTFS, ssa.OpARM64UCVTFD, ssa.OpARM64FCVTSD, ssa.OpARM64FCVTDS, ssa.OpARM64REV, ssa.OpARM64REVW, ssa.OpARM64REV16W, ssa.OpARM64RBIT, ssa.OpARM64RBITW, ssa.OpARM64CLZ, ssa.OpARM64CLZW: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64CSELULT, ssa.OpARM64CSELULT0: r1 := int16(arm64.REGZERO) if v.Op == ssa.OpARM64CSELULT { r1 = v.Args[1].Reg() } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg p.From.Reg = arm64.COND_LO p.Reg = v.Args[0].Reg() p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1} p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64DUFFZERO: // runtime.duffzero expects start address - 8 in R16 p := gc.Prog(arm64.ASUB) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REG_R16 p = gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARM64LoweredZero: // MOVD.P ZR, 8(R16) // CMP Rarg1, R16 // BLE -2(PC) // arg1 is the address of the last element to zero p := gc.Prog(arm64.AMOVD) p.Scond = arm64.C_XPOST p.From.Type = obj.TYPE_REG p.From.Reg = arm64.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = arm64.REG_R16 p.To.Offset = 8 p2 := gc.Prog(arm64.ACMP) p2.From.Type = obj.TYPE_REG p2.From.Reg = v.Args[1].Reg() p2.Reg = arm64.REG_R16 p3 := gc.Prog(arm64.ABLE) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARM64DUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARM64LoweredMove: // MOVD.P 8(R16), Rtmp // MOVD.P Rtmp, 8(R17) // CMP Rarg2, R16 // BLE -3(PC) // arg2 is the address of the last element of src p := gc.Prog(arm64.AMOVD) p.Scond = arm64.C_XPOST p.From.Type = obj.TYPE_MEM p.From.Reg = arm64.REG_R16 p.From.Offset = 8 p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REGTMP p2 := gc.Prog(arm64.AMOVD) p2.Scond = arm64.C_XPOST p2.From.Type = obj.TYPE_REG p2.From.Reg = arm64.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = arm64.REG_R17 p2.To.Offset = 8 p3 := gc.Prog(arm64.ACMP) p3.From.Type = obj.TYPE_REG p3.From.Reg = v.Args[2].Reg() p3.Reg = arm64.REG_R16 p4 := gc.Prog(arm64.ABLE) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpARM64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARM64LoweredNilCheck: // Issue a load which will fault if arg is nil. p := gc.Prog(arm64.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = arm64.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpARM64Equal, ssa.OpARM64NotEqual, ssa.OpARM64LessThan, ssa.OpARM64LessEqual, ssa.OpARM64GreaterThan, ssa.OpARM64GreaterEqual, ssa.OpARM64LessThanU, ssa.OpARM64LessEqualU, ssa.OpARM64GreaterThanU, ssa.OpARM64GreaterEqualU: // generate boolean values using CSET p := gc.Prog(arm64.ACSET) p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg p.From.Reg = condBits[v.Op] p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpARM64LoweredGetClosurePtr: // Closure pointer is R26 (arm64.REGCTXT). gc.CheckLoweredGetClosurePtr(v) case ssa.OpARM64FlagEQ, ssa.OpARM64FlagLT_ULT, ssa.OpARM64FlagLT_UGT, ssa.OpARM64FlagGT_ULT, ssa.OpARM64FlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpARM64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI) var src gc.Node gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI) var tsrc gc.Node gc.Tempname(&tsrc, gc.Types[gc.Tptr]) var tdst gc.Node gc.Tempname(&tdst, gc.Types[gc.Tptr]) if !n.Addable { gc.Agen(n, &tsrc) } if !res.Addable { gc.Agen(res, &tdst) } if n.Addable { gc.Agen(n, &src) } else { gmove(&tsrc, &src) } if res.Op == gc.ONAME { gc.Gvardef(res) } if res.Addable { gc.Agen(res, &dst) } else { gmove(&tdst, &dst) } c := int32(w % 4) // bytes q := int32(w / 4) // doublewords // if we are copying forward on the stack and // the src and dst overlap, then reverse direction if osrc < odst && odst < osrc+w { // reverse direction gins(x86.ASTD, nil, nil) // set direction flag if c > 0 { gconreg(x86.AADDL, w-1, x86.REG_SI) gconreg(x86.AADDL, w-1, x86.REG_DI) gconreg(x86.AMOVL, int64(c), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)- } if q > 0 { if c > 0 { gconreg(x86.AADDL, -3, x86.REG_SI) gconreg(x86.AADDL, -3, x86.REG_DI) } else { gconreg(x86.AADDL, w-4, x86.REG_SI) gconreg(x86.AADDL, w-4, x86.REG_DI) } gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)- } // we leave with the flag clear gins(x86.ACLD, nil, nil) } else { gins(x86.ACLD, nil, nil) // paranoia. TODO(rsc): remove? // normal direction if q > 128 || (q >= 4 && gc.Nacl) { gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+ } else if q >= 4 { p := gins(obj.ADUFFCOPY, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) // 10 and 128 = magic constants: see ../../runtime/asm_386.s p.To.Offset = 10 * (128 - int64(q)) } else if !gc.Nacl && c == 0 { var cx gc.Node gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX) // We don't need the MOVSL side-effect of updating SI and DI, // and issuing a sequence of MOVLs directly is faster. src.Op = gc.OINDREG dst.Op = gc.OINDREG for q > 0 { gmove(&src, &cx) // MOVL x+(SI),CX gmove(&cx, &dst) // MOVL CX,x+(DI) src.Xoffset += 4 dst.Xoffset += 4 q-- } } else { for q > 0 { gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+ q-- } } for c > 0 { gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+ c-- } } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) switch { case r == r1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case r == r2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r default: var asm obj.As if v.Op == ssa.OpAMD64ADDQ { asm = x86.ALEAQ } else { asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = r1 p.From.Scale = 1 p.From.Index = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r } // 2-address opcode arithmetic case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD, ssa.OpAMD64PXOR: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1])) case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU: // Arg[0] (the dividend) is in AX. // Arg[1] (the divisor) can be in any other register. // Result[0] (the quotient) is in AX. // Result[1] (the remainder) is in DX. r := gc.SSARegNum(v.Args[1]) // Zero extend dividend. c := gc.Prog(x86.AXORL) c.From.Type = obj.TYPE_REG c.From.Reg = x86.REG_DX c.To.Type = obj.TYPE_REG c.To.Reg = x86.REG_DX // Issue divide. p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW: // Arg[0] (the dividend) is in AX. // Arg[1] (the divisor) can be in any other register. // Result[0] (the quotient) is in AX. // Result[1] (the remainder) is in DX. r := gc.SSARegNum(v.Args[1]) // CPU faults upon signed overflow, which occurs when the most // negative int is divided by -1. Handle divide by -1 as a special case. var c *obj.Prog switch v.Op { case ssa.OpAMD64DIVQ: c = gc.Prog(x86.ACMPQ) case ssa.OpAMD64DIVL: c = gc.Prog(x86.ACMPL) case ssa.OpAMD64DIVW: c = gc.Prog(x86.ACMPW) } c.From.Type = obj.TYPE_REG c.From.Reg = r c.To.Type = obj.TYPE_CONST c.To.Offset = -1 j1 := gc.Prog(x86.AJEQ) j1.To.Type = obj.TYPE_BRANCH // Sign extend dividend. switch v.Op { case ssa.OpAMD64DIVQ: gc.Prog(x86.ACQO) case ssa.OpAMD64DIVL: gc.Prog(x86.ACDQ) case ssa.OpAMD64DIVW: gc.Prog(x86.ACWD) } // Issue divide. p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r // Skip over -1 fixup code. j2 := gc.Prog(obj.AJMP) j2.To.Type = obj.TYPE_BRANCH // Issue -1 fixup code. // n / -1 = -n n1 := gc.Prog(x86.ANEGQ) n1.To.Type = obj.TYPE_REG n1.To.Reg = x86.REG_AX // n % -1 == 0 n2 := gc.Prog(x86.AXORL) n2.From.Type = obj.TYPE_REG n2.From.Reg = x86.REG_DX n2.To.Type = obj.TYPE_REG n2.To.Reg = x86.REG_DX // TODO(khr): issue only the -1 fixup code we need. // For instance, if only the quotient is used, no point in zeroing the remainder. j1.To.Val = n1 j2.To.Val = s.Pc() case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU: // the frontend rewrites constant division by 8/16/32 bit integers into // HMUL by a constant // SSA rewrites generate the 64 bit versions // Arg[0] is already in AX as it's the only register we allow // and DX is the only output we care about (the high bits) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) // IMULB puts the high portion in AH instead of DL, // so move it to DL for consistency if v.Type.Size() == 1 { m := gc.Prog(x86.AMOVB) m.From.Type = obj.TYPE_REG m.From.Reg = x86.REG_AH m.To.Type = obj.TYPE_REG m.To.Reg = x86.REG_DX } case ssa.OpAMD64AVGQU: // compute (x+y)/2 unsigned. // Do a 64-bit add, the overflow goes into the carry. // Shift right once and pull the carry back into the 63rd bit. r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(x86.AADDQ) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Reg = gc.SSARegNum(v.Args[1]) p = gc.Prog(x86.ARCRQ) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst: r := gc.SSARegNum(v) a := gc.SSARegNum(v.Args[0]) if r == a { if v.AuxInt == 1 { var asm obj.As // Software optimization manual recommends add $1,reg. // But inc/dec is 1 byte smaller. ICC always uses inc // Clang/GCC choose depending on flags, but prefer add. // Experiments show that inc/dec is both a little faster // and make a binary a little smaller. if v.Op == ssa.OpAMD64ADDQconst { asm = x86.AINCQ } else { asm = x86.AINCL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return } if v.AuxInt == -1 { var asm obj.As if v.Op == ssa.OpAMD64ADDQconst { asm = x86.ADECQ } else { asm = x86.ADECL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r return } var asm obj.As if v.Op == ssa.OpAMD64ADDQconst { asm = x86.ALEAQ } else { asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = a p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst, ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // Constant into AX p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r // TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2 // then we don't need to use resultInArg0 for these ops. //p.From3 = new(obj.Addr) //p.From3.Type = obj.TYPE_REG //p.From3.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask: r := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8: r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) p := gc.Prog(x86.ALEAQ) switch v.Op { case ssa.OpAMD64LEAQ1: p.From.Scale = 1 if i == x86.REG_SP { r, i = i, r } case ssa.OpAMD64LEAQ2: p.From.Scale = 2 case ssa.OpAMD64LEAQ4: p.From.Scale = 4 case ssa.OpAMD64LEAQ8: p.From.Scale = 8 } p.From.Type = obj.TYPE_MEM p.From.Reg = r p.From.Index = i gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB: opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0])) case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD: // Go assembler has swapped operands for UCOMISx relative to CMP, // must account for that right here. opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1])) case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = x // If flags are live at this instruction, suppress the // MOV $0,AX -> XOR AX,AX optimization. if v.Aux != nil { p.Mark |= x86.PRESERVEFLAGS } case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 8 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 4 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVWloadidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 2 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1: r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) if i == x86.REG_SP { r, i = i, r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = r p.From.Scale = 1 p.From.Index = i gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 8 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 4 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVWstoreidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 2 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1: r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) if i == x86.REG_SP { r, i = i, r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = r p.To.Scale = 1 p.To.Index = i gc.AddAux(&p.To, v) case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val() p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val() r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) switch v.Op { case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1: p.To.Scale = 1 if i == x86.REG_SP { r, i = i, r } case ssa.OpAMD64MOVWstoreconstidx2: p.To.Scale = 2 case ssa.OpAMD64MOVLstoreconstidx4: p.To.Scale = 4 case ssa.OpAMD64MOVQstoreconstidx8: p.To.Scale = 8 } p.To.Type = obj.TYPE_MEM p.To.Reg = r p.To.Index = i gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX, ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ, ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS: opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0])) case ssa.OpAMD64DUFFZERO: off := duffStart(v.AuxInt) adj := duffAdj(v.AuxInt) var p *obj.Prog if adj != 0 { p = gc.Prog(x86.AADDQ) p.From.Type = obj.TYPE_CONST p.From.Offset = adj p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_DI } p = gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = off case ssa.OpAMD64MOVOconst: if v.AuxInt != 0 { v.Unimplementedf("MOVOconst can only do constant=0") } r := gc.SSARegNum(v) opregreg(x86.AXORPS, r, r) case ssa.OpAMD64DUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x != y { opregreg(moveByType(v.Type), y, x) } case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpAMD64LoweredGetClosurePtr: // Closure pointer is DX. gc.CheckLoweredGetClosurePtr(v) case ssa.OpAMD64LoweredGetG: r := gc.SSARegNum(v) // See the comments in cmd/internal/obj/x86/obj6.go // near CanUse1InsnTLS for a detailed explanation of these instructions. if x86.CanUse1InsnTLS(gc.Ctxt) { // MOVQ (TLS), r p := gc.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_MEM p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // MOVQ TLS, r // MOVQ (r)(TLS*1), r p := gc.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r q := gc.Prog(x86.AMOVQ) q.From.Type = obj.TYPE_MEM q.From.Reg = r q.From.Index = x86.REG_TLS q.From.Scale = 1 q.To.Type = obj.TYPE_REG q.To.Reg = r } case ssa.OpAMD64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW, ssa.OpAMD64SQRTSD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpSP, ssa.OpSB: // nothing to do case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE, ssa.OpAMD64SETL, ssa.OpAMD64SETLE, ssa.OpAMD64SETG, ssa.OpAMD64SETGE, ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF, ssa.OpAMD64SETB, ssa.OpAMD64SETBE, ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN, ssa.OpAMD64SETA, ssa.OpAMD64SETAE: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64SETNEF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPS) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX // ORL avoids partial register write and is smaller than ORQ, used by old compiler opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64SETEQF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPC) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX // ANDL avoids partial register write and is smaller than ANDQ, used by old compiler opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpAMD64REPSTOSQ: gc.Prog(x86.AREP) gc.Prog(x86.ASTOSQ) case ssa.OpAMD64REPMOVSQ: gc.Prog(x86.AREP) gc.Prog(x86.AMOVSQ) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: if !v.Args[0].Type.IsPtrShaped() { v.Fatalf("keeping non-pointer alive %v", v.Args[0]) } n, off := gc.AutoVar(v.Args[0]) if n == nil { v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0]) } if off != 0 { v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off) } gc.Gvarlive(n) case ssa.OpAMD64LoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. mem := v.Args[1] for _, w := range v.Block.Succs[0].Block().Values { if w.Op == ssa.OpPhi { if w.Type.IsMemory() { mem = w } continue } if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // w doesn't use a store - can't be a memory op. continue } if w.Args[len(w.Args)-1] != mem { v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) } switch w.Op { case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore: if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: off := ssa.ValAndOff(v.AuxInt).Off() if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } } if w.Type.IsMemory() { if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // these ops are OK mem = w continue } // We can't delay the nil check past the next store. break } } // Issue a load which will fault if the input is nil. // TODO: We currently use the 2-byte instruction TESTB AX, (reg). // Should we use the 3-byte TESTB $0, (reg) instead? It is larger // but it doesn't have false dependency on AX. // Or maybe allocate an output register and use MOVL (reg),reg2 ? // That trades clobbering flags for clobbering a register. p := gc.Prog(x86.ATESTB) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpCopy, ssa.OpMIPSMOVWconvert, ssa.OpMIPSMOVWreg: t := v.Type if t.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x == y { return } as := mips.AMOVW if isFPreg(x) && isFPreg(y) { as = mips.AMOVF if t.Size() == 8 { as = mips.AMOVD } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y if isHILO(x) && isHILO(y) || isHILO(x) && isFPreg(y) || isFPreg(x) && isHILO(y) { // cannot move between special registers, use TMP as intermediate p.To.Reg = mips.REGTMP p = gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = y } case ssa.OpMIPSMOVWnop: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } r := v.Reg() p := gc.Prog(loadByType(v.Type, r)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = r if isHILO(r) { // cannot directly load, load to TMP and move p.To.Reg = mips.REGTMP p = gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = r } case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } r := v.Args[0].Reg() if isHILO(r) { // cannot directly store, move to TMP and store p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP r = mips.REGTMP } p := gc.Prog(storeByType(v.Type, r)) p.From.Type = obj.TYPE_REG p.From.Reg = r gc.AddrAuto(&p.To, v) case ssa.OpMIPSADD, ssa.OpMIPSSUB, ssa.OpMIPSAND, ssa.OpMIPSOR, ssa.OpMIPSXOR, ssa.OpMIPSNOR, ssa.OpMIPSSLL, ssa.OpMIPSSRL, ssa.OpMIPSSRA, ssa.OpMIPSADDF, ssa.OpMIPSADDD, ssa.OpMIPSSUBF, ssa.OpMIPSSUBD, ssa.OpMIPSMULF, ssa.OpMIPSMULD, ssa.OpMIPSDIVF, ssa.OpMIPSDIVD, ssa.OpMIPSMUL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSSGT, ssa.OpMIPSSGTU: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSSGTzero, ssa.OpMIPSSGTUzero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSADDconst, ssa.OpMIPSSUBconst, ssa.OpMIPSANDconst, ssa.OpMIPSORconst, ssa.OpMIPSXORconst, ssa.OpMIPSNORconst, ssa.OpMIPSSLLconst, ssa.OpMIPSSRLconst, ssa.OpMIPSSRAconst, ssa.OpMIPSSGTconst, ssa.OpMIPSSGTUconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSMULT, ssa.OpMIPSMULTU, ssa.OpMIPSDIV, ssa.OpMIPSDIVU: // result in hi,lo p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() case ssa.OpMIPSMOVWconst: r := v.Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r if isFPreg(r) || isHILO(r) { // cannot move into FP or special registers, use TMP as intermediate p.To.Reg = mips.REGTMP p = gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = r } case ssa.OpMIPSMOVFconst, ssa.OpMIPSMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSCMOVZ: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSCMOVZzero: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSCMPEQF, ssa.OpMIPSCMPEQD, ssa.OpMIPSCMPGEF, ssa.OpMIPSCMPGED, ssa.OpMIPSCMPGTF, ssa.OpMIPSCMPGTD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() case ssa.OpMIPSMOVWaddr: p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_ADDR var wantreg string // MOVW $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R29) // when constant is large, tmp register (R23) may be used // - base is SB: load external address with relocation switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVW $off(SP), R wantreg = "SP" p.From.Reg = mips.REGSP p.From.Offset = v.AuxInt } if reg := v.Args[0].RegName(); reg != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) } p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSMOVBload, ssa.OpMIPSMOVBUload, ssa.OpMIPSMOVHload, ssa.OpMIPSMOVHUload, ssa.OpMIPSMOVWload, ssa.OpMIPSMOVFload, ssa.OpMIPSMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSMOVBstore, ssa.OpMIPSMOVHstore, ssa.OpMIPSMOVWstore, ssa.OpMIPSMOVFstore, ssa.OpMIPSMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpMIPSMOVBstorezero, ssa.OpMIPSMOVHstorezero, ssa.OpMIPSMOVWstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpMIPSMOVBreg, ssa.OpMIPSMOVBUreg, ssa.OpMIPSMOVHreg, ssa.OpMIPSMOVHUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpMIPSMOVWreg || a.Op == ssa.OpMIPSMOVWnop { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpMIPSMOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpMIPSMOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpMIPSMOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpMIPSMOVHUreg && t.Size() == 2 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if v.Reg() == v.Args[0].Reg() { return } p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() return default: } } fallthrough case ssa.OpMIPSMOVWF, ssa.OpMIPSMOVWD, ssa.OpMIPSTRUNCFW, ssa.OpMIPSTRUNCDW, ssa.OpMIPSMOVFD, ssa.OpMIPSMOVDF, ssa.OpMIPSNEGF, ssa.OpMIPSNEGD, ssa.OpMIPSSQRTD, ssa.OpMIPSCLZ: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSNEG: // SUB from REGZERO p := gc.Prog(mips.ASUBU) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSLoweredZero: // SUBU $4, R1 // MOVW R0, 4(R1) // ADDU $4, R1 // BNE Rarg1, R1, -2(PC) // arg1 is the address of the last element to zero var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = mips.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = mips.AMOVH default: sz = 1 mov = mips.AMOVB } p := gc.Prog(mips.ASUBU) p.From.Type = obj.TYPE_CONST p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = mips.REG_R1 p2 := gc.Prog(mov) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGZERO p2.To.Type = obj.TYPE_MEM p2.To.Reg = mips.REG_R1 p2.To.Offset = sz p3 := gc.Prog(mips.AADDU) p3.From.Type = obj.TYPE_CONST p3.From.Offset = sz p3.To.Type = obj.TYPE_REG p3.To.Reg = mips.REG_R1 p4 := gc.Prog(mips.ABNE) p4.From.Type = obj.TYPE_REG p4.From.Reg = v.Args[1].Reg() p4.Reg = mips.REG_R1 p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p2) case ssa.OpMIPSLoweredMove: // SUBU $4, R1 // MOVW 4(R1), Rtmp // MOVW Rtmp, (R2) // ADDU $4, R1 // ADDU $4, R2 // BNE Rarg2, R1, -4(PC) // arg2 is the address of the last element of src var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = mips.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = mips.AMOVH default: sz = 1 mov = mips.AMOVB } p := gc.Prog(mips.ASUBU) p.From.Type = obj.TYPE_CONST p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = mips.REG_R1 p2 := gc.Prog(mov) p2.From.Type = obj.TYPE_MEM p2.From.Reg = mips.REG_R1 p2.From.Offset = sz p2.To.Type = obj.TYPE_REG p2.To.Reg = mips.REGTMP p3 := gc.Prog(mov) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_MEM p3.To.Reg = mips.REG_R2 p4 := gc.Prog(mips.AADDU) p4.From.Type = obj.TYPE_CONST p4.From.Offset = sz p4.To.Type = obj.TYPE_REG p4.To.Reg = mips.REG_R1 p5 := gc.Prog(mips.AADDU) p5.From.Type = obj.TYPE_CONST p5.From.Offset = sz p5.To.Type = obj.TYPE_REG p5.To.Reg = mips.REG_R2 p6 := gc.Prog(mips.ABNE) p6.From.Type = obj.TYPE_REG p6.From.Reg = v.Args[2].Reg() p6.Reg = mips.REG_R1 p6.To.Type = obj.TYPE_BRANCH gc.Patch(p6, p2) case ssa.OpMIPSCALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSLoweredAtomicLoad: gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicStore: gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicStorezero: gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicExchange: // SYNC // MOVW Rarg1, Rtmp // LL (Rarg0), Rout // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP p1 := gc.Prog(mips.ALL) p1.From.Type = obj.TYPE_MEM p1.From.Reg = v.Args[0].Reg() p1.To.Type = obj.TYPE_REG p1.To.Reg = v.Reg0() p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicAdd: // SYNC // LL (Rarg0), Rout // ADDU Rarg1, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC // ADDU Rarg1, Rout gc.Prog(mips.ASYNC) p := gc.Prog(mips.ALL) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() p1 := gc.Prog(mips.AADDU) p1.From.Type = obj.TYPE_REG p1.From.Reg = v.Args[1].Reg() p1.Reg = v.Reg0() p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) p4 := gc.Prog(mips.AADDU) p4.From.Type = obj.TYPE_REG p4.From.Reg = v.Args[1].Reg() p4.Reg = v.Reg0() p4.To.Type = obj.TYPE_REG p4.To.Reg = v.Reg0() case ssa.OpMIPSLoweredAtomicAddconst: // SYNC // LL (Rarg0), Rout // ADDU $auxInt, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC // ADDU $auxInt, Rout gc.Prog(mips.ASYNC) p := gc.Prog(mips.ALL) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() p1 := gc.Prog(mips.AADDU) p1.From.Type = obj.TYPE_CONST p1.From.Offset = v.AuxInt p1.Reg = v.Reg0() p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) p4 := gc.Prog(mips.AADDU) p4.From.Type = obj.TYPE_CONST p4.From.Offset = v.AuxInt p4.Reg = v.Reg0() p4.To.Type = obj.TYPE_REG p4.To.Reg = v.Reg0() case ssa.OpMIPSLoweredAtomicAnd, ssa.OpMIPSLoweredAtomicOr: // SYNC // LL (Rarg0), Rtmp // AND/OR Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC gc.Prog(mips.ASYNC) p := gc.Prog(mips.ALL) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP p1 := gc.Prog(v.Op.Asm()) p1.From.Type = obj.TYPE_REG p1.From.Reg = v.Args[1].Reg() p1.Reg = mips.REGTMP p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicCas: // MOVW $0, Rout // SYNC // LL (Rarg0), Rtmp // BNE Rtmp, Rarg1, 4(PC) // MOVW Rarg2, Rout // SC Rout, (Rarg0) // BEQ Rout, -4(PC) // SYNC p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() gc.Prog(mips.ASYNC) p1 := gc.Prog(mips.ALL) p1.From.Type = obj.TYPE_MEM p1.From.Reg = v.Args[0].Reg() p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ABNE) p2.From.Type = obj.TYPE_REG p2.From.Reg = v.Args[1].Reg() p2.Reg = mips.REGTMP p2.To.Type = obj.TYPE_BRANCH p3 := gc.Prog(mips.AMOVW) p3.From.Type = obj.TYPE_REG p3.From.Reg = v.Args[2].Reg() p3.To.Type = obj.TYPE_REG p3.To.Reg = v.Reg0() p4 := gc.Prog(mips.ASC) p4.From.Type = obj.TYPE_REG p4.From.Reg = v.Reg0() p4.To.Type = obj.TYPE_MEM p4.To.Reg = v.Args[0].Reg() p5 := gc.Prog(mips.ABEQ) p5.From.Type = obj.TYPE_REG p5.From.Reg = v.Reg0() p5.To.Type = obj.TYPE_BRANCH gc.Patch(p5, p1) gc.Prog(mips.ASYNC) p6 := gc.Prog(obj.ANOP) gc.Patch(p2, p6) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpMIPSLoweredNilCheck: // Issue a load which will fault if arg is nil. p := gc.Prog(mips.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpMIPSFPFlagTrue, ssa.OpMIPSFPFlagFalse: // MOVW $1, r // CMOVF R0, r cmov := mips.ACMOVF if v.Op == ssa.OpMIPSFPFlagFalse { cmov = mips.ACMOVT } p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p1 := gc.Prog(cmov) p1.From.Type = obj.TYPE_REG p1.From.Reg = mips.REGZERO p1.To.Type = obj.TYPE_REG p1.To.Reg = v.Reg() case ssa.OpMIPSLoweredGetClosurePtr: // Closure pointer is R22 (mips.REGCTXT). gc.CheckLoweredGetClosurePtr(v) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpARMMOVWconvert, ssa.OpARMMOVWreg: if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x == y { return } as := arm.AMOVW if v.Type.IsFloat() { switch v.Type.Size() { case 4: as = arm.AMOVF case 8: as = arm.AMOVD default: panic("bad float size") } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y case ssa.OpARMMOVWnop: if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpARMDIV, ssa.OpARMDIVU, ssa.OpARMMOD, ssa.OpARMMODU: // Note: for software division the assembler rewrite these // instructions to sequence of instructions: // - it puts numerator in R11 and denominator in g.m.divmod // and call (say) _udiv // - _udiv saves R0-R3 on stack and call udiv, restores R0-R3 // before return // - udiv does the actual work //TODO: set approperiate regmasks and call udiv directly? // need to be careful for negative case // Or, as soft div is already expensive, we don't care? fallthrough case ssa.OpARMADD, ssa.OpARMADC, ssa.OpARMSUB, ssa.OpARMSBC, ssa.OpARMRSB, ssa.OpARMAND, ssa.OpARMOR, ssa.OpARMXOR, ssa.OpARMBIC, ssa.OpARMMUL, ssa.OpARMADDF, ssa.OpARMADDD, ssa.OpARMSUBF, ssa.OpARMSUBD, ssa.OpARMMULF, ssa.OpARMMULD, ssa.OpARMDIVF, ssa.OpARMDIVD: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDS, ssa.OpARMSUBS: r := gc.SSARegNum1(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_SBIT p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMSLL, ssa.OpARMSRL, ssa.OpARMSRA: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMSRAcond: // ARM shift instructions uses only the low-order byte of the shift amount // generate conditional instructions to deal with large shifts // flag is already set // SRA.HS $31, Rarg0, Rdst // shift 31 bits to get the sign bit // SRA.LO Rarg1, Rarg0, Rdst r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(arm.ASRA) p.Scond = arm.C_SCOND_HS p.From.Type = obj.TYPE_CONST p.From.Offset = 31 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r p = gc.Prog(arm.ASRA) p.Scond = arm.C_SCOND_LO p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDconst, ssa.OpARMADCconst, ssa.OpARMSUBconst, ssa.OpARMSBCconst, ssa.OpARMRSBconst, ssa.OpARMRSCconst, ssa.OpARMANDconst, ssa.OpARMORconst, ssa.OpARMXORconst, ssa.OpARMBICconst, ssa.OpARMSLLconst, ssa.OpARMSRLconst, ssa.OpARMSRAconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMADDSconst, ssa.OpARMSUBSconst, ssa.OpARMRSBSconst: p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_SBIT p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum1(v) case ssa.OpARMSRRconst: genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt) case ssa.OpARMADDshiftLL, ssa.OpARMADCshiftLL, ssa.OpARMSUBshiftLL, ssa.OpARMSBCshiftLL, ssa.OpARMRSBshiftLL, ssa.OpARMRSCshiftLL, ssa.OpARMANDshiftLL, ssa.OpARMORshiftLL, ssa.OpARMXORshiftLL, ssa.OpARMBICshiftLL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt) case ssa.OpARMADDSshiftLL, ssa.OpARMSUBSshiftLL, ssa.OpARMRSBSshiftLL: p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRL, ssa.OpARMADCshiftRL, ssa.OpARMSUBshiftRL, ssa.OpARMSBCshiftRL, ssa.OpARMRSBshiftRL, ssa.OpARMRSCshiftRL, ssa.OpARMANDshiftRL, ssa.OpARMORshiftRL, ssa.OpARMXORshiftRL, ssa.OpARMBICshiftRL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt) case ssa.OpARMADDSshiftRL, ssa.OpARMSUBSshiftRL, ssa.OpARMRSBSshiftRL: p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRA, ssa.OpARMADCshiftRA, ssa.OpARMSUBshiftRA, ssa.OpARMSBCshiftRA, ssa.OpARMRSBshiftRA, ssa.OpARMRSCshiftRA, ssa.OpARMANDshiftRA, ssa.OpARMORshiftRA, ssa.OpARMXORshiftRA, ssa.OpARMBICshiftRA: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt) case ssa.OpARMADDSshiftRA, ssa.OpARMSUBSshiftRA, ssa.OpARMRSBSshiftRA: p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMMVNshiftLL: genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt) case ssa.OpARMMVNshiftRL: genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt) case ssa.OpARMMVNshiftRA: genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt) case ssa.OpARMMVNshiftLLreg: genregshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL) case ssa.OpARMMVNshiftRLreg: genregshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR) case ssa.OpARMMVNshiftRAreg: genregshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR) case ssa.OpARMADDshiftLLreg, ssa.OpARMADCshiftLLreg, ssa.OpARMSUBshiftLLreg, ssa.OpARMSBCshiftLLreg, ssa.OpARMRSBshiftLLreg, ssa.OpARMRSCshiftLLreg, ssa.OpARMANDshiftLLreg, ssa.OpARMORshiftLLreg, ssa.OpARMXORshiftLLreg, ssa.OpARMBICshiftLLreg: genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_LL) case ssa.OpARMADDSshiftLLreg, ssa.OpARMSUBSshiftLLreg, ssa.OpARMRSBSshiftLLreg: p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRLreg, ssa.OpARMADCshiftRLreg, ssa.OpARMSUBshiftRLreg, ssa.OpARMSBCshiftRLreg, ssa.OpARMRSBshiftRLreg, ssa.OpARMRSCshiftRLreg, ssa.OpARMANDshiftRLreg, ssa.OpARMORshiftRLreg, ssa.OpARMXORshiftRLreg, ssa.OpARMBICshiftRLreg: genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_LR) case ssa.OpARMADDSshiftRLreg, ssa.OpARMSUBSshiftRLreg, ssa.OpARMRSBSshiftRLreg: p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRAreg, ssa.OpARMADCshiftRAreg, ssa.OpARMSUBshiftRAreg, ssa.OpARMSBCshiftRAreg, ssa.OpARMRSBshiftRAreg, ssa.OpARMRSCshiftRAreg, ssa.OpARMANDshiftRAreg, ssa.OpARMORshiftRAreg, ssa.OpARMXORshiftRAreg, ssa.OpARMBICshiftRAreg: genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_AR) case ssa.OpARMADDSshiftRAreg, ssa.OpARMSUBSshiftRAreg, ssa.OpARMRSBSshiftRAreg: p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR) p.Scond = arm.C_SBIT case ssa.OpARMHMUL, ssa.OpARMHMULU: // 32-bit high multiplication p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REGREG p.To.Reg = gc.SSARegNum(v) p.To.Offset = arm.REGTMP // throw away low 32-bit into tmp register case ssa.OpARMMULLU: // 32-bit multiplication, results 64-bit, high 32-bit in out0, low 32-bit in out1 p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REGREG p.To.Reg = gc.SSARegNum0(v) // high 32-bit p.To.Offset = int64(gc.SSARegNum1(v)) // low 32-bit case ssa.OpARMMULA: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REGREG2 p.To.Reg = gc.SSARegNum(v) // result p.To.Offset = int64(gc.SSARegNum(v.Args[2])) // addend case ssa.OpARMMOVWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVFconst, ssa.OpARMMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMCMP, ssa.OpARMCMN, ssa.OpARMTST, ssa.OpARMTEQ, ssa.OpARMCMPF, ssa.OpARMCMPD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG // Special layout in ARM assembly // Comparing to x86, the operands of ARM's CMP are reversed. p.From.Reg = gc.SSARegNum(v.Args[1]) p.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARMCMPconst, ssa.OpARMCMNconst, ssa.OpARMTSTconst, ssa.OpARMTEQconst: // Special layout in ARM assembly p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARMCMPF0, ssa.OpARMCMPD0: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARMCMPshiftLL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm.SHIFT_LL, v.AuxInt) case ssa.OpARMCMPshiftRL: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm.SHIFT_LR, v.AuxInt) case ssa.OpARMCMPshiftRA: genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm.SHIFT_AR, v.AuxInt) case ssa.OpARMCMPshiftLLreg: genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), 0, arm.SHIFT_LL) case ssa.OpARMCMPshiftRLreg: genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), 0, arm.SHIFT_LR) case ssa.OpARMCMPshiftRAreg: genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), 0, arm.SHIFT_AR) case ssa.OpARMMOVWaddr: p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) var wantreg string // MOVW $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R13) // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVW $off(SP), R wantreg = "SP" p.From.Reg = arm.REGSP p.From.Offset = v.AuxInt } if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg) } case ssa.OpARMMOVBload, ssa.OpARMMOVBUload, ssa.OpARMMOVHload, ssa.OpARMMOVHUload, ssa.OpARMMOVWload, ssa.OpARMMOVFload, ssa.OpARMMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVBstore, ssa.OpARMMOVHstore, ssa.OpARMMOVWstore, ssa.OpARMMOVFstore, ssa.OpARMMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpARMMOVWloadidx: // this is just shift 0 bits fallthrough case ssa.OpARMMOVWloadshiftLL: p := genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt) p.From.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARMMOVWloadshiftRL: p := genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt) p.From.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARMMOVWloadshiftRA: p := genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt) p.From.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpARMMOVWstoreidx: // this is just shift 0 bits fallthrough case ssa.OpARMMOVWstoreshiftLL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_SHIFT p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Offset = int64(makeshift(gc.SSARegNum(v.Args[1]), arm.SHIFT_LL, v.AuxInt)) case ssa.OpARMMOVWstoreshiftRL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_SHIFT p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Offset = int64(makeshift(gc.SSARegNum(v.Args[1]), arm.SHIFT_LR, v.AuxInt)) case ssa.OpARMMOVWstoreshiftRA: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_SHIFT p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Offset = int64(makeshift(gc.SSARegNum(v.Args[1]), arm.SHIFT_AR, v.AuxInt)) case ssa.OpARMMOVBreg, ssa.OpARMMOVBUreg, ssa.OpARMMOVHreg, ssa.OpARMMOVHUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpARMMOVWreg || a.Op == ssa.OpARMMOVWnop { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpARMMOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpARMMOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpARMMOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpARMMOVHUreg && t.Size() == 2 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if gc.SSARegNum(v) == gc.SSARegNum(v.Args[0]) { return } p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) return default: } } fallthrough case ssa.OpARMMVN, ssa.OpARMSQRTD, ssa.OpARMNEGF, ssa.OpARMNEGD, ssa.OpARMMOVWF, ssa.OpARMMOVWD, ssa.OpARMMOVFW, ssa.OpARMMOVDW, ssa.OpARMMOVFD, ssa.OpARMMOVDF: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVWUF, ssa.OpARMMOVWUD, ssa.OpARMMOVFWU, ssa.OpARMMOVDWU: p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_UBIT p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMCMOVWHSconst: p := gc.Prog(arm.AMOVW) p.Scond = arm.C_SCOND_HS p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMCMOVWLSconst: p := gc.Prog(arm.AMOVW) p.Scond = arm.C_SCOND_LS p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMCALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMDUFFZERO: p := gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARMDUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARMLoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. mem := v.Args[1] for _, w := range v.Block.Succs[0].Block().Values { if w.Op == ssa.OpPhi { if w.Type.IsMemory() { mem = w } continue } if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // w doesn't use a store - can't be a memory op. continue } if w.Args[len(w.Args)-1] != mem { v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) } switch w.Op { case ssa.OpARMMOVBload, ssa.OpARMMOVBUload, ssa.OpARMMOVHload, ssa.OpARMMOVHUload, ssa.OpARMMOVWload, ssa.OpARMMOVFload, ssa.OpARMMOVDload, ssa.OpARMMOVBstore, ssa.OpARMMOVHstore, ssa.OpARMMOVWstore, ssa.OpARMMOVFstore, ssa.OpARMMOVDstore: // arg0 is ptr, auxint is offset if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpARMDUFFZERO, ssa.OpARMLoweredZero: // arg0 is ptr if w.Args[0] == v.Args[0] { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpARMDUFFCOPY, ssa.OpARMLoweredMove: // arg0 is dst ptr, arg1 is src ptr if w.Args[0] == v.Args[0] || w.Args[1] == v.Args[0] { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } default: } if w.Type.IsMemory() { if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // these ops are OK mem = w continue } // We can't delay the nil check past the next store. break } } // Issue a load which will fault if arg is nil. p := gc.Prog(arm.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = arm.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpARMLoweredZero: // MOVW.P Rarg2, 4(R1) // CMP Rarg1, R1 // BLE -2(PC) // arg1 is the address of the last element to zero // arg2 is known to be zero // auxint is alignment var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = arm.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = arm.AMOVH default: sz = 1 mov = arm.AMOVB } p := gc.Prog(mov) p.Scond = arm.C_PBIT p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = arm.REG_R1 p.To.Offset = sz p2 := gc.Prog(arm.ACMP) p2.From.Type = obj.TYPE_REG p2.From.Reg = gc.SSARegNum(v.Args[1]) p2.Reg = arm.REG_R1 p3 := gc.Prog(arm.ABLE) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARMLoweredMove: // MOVW.P 4(R1), Rtmp // MOVW.P Rtmp, 4(R2) // CMP Rarg2, R1 // BLE -3(PC) // arg2 is the address of the last element of src // auxint is alignment var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = arm.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = arm.AMOVH default: sz = 1 mov = arm.AMOVB } p := gc.Prog(mov) p.Scond = arm.C_PBIT p.From.Type = obj.TYPE_MEM p.From.Reg = arm.REG_R1 p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = arm.REGTMP p2 := gc.Prog(mov) p2.Scond = arm.C_PBIT p2.From.Type = obj.TYPE_REG p2.From.Reg = arm.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = arm.REG_R2 p2.To.Offset = sz p3 := gc.Prog(arm.ACMP) p3.From.Type = obj.TYPE_REG p3.From.Reg = gc.SSARegNum(v.Args[2]) p3.Reg = arm.REG_R1 p4 := gc.Prog(arm.ABLE) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: if !v.Args[0].Type.IsPtrShaped() { v.Fatalf("keeping non-pointer alive %v", v.Args[0]) } n, off := gc.AutoVar(v.Args[0]) if n == nil { v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0]) } if off != 0 { v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off) } gc.Gvarlive(n) case ssa.OpARMEqual, ssa.OpARMNotEqual, ssa.OpARMLessThan, ssa.OpARMLessEqual, ssa.OpARMGreaterThan, ssa.OpARMGreaterEqual, ssa.OpARMLessThanU, ssa.OpARMLessEqualU, ssa.OpARMGreaterThanU, ssa.OpARMGreaterEqualU: // generate boolean values // use conditional move p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) p = gc.Prog(arm.AMOVW) p.Scond = condBits[v.Op] p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpARMLoweredGetClosurePtr: // Closure pointer is R7 (arm.REGCTXT). gc.CheckLoweredGetClosurePtr(v) case ssa.OpARMFlagEQ, ssa.OpARMFlagLT_ULT, ssa.OpARMFlagLT_UGT, ssa.OpARMFlagGT_ULT, ssa.OpARMFlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpARMInvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpARMMOVWconvert, ssa.OpARMMOVWreg: if v.Type.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x == y { return } as := arm.AMOVW if v.Type.IsFloat() { switch v.Type.Size() { case 4: as = arm.AMOVF case 8: as = arm.AMOVD default: panic("bad float size") } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y case ssa.OpARMMOVWnop: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() gc.AddrAuto(&p.To, v) case ssa.OpARMUDIVrtcall: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = obj.Linklookup(gc.Ctxt, "udiv", 0) case ssa.OpARMADD, ssa.OpARMADC, ssa.OpARMSUB, ssa.OpARMSBC, ssa.OpARMRSB, ssa.OpARMAND, ssa.OpARMOR, ssa.OpARMXOR, ssa.OpARMBIC, ssa.OpARMMUL, ssa.OpARMADDF, ssa.OpARMADDD, ssa.OpARMSUBF, ssa.OpARMSUBD, ssa.OpARMMULF, ssa.OpARMMULD, ssa.OpARMDIVF, ssa.OpARMDIVD: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDS, ssa.OpARMSUBS: r := v.Reg0() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_SBIT p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMSLL, ssa.OpARMSRL, ssa.OpARMSRA: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMSRAcond: // ARM shift instructions uses only the low-order byte of the shift amount // generate conditional instructions to deal with large shifts // flag is already set // SRA.HS $31, Rarg0, Rdst // shift 31 bits to get the sign bit // SRA.LO Rarg1, Rarg0, Rdst r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(arm.ASRA) p.Scond = arm.C_SCOND_HS p.From.Type = obj.TYPE_CONST p.From.Offset = 31 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r p = gc.Prog(arm.ASRA) p.Scond = arm.C_SCOND_LO p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDconst, ssa.OpARMADCconst, ssa.OpARMSUBconst, ssa.OpARMSBCconst, ssa.OpARMRSBconst, ssa.OpARMRSCconst, ssa.OpARMANDconst, ssa.OpARMORconst, ssa.OpARMXORconst, ssa.OpARMBICconst, ssa.OpARMSLLconst, ssa.OpARMSRLconst, ssa.OpARMSRAconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMADDSconst, ssa.OpARMSUBSconst, ssa.OpARMRSBSconst: p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_SBIT p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpARMSRRconst: genshift(arm.AMOVW, 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_RR, v.AuxInt) case ssa.OpARMADDshiftLL, ssa.OpARMADCshiftLL, ssa.OpARMSUBshiftLL, ssa.OpARMSBCshiftLL, ssa.OpARMRSBshiftLL, ssa.OpARMRSCshiftLL, ssa.OpARMANDshiftLL, ssa.OpARMORshiftLL, ssa.OpARMXORshiftLL, ssa.OpARMBICshiftLL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LL, v.AuxInt) case ssa.OpARMADDSshiftLL, ssa.OpARMSUBSshiftLL, ssa.OpARMRSBSshiftLL: p := genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg0(), arm.SHIFT_LL, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRL, ssa.OpARMADCshiftRL, ssa.OpARMSUBshiftRL, ssa.OpARMSBCshiftRL, ssa.OpARMRSBshiftRL, ssa.OpARMRSCshiftRL, ssa.OpARMANDshiftRL, ssa.OpARMORshiftRL, ssa.OpARMXORshiftRL, ssa.OpARMBICshiftRL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LR, v.AuxInt) case ssa.OpARMADDSshiftRL, ssa.OpARMSUBSshiftRL, ssa.OpARMRSBSshiftRL: p := genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg0(), arm.SHIFT_LR, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRA, ssa.OpARMADCshiftRA, ssa.OpARMSUBshiftRA, ssa.OpARMSBCshiftRA, ssa.OpARMRSBshiftRA, ssa.OpARMRSCshiftRA, ssa.OpARMANDshiftRA, ssa.OpARMORshiftRA, ssa.OpARMXORshiftRA, ssa.OpARMBICshiftRA: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_AR, v.AuxInt) case ssa.OpARMADDSshiftRA, ssa.OpARMSUBSshiftRA, ssa.OpARMRSBSshiftRA: p := genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg0(), arm.SHIFT_AR, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMXORshiftRR: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR, v.AuxInt) case ssa.OpARMMVNshiftLL: genshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_LL, v.AuxInt) case ssa.OpARMMVNshiftRL: genshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_LR, v.AuxInt) case ssa.OpARMMVNshiftRA: genshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_AR, v.AuxInt) case ssa.OpARMMVNshiftLLreg: genregshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LL) case ssa.OpARMMVNshiftRLreg: genregshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LR) case ssa.OpARMMVNshiftRAreg: genregshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_AR) case ssa.OpARMADDshiftLLreg, ssa.OpARMADCshiftLLreg, ssa.OpARMSUBshiftLLreg, ssa.OpARMSBCshiftLLreg, ssa.OpARMRSBshiftLLreg, ssa.OpARMRSCshiftLLreg, ssa.OpARMANDshiftLLreg, ssa.OpARMORshiftLLreg, ssa.OpARMXORshiftLLreg, ssa.OpARMBICshiftLLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg(), arm.SHIFT_LL) case ssa.OpARMADDSshiftLLreg, ssa.OpARMSUBSshiftLLreg, ssa.OpARMRSBSshiftLLreg: p := genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg0(), arm.SHIFT_LL) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRLreg, ssa.OpARMADCshiftRLreg, ssa.OpARMSUBshiftRLreg, ssa.OpARMSBCshiftRLreg, ssa.OpARMRSBshiftRLreg, ssa.OpARMRSCshiftRLreg, ssa.OpARMANDshiftRLreg, ssa.OpARMORshiftRLreg, ssa.OpARMXORshiftRLreg, ssa.OpARMBICshiftRLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg(), arm.SHIFT_LR) case ssa.OpARMADDSshiftRLreg, ssa.OpARMSUBSshiftRLreg, ssa.OpARMRSBSshiftRLreg: p := genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg0(), arm.SHIFT_LR) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRAreg, ssa.OpARMADCshiftRAreg, ssa.OpARMSUBshiftRAreg, ssa.OpARMSBCshiftRAreg, ssa.OpARMRSBshiftRAreg, ssa.OpARMRSCshiftRAreg, ssa.OpARMANDshiftRAreg, ssa.OpARMORshiftRAreg, ssa.OpARMXORshiftRAreg, ssa.OpARMBICshiftRAreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg(), arm.SHIFT_AR) case ssa.OpARMADDSshiftRAreg, ssa.OpARMSUBSshiftRAreg, ssa.OpARMRSBSshiftRAreg: p := genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg0(), arm.SHIFT_AR) p.Scond = arm.C_SBIT case ssa.OpARMHMUL, ssa.OpARMHMULU: // 32-bit high multiplication p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REGREG p.To.Reg = v.Reg() p.To.Offset = arm.REGTMP // throw away low 32-bit into tmp register case ssa.OpARMMULLU: // 32-bit multiplication, results 64-bit, high 32-bit in out0, low 32-bit in out1 p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REGREG p.To.Reg = v.Reg0() // high 32-bit p.To.Offset = int64(v.Reg1()) // low 32-bit case ssa.OpARMMULA: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REGREG2 p.To.Reg = v.Reg() // result p.To.Offset = int64(v.Args[2].Reg()) // addend case ssa.OpARMMOVWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMMOVFconst, ssa.OpARMMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCMP, ssa.OpARMCMN, ssa.OpARMTST, ssa.OpARMTEQ, ssa.OpARMCMPF, ssa.OpARMCMPD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG // Special layout in ARM assembly // Comparing to x86, the operands of ARM's CMP are reversed. p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() case ssa.OpARMCMPconst, ssa.OpARMCMNconst, ssa.OpARMTSTconst, ssa.OpARMTEQconst: // Special layout in ARM assembly p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() case ssa.OpARMCMPF0, ssa.OpARMCMPD0: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() case ssa.OpARMCMPshiftLL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm.SHIFT_LL, v.AuxInt) case ssa.OpARMCMPshiftRL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm.SHIFT_LR, v.AuxInt) case ssa.OpARMCMPshiftRA: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm.SHIFT_AR, v.AuxInt) case ssa.OpARMCMPshiftLLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), 0, arm.SHIFT_LL) case ssa.OpARMCMPshiftRLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), 0, arm.SHIFT_LR) case ssa.OpARMCMPshiftRAreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), 0, arm.SHIFT_AR) case ssa.OpARMMOVWaddr: p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() var wantreg string // MOVW $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R13) // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVW $off(SP), R wantreg = "SP" p.From.Reg = arm.REGSP p.From.Offset = v.AuxInt } if reg := v.Args[0].RegName(); reg != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) } case ssa.OpARMMOVBload, ssa.OpARMMOVBUload, ssa.OpARMMOVHload, ssa.OpARMMOVHUload, ssa.OpARMMOVWload, ssa.OpARMMOVFload, ssa.OpARMMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMMOVBstore, ssa.OpARMMOVHstore, ssa.OpARMMOVWstore, ssa.OpARMMOVFstore, ssa.OpARMMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpARMMOVWloadidx: // this is just shift 0 bits fallthrough case ssa.OpARMMOVWloadshiftLL: p := genshift(v.Op.Asm(), 0, v.Args[1].Reg(), v.Reg(), arm.SHIFT_LL, v.AuxInt) p.From.Reg = v.Args[0].Reg() case ssa.OpARMMOVWloadshiftRL: p := genshift(v.Op.Asm(), 0, v.Args[1].Reg(), v.Reg(), arm.SHIFT_LR, v.AuxInt) p.From.Reg = v.Args[0].Reg() case ssa.OpARMMOVWloadshiftRA: p := genshift(v.Op.Asm(), 0, v.Args[1].Reg(), v.Reg(), arm.SHIFT_AR, v.AuxInt) p.From.Reg = v.Args[0].Reg() case ssa.OpARMMOVWstoreidx: // this is just shift 0 bits fallthrough case ssa.OpARMMOVWstoreshiftLL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_SHIFT p.To.Reg = v.Args[0].Reg() p.To.Offset = int64(makeshift(v.Args[1].Reg(), arm.SHIFT_LL, v.AuxInt)) case ssa.OpARMMOVWstoreshiftRL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_SHIFT p.To.Reg = v.Args[0].Reg() p.To.Offset = int64(makeshift(v.Args[1].Reg(), arm.SHIFT_LR, v.AuxInt)) case ssa.OpARMMOVWstoreshiftRA: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_SHIFT p.To.Reg = v.Args[0].Reg() p.To.Offset = int64(makeshift(v.Args[1].Reg(), arm.SHIFT_AR, v.AuxInt)) case ssa.OpARMMOVBreg, ssa.OpARMMOVBUreg, ssa.OpARMMOVHreg, ssa.OpARMMOVHUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpARMMOVWreg || a.Op == ssa.OpARMMOVWnop { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpARMMOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpARMMOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpARMMOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpARMMOVHUreg && t.Size() == 2 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if v.Reg() == v.Args[0].Reg() { return } p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() return default: } } fallthrough case ssa.OpARMMVN, ssa.OpARMCLZ, ssa.OpARMSQRTD, ssa.OpARMNEGF, ssa.OpARMNEGD, ssa.OpARMMOVWF, ssa.OpARMMOVWD, ssa.OpARMMOVFW, ssa.OpARMMOVDW, ssa.OpARMMOVFD, ssa.OpARMMOVDF: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMMOVWUF, ssa.OpARMMOVWUD, ssa.OpARMMOVFWU, ssa.OpARMMOVDWU: p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_UBIT p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCMOVWHSconst: p := gc.Prog(arm.AMOVW) p.Scond = arm.C_SCOND_HS p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCMOVWLSconst: p := gc.Prog(arm.AMOVW) p.Scond = arm.C_SCOND_LS p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMDUFFZERO: p := gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARMDUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARMLoweredNilCheck: // Issue a load which will fault if arg is nil. p := gc.Prog(arm.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = arm.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpARMLoweredZero: // MOVW.P Rarg2, 4(R1) // CMP Rarg1, R1 // BLE -2(PC) // arg1 is the address of the last element to zero // arg2 is known to be zero // auxint is alignment var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = arm.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = arm.AMOVH default: sz = 1 mov = arm.AMOVB } p := gc.Prog(mov) p.Scond = arm.C_PBIT p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = arm.REG_R1 p.To.Offset = sz p2 := gc.Prog(arm.ACMP) p2.From.Type = obj.TYPE_REG p2.From.Reg = v.Args[1].Reg() p2.Reg = arm.REG_R1 p3 := gc.Prog(arm.ABLE) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARMLoweredMove: // MOVW.P 4(R1), Rtmp // MOVW.P Rtmp, 4(R2) // CMP Rarg2, R1 // BLE -3(PC) // arg2 is the address of the last element of src // auxint is alignment var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = arm.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = arm.AMOVH default: sz = 1 mov = arm.AMOVB } p := gc.Prog(mov) p.Scond = arm.C_PBIT p.From.Type = obj.TYPE_MEM p.From.Reg = arm.REG_R1 p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = arm.REGTMP p2 := gc.Prog(mov) p2.Scond = arm.C_PBIT p2.From.Type = obj.TYPE_REG p2.From.Reg = arm.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = arm.REG_R2 p2.To.Offset = sz p3 := gc.Prog(arm.ACMP) p3.From.Type = obj.TYPE_REG p3.From.Reg = v.Args[2].Reg() p3.Reg = arm.REG_R1 p4 := gc.Prog(arm.ABLE) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpARMEqual, ssa.OpARMNotEqual, ssa.OpARMLessThan, ssa.OpARMLessEqual, ssa.OpARMGreaterThan, ssa.OpARMGreaterEqual, ssa.OpARMLessThanU, ssa.OpARMLessEqualU, ssa.OpARMGreaterThanU, ssa.OpARMGreaterEqualU: // generate boolean values // use conditional move p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p = gc.Prog(arm.AMOVW) p.Scond = condBits[v.Op] p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpARMLoweredGetClosurePtr: // Closure pointer is R7 (arm.REGCTXT). gc.CheckLoweredGetClosurePtr(v) case ssa.OpARMFlagEQ, ssa.OpARMFlagLT_ULT, ssa.OpARMFlagLT_UGT, ssa.OpARMFlagGT_ULT, ssa.OpARMFlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpARMInvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpS390XSLD, ssa.OpS390XSLW, ssa.OpS390XSRD, ssa.OpS390XSRW, ssa.OpS390XSRAD, ssa.OpS390XSRAW: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() if r2 == s390x.REG_R0 { v.Fatalf("cannot use R0 as shift value %s", v.LongString()) } p := opregreg(v.Op.Asm(), r, r2) if r != r1 { p.Reg = r1 } case ssa.OpS390XADD, ssa.OpS390XADDW, ssa.OpS390XSUB, ssa.OpS390XSUBW, ssa.OpS390XAND, ssa.OpS390XANDW, ssa.OpS390XOR, ssa.OpS390XORW, ssa.OpS390XXOR, ssa.OpS390XXORW: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := opregreg(v.Op.Asm(), r, r2) if r != r1 { p.Reg = r1 } // 2-address opcode arithmetic case ssa.OpS390XMULLD, ssa.OpS390XMULLW, ssa.OpS390XMULHD, ssa.OpS390XMULHDU, ssa.OpS390XFADDS, ssa.OpS390XFADD, ssa.OpS390XFSUBS, ssa.OpS390XFSUB, ssa.OpS390XFMULS, ssa.OpS390XFMUL, ssa.OpS390XFDIVS, ssa.OpS390XFDIV: r := v.Reg() if r != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } opregreg(v.Op.Asm(), r, v.Args[1].Reg()) case ssa.OpS390XDIVD, ssa.OpS390XDIVW, ssa.OpS390XDIVDU, ssa.OpS390XDIVWU, ssa.OpS390XMODD, ssa.OpS390XMODW, ssa.OpS390XMODDU, ssa.OpS390XMODWU: // TODO(mundaym): use the temp registers every time like x86 does with AX? dividend := v.Args[0].Reg() divisor := v.Args[1].Reg() // CPU faults upon signed overflow, which occurs when most // negative int is divided by -1. var j *obj.Prog if v.Op == ssa.OpS390XDIVD || v.Op == ssa.OpS390XDIVW || v.Op == ssa.OpS390XMODD || v.Op == ssa.OpS390XMODW { var c *obj.Prog c = gc.Prog(s390x.ACMP) j = gc.Prog(s390x.ABEQ) c.From.Type = obj.TYPE_REG c.From.Reg = divisor c.To.Type = obj.TYPE_CONST c.To.Offset = -1 j.To.Type = obj.TYPE_BRANCH } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = divisor p.Reg = 0 p.To.Type = obj.TYPE_REG p.To.Reg = dividend // signed division, rest of the check for -1 case if j != nil { j2 := gc.Prog(s390x.ABR) j2.To.Type = obj.TYPE_BRANCH var n *obj.Prog if v.Op == ssa.OpS390XDIVD || v.Op == ssa.OpS390XDIVW { // n * -1 = -n n = gc.Prog(s390x.ANEG) n.To.Type = obj.TYPE_REG n.To.Reg = dividend } else { // n % -1 == 0 n = gc.Prog(s390x.AXOR) n.From.Type = obj.TYPE_REG n.From.Reg = dividend n.To.Type = obj.TYPE_REG n.To.Reg = dividend } j.To.Val = n j2.To.Val = s.Pc() } case ssa.OpS390XADDconst, ssa.OpS390XADDWconst: opregregimm(v.Op.Asm(), v.Reg(), v.Args[0].Reg(), v.AuxInt) case ssa.OpS390XMULLDconst, ssa.OpS390XMULLWconst, ssa.OpS390XSUBconst, ssa.OpS390XSUBWconst, ssa.OpS390XANDconst, ssa.OpS390XANDWconst, ssa.OpS390XORconst, ssa.OpS390XORWconst, ssa.OpS390XXORconst, ssa.OpS390XXORWconst: r := v.Reg() if r != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpS390XSLDconst, ssa.OpS390XSLWconst, ssa.OpS390XSRDconst, ssa.OpS390XSRWconst, ssa.OpS390XSRADconst, ssa.OpS390XSRAWconst, ssa.OpS390XRLLGconst, ssa.OpS390XRLLconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt r := v.Reg() r1 := v.Args[0].Reg() if r != r1 { p.Reg = r1 } p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpS390XSUBEcarrymask, ssa.OpS390XSUBEWcarrymask: r := v.Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpS390XMOVDaddridx: r := v.Args[0].Reg() i := v.Args[1].Reg() p := gc.Prog(s390x.AMOVD) p.From.Scale = 1 if i == s390x.REGSP { r, i = i, r } p.From.Type = obj.TYPE_ADDR p.From.Reg = r p.From.Index = i gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpS390XMOVDaddr: p := gc.Prog(s390x.AMOVD) p.From.Type = obj.TYPE_ADDR p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpS390XCMP, ssa.OpS390XCMPW, ssa.OpS390XCMPU, ssa.OpS390XCMPWU: opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg()) case ssa.OpS390XFCMPS, ssa.OpS390XFCMP: opregreg(v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg()) case ssa.OpS390XCMPconst, ssa.OpS390XCMPWconst, ssa.OpS390XCMPUconst, ssa.OpS390XCMPWUconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt case ssa.OpS390XMOVDconst: x := v.Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpS390XFMOVSconst, ssa.OpS390XFMOVDconst: x := v.Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpS390XMOVDload, ssa.OpS390XMOVWZload, ssa.OpS390XMOVHZload, ssa.OpS390XMOVBZload, ssa.OpS390XMOVDBRload, ssa.OpS390XMOVWBRload, ssa.OpS390XMOVHBRload, ssa.OpS390XMOVBload, ssa.OpS390XMOVHload, ssa.OpS390XMOVWload, ssa.OpS390XFMOVSload, ssa.OpS390XFMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpS390XMOVBZloadidx, ssa.OpS390XMOVHZloadidx, ssa.OpS390XMOVWZloadidx, ssa.OpS390XMOVDloadidx, ssa.OpS390XMOVHBRloadidx, ssa.OpS390XMOVWBRloadidx, ssa.OpS390XMOVDBRloadidx, ssa.OpS390XFMOVSloadidx, ssa.OpS390XFMOVDloadidx: r := v.Args[0].Reg() i := v.Args[1].Reg() if i == s390x.REGSP { r, i = i, r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = r p.From.Scale = 1 p.From.Index = i gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpS390XMOVBstore, ssa.OpS390XMOVHstore, ssa.OpS390XMOVWstore, ssa.OpS390XMOVDstore, ssa.OpS390XMOVHBRstore, ssa.OpS390XMOVWBRstore, ssa.OpS390XMOVDBRstore, ssa.OpS390XFMOVSstore, ssa.OpS390XFMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpS390XMOVBstoreidx, ssa.OpS390XMOVHstoreidx, ssa.OpS390XMOVWstoreidx, ssa.OpS390XMOVDstoreidx, ssa.OpS390XMOVHBRstoreidx, ssa.OpS390XMOVWBRstoreidx, ssa.OpS390XMOVDBRstoreidx, ssa.OpS390XFMOVSstoreidx, ssa.OpS390XFMOVDstoreidx: r := v.Args[0].Reg() i := v.Args[1].Reg() if i == s390x.REGSP { r, i = i, r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = r p.To.Scale = 1 p.To.Index = i gc.AddAux(&p.To, v) case ssa.OpS390XMOVDstoreconst, ssa.OpS390XMOVWstoreconst, ssa.OpS390XMOVHstoreconst, ssa.OpS390XMOVBstoreconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpS390XMOVBreg, ssa.OpS390XMOVHreg, ssa.OpS390XMOVWreg, ssa.OpS390XMOVBZreg, ssa.OpS390XMOVHZreg, ssa.OpS390XMOVWZreg, ssa.OpS390XCEFBRA, ssa.OpS390XCDFBRA, ssa.OpS390XCEGBRA, ssa.OpS390XCDGBRA, ssa.OpS390XCFEBRA, ssa.OpS390XCFDBRA, ssa.OpS390XCGEBRA, ssa.OpS390XCGDBRA, ssa.OpS390XLDEBR, ssa.OpS390XLEDBR, ssa.OpS390XFNEG, ssa.OpS390XFNEGS: opregreg(v.Op.Asm(), v.Reg(), v.Args[0].Reg()) case ssa.OpS390XCLEAR: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpCopy, ssa.OpS390XMOVDconvert: if v.Type.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x != y { opregreg(moveByType(v.Type), y, x) } case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() gc.AddrAuto(&p.To, v) case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpS390XLoweredGetClosurePtr: // Closure pointer is R12 (already) gc.CheckLoweredGetClosurePtr(v) case ssa.OpS390XLoweredGetG: r := v.Reg() p := gc.Prog(s390x.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = s390x.REGG p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpS390XCALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpS390XCALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpS390XCALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpS390XCALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpS390XCALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpS390XFLOGR, ssa.OpS390XNEG, ssa.OpS390XNEGW, ssa.OpS390XMOVWBR, ssa.OpS390XMOVDBR: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpS390XNOT, ssa.OpS390XNOTW: v.Fatalf("NOT/NOTW generated %s", v.LongString()) case ssa.OpS390XMOVDEQ, ssa.OpS390XMOVDNE, ssa.OpS390XMOVDLT, ssa.OpS390XMOVDLE, ssa.OpS390XMOVDGT, ssa.OpS390XMOVDGE, ssa.OpS390XMOVDGTnoinv, ssa.OpS390XMOVDGEnoinv: r := v.Reg() if r != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpS390XFSQRT: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpSP, ssa.OpSB: // nothing to do case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpS390XInvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpS390XFlagEQ, ssa.OpS390XFlagLT, ssa.OpS390XFlagGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpS390XLoweredNilCheck: // Issue a load which will fault if the input is nil. p := gc.Prog(s390x.AMOVBZ) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = s390x.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpS390XMVC: vo := v.AuxValAndOff() p := gc.Prog(s390x.AMVC) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[1].Reg() p.From.Offset = vo.Off() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() p.To.Offset = vo.Off() p.From3 = new(obj.Addr) p.From3.Type = obj.TYPE_CONST p.From3.Offset = vo.Val() case ssa.OpS390XSTMG2, ssa.OpS390XSTMG3, ssa.OpS390XSTMG4, ssa.OpS390XSTM2, ssa.OpS390XSTM3, ssa.OpS390XSTM4: for i := 2; i < len(v.Args)-1; i++ { if v.Args[i].Reg() != v.Args[i-1].Reg()+1 { v.Fatalf("invalid store multiple %s", v.LongString()) } } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[len(v.Args)-2].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpS390XLoweredMove: // Inputs must be valid pointers to memory, // so adjust arg0 and arg1 as part of the expansion. // arg2 should be src+size, // // mvc: MVC $256, 0(R2), 0(R1) // MOVD $256(R1), R1 // MOVD $256(R2), R2 // CMP R2, Rarg2 // BNE mvc // MVC $rem, 0(R2), 0(R1) // if rem > 0 // arg2 is the last address to move in the loop + 256 mvc := gc.Prog(s390x.AMVC) mvc.From.Type = obj.TYPE_MEM mvc.From.Reg = v.Args[1].Reg() mvc.To.Type = obj.TYPE_MEM mvc.To.Reg = v.Args[0].Reg() mvc.From3 = new(obj.Addr) mvc.From3.Type = obj.TYPE_CONST mvc.From3.Offset = 256 for i := 0; i < 2; i++ { movd := gc.Prog(s390x.AMOVD) movd.From.Type = obj.TYPE_ADDR movd.From.Reg = v.Args[i].Reg() movd.From.Offset = 256 movd.To.Type = obj.TYPE_REG movd.To.Reg = v.Args[i].Reg() } cmpu := gc.Prog(s390x.ACMPU) cmpu.From.Reg = v.Args[1].Reg() cmpu.From.Type = obj.TYPE_REG cmpu.To.Reg = v.Args[2].Reg() cmpu.To.Type = obj.TYPE_REG bne := gc.Prog(s390x.ABLT) bne.To.Type = obj.TYPE_BRANCH gc.Patch(bne, mvc) if v.AuxInt > 0 { mvc := gc.Prog(s390x.AMVC) mvc.From.Type = obj.TYPE_MEM mvc.From.Reg = v.Args[1].Reg() mvc.To.Type = obj.TYPE_MEM mvc.To.Reg = v.Args[0].Reg() mvc.From3 = new(obj.Addr) mvc.From3.Type = obj.TYPE_CONST mvc.From3.Offset = v.AuxInt } case ssa.OpS390XLoweredZero: // Input must be valid pointers to memory, // so adjust arg0 as part of the expansion. // arg1 should be src+size, // // clear: CLEAR $256, 0(R1) // MOVD $256(R1), R1 // CMP R1, Rarg1 // BNE clear // CLEAR $rem, 0(R1) // if rem > 0 // arg1 is the last address to zero in the loop + 256 clear := gc.Prog(s390x.ACLEAR) clear.From.Type = obj.TYPE_CONST clear.From.Offset = 256 clear.To.Type = obj.TYPE_MEM clear.To.Reg = v.Args[0].Reg() movd := gc.Prog(s390x.AMOVD) movd.From.Type = obj.TYPE_ADDR movd.From.Reg = v.Args[0].Reg() movd.From.Offset = 256 movd.To.Type = obj.TYPE_REG movd.To.Reg = v.Args[0].Reg() cmpu := gc.Prog(s390x.ACMPU) cmpu.From.Reg = v.Args[0].Reg() cmpu.From.Type = obj.TYPE_REG cmpu.To.Reg = v.Args[1].Reg() cmpu.To.Type = obj.TYPE_REG bne := gc.Prog(s390x.ABLT) bne.To.Type = obj.TYPE_BRANCH gc.Patch(bne, clear) if v.AuxInt > 0 { clear := gc.Prog(s390x.ACLEAR) clear.From.Type = obj.TYPE_CONST clear.From.Offset = v.AuxInt clear.To.Type = obj.TYPE_MEM clear.To.Reg = v.Args[0].Reg() } default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads if q < 4 { // Write sequence of MOV 0, off(base) instead of using STOSL. // The hope is that although the code will be slightly longer, // the MOVs will have no dependencies and pipeline better // than the unrolled STOSL loop. // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Regalloc(&n1, gc.Types[gc.Tptr], nil) gc.Agen(nl, &n1) n1.Op = gc.OINDREG var z gc.Node gc.Nodconst(&z, gc.Types[gc.TUINT64], 0) for ; q > 0; q-- { n1.Type = z.Type gins(x86.AMOVL, &z, &n1) n1.Xoffset += 4 } gc.Nodconst(&z, gc.Types[gc.TUINT8], 0) for ; c > 0; c-- { n1.Type = z.Type gins(x86.AMOVB, &z, &n1) n1.Xoffset++ } gc.Regfree(&n1) return } var n1 gc.Node gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI) gc.Agen(nl, &n1) gconreg(x86.AMOVL, 0, x86.REG_AX) if q > 128 || (q >= 4 && gc.Nacl) { gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+ } else if q >= 4 { p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) // 1 and 128 = magic constants: see ../../runtime/asm_386.s p.To.Offset = 1 * (128 - int64(q)) } else { for q > 0 { gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+ q-- } } for c > 0 { gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+ c-- } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpMIPS64MOVVconvert, ssa.OpMIPS64MOVVreg: if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x == y { return } as := mips.AMOVV if v.Type.IsFloat() { switch v.Type.Size() { case 4: as = mips.AMOVF case 8: as = mips.AMOVD default: panic("bad float size") } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y case ssa.OpMIPS64MOVVnop: if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } r := gc.SSARegNum(v) p := gc.Prog(loadByType(v.Type, r)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } r := gc.SSARegNum(v.Args[0]) p := gc.Prog(storeByType(v.Type, r)) p.From.Type = obj.TYPE_REG p.From.Reg = r n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpMIPS64ADDV, ssa.OpMIPS64SUBV, ssa.OpMIPS64AND, ssa.OpMIPS64OR, ssa.OpMIPS64XOR, ssa.OpMIPS64NOR, ssa.OpMIPS64SLLV, ssa.OpMIPS64SRLV, ssa.OpMIPS64SRAV, ssa.OpMIPS64ADDF, ssa.OpMIPS64ADDD, ssa.OpMIPS64SUBF, ssa.OpMIPS64SUBD, ssa.OpMIPS64MULF, ssa.OpMIPS64MULD, ssa.OpMIPS64DIVF, ssa.OpMIPS64DIVD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64SGT, ssa.OpMIPS64SGTU: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64ADDVconst, ssa.OpMIPS64SUBVconst, ssa.OpMIPS64ANDconst, ssa.OpMIPS64ORconst, ssa.OpMIPS64XORconst, ssa.OpMIPS64NORconst, ssa.OpMIPS64SLLVconst, ssa.OpMIPS64SRLVconst, ssa.OpMIPS64SRAVconst, ssa.OpMIPS64SGTconst, ssa.OpMIPS64SGTUconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64MULV, ssa.OpMIPS64MULVU, ssa.OpMIPS64DIVV, ssa.OpMIPS64DIVVU: // result in hi,lo p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpMIPS64MOVVconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64MOVFconst, ssa.OpMIPS64MOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64CMPEQF, ssa.OpMIPS64CMPEQD, ssa.OpMIPS64CMPGEF, ssa.OpMIPS64CMPGED, ssa.OpMIPS64CMPGTF, ssa.OpMIPS64CMPGTD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) case ssa.OpMIPS64MOVVaddr: p := gc.Prog(mips.AMOVV) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) var wantreg string // MOVV $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R29) // when constant is large, tmp register (R23) may be used // - base is SB: load external address with relocation switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVV $off(SP), R wantreg = "SP" p.From.Reg = mips.REGSP p.From.Offset = v.AuxInt } if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg) } case ssa.OpMIPS64MOVBload, ssa.OpMIPS64MOVBUload, ssa.OpMIPS64MOVHload, ssa.OpMIPS64MOVHUload, ssa.OpMIPS64MOVWload, ssa.OpMIPS64MOVWUload, ssa.OpMIPS64MOVVload, ssa.OpMIPS64MOVFload, ssa.OpMIPS64MOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64MOVBstore, ssa.OpMIPS64MOVHstore, ssa.OpMIPS64MOVWstore, ssa.OpMIPS64MOVVstore, ssa.OpMIPS64MOVFstore, ssa.OpMIPS64MOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpMIPS64MOVBstorezero, ssa.OpMIPS64MOVHstorezero, ssa.OpMIPS64MOVWstorezero, ssa.OpMIPS64MOVVstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpMIPS64MOVBreg, ssa.OpMIPS64MOVBUreg, ssa.OpMIPS64MOVHreg, ssa.OpMIPS64MOVHUreg, ssa.OpMIPS64MOVWreg, ssa.OpMIPS64MOVWUreg: // TODO: remove extension if after proper load fallthrough case ssa.OpMIPS64MOVWF, ssa.OpMIPS64MOVWD, ssa.OpMIPS64MOVFW, ssa.OpMIPS64MOVDW, ssa.OpMIPS64MOVVF, ssa.OpMIPS64MOVVD, ssa.OpMIPS64MOVFV, ssa.OpMIPS64MOVDV, ssa.OpMIPS64MOVFD, ssa.OpMIPS64MOVDF, ssa.OpMIPS64NEGF, ssa.OpMIPS64NEGD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64NEGV: // SUB from REGZERO p := gc.Prog(mips.ASUBVU) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpMIPS64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPS64CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPS64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPS64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPS64CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPS64LoweredNilCheck: // TODO: optimization // Issue a load which will fault if arg is nil. p := gc.Prog(mips.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGZERO if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: if !v.Args[0].Type.IsPtrShaped() { v.Fatalf("keeping non-pointer alive %v", v.Args[0]) } n, off := gc.AutoVar(v.Args[0]) if n == nil { v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0]) } if off != 0 { v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off) } gc.Gvarlive(n) case ssa.OpMIPS64FPFlagTrue, ssa.OpMIPS64FPFlagFalse: // MOVV $0, r // BFPF 2(PC) // MOVV $1, r branch := mips.ABFPF if v.Op == ssa.OpMIPS64FPFlagFalse { branch = mips.ABFPT } p := gc.Prog(mips.AMOVV) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) p2 := gc.Prog(branch) p2.To.Type = obj.TYPE_BRANCH p3 := gc.Prog(mips.AMOVV) p3.From.Type = obj.TYPE_CONST p3.From.Offset = 1 p3.To.Type = obj.TYPE_REG p3.To.Reg = gc.SSARegNum(v) p4 := gc.Prog(obj.ANOP) // not a machine instruction, for branch to land gc.Patch(p2, p4) case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpMIPS64LoweredGetClosurePtr: // Closure pointer is R22 (mips.REGCTXT). gc.CheckLoweredGetClosurePtr(v) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { s.SetLineno(b.Line) switch b.Kind { case ssa.BlockPlain, ssa.BlockCall, ssa.BlockCheck: if b.Succs[0] != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]}) } case ssa.BlockDefer: // defer returns in rax: // 0 if we should continue executing // 1 if we should jump to deferreturn call p := gc.Prog(x86.ATESTL) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX p = gc.Prog(x86.AJNE) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]}) if b.Succs[0] != next { p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]}) } case ssa.BlockExit: gc.Prog(obj.AUNDEF) // tell plive.go that we never reach here case ssa.BlockRet: gc.Prog(obj.ARET) case ssa.BlockRetJmp: p := gc.Prog(obj.AJMP) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(b.Aux.(*gc.Sym)) case ssa.BlockAMD64EQF: gc.SSAGenFPJump(s, b, next, &eqfJumps) case ssa.BlockAMD64NEF: gc.SSAGenFPJump(s, b, next, &nefJumps) case ssa.BlockAMD64EQ, ssa.BlockAMD64NE, ssa.BlockAMD64LT, ssa.BlockAMD64GE, ssa.BlockAMD64LE, ssa.BlockAMD64GT, ssa.BlockAMD64ULT, ssa.BlockAMD64UGT, ssa.BlockAMD64ULE, ssa.BlockAMD64UGE: jmp := blockJump[b.Kind] likely := b.Likely var p *obj.Prog switch next { case b.Succs[0]: p = gc.Prog(jmp.invasm) likely *= -1 p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{p, b.Succs[1]}) case b.Succs[1]: p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]}) default: p = gc.Prog(jmp.asm) p.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{p, b.Succs[0]}) q := gc.Prog(obj.AJMP) q.To.Type = obj.TYPE_BRANCH s.Branches = append(s.Branches, gc.Branch{q, b.Succs[1]}) } // liblink reorders the instruction stream as it sees fit. // Pass along what we know so liblink can make use of it. // TODO: Once we've fully switched to SSA, // make liblink leave our output alone. switch likely { case ssa.BranchUnlikely: p.From.Type = obj.TYPE_CONST p.From.Offset = 0 case ssa.BranchLikely: p.From.Type = obj.TYPE_CONST p.From.Offset = 1 } default: b.Unimplementedf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString()) } }
// Generates code for v using 387 instructions. Reports whether // the instruction was handled by this routine. func ssaGenValue387(s *gc.SSAGenState, v *ssa.Value) bool { // The SSA compiler pretends that it has an SSE backend. // If we don't have one of those, we need to translate // all the SSE ops to equivalent 387 ops. That's what this // function does. switch v.Op { case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst: p := gc.Prog(loadPush(v.Type)) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_F0 popAndSave(s, v) return true case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2: p := gc.Prog(loadPush(v.Type)) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_F0 popAndSave(s, v) return true case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1, ssa.Op386MOVSSloadidx4, ssa.Op386MOVSDloadidx8: p := gc.Prog(loadPush(v.Type)) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) switch v.Op { case ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1: p.From.Scale = 1 p.From.Index = v.Args[1].Reg() case ssa.Op386MOVSSloadidx4: p.From.Scale = 4 p.From.Index = v.Args[1].Reg() case ssa.Op386MOVSDloadidx8: p.From.Scale = 8 p.From.Index = v.Args[1].Reg() } p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_F0 popAndSave(s, v) return true case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore: // Push to-be-stored value on top of stack. push(s, v.Args[1]) // Pop and store value. var op obj.As switch v.Op { case ssa.Op386MOVSSstore: op = x86.AFMOVFP case ssa.Op386MOVSDstore: op = x86.AFMOVDP } p := gc.Prog(op) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) return true case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVSDstoreidx8: push(s, v.Args[2]) var op obj.As switch v.Op { case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSSstoreidx4: op = x86.AFMOVFP case ssa.Op386MOVSDstoreidx1, ssa.Op386MOVSDstoreidx8: op = x86.AFMOVDP } p := gc.Prog(op) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) switch v.Op { case ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1: p.To.Scale = 1 p.To.Index = v.Args[1].Reg() case ssa.Op386MOVSSstoreidx4: p.To.Scale = 4 p.To.Index = v.Args[1].Reg() case ssa.Op386MOVSDstoreidx8: p.To.Scale = 8 p.To.Index = v.Args[1].Reg() } return true case ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD, ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // Push arg1 on top of stack push(s, v.Args[1]) // Set precision if needed. 64 bits is the default. switch v.Op { case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS: p := gc.Prog(x86.AFSTCW) s.AddrScratch(&p.To) p = gc.Prog(x86.AFLDCW) p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN p.From.Sym = gc.Linksym(gc.Pkglookup("controlWord32", gc.Runtimepkg)) } var op obj.As switch v.Op { case ssa.Op386ADDSS, ssa.Op386ADDSD: op = x86.AFADDDP case ssa.Op386SUBSS, ssa.Op386SUBSD: op = x86.AFSUBDP case ssa.Op386MULSS, ssa.Op386MULSD: op = x86.AFMULDP case ssa.Op386DIVSS, ssa.Op386DIVSD: op = x86.AFDIVDP } p := gc.Prog(op) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 p.To.Type = obj.TYPE_REG p.To.Reg = s.SSEto387[v.Reg()] + 1 // Restore precision if needed. switch v.Op { case ssa.Op386ADDSS, ssa.Op386SUBSS, ssa.Op386MULSS, ssa.Op386DIVSS: p := gc.Prog(x86.AFLDCW) s.AddrScratch(&p.From) } return true case ssa.Op386UCOMISS, ssa.Op386UCOMISD: push(s, v.Args[0]) // Compare. p := gc.Prog(x86.AFUCOMP) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 p.To.Type = obj.TYPE_REG p.To.Reg = s.SSEto387[v.Args[1].Reg()] + 1 // Save AX. p = gc.Prog(x86.AMOVL) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX s.AddrScratch(&p.To) // Move status word into AX. p = gc.Prog(x86.AFSTSW) p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX // Then move the flags we need to the integer flags. gc.Prog(x86.ASAHF) // Restore AX. p = gc.Prog(x86.AMOVL) s.AddrScratch(&p.From) p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX return true case ssa.Op386SQRTSD: push(s, v.Args[0]) gc.Prog(x86.AFSQRT) popAndSave(s, v) return true case ssa.Op386FCHS: push(s, v.Args[0]) gc.Prog(x86.AFCHS) popAndSave(s, v) return true case ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD: p := gc.Prog(x86.AMOVL) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() s.AddrScratch(&p.To) p = gc.Prog(x86.AFMOVL) s.AddrScratch(&p.From) p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_F0 popAndSave(s, v) return true case ssa.Op386CVTTSD2SL, ssa.Op386CVTTSS2SL: push(s, v.Args[0]) // Save control word. p := gc.Prog(x86.AFSTCW) s.AddrScratch(&p.To) p.To.Offset += 4 // Load control word which truncates (rounds towards zero). p = gc.Prog(x86.AFLDCW) p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN p.From.Sym = gc.Linksym(gc.Pkglookup("controlWord64trunc", gc.Runtimepkg)) // Now do the conversion. p = gc.Prog(x86.AFMOVLP) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 s.AddrScratch(&p.To) p = gc.Prog(x86.AMOVL) s.AddrScratch(&p.From) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() // Restore control word. p = gc.Prog(x86.AFLDCW) s.AddrScratch(&p.From) p.From.Offset += 4 return true case ssa.Op386CVTSS2SD: // float32 -> float64 is a nop push(s, v.Args[0]) popAndSave(s, v) return true case ssa.Op386CVTSD2SS: // Round to nearest float32. push(s, v.Args[0]) p := gc.Prog(x86.AFMOVFP) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 s.AddrScratch(&p.To) p = gc.Prog(x86.AFMOVF) s.AddrScratch(&p.From) p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_F0 popAndSave(s, v) return true case ssa.OpLoadReg: if !v.Type.IsFloat() { return false } // Load+push the value we need. p := gc.Prog(loadPush(v.Type)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_F0 // Move the value to its assigned register. popAndSave(s, v) return true case ssa.OpStoreReg: if !v.Type.IsFloat() { return false } push(s, v.Args[0]) var op obj.As switch v.Type.Size() { case 4: op = x86.AFMOVFP case 8: op = x86.AFMOVDP } p := gc.Prog(op) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_F0 gc.AddrAuto(&p.To, v) return true case ssa.OpCopy: if !v.Type.IsFloat() { return false } push(s, v.Args[0]) popAndSave(s, v) return true case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLdefer, ssa.Op386CALLgo, ssa.Op386CALLinter: flush387(s) // Calls must empty the the FP stack. return false // then issue the call as normal } return false }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } w := nl.Type.Width if w > 1024 || (w >= 64 && (gc.Nacl || isPlan9)) { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var ax gc.Node var oldax gc.Node savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr]) gconreg(x86.AMOVL, 0, x86.REG_AX) gconreg(movptr, w/8, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+ if w%8 != 0 { n1.Op = gc.OINDREG clearfat_tail(&n1, w%8) } restx(&n1, &oldn1) restx(&ax, &oldax) return } if w >= 64 { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var vec_zero gc.Node var old_x0 gc.Node savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64]) gins(x86.AXORPS, &vec_zero, &vec_zero) if di := dzDI(w); di != 0 { gconreg(addptr, di, x86.REG_DI) } p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = dzOff(w) if w%16 != 0 { n1.Op = gc.OINDREG n1.Xoffset -= 16 - w%16 gins(x86.AMOVUPS, &vec_zero, &n1) } restx(&vec_zero, &old_x0) restx(&n1, &oldn1) return } // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Agenr(nl, &n1, nil) n1.Op = gc.OINDREG clearfat_tail(&n1, w) gc.Regfree(&n1) }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) if gc.Thearch.Use387 { if ssaGenValue387(s, v) { return // v was handled by 387 generation. } } switch v.Op { case ssa.Op386ADDL: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) switch { case r == r1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case r == r2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r default: p := gc.Prog(x86.ALEAL) p.From.Type = obj.TYPE_MEM p.From.Reg = r1 p.From.Scale = 1 p.From.Index = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r } // 2-address opcode arithmetic case ssa.Op386SUBL, ssa.Op386MULL, ssa.Op386ANDL, ssa.Op386ORL, ssa.Op386XORL, ssa.Op386SHLL, ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB, ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB, ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD, ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD, ssa.Op386PXOR, ssa.Op386ADCL, ssa.Op386SBBL: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1])) case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry: // output 0 is carry/borrow, output 1 is the low 32 bits. r := gc.SSARegNum1(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output[1] not in same register %s", v.LongString()) } opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1])) case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry: // output 0 is carry/borrow, output 1 is the low 32 bits. r := gc.SSARegNum1(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output[1] not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.Op386DIVL, ssa.Op386DIVW, ssa.Op386DIVLU, ssa.Op386DIVWU, ssa.Op386MODL, ssa.Op386MODW, ssa.Op386MODLU, ssa.Op386MODWU: // Arg[0] is already in AX as it's the only register we allow // and AX is the only output x := gc.SSARegNum(v.Args[1]) // CPU faults upon signed overflow, which occurs when most // negative int is divided by -1. var j *obj.Prog if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW || v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW { var c *obj.Prog switch v.Op { case ssa.Op386DIVL, ssa.Op386MODL: c = gc.Prog(x86.ACMPL) j = gc.Prog(x86.AJEQ) gc.Prog(x86.ACDQ) //TODO: fix case ssa.Op386DIVW, ssa.Op386MODW: c = gc.Prog(x86.ACMPW) j = gc.Prog(x86.AJEQ) gc.Prog(x86.ACWD) } c.From.Type = obj.TYPE_REG c.From.Reg = x c.To.Type = obj.TYPE_CONST c.To.Offset = -1 j.To.Type = obj.TYPE_BRANCH } // for unsigned ints, we sign extend by setting DX = 0 // signed ints were sign extended above if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU || v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU { c := gc.Prog(x86.AXORL) c.From.Type = obj.TYPE_REG c.From.Reg = x86.REG_DX c.To.Type = obj.TYPE_REG c.To.Reg = x86.REG_DX } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x // signed division, rest of the check for -1 case if j != nil { j2 := gc.Prog(obj.AJMP) j2.To.Type = obj.TYPE_BRANCH var n *obj.Prog if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW { // n * -1 = -n n = gc.Prog(x86.ANEGL) n.To.Type = obj.TYPE_REG n.To.Reg = x86.REG_AX } else { // n % -1 == 0 n = gc.Prog(x86.AXORL) n.From.Type = obj.TYPE_REG n.From.Reg = x86.REG_DX n.To.Type = obj.TYPE_REG n.To.Reg = x86.REG_DX } j.To.Val = n j2.To.Val = s.Pc() } case ssa.Op386HMULL, ssa.Op386HMULW, ssa.Op386HMULB, ssa.Op386HMULLU, ssa.Op386HMULWU, ssa.Op386HMULBU: // the frontend rewrites constant division by 8/16/32 bit integers into // HMUL by a constant // SSA rewrites generate the 64 bit versions // Arg[0] is already in AX as it's the only register we allow // and DX is the only output we care about (the high bits) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) // IMULB puts the high portion in AH instead of DL, // so move it to DL for consistency if v.Type.Size() == 1 { m := gc.Prog(x86.AMOVB) m.From.Type = obj.TYPE_REG m.From.Reg = x86.REG_AH m.To.Type = obj.TYPE_REG m.To.Reg = x86.REG_DX } case ssa.Op386MULLQU: // AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]). p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) case ssa.Op386ADDLconst: r := gc.SSARegNum(v) a := gc.SSARegNum(v.Args[0]) if r == a { if v.AuxInt == 1 { p := gc.Prog(x86.AINCL) p.To.Type = obj.TYPE_REG p.To.Reg = r return } if v.AuxInt == -1 { p := gc.Prog(x86.ADECL) p.To.Type = obj.TYPE_REG p.To.Reg = r return } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r return } p := gc.Prog(x86.ALEAL) p.From.Type = obj.TYPE_MEM p.From.Reg = a p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.Op386MULLconst: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r // TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2 // then we don't need to use resultInArg0 for these ops. //p.From3 = new(obj.Addr) //p.From3.Type = obj.TYPE_REG //p.From3.Reg = gc.SSARegNum(v.Args[0]) case ssa.Op386SUBLconst, ssa.Op386ADCLconst, ssa.Op386SBBLconst, ssa.Op386ANDLconst, ssa.Op386ORLconst, ssa.Op386XORLconst, ssa.Op386SHLLconst, ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst, ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst, ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.Op386SBBLcarrymask: r := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8: r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) p := gc.Prog(x86.ALEAL) switch v.Op { case ssa.Op386LEAL1: p.From.Scale = 1 if i == x86.REG_SP { r, i = i, r } case ssa.Op386LEAL2: p.From.Scale = 2 case ssa.Op386LEAL4: p.From.Scale = 4 case ssa.Op386LEAL8: p.From.Scale = 8 } p.From.Type = obj.TYPE_MEM p.From.Reg = r p.From.Index = i gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386LEAL: p := gc.Prog(x86.ALEAL) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB, ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB: opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0])) case ssa.Op386UCOMISS, ssa.Op386UCOMISD: // Go assembler has swapped operands for UCOMISx relative to CMP, // must account for that right here. opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1])) case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) case ssa.Op386MOVLconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = x // If flags are live at this instruction, suppress the // MOV $0,AX -> XOR AX,AX optimization. if v.Aux != nil { p.Mark |= x86.PRESERVEFLAGS } case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1: var literal string if v.Op == ssa.Op386MOVSDconst1 { literal = fmt.Sprintf("$f64.%016x", uint64(v.AuxInt)) } else { literal = fmt.Sprintf("$f32.%08x", math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt))))) } p := gc.Prog(x86.ALEAL) p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN p.From.Sym = obj.Linklookup(gc.Ctxt, literal, 0) p.From.Sym.Local = true p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVSDloadidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 8 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 4 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVWloadidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 2 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1: r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) if i == x86.REG_SP { r, i = i, r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = r p.From.Scale = 1 p.From.Index = i gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.Op386MOVSDstoreidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 8 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 4 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.Op386MOVWstoreidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 2 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1: r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) if i == x86.REG_SP { r, i = i, r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = r p.To.Scale = 1 p.To.Index = i gc.AddAux(&p.To, v) case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val() p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux2(&p.To, v, sc.Off()) case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() p.From.Offset = sc.Val() r := gc.SSARegNum(v.Args[0]) i := gc.SSARegNum(v.Args[1]) switch v.Op { case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1: p.To.Scale = 1 if i == x86.REG_SP { r, i = i, r } case ssa.Op386MOVWstoreconstidx2: p.To.Scale = 2 case ssa.Op386MOVLstoreconstidx4: p.To.Scale = 4 } p.To.Type = obj.TYPE_MEM p.To.Reg = r p.To.Index = i gc.AddAux2(&p.To, v, sc.Off()) case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX, ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD, ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL, ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS: opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0])) case ssa.Op386DUFFZERO: p := gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.Op386DUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpCopy, ssa.Op386MOVLconvert: // TODO: use MOVLreg for reg->reg copies instead of OpCopy? if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x != y { opregreg(moveByType(v.Type), y, x) } case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.Op386LoweredGetClosurePtr: // Closure pointer is DX. gc.CheckLoweredGetClosurePtr(v) case ssa.Op386LoweredGetG: r := gc.SSARegNum(v) // See the comments in cmd/internal/obj/x86/obj6.go // near CanUse1InsnTLS for a detailed explanation of these instructions. if x86.CanUse1InsnTLS(gc.Ctxt) { // MOVL (TLS), r p := gc.Prog(x86.AMOVL) p.From.Type = obj.TYPE_MEM p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // MOVL TLS, r // MOVL (r)(TLS*1), r p := gc.Prog(x86.AMOVL) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r q := gc.Prog(x86.AMOVL) q.From.Type = obj.TYPE_MEM q.From.Reg = r q.From.Index = x86.REG_TLS q.From.Scale = 1 q.To.Type = obj.TYPE_REG q.To.Reg = r } case ssa.Op386CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.Op386CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.Op386CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.Op386CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.Op386CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.Op386NEGL, ssa.Op386BSWAPL, ssa.Op386NOTL: r := gc.SSARegNum(v) if r != gc.SSARegNum(v.Args[0]) { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.Op386BSFL, ssa.Op386BSFW, ssa.Op386BSRL, ssa.Op386BSRW, ssa.Op386SQRTSD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpSP, ssa.OpSB, ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.Op386SETEQ, ssa.Op386SETNE, ssa.Op386SETL, ssa.Op386SETLE, ssa.Op386SETG, ssa.Op386SETGE, ssa.Op386SETGF, ssa.Op386SETGEF, ssa.Op386SETB, ssa.Op386SETBE, ssa.Op386SETORD, ssa.Op386SETNAN, ssa.Op386SETA, ssa.Op386SETAE: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.Op386SETNEF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPS) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX) case ssa.Op386SETEQF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPC) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX) case ssa.Op386InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.Op386REPSTOSL: gc.Prog(x86.AREP) gc.Prog(x86.ASTOSL) case ssa.Op386REPMOVSL: gc.Prog(x86.AREP) gc.Prog(x86.AMOVSL) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: if !v.Args[0].Type.IsPtrShaped() { v.Fatalf("keeping non-pointer alive %v", v.Args[0]) } n, off := gc.AutoVar(v.Args[0]) if n == nil { v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0]) } if off != 0 { v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off) } gc.Gvarlive(n) case ssa.Op386LoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. mem := v.Args[1] for _, w := range v.Block.Succs[0].Block().Values { if w.Op == ssa.OpPhi { if w.Type.IsMemory() { mem = w } continue } if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // w doesn't use a store - can't be a memory op. continue } if w.Args[len(w.Args)-1] != mem { v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) } switch w.Op { case ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload, ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVSSstore, ssa.Op386MOVSDstore: if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst: off := ssa.ValAndOff(v.AuxInt).Off() if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } } if w.Type.IsMemory() { if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // these ops are OK mem = w continue } // We can't delay the nil check past the next store. break } } // Issue a load which will fault if the input is nil. // TODO: We currently use the 2-byte instruction TESTB AX, (reg). // Should we use the 3-byte TESTB $0, (reg) instead? It is larger // but it doesn't have false dependency on AX. // Or maybe allocate an output register and use MOVL (reg),reg2 ? // That trades clobbering flags for clobbering a register. p := gc.Prog(x86.ATESTB) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.Op386FCHS: v.Fatalf("FCHS in non-387 mode") default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB: // nothing to do case ssa.OpCopy: case ssa.OpLoadReg: // TODO: by type p := gc.Prog(arm.AMOVW) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: // TODO: by type p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpARMADD: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt if v.Aux != nil { panic("can't handle symbolic constant yet") } p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMCMP: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) case ssa.OpARMMOVWload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVWstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpARMCALLstatic: // TODO: deferreturn p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpARMLessThan: v.Fatalf("pseudo-op made it to output: %s", v.LongString()) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpPPC64MOVDconvert: t := v.Type if t.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x != y { rt := obj.TYPE_REG op := ppc64.AMOVD if t.IsFloat() { op = ppc64.AFMOVD } p := gc.Prog(op) p.From.Type = rt p.From.Reg = x p.To.Type = rt p.To.Reg = y } case ssa.OpPPC64Xf2i64: { x := v.Args[0].Reg() y := v.Reg() p := gc.Prog(ppc64.AFMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = x s.AddrScratch(&p.To) p = gc.Prog(ppc64.AMOVD) p.To.Type = obj.TYPE_REG p.To.Reg = y s.AddrScratch(&p.From) } case ssa.OpPPC64Xi2f64: { x := v.Args[0].Reg() y := v.Reg() p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = x s.AddrScratch(&p.To) p = gc.Prog(ppc64.AFMOVD) p.To.Type = obj.TYPE_REG p.To.Reg = y s.AddrScratch(&p.From) } case ssa.OpPPC64LoweredGetClosurePtr: // Closure pointer is R11 (already) gc.CheckLoweredGetClosurePtr(v) case ssa.OpLoadReg: loadOp := loadByType(v.Type) p := gc.Prog(loadOp) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpStoreReg: storeOp := storeByType(v.Type) p := gc.Prog(storeOp) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() gc.AddrAuto(&p.To, v) case ssa.OpPPC64DIVD: // For now, // // cmp arg1, -1 // be ahead // v = arg0 / arg1 // b over // ahead: v = - arg0 // over: nop r := v.Reg() r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() p := gc.Prog(ppc64.ACMP) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_CONST p.To.Offset = -1 pbahead := gc.Prog(ppc64.ABEQ) pbahead.To.Type = obj.TYPE_BRANCH p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = r pbover := gc.Prog(obj.AJMP) pbover.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.ANEG) p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Type = obj.TYPE_REG p.From.Reg = r0 gc.Patch(pbahead, p) p = gc.Prog(obj.ANOP) gc.Patch(pbover, p) case ssa.OpPPC64DIVW: // word-width version of above r := v.Reg() r0 := v.Args[0].Reg() r1 := v.Args[1].Reg() p := gc.Prog(ppc64.ACMPW) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_CONST p.To.Offset = -1 pbahead := gc.Prog(ppc64.ABEQ) pbahead.To.Type = obj.TYPE_BRANCH p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.Reg = r0 p.To.Type = obj.TYPE_REG p.To.Reg = r pbover := gc.Prog(obj.AJMP) pbover.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.ANEG) p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Type = obj.TYPE_REG p.From.Reg = r0 gc.Patch(pbahead, p) p = gc.Prog(obj.ANOP) gc.Patch(pbover, p) case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS, ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU, ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW, ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64XOR, ssa.OpPPC64EQV: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpPPC64MaskIfNotCarry: r := v.Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpPPC64ADDconstForCarry: r1 := v.Args[0].Reg() p := gc.Prog(v.Op.Asm()) p.Reg = r1 p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect. case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP: r := v.Reg() p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() case ssa.OpPPC64ADDconst, ssa.OpPPC64ANDconst, ssa.OpPPC64ORconst, ssa.OpPPC64XORconst, ssa.OpPPC64SRADconst, ssa.OpPPC64SRAWconst, ssa.OpPPC64SRDconst, ssa.OpPPC64SRWconst, ssa.OpPPC64SLDconst, ssa.OpPPC64SLWconst: p := gc.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() if v.Aux != nil { p.From.Type = obj.TYPE_CONST p.From.Offset = gc.AuxOffset(v) } else { p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt } p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPPC64ANDCCconst: p := gc.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() if v.Aux != nil { p.From.Type = obj.TYPE_CONST p.From.Offset = gc.AuxOffset(v) } else { p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt } p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP // discard result case ssa.OpPPC64MOVDaddr: p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() var wantreg string // Suspect comment, copied from ARM code // MOVD $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVD $off(SP), R wantreg = "SP" p.From.Reg = ppc64.REGSP p.From.Offset = v.AuxInt } if reg := v.Args[0].RegName(); reg != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) } case ssa.OpPPC64MOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPPC64FMOVDconst, ssa.OpPPC64FMOVSconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPPC64FCMPU, ssa.OpPPC64CMP, ssa.OpPPC64CMPW, ssa.OpPPC64CMPU, ssa.OpPPC64CMPWU: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[1].Reg() case ssa.OpPPC64CMPconst, ssa.OpPPC64CMPUconst, ssa.OpPPC64CMPWconst, ssa.OpPPC64CMPWUconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt case ssa.OpPPC64MOVBreg, ssa.OpPPC64MOVBZreg, ssa.OpPPC64MOVHreg, ssa.OpPPC64MOVHZreg, ssa.OpPPC64MOVWreg, ssa.OpPPC64MOVWZreg: // Shift in register to required size p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Reg = v.Reg() p.To.Type = obj.TYPE_REG case ssa.OpPPC64MOVDload, ssa.OpPPC64MOVWload, ssa.OpPPC64MOVHload, ssa.OpPPC64MOVWZload, ssa.OpPPC64MOVBZload, ssa.OpPPC64MOVHZload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPPC64MOVDstorezero, ssa.OpPPC64MOVWstorezero, ssa.OpPPC64MOVHstorezero, ssa.OpPPC64MOVBstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpPPC64MOVDstore, ssa.OpPPC64MOVWstore, ssa.OpPPC64MOVHstore, ssa.OpPPC64MOVBstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpPPC64FMOVDstore, ssa.OpPPC64FMOVSstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpPPC64Equal, ssa.OpPPC64NotEqual, ssa.OpPPC64LessThan, ssa.OpPPC64FLessThan, ssa.OpPPC64LessEqual, ssa.OpPPC64GreaterThan, ssa.OpPPC64FGreaterThan, ssa.OpPPC64GreaterEqual: // On Power7 or later, can use isel instruction: // for a < b, a > b, a = b: // rtmp := 1 // isel rt,rtmp,r0,cond // rt is target in ppc asm // for a >= b, a <= b, a != b: // rtmp := 1 // isel rt,0,rtmp,!cond // rt is target in ppc asm if v.Block.Func.Config.OldArch { p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() pb := gc.Prog(condOps[v.Op]) pb.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p = gc.Prog(obj.ANOP) gc.Patch(pb, p) break } // Modern PPC uses ISEL p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = iselRegs[1] iop := iselOps[v.Op] ssaGenISEL(v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond]) case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion ssa.OpPPC64FGreaterEqual: if v.Block.Func.Config.OldArch { p := gc.Prog(ppc64.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() pb0 := gc.Prog(condOps[v.Op]) pb0.To.Type = obj.TYPE_BRANCH pb1 := gc.Prog(ppc64.ABEQ) pb1.To.Type = obj.TYPE_BRANCH p = gc.Prog(ppc64.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p = gc.Prog(obj.ANOP) gc.Patch(pb0, p) gc.Patch(pb1, p) break } // Modern PPC uses ISEL p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = iselRegs[1] iop := iselOps[v.Op] ssaGenISEL(v, iop.cond, iselRegs[iop.valueIfCond], iselRegs[1-iop.valueIfCond]) ssaGenISEL(v, ppc64.C_COND_EQ, iselRegs[1], v.Reg()) case ssa.OpPPC64LoweredZero: // Similar to how this is done on ARM, // except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off // not store-and-increment. // Therefore R3 should be dest-align // and arg1 should be dest+size-align // HOWEVER, the input dest address cannot be dest-align because // that does not necessarily address valid memory and it's not // known how that might be optimized. Therefore, correct it in // in the expansion: // // ADD -8,R3,R3 // MOVDU R0, 8(R3) // CMP R3, Rarg1 // BL -2(PC) // arg1 is the address of the last element to zero // auxint is alignment var sz int64 var movu obj.As switch { case v.AuxInt%8 == 0: sz = 8 movu = ppc64.AMOVDU case v.AuxInt%4 == 0: sz = 4 movu = ppc64.AMOVWZU // MOVWU instruction not implemented case v.AuxInt%2 == 0: sz = 2 movu = ppc64.AMOVHU default: sz = 1 movu = ppc64.AMOVBU } p := gc.Prog(ppc64.AADD) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST p.From.Offset = -sz p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[0].Reg() p = gc.Prog(movu) p.From.Type = obj.TYPE_REG p.From.Reg = ppc64.REG_R0 p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() p.To.Offset = sz p2 := gc.Prog(ppc64.ACMPU) p2.From.Type = obj.TYPE_REG p2.From.Reg = v.Args[0].Reg() p2.To.Reg = v.Args[1].Reg() p2.To.Type = obj.TYPE_REG p3 := gc.Prog(ppc64.ABLT) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpPPC64LoweredMove: // Similar to how this is done on ARM, // except that PPC MOVDU x,off(y) is *(y+off) = x; y=y+off, // not store-and-increment. // Inputs must be valid pointers to memory, // so adjust arg0 and arg1 as part of the expansion. // arg2 should be src+size-align, // // ADD -8,R3,R3 // ADD -8,R4,R4 // MOVDU 8(R4), Rtmp // MOVDU Rtmp, 8(R3) // CMP R4, Rarg2 // BL -3(PC) // arg2 is the address of the last element of src // auxint is alignment var sz int64 var movu obj.As switch { case v.AuxInt%8 == 0: sz = 8 movu = ppc64.AMOVDU case v.AuxInt%4 == 0: sz = 4 movu = ppc64.AMOVWZU // MOVWU instruction not implemented case v.AuxInt%2 == 0: sz = 2 movu = ppc64.AMOVHU default: sz = 1 movu = ppc64.AMOVBU } p := gc.Prog(ppc64.AADD) p.Reg = v.Args[0].Reg() p.From.Type = obj.TYPE_CONST p.From.Offset = -sz p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[0].Reg() p = gc.Prog(ppc64.AADD) p.Reg = v.Args[1].Reg() p.From.Type = obj.TYPE_CONST p.From.Offset = -sz p.To.Type = obj.TYPE_REG p.To.Reg = v.Args[1].Reg() p = gc.Prog(movu) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[1].Reg() p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP p2 := gc.Prog(movu) p2.From.Type = obj.TYPE_REG p2.From.Reg = ppc64.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p2.To.Offset = sz p3 := gc.Prog(ppc64.ACMPU) p3.From.Reg = v.Args[1].Reg() p3.From.Type = obj.TYPE_REG p3.To.Reg = v.Args[2].Reg() p3.To.Type = obj.TYPE_REG p4 := gc.Prog(ppc64.ABLT) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpPPC64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert two actual hardware NOPs that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. // PPC64 is unusual because TWO nops are required // (see gc/cgen.go, gc/plive.go -- copy of comment below) // // On ppc64, when compiling Go into position // independent code on ppc64le we insert an // instruction to reload the TOC pointer from the // stack as well. See the long comment near // jmpdefer in runtime/asm_ppc64.s for why. // If the MOVD is not needed, insert a hardware NOP // so that the same number of instructions are used // on ppc64 in both shared and non-shared modes. ginsnop() if gc.Ctxt.Flag_shared { p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_MEM p.From.Offset = 24 p.From.Reg = ppc64.REGSP p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REG_R2 } else { ginsnop() } } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpPPC64CALLclosure, ssa.OpPPC64CALLinter: p := gc.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REG_CTR if gc.Ctxt.Flag_shared && p.From.Reg != ppc64.REG_R12 { // Make sure function pointer is in R12 as well when // compiling Go into PIC. // TODO(mwhudson): it would obviously be better to // change the register allocation to put the value in // R12 already, but I don't know how to do that. // TODO: We have the technology now to implement TODO above. q := gc.Prog(ppc64.AMOVD) q.From = p.From q.To.Type = obj.TYPE_REG q.To.Reg = ppc64.REG_R12 } pp := gc.Prog(obj.ACALL) pp.To.Type = obj.TYPE_REG pp.To.Reg = ppc64.REG_CTR if gc.Ctxt.Flag_shared { // When compiling Go into PIC, the function we just // called via pointer might have been implemented in // a separate module and so overwritten the TOC // pointer in R2; reload it. q := gc.Prog(ppc64.AMOVD) q.From.Type = obj.TYPE_MEM q.From.Offset = 24 q.From.Reg = ppc64.REGSP q.To.Type = obj.TYPE_REG q.To.Reg = ppc64.REG_R2 } if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpPPC64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpPPC64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpPPC64LoweredNilCheck: // Issue a load which will fault if arg is nil. p := gc.Prog(ppc64.AMOVBZ) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpPPC64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpPPC64FlagEQ, ssa.OpPPC64FlagLT, ssa.OpPPC64FlagGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }