func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB: // nothing to do case ssa.OpCopy: case ssa.OpLoadReg: // TODO: by type p := gc.Prog(arm.AMOVW) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: // TODO: by type p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpARMADD: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt if v.Aux != nil { panic("can't handle symbolic constant yet") } p.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMCMP: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.Reg = gc.SSARegNum(v.Args[1]) case ssa.OpARMMOVWload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpARMMOVWstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpARMCALLstatic: // TODO: deferreturn p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpARMLessThan: v.Fatalf("pseudo-op made it to output: %s", v.LongString()) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL, ssa.OpAMD64ADDW: r := gc.SSARegNum(v) r1 := gc.SSARegNum(v.Args[0]) r2 := gc.SSARegNum(v.Args[1]) switch { case r == r1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r case r == r2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r default: var asm obj.As switch v.Op { case ssa.OpAMD64ADDQ: asm = x86.ALEAQ case ssa.OpAMD64ADDL: asm = x86.ALEAL case ssa.OpAMD64ADDW: asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = r1 p.From.Scale = 1 p.From.Index = r2 p.To.Type = obj.TYPE_REG p.To.Reg = r } // 2-address opcode arithmetic, symmetric case ssa.OpAMD64ADDB, ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB, ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64ORW, ssa.OpAMD64ORB, ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64XORW, ssa.OpAMD64XORB, ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64MULW, ssa.OpAMD64MULB, ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64PXOR: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) if x != r && y != r { opregreg(moveByType(v.Type), r, x) x = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = r if x == r { p.From.Reg = y } else { p.From.Reg = x } // 2-address opcode arithmetic, not symmetric case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64SUBW, ssa.OpAMD64SUBB: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) var neg bool if y == r { // compute -(y-x) instead x, y = y, x neg = true } if x != r { opregreg(moveByType(v.Type), r, x) } opregreg(v.Op.Asm(), r, y) if neg { if v.Op == ssa.OpAMD64SUBQ { p := gc.Prog(x86.ANEGQ) p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // Avoids partial registers write p := gc.Prog(x86.ANEGL) p.To.Type = obj.TYPE_REG p.To.Reg = r } } case ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) if y == r && x != r { // r/y := x op r/y, need to preserve x and rewrite to // r/y := r/y op x15 x15 := int16(x86.REG_X15) // register move y to x15 // register move x to y // rename y with x15 opregreg(moveByType(v.Type), x15, y) opregreg(moveByType(v.Type), r, x) y = x15 } else if x != r { opregreg(moveByType(v.Type), r, x) } opregreg(v.Op.Asm(), r, y) case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW, ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU, ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW, ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU: // Arg[0] is already in AX as it's the only register we allow // and AX is the only output x := gc.SSARegNum(v.Args[1]) // CPU faults upon signed overflow, which occurs when most // negative int is divided by -1. var j *obj.Prog if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL || v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ || v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW { var c *obj.Prog switch v.Op { case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ: c = gc.Prog(x86.ACMPQ) j = gc.Prog(x86.AJEQ) // go ahead and sign extend to save doing it later gc.Prog(x86.ACQO) case ssa.OpAMD64DIVL, ssa.OpAMD64MODL: c = gc.Prog(x86.ACMPL) j = gc.Prog(x86.AJEQ) gc.Prog(x86.ACDQ) case ssa.OpAMD64DIVW, ssa.OpAMD64MODW: c = gc.Prog(x86.ACMPW) j = gc.Prog(x86.AJEQ) gc.Prog(x86.ACWD) } c.From.Type = obj.TYPE_REG c.From.Reg = x c.To.Type = obj.TYPE_CONST c.To.Offset = -1 j.To.Type = obj.TYPE_BRANCH } // for unsigned ints, we sign extend by setting DX = 0 // signed ints were sign extended above if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU || v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU || v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU { c := gc.Prog(x86.AXORQ) c.From.Type = obj.TYPE_REG c.From.Reg = x86.REG_DX c.To.Type = obj.TYPE_REG c.To.Reg = x86.REG_DX } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x // signed division, rest of the check for -1 case if j != nil { j2 := gc.Prog(obj.AJMP) j2.To.Type = obj.TYPE_BRANCH var n *obj.Prog if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL || v.Op == ssa.OpAMD64DIVW { // n * -1 = -n n = gc.Prog(x86.ANEGQ) n.To.Type = obj.TYPE_REG n.To.Reg = x86.REG_AX } else { // n % -1 == 0 n = gc.Prog(x86.AXORQ) n.From.Type = obj.TYPE_REG n.From.Reg = x86.REG_DX n.To.Type = obj.TYPE_REG n.To.Reg = x86.REG_DX } j.To.Val = n j2.To.Val = s.Pc() } case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU: // the frontend rewrites constant division by 8/16/32 bit integers into // HMUL by a constant // SSA rewrites generate the 64 bit versions // Arg[0] is already in AX as it's the only register we allow // and DX is the only output we care about (the high bits) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) // IMULB puts the high portion in AH instead of DL, // so move it to DL for consistency if v.Type.Size() == 1 { m := gc.Prog(x86.AMOVB) m.From.Type = obj.TYPE_REG m.From.Reg = x86.REG_AH m.To.Type = obj.TYPE_REG m.To.Reg = x86.REG_DX } case ssa.OpAMD64AVGQU: // compute (x+y)/2 unsigned. // Do a 64-bit add, the overflow goes into the carry. // Shift right once and pull the carry back into the 63rd bit. r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v.Args[1]) if x != r && y != r { opregreg(moveByType(v.Type), r, x) x = r } p := gc.Prog(x86.AADDQ) p.From.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG p.To.Reg = r if x == r { p.From.Reg = y } else { p.From.Reg = x } p = gc.Prog(x86.ARCRQ) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB, ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB, ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) if x != r { if r == x86.REG_CX { v.Fatalf("can't implement %s, target and shift both in CX", v.LongString()) } p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) // should be CX p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst: r := gc.SSARegNum(v) a := gc.SSARegNum(v.Args[0]) if r == a { if v.AuxInt2Int64() == 1 { var asm obj.As switch v.Op { // Software optimization manual recommends add $1,reg. // But inc/dec is 1 byte smaller. ICC always uses inc // Clang/GCC choose depending on flags, but prefer add. // Experiments show that inc/dec is both a little faster // and make a binary a little smaller. case ssa.OpAMD64ADDQconst: asm = x86.AINCQ case ssa.OpAMD64ADDLconst: asm = x86.AINCL case ssa.OpAMD64ADDWconst: asm = x86.AINCL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return } else if v.AuxInt2Int64() == -1 { var asm obj.As switch v.Op { case ssa.OpAMD64ADDQconst: asm = x86.ADECQ case ssa.OpAMD64ADDLconst: asm = x86.ADECL case ssa.OpAMD64ADDWconst: asm = x86.ADECL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r return } else { p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r return } } var asm obj.As switch v.Op { case ssa.OpAMD64ADDQconst: asm = x86.ALEAQ case ssa.OpAMD64ADDLconst: asm = x86.ALEAL case ssa.OpAMD64ADDWconst: asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = a p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst, ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) // Arg0 is in/out, move in to out if not already same if r != x { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } // Constant into AX, after arg0 movement in case arg0 is in AX p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = x86.REG_AX p = gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst: r := gc.SSARegNum(v) x := gc.SSARegNum(v.Args[0]) if r != x { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r // TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2 // instead of using the MOVQ above. //p.From3 = new(obj.Addr) //p.From3.Type = obj.TYPE_REG //p.From3.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) // We have 3-op add (lea), so transforming a = b - const into // a = b + (- const), saves us 1 instruction. We can't fit // - (-1 << 31) into 4 bytes offset in lea. // We handle 2-address just fine below. if v.AuxInt2Int64() == -1<<31 || x == r { if x != r { // This code compensates for the fact that the register allocator // doesn't understand 2-address instructions yet. TODO: fix that. p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r } else if x == r && v.AuxInt2Int64() == -1 { var asm obj.As // x = x - (-1) is the same as x++ // See OpAMD64ADDQconst comments about inc vs add $1,reg switch v.Op { case ssa.OpAMD64SUBQconst: asm = x86.AINCQ case ssa.OpAMD64SUBLconst: asm = x86.AINCL case ssa.OpAMD64SUBWconst: asm = x86.AINCL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r } else if x == r && v.AuxInt2Int64() == 1 { var asm obj.As switch v.Op { case ssa.OpAMD64SUBQconst: asm = x86.ADECQ case ssa.OpAMD64SUBLconst: asm = x86.ADECL case ssa.OpAMD64SUBWconst: asm = x86.ADECL } p := gc.Prog(asm) p.To.Type = obj.TYPE_REG p.To.Reg = r } else { var asm obj.As switch v.Op { case ssa.OpAMD64SUBQconst: asm = x86.ALEAQ case ssa.OpAMD64SUBLconst: asm = x86.ALEAL case ssa.OpAMD64SUBWconst: asm = x86.ALEAL } p := gc.Prog(asm) p.From.Type = obj.TYPE_MEM p.From.Reg = x p.From.Offset = -v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r } case ssa.OpAMD64ADDBconst, ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDWconst, ssa.OpAMD64ANDBconst, ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORWconst, ssa.OpAMD64ORBconst, ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORWconst, ssa.OpAMD64XORBconst, ssa.OpAMD64SUBBconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst, ssa.OpAMD64SHLBconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst, ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst, ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst: // This code compensates for the fact that the register allocator // doesn't understand 2-address instructions yet. TODO: fix that. x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) if x != r { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask: r := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8: p := gc.Prog(x86.ALEAQ) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) switch v.Op { case ssa.OpAMD64LEAQ1: p.From.Scale = 1 case ssa.OpAMD64LEAQ2: p.From.Scale = 2 case ssa.OpAMD64LEAQ4: p.From.Scale = 4 case ssa.OpAMD64LEAQ8: p.From.Scale = 8 } p.From.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64LEAQ: p := gc.Prog(x86.ALEAQ) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB: opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0])) case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD: // Go assembler has swapped operands for UCOMISx relative to CMP, // must account for that right here. opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1])) case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_CONST p.To.Offset = v.AuxInt2Int64() case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) case ssa.OpAMD64MOVBconst, ssa.OpAMD64MOVWconst, ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt2Int64() p.To.Type = obj.TYPE_REG p.To.Reg = x // If flags are live at this instruction, suppress the // MOV $0,AX -> XOR AX,AX optimization. if v.Aux != nil { p.Mark |= x86.PRESERVEFLAGS } case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst: x := gc.SSARegNum(v) p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = x case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 8 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVLloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 4 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVWloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVWloadidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 2 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVBloadidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) p.From.Scale = 1 p.From.Index = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[1]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 8 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 4 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVWstoreidx2: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 2 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVBstoreidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[2]) p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Scale = 1 p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux(&p.To, v) case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() i := sc.Val() switch v.Op { case ssa.OpAMD64MOVBstoreconst: i = int64(int8(i)) case ssa.OpAMD64MOVWstoreconst: i = int64(int16(i)) case ssa.OpAMD64MOVLstoreconst: i = int64(int32(i)) case ssa.OpAMD64MOVQstoreconst: } p.From.Offset = i p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST sc := v.AuxValAndOff() switch v.Op { case ssa.OpAMD64MOVBstoreconstidx1: p.From.Offset = int64(int8(sc.Val())) p.To.Scale = 1 case ssa.OpAMD64MOVWstoreconstidx2: p.From.Offset = int64(int16(sc.Val())) p.To.Scale = 2 case ssa.OpAMD64MOVLstoreconstidx4: p.From.Offset = int64(int32(sc.Val())) p.To.Scale = 4 case ssa.OpAMD64MOVQstoreconstidx8: p.From.Offset = sc.Val() p.To.Scale = 8 } p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) p.To.Index = gc.SSARegNum(v.Args[1]) gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX, ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ, ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS: opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0])) case ssa.OpAMD64DUFFZERO: p := gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpAMD64MOVOconst: if v.AuxInt != 0 { v.Unimplementedf("MOVOconst can only do constant=0") } r := gc.SSARegNum(v) opregreg(x86.AXORPS, r, r) case ssa.OpAMD64DUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? if v.Type.IsMemory() { return } x := gc.SSARegNum(v.Args[0]) y := gc.SSARegNum(v) if x != y { opregreg(moveByType(v.Type), y, x) } case ssa.OpLoadReg: if v.Type.IsFlags() { v.Unimplementedf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) n, off := gc.AutoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM p.From.Node = n p.From.Sym = gc.Linksym(n.Sym) p.From.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.From.Name = obj.NAME_PARAM p.From.Offset += n.Xoffset } else { p.From.Name = obj.NAME_AUTO } p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Unimplementedf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) n, off := gc.AutoVar(v) p.To.Type = obj.TYPE_MEM p.To.Node = n p.To.Sym = gc.Linksym(n.Sym) p.To.Offset = off if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT { p.To.Name = obj.NAME_PARAM p.To.Offset += n.Xoffset } else { p.To.Name = obj.NAME_AUTO } case ssa.OpPhi: // just check to make sure regalloc and stackalloc did it right if v.Type.IsMemory() { return } f := v.Block.Func loc := f.RegAlloc[v.ID] for _, a := range v.Args { if aloc := f.RegAlloc[a.ID]; aloc != loc { // TODO: .Equal() instead? v.Fatalf("phi arg at different location than phi: %v @ %v, but arg %v @ %v\n%s\n", v, loc, a, aloc, v.Block.Func) } } case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpAMD64LoweredGetClosurePtr: // Output is hardwired to DX only, // and DX contains the closure pointer on // closure entry, and this "instruction" // is scheduled to the very beginning // of the entry block. case ssa.OpAMD64LoweredGetG: r := gc.SSARegNum(v) // See the comments in cmd/internal/obj/x86/obj6.go // near CanUse1InsnTLS for a detailed explanation of these instructions. if x86.CanUse1InsnTLS(gc.Ctxt) { // MOVQ (TLS), r p := gc.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_MEM p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r } else { // MOVQ TLS, r // MOVQ (r)(TLS*1), r p := gc.Prog(x86.AMOVQ) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_TLS p.To.Type = obj.TYPE_REG p.To.Reg = r q := gc.Prog(x86.AMOVQ) q.From.Type = obj.TYPE_MEM q.From.Reg = r q.From.Index = x86.REG_TLS q.From.Scale = 1 q.To.Type = obj.TYPE_REG q.To.Reg = r } case ssa.OpAMD64CALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64CALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v.Args[0]) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB, ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB: x := gc.SSARegNum(v.Args[0]) r := gc.SSARegNum(v) if x != r { p := gc.Prog(moveByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = r } p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW, ssa.OpAMD64SQRTSD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = gc.SSARegNum(v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpSP, ssa.OpSB: // nothing to do case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE, ssa.OpAMD64SETL, ssa.OpAMD64SETLE, ssa.OpAMD64SETG, ssa.OpAMD64SETGE, ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF, ssa.OpAMD64SETB, ssa.OpAMD64SETBE, ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN, ssa.OpAMD64SETA, ssa.OpAMD64SETAE: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) case ssa.OpAMD64SETNEF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPS) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX // ORL avoids partial register write and is smaller than ORQ, used by old compiler opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64SETEQF: p := gc.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) q := gc.Prog(x86.ASETPC) q.To.Type = obj.TYPE_REG q.To.Reg = x86.REG_AX // ANDL avoids partial register write and is smaller than ANDQ, used by old compiler opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX) case ssa.OpAMD64InvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpAMD64REPSTOSQ: gc.Prog(x86.AREP) gc.Prog(x86.ASTOSQ) case ssa.OpAMD64REPMOVSQ: gc.Prog(x86.AREP) gc.Prog(x86.AMOVSQ) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpAMD64LoweredNilCheck: // Optimization - if the subsequent block has a load or store // at the same address, we don't need to issue this instruction. mem := v.Args[1] for _, w := range v.Block.Succs[0].Values { if w.Op == ssa.OpPhi { if w.Type.IsMemory() { mem = w } continue } if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() { // w doesn't use a store - can't be a memory op. continue } if w.Args[len(w.Args)-1] != mem { v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w) } switch w.Op { case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore: if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst: off := ssa.ValAndOff(v.AuxInt).Off() if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage { if gc.Debug_checknil != 0 && int(v.Line) > 1 { gc.Warnl(v.Line, "removed nil check") } return } } if w.Type.IsMemory() { if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive { // these ops are OK mem = w continue } // We can't delay the nil check past the next store. break } } // Issue a load which will fault if the input is nil. // TODO: We currently use the 2-byte instruction TESTB AX, (reg). // Should we use the 3-byte TESTB $0, (reg) instead? It is larger // but it doesn't have false dependency on AX. // Or maybe allocate an output register and use MOVL (reg),reg2 ? // That trades clobbering flags for clobbering a register. p := gc.Prog(x86.ATESTB) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_AX p.To.Type = obj.TYPE_MEM p.To.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.To, v) if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } }