// ssaGenValue emits machine instructions for a single SSA value (amd64 backend).
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		switch {
		case r == r1:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := gc.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := gc.SSARegNum(v.Args[1])

		// Zero extend dividend.
		c := gc.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := gc.SSARegNum(v.Args[1])

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = gc.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = gc.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = gc.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := gc.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			gc.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			gc.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			gc.Prog(x86.ACWD)
		}

		// Issue divide.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := gc.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := gc.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := gc.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.
		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
		ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions
		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := gc.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p = gc.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := gc.SSARegNum(v)
		a := gc.SSARegNum(v.Args[0])
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := gc.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := gc.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := gc.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// then we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		p := gc.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[0]))

	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]))

	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt

	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}

	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := gc.SSARegNum(v)
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x

	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload,
		ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload,
		ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64MOVWloadidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1,
		ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore,
		ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 8
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 4
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64MOVWstoreidx2:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Scale = 2
		p.To.Index = gc.SSARegNum(v.Args[1])
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1,
		ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)

	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst,
		ssa.OpAMD64MOVBstoreconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux2(&p.To, v, sc.Off())

	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1,
		ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2,
		ssa.OpAMD64MOVBstoreconstidx1:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := gc.SSARegNum(v.Args[0])
		i := gc.SSARegNum(v.Args[1])
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())

	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX,
		ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(v.Op.Asm(), gc.SSARegNum(v), gc.SSARegNum(v.Args[0]))

	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = gc.Prog(x86.AADDQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = adj
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = off

	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Unimplementedf("MOVOconst can only do constant=0")
		}
		r := gc.SSARegNum(v)
		opregreg(x86.AXORPS, r, r)

	case ssa.OpAMD64DUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x != y {
			opregreg(moveByType(v.Type), y, x)
		}

	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}

	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)

	case ssa.OpInitMem:
		// memory arg needs no code

	case ssa.OpArg:
		// input args need no code

	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpAMD64LoweredGetG:
		r := gc.SSARegNum(v)
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := gc.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := gc.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.OpAMD64CALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpAMD64CALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpAMD64CALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpAMD64CALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpAMD64CALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := gc.SSARegNum(v)
		if r != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum0(v)

	case ssa.OpAMD64SQRTSD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpSP, ssa.OpSB:
		// nothing to do

	case ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do

	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpAMD64SETNEF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids partial register write and is smaller than ORQ, used by old compiler
		opregreg(x86.AORL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
		p := gc.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		q := gc.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
		opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())

	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())

	case ssa.OpAMD64REPSTOSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.ASTOSQ)

	case ssa.OpAMD64REPMOVSQ:
		gc.Prog(x86.AREP)
		gc.Prog(x86.AMOVSQ)

	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))

	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))

	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))

	case ssa.OpKeepAlive:
		if !v.Args[0].Type.IsPtrShaped() {
			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
		}
		n, off := gc.AutoVar(v.Args[0])
		if n == nil {
			v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0])
		}
		if off != 0 {
			v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off)
		}
		gc.Gvarlive(n)

	case ssa.OpAMD64LoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Block().Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload,
				ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
				ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
				ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
				ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
				off := ssa.ValAndOff(v.AuxInt).Off()
				if w.Args[0] == v.Args[0] && w.Aux == nil && off >= 0 && off < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			}
			if w.Type.IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := gc.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}

	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}
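// Many of the amd64 cases above call a small helper, opregreg, that is referenced
// but not shown in this section. The sketch below is an assumed shape for it,
// reconstructed from the call sites above rather than from code shown here: it
// emits a two-operand register-to-register instruction "dest = dest op src" and
// returns the Prog so a caller can adjust it further.
func opregreg(op obj.As, dest, src int16) *obj.Prog {
	p := gc.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = src
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	return p
}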
// ssaGenValue emits machine instructions for a single SSA value (ARM backend).
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	s.SetLineno(v.Line)
	switch v.Op {
	case ssa.OpInitMem:
		// memory arg needs no code

	case ssa.OpArg:
		// input args need no code

	case ssa.OpSP, ssa.OpSB, ssa.OpGetG:
		// nothing to do

	case ssa.OpCopy, ssa.OpARMMOVWconvert, ssa.OpARMMOVWreg:
		if v.Type.IsMemory() {
			return
		}
		x := gc.SSARegNum(v.Args[0])
		y := gc.SSARegNum(v)
		if x == y {
			return
		}
		as := arm.AMOVW
		if v.Type.IsFloat() {
			switch v.Type.Size() {
			case 4:
				as = arm.AMOVF
			case 8:
				as = arm.AMOVD
			default:
				panic("bad float size")
			}
		}
		p := gc.Prog(as)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x
		p.To.Type = obj.TYPE_REG
		p.To.Reg = y

	case ssa.OpARMMOVWnop:
		if gc.SSARegNum(v) != gc.SSARegNum(v.Args[0]) {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		// nothing to do

	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("load flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(loadByType(v.Type))
		n, off := gc.AutoVar(v.Args[0])
		p.From.Type = obj.TYPE_MEM
		p.From.Node = n
		p.From.Sym = gc.Linksym(n.Sym)
		p.From.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.From.Name = obj.NAME_PARAM
			p.From.Offset += n.Xoffset
		} else {
			p.From.Name = obj.NAME_AUTO
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpPhi:
		gc.CheckLoweredPhi(v)

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Unimplementedf("store flags not implemented: %v", v.LongString())
			return
		}
		p := gc.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		n, off := gc.AutoVar(v)
		p.To.Type = obj.TYPE_MEM
		p.To.Node = n
		p.To.Sym = gc.Linksym(n.Sym)
		p.To.Offset = off
		if n.Class == gc.PPARAM || n.Class == gc.PPARAMOUT {
			p.To.Name = obj.NAME_PARAM
			p.To.Offset += n.Xoffset
		} else {
			p.To.Name = obj.NAME_AUTO
		}

	case ssa.OpARMDIV, ssa.OpARMDIVU, ssa.OpARMMOD, ssa.OpARMMODU:
		// Note: for software division the assembler rewrites these
		// instructions to a sequence of instructions:
		// - it puts the numerator in R11 and the denominator in g.m.divmod
		//   and calls (say) _udiv
		// - _udiv saves R0-R3 on the stack, calls udiv, and restores R0-R3
		//   before returning
		// - udiv does the actual work
		// TODO: set appropriate regmasks and call udiv directly?
		// Need to be careful for the negative case.
		// Or, as soft div is already expensive, we don't care?
		fallthrough

	case ssa.OpARMADD, ssa.OpARMADC, ssa.OpARMSUB, ssa.OpARMSBC, ssa.OpARMRSB,
		ssa.OpARMAND, ssa.OpARMOR, ssa.OpARMXOR, ssa.OpARMBIC, ssa.OpARMMUL,
		ssa.OpARMADDF, ssa.OpARMADDD, ssa.OpARMSUBF, ssa.OpARMSUBD,
		ssa.OpARMMULF, ssa.OpARMMULD, ssa.OpARMDIVF, ssa.OpARMDIVD:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpARMADDS, ssa.OpARMSUBS:
		r := gc.SSARegNum1(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		p := gc.Prog(v.Op.Asm())
		p.Scond = arm.C_SBIT
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpARMSLL, ssa.OpARMSRL, ssa.OpARMSRA:
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpARMSRAcond:
		// ARM shift instructions use only the low-order byte of the shift amount,
		// so generate conditional instructions to deal with large shifts.
		// The flag is already set.
		// SRA.HS $31, Rarg0, Rdst // shift 31 bits to get the sign bit
		// SRA.LO Rarg1, Rarg0, Rdst
		r := gc.SSARegNum(v)
		r1 := gc.SSARegNum(v.Args[0])
		r2 := gc.SSARegNum(v.Args[1])
		p := gc.Prog(arm.ASRA)
		p.Scond = arm.C_SCOND_HS
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 31
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p = gc.Prog(arm.ASRA)
		p.Scond = arm.C_SCOND_LO
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r2
		p.Reg = r1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpARMADDconst, ssa.OpARMADCconst, ssa.OpARMSUBconst, ssa.OpARMSBCconst,
		ssa.OpARMRSBconst, ssa.OpARMRSCconst, ssa.OpARMANDconst, ssa.OpARMORconst,
		ssa.OpARMXORconst, ssa.OpARMBICconst, ssa.OpARMSLLconst, ssa.OpARMSRLconst, ssa.OpARMSRAconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMADDSconst, ssa.OpARMSUBSconst, ssa.OpARMRSBSconst:
		p := gc.Prog(v.Op.Asm())
		p.Scond = arm.C_SBIT
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum1(v)

	case ssa.OpARMSRRconst:
		genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt)

	case ssa.OpARMADDshiftLL, ssa.OpARMADCshiftLL, ssa.OpARMSUBshiftLL, ssa.OpARMSBCshiftLL,
		ssa.OpARMRSBshiftLL, ssa.OpARMRSCshiftLL, ssa.OpARMANDshiftLL, ssa.OpARMORshiftLL,
		ssa.OpARMXORshiftLL, ssa.OpARMBICshiftLL:
		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)

	case ssa.OpARMADDSshiftLL, ssa.OpARMSUBSshiftLL, ssa.OpARMRSBSshiftLL:
		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt)
		p.Scond = arm.C_SBIT

	case ssa.OpARMADDshiftRL, ssa.OpARMADCshiftRL, ssa.OpARMSUBshiftRL, ssa.OpARMSBCshiftRL,
		ssa.OpARMRSBshiftRL, ssa.OpARMRSCshiftRL, ssa.OpARMANDshiftRL, ssa.OpARMORshiftRL,
		ssa.OpARMXORshiftRL, ssa.OpARMBICshiftRL:
		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt)

	case ssa.OpARMADDSshiftRL, ssa.OpARMSUBSshiftRL, ssa.OpARMRSBSshiftRL:
		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt)
		p.Scond = arm.C_SBIT

	case ssa.OpARMADDshiftRA, ssa.OpARMADCshiftRA, ssa.OpARMSUBshiftRA, ssa.OpARMSBCshiftRA,
		ssa.OpARMRSBshiftRA, ssa.OpARMRSCshiftRA, ssa.OpARMANDshiftRA, ssa.OpARMORshiftRA,
		ssa.OpARMXORshiftRA, ssa.OpARMBICshiftRA:
		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt)

	case ssa.OpARMADDSshiftRA, ssa.OpARMSUBSshiftRA, ssa.OpARMRSBSshiftRA:
		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt)
		p.Scond = arm.C_SBIT

	case ssa.OpARMMVNshiftLL:
		genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)

	case ssa.OpARMMVNshiftRL:
		genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt)

	case ssa.OpARMMVNshiftRA:
		genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt)

	case ssa.OpARMMVNshiftLLreg:
		genregshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL)

	case ssa.OpARMMVNshiftRLreg:
		genregshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR)

	case ssa.OpARMMVNshiftRAreg:
		genregshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR)

	case ssa.OpARMADDshiftLLreg, ssa.OpARMADCshiftLLreg, ssa.OpARMSUBshiftLLreg, ssa.OpARMSBCshiftLLreg,
		ssa.OpARMRSBshiftLLreg, ssa.OpARMRSCshiftLLreg, ssa.OpARMANDshiftLLreg, ssa.OpARMORshiftLLreg,
		ssa.OpARMXORshiftLLreg, ssa.OpARMBICshiftLLreg:
		genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_LL)

	case ssa.OpARMADDSshiftLLreg, ssa.OpARMSUBSshiftLLreg, ssa.OpARMRSBSshiftLLreg:
		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL)
		p.Scond = arm.C_SBIT

	case ssa.OpARMADDshiftRLreg, ssa.OpARMADCshiftRLreg, ssa.OpARMSUBshiftRLreg, ssa.OpARMSBCshiftRLreg,
		ssa.OpARMRSBshiftRLreg, ssa.OpARMRSCshiftRLreg, ssa.OpARMANDshiftRLreg, ssa.OpARMORshiftRLreg,
		ssa.OpARMXORshiftRLreg, ssa.OpARMBICshiftRLreg:
		genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_LR)

	case ssa.OpARMADDSshiftRLreg, ssa.OpARMSUBSshiftRLreg, ssa.OpARMRSBSshiftRLreg:
		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR)
		p.Scond = arm.C_SBIT

	case ssa.OpARMADDshiftRAreg, ssa.OpARMADCshiftRAreg, ssa.OpARMSUBshiftRAreg, ssa.OpARMSBCshiftRAreg,
		ssa.OpARMRSBshiftRAreg, ssa.OpARMRSCshiftRAreg, ssa.OpARMANDshiftRAreg, ssa.OpARMORshiftRAreg,
		ssa.OpARMXORshiftRAreg, ssa.OpARMBICshiftRAreg:
		genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum(v), arm.SHIFT_AR)

	case ssa.OpARMADDSshiftRAreg, ssa.OpARMSUBSshiftRAreg, ssa.OpARMRSBSshiftRAreg:
		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR)
		p.Scond = arm.C_SBIT

	case ssa.OpARMHMUL, ssa.OpARMHMULU:
		// 32-bit high multiplication
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REGREG
		p.To.Reg = gc.SSARegNum(v)
		p.To.Offset = arm.REGTMP // throw away the low 32 bits into the tmp register

	case ssa.OpARMMULLU:
		// 32-bit multiplication with a 64-bit result: high 32 bits in out0, low 32 bits in out1
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REGREG
		p.To.Reg = gc.SSARegNum0(v)           // high 32 bits
		p.To.Offset = int64(gc.SSARegNum1(v)) // low 32 bits

	case ssa.OpARMMULA:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_REGREG2
		p.To.Reg = gc.SSARegNum(v)                   // result
		p.To.Offset = int64(gc.SSARegNum(v.Args[2])) // addend

	case ssa.OpARMMOVWconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMMOVFconst, ssa.OpARMMOVDconst:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMCMP, ssa.OpARMCMN, ssa.OpARMTST, ssa.OpARMTEQ, ssa.OpARMCMPF, ssa.OpARMCMPD:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		// Special layout in ARM assembly.
		// Compared to x86, the operands of ARM's CMP are reversed.
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpARMCMPconst, ssa.OpARMCMNconst, ssa.OpARMTSTconst, ssa.OpARMTEQconst:
		// Special layout in ARM assembly.
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpARMCMPF0, ssa.OpARMCMPD0:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpARMCMPshiftLL:
		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm.SHIFT_LL, v.AuxInt)

	case ssa.OpARMCMPshiftRL:
		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm.SHIFT_LR, v.AuxInt)

	case ssa.OpARMCMPshiftRA:
		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm.SHIFT_AR, v.AuxInt)

	case ssa.OpARMCMPshiftLLreg:
		genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), 0, arm.SHIFT_LL)

	case ssa.OpARMCMPshiftRLreg:
		genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), 0, arm.SHIFT_LR)

	case ssa.OpARMCMPshiftRAreg:
		genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), 0, arm.SHIFT_AR)

	case ssa.OpARMMOVWaddr:
		p := gc.Prog(arm.AMOVW)
		p.From.Type = obj.TYPE_ADDR
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

		var wantreg string
		// MOVW $sym+off(base), R
		// the assembler expands it as follows:
		// - base is SP: add constant offset to SP (R13);
		//   when the constant is large, the tmp register (R11) may be used
		// - base is SB: load external address from constant pool (use relocation)
		switch v.Aux.(type) {
		default:
			v.Fatalf("aux is of unknown type %T", v.Aux)
		case *ssa.ExternSymbol:
			wantreg = "SB"
			gc.AddAux(&p.From, v)
		case *ssa.ArgSymbol, *ssa.AutoSymbol:
			wantreg = "SP"
			gc.AddAux(&p.From, v)
		case nil:
			// No sym, just MOVW $off(SP), R
			wantreg = "SP"
			p.From.Reg = arm.REGSP
			p.From.Offset = v.AuxInt
		}
		if reg := gc.SSAReg(v.Args[0]); reg.Name() != wantreg {
			v.Fatalf("bad reg %s for symbol type %T, want %s", reg.Name(), v.Aux, wantreg)
		}

	case ssa.OpARMMOVBload, ssa.OpARMMOVBUload, ssa.OpARMMOVHload, ssa.OpARMMOVHUload,
		ssa.OpARMMOVWload, ssa.OpARMMOVFload, ssa.OpARMMOVDload:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMMOVBstore, ssa.OpARMMOVHstore, ssa.OpARMMOVWstore,
		ssa.OpARMMOVFstore, ssa.OpARMMOVDstore:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[1])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.To, v)

	case ssa.OpARMMOVWloadidx:
		// this is just shift 0 bits
		fallthrough

	case ssa.OpARMMOVWloadshiftLL:
		p := genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
		p.From.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpARMMOVWloadshiftRL:
		p := genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_LR, v.AuxInt)
		p.From.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpARMMOVWloadshiftRA:
		p := genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm.SHIFT_AR, v.AuxInt)
		p.From.Reg = gc.SSARegNum(v.Args[0])

	case ssa.OpARMMOVWstoreidx:
		// this is just shift 0 bits
		fallthrough

	case ssa.OpARMMOVWstoreshiftLL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_SHIFT
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Offset = int64(makeshift(gc.SSARegNum(v.Args[1]), arm.SHIFT_LL, v.AuxInt))

	case ssa.OpARMMOVWstoreshiftRL:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_SHIFT
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Offset = int64(makeshift(gc.SSARegNum(v.Args[1]), arm.SHIFT_LR, v.AuxInt))

	case ssa.OpARMMOVWstoreshiftRA:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_SHIFT
		p.To.Reg = gc.SSARegNum(v.Args[0])
		p.To.Offset = int64(makeshift(gc.SSARegNum(v.Args[1]), arm.SHIFT_AR, v.AuxInt))

	case ssa.OpARMMOVBreg, ssa.OpARMMOVBUreg, ssa.OpARMMOVHreg, ssa.OpARMMOVHUreg:
		a := v.Args[0]
		for a.Op == ssa.OpCopy || a.Op == ssa.OpARMMOVWreg || a.Op == ssa.OpARMMOVWnop {
			a = a.Args[0]
		}
		if a.Op == ssa.OpLoadReg {
			t := a.Type
			switch {
			case v.Op == ssa.OpARMMOVBreg && t.Size() == 1 && t.IsSigned(),
				v.Op == ssa.OpARMMOVBUreg && t.Size() == 1 && !t.IsSigned(),
				v.Op == ssa.OpARMMOVHreg && t.Size() == 2 && t.IsSigned(),
				v.Op == ssa.OpARMMOVHUreg && t.Size() == 2 && !t.IsSigned():
				// arg is a proper-typed load, already zero/sign-extended, don't extend again
				if gc.SSARegNum(v) == gc.SSARegNum(v.Args[0]) {
					return
				}
				p := gc.Prog(arm.AMOVW)
				p.From.Type = obj.TYPE_REG
				p.From.Reg = gc.SSARegNum(v.Args[0])
				p.To.Type = obj.TYPE_REG
				p.To.Reg = gc.SSARegNum(v)
				return
			default:
			}
		}
		fallthrough

	case ssa.OpARMMVN,
		ssa.OpARMSQRTD,
		ssa.OpARMNEGF, ssa.OpARMNEGD,
		ssa.OpARMMOVWF, ssa.OpARMMOVWD, ssa.OpARMMOVFW, ssa.OpARMMOVDW,
		ssa.OpARMMOVFD, ssa.OpARMMOVDF:
		p := gc.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMMOVWUF, ssa.OpARMMOVWUD, ssa.OpARMMOVFWU, ssa.OpARMMOVDWU:
		p := gc.Prog(v.Op.Asm())
		p.Scond = arm.C_UBIT
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMCMOVWHSconst:
		p := gc.Prog(arm.AMOVW)
		p.Scond = arm.C_SCOND_HS
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMCMOVWLSconst:
		p := gc.Prog(arm.AMOVW)
		p.Scond = arm.C_SCOND_LS
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpARMCALLstatic:
		if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym {
			// Deferred calls will appear to be returning to
			// the CALL deferreturn(SB) that we are about to emit.
			// However, the stack trace code will show the line
			// of the instruction byte before the return PC.
			// To avoid that being an unrelated instruction,
			// insert an actual hardware NOP that will have the right line number.
			// This is different from obj.ANOP, which is a virtual no-op
			// that doesn't make it into the instruction stream.
			ginsnop()
		}
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym))
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpARMCALLclosure:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 0
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpARMCALLdefer:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Deferproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpARMCALLgo:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Newproc.Sym)
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpARMCALLinter:
		p := gc.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 0
		p.To.Reg = gc.SSARegNum(v.Args[0])
		if gc.Maxarg < v.AuxInt {
			gc.Maxarg = v.AuxInt
		}

	case ssa.OpARMDUFFZERO:
		p := gc.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpARMDUFFCOPY:
		p := gc.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
		p.To.Offset = v.AuxInt

	case ssa.OpARMLoweredNilCheck:
		// Optimization - if the subsequent block has a load or store
		// at the same address, we don't need to issue this instruction.
		mem := v.Args[1]
		for _, w := range v.Block.Succs[0].Block().Values {
			if w.Op == ssa.OpPhi {
				if w.Type.IsMemory() {
					mem = w
				}
				continue
			}
			if len(w.Args) == 0 || !w.Args[len(w.Args)-1].Type.IsMemory() {
				// w doesn't use a store - can't be a memory op.
				continue
			}
			if w.Args[len(w.Args)-1] != mem {
				v.Fatalf("wrong store after nilcheck v=%s w=%s", v, w)
			}
			switch w.Op {
			case ssa.OpARMMOVBload, ssa.OpARMMOVBUload, ssa.OpARMMOVHload, ssa.OpARMMOVHUload,
				ssa.OpARMMOVWload, ssa.OpARMMOVFload, ssa.OpARMMOVDload,
				ssa.OpARMMOVBstore, ssa.OpARMMOVHstore, ssa.OpARMMOVWstore,
				ssa.OpARMMOVFstore, ssa.OpARMMOVDstore:
				// arg0 is ptr, auxint is offset
				if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpARMDUFFZERO, ssa.OpARMLoweredZero:
				// arg0 is ptr
				if w.Args[0] == v.Args[0] {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			case ssa.OpARMDUFFCOPY, ssa.OpARMLoweredMove:
				// arg0 is dst ptr, arg1 is src ptr
				if w.Args[0] == v.Args[0] || w.Args[1] == v.Args[0] {
					if gc.Debug_checknil != 0 && int(v.Line) > 1 {
						gc.Warnl(v.Line, "removed nil check")
					}
					return
				}
			default:
			}
			if w.Type.IsMemory() {
				if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
					// these ops are OK
					mem = w
					continue
				}
				// We can't delay the nil check past the next store.
				break
			}
		}
		// Issue a load which will fault if arg is nil.
		p := gc.Prog(arm.AMOVB)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = gc.SSARegNum(v.Args[0])
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = arm.REGTMP
		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
			gc.Warnl(v.Line, "generated nil check")
		}

	case ssa.OpARMLoweredZero:
		// MOVW.P	Rarg2, 4(R1)
		// CMP	Rarg1, R1
		// BLE	-2(PC)
		// arg1 is the address of the last element to zero
		// arg2 is known to be zero
		// auxint is alignment
		var sz int64
		var mov obj.As
		switch {
		case v.AuxInt%4 == 0:
			sz = 4
			mov = arm.AMOVW
		case v.AuxInt%2 == 0:
			sz = 2
			mov = arm.AMOVH
		default:
			sz = 1
			mov = arm.AMOVB
		}
		p := gc.Prog(mov)
		p.Scond = arm.C_PBIT
		p.From.Type = obj.TYPE_REG
		p.From.Reg = gc.SSARegNum(v.Args[2])
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arm.REG_R1
		p.To.Offset = sz
		p2 := gc.Prog(arm.ACMP)
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = gc.SSARegNum(v.Args[1])
		p2.Reg = arm.REG_R1
		p3 := gc.Prog(arm.ABLE)
		p3.To.Type = obj.TYPE_BRANCH
		gc.Patch(p3, p)

	case ssa.OpARMLoweredMove:
		// MOVW.P	4(R1), Rtmp
		// MOVW.P	Rtmp, 4(R2)
		// CMP	Rarg2, R1
		// BLE	-3(PC)
		// arg2 is the address of the last element of src
		// auxint is alignment
		var sz int64
		var mov obj.As
		switch {
		case v.AuxInt%4 == 0:
			sz = 4
			mov = arm.AMOVW
		case v.AuxInt%2 == 0:
			sz = 2
			mov = arm.AMOVH
		default:
			sz = 1
			mov = arm.AMOVB
		}
		p := gc.Prog(mov)
		p.Scond = arm.C_PBIT
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = arm.REG_R1
		p.From.Offset = sz
		p.To.Type = obj.TYPE_REG
		p.To.Reg = arm.REGTMP
		p2 := gc.Prog(mov)
		p2.Scond = arm.C_PBIT
		p2.From.Type = obj.TYPE_REG
		p2.From.Reg = arm.REGTMP
		p2.To.Type = obj.TYPE_MEM
		p2.To.Reg = arm.REG_R2
		p2.To.Offset = sz
		p3 := gc.Prog(arm.ACMP)
		p3.From.Type = obj.TYPE_REG
		p3.From.Reg = gc.SSARegNum(v.Args[2])
		p3.Reg = arm.REG_R1
		p4 := gc.Prog(arm.ABLE)
		p4.To.Type = obj.TYPE_BRANCH
		gc.Patch(p4, p)

	case ssa.OpVarDef:
		gc.Gvardef(v.Aux.(*gc.Node))

	case ssa.OpVarKill:
		gc.Gvarkill(v.Aux.(*gc.Node))

	case ssa.OpVarLive:
		gc.Gvarlive(v.Aux.(*gc.Node))

	case ssa.OpKeepAlive:
		if !v.Args[0].Type.IsPtrShaped() {
			v.Fatalf("keeping non-pointer alive %v", v.Args[0])
		}
		n, off := gc.AutoVar(v.Args[0])
		if n == nil {
			v.Fatalf("KeepLive with non-spilled value %s %s", v, v.Args[0])
		}
		if off != 0 {
			v.Fatalf("KeepLive with non-zero offset spill location %s:%d", n, off)
		}
		gc.Gvarlive(n)

	case ssa.OpARMEqual, ssa.OpARMNotEqual,
		ssa.OpARMLessThan, ssa.OpARMLessEqual,
		ssa.OpARMGreaterThan, ssa.OpARMGreaterEqual,
		ssa.OpARMLessThanU, ssa.OpARMLessEqualU,
		ssa.OpARMGreaterThanU, ssa.OpARMGreaterEqualU:
		// generate boolean values
		// use conditional move
		p := gc.Prog(arm.AMOVW)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)
		p = gc.Prog(arm.AMOVW)
		p.Scond = condBits[v.Op]
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = gc.SSARegNum(v)

	case ssa.OpSelect0, ssa.OpSelect1:
		// nothing to do

	case ssa.OpARMLoweredGetClosurePtr:
		// Closure pointer is R7 (arm.REGCTXT).
		gc.CheckLoweredGetClosurePtr(v)

	case ssa.OpARMFlagEQ,
		ssa.OpARMFlagLT_ULT,
		ssa.OpARMFlagLT_UGT,
		ssa.OpARMFlagGT_ULT,
		ssa.OpARMFlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())

	case ssa.OpARMInvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())

	default:
		v.Unimplementedf("genValue not implemented: %s", v.LongString())
	}
}
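// The ARM cases above call genshift and genregshift helpers that are referenced but
// not defined in this section. Below is a minimal sketch of the constant-shift
// variant, reconstructed from its call sites: the exact bit layout produced by
// makeshift is an assumption (ARM packs the shift type and amount into the operand
// word of an obj.TYPE_SHIFT operand), so treat it as illustrative rather than the
// definitive encoding. genregshift would be analogous, encoding a shift amount held
// in a register instead of a constant.

// makeshift packs a register, shift type, and constant shift amount into a single
// operand word for obj.TYPE_SHIFT operands (assumed layout).
func makeshift(reg int16, typ int64, s int64) int64 {
	return int64(reg&0xf) | typ | (s&31)<<7
}

// genshift emits an instruction computing r = r0 op (r1 shifted by s bits),
// matching the (as, r0, r1, r, typ, s) argument order used by the call sites above.
// When r is 0 (as for the CMPshift* cases), no destination register is set.
func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
	p := gc.Prog(as)
	p.From.Type = obj.TYPE_SHIFT
	p.From.Offset = makeshift(r1, typ, s)
	p.Reg = r0
	if r != 0 {
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	}
	return p
}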