// generate one instruction: // as f, t func rawgins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog { // self move check // TODO(mundaym): use sized math and extend to MOVB, MOVWZ etc. switch as { case s390x.AMOVD, s390x.AFMOVS, s390x.AFMOVD: if f != nil && t != nil && f.Op == gc.OREGISTER && t.Op == gc.OREGISTER && f.Reg == t.Reg { return nil } } p := gc.Prog(as) gc.Naddr(&p.From, f) gc.Naddr(&p.To, t) switch as { // Bad things the front end has done to us. Crash to find call stack. case s390x.ACMP, s390x.ACMPU: if p.From.Type == obj.TYPE_MEM || p.To.Type == obj.TYPE_MEM { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } } if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } w := int32(0) switch as { case s390x.AMOVB, s390x.AMOVBZ: w = 1 case s390x.AMOVH, s390x.AMOVHZ: w = 2 case s390x.AMOVW, s390x.AMOVWZ: w = 4 case s390x.AMOVD: if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_ADDR { break } w = 8 } if w != 0 && ((f != nil && p.From.Width < int64(w)) || (t != nil && p.To.Type != obj.TYPE_REG && p.To.Width > int64(w))) { gc.Dump("f", f) gc.Dump("t", t) gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) } return p }
/* * attempt to generate 64-bit * res = n * return 1 on success, 0 if op not handled. */ func cgen64(n *gc.Node, res *gc.Node) { if res.Op != gc.OINDREG && res.Op != gc.ONAME { gc.Dump("n", n) gc.Dump("res", res) gc.Fatalf("cgen64 %v of %v", n.Op, res.Op) } switch n.Op { default: gc.Fatalf("cgen64 %v", n.Op) case gc.OMINUS: gc.Cgen(n.Left, res) var hi1 gc.Node var lo1 gc.Node split64(res, &lo1, &hi1) gins(x86.ANEGL, nil, &lo1) gins(x86.AADCL, ncon(0), &hi1) gins(x86.ANEGL, nil, &hi1) splitclean() return case gc.OCOM: gc.Cgen(n.Left, res) var lo1 gc.Node var hi1 gc.Node split64(res, &lo1, &hi1) gins(x86.ANOTL, nil, &lo1) gins(x86.ANOTL, nil, &hi1) splitclean() return // binary operators. // common setup below. case gc.OADD, gc.OSUB, gc.OMUL, gc.OLROT, gc.OLSH, gc.ORSH, gc.OAND, gc.OOR, gc.OXOR: break } l := n.Left r := n.Right if !l.Addable { var t1 gc.Node gc.Tempname(&t1, l.Type) gc.Cgen(l, &t1) l = &t1 } if r != nil && !r.Addable { var t2 gc.Node gc.Tempname(&t2, r.Type) gc.Cgen(r, &t2) r = &t2 } var ax gc.Node gc.Nodreg(&ax, gc.Types[gc.TINT32], x86.REG_AX) var cx gc.Node gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX) var dx gc.Node gc.Nodreg(&dx, gc.Types[gc.TINT32], x86.REG_DX) // Setup for binary operation. var hi1 gc.Node var lo1 gc.Node split64(l, &lo1, &hi1) var lo2 gc.Node var hi2 gc.Node if gc.Is64(r.Type) { split64(r, &lo2, &hi2) } // Do op. Leave result in DX:AX. switch n.Op { // TODO: Constants case gc.OADD: gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(x86.AADDL, &lo2, &ax) gins(x86.AADCL, &hi2, &dx) // TODO: Constants. case gc.OSUB: gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(x86.ASUBL, &lo2, &ax) gins(x86.ASBBL, &hi2, &dx) case gc.OMUL: // let's call the next three EX, FX and GX var ex, fx, gx gc.Node gc.Regalloc(&ex, gc.Types[gc.TPTR32], nil) gc.Regalloc(&fx, gc.Types[gc.TPTR32], nil) gc.Regalloc(&gx, gc.Types[gc.TPTR32], nil) // load args into DX:AX and EX:GX. gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(x86.AMOVL, &lo2, &gx) gins(x86.AMOVL, &hi2, &ex) // if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply. gins(x86.AMOVL, &dx, &fx) gins(x86.AORL, &ex, &fx) p1 := gc.Gbranch(x86.AJNE, nil, 0) gins(x86.AMULL, &gx, nil) // implicit &ax p2 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // full 64x64 -> 64, from 32x32 -> 64. gins(x86.AIMULL, &gx, &dx) gins(x86.AMOVL, &ax, &fx) gins(x86.AIMULL, &ex, &fx) gins(x86.AADDL, &dx, &fx) gins(x86.AMOVL, &gx, &dx) gins(x86.AMULL, &dx, nil) // implicit &ax gins(x86.AADDL, &fx, &dx) gc.Patch(p2, gc.Pc) gc.Regfree(&ex) gc.Regfree(&fx) gc.Regfree(&gx) // We only rotate by a constant c in [0,64). // if c >= 32: // lo, hi = hi, lo // c -= 32 // if c == 0: // no-op // else: // t = hi // shld hi:lo, c // shld lo:t, c case gc.OLROT: v := uint64(r.Int64()) if v >= 32 { // reverse during load to do the first 32 bits of rotate v -= 32 gins(x86.AMOVL, &lo1, &dx) gins(x86.AMOVL, &hi1, &ax) } else { gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) } if v == 0 { } else // done { gins(x86.AMOVL, &dx, &cx) p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx) p1.From.Index = x86.REG_AX // double-width shift p1.From.Scale = 0 p1 = gins(x86.ASHLL, ncon(uint32(v)), &ax) p1.From.Index = x86.REG_CX // double-width shift p1.From.Scale = 0 } case gc.OLSH: if r.Op == gc.OLITERAL { v := uint64(r.Int64()) if v >= 64 { if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo2, &hi2) gins(x86.AMOVL, ncon(0), &lo2) gins(x86.AMOVL, ncon(0), &hi2) splitclean() return } if v >= 32 { if gc.Is64(r.Type) { splitclean() } split64(res, &lo2, &hi2) gmove(&lo1, &hi2) if v > 32 { gins(x86.ASHLL, ncon(uint32(v-32)), &hi2) } gins(x86.AMOVL, ncon(0), &lo2) splitclean() splitclean() return } // general shift gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx) p1.From.Index = x86.REG_AX // double-width shift p1.From.Scale = 0 gins(x86.ASHLL, ncon(uint32(v)), &ax) break } // load value into DX:AX. gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) // load shift value into register. // if high bits are set, zero value. var p1 *obj.Prog if gc.Is64(r.Type) { gins(x86.ACMPL, &hi2, ncon(0)) p1 = gc.Gbranch(x86.AJNE, nil, +1) gins(x86.AMOVL, &lo2, &cx) } else { cx.Type = gc.Types[gc.TUINT32] gmove(r, &cx) } // if shift count is >=64, zero value gins(x86.ACMPL, &cx, ncon(64)) p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) if p1 != nil { gc.Patch(p1, gc.Pc) } gins(x86.AXORL, &dx, &dx) gins(x86.AXORL, &ax, &ax) gc.Patch(p2, gc.Pc) // if shift count is >= 32, zero low. gins(x86.ACMPL, &cx, ncon(32)) p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) gins(x86.AMOVL, &ax, &dx) gins(x86.ASHLL, &cx, &dx) // SHLL only uses bottom 5 bits of count gins(x86.AXORL, &ax, &ax) p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // general shift p1 = gins(x86.ASHLL, &cx, &dx) p1.From.Index = x86.REG_AX // double-width shift p1.From.Scale = 0 gins(x86.ASHLL, &cx, &ax) gc.Patch(p2, gc.Pc) case gc.ORSH: if r.Op == gc.OLITERAL { v := uint64(r.Int64()) if v >= 64 { if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo2, &hi2) if hi1.Type.Etype == gc.TINT32 { gmove(&hi1, &lo2) gins(x86.ASARL, ncon(31), &lo2) gmove(&hi1, &hi2) gins(x86.ASARL, ncon(31), &hi2) } else { gins(x86.AMOVL, ncon(0), &lo2) gins(x86.AMOVL, ncon(0), &hi2) } splitclean() return } if v >= 32 { if gc.Is64(r.Type) { splitclean() } split64(res, &lo2, &hi2) gmove(&hi1, &lo2) if v > 32 { gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v-32)), &lo2) } if hi1.Type.Etype == gc.TINT32 { gmove(&hi1, &hi2) gins(x86.ASARL, ncon(31), &hi2) } else { gins(x86.AMOVL, ncon(0), &hi2) } splitclean() splitclean() return } // general shift gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) p1 := gins(x86.ASHRL, ncon(uint32(v)), &ax) p1.From.Index = x86.REG_DX // double-width shift p1.From.Scale = 0 gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v)), &dx) break } // load value into DX:AX. gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) // load shift value into register. // if high bits are set, zero value. var p1 *obj.Prog if gc.Is64(r.Type) { gins(x86.ACMPL, &hi2, ncon(0)) p1 = gc.Gbranch(x86.AJNE, nil, +1) gins(x86.AMOVL, &lo2, &cx) } else { cx.Type = gc.Types[gc.TUINT32] gmove(r, &cx) } // if shift count is >=64, zero or sign-extend value gins(x86.ACMPL, &cx, ncon(64)) p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) if p1 != nil { gc.Patch(p1, gc.Pc) } if hi1.Type.Etype == gc.TINT32 { gins(x86.ASARL, ncon(31), &dx) gins(x86.AMOVL, &dx, &ax) } else { gins(x86.AXORL, &dx, &dx) gins(x86.AXORL, &ax, &ax) } gc.Patch(p2, gc.Pc) // if shift count is >= 32, sign-extend hi. gins(x86.ACMPL, &cx, ncon(32)) p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) gins(x86.AMOVL, &dx, &ax) if hi1.Type.Etype == gc.TINT32 { gins(x86.ASARL, &cx, &ax) // SARL only uses bottom 5 bits of count gins(x86.ASARL, ncon(31), &dx) } else { gins(x86.ASHRL, &cx, &ax) gins(x86.AXORL, &dx, &dx) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // general shift p1 = gins(x86.ASHRL, &cx, &ax) p1.From.Index = x86.REG_DX // double-width shift p1.From.Scale = 0 gins(optoas(gc.ORSH, hi1.Type), &cx, &dx) gc.Patch(p2, gc.Pc) // make constant the right side (it usually is anyway). case gc.OXOR, gc.OAND, gc.OOR: if lo1.Op == gc.OLITERAL { nswap(&lo1, &lo2) nswap(&hi1, &hi2) } if lo2.Op == gc.OLITERAL { // special cases for constants. lv := uint32(lo2.Int64()) hv := uint32(hi2.Int64()) splitclean() // right side split64(res, &lo2, &hi2) switch n.Op { case gc.OXOR: gmove(&lo1, &lo2) gmove(&hi1, &hi2) switch lv { case 0: break case 0xffffffff: gins(x86.ANOTL, nil, &lo2) default: gins(x86.AXORL, ncon(lv), &lo2) } switch hv { case 0: break case 0xffffffff: gins(x86.ANOTL, nil, &hi2) default: gins(x86.AXORL, ncon(hv), &hi2) } case gc.OAND: switch lv { case 0: gins(x86.AMOVL, ncon(0), &lo2) default: gmove(&lo1, &lo2) if lv != 0xffffffff { gins(x86.AANDL, ncon(lv), &lo2) } } switch hv { case 0: gins(x86.AMOVL, ncon(0), &hi2) default: gmove(&hi1, &hi2) if hv != 0xffffffff { gins(x86.AANDL, ncon(hv), &hi2) } } case gc.OOR: switch lv { case 0: gmove(&lo1, &lo2) case 0xffffffff: gins(x86.AMOVL, ncon(0xffffffff), &lo2) default: gmove(&lo1, &lo2) gins(x86.AORL, ncon(lv), &lo2) } switch hv { case 0: gmove(&hi1, &hi2) case 0xffffffff: gins(x86.AMOVL, ncon(0xffffffff), &hi2) default: gmove(&hi1, &hi2) gins(x86.AORL, ncon(hv), &hi2) } } splitclean() splitclean() return } gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(optoas(n.Op, lo1.Type), &lo2, &ax) gins(optoas(n.Op, lo1.Type), &hi2, &dx) } if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo1, &hi1) gins(x86.AMOVL, &ax, &lo1) gins(x86.AMOVL, &dx, &hi1) splitclean() }
/* * attempt to generate 64-bit * res = n * return 1 on success, 0 if op not handled. */ func cgen64(n *gc.Node, res *gc.Node) { if res.Op != gc.OINDREG && res.Op != gc.ONAME { gc.Dump("n", n) gc.Dump("res", res) gc.Fatalf("cgen64 %v of %v", n.Op, res.Op) } l := n.Left var t1 gc.Node if !l.Addable { gc.Tempname(&t1, l.Type) gc.Cgen(l, &t1) l = &t1 } var hi1 gc.Node var lo1 gc.Node split64(l, &lo1, &hi1) switch n.Op { default: gc.Fatalf("cgen64 %v", n.Op) case gc.OMINUS: var lo2 gc.Node var hi2 gc.Node split64(res, &lo2, &hi2) gc.Regalloc(&t1, lo1.Type, nil) var al gc.Node gc.Regalloc(&al, lo1.Type, nil) var ah gc.Node gc.Regalloc(&ah, hi1.Type, nil) gins(arm.AMOVW, &lo1, &al) gins(arm.AMOVW, &hi1, &ah) gmove(ncon(0), &t1) p1 := gins(arm.ASUB, &al, &t1) p1.Scond |= arm.C_SBIT gins(arm.AMOVW, &t1, &lo2) gmove(ncon(0), &t1) gins(arm.ASBC, &ah, &t1) gins(arm.AMOVW, &t1, &hi2) gc.Regfree(&t1) gc.Regfree(&al) gc.Regfree(&ah) splitclean() splitclean() return case gc.OCOM: gc.Regalloc(&t1, lo1.Type, nil) gmove(ncon(^uint32(0)), &t1) var lo2 gc.Node var hi2 gc.Node split64(res, &lo2, &hi2) var n1 gc.Node gc.Regalloc(&n1, lo1.Type, nil) gins(arm.AMOVW, &lo1, &n1) gins(arm.AEOR, &t1, &n1) gins(arm.AMOVW, &n1, &lo2) gins(arm.AMOVW, &hi1, &n1) gins(arm.AEOR, &t1, &n1) gins(arm.AMOVW, &n1, &hi2) gc.Regfree(&t1) gc.Regfree(&n1) splitclean() splitclean() return // binary operators. // common setup below. case gc.OADD, gc.OSUB, gc.OMUL, gc.OLSH, gc.ORSH, gc.OAND, gc.OOR, gc.OXOR, gc.OLROT: break } // setup for binary operators r := n.Right if r != nil && !r.Addable { var t2 gc.Node gc.Tempname(&t2, r.Type) gc.Cgen(r, &t2) r = &t2 } var hi2 gc.Node var lo2 gc.Node if gc.Is64(r.Type) { split64(r, &lo2, &hi2) } var al gc.Node gc.Regalloc(&al, lo1.Type, nil) var ah gc.Node gc.Regalloc(&ah, hi1.Type, nil) // Do op. Leave result in ah:al. switch n.Op { default: gc.Fatalf("cgen64: not implemented: %v\n", n) // TODO: Constants case gc.OADD: var bl gc.Node gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil) var bh gc.Node gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil) gins(arm.AMOVW, &hi1, &ah) gins(arm.AMOVW, &lo1, &al) gins(arm.AMOVW, &hi2, &bh) gins(arm.AMOVW, &lo2, &bl) p1 := gins(arm.AADD, &bl, &al) p1.Scond |= arm.C_SBIT gins(arm.AADC, &bh, &ah) gc.Regfree(&bl) gc.Regfree(&bh) // TODO: Constants. case gc.OSUB: var bl gc.Node gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil) var bh gc.Node gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil) gins(arm.AMOVW, &lo1, &al) gins(arm.AMOVW, &hi1, &ah) gins(arm.AMOVW, &lo2, &bl) gins(arm.AMOVW, &hi2, &bh) p1 := gins(arm.ASUB, &bl, &al) p1.Scond |= arm.C_SBIT gins(arm.ASBC, &bh, &ah) gc.Regfree(&bl) gc.Regfree(&bh) // TODO(kaib): this can be done with 4 regs and does not need 6 case gc.OMUL: var bl gc.Node gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil) var bh gc.Node gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil) var cl gc.Node gc.Regalloc(&cl, gc.Types[gc.TPTR32], nil) var ch gc.Node gc.Regalloc(&ch, gc.Types[gc.TPTR32], nil) // load args into bh:bl and bh:bl. gins(arm.AMOVW, &hi1, &bh) gins(arm.AMOVW, &lo1, &bl) gins(arm.AMOVW, &hi2, &ch) gins(arm.AMOVW, &lo2, &cl) // bl * cl -> ah al p1 := gins(arm.AMULLU, nil, nil) p1.From.Type = obj.TYPE_REG p1.From.Reg = bl.Reg p1.Reg = cl.Reg p1.To.Type = obj.TYPE_REGREG p1.To.Reg = ah.Reg p1.To.Offset = int64(al.Reg) //print("%v\n", p1); // bl * ch + ah -> ah p1 = gins(arm.AMULA, nil, nil) p1.From.Type = obj.TYPE_REG p1.From.Reg = bl.Reg p1.Reg = ch.Reg p1.To.Type = obj.TYPE_REGREG2 p1.To.Reg = ah.Reg p1.To.Offset = int64(ah.Reg) //print("%v\n", p1); // bh * cl + ah -> ah p1 = gins(arm.AMULA, nil, nil) p1.From.Type = obj.TYPE_REG p1.From.Reg = bh.Reg p1.Reg = cl.Reg p1.To.Type = obj.TYPE_REGREG2 p1.To.Reg = ah.Reg p1.To.Offset = int64(ah.Reg) //print("%v\n", p1); gc.Regfree(&bh) gc.Regfree(&bl) gc.Regfree(&ch) gc.Regfree(&cl) // We only rotate by a constant c in [0,64). // if c >= 32: // lo, hi = hi, lo // c -= 32 // if c == 0: // no-op // else: // t = hi // shld hi:lo, c // shld lo:t, c case gc.OLROT: v := uint64(r.Int64()) var bl gc.Node gc.Regalloc(&bl, lo1.Type, nil) var bh gc.Node gc.Regalloc(&bh, hi1.Type, nil) if v >= 32 { // reverse during load to do the first 32 bits of rotate v -= 32 gins(arm.AMOVW, &hi1, &bl) gins(arm.AMOVW, &lo1, &bh) } else { gins(arm.AMOVW, &hi1, &bh) gins(arm.AMOVW, &lo1, &bl) } if v == 0 { gins(arm.AMOVW, &bh, &ah) gins(arm.AMOVW, &bl, &al) } else { // rotate by 1 <= v <= 31 // MOVW bl<<v, al // MOVW bh<<v, ah // OR bl>>(32-v), ah // OR bh>>(32-v), al gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al) gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah) gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah) gshift(arm.AORR, &bh, arm.SHIFT_LR, int32(32-v), &al) } gc.Regfree(&bl) gc.Regfree(&bh) case gc.OLSH: var bl gc.Node gc.Regalloc(&bl, lo1.Type, nil) var bh gc.Node gc.Regalloc(&bh, hi1.Type, nil) gins(arm.AMOVW, &hi1, &bh) gins(arm.AMOVW, &lo1, &bl) var p6 *obj.Prog var s gc.Node var n1 gc.Node var creg gc.Node var p1 *obj.Prog var p2 *obj.Prog var p3 *obj.Prog var p4 *obj.Prog var p5 *obj.Prog if r.Op == gc.OLITERAL { v := uint64(r.Int64()) if v >= 64 { // TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al) // here and below (verify it optimizes to EOR) gins(arm.AEOR, &al, &al) gins(arm.AEOR, &ah, &ah) } else if v > 32 { gins(arm.AEOR, &al, &al) // MOVW bl<<(v-32), ah gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v-32), &ah) } else if v == 32 { gins(arm.AEOR, &al, &al) gins(arm.AMOVW, &bl, &ah) } else if v > 0 { // MOVW bl<<v, al gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al) // MOVW bh<<v, ah gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah) // OR bl>>(32-v), ah gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah) } else { gins(arm.AMOVW, &bl, &al) gins(arm.AMOVW, &bh, &ah) } goto olsh_break } gc.Regalloc(&s, gc.Types[gc.TUINT32], nil) gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil) if gc.Is64(r.Type) { // shift is >= 1<<32 var cl gc.Node var ch gc.Node split64(r, &cl, &ch) gmove(&ch, &s) gins(arm.ATST, &s, nil) p6 = gc.Gbranch(arm.ABNE, nil, 0) gmove(&cl, &s) splitclean() } else { gmove(r, &s) p6 = nil } gins(arm.ATST, &s, nil) // shift == 0 p1 = gins(arm.AMOVW, &bl, &al) p1.Scond = arm.C_SCOND_EQ p1 = gins(arm.AMOVW, &bh, &ah) p1.Scond = arm.C_SCOND_EQ p2 = gc.Gbranch(arm.ABEQ, nil, 0) // shift is < 32 gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32) gmove(&n1, &creg) gins(arm.ACMP, &s, &creg) // MOVW.LO bl<<s, al p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &al) p1.Scond = arm.C_SCOND_LO // MOVW.LO bh<<s, ah p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LL, &s, &ah) p1.Scond = arm.C_SCOND_LO // SUB.LO s, creg p1 = gins(arm.ASUB, &s, &creg) p1.Scond = arm.C_SCOND_LO // OR.LO bl>>creg, ah p1 = gregshift(arm.AORR, &bl, arm.SHIFT_LR, &creg, &ah) p1.Scond = arm.C_SCOND_LO // BLO end p3 = gc.Gbranch(arm.ABLO, nil, 0) // shift == 32 p1 = gins(arm.AEOR, &al, &al) p1.Scond = arm.C_SCOND_EQ p1 = gins(arm.AMOVW, &bl, &ah) p1.Scond = arm.C_SCOND_EQ p4 = gc.Gbranch(arm.ABEQ, nil, 0) // shift is < 64 gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64) gmove(&n1, &creg) gins(arm.ACMP, &s, &creg) // EOR.LO al, al p1 = gins(arm.AEOR, &al, &al) p1.Scond = arm.C_SCOND_LO // MOVW.LO creg>>1, creg p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg) p1.Scond = arm.C_SCOND_LO // SUB.LO creg, s p1 = gins(arm.ASUB, &creg, &s) p1.Scond = arm.C_SCOND_LO // MOVW bl<<s, ah p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &ah) p1.Scond = arm.C_SCOND_LO p5 = gc.Gbranch(arm.ABLO, nil, 0) // shift >= 64 if p6 != nil { gc.Patch(p6, gc.Pc) } gins(arm.AEOR, &al, &al) gins(arm.AEOR, &ah, &ah) gc.Patch(p2, gc.Pc) gc.Patch(p3, gc.Pc) gc.Patch(p4, gc.Pc) gc.Patch(p5, gc.Pc) gc.Regfree(&s) gc.Regfree(&creg) olsh_break: gc.Regfree(&bl) gc.Regfree(&bh) case gc.ORSH: var bl gc.Node gc.Regalloc(&bl, lo1.Type, nil) var bh gc.Node gc.Regalloc(&bh, hi1.Type, nil) gins(arm.AMOVW, &hi1, &bh) gins(arm.AMOVW, &lo1, &bl) var p4 *obj.Prog var p5 *obj.Prog var n1 gc.Node var p6 *obj.Prog var s gc.Node var p1 *obj.Prog var p2 *obj.Prog var creg gc.Node var p3 *obj.Prog if r.Op == gc.OLITERAL { v := uint64(r.Int64()) if v >= 64 { if bh.Type.Etype == gc.TINT32 { // MOVW bh->31, al gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al) // MOVW bh->31, ah gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah) } else { gins(arm.AEOR, &al, &al) gins(arm.AEOR, &ah, &ah) } } else if v > 32 { if bh.Type.Etype == gc.TINT32 { // MOVW bh->(v-32), al gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v-32), &al) // MOVW bh->31, ah gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah) } else { // MOVW bh>>(v-32), al gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v-32), &al) gins(arm.AEOR, &ah, &ah) } } else if v == 32 { gins(arm.AMOVW, &bh, &al) if bh.Type.Etype == gc.TINT32 { // MOVW bh->31, ah gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah) } else { gins(arm.AEOR, &ah, &ah) } } else if v > 0 { // MOVW bl>>v, al gshift(arm.AMOVW, &bl, arm.SHIFT_LR, int32(v), &al) // OR bh<<(32-v), al gshift(arm.AORR, &bh, arm.SHIFT_LL, int32(32-v), &al) if bh.Type.Etype == gc.TINT32 { // MOVW bh->v, ah gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v), &ah) } else { // MOVW bh>>v, ah gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v), &ah) } } else { gins(arm.AMOVW, &bl, &al) gins(arm.AMOVW, &bh, &ah) } goto orsh_break } gc.Regalloc(&s, gc.Types[gc.TUINT32], nil) gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil) if gc.Is64(r.Type) { // shift is >= 1<<32 var ch gc.Node var cl gc.Node split64(r, &cl, &ch) gmove(&ch, &s) gins(arm.ATST, &s, nil) var p1 *obj.Prog if bh.Type.Etype == gc.TINT32 { p1 = gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah) } else { p1 = gins(arm.AEOR, &ah, &ah) } p1.Scond = arm.C_SCOND_NE p6 = gc.Gbranch(arm.ABNE, nil, 0) gmove(&cl, &s) splitclean() } else { gmove(r, &s) p6 = nil } gins(arm.ATST, &s, nil) // shift == 0 p1 = gins(arm.AMOVW, &bl, &al) p1.Scond = arm.C_SCOND_EQ p1 = gins(arm.AMOVW, &bh, &ah) p1.Scond = arm.C_SCOND_EQ p2 = gc.Gbranch(arm.ABEQ, nil, 0) // check if shift is < 32 gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32) gmove(&n1, &creg) gins(arm.ACMP, &s, &creg) // MOVW.LO bl>>s, al p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LR, &s, &al) p1.Scond = arm.C_SCOND_LO // SUB.LO s,creg p1 = gins(arm.ASUB, &s, &creg) p1.Scond = arm.C_SCOND_LO // OR.LO bh<<(32-s), al p1 = gregshift(arm.AORR, &bh, arm.SHIFT_LL, &creg, &al) p1.Scond = arm.C_SCOND_LO if bh.Type.Etype == gc.TINT32 { // MOVW bh->s, ah p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &ah) } else { // MOVW bh>>s, ah p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &ah) } p1.Scond = arm.C_SCOND_LO // BLO end p3 = gc.Gbranch(arm.ABLO, nil, 0) // shift == 32 p1 = gins(arm.AMOVW, &bh, &al) p1.Scond = arm.C_SCOND_EQ if bh.Type.Etype == gc.TINT32 { gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah) } else { gins(arm.AEOR, &ah, &ah) } p4 = gc.Gbranch(arm.ABEQ, nil, 0) // check if shift is < 64 gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64) gmove(&n1, &creg) gins(arm.ACMP, &s, &creg) // MOVW.LO creg>>1, creg p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg) p1.Scond = arm.C_SCOND_LO // SUB.LO creg, s p1 = gins(arm.ASUB, &creg, &s) p1.Scond = arm.C_SCOND_LO if bh.Type.Etype == gc.TINT32 { // MOVW bh->(s-32), al p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &al) p1.Scond = arm.C_SCOND_LO } else { // MOVW bh>>(v-32), al p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &al) p1.Scond = arm.C_SCOND_LO } // BLO end p5 = gc.Gbranch(arm.ABLO, nil, 0) // s >= 64 if p6 != nil { gc.Patch(p6, gc.Pc) } if bh.Type.Etype == gc.TINT32 { // MOVW bh->31, al gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al) } else { gins(arm.AEOR, &al, &al) } gc.Patch(p2, gc.Pc) gc.Patch(p3, gc.Pc) gc.Patch(p4, gc.Pc) gc.Patch(p5, gc.Pc) gc.Regfree(&s) gc.Regfree(&creg) orsh_break: gc.Regfree(&bl) gc.Regfree(&bh) // TODO(kaib): literal optimizations // make constant the right side (it usually is anyway). // if(lo1.op == OLITERAL) { // nswap(&lo1, &lo2); // nswap(&hi1, &hi2); // } // if(lo2.op == OLITERAL) { // // special cases for constants. // lv = mpgetfix(lo2.val.u.xval); // hv = mpgetfix(hi2.val.u.xval); // splitclean(); // right side // split64(res, &lo2, &hi2); // switch(n->op) { // case OXOR: // gmove(&lo1, &lo2); // gmove(&hi1, &hi2); // switch(lv) { // case 0: // break; // case 0xffffffffu: // gins(ANOTL, N, &lo2); // break; // default: // gins(AXORL, ncon(lv), &lo2); // break; // } // switch(hv) { // case 0: // break; // case 0xffffffffu: // gins(ANOTL, N, &hi2); // break; // default: // gins(AXORL, ncon(hv), &hi2); // break; // } // break; // case OAND: // switch(lv) { // case 0: // gins(AMOVL, ncon(0), &lo2); // break; // default: // gmove(&lo1, &lo2); // if(lv != 0xffffffffu) // gins(AANDL, ncon(lv), &lo2); // break; // } // switch(hv) { // case 0: // gins(AMOVL, ncon(0), &hi2); // break; // default: // gmove(&hi1, &hi2); // if(hv != 0xffffffffu) // gins(AANDL, ncon(hv), &hi2); // break; // } // break; // case OOR: // switch(lv) { // case 0: // gmove(&lo1, &lo2); // break; // case 0xffffffffu: // gins(AMOVL, ncon(0xffffffffu), &lo2); // break; // default: // gmove(&lo1, &lo2); // gins(AORL, ncon(lv), &lo2); // break; // } // switch(hv) { // case 0: // gmove(&hi1, &hi2); // break; // case 0xffffffffu: // gins(AMOVL, ncon(0xffffffffu), &hi2); // break; // default: // gmove(&hi1, &hi2); // gins(AORL, ncon(hv), &hi2); // break; // } // break; // } // splitclean(); // splitclean(); // goto out; // } case gc.OXOR, gc.OAND, gc.OOR: var n1 gc.Node gc.Regalloc(&n1, lo1.Type, nil) gins(arm.AMOVW, &lo1, &al) gins(arm.AMOVW, &hi1, &ah) gins(arm.AMOVW, &lo2, &n1) gins(optoas(n.Op, lo1.Type), &n1, &al) gins(arm.AMOVW, &hi2, &n1) gins(optoas(n.Op, lo1.Type), &n1, &ah) gc.Regfree(&n1) } if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo1, &hi1) gins(arm.AMOVW, &al, &lo1) gins(arm.AMOVW, &ah, &hi1) splitclean() //out: gc.Regfree(&al) gc.Regfree(&ah) }
/* * generate one instruction: * as f, t */ func rawgins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog { // TODO(austin): Add self-move test like in 6g (but be careful // of truncation moves) p := gc.Prog(as) gc.Naddr(&p.From, f) gc.Naddr(&p.To, t) switch as { case obj.ACALL: if p.To.Type == obj.TYPE_REG { // Allow front end to emit CALL REG, and rewrite into CALL (REG). p.From = obj.Addr{} p.To.Type = obj.TYPE_MEM p.To.Offset = 0 if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } return p } // Bad things the front end has done to us. Crash to find call stack. case mips.AAND: if p.From.Type == obj.TYPE_CONST { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } case mips.ASGT, mips.ASGTU: if p.From.Type == obj.TYPE_MEM || p.To.Type == obj.TYPE_MEM { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } // Special cases case mips.AMUL, mips.AMULU, mips.AMULV, mips.AMULVU: if p.From.Type == obj.TYPE_CONST { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } pp := gc.Prog(mips.AMOVV) pp.From.Type = obj.TYPE_REG pp.From.Reg = mips.REG_LO pp.To = p.To p.Reg = p.To.Reg p.To = obj.Addr{} case mips.ASUBVU: // unary if f == nil { p.From = p.To p.Reg = mips.REGZERO } } if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } w := int32(0) switch as { case mips.AMOVB, mips.AMOVBU: w = 1 case mips.AMOVH, mips.AMOVHU: w = 2 case mips.AMOVW, mips.AMOVWU: w = 4 case mips.AMOVV: if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_ADDR { break } w = 8 } if w != 0 && ((f != nil && p.From.Width < int64(w)) || (t != nil && p.To.Type != obj.TYPE_REG && p.To.Width > int64(w))) { gc.Dump("f", f) gc.Dump("t", t) gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) } return p }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads if nl.Type.Align < 4 { q = 0 c = w } var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R1 var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r1) gc.Agen(nl, &dst) var nc gc.Node gc.Nodconst(&nc, gc.Types[gc.TUINT32], 0) var nz gc.Node gc.Regalloc(&nz, gc.Types[gc.TUINT32], &r0) gc.Cgen(&nc, &nz) if q > 128 { var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p := gins(arm.AMOVW, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q) * 4 p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT pl := p p = gins(arm.ACMP, &dst, nil) raddr(&end, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), pl) gc.Regfree(&end) } else if q >= 4 && !gc.Nacl { f := gc.Sysfunc("duffzero") p := gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 4 * (128 - int64(q)) } else { var p *obj.Prog for q > 0 { p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT //print("1. %v\n", p); q-- } } if c > 4 { // Loop to zero unaligned memory. var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p := gins(arm.AMOVW, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(c) p = gins(arm.AMOVB, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 1 p.Scond |= arm.C_PBIT pl := p p = gins(arm.ACMP, &dst, nil) raddr(&end, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), pl) gc.Regfree(&end) c = 0 } var p *obj.Prog for c > 0 { p = gins(arm.AMOVB, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 1 p.Scond |= arm.C_PBIT //print("2. %v\n", p); c-- } gc.Regfree(&dst) gc.Regfree(&nz) }
func cgen_floatsse(n *gc.Node, res *gc.Node) { var a obj.As nl := n.Left nr := n.Right switch n.Op { default: gc.Dump("cgen_floatsse", n) gc.Fatalf("cgen_floatsse %v", n.Op) return case gc.OMINUS, gc.OCOM: nr = gc.NegOne(n.Type) a = foptoas(gc.OMUL, nl.Type, 0) goto sbop // symmetric binary case gc.OADD, gc.OMUL: a = foptoas(n.Op, nl.Type, 0) goto sbop // asymmetric binary case gc.OSUB, gc.OMOD, gc.ODIV: a = foptoas(n.Op, nl.Type, 0) goto abop } sbop: // symmetric binary if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL { nl, nr = nr, nl } abop: // asymmetric binary if nl.Ullman >= nr.Ullman { var nt gc.Node gc.Tempname(&nt, nl.Type) gc.Cgen(nl, &nt) var n2 gc.Node gc.Mgen(nr, &n2, nil) var n1 gc.Node gc.Regalloc(&n1, nl.Type, res) gmove(&nt, &n1) gins(a, &n2, &n1) gmove(&n1, res) gc.Regfree(&n1) gc.Mfree(&n2) } else { var n2 gc.Node gc.Regalloc(&n2, nr.Type, res) gc.Cgen(nr, &n2) var n1 gc.Node gc.Regalloc(&n1, nl.Type, nil) gc.Cgen(nl, &n1) gins(a, &n2, &n1) gc.Regfree(&n2) gmove(&n1, res) gc.Regfree(&n1) } return }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads if q < 4 { // Write sequence of MOV 0, off(base) instead of using STOSL. // The hope is that although the code will be slightly longer, // the MOVs will have no dependencies and pipeline better // than the unrolled STOSL loop. // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Regalloc(&n1, gc.Types[gc.Tptr], nil) gc.Agen(nl, &n1) n1.Op = gc.OINDREG var z gc.Node gc.Nodconst(&z, gc.Types[gc.TUINT64], 0) for ; q > 0; q-- { n1.Type = z.Type gins(x86.AMOVL, &z, &n1) n1.Xoffset += 4 } gc.Nodconst(&z, gc.Types[gc.TUINT8], 0) for ; c > 0; c-- { n1.Type = z.Type gins(x86.AMOVB, &z, &n1) n1.Xoffset++ } gc.Regfree(&n1) return } var n1 gc.Node gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI) gc.Agen(nl, &n1) gconreg(x86.AMOVL, 0, x86.REG_AX) if q > 128 || (q >= 4 && gc.Nacl) { gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+ } else if q >= 4 { p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) // 1 and 128 = magic constants: see ../../runtime/asm_386.s p.To.Offset = 1 * (128 - int64(q)) } else { for q > 0 { gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+ q-- } } for c > 0 { gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+ c-- } }
/* * generate one instruction: * as f, t */ func gins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog { // Node nod; // if(f != N && f->op == OINDEX) { // gc.Regalloc(&nod, ®node, Z); // v = constnode.vconst; // gc.Cgen(f->right, &nod); // constnode.vconst = v; // idx.reg = nod.reg; // gc.Regfree(&nod); // } // if(t != N && t->op == OINDEX) { // gc.Regalloc(&nod, ®node, Z); // v = constnode.vconst; // gc.Cgen(t->right, &nod); // constnode.vconst = v; // idx.reg = nod.reg; // gc.Regfree(&nod); // } if f != nil && f.Op == gc.OADDR && (as == x86.AMOVL || as == x86.AMOVQ) { // Turn MOVL $xxx into LEAL xxx. // These should be equivalent but most of the backend // only expects to see LEAL, because that's what we had // historically generated. Various hidden assumptions are baked in by now. if as == x86.AMOVL { as = x86.ALEAL } else { as = x86.ALEAQ } f = f.Left } switch as { case x86.AMOVB, x86.AMOVW, x86.AMOVL, x86.AMOVQ, x86.AMOVSS, x86.AMOVSD: if f != nil && t != nil && samaddr(f, t) { return nil } case x86.ALEAQ: if f != nil && gc.Isconst(f, gc.CTNIL) { gc.Fatalf("gins LEAQ nil %v", f.Type) } } p := gc.Prog(as) gc.Naddr(&p.From, f) gc.Naddr(&p.To, t) if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } w := int32(0) switch as { case x86.AMOVB: w = 1 case x86.AMOVW: w = 2 case x86.AMOVL: w = 4 case x86.AMOVQ: w = 8 } if w != 0 && ((f != nil && p.From.Width < int64(w)) || (t != nil && p.To.Width > int64(w))) { gc.Dump("f", f) gc.Dump("t", t) gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) } if p.To.Type == obj.TYPE_ADDR && w > 0 { gc.Fatalf("bad use of addr: %v", p) } return p }
/* * generate move: * t = f * hard part is conversions. */ func gmove(f *gc.Node, t *gc.Node) { if gc.Debug['M'] != 0 { fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, gc.FmtLong), gc.Nconv(t, gc.FmtLong)) } ft := gc.Simsimtype(f.Type) tt := gc.Simsimtype(t.Type) cvt := t.Type if gc.Iscomplex[ft] || gc.Iscomplex[tt] { gc.Complexmove(f, t) return } // cannot have two memory operands var a obj.As if gc.Ismem(f) && gc.Ismem(t) { goto hard } // convert constant to desired type if f.Op == gc.OLITERAL { var con gc.Node f.Convconst(&con, t.Type) f = &con ft = tt // so big switch will choose a simple mov // some constants can't move directly to memory. if gc.Ismem(t) { // float constants come from memory. if gc.Isfloat[tt] { goto hard } // 64-bit immediates are really 32-bit sign-extended // unless moving into a register. if gc.Isint[tt] { if i := con.Int64(); int64(int32(i)) != i { goto hard } } } } // value -> value copy, only one memory operand. // figure out the instruction to use. // break out of switch for one-instruction gins. // goto rdst for "destination must be register". // goto hard for "convert to cvt type first". // otherwise handle and return. switch uint32(ft)<<16 | uint32(tt) { default: gc.Dump("f", f) gc.Dump("t", t) gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, gc.FmtLong), gc.Tconv(t.Type, gc.FmtLong)) /* * integer copy and truncate */ case gc.TINT8<<16 | gc.TINT8, // same size gc.TINT8<<16 | gc.TUINT8, gc.TUINT8<<16 | gc.TINT8, gc.TUINT8<<16 | gc.TUINT8, gc.TINT16<<16 | gc.TINT8, // truncate gc.TUINT16<<16 | gc.TINT8, gc.TINT32<<16 | gc.TINT8, gc.TUINT32<<16 | gc.TINT8, gc.TINT64<<16 | gc.TINT8, gc.TUINT64<<16 | gc.TINT8, gc.TINT16<<16 | gc.TUINT8, gc.TUINT16<<16 | gc.TUINT8, gc.TINT32<<16 | gc.TUINT8, gc.TUINT32<<16 | gc.TUINT8, gc.TINT64<<16 | gc.TUINT8, gc.TUINT64<<16 | gc.TUINT8: a = x86.AMOVB case gc.TINT16<<16 | gc.TINT16, // same size gc.TINT16<<16 | gc.TUINT16, gc.TUINT16<<16 | gc.TINT16, gc.TUINT16<<16 | gc.TUINT16, gc.TINT32<<16 | gc.TINT16, // truncate gc.TUINT32<<16 | gc.TINT16, gc.TINT64<<16 | gc.TINT16, gc.TUINT64<<16 | gc.TINT16, gc.TINT32<<16 | gc.TUINT16, gc.TUINT32<<16 | gc.TUINT16, gc.TINT64<<16 | gc.TUINT16, gc.TUINT64<<16 | gc.TUINT16: a = x86.AMOVW case gc.TINT32<<16 | gc.TINT32, // same size gc.TINT32<<16 | gc.TUINT32, gc.TUINT32<<16 | gc.TINT32, gc.TUINT32<<16 | gc.TUINT32: a = x86.AMOVL case gc.TINT64<<16 | gc.TINT32, // truncate gc.TUINT64<<16 | gc.TINT32, gc.TINT64<<16 | gc.TUINT32, gc.TUINT64<<16 | gc.TUINT32: a = x86.AMOVQL case gc.TINT64<<16 | gc.TINT64, // same size gc.TINT64<<16 | gc.TUINT64, gc.TUINT64<<16 | gc.TINT64, gc.TUINT64<<16 | gc.TUINT64: a = x86.AMOVQ /* * integer up-conversions */ case gc.TINT8<<16 | gc.TINT16, // sign extend int8 gc.TINT8<<16 | gc.TUINT16: a = x86.AMOVBWSX goto rdst case gc.TINT8<<16 | gc.TINT32, gc.TINT8<<16 | gc.TUINT32: a = x86.AMOVBLSX goto rdst case gc.TINT8<<16 | gc.TINT64, gc.TINT8<<16 | gc.TUINT64: a = x86.AMOVBQSX goto rdst case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8 gc.TUINT8<<16 | gc.TUINT16: a = x86.AMOVBWZX goto rdst case gc.TUINT8<<16 | gc.TINT32, gc.TUINT8<<16 | gc.TUINT32: a = x86.AMOVBLZX goto rdst case gc.TUINT8<<16 | gc.TINT64, gc.TUINT8<<16 | gc.TUINT64: a = x86.AMOVBQZX goto rdst case gc.TINT16<<16 | gc.TINT32, // sign extend int16 gc.TINT16<<16 | gc.TUINT32: a = x86.AMOVWLSX goto rdst case gc.TINT16<<16 | gc.TINT64, gc.TINT16<<16 | gc.TUINT64: a = x86.AMOVWQSX goto rdst case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16 gc.TUINT16<<16 | gc.TUINT32: a = x86.AMOVWLZX goto rdst case gc.TUINT16<<16 | gc.TINT64, gc.TUINT16<<16 | gc.TUINT64: a = x86.AMOVWQZX goto rdst case gc.TINT32<<16 | gc.TINT64, // sign extend int32 gc.TINT32<<16 | gc.TUINT64: a = x86.AMOVLQSX goto rdst // AMOVL into a register zeros the top of the register, // so this is not always necessary, but if we rely on AMOVL // the optimizer is almost certain to screw with us. case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32 gc.TUINT32<<16 | gc.TUINT64: a = x86.AMOVLQZX goto rdst /* * float to integer */ case gc.TFLOAT32<<16 | gc.TINT32: a = x86.ACVTTSS2SL goto rdst case gc.TFLOAT64<<16 | gc.TINT32: a = x86.ACVTTSD2SL goto rdst case gc.TFLOAT32<<16 | gc.TINT64: a = x86.ACVTTSS2SQ goto rdst case gc.TFLOAT64<<16 | gc.TINT64: a = x86.ACVTTSD2SQ goto rdst // convert via int32. case gc.TFLOAT32<<16 | gc.TINT16, gc.TFLOAT32<<16 | gc.TINT8, gc.TFLOAT32<<16 | gc.TUINT16, gc.TFLOAT32<<16 | gc.TUINT8, gc.TFLOAT64<<16 | gc.TINT16, gc.TFLOAT64<<16 | gc.TINT8, gc.TFLOAT64<<16 | gc.TUINT16, gc.TFLOAT64<<16 | gc.TUINT8: cvt = gc.Types[gc.TINT32] goto hard // convert via int64. case gc.TFLOAT32<<16 | gc.TUINT32, gc.TFLOAT64<<16 | gc.TUINT32: cvt = gc.Types[gc.TINT64] goto hard // algorithm is: // if small enough, use native float64 -> int64 conversion. // otherwise, subtract 2^63, convert, and add it back. case gc.TFLOAT32<<16 | gc.TUINT64, gc.TFLOAT64<<16 | gc.TUINT64: a := x86.ACVTTSS2SQ if ft == gc.TFLOAT64 { a = x86.ACVTTSD2SQ } bignodes() var r1 gc.Node gc.Regalloc(&r1, gc.Types[ft], nil) var r2 gc.Node gc.Regalloc(&r2, gc.Types[tt], t) var r3 gc.Node gc.Regalloc(&r3, gc.Types[ft], nil) var r4 gc.Node gc.Regalloc(&r4, gc.Types[tt], nil) gins(optoas(gc.OAS, f.Type), f, &r1) gins(optoas(gc.OCMP, f.Type), &bigf, &r1) p1 := gc.Gbranch(optoas(gc.OLE, f.Type), nil, +1) gins(a, &r1, &r2) p2 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) gins(optoas(gc.OAS, f.Type), &bigf, &r3) gins(optoas(gc.OSUB, f.Type), &r3, &r1) gins(a, &r1, &r2) gins(x86.AMOVQ, &bigi, &r4) gins(x86.AXORQ, &r4, &r2) gc.Patch(p2, gc.Pc) gmove(&r2, t) gc.Regfree(&r4) gc.Regfree(&r3) gc.Regfree(&r2) gc.Regfree(&r1) return /* * integer to float */ case gc.TINT32<<16 | gc.TFLOAT32: a = x86.ACVTSL2SS goto rdst case gc.TINT32<<16 | gc.TFLOAT64: a = x86.ACVTSL2SD goto rdst case gc.TINT64<<16 | gc.TFLOAT32: a = x86.ACVTSQ2SS goto rdst case gc.TINT64<<16 | gc.TFLOAT64: a = x86.ACVTSQ2SD goto rdst // convert via int32 case gc.TINT16<<16 | gc.TFLOAT32, gc.TINT16<<16 | gc.TFLOAT64, gc.TINT8<<16 | gc.TFLOAT32, gc.TINT8<<16 | gc.TFLOAT64, gc.TUINT16<<16 | gc.TFLOAT32, gc.TUINT16<<16 | gc.TFLOAT64, gc.TUINT8<<16 | gc.TFLOAT32, gc.TUINT8<<16 | gc.TFLOAT64: cvt = gc.Types[gc.TINT32] goto hard // convert via int64. case gc.TUINT32<<16 | gc.TFLOAT32, gc.TUINT32<<16 | gc.TFLOAT64: cvt = gc.Types[gc.TINT64] goto hard // algorithm is: // if small enough, use native int64 -> uint64 conversion. // otherwise, halve (rounding to odd?), convert, and double. case gc.TUINT64<<16 | gc.TFLOAT32, gc.TUINT64<<16 | gc.TFLOAT64: a := x86.ACVTSQ2SS if tt == gc.TFLOAT64 { a = x86.ACVTSQ2SD } var zero gc.Node gc.Nodconst(&zero, gc.Types[gc.TUINT64], 0) var one gc.Node gc.Nodconst(&one, gc.Types[gc.TUINT64], 1) var r1 gc.Node gc.Regalloc(&r1, f.Type, f) var r2 gc.Node gc.Regalloc(&r2, t.Type, t) var r3 gc.Node gc.Regalloc(&r3, f.Type, nil) var r4 gc.Node gc.Regalloc(&r4, f.Type, nil) gmove(f, &r1) gins(x86.ACMPQ, &r1, &zero) p1 := gc.Gbranch(x86.AJLT, nil, +1) gins(a, &r1, &r2) p2 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) gmove(&r1, &r3) gins(x86.ASHRQ, &one, &r3) gmove(&r1, &r4) gins(x86.AANDL, &one, &r4) gins(x86.AORQ, &r4, &r3) gins(a, &r3, &r2) gins(optoas(gc.OADD, t.Type), &r2, &r2) gc.Patch(p2, gc.Pc) gmove(&r2, t) gc.Regfree(&r4) gc.Regfree(&r3) gc.Regfree(&r2) gc.Regfree(&r1) return /* * float to float */ case gc.TFLOAT32<<16 | gc.TFLOAT32: a = x86.AMOVSS case gc.TFLOAT64<<16 | gc.TFLOAT64: a = x86.AMOVSD case gc.TFLOAT32<<16 | gc.TFLOAT64: a = x86.ACVTSS2SD goto rdst case gc.TFLOAT64<<16 | gc.TFLOAT32: a = x86.ACVTSD2SS goto rdst } gins(a, f, t) return // requires register destination rdst: { var r1 gc.Node gc.Regalloc(&r1, t.Type, t) gins(a, f, &r1) gmove(&r1, t) gc.Regfree(&r1) return } // requires register intermediate hard: var r1 gc.Node gc.Regalloc(&r1, cvt, t) gmove(f, &r1) gmove(&r1, t) gc.Regfree(&r1) return }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } w := nl.Type.Width if w > 1024 || (w >= 64 && (gc.Nacl || isPlan9)) { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var ax gc.Node var oldax gc.Node savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr]) gconreg(x86.AMOVL, 0, x86.REG_AX) gconreg(movptr, w/8, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+ if w%8 != 0 { n1.Op = gc.OINDREG clearfat_tail(&n1, w%8) } restx(&n1, &oldn1) restx(&ax, &oldax) return } if w >= 64 { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var vec_zero gc.Node var old_x0 gc.Node savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64]) gins(x86.AXORPS, &vec_zero, &vec_zero) if di := dzDI(w); di != 0 { gconreg(addptr, di, x86.REG_DI) } p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = dzOff(w) if w%16 != 0 { n1.Op = gc.OINDREG n1.Xoffset -= 16 - w%16 gins(x86.AMOVUPS, &vec_zero, &n1) } restx(&vec_zero, &old_x0) restx(&n1, &oldn1) return } // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Agenr(nl, &n1, nil) n1.Op = gc.OINDREG clearfat_tail(&n1, w) gc.Regfree(&n1) }
/* * generate one instruction: * as f, t */ func rawgins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog { // TODO(austin): Add self-move test like in 6g (but be careful // of truncation moves) p := gc.Prog(as) gc.Naddr(&p.From, f) gc.Naddr(&p.To, t) switch as { case arm64.ACMP, arm64.AFCMPS, arm64.AFCMPD: if t != nil { if f.Op != gc.OREGISTER { gc.Fatalf("bad operands to gcmp") } p.From = p.To p.To = obj.Addr{} raddr(f, p) } } // Bad things the front end has done to us. Crash to find call stack. switch as { case arm64.AAND, arm64.AMUL: if p.From.Type == obj.TYPE_CONST { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } case arm64.ACMP: if p.From.Type == obj.TYPE_MEM || p.To.Type == obj.TYPE_MEM { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } } if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } w := int32(0) switch as { case arm64.AMOVB, arm64.AMOVBU: w = 1 case arm64.AMOVH, arm64.AMOVHU: w = 2 case arm64.AMOVW, arm64.AMOVWU: w = 4 case arm64.AMOVD: if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_ADDR { break } w = 8 } if w != 0 && ((f != nil && p.From.Width < int64(w)) || (t != nil && p.To.Type != obj.TYPE_REG && p.To.Width > int64(w))) { gc.Dump("f", f) gc.Dump("t", t) gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) } return p }
/* * generate one instruction: * as f, t */ func gins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog { if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER { gc.Fatalf("gins MOVF reg, reg") } if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL { gc.Fatalf("gins CVTSD2SS const") } if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 { gc.Fatalf("gins MOVSD into F0") } if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC { // Turn MOVL $xxx(FP/SP) into LEAL xxx. // These should be equivalent but most of the backend // only expects to see LEAL, because that's what we had // historically generated. Various hidden assumptions are baked in by now. as = x86.ALEAL f = f.Left } switch as { case x86.AMOVB, x86.AMOVW, x86.AMOVL: if f != nil && t != nil && samaddr(f, t) { return nil } case x86.ALEAL: if f != nil && gc.Isconst(f, gc.CTNIL) { gc.Fatalf("gins LEAL nil %v", f.Type) } } p := gc.Prog(as) gc.Naddr(&p.From, f) gc.Naddr(&p.To, t) if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } w := 0 switch as { case x86.AMOVB: w = 1 case x86.AMOVW: w = 2 case x86.AMOVL: w = 4 } if true && w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) { gc.Dump("bad width from:", f) gc.Dump("bad width to:", t) gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) } if p.To.Type == obj.TYPE_ADDR && w > 0 { gc.Fatalf("bad use of addr: %v", p) } return p }
/* * generate one instruction: * as f, t */ func rawgins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog { // TODO(austin): Add self-move test like in 6g (but be careful // of truncation moves) p := gc.Prog(as) gc.Naddr(&p.From, f) gc.Naddr(&p.To, t) switch as { case obj.ACALL: if p.To.Type == obj.TYPE_REG && p.To.Reg != ppc64.REG_CTR { // Allow front end to emit CALL REG, and rewrite into MOV REG, CTR; CALL CTR. if gc.Ctxt.Flag_shared { // Make sure function pointer is in R12 as well when // compiling Go into PIC. // TODO(mwhudson): it would obviously be better to // change the register allocation to put the value in // R12 already, but I don't know how to do that. q := gc.Prog(as) q.As = ppc64.AMOVD q.From = p.To q.To.Type = obj.TYPE_REG q.To.Reg = ppc64.REG_R12 } pp := gc.Prog(as) pp.From = p.From pp.To.Type = obj.TYPE_REG pp.To.Reg = ppc64.REG_CTR p.As = ppc64.AMOVD p.From = p.To p.To.Type = obj.TYPE_REG p.To.Reg = ppc64.REG_CTR if gc.Ctxt.Flag_shared { // When compiling Go into PIC, the function we just // called via pointer might have been implemented in // a separate module and so overwritten the TOC // pointer in R2; reload it. q := gc.Prog(ppc64.AMOVD) q.From.Type = obj.TYPE_MEM q.From.Offset = 24 q.From.Reg = ppc64.REGSP q.To.Type = obj.TYPE_REG q.To.Reg = ppc64.REG_R2 } if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) fmt.Printf("%v\n", pp) } return pp } // Bad things the front end has done to us. Crash to find call stack. case ppc64.AAND, ppc64.AMULLD: if p.From.Type == obj.TYPE_CONST { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } case ppc64.ACMP, ppc64.ACMPU: if p.From.Type == obj.TYPE_MEM || p.To.Type == obj.TYPE_MEM { gc.Debug['h'] = 1 gc.Fatalf("bad inst: %v", p) } } if gc.Debug['g'] != 0 { fmt.Printf("%v\n", p) } w := int32(0) switch as { case ppc64.AMOVB, ppc64.AMOVBU, ppc64.AMOVBZ, ppc64.AMOVBZU: w = 1 case ppc64.AMOVH, ppc64.AMOVHU, ppc64.AMOVHZ, ppc64.AMOVHZU: w = 2 case ppc64.AMOVW, ppc64.AMOVWU, ppc64.AMOVWZ, ppc64.AMOVWZU: w = 4 case ppc64.AMOVD, ppc64.AMOVDU: if p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_ADDR { break } w = 8 } if w != 0 && ((f != nil && p.From.Width < int64(w)) || (t != nil && p.To.Type != obj.TYPE_REG && p.To.Width > int64(w))) { gc.Dump("f", f) gc.Dump("t", t) gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) } return p }