/* * generate division. * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) { t := nl.Type t0 := t if t.Width < 8 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT64] } else { t = gc.Types[gc.TUINT64] } } a := optoas(gc.ODIV, t) var tl gc.Node gc.Regalloc(&tl, t0, nil) var tr gc.Node gc.Regalloc(&tr, t0, nil) if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &tl) gc.Cgen(nr, &tr) } else { gc.Cgen(nr, &tr) gc.Cgen(nl, &tl) } if t != t0 { // Convert tl2 := tl tr2 := tr tl.Type = t tr.Type = t gmove(&tl2, &tl) gmove(&tr2, &tr) } // Handle divide-by-zero panic. p1 := ginsbranch(mips.ABNE, nil, &tr, nil, 0) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) gins3(a, &tr, &tl, nil) gc.Regfree(&tr) if op == gc.ODIV { var lo gc.Node gc.Nodreg(&lo, gc.Types[gc.TUINT64], mips.REG_LO) gins(mips.AMOVV, &lo, &tl) } else { // remainder in REG_HI var hi gc.Node gc.Nodreg(&hi, gc.Types[gc.TUINT64], mips.REG_HI) gins(mips.AMOVV, &hi, &tl) } gmove(&tl, res) gc.Regfree(&tl) }
/* * generate high multiply: * res = (nl*nr) >> width */ func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) { t := nl.Type a := optoas(gc.OHMUL, t) if nl.Ullman < nr.Ullman { nl, nr = nr, nl } var n1 gc.Node gc.Cgenr(nl, &n1, res) var n2 gc.Node gc.Cgenr(nr, &n2, nil) var ax, oldax, dx, olddx gc.Node savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT64]) savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT64]) gmove(&n1, &ax) gins(a, &n2, nil) gc.Regfree(&n2) gc.Regfree(&n1) if t.Width == 1 { // byte multiply behaves differently. var byteAH, byteDX gc.Node gc.Nodreg(&byteAH, t, x86.REG_AH) gc.Nodreg(&byteDX, t, x86.REG_DX) gmove(&byteAH, &byteDX) } gmove(&dx, res) restx(&ax, &oldax) restx(&dx, &olddx) }
/* * generate array index into res. * n might be any size; res is 32-bit. * returns Prog* to patch to panic call. */ func cgenindex(n *gc.Node, res *gc.Node, bounded bool) *obj.Prog { if !gc.Is64(n.Type) { gc.Cgen(n, res) return nil } var tmp gc.Node gc.Tempname(&tmp, gc.Types[gc.TINT64]) gc.Cgen(n, &tmp) var lo gc.Node var hi gc.Node split64(&tmp, &lo, &hi) gmove(&lo, res) if bounded { splitclean() return nil } var n1 gc.Node gc.Regalloc(&n1, gc.Types[gc.TINT32], nil) var n2 gc.Node gc.Regalloc(&n2, gc.Types[gc.TINT32], nil) var zero gc.Node gc.Nodconst(&zero, gc.Types[gc.TINT32], 0) gmove(&hi, &n1) gmove(&zero, &n2) gins(arm.ACMP, &n1, &n2) gc.Regfree(&n2) gc.Regfree(&n1) splitclean() return gc.Gbranch(arm.ABNE, nil, -1) }
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog { if gc.Isint[t.Etype] && n1.Op == gc.OLITERAL && n1.Int() == 0 && n2.Op != gc.OLITERAL { op = gc.Brrev(op) n1, n2 = n2, n1 } var r1, r2, g1, g2 gc.Node gc.Regalloc(&r1, t, n1) gc.Regalloc(&g1, n1.Type, &r1) gc.Cgen(n1, &g1) gmove(&g1, &r1) if gc.Isint[t.Etype] && n2.Op == gc.OLITERAL && n2.Int() == 0 { gins(arm.ACMP, &r1, n2) } else { gc.Regalloc(&r2, t, n2) gc.Regalloc(&g2, n1.Type, &r2) gc.Cgen(n2, &g2) gmove(&g2, &r2) gins(optoas(gc.OCMP, t), &r1, &r2) gc.Regfree(&g2) gc.Regfree(&r2) } gc.Regfree(&g1) gc.Regfree(&r1) return gc.Gbranch(optoas(op, t), nil, likely) }
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog { if gc.Isint[t.Etype] && n1.Op == gc.OLITERAL && n2.Op != gc.OLITERAL { // Reverse comparison to place constant last. op = gc.Brrev(op) n1, n2 = n2, n1 } var r1, r2, g1, g2 gc.Node gc.Regalloc(&r1, t, n1) gc.Regalloc(&g1, n1.Type, &r1) gc.Cgen(n1, &g1) gmove(&g1, &r1) if gc.Isint[t.Etype] && gc.Isconst(n2, gc.CTINT) { ginscon2(optoas(gc.OCMP, t), &r1, n2.Int()) } else { gc.Regalloc(&r2, t, n2) gc.Regalloc(&g2, n1.Type, &r2) gc.Cgen(n2, &g2) gmove(&g2, &r2) rawgins(optoas(gc.OCMP, t), &r1, &r2) gc.Regfree(&g2) gc.Regfree(&r2) } gc.Regfree(&g1) gc.Regfree(&r1) return gc.Gbranch(optoas(op, t), nil, likely) }
/* * generate byte multiply: * res = nl * nr * there is no 2-operand byte multiply instruction so * we do a full-width multiplication and truncate afterwards. */ func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool { if optoas(op, nl.Type) != x86.AIMULB { return false } // copy from byte to full registers t := gc.Types[gc.TUINT32] if gc.Issigned[nl.Type.Etype] { t = gc.Types[gc.TINT32] } // largest ullman on left. if nl.Ullman < nr.Ullman { nl, nr = nr, nl } var nt gc.Node gc.Tempname(&nt, nl.Type) gc.Cgen(nl, &nt) var n1 gc.Node gc.Regalloc(&n1, t, res) gc.Cgen(nr, &n1) var n2 gc.Node gc.Regalloc(&n2, t, nil) gmove(&nt, &n2) a := optoas(op, t) gins(a, &n2, &n1) gc.Regfree(&n2) gmove(&n1, res) gc.Regfree(&n1) return true }
func sudoclean() { if clean[cleani-1].Op != gc.OEMPTY { gc.Regfree(&clean[cleani-1]) } if clean[cleani-2].Op != gc.OEMPTY { gc.Regfree(&clean[cleani-2]) } cleani -= 2 }
/* * generate high multiply: * res = (nl*nr) >> width */ func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) { // largest ullman on left. if nl.Ullman < nr.Ullman { nl, nr = nr, nl } t := (*gc.Type)(nl.Type) w := int(int(t.Width * 8)) var n1 gc.Node gc.Cgenr(nl, &n1, res) var n2 gc.Node gc.Cgenr(nr, &n2, nil) switch gc.Simtype[t.Etype] { case gc.TINT8, gc.TINT16, gc.TINT32: gins3(optoas(gc.OMUL, t), &n2, &n1, nil) var lo gc.Node gc.Nodreg(&lo, gc.Types[gc.TUINT64], mips.REG_LO) gins(mips.AMOVV, &lo, &n1) p := (*obj.Prog)(gins(mips.ASRAV, nil, &n1)) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(w) case gc.TUINT8, gc.TUINT16, gc.TUINT32: gins3(optoas(gc.OMUL, t), &n2, &n1, nil) var lo gc.Node gc.Nodreg(&lo, gc.Types[gc.TUINT64], mips.REG_LO) gins(mips.AMOVV, &lo, &n1) p := (*obj.Prog)(gins(mips.ASRLV, nil, &n1)) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(w) case gc.TINT64, gc.TUINT64: if gc.Issigned[t.Etype] { gins3(mips.AMULV, &n2, &n1, nil) } else { gins3(mips.AMULVU, &n2, &n1, nil) } var hi gc.Node gc.Nodreg(&hi, gc.Types[gc.TUINT64], mips.REG_HI) gins(mips.AMOVV, &hi, &n1) default: gc.Fatalf("cgen_hmul %v", t) } gc.Cgen(&n1, res) gc.Regfree(&n1) gc.Regfree(&n2) }
/* * generate high multiply * res = (nl * nr) >> wordsize */ func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) { if nl.Ullman < nr.Ullman { nl, nr = nr, nl } t := nl.Type w := int(t.Width * 8) var n1 gc.Node gc.Regalloc(&n1, t, res) gc.Cgen(nl, &n1) var n2 gc.Node gc.Regalloc(&n2, t, nil) gc.Cgen(nr, &n2) switch gc.Simtype[t.Etype] { case gc.TINT8, gc.TINT16: gins(optoas(gc.OMUL, t), &n2, &n1) gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(w), &n1) case gc.TUINT8, gc.TUINT16: gins(optoas(gc.OMUL, t), &n2, &n1) gshift(arm.AMOVW, &n1, arm.SHIFT_LR, int32(w), &n1) // perform a long multiplication. case gc.TINT32, gc.TUINT32: var p *obj.Prog if gc.Issigned[t.Etype] { p = gins(arm.AMULL, &n2, nil) } else { p = gins(arm.AMULLU, &n2, nil) } // n2 * n1 -> (n1 n2) p.Reg = n1.Reg p.To.Type = obj.TYPE_REGREG p.To.Reg = n1.Reg p.To.Offset = int64(n2.Reg) default: gc.Fatalf("cgen_hmul %v", t) } gc.Cgen(&n1, res) gc.Regfree(&n1) gc.Regfree(&n2) }
/* * generate * as $c, n */ func ginscon(as int, c int64, n2 *gc.Node) { var n1 gc.Node switch as { case x86.AADDL, x86.AMOVL, x86.ALEAL: gc.Nodconst(&n1, gc.Types[gc.TINT32], c) default: gc.Nodconst(&n1, gc.Types[gc.TINT64], c) } if as != x86.AMOVQ && (c < -(1<<31) || c >= 1<<31) { // cannot have 64-bit immediate in ADD, etc. // instead, MOV into register first. var ntmp gc.Node gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil) gins(x86.AMOVQ, &n1, &ntmp) gins(as, &ntmp, n2) gc.Regfree(&ntmp) return } gins(as, &n1, n2) }
/* * generate * as n, $c (CMP/CMPU) */ func ginscon2(as int, n2 *gc.Node, c int64) { var n1 gc.Node gc.Nodconst(&n1, gc.Types[gc.TINT64], c) switch as { default: gc.Fatalf("ginscon2") case ppc64.ACMP: if -ppc64.BIG <= c && c <= ppc64.BIG { rawgins(as, n2, &n1) return } case ppc64.ACMPU: if 0 <= c && c <= 2*ppc64.BIG { rawgins(as, n2, &n1) return } } // MOV n1 into register first var ntmp gc.Node gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil) rawgins(ppc64.AMOVD, &n1, &ntmp) rawgins(as, n2, &ntmp) gc.Regfree(&ntmp) }
func restx(x *gc.Node, oldx *gc.Node) { if oldx.Op != 0 { x.Type = gc.Types[gc.TINT64] gc.SetReg(int(x.Reg), int(oldx.Etype)) gmove(oldx, x) gc.Regfree(oldx) } }
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog { if gc.Isint[t.Etype] || t.Etype == gc.Tptr { if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL { // Reverse comparison to place constant (including address constant) last. op = gc.Brrev(op) n1, n2 = n2, n1 } } // General case. var r1, r2, g1, g2 gc.Node // A special case to make write barriers more efficient. // Comparing the first field of a named struct can be done directly. base := n1 if n1.Op == gc.ODOT && n1.Left.Type.Etype == gc.TSTRUCT && n1.Left.Type.Type.Sym == n1.Right.Sym { base = n1.Left } if base.Op == gc.ONAME && base.Class&gc.PHEAP == 0 || n1.Op == gc.OINDREG { r1 = *n1 } else { gc.Regalloc(&r1, t, n1) gc.Regalloc(&g1, n1.Type, &r1) gc.Cgen(n1, &g1) gmove(&g1, &r1) } if n2.Op == gc.OLITERAL && gc.Isint[t.Etype] || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN { r2 = *n2 } else { gc.Regalloc(&r2, t, n2) gc.Regalloc(&g2, n1.Type, &r2) gc.Cgen(n2, &g2) gmove(&g2, &r2) } gins(optoas(gc.OCMP, t), &r1, &r2) if r1.Op == gc.OREGISTER { gc.Regfree(&g1) gc.Regfree(&r1) } if r2.Op == gc.OREGISTER { gc.Regfree(&g2) gc.Regfree(&r2) } return gc.Gbranch(optoas(op, t), nil, likely) }
func restx(x *gc.Node, oldx *gc.Node) { gc.Regfree(x) if oldx.Op != 0 { x.Type = gc.Types[gc.TINT32] gmove(oldx, x) } }
/* * generate high multiply: * res = (nl*nr) >> width */ func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) { // largest ullman on left. if nl.Ullman < nr.Ullman { nl, nr = nr, nl } t := (*gc.Type)(nl.Type) w := int(int(t.Width * 8)) var n1 gc.Node gc.Cgenr(nl, &n1, res) var n2 gc.Node gc.Cgenr(nr, &n2, nil) switch gc.Simtype[t.Etype] { case gc.TINT8, gc.TINT16, gc.TINT32: gins(optoas(gc.OMUL, t), &n2, &n1) p := (*obj.Prog)(gins(arm64.AASR, nil, &n1)) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(w) case gc.TUINT8, gc.TUINT16, gc.TUINT32: gins(optoas(gc.OMUL, t), &n2, &n1) p := (*obj.Prog)(gins(arm64.ALSR, nil, &n1)) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(w) case gc.TINT64, gc.TUINT64: if gc.Issigned[t.Etype] { gins(arm64.ASMULH, &n2, &n1) } else { gins(arm64.AUMULH, &n2, &n1) } default: gc.Fatalf("cgen_hmul %v", t) } gc.Cgen(&n1, res) gc.Regfree(&n1) gc.Regfree(&n2) }
func splitclean() { if nsclean <= 0 { gc.Fatalf("splitclean") } nsclean-- if sclean[nsclean].Op != gc.OEMPTY { gc.Regfree(&sclean[nsclean]) } }
/* * generate * as $c, n */ func ginscon(as int, c int64, n *gc.Node) { var n1 gc.Node gc.Nodconst(&n1, gc.Types[gc.TINT32], c) var n2 gc.Node gc.Regalloc(&n2, gc.Types[gc.TINT32], nil) gmove(&n1, &n2) gins(as, &n2, n) gc.Regfree(&n2) }
/* * generate byte multiply: * res = nl * nr * there is no 2-operand byte multiply instruction so * we do a full-width multiplication and truncate afterwards. */ func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool { if optoas(op, nl.Type) != x86.AIMULB { return false } // largest ullman on left. if nl.Ullman < nr.Ullman { nl, nr = nr, nl } // generate operands in "8-bit" registers. var n1b gc.Node gc.Regalloc(&n1b, nl.Type, res) gc.Cgen(nl, &n1b) var n2b gc.Node gc.Regalloc(&n2b, nr.Type, nil) gc.Cgen(nr, &n2b) // perform full-width multiplication. t := gc.Types[gc.TUINT64] if gc.Issigned[nl.Type.Etype] { t = gc.Types[gc.TINT64] } var n1 gc.Node gc.Nodreg(&n1, t, int(n1b.Reg)) var n2 gc.Node gc.Nodreg(&n2, t, int(n2b.Reg)) a := optoas(op, t) gins(a, &n2, &n1) // truncate. gmove(&n1, res) gc.Regfree(&n1b) gc.Regfree(&n2b) return true }
// res = runtime.getg() func getg(res *gc.Node) { var n1 gc.Node gc.Regalloc(&n1, res.Type, res) mov := optoas(gc.OAS, gc.Types[gc.Tptr]) p := gins(mov, nil, &n1) p.From.Type = obj.TYPE_REG p.From.Reg = x86.REG_TLS p = gins(mov, nil, &n1) p.From = p.To p.From.Type = obj.TYPE_MEM p.From.Index = x86.REG_TLS p.From.Scale = 1 gmove(&n1, res) gc.Regfree(&n1) }
/* * generate * as $c, n */ func ginscon(as int, c int64, n2 *gc.Node) { var n1 gc.Node gc.Nodconst(&n1, gc.Types[gc.TINT64], c) if as != ppc64.AMOVD && (c < -ppc64.BIG || c > ppc64.BIG) || n2.Op != gc.OREGISTER || as == ppc64.AMULLD { // cannot have more than 16-bit of immediate in ADD, etc. // instead, MOV into register first. var ntmp gc.Node gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil) rawgins(ppc64.AMOVD, &n1, &ntmp) rawgins(as, &ntmp, n2) gc.Regfree(&ntmp) return } rawgins(as, &n1, n2) }
/* * generate high multiply: * res = (nl*nr) >> width */ func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) { var n1 gc.Node var n2 gc.Node t := nl.Type a := optoas(gc.OHMUL, t) // gen nl in n1. gc.Tempname(&n1, t) gc.Cgen(nl, &n1) // gen nr in n2. gc.Regalloc(&n2, t, res) gc.Cgen(nr, &n2) var ax, oldax, dx, olddx gc.Node savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT32]) savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT32]) gmove(&n2, &ax) gins(a, &n1, nil) gc.Regfree(&n2) if t.Width == 1 { // byte multiply behaves differently. var byteAH, byteDX gc.Node gc.Nodreg(&byteAH, t, x86.REG_AH) gc.Nodreg(&byteDX, t, x86.REG_DX) gmove(&byteAH, &byteDX) } gmove(&dx, res) restx(&ax, &oldax) restx(&dx, &olddx) }
func clearfat_tail(n1 *gc.Node, b int64) { if b >= 16 { var vec_zero gc.Node gc.Regalloc(&vec_zero, gc.Types[gc.TFLOAT64], nil) gins(x86.AXORPS, &vec_zero, &vec_zero) for b >= 16 { gins(x86.AMOVUPS, &vec_zero, n1) n1.Xoffset += 16 b -= 16 } // MOVUPS X0, off(base) is a few bytes shorter than MOV 0, off(base) if b != 0 { n1.Xoffset -= 16 - b gins(x86.AMOVUPS, &vec_zero, n1) } gc.Regfree(&vec_zero) return } // Write sequence of MOV 0, off(base) instead of using STOSQ. // The hope is that although the code will be slightly longer, // the MOVs will have no dependencies and pipeline better // than the unrolled STOSQ loop. var z gc.Node gc.Nodconst(&z, gc.Types[gc.TUINT64], 0) if b >= 8 { n1.Type = z.Type gins(x86.AMOVQ, &z, n1) n1.Xoffset += 8 b -= 8 if b != 0 { n1.Xoffset -= 8 - b gins(x86.AMOVQ, &z, n1) } return } if b >= 4 { gc.Nodconst(&z, gc.Types[gc.TUINT32], 0) n1.Type = z.Type gins(x86.AMOVL, &z, n1) n1.Xoffset += 4 b -= 4 if b != 0 { n1.Xoffset -= 4 - b gins(x86.AMOVL, &z, n1) } return } if b >= 2 { gc.Nodconst(&z, gc.Types[gc.TUINT16], 0) n1.Type = z.Type gins(x86.AMOVW, &z, n1) n1.Xoffset += 2 b -= 2 } gc.Nodconst(&z, gc.Types[gc.TUINT8], 0) for b > 0 { n1.Type = z.Type gins(x86.AMOVB, &z, n1) n1.Xoffset++ b-- } }
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 *
 * Complex values are delegated to gc.Complexmove. A memory-to-memory
 * move or a constant-to-memory move goes through a register of the
 * destination type (label hard). Conversions that need a register
 * destination go through label rdst. Integer<->float conversions are
 * expanded inline and return directly.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := (*gc.Type)(t.Type)

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	//
	// r2, r1 and a are declared before the first goto because Go does
	// not allow a goto to jump over a variable declaration.
	var r2 gc.Node
	var r1 gc.Node
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		// Constants headed for a signed destination narrower than 64 bits
		// are converted to int64, loaded with MOVD into a register, and
		// then moved to t by a recursive gmove. The con and r1 declared
		// here shadow the outer ones.
		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(ppc64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		// Same as above for unsigned destinations, going through uint64.
		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(ppc64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// Disabled checks kept from an earlier version of this function:
	//
	// float constants come from memory.
	//if(isfloat[tt])
	//	goto hard;

	// 64-bit immediates are also from memory.
	//if(isint[tt])
	//	goto hard;
	//// 64-bit immediates are really 32-bit sign-extended
	//// unless moving into a register.
	//if(isint[tt]) {
	//	if(mpcmpfixfix(con.val.u.xval, minintval[TINT32]) < 0)
	//		goto hard;
	//	if(mpcmpfixfix(con.val.u.xval, maxintval[TINT32]) > 0)
	//		goto hard;
	//}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.
	//
	// The switch key packs the source type in the high 16 bits and the
	// destination type in the low 16 bits.
	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

	/*
	 * integer copy and truncate
	 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = ppc64.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		// truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = ppc64.AMOVBZ

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = ppc64.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		// truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = ppc64.AMOVHZ

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32,
		// truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = ppc64.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = ppc64.AMOVWZ

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = ppc64.AMOVD

	/*
	 * integer up-conversions
	 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = ppc64.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = ppc64.AMOVBZ

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = ppc64.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = ppc64.AMOVHZ

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = ppc64.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = ppc64.AMOVWZ

		goto rdst

	//warn("gmove: convert float to int not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native float64 -> int64 conversion.
	//	otherwise, subtract 2^63, convert, and add it back.
	/*
	 * float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32,
		gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		bignodes()

		// r1 (shadowing the outer r1) holds the source float.
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		gmove(f, &r1)
		if tt == gc.TUINT64 {
			// Compare against bigf; the FSUB of bigf is skipped when the
			// OLT branch is taken.
			gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
			gmove(&bigf, &r2)
			gins(ppc64.AFCMPU, &r1, &r2)
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1))
			gins(ppc64.AFSUB, &r2, &r1)
			gc.Patch(p1, gc.Pc)
			gc.Regfree(&r2)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
		var r3 gc.Node
		gc.Regalloc(&r3, gc.Types[gc.TINT64], t)
		gins(ppc64.AFCTIDZ, &r1, &r2)

		// Transfer the converted bits from the float register to the
		// integer register through memory at -8(SP).
		p1 := (*obj.Prog)(gins(ppc64.AFMOVD, &r2, nil))
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = ppc64.REGSP
		p1.To.Offset = -8
		p1 = gins(ppc64.AMOVD, nil, &r3)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = ppc64.REGSP
		p1.From.Offset = -8
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		if tt == gc.TUINT64 {
			// Add bigi back unless the earlier comparison took the OLT path.
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1)) // use CR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TINT64], ppc64.REGTMP)
			gins(ppc64.AMOVD, &bigi, &r1)
			gins(ppc64.AADD, &r1, &r3)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r3, t)
		gc.Regfree(&r3)
		return

	//warn("gmove: convert int to float not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native int64 -> uint64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	/*
	 * integer to float
	 */
	case gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64,
		gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		bignodes()

		// r1 (shadowing the outer r1) holds the source widened to int64.
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TINT64], nil)
		gmove(f, &r1)
		if ft == gc.TUINT64 {
			// Compare against bigi; the shift right by one is skipped
			// when the OLT branch is taken.
			gc.Nodreg(&r2, gc.Types[gc.TUINT64], ppc64.REGTMP)
			gmove(&bigi, &r2)
			gins(ppc64.ACMPU, &r1, &r2)
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1))
			p2 := (*obj.Prog)(gins(ppc64.ASRD, nil, &r1))
			p2.From.Type = obj.TYPE_CONST
			p2.From.Offset = 1
			gc.Patch(p1, gc.Pc)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], t)

		// Transfer the integer bits to the float register through memory
		// at -8(SP), then convert in place with FCFID.
		p1 := (*obj.Prog)(gins(ppc64.AMOVD, &r1, nil))
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = ppc64.REGSP
		p1.To.Offset = -8
		p1 = gins(ppc64.AFMOVD, nil, &r2)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = ppc64.REGSP
		p1.From.Offset = -8
		gins(ppc64.AFCFID, &r2, &r2)
		gc.Regfree(&r1)
		if ft == gc.TUINT64 {
			// Multiply by FREGTWO unless the earlier comparison took the
			// OLT path.
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1)) // use CR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TFLOAT64], ppc64.FREGTWO)
			gins(ppc64.AFMUL, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = ppc64.AFMOVS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = ppc64.AFMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = ppc64.AFMOVS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = ppc64.AFRSP
		goto rdst
	}

	// Cases that only set a fall out of the switch to this single
	// instruction.
	gins(a, f, t)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], arm64.REGZERO) var dst gc.Node // REGRT1 is reserved on arm64, see arm64/gsubr.go. gc.Nodreg(&dst, gc.Types[gc.Tptr], arm64.REGRT1) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(arm64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 p.Scond = arm64.C_XPRE pl := (*obj.Prog)(p) p = gcmp(arm64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R16 on the last zeroed dword boff = 8 } else if q >= 4 && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R16 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(arm64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } }
/* * generate shift according to op, one of: * res = nl << nr * res = nl >> nr */ func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) { a := int(optoas(op, nl.Type)) if nr.Op == gc.OLITERAL { var n1 gc.Node gc.Regalloc(&n1, nl.Type, res) gc.Cgen(nl, &n1) sc := uint64(nr.Int()) if sc >= uint64(nl.Type.Width*8) { // large shift gets 2 shifts by width-1 var n3 gc.Node gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1) gins(a, &n3, &n1) gins(a, &n3, &n1) } else { gins(a, nr, &n1) } gmove(&n1, res) gc.Regfree(&n1) return } if nl.Ullman >= gc.UINF { var n4 gc.Node gc.Tempname(&n4, nl.Type) gc.Cgen(nl, &n4) nl = &n4 } if nr.Ullman >= gc.UINF { var n5 gc.Node gc.Tempname(&n5, nr.Type) gc.Cgen(nr, &n5) nr = &n5 } // Allow either uint32 or uint64 as shift type, // to avoid unnecessary conversion from uint32 to uint64 // just to do the comparison. tcount := gc.Types[gc.Simtype[nr.Type.Etype]] if tcount.Etype < gc.TUINT32 { tcount = gc.Types[gc.TUINT32] } var n1 gc.Node gc.Regalloc(&n1, nr.Type, nil) // to hold the shift type in CX var n3 gc.Node gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX var n2 gc.Node gc.Regalloc(&n2, nl.Type, res) if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &n2) gc.Cgen(nr, &n1) gmove(&n1, &n3) } else { gc.Cgen(nr, &n1) gmove(&n1, &n3) gc.Cgen(nl, &n2) } gc.Regfree(&n3) // test and fix up large shifts if !bounded { gc.Nodconst(&n3, tcount, nl.Type.Width*8) gcmp(optoas(gc.OCMP, tcount), &n1, &n3) p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)) if op == gc.ORSH && gc.Issigned[nl.Type.Etype] { gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1) gins(a, &n3, &n2) } else { gc.Nodconst(&n3, nl.Type, 0) gmove(&n3, &n2) } gc.Patch(p1, gc.Pc) } gins(a, &n1, &n2) gmove(&n2, res) gc.Regfree(&n1) gc.Regfree(&n2) }
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 *
 * The emitted code panics via panicdivide on a zero divisor and,
 * for 64-bit signed operands that are not ruled out at compile time,
 * handles a divisor of -1 without executing the divide. Remainder is
 * computed as A - (A/B)*B.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will generate undefined result.
	// Also need to explicitly trap on division on zero,
	// the hardware will silently generate undefined result.
	// DIVW will leave unpredictable result in higher 32-bit,
	// so always use DIVD/DIVDU.
	t := nl.Type

	t0 := t

	// check records whether the run-time divisor == -1 test is needed:
	// only for signed types, and not when a constant dividend is not the
	// most negative value or a constant divisor is not -1.
	check := false
	if gc.Issigned[t.Etype] {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = false
		}
	}

	// Operands narrower than 8 bytes are divided as 64-bit values of the
	// same signedness; the -1 check is dropped for them.
	if t.Width < 8 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
		check = false
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	// Evaluate the operand with the larger Ullman number first.
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	// Compare the divisor against REGZERO and branch around the
	// panicdivide call when they differ.
	p1 := gins(optoas(gc.OCMP, t), &tr, nil)
	p1.Reg = arm64.REGZERO
	p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	// p2, when set, is the jump from the divisor == -1 path to the end
	// of the function's emitted code.
	var p2 *obj.Prog
	if check {
		var nm1 gc.Node
		gc.Nodconst(&nm1, t, -1)
		gcmp(optoas(gc.OCMP, t), &tr, &nm1)
		// This p1 shadows the outer p1; it skips the special case when
		// the divisor is not -1.
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), &tl, &tl)

			gmove(&tl, res)
		} else {
			// a % (-1) is 0.
			var nz gc.Node
			gc.Nodconst(&nz, t, 0)

			gmove(&nz, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	// The divide itself; p1 is kept so the remainder path can rewrite
	// its operands.
	p1 = gins(a, &tr, &tl)
	if op == gc.ODIV {
		gc.Regfree(&tr)
		gmove(&tl, res)
	} else {
		// A%B = A-(A/B*B)
		var tm gc.Node
		gc.Regalloc(&tm, t, nil)

		// patch div to use the 3 register form
		// TODO(minux): add gins3?
		// The original destination becomes the second source and the
		// quotient is written to tm, leaving the dividend in tl intact.
		p1.Reg = p1.To.Reg

		p1.To.Reg = tm.Reg
		gins(optoas(gc.OMUL, t), &tr, &tm)
		gc.Regfree(&tr)
		gins(optoas(gc.OSUB, t), &tm, &tl)
		gc.Regfree(&tm)
		gmove(&tl, res)
	}

	gc.Regfree(&tl)
	if check {
		gc.Patch(p2, gc.Pc)
	}
}
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords if gc.Reginuse(mips.REGRT1) { gc.Fatalf("%v in use during clearfat", obj.Rconv(mips.REGRT1)) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], mips.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], mips.REGRT1) gc.Regrealloc(&dst) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(mips.ASUBV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(mips.AMOVV, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(mips.AMOVV, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 pl := (*obj.Prog)(p) p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 gc.Patch(ginsbranch(mips.ABNE, nil, &dst, &end, 0), pl) gc.Regfree(&end) // The loop leaves R1 on the last zeroed dword boff = 8 // TODO(dfc): https://golang.org/issue/12108 // If DUFFZERO is used inside a tail call (see genwrapper) it will // overwrite the link register. } else if false && q >= 4 { p := gins(mips.ASUBV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_mips64x.s p.To.Offset = int64(8 * (128 - q)) // duffzero leaves R1 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(mips.AMOVV, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(mips.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } gc.Regfree(&dst) }
/* * generate comparison of nl, nr, both 64-bit. * nl is memory; nr is constant or memory. */ func cmp64(nl *gc.Node, nr *gc.Node, op gc.Op, likely int, to *obj.Prog) { var lo1 gc.Node var hi1 gc.Node var lo2 gc.Node var hi2 gc.Node var r1 gc.Node var r2 gc.Node split64(nl, &lo1, &hi1) split64(nr, &lo2, &hi2) // compare most significant word; // if they differ, we're done. t := hi1.Type gc.Regalloc(&r1, gc.Types[gc.TINT32], nil) gc.Regalloc(&r2, gc.Types[gc.TINT32], nil) gins(arm.AMOVW, &hi1, &r1) gins(arm.AMOVW, &hi2, &r2) gins(arm.ACMP, &r1, &r2) gc.Regfree(&r1) gc.Regfree(&r2) var br *obj.Prog switch op { default: gc.Fatalf("cmp64 %v %v", gc.Oconv(int(op), 0), t) // cmp hi // bne L // cmp lo // beq to // L: case gc.OEQ: br = gc.Gbranch(arm.ABNE, nil, -likely) // cmp hi // bne to // cmp lo // bne to case gc.ONE: gc.Patch(gc.Gbranch(arm.ABNE, nil, likely), to) // cmp hi // bgt to // blt L // cmp lo // bge to (or bgt to) // L: case gc.OGE, gc.OGT: gc.Patch(gc.Gbranch(optoas(gc.OGT, t), nil, likely), to) br = gc.Gbranch(optoas(gc.OLT, t), nil, -likely) // cmp hi // blt to // bgt L // cmp lo // ble to (or jlt to) // L: case gc.OLE, gc.OLT: gc.Patch(gc.Gbranch(optoas(gc.OLT, t), nil, likely), to) br = gc.Gbranch(optoas(gc.OGT, t), nil, -likely) } // compare least significant word t = lo1.Type gc.Regalloc(&r1, gc.Types[gc.TINT32], nil) gc.Regalloc(&r2, gc.Types[gc.TINT32], nil) gins(arm.AMOVW, &lo1, &r1) gins(arm.AMOVW, &lo2, &r2) gins(arm.ACMP, &r1, &r2) gc.Regfree(&r1) gc.Regfree(&r2) // jump again gc.Patch(gc.Gbranch(optoas(op, t), nil, likely), to) // point first branch down here if appropriate if br != nil { gc.Patch(br, gc.Pc) } splitclean() splitclean() }
/*
 * generate 64-bit
 *	res = n
 * for the operators handled below: unary OMINUS and OCOM, and binary
 * OADD, OSUB, OMUL, OLSH, ORSH, OAND, OOR, OXOR and OLROT.
 * There is no return value: any other operator, or a res that is
 * neither OINDREG nor ONAME, ends in gc.Fatalf.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatalf("cgen64 %v of %v", gc.Oconv(int(n.Op), 0), gc.Oconv(int(res.Op), 0))
	}

	// Evaluate a non-addressable left operand into a temporary first.
	l := n.Left
	var t1 gc.Node
	if !l.Addable {
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	// lo1/hi1 are the low and high 32-bit halves of the left operand.
	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)
	switch n.Op {
	default:
		gc.Fatalf("cgen64 %v", gc.Oconv(int(n.Op), 0))

	// res = 0 - l: SUB with the S bit on the low word,
	// then SBC on the high word.
	case gc.OMINUS:
		var lo2 gc.Node
		var hi2 gc.Node
		split64(res, &lo2, &hi2)

		// t1 is reused here as a scratch register; lo1/hi1 were
		// already split out of l above.
		gc.Regalloc(&t1, lo1.Type, nil)
		var al gc.Node
		gc.Regalloc(&al, lo1.Type, nil)
		var ah gc.Node
		gc.Regalloc(&ah, hi1.Type, nil)

		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)

		gmove(ncon(0), &t1)
		p1 := gins(arm.ASUB, &al, &t1)
		p1.Scond |= arm.C_SBIT
		gins(arm.AMOVW, &t1, &lo2)

		gmove(ncon(0), &t1)
		gins(arm.ASBC, &ah, &t1)
		gins(arm.AMOVW, &t1, &hi2)

		gc.Regfree(&t1)
		gc.Regfree(&al)
		gc.Regfree(&ah)
		splitclean()
		splitclean()
		return

	// res = ^l: EOR each half with an all-ones word held in t1.
	case gc.OCOM:
		gc.Regalloc(&t1, lo1.Type, nil)
		gmove(ncon(^uint32(0)), &t1)

		var lo2 gc.Node
		var hi2 gc.Node
		split64(res, &lo2, &hi2)
		var n1 gc.Node
		gc.Regalloc(&n1, lo1.Type, nil)

		gins(arm.AMOVW, &lo1, &n1)
		gins(arm.AEOR, &t1, &n1)
		gins(arm.AMOVW, &n1, &lo2)

		gins(arm.AMOVW, &hi1, &n1)
		gins(arm.AEOR, &t1, &n1)
		gins(arm.AMOVW, &n1, &hi2)

		gc.Regfree(&t1)
		gc.Regfree(&n1)
		splitclean()
		splitclean()
		return

	// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR,
		gc.OLROT:
		break
	}

	// setup for binary operators
	r := n.Right

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	// lo2/hi2 are only split out when the right operand is 64-bit
	// (a shift count may be narrower).
	var hi2 gc.Node
	var lo2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	var al gc.Node
	gc.Regalloc(&al, lo1.Type, nil)
	var ah gc.Node
	gc.Regalloc(&ah, hi1.Type, nil)

	// Do op.  Leave result in ah:al.
	switch n.Op {
	default:
		gc.Fatalf("cgen64: not implemented: %v\n", n)

	// TODO: Constants
	case gc.OADD:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)
		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi2, &bh)
		gins(arm.AMOVW, &lo2, &bl)

		// ADD with the S bit on the low words, then ADC on the high words.
		p1 := gins(arm.AADD, &bl, &al)
		p1.Scond |= arm.C_SBIT
		gins(arm.AADC, &bh, &ah)
		gc.Regfree(&bl)
		gc.Regfree(&bh)

	// TODO: Constants.
	case gc.OSUB:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)
		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo2, &bl)
		gins(arm.AMOVW, &hi2, &bh)

		// SUB with the S bit on the low words, then SBC on the high words.
		p1 := gins(arm.ASUB, &bl, &al)
		p1.Scond |= arm.C_SBIT
		gins(arm.ASBC, &bh, &ah)
		gc.Regfree(&bl)
		gc.Regfree(&bh)

	// TODO(kaib): this can be done with 4 regs and does not need 6
	case gc.OMUL:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)
		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		var cl gc.Node
		gc.Regalloc(&cl, gc.Types[gc.TPTR32], nil)
		var ch gc.Node
		gc.Regalloc(&ch, gc.Types[gc.TPTR32], nil)

		// load args into bh:bl and ch:cl.
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)
		gins(arm.AMOVW, &hi2, &ch)
		gins(arm.AMOVW, &lo2, &cl)

		// The operands of each multiply are filled in by hand on the
		// Prog returned by gins.
		// bl * cl -> ah al
		p1 := gins(arm.AMULLU, nil, nil)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bl.Reg
		p1.Reg = cl.Reg
		p1.To.Type = obj.TYPE_REGREG
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(al.Reg)

		//print("%v\n", p1);

		// bl * ch + ah -> ah
		p1 = gins(arm.AMULA, nil, nil)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bl.Reg
		p1.Reg = ch.Reg
		p1.To.Type = obj.TYPE_REGREG2
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(ah.Reg)

		//print("%v\n", p1);

		// bh * cl + ah -> ah
		p1 = gins(arm.AMULA, nil, nil)
		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bh.Reg
		p1.Reg = cl.Reg
		p1.To.Type = obj.TYPE_REGREG2
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(ah.Reg)

		//print("%v\n", p1);

		gc.Regfree(&bh)
		gc.Regfree(&bl)
		gc.Regfree(&ch)
		gc.Regfree(&cl)

	// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(r.Int())

		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(arm.AMOVW, &hi1, &bl)
			gins(arm.AMOVW, &lo1, &bh)
		} else {
			gins(arm.AMOVW, &hi1, &bh)
			gins(arm.AMOVW, &lo1, &bl)
		}

		if v == 0 {
			gins(arm.AMOVW, &bh, &ah)
			gins(arm.AMOVW, &bl, &al)
		} else {
			// rotate by 1 <= v <= 31
			//	MOVW	bl<<v, al
			//	MOVW	bh<<v, ah
			//	OR	bl>>(32-v), ah
			//	OR	bh>>(32-v), al
			gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al)

			gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah)
			gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah)
			gshift(arm.AORR, &bh, arm.SHIFT_LR, int32(32-v), &al)
		}

		gc.Regfree(&bl)
		gc.Regfree(&bh)

	// Left shift. A literal count is resolved here at compile time;
	// otherwise the emitted code tests the count at run time for
	// 0, <32, 32, <64 and >=64 using conditionally executed instructions.
	case gc.OLSH:
		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)

		// Declared before the goto below so that the jump to
		// olsh_break does not skip any variable declaration.
		var p6 *obj.Prog
		var s gc.Node
		var n1 gc.Node
		var creg gc.Node
		var p1 *obj.Prog
		var p2 *obj.Prog
		var p3 *obj.Prog
		var p4 *obj.Prog
		var p5 *obj.Prog
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				// TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al)
				// here and below (verify it optimizes to EOR)
				gins(arm.AEOR, &al, &al)

				gins(arm.AEOR, &ah, &ah)
			} else if v > 32 {
				gins(arm.AEOR, &al, &al)

				//	MOVW	bl<<(v-32), ah
				gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v-32), &ah)
			} else if v == 32 {
				gins(arm.AEOR, &al, &al)
				gins(arm.AMOVW, &bl, &ah)
			} else if v > 0 {
				//	MOVW	bl<<v, al
				gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al)

				//	MOVW	bh<<v, ah
				gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah)

				//	OR		bl>>(32-v), ah
				gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah)
			} else {
				gins(arm.AMOVW, &bl, &al)
				gins(arm.AMOVW, &bh, &ah)
			}

			goto olsh_break
		}

		// s holds the run-time shift count, creg the constant it is
		// compared against.
		gc.Regalloc(&s, gc.Types[gc.TUINT32], nil)

		gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil)
		if gc.Is64(r.Type) {
			// shift is >= 1<<32
			// (a non-zero high word of the count branches via p6
			// to the "shift >= 64" code below)
			var cl gc.Node
			var ch gc.Node
			split64(r, &cl, &ch)

			gmove(&ch, &s)
			gins(arm.ATST, &s, nil)
			p6 = gc.Gbranch(arm.ABNE, nil, 0)
			gmove(&cl, &s)
			splitclean()
		} else {
			gmove(r, &s)
			p6 = nil
		}

		gins(arm.ATST, &s, nil)

		// shift == 0
		p1 = gins(arm.AMOVW, &bl, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bh, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p2 = gc.Gbranch(arm.ABEQ, nil, 0)

		// shift is < 32
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		bl<<s, al
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &al)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW.LO		bh<<s, ah
		p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LL, &s, &ah)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		s, creg
		p1 = gins(arm.ASUB, &s, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	OR.LO		bl>>creg, ah
		p1 = gregshift(arm.AORR, &bl, arm.SHIFT_LR, &creg, &ah)

		p1.Scond = arm.C_SCOND_LO

		//	BLO	end
		p3 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift == 32
		p1 = gins(arm.AEOR, &al, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bl, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p4 = gc.Gbranch(arm.ABEQ, nil, 0)

		// shift is < 64
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	EOR.LO	al, al
		p1 = gins(arm.AEOR, &al, &al)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW.LO		creg>>1, creg
		// (creg was loaded with 64 above, so this leaves 32 in it)
		p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		creg, s
		p1 = gins(arm.ASUB, &creg, &s)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW	bl<<s, ah
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &ah)

		p1.Scond = arm.C_SCOND_LO

		p5 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift >= 64
		if p6 != nil {
			gc.Patch(p6, gc.Pc)
		}
		gins(arm.AEOR, &al, &al)
		gins(arm.AEOR, &ah, &ah)

		// All the early-out branches above land here.
		gc.Patch(p2, gc.Pc)
		gc.Patch(p3, gc.Pc)
		gc.Patch(p4, gc.Pc)
		gc.Patch(p5, gc.Pc)
		gc.Regfree(&s)
		gc.Regfree(&creg)

	olsh_break:
		gc.Regfree(&bl)
		gc.Regfree(&bh)

	// Right shift. Same structure as OLSH; when the high half has type
	// TINT32 the vacated bits are filled with an arithmetic shift of bh
	// (bh->31), otherwise with zero.
	case gc.ORSH:
		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)

		// Declared before the goto below so that the jump to
		// orsh_break does not skip any variable declaration.
		var p4 *obj.Prog
		var p5 *obj.Prog
		var n1 gc.Node
		var p6 *obj.Prog
		var s gc.Node
		var p1 *obj.Prog
		var p2 *obj.Prog
		var creg gc.Node
		var p3 *obj.Prog
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->31, al
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al)

					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					gins(arm.AEOR, &al, &al)
					gins(arm.AEOR, &ah, &ah)
				}
			} else if v > 32 {
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->(v-32), al
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v-32), &al)

					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					//	MOVW	bh>>(v-32), al
					gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v-32), &al)

					gins(arm.AEOR, &ah, &ah)
				}
			} else if v == 32 {
				gins(arm.AMOVW, &bh, &al)
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					gins(arm.AEOR, &ah, &ah)
				}
			} else if v > 0 {
				//	MOVW	bl>>v, al
				gshift(arm.AMOVW, &bl, arm.SHIFT_LR, int32(v), &al)

				//	OR		bh<<(32-v), al
				gshift(arm.AORR, &bh, arm.SHIFT_LL, int32(32-v), &al)

				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->v, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v), &ah)
				} else {
					//	MOVW	bh>>v, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v), &ah)
				}
			} else {
				gins(arm.AMOVW, &bl, &al)
				gins(arm.AMOVW, &bh, &ah)
			}

			goto orsh_break
		}

		// s holds the run-time shift count, creg the constant it is
		// compared against.
		gc.Regalloc(&s, gc.Types[gc.TUINT32], nil)

		gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil)
		if gc.Is64(r.Type) {
			// shift is >= 1<<32
			// (ah is filled here under NE; the p6 branch then goes to
			// the "s >= 64" code below, which fills al)
			var ch gc.Node
			var cl gc.Node
			split64(r, &cl, &ch)

			gmove(&ch, &s)
			gins(arm.ATST, &s, nil)
			var p1 *obj.Prog
			if bh.Type.Etype == gc.TINT32 {
				p1 = gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
			} else {
				p1 = gins(arm.AEOR, &ah, &ah)
			}
			p1.Scond = arm.C_SCOND_NE
			p6 = gc.Gbranch(arm.ABNE, nil, 0)
			gmove(&cl, &s)
			splitclean()
		} else {
			gmove(r, &s)
			p6 = nil
		}

		gins(arm.ATST, &s, nil)

		// shift == 0
		p1 = gins(arm.AMOVW, &bl, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bh, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p2 = gc.Gbranch(arm.ABEQ, nil, 0)

		// check if shift is < 32
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		bl>>s, al
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LR, &s, &al)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		s,creg
		p1 = gins(arm.ASUB, &s, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	OR.LO		bh<<(32-s), al
		p1 = gregshift(arm.AORR, &bh, arm.SHIFT_LL, &creg, &al)

		p1.Scond = arm.C_SCOND_LO

		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->s, ah
			p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &ah)
		} else {
			//	MOVW	bh>>s, ah
			p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &ah)
		}

		p1.Scond = arm.C_SCOND_LO

		//	BLO	end
		p3 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift == 32
		p1 = gins(arm.AMOVW, &bh, &al)

		p1.Scond = arm.C_SCOND_EQ

		// Unlike the al move above, the ah fill is not predicated: it is
		// emitted for every count that did not take one of the earlier
		// branches, and none of the later paths writes ah again.
		if bh.Type.Etype == gc.TINT32 {
			gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
		} else {
			gins(arm.AEOR, &ah, &ah)
		}
		p4 = gc.Gbranch(arm.ABEQ, nil, 0)

		// check if shift is < 64
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		creg>>1, creg
		// (creg was loaded with 64 above, so this leaves 32 in it)
		p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		creg, s
		p1 = gins(arm.ASUB, &creg, &s)

		p1.Scond = arm.C_SCOND_LO

		// The p1 declared with := in each branch below shadows the
		// outer p1; it is only used to set the condition.
		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->(s-32), al
			p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &al)

			p1.Scond = arm.C_SCOND_LO
		} else {
			//	MOVW	bh>>(s-32), al
			p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &al)

			p1.Scond = arm.C_SCOND_LO
		}

		//	BLO	end
		p5 = gc.Gbranch(arm.ABLO, nil, 0)

		// s >= 64
		if p6 != nil {
			gc.Patch(p6, gc.Pc)
		}
		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->31, al
			gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al)
		} else {
			gins(arm.AEOR, &al, &al)
		}

		// All the early-out branches above land here.
		gc.Patch(p2, gc.Pc)
		gc.Patch(p3, gc.Pc)
		gc.Patch(p4, gc.Pc)
		gc.Patch(p5, gc.Pc)
		gc.Regfree(&s)
		gc.Regfree(&creg)

	orsh_break:
		gc.Regfree(&bl)
		gc.Regfree(&bh)

	// TODO(kaib): literal optimizations
	// make constant the right side (it usually is anyway).
	// The disabled C code below is kept as a sketch of that optimization.
	//	if(lo1.op == OLITERAL) {
	//		nswap(&lo1, &lo2);
	//		nswap(&hi1, &hi2);
	//	}
	//	if(lo2.op == OLITERAL) {
	//		// special cases for constants.
	//		lv = mpgetfix(lo2.val.u.xval);
	//		hv = mpgetfix(hi2.val.u.xval);
	//		splitclean();	// right side
	//		split64(res, &lo2, &hi2);
	//		switch(n->op) {
	//		case OXOR:
	//			gmove(&lo1, &lo2);
	//			gmove(&hi1, &hi2);
	//			switch(lv) {
	//			case 0:
	//				break;
	//			case 0xffffffffu:
	//				gins(ANOTL, N, &lo2);
	//				break;
	//			default:
	//				gins(AXORL, ncon(lv), &lo2);
	//				break;
	//			}
	//			switch(hv) {
	//			case 0:
	//				break;
	//			case 0xffffffffu:
	//				gins(ANOTL, N, &hi2);
	//				break;
	//			default:
	//				gins(AXORL, ncon(hv), &hi2);
	//				break;
	//			}
	//			break;
	//
	//		case OAND:
	//			switch(lv) {
	//			case 0:
	//				gins(AMOVL, ncon(0), &lo2);
	//				break;
	//			default:
	//				gmove(&lo1, &lo2);
	//				if(lv != 0xffffffffu)
	//					gins(AANDL, ncon(lv), &lo2);
	//				break;
	//			}
	//			switch(hv) {
	//			case 0:
	//				gins(AMOVL, ncon(0), &hi2);
	//				break;
	//			default:
	//				gmove(&hi1, &hi2);
	//				if(hv != 0xffffffffu)
	//					gins(AANDL, ncon(hv), &hi2);
	//				break;
	//			}
	//			break;
	//
	//		case OOR:
	//			switch(lv) {
	//			case 0:
	//				gmove(&lo1, &lo2);
	//				break;
	//			case 0xffffffffu:
	//				gins(AMOVL, ncon(0xffffffffu), &lo2);
	//				break;
	//			default:
	//				gmove(&lo1, &lo2);
	//				gins(AORL, ncon(lv), &lo2);
	//				break;
	//			}
	//			switch(hv) {
	//			case 0:
	//				gmove(&hi1, &hi2);
	//				break;
	//			case 0xffffffffu:
	//				gins(AMOVL, ncon(0xffffffffu), &hi2);
	//				break;
	//			default:
	//				gmove(&hi1, &hi2);
	//				gins(AORL, ncon(hv), &hi2);
	//				break;
	//			}
	//			break;
	//		}
	//		splitclean();
	//		splitclean();
	//		goto out;
	//	}

	// Bitwise operators: apply the same 32-bit op to each half,
	// using n1 as the scratch register for the right operand.
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		var n1 gc.Node
		gc.Regalloc(&n1, lo1.Type, nil)

		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo2, &n1)
		gins(optoas(n.Op, lo1.Type), &n1, &al)
		gins(arm.AMOVW, &hi2, &n1)
		gins(optoas(n.Op, lo1.Type), &n1, &ah)
		gc.Regfree(&n1)
	}

	// One splitclean for the right operand (if it was split) and one
	// for the left operand.
	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	// Store the result ah:al into res; lo1/hi1 are reused for its halves.
	split64(res, &lo1, &hi1)
	gins(arm.AMOVW, &al, &lo1)
	gins(arm.AMOVW, &ah, &hi1)
	splitclean()

	//out:
	gc.Regfree(&al)

	gc.Regfree(&ah)
}
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } w := nl.Type.Width if w > 1024 || (gc.Nacl && w >= 64) { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var ax gc.Node var oldax gc.Node savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr]) gconreg(x86.AMOVL, 0, x86.REG_AX) gconreg(movptr, w/8, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+ if w%8 != 0 { n1.Op = gc.OINDREG clearfat_tail(&n1, w%8) } restx(&n1, &oldn1) restx(&ax, &oldax) return } if w >= 64 { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var vec_zero gc.Node var old_x0 gc.Node savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64]) gins(x86.AXORPS, &vec_zero, &vec_zero) if di := dzDI(w); di != 0 { gconreg(addptr, di, x86.REG_DI) } p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = dzOff(w) if w%16 != 0 { n1.Op = gc.OINDREG n1.Xoffset -= 16 - w%16 gins(x86.AMOVUPS, &vec_zero, &n1) } restx(&vec_zero, &old_x0) restx(&n1, &oldn1) return } // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Agenr(nl, &n1, nil) n1.Op = gc.OINDREG clearfat_tail(&n1, w) gc.Regfree(&n1) }