func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, 8+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) { p = appendpp(p, ppc64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGRT1, 0) p.Reg = ppc64.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGTMP, 0) p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT1, 0) p.Reg = ppc64.REGSP p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, ppc64.REGTMP, 0) p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT2, 0) p.Reg = ppc64.REGRT1 p = appendpp(p, ppc64.AMOVDU, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGRT1, int64(gc.Widthptr)) p1 := p p = appendpp(p, ppc64.ACMP, obj.TYPE_REG, ppc64.REGRT1, 0, obj.TYPE_REG, ppc64.REGRT2, 0) p = appendpp(p, ppc64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, r0 *uint32) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if *r0 == 0 { p = appendpp(p, arm.AMOVW, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, arm.REG_R0, 0) *r0 = 1 } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REGSP, int32(4+frame+lo+i)) } } else if !gc.Nacl && (cnt <= int64(128*gc.Widthptr)) { p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0) p.Reg = arm.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0) p.Reg = arm.REGSP p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(cnt), obj.TYPE_REG, arm.REG_R2, 0) p.Reg = arm.REG_R1 p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REG_R1, 4) p1 := p p.Scond |= arm.C_PBIT p = appendpp(p, arm.ACMP, obj.TYPE_REG, arm.REG_R1, 0, obj.TYPE_NONE, 0, 0) p.Reg = arm.REG_R2 p = appendpp(p, arm.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p = appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, arm64.REGRT1, 0) p.Reg = arm64.REGRT1 p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, arm64.REGTMP, 0) p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p = appendpp(p, arm64.AADD, obj.TYPE_REG, arm64.REGTMP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p.Reg = arm64.REGRT1 p = appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, arm64.REGTMP, 0) p = appendpp(p, arm64.AADD, obj.TYPE_REG, arm64.REGTMP, 0, obj.TYPE_REG, arm64.REGRT2, 0) p.Reg = arm64.REGRT1 p = appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGRT1, int64(gc.Widthptr)) p.Scond = arm64.C_XPRE p1 := p p = appendpp(p, arm64.ACMP, obj.TYPE_REG, arm64.REGRT1, 0, obj.TYPE_NONE, 0, 0) p.Reg = arm64.REGRT2 p = appendpp(p, arm64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
/* * generate division. * caller must set: * ax = allocated AX register * dx = allocated DX register * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) { // Have to be careful about handling // most negative int divided by -1 correctly. // The hardware will trap. // Also the byte divide instruction needs AH, // which we otherwise don't have to deal with. // Easiest way to avoid for int8, int16: use int32. // For int32 and int64, use explicit test. // Could use int64 hw for int32. t := nl.Type t0 := t check := 0 if gc.Issigned[t.Etype] { check = 1 if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -1<<uint64(t.Width*8-1) { check = 0 } else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 { check = 0 } } if t.Width < 4 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT32] } else { t = gc.Types[gc.TUINT32] } check = 0 } var t1 gc.Node gc.Tempname(&t1, t) var t2 gc.Node gc.Tempname(&t2, t) if t0 != t { var t3 gc.Node gc.Tempname(&t3, t0) var t4 gc.Node gc.Tempname(&t4, t0) gc.Cgen(nl, &t3) gc.Cgen(nr, &t4) // Convert. gmove(&t3, &t1) gmove(&t4, &t2) } else { gc.Cgen(nl, &t1) gc.Cgen(nr, &t2) } var n1 gc.Node if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) { gc.Regalloc(&n1, t, res) } else { gc.Regalloc(&n1, t, nil) } gmove(&t2, &n1) gmove(&t1, ax) var p2 *obj.Prog var n4 gc.Node if gc.Nacl { // Native Client does not relay the divide-by-zero trap // to the executing program, so we must insert a check // for ourselves. gc.Nodconst(&n4, t, 0) gins(optoas(gc.OCMP, t), &n1, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) } if check != 0 { gc.Nodconst(&n4, t, -1) gins(optoas(gc.OCMP, t), &n1, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if op == gc.ODIV { // a / (-1) is -a. gins(optoas(gc.OMINUS, t), nil, ax) gmove(ax, res) } else { // a % (-1) is 0. gc.Nodconst(&n4, t, 0) gmove(&n4, res) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) } if !gc.Issigned[t.Etype] { var nz gc.Node gc.Nodconst(&nz, t, 0) gmove(&nz, dx) } else { gins(optoas(gc.OEXTEND, t), nil, nil) } gins(optoas(op, t), &n1, nil) gc.Regfree(&n1) if op == gc.ODIV { gmove(ax, res) } else { gmove(dx, res) } if check != 0 { gc.Patch(p2, gc.Pc) } }
/* * generate division. * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) { // Have to be careful about handling // most negative int divided by -1 correctly. // The hardware will trap. // Also the byte divide instruction needs AH, // which we otherwise don't have to deal with. // Easiest way to avoid for int8, int16: use int32. // For int32 and int64, use explicit test. // Could use int64 hw for int32. t := nl.Type t0 := t check := 0 if gc.Issigned[t.Etype] { check = 1 if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -(1<<uint64(t.Width*8-1)) { check = 0 } else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 { check = 0 } } if t.Width < 4 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT32] } else { t = gc.Types[gc.TUINT32] } check = 0 } a := optoas(op, t) var n3 gc.Node gc.Regalloc(&n3, t0, nil) var ax gc.Node var oldax gc.Node if nl.Ullman >= nr.Ullman { savex(x86.REG_AX, &ax, &oldax, res, t0) gc.Cgen(nl, &ax) gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen gc.Cgen(nr, &n3) gc.Regfree(&ax) } else { gc.Cgen(nr, &n3) savex(x86.REG_AX, &ax, &oldax, res, t0) gc.Cgen(nl, &ax) } if t != t0 { // Convert ax1 := ax n31 := n3 ax.Type = t n3.Type = t gmove(&ax1, &ax) gmove(&n31, &n3) } var n4 gc.Node if gc.Nacl { // Native Client does not relay the divide-by-zero trap // to the executing program, so we must insert a check // for ourselves. gc.Nodconst(&n4, t, 0) gins(optoas(gc.OCMP, t), &n3, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) } var p2 *obj.Prog if check != 0 { gc.Nodconst(&n4, t, -1) gins(optoas(gc.OCMP, t), &n3, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if op == gc.ODIV { // a / (-1) is -a. gins(optoas(gc.OMINUS, t), nil, &ax) gmove(&ax, res) } else { // a % (-1) is 0. gc.Nodconst(&n4, t, 0) gmove(&n4, res) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) } var olddx gc.Node var dx gc.Node savex(x86.REG_DX, &dx, &olddx, res, t) if !gc.Issigned[t.Etype] { gc.Nodconst(&n4, t, 0) gmove(&n4, &dx) } else { gins(optoas(gc.OEXTEND, t), nil, nil) } gins(a, &n3, nil) gc.Regfree(&n3) if op == gc.ODIV { gmove(&ax, res) } else { gmove(&dx, res) } restx(&dx, &olddx) if check != 0 { gc.Patch(p2, gc.Pc) } restx(&ax, &oldax) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", gc.Nconv(nl, 0), gc.Tconv(nl.Type, 0), nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords if gc.Reginuse(ppc64.REGRT1) { gc.Fatal("%v in use during clearfat", obj.Rconv(ppc64.REGRT1)) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], ppc64.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], ppc64.REGRT1) gc.Regrealloc(&dst) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(ppc64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(ppc64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(ppc64.AMOVDU, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 pl := (*obj.Prog)(p) p = gins(ppc64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R3 on the last zeroed dword boff = 8 } else if q >= 4 { p := gins(ppc64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R3 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(ppc64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(ppc64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } gc.Regfree(&dst) }
/* * generate division. * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) { // Have to be careful about handling // most negative int divided by -1 correctly. // The hardware will generate undefined result. // Also need to explicitly trap on division on zero, // the hardware will silently generate undefined result. // DIVW will leave unpredicable result in higher 32-bit, // so always use DIVD/DIVDU. t := nl.Type t0 := t check := 0 if gc.Issigned[t.Etype] { check = 1 if gc.Isconst(nl, gc.CTINT) && gc.Mpgetfix(nl.Val.U.Xval) != -(1<<uint64(t.Width*8-1)) { check = 0 } else if gc.Isconst(nr, gc.CTINT) && gc.Mpgetfix(nr.Val.U.Xval) != -1 { check = 0 } } if t.Width < 8 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT64] } else { t = gc.Types[gc.TUINT64] } check = 0 } a := optoas(gc.ODIV, t) var tl gc.Node gc.Regalloc(&tl, t0, nil) var tr gc.Node gc.Regalloc(&tr, t0, nil) if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &tl) gc.Cgen(nr, &tr) } else { gc.Cgen(nr, &tr) gc.Cgen(nl, &tl) } if t != t0 { // Convert tl2 := tl tr2 := tr tl.Type = t tr.Type = t gmove(&tl2, &tl) gmove(&tr2, &tr) } // Handle divide-by-zero panic. p1 := gins(optoas(gc.OCMP, t), &tr, nil) p1.To.Type = obj.TYPE_REG p1.To.Reg = ppc64.REGZERO p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) var p2 *obj.Prog if check != 0 { var nm1 gc.Node gc.Nodconst(&nm1, t, -1) gins(optoas(gc.OCMP, t), &tr, &nm1) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if op == gc.ODIV { // a / (-1) is -a. gins(optoas(gc.OMINUS, t), nil, &tl) gmove(&tl, res) } else { // a % (-1) is 0. var nz gc.Node gc.Nodconst(&nz, t, 0) gmove(&nz, res) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) } p1 = gins(a, &tr, &tl) if op == gc.ODIV { gc.Regfree(&tr) gmove(&tl, res) } else { // A%B = A-(A/B*B) var tm gc.Node gc.Regalloc(&tm, t, nil) // patch div to use the 3 register form // TODO(minux): add gins3? p1.Reg = p1.To.Reg p1.To.Reg = tm.Val.U.Reg gins(optoas(gc.OMUL, t), &tr, &tm) gc.Regfree(&tr) gins(optoas(gc.OSUB, t), &tm, &tl) gc.Regfree(&tm) gmove(&tl, res) } gc.Regfree(&tl) if check != 0 { gc.Patch(p2, gc.Pc) } }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { // determine alignment. // want to avoid unaligned access, so have to use // smaller operations for less aligned types. // for example moving [4]byte must use 4 MOVB not 1 MOVW. align := int(n.Type.Align) var op int switch align { default: gc.Fatal("sgen: invalid alignment %d for %v", align, n.Type) case 1: op = arm.AMOVB case 2: op = arm.AMOVH case 4: op = arm.AMOVW } if w%int64(align) != 0 { gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type) } c := int32(w / int64(align)) if osrc%int64(align) != 0 || odst%int64(align) != 0 { gc.Fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align) } // if we are copying forward on the stack and // the src and dst overlap, then reverse direction dir := align if osrc < odst && int64(odst) < int64(osrc)+w { dir = -dir } if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 { var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R0 + 1 var r2 gc.Node r2.Op = gc.OREGISTER r2.Reg = arm.REG_R0 + 2 var src gc.Node gc.Regalloc(&src, gc.Types[gc.Tptr], &r1) var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r2) if n.Ullman >= res.Ullman { // eval n first gc.Agen(n, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { // eval res first if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) gc.Agen(n, &src) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.Tptr], &r0) f := gc.Sysfunc("duffcopy") p := gins(obj.ADUFFCOPY, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 8 * (128 - int64(c)) gc.Regfree(&tmp) gc.Regfree(&src) gc.Regfree(&dst) return } var dst gc.Node var src gc.Node if n.Ullman >= res.Ullman { gc.Agenr(n, &dst, res) // temporarily use dst gc.Regalloc(&src, gc.Types[gc.Tptr], nil) gins(arm.AMOVW, &dst, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agenr(res, &dst, res) gc.Agenr(n, &src, nil) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.TUINT32], nil) // set up end marker var nend gc.Node if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.TUINT32], nil) p := gins(arm.AMOVW, &src, &nend) p.From.Type = obj.TYPE_ADDR if dir < 0 { p.From.Offset = int64(dir) } else { p.From.Offset = w } } // move src and dest to the end of block if necessary if dir < 0 { p := gins(arm.AMOVW, &src, &src) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) p = gins(arm.AMOVW, &dst, &dst) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) } // move if c >= 4 { p := gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT ploop := p p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(arm.ACMP, &src, nil) raddr(&nend, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop) gc.Regfree(&nend) } else { var p *obj.Prog for { tmp14 := c c-- if tmp14 <= 0 { break } p = gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT } } gc.Regfree(&dst) gc.Regfree(&src) gc.Regfree(&tmp) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R1 var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r1) gc.Agen(nl, &dst) var nc gc.Node gc.Nodconst(&nc, gc.Types[gc.TUINT32], 0) var nz gc.Node gc.Regalloc(&nz, gc.Types[gc.TUINT32], &r0) gc.Cgen(&nc, &nz) if q > 128 { var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p := gins(arm.AMOVW, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q) * 4 p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT pl := p p = gins(arm.ACMP, &dst, nil) raddr(&end, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), pl) gc.Regfree(&end) } else if q >= 4 && !gc.Nacl { f := gc.Sysfunc("duffzero") p := gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 4 * (128 - int64(q)) } else { var p *obj.Prog for q > 0 { p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT //print("1. %P\n", p); q-- } } var p *obj.Prog for c > 0 { p = gins(arm.AMOVB, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 1 p.Scond |= arm.C_PBIT //print("2. %P\n", p); c-- } gc.Regfree(&dst) gc.Regfree(&nz) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords if reg[arm64.REGRT1-arm64.REG_R0] > 0 { gc.Fatal("R%d in use during clearfat", arm64.REGRT1-arm64.REG_R0) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], arm64.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], arm64.REGRT1) reg[arm64.REGRT1-arm64.REG_R0]++ gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(arm64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 p.Scond = arm64.C_XPRE pl := (*obj.Prog)(p) p = gcmp(arm64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R16 on the last zeroed dword boff = 8 } else if q >= 4 && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R16 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(arm64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } reg[arm64.REGRT1-arm64.REG_R0]-- }
/* * block copy: * memmove(&res, &n, w); * NB: character copy assumed little endian architecture */ func sgen(n *gc.Node, res *gc.Node, w int64) { if gc.Debug['g'] != 0 { fmt.Printf("\nsgen w=%d\n", w) gc.Dump("r", n) gc.Dump("res", res) } if n.Ullman >= gc.UINF && res.Ullman >= gc.UINF { gc.Fatal("sgen UINF") } if w < 0 || int64(int32(w)) != w { gc.Fatal("sgen copy %d", w) } if n.Type == nil { gc.Fatal("sgen: missing type") } if w == 0 { // evaluate side effects only. var dst gc.Node regalloc(&dst, gc.Types[gc.Tptr], nil) agen(res, &dst) agen(n, &dst) regfree(&dst) return } // If copying .args, that's all the results, so record definition sites // for them for the liveness analysis. if res.Op == gc.ONAME && res.Sym.Name == ".args" { for l := gc.Curfn.Dcl; l != nil; l = l.Next { if l.N.Class == gc.PPARAMOUT { gc.Gvardef(l.N) } } } // Avoid taking the address for simple enough types. if componentgen(n, res) { return } // determine alignment. // want to avoid unaligned access, so have to use // smaller operations for less aligned types. // for example moving [4]byte must use 4 MOVB not 1 MOVW. align := int(n.Type.Align) var op int switch align { default: gc.Fatal("sgen: invalid alignment %d for %v", align, gc.Tconv(n.Type, 0)) case 1: op = arm.AMOVB case 2: op = arm.AMOVH case 4: op = arm.AMOVW } if w%int64(align) != 0 { gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, gc.Tconv(n.Type, 0)) } c := int32(w / int64(align)) // offset on the stack osrc := stkof(n) odst := stkof(res) if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) { // osrc and odst both on stack, and at least one is in // an unknown position. Could generate code to test // for forward/backward copy, but instead just copy // to a temporary location first. var tmp gc.Node gc.Tempname(&tmp, n.Type) sgen(n, &tmp, w) sgen(&tmp, res, w) return } if osrc%int32(align) != 0 || odst%int32(align) != 0 { gc.Fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align) } // if we are copying forward on the stack and // the src and dst overlap, then reverse direction dir := align if osrc < odst && int64(odst) < int64(osrc)+w { dir = -dir } if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 { var r0 gc.Node r0.Op = gc.OREGISTER r0.Val.U.Reg = REGALLOC_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Val.U.Reg = REGALLOC_R0 + 1 var r2 gc.Node r2.Op = gc.OREGISTER r2.Val.U.Reg = REGALLOC_R0 + 2 var src gc.Node regalloc(&src, gc.Types[gc.Tptr], &r1) var dst gc.Node regalloc(&dst, gc.Types[gc.Tptr], &r2) if n.Ullman >= res.Ullman { // eval n first agen(n, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } agen(res, &dst) } else { // eval res first if res.Op == gc.ONAME { gc.Gvardef(res) } agen(res, &dst) agen(n, &src) } var tmp gc.Node regalloc(&tmp, gc.Types[gc.Tptr], &r0) f := gc.Sysfunc("duffcopy") p := gins(obj.ADUFFCOPY, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 8 * (128 - int64(c)) regfree(&tmp) regfree(&src) regfree(&dst) return } var dst gc.Node var src gc.Node if n.Ullman >= res.Ullman { agenr(n, &dst, res) // temporarily use dst regalloc(&src, gc.Types[gc.Tptr], nil) gins(arm.AMOVW, &dst, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } agen(res, &dst) } else { if res.Op == gc.ONAME { gc.Gvardef(res) } agenr(res, &dst, res) agenr(n, &src, nil) } var tmp gc.Node regalloc(&tmp, gc.Types[gc.TUINT32], nil) // set up end marker var nend gc.Node if c >= 4 { regalloc(&nend, gc.Types[gc.TUINT32], nil) p := gins(arm.AMOVW, &src, &nend) p.From.Type = obj.TYPE_ADDR if dir < 0 { p.From.Offset = int64(dir) } else { p.From.Offset = w } } // move src and dest to the end of block if necessary if dir < 0 { p := gins(arm.AMOVW, &src, &src) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) p = gins(arm.AMOVW, &dst, &dst) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) } // move if c >= 4 { p := gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT ploop := p p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(arm.ACMP, &src, nil) raddr(&nend, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop) regfree(&nend) } else { var p *obj.Prog for { tmp14 := c c-- if tmp14 <= 0 { break } p = gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT } } regfree(&dst) regfree(&src) regfree(&tmp) }