func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords if gc.Reginuse(mips.REGRT1) { gc.Fatalf("%v in use during clearfat", obj.Rconv(mips.REGRT1)) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], mips.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], mips.REGRT1) gc.Regrealloc(&dst) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(mips.ASUBV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(mips.AMOVV, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(mips.AMOVV, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 pl := (*obj.Prog)(p) p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 gc.Patch(ginsbranch(mips.ABNE, nil, &dst, &end, 0), pl) gc.Regfree(&end) // The loop leaves R1 on the last zeroed dword boff = 8 // TODO(dfc): https://golang.org/issue/12108 // If DUFFZERO is used inside a tail call (see genwrapper) it will // overwrite the link register. } else if false && q >= 4 { p := gins(mips.ASUBV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_mips64x.s p.To.Offset = int64(8 * (128 - q)) // duffzero leaves R1 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(mips.AMOVV, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(mips.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } gc.Regfree(&dst) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], arm64.REGZERO) var dst gc.Node // REGRT1 is reserved on arm64, see arm64/gsubr.go. gc.Nodreg(&dst, gc.Types[gc.Tptr], arm64.REGRT1) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(arm64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 p.Scond = arm64.C_XPRE pl := (*obj.Prog)(p) p = gcmp(arm64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R16 on the last zeroed dword boff = 8 } else if q >= 4 && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R16 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(arm64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords if gc.Reginuse(ppc64.REGRT1) { gc.Fatalf("%v in use during clearfat", obj.Rconv(ppc64.REGRT1)) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], ppc64.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], ppc64.REGRT1) gc.Regrealloc(&dst) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(ppc64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(ppc64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(ppc64.AMOVDU, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 pl := (*obj.Prog)(p) p = gins(ppc64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R3 on the last zeroed dword boff = 8 } else if q >= 4 { p := gins(ppc64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R3 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(ppc64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(ppc64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } gc.Regfree(&dst) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } w := nl.Type.Width if w > 1024 || (gc.Nacl && w >= 64) { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var ax gc.Node var oldax gc.Node savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr]) gconreg(x86.AMOVL, 0, x86.REG_AX) gconreg(movptr, w/8, x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+ if w%8 != 0 { n1.Op = gc.OINDREG clearfat_tail(&n1, w%8) } restx(&n1, &oldn1) restx(&ax, &oldax) return } if w >= 64 { var oldn1 gc.Node var n1 gc.Node savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr]) gc.Agen(nl, &n1) var vec_zero gc.Node var old_x0 gc.Node savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64]) gins(x86.AXORPS, &vec_zero, &vec_zero) if di := dzDI(w); di != 0 { gconreg(addptr, di, x86.REG_DI) } p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = dzOff(w) if w%16 != 0 { n1.Op = gc.OINDREG n1.Xoffset -= 16 - w%16 gins(x86.AMOVUPS, &vec_zero, &n1) } restx(&vec_zero, &old_x0) restx(&n1, &oldn1) return } // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Agenr(nl, &n1, nil) n1.Op = gc.OINDREG clearfat_tail(&n1, w) gc.Regfree(&n1) }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { // determine alignment. // want to avoid unaligned access, so have to use // smaller operations for less aligned types. // for example moving [4]byte must use 4 MOVB not 1 MOVW. align := int(n.Type.Align) var op int switch align { default: gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type) case 1: op = ppc64.AMOVBU case 2: op = ppc64.AMOVHU case 4: op = ppc64.AMOVWZU // there is no lwau, only lwaux case 8: op = ppc64.AMOVDU } if w%int64(align) != 0 { gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type) } c := int32(w / int64(align)) // if we are copying forward on the stack and // the src and dst overlap, then reverse direction dir := align if osrc < odst && int64(odst) < int64(osrc)+w { dir = -dir } var dst gc.Node var src gc.Node if n.Ullman >= res.Ullman { gc.Agenr(n, &dst, res) // temporarily use dst gc.Regalloc(&src, gc.Types[gc.Tptr], nil) gins(ppc64.AMOVD, &dst, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agenr(res, &dst, res) gc.Agenr(n, &src, nil) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.Tptr], nil) // set up end marker var nend gc.Node // move src and dest to the end of block if necessary if dir < 0 { if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.Tptr], nil) gins(ppc64.AMOVD, &src, &nend) } p := gins(ppc64.AADD, nil, &src) p.From.Type = obj.TYPE_CONST p.From.Offset = w p = gins(ppc64.AADD, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = w } else { p := gins(ppc64.AADD, nil, &src) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-dir) p = gins(ppc64.AADD, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-dir) if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.Tptr], nil) p := gins(ppc64.AMOVD, &src, &nend) p.From.Type = obj.TYPE_ADDR p.From.Offset = w } } // move // TODO: enable duffcopy for larger copies. if c >= 4 { p := gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) ploop := p p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p = gins(ppc64.ACMP, &src, &nend) gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), ploop) gc.Regfree(&nend) } else { // TODO(austin): Instead of generating ADD $-8,R8; ADD // $-8,R7; n*(MOVDU 8(R8),R9; MOVDU R9,8(R7);) just // generate the offsets directly and eliminate the // ADDs. That will produce shorter, more // pipeline-able code. var p *obj.Prog for ; c > 0; c-- { p = gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) } } gc.Regfree(&dst) gc.Regfree(&src) gc.Regfree(&tmp) }
/* * generate code to compute address of n, * a reference to a (perhaps nested) field inside * an array or struct. * return 0 on failure, 1 on success. * on success, leaves usable address in a. * * caller is responsible for calling sudoclean * after successful sudoaddable, * to release the register used for a. */ func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool { if n.Type == nil { return false } *a = obj.Addr{} switch n.Op { case gc.OLITERAL: if !gc.Isconst(n, gc.CTINT) { break } v := n.Int() if v >= 32000 || v <= -32000 { break } switch as { default: return false case x86.AADDB, x86.AADDW, x86.AADDL, x86.AADDQ, x86.ASUBB, x86.ASUBW, x86.ASUBL, x86.ASUBQ, x86.AANDB, x86.AANDW, x86.AANDL, x86.AANDQ, x86.AORB, x86.AORW, x86.AORL, x86.AORQ, x86.AXORB, x86.AXORW, x86.AXORL, x86.AXORQ, x86.AINCB, x86.AINCW, x86.AINCL, x86.AINCQ, x86.ADECB, x86.ADECW, x86.ADECL, x86.ADECQ, x86.AMOVB, x86.AMOVW, x86.AMOVL, x86.AMOVQ: break } cleani += 2 reg := &clean[cleani-1] reg1 := &clean[cleani-2] reg.Op = gc.OEMPTY reg1.Op = gc.OEMPTY gc.Naddr(a, n) return true case gc.ODOT, gc.ODOTPTR: cleani += 2 reg := &clean[cleani-1] reg1 := &clean[cleani-2] reg.Op = gc.OEMPTY reg1.Op = gc.OEMPTY var nn *gc.Node var oary [10]int64 o := gc.Dotoffset(n, oary[:], &nn) if nn == nil { sudoclean() return false } if nn.Addable && o == 1 && oary[0] >= 0 { // directly addressable set of DOTs n1 := *nn n1.Type = n.Type n1.Xoffset += oary[0] gc.Naddr(a, &n1) return true } gc.Regalloc(reg, gc.Types[gc.Tptr], nil) n1 := *reg n1.Op = gc.OINDREG if oary[0] >= 0 { gc.Agen(nn, reg) n1.Xoffset = oary[0] } else { gc.Cgen(nn, reg) gc.Cgen_checknil(reg) n1.Xoffset = -(oary[0] + 1) } for i := 1; i < o; i++ { if oary[i] >= 0 { gc.Fatalf("can't happen") } gins(movptr, &n1, reg) gc.Cgen_checknil(reg) n1.Xoffset = -(oary[i] + 1) } a.Type = obj.TYPE_NONE a.Index = obj.TYPE_NONE gc.Fixlargeoffset(&n1) gc.Naddr(a, &n1) return true case gc.OINDEX: return false } return false }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { // determine alignment. // want to avoid unaligned access, so have to use // smaller operations for less aligned types. // for example moving [4]byte must use 4 MOVB not 1 MOVW. align := int(n.Type.Align) var op int switch align { default: gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type) case 1: op = arm.AMOVB case 2: op = arm.AMOVH case 4: op = arm.AMOVW } if w%int64(align) != 0 { gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type) } c := int32(w / int64(align)) if osrc%int64(align) != 0 || odst%int64(align) != 0 { gc.Fatalf("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align) } // if we are copying forward on the stack and // the src and dst overlap, then reverse direction dir := align if osrc < odst && int64(odst) < int64(osrc)+w { dir = -dir } if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 { var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R0 + 1 var r2 gc.Node r2.Op = gc.OREGISTER r2.Reg = arm.REG_R0 + 2 var src gc.Node gc.Regalloc(&src, gc.Types[gc.Tptr], &r1) var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r2) if n.Ullman >= res.Ullman { // eval n first gc.Agen(n, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { // eval res first if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) gc.Agen(n, &src) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.Tptr], &r0) f := gc.Sysfunc("duffcopy") p := gins(obj.ADUFFCOPY, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 8 * (128 - int64(c)) gc.Regfree(&tmp) gc.Regfree(&src) gc.Regfree(&dst) return } var dst gc.Node var src gc.Node if n.Ullman >= res.Ullman { gc.Agenr(n, &dst, res) // temporarily use dst gc.Regalloc(&src, gc.Types[gc.Tptr], nil) gins(arm.AMOVW, &dst, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agenr(res, &dst, res) gc.Agenr(n, &src, nil) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.TUINT32], nil) // set up end marker var nend gc.Node if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.TUINT32], nil) p := gins(arm.AMOVW, &src, &nend) p.From.Type = obj.TYPE_ADDR if dir < 0 { p.From.Offset = int64(dir) } else { p.From.Offset = w } } // move src and dest to the end of block if necessary if dir < 0 { p := gins(arm.AMOVW, &src, &src) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) p = gins(arm.AMOVW, &dst, &dst) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) } // move if c >= 4 { p := gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT ploop := p p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(arm.ACMP, &src, nil) raddr(&nend, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop) gc.Regfree(&nend) } else { var p *obj.Prog for ; c > 0; c-- { p = gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT } } gc.Regfree(&dst) gc.Regfree(&src) gc.Regfree(&tmp) }
/* * generate code to compute address of n, * a reference to a (perhaps nested) field inside * an array or struct. * return 0 on failure, 1 on success. * on success, leaves usable address in a. * * caller is responsible for calling sudoclean * after successful sudoaddable, * to release the register used for a. */ func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool { if n.Type == nil { return false } *a = obj.Addr{} switch n.Op { case gc.OLITERAL: if !gc.Isconst(n, gc.CTINT) { break } v := n.Int() if v >= 32000 || v <= -32000 { break } switch as { default: return false case arm.AADD, arm.ASUB, arm.AAND, arm.AORR, arm.AEOR, arm.AMOVB, arm.AMOVBS, arm.AMOVBU, arm.AMOVH, arm.AMOVHS, arm.AMOVHU, arm.AMOVW: break } cleani += 2 reg := &clean[cleani-1] reg1 := &clean[cleani-2] reg.Op = gc.OEMPTY reg1.Op = gc.OEMPTY gc.Naddr(a, n) return true case gc.ODOT, gc.ODOTPTR: cleani += 2 reg := &clean[cleani-1] reg1 := &clean[cleani-2] reg.Op = gc.OEMPTY reg1.Op = gc.OEMPTY var nn *gc.Node var oary [10]int64 o := gc.Dotoffset(n, oary[:], &nn) if nn == nil { sudoclean() return false } if nn.Addable && o == 1 && oary[0] >= 0 { // directly addressable set of DOTs n1 := *nn n1.Type = n.Type n1.Xoffset += oary[0] gc.Naddr(a, &n1) return true } gc.Regalloc(reg, gc.Types[gc.Tptr], nil) n1 := *reg n1.Op = gc.OINDREG if oary[0] >= 0 { gc.Agen(nn, reg) n1.Xoffset = oary[0] } else { gc.Cgen(nn, reg) gc.Cgen_checknil(reg) n1.Xoffset = -(oary[0] + 1) } for i := 1; i < o; i++ { if oary[i] >= 0 { gc.Fatalf("can't happen") } gins(arm.AMOVW, &n1, reg) gc.Cgen_checknil(reg) n1.Xoffset = -(oary[i] + 1) } a.Type = obj.TYPE_NONE a.Name = obj.NAME_NONE n1.Type = n.Type gc.Naddr(a, &n1) return true case gc.OINDEX: return false } return false }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI) var src gc.Node gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI) var tsrc gc.Node gc.Tempname(&tsrc, gc.Types[gc.Tptr]) var tdst gc.Node gc.Tempname(&tdst, gc.Types[gc.Tptr]) if !n.Addable { gc.Agen(n, &tsrc) } if !res.Addable { gc.Agen(res, &tdst) } if n.Addable { gc.Agen(n, &src) } else { gmove(&tsrc, &src) } if res.Op == gc.ONAME { gc.Gvardef(res) } if res.Addable { gc.Agen(res, &dst) } else { gmove(&tdst, &dst) } c := int32(w % 4) // bytes q := int32(w / 4) // doublewords // if we are copying forward on the stack and // the src and dst overlap, then reverse direction if osrc < odst && int64(odst) < int64(osrc)+w { // reverse direction gins(x86.ASTD, nil, nil) // set direction flag if c > 0 { gconreg(x86.AADDL, w-1, x86.REG_SI) gconreg(x86.AADDL, w-1, x86.REG_DI) gconreg(x86.AMOVL, int64(c), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)- } if q > 0 { if c > 0 { gconreg(x86.AADDL, -3, x86.REG_SI) gconreg(x86.AADDL, -3, x86.REG_DI) } else { gconreg(x86.AADDL, w-4, x86.REG_SI) gconreg(x86.AADDL, w-4, x86.REG_DI) } gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)- } // we leave with the flag clear gins(x86.ACLD, nil, nil) } else { gins(x86.ACLD, nil, nil) // paranoia. TODO(rsc): remove? // normal direction if q > 128 || (q >= 4 && gc.Nacl) { gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+ } else if q >= 4 { p := gins(obj.ADUFFCOPY, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) // 10 and 128 = magic constants: see ../../runtime/asm_386.s p.To.Offset = 10 * (128 - int64(q)) } else if !gc.Nacl && c == 0 { var cx gc.Node gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX) // We don't need the MOVSL side-effect of updating SI and DI, // and issuing a sequence of MOVLs directly is faster. src.Op = gc.OINDREG dst.Op = gc.OINDREG for q > 0 { gmove(&src, &cx) // MOVL x+(SI),CX gmove(&cx, &dst) // MOVL CX,x+(DI) src.Xoffset += 4 dst.Xoffset += 4 q-- } } else { for q > 0 { gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+ q-- } } for c > 0 { gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+ c-- } } }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R1 var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r1) gc.Agen(nl, &dst) var nc gc.Node gc.Nodconst(&nc, gc.Types[gc.TUINT32], 0) var nz gc.Node gc.Regalloc(&nz, gc.Types[gc.TUINT32], &r0) gc.Cgen(&nc, &nz) if q > 128 { var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p := gins(arm.AMOVW, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q) * 4 p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT pl := p p = gins(arm.ACMP, &dst, nil) raddr(&end, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), pl) gc.Regfree(&end) } else if q >= 4 && !gc.Nacl { f := gc.Sysfunc("duffzero") p := gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 4 * (128 - int64(q)) } else { var p *obj.Prog for q > 0 { p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT //print("1. %v\n", p); q-- } } var p *obj.Prog for c > 0 { p = gins(arm.AMOVB, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 1 p.Scond |= arm.C_PBIT //print("2. %v\n", p); c-- } gc.Regfree(&dst) gc.Regfree(&nz) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads if q < 4 { // Write sequence of MOV 0, off(base) instead of using STOSL. // The hope is that although the code will be slightly longer, // the MOVs will have no dependencies and pipeline better // than the unrolled STOSL loop. // NOTE: Must use agen, not igen, so that optimizer sees address // being taken. We are not writing on field boundaries. var n1 gc.Node gc.Regalloc(&n1, gc.Types[gc.Tptr], nil) gc.Agen(nl, &n1) n1.Op = gc.OINDREG var z gc.Node gc.Nodconst(&z, gc.Types[gc.TUINT64], 0) for ; q > 0; q-- { n1.Type = z.Type gins(x86.AMOVL, &z, &n1) n1.Xoffset += 4 } gc.Nodconst(&z, gc.Types[gc.TUINT8], 0) for ; c > 0; c-- { n1.Type = z.Type gins(x86.AMOVB, &z, &n1) n1.Xoffset++ } gc.Regfree(&n1) return } var n1 gc.Node gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI) gc.Agen(nl, &n1) gconreg(x86.AMOVL, 0, x86.REG_AX) if q > 128 || (q >= 4 && gc.Nacl) { gconreg(x86.AMOVL, int64(q), x86.REG_CX) gins(x86.AREP, nil, nil) // repeat gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+ } else if q >= 4 { p := gins(obj.ADUFFZERO, nil, nil) p.To.Type = obj.TYPE_ADDR p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) // 1 and 128 = magic constants: see ../../runtime/asm_386.s p.To.Offset = 1 * (128 - int64(q)) } else { for q > 0 { gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+ q-- } } for c > 0 { gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+ c-- } }