Example #1
0
File: cgen.go Project: ckeyer/gosrc
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type)

	case 1:
		op = ppc64.AMOVBU

	case 2:
		op = ppc64.AMOVHU

	case 4:
		op = ppc64.AMOVWZU // there is no lwau, only lwaux

	case 8:
		op = ppc64.AMOVDU
	}

	if w%int64(align) != 0 {
		gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type)
	}
	c := int32(w / int64(align))

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align

	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(ppc64.AMOVD, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.Tptr], nil)

	// set up end marker
	var nend gc.Node

	// move src and dest to the end of block if necessary
	if dir < 0 {
		if c >= 4 {
			gc.Regalloc(&nend, gc.Types[gc.Tptr], nil)
			gins(ppc64.AMOVD, &src, &nend)
		}

		p := gins(ppc64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w

		p = gins(ppc64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w
	} else {
		p := gins(ppc64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		p = gins(ppc64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		if c >= 4 {
			gc.Regalloc(&nend, gc.Types[gc.Tptr], nil)
			p := gins(ppc64.AMOVD, &src, &nend)
			p.From.Type = obj.TYPE_ADDR
			p.From.Offset = w
		}
	}

	// move
	// TODO: enable duffcopy for larger copies.
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)

		p = gins(ppc64.ACMP, &src, &nend)

		gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), ploop)
		gc.Regfree(&nend)
	} else {
		// TODO(austin): Instead of generating ADD $-8,R8; ADD
		// $-8,R7; n*(MOVDU 8(R8),R9; MOVDU R9,8(R7);) just
		// generate the offsets directly and eliminate the
		// ADDs.  That will produce shorter, more
		// pipeline-able code.
		var p *obj.Prog
		for ; c > 0; c-- {
			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
		}
	}

	gc.Regfree(&dst)
	gc.Regfree(&src)
	gc.Regfree(&tmp)
}
Example #2
0
File: cgen.go Project: ckeyer/gosrc
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI)
	var src gc.Node
	gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI)

	var tsrc gc.Node
	gc.Tempname(&tsrc, gc.Types[gc.Tptr])
	var tdst gc.Node
	gc.Tempname(&tdst, gc.Types[gc.Tptr])
	if !n.Addable {
		gc.Agen(n, &tsrc)
	}
	if !res.Addable {
		gc.Agen(res, &tdst)
	}
	if n.Addable {
		gc.Agen(n, &src)
	} else {
		gmove(&tsrc, &src)
	}

	if res.Op == gc.ONAME {
		gc.Gvardef(res)
	}

	if res.Addable {
		gc.Agen(res, &dst)
	} else {
		gmove(&tdst, &dst)
	}

	c := int32(w % 4) // bytes
	q := int32(w / 4) // doublewords

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && int64(odst) < int64(osrc)+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(x86.AADDL, w-1, x86.REG_SI)
			gconreg(x86.AADDL, w-1, x86.REG_DI)

			gconreg(x86.AMOVL, int64(c), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(x86.AADDL, -3, x86.REG_SI)
				gconreg(x86.AADDL, -3, x86.REG_DI)
			} else {
				gconreg(x86.AADDL, w-4, x86.REG_SI)
				gconreg(x86.AADDL, w-4, x86.REG_DI)
			}

			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		gins(x86.ACLD, nil, nil) // paranoia.  TODO(rsc): remove?

		// normal direction
		if q > 128 || (q >= 4 && gc.Nacl) {
			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 10 and 128 = magic constants: see ../../runtime/asm_386.s
			p.To.Offset = 10 * (128 - int64(q))
		} else if !gc.Nacl && c == 0 {
			var cx gc.Node
			gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)

			// We don't need the MOVSL side-effect of updating SI and DI,
			// and issuing a sequence of MOVLs directly is faster.
			src.Op = gc.OINDREG

			dst.Op = gc.OINDREG
			for q > 0 {
				gmove(&src, &cx) // MOVL x+(SI),CX
				gmove(&cx, &dst) // MOVL CX,x+(DI)
				src.Xoffset += 4
				dst.Xoffset += 4
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
				q--
			}
		}

		for c > 0 {
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
			c--
		}
	}
}
Example #3
0
File: cgen.go Project: ckeyer/gosrc
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type)

	case 1:
		op = arm.AMOVB

	case 2:
		op = arm.AMOVH

	case 4:
		op = arm.AMOVW
	}

	if w%int64(align) != 0 {
		gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type)
	}
	c := int32(w / int64(align))

	if osrc%int64(align) != 0 || odst%int64(align) != 0 {
		gc.Fatalf("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align)
	}

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align
	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 {
		var r0 gc.Node
		r0.Op = gc.OREGISTER
		r0.Reg = arm.REG_R0
		var r1 gc.Node
		r1.Op = gc.OREGISTER
		r1.Reg = arm.REG_R0 + 1
		var r2 gc.Node
		r2.Op = gc.OREGISTER
		r2.Reg = arm.REG_R0 + 2

		var src gc.Node
		gc.Regalloc(&src, gc.Types[gc.Tptr], &r1)
		var dst gc.Node
		gc.Regalloc(&dst, gc.Types[gc.Tptr], &r2)
		if n.Ullman >= res.Ullman {
			// eval n first
			gc.Agen(n, &src)

			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			gc.Agen(res, &dst)
		} else {
			// eval res first
			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			gc.Agen(res, &dst)
			gc.Agen(n, &src)
		}

		var tmp gc.Node
		gc.Regalloc(&tmp, gc.Types[gc.Tptr], &r0)
		f := gc.Sysfunc("duffcopy")
		p := gins(obj.ADUFFCOPY, nil, f)
		gc.Afunclit(&p.To, f)

		// 8 and 128 = magic constants: see ../../runtime/asm_arm.s
		p.To.Offset = 8 * (128 - int64(c))

		gc.Regfree(&tmp)
		gc.Regfree(&src)
		gc.Regfree(&dst)
		return
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(arm.AMOVW, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.TUINT32], nil)

	// set up end marker
	var nend gc.Node

	if c >= 4 {
		gc.Regalloc(&nend, gc.Types[gc.TUINT32], nil)

		p := gins(arm.AMOVW, &src, &nend)
		p.From.Type = obj.TYPE_ADDR
		if dir < 0 {
			p.From.Offset = int64(dir)
		} else {
			p.From.Offset = w
		}
	}

	// move src and dest to the end of block if necessary
	if dir < 0 {
		p := gins(arm.AMOVW, &src, &src)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)

		p = gins(arm.AMOVW, &dst, &dst)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)
	}

	// move
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		p.Scond |= arm.C_PBIT
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)
		p.Scond |= arm.C_PBIT

		p = gins(arm.ACMP, &src, nil)
		raddr(&nend, p)

		gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop)
		gc.Regfree(&nend)
	} else {
		var p *obj.Prog
		for ; c > 0; c-- {
			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)
			p.Scond |= arm.C_PBIT

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
			p.Scond |= arm.C_PBIT
		}
	}

	gc.Regfree(&dst)
	gc.Regfree(&src)
	gc.Regfree(&tmp)
}
Example #4
0
File: cgen.go Project: ckeyer/gosrc
func blockcopy(n, ns *gc.Node, osrc, odst, w int64) {
	var noddi gc.Node
	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
	var nodsi gc.Node
	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)

	var nodl gc.Node
	var nodr gc.Node
	if n.Ullman >= ns.Ullman {
		gc.Agenr(n, &nodr, &nodsi)
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
	} else {
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
		gc.Agenr(n, &nodr, &nodsi)
	}

	if nodl.Reg != x86.REG_DI {
		gmove(&nodl, &noddi)
	}
	if nodr.Reg != x86.REG_SI {
		gmove(&nodr, &nodsi)
	}
	gc.Regfree(&nodl)
	gc.Regfree(&nodr)

	c := w % 8 // bytes
	q := w / 8 // quads

	var oldcx gc.Node
	var cx gc.Node
	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(addptr, w-1, x86.REG_SI)
			gconreg(addptr, w-1, x86.REG_DI)

			gconreg(movptr, c, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(addptr, -7, x86.REG_SI)
				gconreg(addptr, -7, x86.REG_DI)
			} else {
				gconreg(addptr, w-8, x86.REG_SI)
				gconreg(addptr, w-8, x86.REG_DI)
			}

			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		// normal direction
		if q > 128 || (gc.Nacl && q >= 4) || (obj.Getgoos() == "plan9" && q >= 4) {
			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
		} else if q >= 4 {
			var oldx0 gc.Node
			var x0 gc.Node
			savex(x86.REG_X0, &x0, &oldx0, nil, gc.Types[gc.TFLOAT64])

			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 64 blocks taking 14 bytes each
			// see ../../../../runtime/mkduff.go
			p.To.Offset = 14 * (64 - q/2)
			restx(&x0, &oldx0)

			if q%2 != 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
			}
		} else if !gc.Nacl && c == 0 {
			// We don't need the MOVSQ side-effect of updating SI and DI,
			// and issuing a sequence of MOVQs directly is faster.
			nodsi.Op = gc.OINDREG

			noddi.Op = gc.OINDREG
			for q > 0 {
				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
				nodsi.Xoffset += 8
				noddi.Xoffset += 8
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
				q--
			}
		}

		// copy the remaining c bytes
		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
			for c > 0 {
				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
				c--
			}
		} else if w < 8 || c <= 4 {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT32]
			nodsi.Type = gc.Types[gc.TINT32]
			noddi.Type = gc.Types[gc.TINT32]
			if c > 4 {
				nodsi.Xoffset = 0
				noddi.Xoffset = 0
				gmove(&nodsi, &cx)
				gmove(&cx, &noddi)
			}

			nodsi.Xoffset = c - 4
			noddi.Xoffset = c - 4
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		} else {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT64]
			nodsi.Type = gc.Types[gc.TINT64]
			noddi.Type = gc.Types[gc.TINT64]
			nodsi.Xoffset = c - 8
			noddi.Xoffset = c - 8
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		}
	}

	restx(&cx, &oldcx)
}