Example #1
File: ggen.go Project: xslonepiece/goios
/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type
	a := optoas(gc.OHMUL, t)
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	var n1 gc.Node
	gc.Cgenr(nl, &n1, res)
	var n2 gc.Node
	gc.Cgenr(nr, &n2, nil)
	var ax gc.Node
	gc.Nodreg(&ax, t, x86.REG_AX)
	gmove(&n1, &ax)
	gins(a, &n2, nil)
	gc.Regfree(&n2)
	gc.Regfree(&n1)

	var dx gc.Node
	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}
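
The routine above keeps only the high half of the full-width product (the OHMUL operation): one operand is loaded into AX, the one-operand multiply leaves the high half in DX (or AH for byte operands), and that half is moved to res. A minimal self-contained sketch of the semantics being implemented, assuming 32-bit operands for concreteness (the helper below is illustrative, not compiler code):

package main

import "fmt"

// hmul32 computes the high 32 bits of the 64-bit product x*y,
// i.e. (x*y) >> width with width == 32 -- what cgen_hmul emits code for.
func hmul32(x, y int32) int32 {
	return int32((int64(x) * int64(y)) >> 32)
}

func main() {
	fmt.Println(hmul32(1<<30, 6)) // 1: the full product 6<<30 carries one bit past 32
}
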
Example #2
File: gsubr.go Project: bibbyflyaway/go
/*
 * generate
 *	as $c, reg
 */
func gconreg(as int, c int64, reg int) {
	var nr gc.Node

	switch as {
	case x86.AADDL,
		x86.AMOVL,
		x86.ALEAL:
		gc.Nodreg(&nr, gc.Types[gc.TINT32], reg)

	default:
		gc.Nodreg(&nr, gc.Types[gc.TINT64], reg)
	}

	ginscon(as, c, &nr)
}
Example #3
File: gsubr.go Project: xslonepiece/goios
/*
 * generate
 *	as $c, reg
 */
func gconreg(as int, c int64, reg int) {
	var n1 gc.Node
	var n2 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
	gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
	gins(as, &n1, &n2)
}
Example #4
File: gsubr.go Project: bibbyflyaway/go
func ginsnop() {
	// This is actually not the x86 NOP anymore,
	// but at the point where it gets used, AX is dead
	// so it's okay if we lose the high bits.
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
	gins(x86.AXCHGL, &reg, &reg)
}
Example #5
File: ggen.go Project: xslonepiece/goios
/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		tmp := nl
		nl = nr
		nr = tmp
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}
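
As the comment says, there is no two-operand byte multiply, so the operands are placed in full-width registers, multiplied there, and the result truncated by the final gmove. A hedged sketch of why the truncation is sound (illustrative helper, not part of the compiler):

package main

import "fmt"

// bmul8 mirrors cgen_bmul: widen, multiply at full width, keep the low 8 bits.
// The low 8 bits of the wide product are exactly the wrapped 8-bit product.
func bmul8(x, y int8) int8 {
	return int8(int32(x) * int32(y))
}

func main() {
	fmt.Println(bmul8(100, 3)) // 44, i.e. 300 mod 256 interpreted as int8
}
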
Example #6
File: ggen.go Project: tidatida/go
// floating-point.  387 (not SSE2)
func cgen_float387(n *gc.Node, res *gc.Node) {
	var f0 gc.Node
	var f1 gc.Node

	nl := n.Left
	nr := n.Right
	gc.Nodreg(&f0, nl.Type, x86.REG_F0)
	gc.Nodreg(&f1, n.Type, x86.REG_F0+1)
	if nr != nil {
		// binary
		if nl.Ullman >= nr.Ullman {
			gc.Cgen(nl, &f0)
			if nr.Addable != 0 {
				gins(foptoas(int(n.Op), n.Type, 0), nr, &f0)
			} else {
				gc.Cgen(nr, &f0)
				gins(foptoas(int(n.Op), n.Type, Fpop), &f0, &f1)
			}
		} else {
			gc.Cgen(nr, &f0)
			if nl.Addable != 0 {
				gins(foptoas(int(n.Op), n.Type, Frev), nl, &f0)
			} else {
				gc.Cgen(nl, &f0)
				gins(foptoas(int(n.Op), n.Type, Frev|Fpop), &f0, &f1)
			}
		}

		gmove(&f0, res)
		return
	}

	// unary
	gc.Cgen(nl, &f0)

	if n.Op != gc.OCONV && n.Op != gc.OPLUS {
		gins(foptoas(int(n.Op), n.Type, 0), nil, nil)
	}
	gmove(&f0, res)
	return
}
Example #7
File: ggen.go Project: tidatida/go
/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node
	var ax gc.Node
	var dx gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)

	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)

	gc.Cgen(nr, &n2)

	// multiply.
	gc.Nodreg(&ax, t, x86.REG_AX)

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}
Example #8
File: ggen.go Project: tidatida/go
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := int(reg[dr])
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}
Example #9
File: ggen.go Project: xslonepiece/goios
/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := int(reg[dr])

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		oldx.Ostk = int32(r) // squirrel away old r value
		reg[dr] = 1
	}
}
Example #10
File: ggen.go Project: klueska/go-akaros
/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
func ginscall(f *gc.Node, proc int) {
	if f.Type != nil {
		extra := int32(0)
		if proc == 1 || proc == 2 {
			extra = 2 * int32(gc.Widthptr)
		}
		gc.Setmaxarg(f.Type, extra)
	}

	switch proc {
	default:
		gc.Fatal("ginscall: bad proc %d", proc)

	case 0, // normal call
		-1: // normal call but no return
		if f.Op == gc.ONAME && f.Class == gc.PFUNC {
			if f == gc.Deferreturn {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert a ppc64 NOP so that we will have the right line number.
				// The ppc64 NOP is really or r0, r0, r0; use that description
				// because the NOP pseudo-instruction would be removed by
				// the linker.
				var reg gc.Node
				gc.Nodreg(&reg, gc.Types[gc.TINT], ppc64.REG_R0)

				gins(ppc64.AOR, &reg, &reg)
			}

			p := gins(ppc64.ABL, nil, f)
			gc.Afunclit(&p.To, f)
			if proc == -1 || gc.Noreturn(p) {
				gins(obj.AUNDEF, nil, nil)
			}
			break
		}

		var reg gc.Node
		gc.Nodreg(&reg, gc.Types[gc.Tptr], ppc64.REGCTXT)
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[gc.Tptr], ppc64.REG_R3)
		gmove(f, &reg)
		reg.Op = gc.OINDREG
		gmove(&reg, &r1)
		reg.Op = gc.OREGISTER
		ginsBL(&reg, &r1)

	case 3: // normal call of c function pointer
		ginsBL(nil, f)

	case 1, // call in new proc (go)
		2: // deferred call (defer)
		var con gc.Node
		gc.Nodconst(&con, gc.Types[gc.TINT64], int64(gc.Argsize(f.Type)))

		var reg gc.Node
		gc.Nodreg(&reg, gc.Types[gc.TINT64], ppc64.REG_R3)
		var reg2 gc.Node
		gc.Nodreg(&reg2, gc.Types[gc.TINT64], ppc64.REG_R4)
		gmove(f, &reg)

		gmove(&con, &reg2)
		p := gins(ppc64.AMOVW, &reg2, nil)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = ppc64.REGSP
		p.To.Offset = 8

		p = gins(ppc64.AMOVD, &reg, nil)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = ppc64.REGSP
		p.To.Offset = 16

		if proc == 1 {
			ginscall(gc.Newproc, 0)
		} else {
			if gc.Hasdefer == 0 {
				gc.Fatal("hasdefer=0 but has defer")
			}
			ginscall(gc.Deferproc, 0)
		}

		if proc == 2 {
			gc.Nodreg(&reg, gc.Types[gc.TINT64], ppc64.REG_R3)
			p := gins(ppc64.ACMP, &reg, nil)
			p.To.Type = obj.TYPE_REG
			p.To.Reg = ppc64.REGZERO
			p = gc.Gbranch(ppc64.ABEQ, nil, +1)
			cgen_ret(nil)
			gc.Patch(p, gc.Pc)
		}
	}
}
Example #11
File: ggen.go Project: xslonepiece/goios
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(gc.Mpgetfix(nr.Val.U.Xval))
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	rcx := int(reg[x86.REG_CX])
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
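
Both the literal case ("large shift gets 2 shifts by width-1") and the !bounded fix-up exist because Go defines the result of shifting by a count greater than or equal to the operand width, while the hardware shift instructions only look at the low bits of CX. A small sketch of the semantic gap being papered over (assumed illustration, not compiler code):

package main

import "fmt"

// For shift counts >= the width, Go requires 0 (or all sign bits for a
// signed right shift). SHL/SAR would just use count mod 32, so the compiler
// emits two shifts by width-1 instead; these helpers mirror that trick.
func shrLarge(x int32) int32 { return (x >> 31) >> 31 } // == x >> c for any c >= 32
func shlLarge(x int32) int32 { return (x << 31) << 31 } // == 0, as required

func main() {
	fmt.Println(shrLarge(-5), shrLarge(5)) // -1 0
	fmt.Println(shlLarge(-5), shlLarge(5)) // 0 0
}
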
Example #12
File: cgen64.go Project: xslonepiece/goios
/*
 * generate 64-bit
 *	res = n
 * unhandled ops are a fatal error.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatal("cgen64 %v of %v", gc.Oconv(int(n.Op), 0), gc.Oconv(int(res.Op), 0))
	}

	switch n.Op {
	default:
		gc.Fatal("cgen64 %v", gc.Oconv(int(n.Op), 0))

	case gc.OMINUS:
		gc.Cgen(n.Left, res)
		var hi1 gc.Node
		var lo1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANEGL, nil, &lo1)
		gins(x86.AADCL, ncon(0), &hi1)
		gins(x86.ANEGL, nil, &hi1)
		splitclean()
		return

	case gc.OCOM:
		gc.Cgen(n.Left, res)
		var lo1 gc.Node
		var hi1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANOTL, nil, &lo1)
		gins(x86.ANOTL, nil, &hi1)
		splitclean()
		return

		// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLROT,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR:
		break
	}

	l := n.Left
	r := n.Right
	if !l.Addable {
		var t1 gc.Node
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	var ax gc.Node
	gc.Nodreg(&ax, gc.Types[gc.TINT32], x86.REG_AX)
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)
	var dx gc.Node
	gc.Nodreg(&dx, gc.Types[gc.TINT32], x86.REG_DX)

	// Setup for binary operation.
	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)

	var lo2 gc.Node
	var hi2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	// Do op.  Leave result in DX:AX.
	switch n.Op {
	// TODO: Constants
	case gc.OADD:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AADDL, &lo2, &ax)
		gins(x86.AADCL, &hi2, &dx)

		// TODO: Constants.
	case gc.OSUB:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.ASUBL, &lo2, &ax)
		gins(x86.ASBBL, &hi2, &dx)

		// let's call the next two EX and FX.
	case gc.OMUL:
		var ex gc.Node
		gc.Regalloc(&ex, gc.Types[gc.TPTR32], nil)

		var fx gc.Node
		gc.Regalloc(&fx, gc.Types[gc.TPTR32], nil)

		// load args into DX:AX and EX:CX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AMOVL, &lo2, &cx)
		gins(x86.AMOVL, &hi2, &ex)

		// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
		gins(x86.AMOVL, &dx, &fx)

		gins(x86.AORL, &ex, &fx)
		p1 := gc.Gbranch(x86.AJNE, nil, 0)
		gins(x86.AMULL, &cx, nil) // implicit &ax
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// full 64x64 -> 64, from 32x32 -> 64.
		gins(x86.AIMULL, &cx, &dx)

		gins(x86.AMOVL, &ax, &fx)
		gins(x86.AIMULL, &ex, &fx)
		gins(x86.AADDL, &dx, &fx)
		gins(x86.AMOVL, &cx, &dx)
		gins(x86.AMULL, &dx, nil) // implicit &ax
		gins(x86.AADDL, &fx, &dx)
		gc.Patch(p2, gc.Pc)

		gc.Regfree(&ex)
		gc.Regfree(&fx)

		// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(gc.Mpgetfix(r.Val.U.Xval))

		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(x86.AMOVL, &lo1, &dx)
			gins(x86.AMOVL, &hi1, &ax)
		} else {
			gins(x86.AMOVL, &lo1, &ax)
			gins(x86.AMOVL, &hi1, &dx)
		}

		if v != 0 {
			gins(x86.AMOVL, &dx, &cx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			p1 = gins(x86.ASHLL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_CX // double-width shift
			p1.From.Scale = 0
		}

	case gc.OLSH:
		if r.Op == gc.OLITERAL {
			v := uint64(gc.Mpgetfix(r.Val.U.Xval))
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				gins(x86.AMOVL, ncon(0), &lo2)
				gins(x86.AMOVL, ncon(0), &hi2)
				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&lo1, &hi2)
				if v > 32 {
					gins(x86.ASHLL, ncon(uint32(v-32)), &hi2)
				}

				gins(x86.AMOVL, ncon(0), &lo2)
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			gins(x86.ASHLL, ncon(uint32(v)), &ax)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		gins(x86.AXORL, &dx, &dx)
		gins(x86.AXORL, &ax, &ax)
		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, zero low.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &ax, &dx)
		gins(x86.ASHLL, &cx, &dx) // SHLL only uses bottom 5 bits of count
		gins(x86.AXORL, &ax, &ax)
		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHLL, &cx, &dx)

		p1.From.Index = x86.REG_AX // double-width shift
		p1.From.Scale = 0
		gins(x86.ASHLL, &cx, &ax)
		gc.Patch(p2, gc.Pc)

	case gc.ORSH:
		if r.Op == gc.OLITERAL {
			v := uint64(gc.Mpgetfix(r.Val.U.Xval))
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &lo2)
					gins(x86.ASARL, ncon(31), &lo2)
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &lo2)
					gins(x86.AMOVL, ncon(0), &hi2)
				}

				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&hi1, &lo2)
				if v > 32 {
					gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v-32)), &lo2)
				}
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &hi2)
				}
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHRL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_DX // double-width shift
			p1.From.Scale = 0
			gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v)), &dx)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero or sign-extend value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, ncon(31), &dx)
			gins(x86.AMOVL, &dx, &ax)
		} else {
			gins(x86.AXORL, &dx, &dx)
			gins(x86.AXORL, &ax, &ax)
		}

		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, sign-extend hi.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &dx, &ax)
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, &cx, &ax) // SARL only uses bottom 5 bits of count
			gins(x86.ASARL, ncon(31), &dx)
		} else {
			gins(x86.ASHRL, &cx, &ax)
			gins(x86.AXORL, &dx, &dx)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHRL, &cx, &ax)

		p1.From.Index = x86.REG_DX // double-width shift
		p1.From.Scale = 0
		gins(optoas(gc.ORSH, hi1.Type), &cx, &dx)
		gc.Patch(p2, gc.Pc)

		// make constant the right side (it usually is anyway).
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		if lo1.Op == gc.OLITERAL {
			nswap(&lo1, &lo2)
			nswap(&hi1, &hi2)
		}

		if lo2.Op == gc.OLITERAL {
			// special cases for constants.
			lv := uint32(gc.Mpgetfix(lo2.Val.U.Xval))

			hv := uint32(gc.Mpgetfix(hi2.Val.U.Xval))
			splitclean() // right side
			split64(res, &lo2, &hi2)
			switch n.Op {
			case gc.OXOR:
				gmove(&lo1, &lo2)
				gmove(&hi1, &hi2)
				switch lv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &lo2)

				default:
					gins(x86.AXORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &hi2)

				default:
					gins(x86.AXORL, ncon(hv), &hi2)
				}

			case gc.OAND:
				switch lv {
				case 0:
					gins(x86.AMOVL, ncon(0), &lo2)

				default:
					gmove(&lo1, &lo2)
					if lv != 0xffffffff {
						gins(x86.AANDL, ncon(lv), &lo2)
					}
				}

				switch hv {
				case 0:
					gins(x86.AMOVL, ncon(0), &hi2)

				default:
					gmove(&hi1, &hi2)
					if hv != 0xffffffff {
						gins(x86.AANDL, ncon(hv), &hi2)
					}
				}

			case gc.OOR:
				switch lv {
				case 0:
					gmove(&lo1, &lo2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &lo2)

				default:
					gmove(&lo1, &lo2)
					gins(x86.AORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					gmove(&hi1, &hi2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &hi2)

				default:
					gmove(&hi1, &hi2)
					gins(x86.AORL, ncon(hv), &hi2)
				}
			}

			splitclean()
			splitclean()
			return
		}

		gins(x86.AMOVL, &lo1, &ax)
		gins(x86.AMOVL, &hi1, &dx)
		gins(optoas(int(n.Op), lo1.Type), &lo2, &ax)
		gins(optoas(int(n.Op), lo1.Type), &hi2, &dx)
	}

	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	split64(res, &lo1, &hi1)
	gins(x86.AMOVL, &ax, &lo1)
	gins(x86.AMOVL, &dx, &hi1)
	splitclean()
}
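
The OLROT case follows the recipe in its comment: swap the halves when the constant count is >= 32, then perform a pair of double-width (SHLD-style) shifts for the remainder. A self-contained sketch of that recipe on explicit 32-bit halves (illustrative only; the generated code uses SHLL with an index register rather than Go shifts):

package main

import "fmt"

// rotl64 rotates the 64-bit value hi:lo left by c using only 32-bit halves,
// following the OLROT comment: swap halves for c >= 32, then shift in bits
// from the other half for the remaining count.
func rotl64(lo, hi uint32, c uint) (uint32, uint32) {
	if c >= 32 {
		lo, hi = hi, lo
		c -= 32
	}
	if c == 0 {
		return lo, hi
	}
	t := hi
	hi = hi<<c | lo>>(32-c)
	lo = lo<<c | t>>(32-c)
	return lo, hi
}

func main() {
	lo, hi := rotl64(0x00000001, 0x80000000, 1)
	fmt.Printf("%08x %08x\n", hi, lo) // 00000000 00000003
}
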
Example #13
File: gsubr.go Project: tidatida/go
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := (*gc.Type)(t.Type)

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var r2 gc.Node
	var r1 gc.Node
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			gc.Convconst(&con, t.Type, &f.Val)

		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			gc.Convconst(&con, gc.Types[gc.TINT64], &f.Val)
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(ppc64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			gc.Convconst(&con, gc.Types[gc.TUINT64], &f.Val)
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(ppc64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// float constants come from memory.
	//if(isfloat[tt])
	//	goto hard;

	// 64-bit immediates are also from memory.
	//if(isint[tt])
	//	goto hard;
	//// 64-bit immediates are really 32-bit sign-extended
	//// unless moving into a register.
	//if(isint[tt]) {
	//	if(mpcmpfixfix(con.val.u.xval, minintval[TINT32]) < 0)
	//		goto hard;
	//	if(mpcmpfixfix(con.val.u.xval, maxintval[TINT32]) > 0)
	//		goto hard;
	//}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatal("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = ppc64.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		// truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = ppc64.AMOVBZ

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = ppc64.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		// truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = ppc64.AMOVHZ

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32,
		// truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = ppc64.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = ppc64.AMOVWZ

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = ppc64.AMOVD

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = ppc64.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = ppc64.AMOVBZ

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = ppc64.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = ppc64.AMOVHZ

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = ppc64.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = ppc64.AMOVWZ

		goto rdst

		//warn("gmove: convert float to int not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native float64 -> int64 conversion.
	//	otherwise, subtract 2^63, convert, and add it back.
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32,
		gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		bignodes()

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		gmove(f, &r1)
		if tt == gc.TUINT64 {
			gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
			gmove(&bigf, &r2)
			gins(ppc64.AFCMPU, &r1, &r2)
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1))
			gins(ppc64.AFSUB, &r2, &r1)
			gc.Patch(p1, gc.Pc)
			gc.Regfree(&r2)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
		var r3 gc.Node
		gc.Regalloc(&r3, gc.Types[gc.TINT64], t)
		gins(ppc64.AFCTIDZ, &r1, &r2)
		p1 := (*obj.Prog)(gins(ppc64.AFMOVD, &r2, nil))
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = ppc64.REGSP
		p1.To.Offset = -8
		p1 = gins(ppc64.AMOVD, nil, &r3)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = ppc64.REGSP
		p1.From.Offset = -8
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		if tt == gc.TUINT64 {
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1)) // use CR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TINT64], ppc64.REGTMP)
			gins(ppc64.AMOVD, &bigi, &r1)
			gins(ppc64.AADD, &r1, &r3)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r3, t)
		gc.Regfree(&r3)
		return

		//warn("gmove: convert int to float not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native int64 -> uint64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	/*
	 * integer to float
	 */
	case gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64,
		gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		bignodes()

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TINT64], nil)
		gmove(f, &r1)
		if ft == gc.TUINT64 {
			gc.Nodreg(&r2, gc.Types[gc.TUINT64], ppc64.REGTMP)
			gmove(&bigi, &r2)
			gins(ppc64.ACMPU, &r1, &r2)
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1))
			p2 := (*obj.Prog)(gins(ppc64.ASRD, nil, &r1))
			p2.From.Type = obj.TYPE_CONST
			p2.From.Offset = 1
			gc.Patch(p1, gc.Pc)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], t)
		p1 := (*obj.Prog)(gins(ppc64.AMOVD, &r1, nil))
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = ppc64.REGSP
		p1.To.Offset = -8
		p1 = gins(ppc64.AFMOVD, nil, &r2)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = ppc64.REGSP
		p1.From.Offset = -8
		gins(ppc64.AFCFID, &r2, &r2)
		gc.Regfree(&r1)
		if ft == gc.TUINT64 {
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1)) // use CR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TFLOAT64], ppc64.FREGTWO)
			gins(ppc64.AFMUL, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = ppc64.AFMOVS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = ppc64.AFMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = ppc64.AFMOVS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = ppc64.AFRSP
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
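
The float-to-integer case above follows the algorithm in its comment: if the value fits in int64, convert directly; otherwise subtract 2^63, convert, and add the bit back. The reverse direction halves an out-of-range uint64 before converting and doubles afterwards. A hedged Go sketch of both ideas (illustrative; real code must also pin down rounding, which the halving step glosses over):

package main

import "fmt"

const two63 = 9223372036854775808.0 // 2^63 as a float64

// f64toU64 mirrors the TFLOAT64 -> TUINT64 path: values below 2^63 use the
// native float->int64 conversion; larger values are shifted down by 2^63
// first and the bit added back afterwards.
func f64toU64(f float64) uint64 {
	if f < two63 {
		return uint64(int64(f))
	}
	return uint64(int64(f-two63)) + 1<<63
}

// u64toF64 mirrors the TUINT64 -> TFLOAT64 path: values with the top bit set
// are halved (the comment's "rounding to odd" refinement is omitted here),
// converted, and doubled.
func u64toF64(u uint64) float64 {
	if u < 1<<63 {
		return float64(int64(u))
	}
	return float64(int64(u>>1)) * 2
}

func main() {
	fmt.Println(f64toU64(1 << 63))      // 9223372036854775808
	fmt.Println(u64toF64(1<<64 - 1024)) // 1.8446744073709552e+19
}
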
Example #14
File: cgen.go Project: tidatida/go
func stackcopy(n, res *gc.Node, osrc, odst, w int64) {
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI)
	var src gc.Node
	gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI)

	var tsrc gc.Node
	gc.Tempname(&tsrc, gc.Types[gc.Tptr])
	var tdst gc.Node
	gc.Tempname(&tdst, gc.Types[gc.Tptr])
	if n.Addable == 0 {
		gc.Agen(n, &tsrc)
	}
	if res.Addable == 0 {
		gc.Agen(res, &tdst)
	}
	if n.Addable != 0 {
		gc.Agen(n, &src)
	} else {
		gmove(&tsrc, &src)
	}

	if res.Op == gc.ONAME {
		gc.Gvardef(res)
	}

	if res.Addable != 0 {
		gc.Agen(res, &dst)
	} else {
		gmove(&tdst, &dst)
	}

	c := int32(w % 4) // bytes
	q := int32(w / 4) // doublewords

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(x86.AADDL, w-1, x86.REG_SI)
			gconreg(x86.AADDL, w-1, x86.REG_DI)

			gconreg(x86.AMOVL, int64(c), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(x86.AADDL, -3, x86.REG_SI)
				gconreg(x86.AADDL, -3, x86.REG_DI)
			} else {
				gconreg(x86.AADDL, w-4, x86.REG_SI)
				gconreg(x86.AADDL, w-4, x86.REG_DI)
			}

			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		gins(x86.ACLD, nil, nil) // paranoia.  TODO(rsc): remove?

		// normal direction
		if q > 128 || (q >= 4 && gc.Nacl) {
			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 10 and 128 = magic constants: see ../../runtime/asm_386.s
			p.To.Offset = 10 * (128 - int64(q))
		} else if !gc.Nacl && c == 0 {
			var cx gc.Node
			gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)

			// We don't need the MOVSL side-effect of updating SI and DI,
			// and issuing a sequence of MOVLs directly is faster.
			src.Op = gc.OINDREG

			dst.Op = gc.OINDREG
			for q > 0 {
				gmove(&src, &cx) // MOVL x+(SI),CX
				gmove(&cx, &dst) // MOVL CX,x+(DI)
				src.Xoffset += 4
				dst.Xoffset += 4
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
				q--
			}
		}

		for c > 0 {
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
			c--
		}
	}
}
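
The direction test `osrc < odst && odst < osrc+w` is the classic memmove overlap check: copying forward would clobber not-yet-read source bytes, so the routine sets the direction flag and copies backwards. A small sketch of the same decision in plain Go (illustrative, not compiler code):

package main

import "fmt"

// copyOverlap copies w bytes within buf from offset src to offset dst,
// choosing the direction the way stackcopy does: backwards only when the
// destination starts inside the source range.
func copyOverlap(buf []byte, dst, src, w int) {
	if src < dst && dst < src+w {
		for i := w - 1; i >= 0; i-- { // reverse direction, like STD + MOVSB
			buf[dst+i] = buf[src+i]
		}
		return
	}
	for i := 0; i < w; i++ { // normal direction, like CLD + MOVSB
		buf[dst+i] = buf[src+i]
	}
}

func main() {
	b := []byte("abcdef__")
	copyOverlap(b, 2, 0, 6)
	fmt.Println(string(b)) // ababcdef
}
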
Example #15
File: gsubr.go Project: xslonepiece/goios
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, 0), gc.Nconv(t, 0))
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	var r1 gc.Node
	var a int
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		gc.Convconst(&con, t.Type, &f.Val)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatal("gmove %v -> %v", gc.Nconv(f, 0), gc.Nconv(t, 0))
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			var r2 gc.Node
			gc.Nodreg(&r2, gc.Types[gc.TUINT32], x86.REG_DX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &fhi, &r2)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &r2, &thi)
		}

		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
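
On 386, widening int32 to int64 goes through CDQ: the low word is the value and the high word is filled with copies of the sign bit, while uint32 simply gets a zero high word. A sketch of those two cases on split halves (illustrative helper, not compiler code):

package main

import "fmt"

// extend32to64 builds the lo/hi halves of a 64-bit result the way the
// TINT32->TINT64 and TUINT32->TUINT64 cases above do: CDQ replicates the
// sign bit into the high word; the unsigned case stores a literal 0.
func extend32to64(v int32, signed bool) (lo, hi uint32) {
	lo = uint32(v)
	if signed {
		hi = uint32(v >> 31) // all sign bits, as CDQ leaves in DX
	}
	return
}

func main() {
	lo, hi := extend32to64(-2, true)
	fmt.Printf("%08x%08x\n", hi, lo) // fffffffffffffffe
	lo, hi = extend32to64(-2, false)
	fmt.Printf("%08x%08x\n", hi, lo) // 00000000fffffffe
}
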
Example #16
File: ggen.go Project: klueska/go-akaros
/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
func ginscall(f *gc.Node, proc int) {
	if f.Type != nil {
		extra := int32(0)
		if proc == 1 || proc == 2 {
			extra = 2 * int32(gc.Widthptr)
		}
		gc.Setmaxarg(f.Type, extra)
	}

	switch proc {
	default:
		gc.Fatal("ginscall: bad proc %d", proc)

	case 0, // normal call
		-1: // normal call but no return
		if f.Op == gc.ONAME && f.Class == gc.PFUNC {
			if f == gc.Deferreturn {
				// Deferred calls will appear to be returning to
				// the BL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction before that return PC.
				// To avoid that instruction being an unrelated instruction,
				// insert a NOP so that we will have the right line number.
				// ARM NOP 0x00000000 is really AND.EQ R0, R0, R0.
				// Use the latter form because the NOP pseudo-instruction
				// would be removed by the linker.
				var r gc.Node
				gc.Nodreg(&r, gc.Types[gc.TINT], arm.REG_R0)

				p := gins(arm.AAND, &r, &r)
				p.Scond = arm.C_SCOND_EQ
			}

			p := gins(arm.ABL, nil, f)
			gc.Afunclit(&p.To, f)
			if proc == -1 || gc.Noreturn(p) {
				gins(obj.AUNDEF, nil, nil)
			}
			break
		}

		var r gc.Node
		gc.Nodreg(&r, gc.Types[gc.Tptr], arm.REG_R7)
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[gc.Tptr], arm.REG_R1)
		gmove(f, &r)
		r.Op = gc.OINDREG
		gmove(&r, &r1)
		r.Op = gc.OREGISTER
		r1.Op = gc.OINDREG
		gins(arm.ABL, &r, &r1)

	case 3: // normal call of c function pointer
		gins(arm.ABL, nil, f)

	case 1, // call in new proc (go)
		2: // deferred call (defer)
		var r gc.Node
		regalloc(&r, gc.Types[gc.Tptr], nil)

		var con gc.Node
		gc.Nodconst(&con, gc.Types[gc.TINT32], int64(gc.Argsize(f.Type)))
		gins(arm.AMOVW, &con, &r)
		p := gins(arm.AMOVW, &r, nil)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arm.REGSP
		p.To.Offset = 4

		gins(arm.AMOVW, f, &r)
		p = gins(arm.AMOVW, &r, nil)
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = arm.REGSP
		p.To.Offset = 8

		regfree(&r)

		if proc == 1 {
			ginscall(gc.Newproc, 0)
		} else {
			ginscall(gc.Deferproc, 0)
		}

		if proc == 2 {
			gc.Nodconst(&con, gc.Types[gc.TINT32], 0)
			p := gins(arm.ACMP, &con, nil)
			p.Reg = arm.REG_R0
			p = gc.Gbranch(arm.ABEQ, nil, +1)
			cgen_ret(nil)
			gc.Patch(p, gc.Pc)
		}
	}
}
Example #17
File: gsubr.go Project: xslonepiece/goios
func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)
	gins(x86.AXCHGL, &reg, &reg)
}
Example #18
File: ggen.go Project: tidatida/go
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatal("cgen_shift %v", gc.Tconv(nl.Type, 0))
	}

	w := int(nl.Type.Width * 8)

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n2 gc.Node
		gc.Tempname(&n2, nl.Type)
		gc.Cgen(nl, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&n2, &n1)
		sc := uint64(gc.Mpgetfix(nr.Val.U.Xval))
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(uint32(w)-1), &n1)

			gins(a, ncon(uint32(w)-1), &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	var oldcx gc.Node
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
	if reg[x86.REG_CX] > 1 && !gc.Samereg(&cx, res) {
		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
		gmove(&cx, &oldcx)
	}

	var n1 gc.Node
	var nt gc.Node
	if nr.Type.Width > 4 {
		gc.Tempname(&nt, nr.Type)
		n1 = nt
	} else {
		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	}

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
	} else {
		gc.Cgen(nr, &n1)
		gc.Cgen(nl, &n2)
	}

	// test and fix up large shifts
	if bounded {
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			splitclean()
		}
	} else {
		var p1 *obj.Prog
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
			splitclean()
			gc.Patch(p2, gc.Pc)
		} else {
			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		}

		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gins(a, ncon(uint32(w)-1), &n2)
		} else {
			gmove(ncon(0), &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		gmove(&oldcx, &cx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
Example #19
File: ggen.go Project: bibbyflyaway/go
// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Nodreg(&n1, res.Type, arm.REGG)
	gmove(&n1, res)
}
Example #20
File: ggen.go Project: tidatida/go
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		fmt.Printf("clearfat %v (%v, size: %d)\n", gc.Nconv(nl, 0), gc.Tconv(nl.Type, 0), nl.Type.Width)
	}

	w := uint64(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := uint64(w % 8) // bytes
	q := uint64(w / 8) // dwords

	if gc.Reginuse(ppc64.REGRT1) {
		gc.Fatal("%v in use during clearfat", obj.Rconv(ppc64.REGRT1))
	}

	var r0 gc.Node
	gc.Nodreg(&r0, gc.Types[gc.TUINT64], ppc64.REGZERO)
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], ppc64.REGRT1)
	gc.Regrealloc(&dst)
	gc.Agen(nl, &dst)

	var boff uint64
	if q > 128 {
		p := gins(ppc64.ASUB, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8

		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p = gins(ppc64.AMOVD, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = int64(q * 8)

		p = gins(ppc64.AMOVDU, &r0, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 8
		pl := (*obj.Prog)(p)

		p = gins(ppc64.ACMP, &dst, &end)
		gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), pl)

		gc.Regfree(&end)

		// The loop leaves R3 on the last zeroed dword
		boff = 8
	} else if q >= 4 {
		p := gins(ppc64.ASUB, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8
		f := (*gc.Node)(gc.Sysfunc("duffzero"))
		p = gins(obj.ADUFFZERO, nil, f)
		gc.Afunclit(&p.To, f)

		// 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s
		p.To.Offset = int64(4 * (128 - q))

		// duffzero leaves R3 on the last zeroed dword
		boff = 8
	} else {
		var p *obj.Prog
		for t := uint64(0); t < q; t++ {
			p = gins(ppc64.AMOVD, &r0, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(8 * t)
		}

		boff = 8 * q
	}

	var p *obj.Prog
	for t := uint64(0); t < c; t++ {
		p = gins(ppc64.AMOVB, &r0, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(t + boff)
	}

	gc.Regfree(&dst)
}
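
clearfat splits the size into q doublewords and c trailing bytes and picks a strategy by q (a store loop, duffzero, or unrolled stores). A plain-Go sketch of the same decomposition (illustrative only; register use and Duff's-device details are omitted):

package main

import "fmt"

// clearChunks zeroes b the way clearfat slices the work: 8-byte chunks
// first (the q loop), then the c remaining bytes one at a time.
func clearChunks(b []byte) {
	w := uint64(len(b))
	q := w / 8 // doublewords
	c := w % 8 // bytes
	for i := uint64(0); i < q; i++ {
		for j := uint64(0); j < 8; j++ { // stands in for one 8-byte MOVD
			b[i*8+j] = 0
		}
	}
	for i := uint64(0); i < c; i++ {
		b[q*8+i] = 0
	}
}

func main() {
	b := []byte("0123456789abcdefghij") // 20 bytes: q=2, c=4
	clearChunks(b)
	fmt.Println(b)
}
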
Example #21
File: ggen.go Project: tidatida/go
func ginsnop() {
	var reg gc.Node
	gc.Nodreg(&reg, gc.Types[gc.TINT], ppc64.REG_R0)
	gins(ppc64.AOR, &reg, &reg)
}
Example #22
File: gsubr.go Project: klueska/go-akaros
func anyregalloc() bool {
	var j int

	for i := int(0); i < len(reg); i++ {
		if reg[i] == 0 {
			goto ok
		}
		for j = 0; j < len(resvd); j++ {
			if resvd[j] == i {
				goto ok
			}
		}
		return true
	ok:
	}

	return false
}

/*
 * allocate register of type t, leave in n.
 * if o != N, o is desired fixed register.
 * caller must regfree(n).
 */
func regalloc(n *gc.Node, t *gc.Type, o *gc.Node) {
	if t == nil {
		gc.Fatal("regalloc: t nil")
	}
	et := int(gc.Simtype[t.Etype])

	if gc.Debug['r'] != 0 {
		fixfree := int(0)
		fltfree := int(0)
		for i := int(arm64.REG_R0); i < arm64.REG_F31; i++ {
			if reg[i-arm64.REG_R0] == 0 {
				if i < arm64.REG_F0 {
					fixfree++
				} else {
					fltfree++
				}
			}
		}

		fmt.Printf("regalloc fix %d flt %d free\n", fixfree, fltfree)
	}

	var i int
	switch et {
	case gc.TINT8,
		gc.TUINT8,
		gc.TINT16,
		gc.TUINT16,
		gc.TINT32,
		gc.TUINT32,
		gc.TINT64,
		gc.TUINT64,
		gc.TPTR32,
		gc.TPTR64,
		gc.TBOOL:
		if o != nil && o.Op == gc.OREGISTER {
			i = int(o.Val.U.Reg)
			if i >= arm64.REGMIN && i <= arm64.REGMAX {
				goto out
			}
		}

		for i = arm64.REGMIN; i <= arm64.REGMAX; i++ {
			if reg[i-arm64.REG_R0] == 0 {
				regpc[i-arm64.REG_R0] = uint32(obj.Getcallerpc(&n))
				goto out
			}
		}

		gc.Flusherrors()
		for i := int(arm64.REG_R0); i < arm64.REG_R0+arm64.NREG; i++ {
			fmt.Printf("R%d %p\n", i, regpc[i-arm64.REG_R0])
		}
		gc.Fatal("out of fixed registers")

	case gc.TFLOAT32,
		gc.TFLOAT64:
		if o != nil && o.Op == gc.OREGISTER {
			i = int(o.Val.U.Reg)
			if i >= arm64.FREGMIN && i <= arm64.FREGMAX {
				goto out
			}
		}

		for i = arm64.FREGMIN; i <= arm64.FREGMAX; i++ {
			if reg[i-arm64.REG_R0] == 0 {
				regpc[i-arm64.REG_R0] = uint32(obj.Getcallerpc(&n))
				goto out
			}
		}

		gc.Flusherrors()
		for i := int(arm64.REG_F0); i < arm64.REG_F0+arm64.NREG; i++ {
			fmt.Printf("F%d %p\n", i, regpc[i-arm64.REG_R0])
		}
		gc.Fatal("out of floating registers")

	case gc.TCOMPLEX64,
		gc.TCOMPLEX128:
		gc.Tempname(n, t)
		return
	}

	gc.Fatal("regalloc: unknown type %v", gc.Tconv(t, 0))
	return

out:
	reg[i-arm64.REG_R0]++
	gc.Nodreg(n, t, i)
}

func regfree(n *gc.Node) {
	if n.Op == gc.ONAME {
		return
	}
	if n.Op != gc.OREGISTER && n.Op != gc.OINDREG {
		gc.Fatal("regfree: not a register")
	}
	i := int(n.Val.U.Reg) - arm64.REG_R0
	if i == arm64.REGSP-arm64.REG_R0 {
		return
	}
	if i < 0 || i >= len(reg) {
		gc.Fatal("regfree: reg out of range")
	}
	if reg[i] <= 0 {
		gc.Fatal("regfree: reg not allocated")
	}
	reg[i]--
	if reg[i] == 0 {
		regpc[i] = 0
	}
}
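
regalloc and regfree above keep per-register use counts in reg, recording the caller in regpc so register exhaustion can be diagnosed; the comment on regalloc notes that every allocation must be paired with a regfree. A tiny self-contained model of that bookkeeping (illustrative, not the compiler's actual types):

package main

import "fmt"

// allocator is a minimal model of the reg use-count array managed by
// regalloc/regfree: alloc scans for a free slot, free drops the count.
type allocator struct {
	reg [8]int
}

func (a *allocator) alloc() int {
	for i := range a.reg {
		if a.reg[i] == 0 {
			a.reg[i]++
			return i
		}
	}
	panic("out of fixed registers")
}

func (a *allocator) free(i int) {
	if a.reg[i] <= 0 {
		panic("regfree: reg not allocated")
	}
	a.reg[i]--
}

func main() {
	var a allocator
	r := a.alloc()
	defer a.free(r)
	fmt.Println("got register", r)
}
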

/*
 * generate
 *	as $c, n
 */
func ginscon(as int, c int64, n2 *gc.Node) {
	var n1 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)

	if as != arm64.AMOVD && (c < -arm64.BIG || c > arm64.BIG) {
		// cannot have more than 16-bit of immediate in ADD, etc.
		// instead, MOV into register first.
		var ntmp gc.Node
		regalloc(&ntmp, gc.Types[gc.TINT64], nil)

		gins(arm64.AMOVD, &n1, &ntmp)
		gins(as, &ntmp, n2)
		regfree(&ntmp)
		return
	}

	gins(as, &n1, n2)
}

/*
 * generate
 *	as n, $c (CMP)
 */
func ginscon2(as int, n2 *gc.Node, c int64) {
	var n1 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)

	switch as {
	default:
		gc.Fatal("ginscon2")

	case arm64.ACMP:
		if -arm64.BIG <= c && c <= arm64.BIG {
			gcmp(as, n2, &n1)
			return
		}
	}

	// MOV n1 into register first
	var ntmp gc.Node
	regalloc(&ntmp, gc.Types[gc.TINT64], nil)

	gins(arm64.AMOVD, &n1, &ntmp)
	gcmp(as, n2, &ntmp)
	regfree(&ntmp)
}

/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := (*gc.Type)(t.Type)

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var r1 gc.Node
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			gc.Convconst(&con, t.Type, &f.Val)

		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			gc.Convconst(&con, gc.Types[gc.TINT64], &f.Val)
			var r1 gc.Node
			regalloc(&r1, con.Type, t)
			gins(arm64.AMOVD, &con, &r1)
			gmove(&r1, t)
			regfree(&r1)
			return

		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			gc.Convconst(&con, gc.Types[gc.TUINT64], &f.Val)
			var r1 gc.Node
			regalloc(&r1, con.Type, t)
			gins(arm64.AMOVD, &con, &r1)
			gmove(&r1, t)
			regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// value -> value copy, first operand in memory.
	// any floating point operand requires register
	// src, so goto hard to copy to register first.
	if gc.Ismem(f) && ft != tt && (gc.Isfloat[ft] || gc.Isfloat[tt]) {
		cvt = gc.Types[ft]
		goto hard
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatal("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = arm64.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		// truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = arm64.AMOVBU

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = arm64.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		// truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = arm64.AMOVHU

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32,
		// truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = arm64.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = arm64.AMOVWU

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = arm64.AMOVD

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = arm64.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = arm64.AMOVBU

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = arm64.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = arm64.AMOVHU

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = arm64.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = arm64.AMOVWU

		goto rdst

	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32:
		a = arm64.AFCVTZSSW
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = arm64.AFCVTZSDW
		goto rdst

	case gc.TFLOAT32<<16 | gc.TINT64:
		a = arm64.AFCVTZSS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT64:
		a = arm64.AFCVTZSD
		goto rdst

	case gc.TFLOAT32<<16 | gc.TUINT32:
		a = arm64.AFCVTZUSW
		goto rdst

	case gc.TFLOAT64<<16 | gc.TUINT32:
		a = arm64.AFCVTZUDW
		goto rdst

	case gc.TFLOAT32<<16 | gc.TUINT64:
		a = arm64.AFCVTZUS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TUINT64:
		a = arm64.AFCVTZUD
		goto rdst

	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	/*
	 * integer to float
	 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT32:
		a = arm64.ASCVTFWS

		goto rdst

	case gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT64:
		a = arm64.ASCVTFWD

		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT32:
		a = arm64.ASCVTFS
		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT64:
		a = arm64.ASCVTFD
		goto rdst

	case gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT32:
		a = arm64.AUCVTFWS

		goto rdst

	case gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT64:
		a = arm64.AUCVTFWD

		goto rdst

	case gc.TUINT64<<16 | gc.TFLOAT32:
		a = arm64.AUCVTFS
		goto rdst

	case gc.TUINT64<<16 | gc.TFLOAT64:
		a = arm64.AUCVTFD
		goto rdst

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = arm64.AFMOVS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = arm64.AFMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = arm64.AFCVTSD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = arm64.AFCVTDS
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	regfree(&r1)
	return

	// requires register intermediate
hard:
	regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	regfree(&r1)
	return
}

/*
 * generate one instruction:
 *	as f, t
 */
func gins(as int, f *gc.Node, t *gc.Node) *obj.Prog {
	// TODO(austin): Add self-move test like in 6g (but be careful
	// of truncation moves)

	af := obj.Addr{}
	at := obj.Addr{}
	if f != nil {
		af = gc.Naddr(f)
	}
	if t != nil {
		at = gc.Naddr(t)
	}
	p := (*obj.Prog)(gc.Prog(as))
	if f != nil {
		p.From = af
	}
	if t != nil {
		p.To = at
	}
	if gc.Debug['g'] != 0 {
		fmt.Printf("%v\n", p)
	}

	w := int32(0)
	switch as {
	case arm64.AMOVB,
		arm64.AMOVBU:
		w = 1

	case arm64.AMOVH,
		arm64.AMOVHU:
		w = 2

	case arm64.AMOVW,
		arm64.AMOVWU:
		w = 4

	case arm64.AMOVD:
		if af.Type == obj.TYPE_CONST || af.Type == obj.TYPE_ADDR {
			break
		}
		w = 8
	}

	if w != 0 && ((f != nil && af.Width < int64(w)) || (t != nil && at.Type != obj.TYPE_REG && at.Width > int64(w))) {
		gc.Dump("f", f)
		gc.Dump("t", t)
		gc.Fatal("bad width: %v (%d, %d)\n", p, af.Width, at.Width)
	}

	return p
}

func fixlargeoffset(n *gc.Node) {
	if n == nil {
		return
	}
	if n.Op != gc.OINDREG {
		return
	}
	if -4096 <= n.Xoffset && n.Xoffset < 4096 {
		return
	}
	a := *n
	a.Op = gc.OREGISTER
	a.Type = gc.Types[gc.Tptr]
	a.Xoffset = 0
	gc.Cgen_checknil(&a)
	ginscon(optoas(gc.OADD, gc.Types[gc.Tptr]), n.Xoffset, &a)
	n.Xoffset = 0
}
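
fixlargeoffset handles indirect operands whose displacement falls outside the immediate range ARM64 loads and stores can encode (about -4096..4095 here): it adds the whole offset into the base register and leaves the operand with a zero displacement. A standalone sketch of that split, with plain integers standing in for nodes:

// splitLargeOffset mirrors the decision above (illustrative helper only).
func splitLargeOffset(off int64) (addToBase, disp int64) {
	if -4096 <= off && off < 4096 {
		return 0, off // small enough to encode in the addressing mode
	}
	return off, 0 // fold into the base register, then address as 0(reg)
}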

/*
 * insert n into reg slot of p
 */
func raddr(n *gc.Node, p *obj.Prog) {
	a := gc.Naddr(n)
	if a.Type != obj.TYPE_REG {
		if n != nil {
			gc.Fatal("bad in raddr: %v", gc.Oconv(int(n.Op), 0))
		} else {
			gc.Fatal("bad in raddr: <null>")
		}
		p.Reg = 0
	} else {
		p.Reg = a.Reg
	}
}

func gcmp(as int, lhs *gc.Node, rhs *gc.Node) *obj.Prog {
	if lhs.Op != gc.OREGISTER {
		gc.Fatal("bad operands to gcmp: %v %v", gc.Oconv(int(lhs.Op), 0), gc.Oconv(int(rhs.Op), 0))
	}

	p := gins(as, rhs, nil)
	raddr(lhs, p)
	return p
}

/*
 * return Axxx for Oxxx on type t.
 */
func optoas(op int, t *gc.Type) int {
	if t == nil {
		gc.Fatal("optoas: t is nil")
	}

	a := int(obj.AXXX)
	switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
	default:
		gc.Fatal("optoas: no entry for op=%v type=%v", gc.Oconv(int(op), 0), gc.Tconv(t, 0))

	case gc.OEQ<<16 | gc.TBOOL,
		gc.OEQ<<16 | gc.TINT8,
		gc.OEQ<<16 | gc.TUINT8,
		gc.OEQ<<16 | gc.TINT16,
		gc.OEQ<<16 | gc.TUINT16,
		gc.OEQ<<16 | gc.TINT32,
		gc.OEQ<<16 | gc.TUINT32,
		gc.OEQ<<16 | gc.TINT64,
		gc.OEQ<<16 | gc.TUINT64,
		gc.OEQ<<16 | gc.TPTR32,
		gc.OEQ<<16 | gc.TPTR64,
		gc.OEQ<<16 | gc.TFLOAT32,
		gc.OEQ<<16 | gc.TFLOAT64:
		a = arm64.ABEQ

	case gc.ONE<<16 | gc.TBOOL,
		gc.ONE<<16 | gc.TINT8,
		gc.ONE<<16 | gc.TUINT8,
		gc.ONE<<16 | gc.TINT16,
		gc.ONE<<16 | gc.TUINT16,
		gc.ONE<<16 | gc.TINT32,
		gc.ONE<<16 | gc.TUINT32,
		gc.ONE<<16 | gc.TINT64,
		gc.ONE<<16 | gc.TUINT64,
		gc.ONE<<16 | gc.TPTR32,
		gc.ONE<<16 | gc.TPTR64,
		gc.ONE<<16 | gc.TFLOAT32,
		gc.ONE<<16 | gc.TFLOAT64:
		a = arm64.ABNE

	case gc.OLT<<16 | gc.TINT8,
		gc.OLT<<16 | gc.TINT16,
		gc.OLT<<16 | gc.TINT32,
		gc.OLT<<16 | gc.TINT64:
		a = arm64.ABLT

	case gc.OLT<<16 | gc.TUINT8,
		gc.OLT<<16 | gc.TUINT16,
		gc.OLT<<16 | gc.TUINT32,
		gc.OLT<<16 | gc.TUINT64,
		gc.OLT<<16 | gc.TFLOAT32,
		gc.OLT<<16 | gc.TFLOAT64:
		a = arm64.ABLO

	case gc.OLE<<16 | gc.TINT8,
		gc.OLE<<16 | gc.TINT16,
		gc.OLE<<16 | gc.TINT32,
		gc.OLE<<16 | gc.TINT64:
		a = arm64.ABLE

	case gc.OLE<<16 | gc.TUINT8,
		gc.OLE<<16 | gc.TUINT16,
		gc.OLE<<16 | gc.TUINT32,
		gc.OLE<<16 | gc.TUINT64,
		gc.OLE<<16 | gc.TFLOAT32,
		gc.OLE<<16 | gc.TFLOAT64:
		a = arm64.ABLS

	case gc.OGT<<16 | gc.TINT8,
		gc.OGT<<16 | gc.TINT16,
		gc.OGT<<16 | gc.TINT32,
		gc.OGT<<16 | gc.TINT64,
		gc.OGT<<16 | gc.TFLOAT32,
		gc.OGT<<16 | gc.TFLOAT64:
		a = arm64.ABGT

	case gc.OGT<<16 | gc.TUINT8,
		gc.OGT<<16 | gc.TUINT16,
		gc.OGT<<16 | gc.TUINT32,
		gc.OGT<<16 | gc.TUINT64:
		a = arm64.ABHI

	case gc.OGE<<16 | gc.TINT8,
		gc.OGE<<16 | gc.TINT16,
		gc.OGE<<16 | gc.TINT32,
		gc.OGE<<16 | gc.TINT64,
		gc.OGE<<16 | gc.TFLOAT32,
		gc.OGE<<16 | gc.TFLOAT64:
		a = arm64.ABGE

	case gc.OGE<<16 | gc.TUINT8,
		gc.OGE<<16 | gc.TUINT16,
		gc.OGE<<16 | gc.TUINT32,
		gc.OGE<<16 | gc.TUINT64:
		a = arm64.ABHS

	case gc.OCMP<<16 | gc.TBOOL,
		gc.OCMP<<16 | gc.TINT8,
		gc.OCMP<<16 | gc.TINT16,
		gc.OCMP<<16 | gc.TINT32,
		gc.OCMP<<16 | gc.TPTR32,
		gc.OCMP<<16 | gc.TINT64,
		gc.OCMP<<16 | gc.TUINT8,
		gc.OCMP<<16 | gc.TUINT16,
		gc.OCMP<<16 | gc.TUINT32,
		gc.OCMP<<16 | gc.TUINT64,
		gc.OCMP<<16 | gc.TPTR64:
		a = arm64.ACMP

	case gc.OCMP<<16 | gc.TFLOAT32:
		a = arm64.AFCMPS

	case gc.OCMP<<16 | gc.TFLOAT64:
		a = arm64.AFCMPD

	case gc.OAS<<16 | gc.TBOOL,
		gc.OAS<<16 | gc.TINT8:
		a = arm64.AMOVB

	case gc.OAS<<16 | gc.TUINT8:
		a = arm64.AMOVBU

	case gc.OAS<<16 | gc.TINT16:
		a = arm64.AMOVH

	case gc.OAS<<16 | gc.TUINT16:
		a = arm64.AMOVHU

	case gc.OAS<<16 | gc.TINT32:
		a = arm64.AMOVW

	case gc.OAS<<16 | gc.TUINT32,
		gc.OAS<<16 | gc.TPTR32:
		a = arm64.AMOVWU

	case gc.OAS<<16 | gc.TINT64,
		gc.OAS<<16 | gc.TUINT64,
		gc.OAS<<16 | gc.TPTR64:
		a = arm64.AMOVD

	case gc.OAS<<16 | gc.TFLOAT32:
		a = arm64.AFMOVS

	case gc.OAS<<16 | gc.TFLOAT64:
		a = arm64.AFMOVD

	case gc.OADD<<16 | gc.TINT8,
		gc.OADD<<16 | gc.TUINT8,
		gc.OADD<<16 | gc.TINT16,
		gc.OADD<<16 | gc.TUINT16,
		gc.OADD<<16 | gc.TINT32,
		gc.OADD<<16 | gc.TUINT32,
		gc.OADD<<16 | gc.TPTR32,
		gc.OADD<<16 | gc.TINT64,
		gc.OADD<<16 | gc.TUINT64,
		gc.OADD<<16 | gc.TPTR64:
		a = arm64.AADD

	case gc.OADD<<16 | gc.TFLOAT32:
		a = arm64.AFADDS

	case gc.OADD<<16 | gc.TFLOAT64:
		a = arm64.AFADDD

	case gc.OSUB<<16 | gc.TINT8,
		gc.OSUB<<16 | gc.TUINT8,
		gc.OSUB<<16 | gc.TINT16,
		gc.OSUB<<16 | gc.TUINT16,
		gc.OSUB<<16 | gc.TINT32,
		gc.OSUB<<16 | gc.TUINT32,
		gc.OSUB<<16 | gc.TPTR32,
		gc.OSUB<<16 | gc.TINT64,
		gc.OSUB<<16 | gc.TUINT64,
		gc.OSUB<<16 | gc.TPTR64:
		a = arm64.ASUB

	case gc.OSUB<<16 | gc.TFLOAT32:
		a = arm64.AFSUBS

	case gc.OSUB<<16 | gc.TFLOAT64:
		a = arm64.AFSUBD

	case gc.OMINUS<<16 | gc.TINT8,
		gc.OMINUS<<16 | gc.TUINT8,
		gc.OMINUS<<16 | gc.TINT16,
		gc.OMINUS<<16 | gc.TUINT16,
		gc.OMINUS<<16 | gc.TINT32,
		gc.OMINUS<<16 | gc.TUINT32,
		gc.OMINUS<<16 | gc.TPTR32,
		gc.OMINUS<<16 | gc.TINT64,
		gc.OMINUS<<16 | gc.TUINT64,
		gc.OMINUS<<16 | gc.TPTR64:
		a = arm64.ANEG

	case gc.OMINUS<<16 | gc.TFLOAT32:
		a = arm64.AFNEGS

	case gc.OMINUS<<16 | gc.TFLOAT64:
		a = arm64.AFNEGD

	case gc.OAND<<16 | gc.TINT8,
		gc.OAND<<16 | gc.TUINT8,
		gc.OAND<<16 | gc.TINT16,
		gc.OAND<<16 | gc.TUINT16,
		gc.OAND<<16 | gc.TINT32,
		gc.OAND<<16 | gc.TUINT32,
		gc.OAND<<16 | gc.TPTR32,
		gc.OAND<<16 | gc.TINT64,
		gc.OAND<<16 | gc.TUINT64,
		gc.OAND<<16 | gc.TPTR64:
		a = arm64.AAND

	case gc.OOR<<16 | gc.TINT8,
		gc.OOR<<16 | gc.TUINT8,
		gc.OOR<<16 | gc.TINT16,
		gc.OOR<<16 | gc.TUINT16,
		gc.OOR<<16 | gc.TINT32,
		gc.OOR<<16 | gc.TUINT32,
		gc.OOR<<16 | gc.TPTR32,
		gc.OOR<<16 | gc.TINT64,
		gc.OOR<<16 | gc.TUINT64,
		gc.OOR<<16 | gc.TPTR64:
		a = arm64.AORR

	case gc.OXOR<<16 | gc.TINT8,
		gc.OXOR<<16 | gc.TUINT8,
		gc.OXOR<<16 | gc.TINT16,
		gc.OXOR<<16 | gc.TUINT16,
		gc.OXOR<<16 | gc.TINT32,
		gc.OXOR<<16 | gc.TUINT32,
		gc.OXOR<<16 | gc.TPTR32,
		gc.OXOR<<16 | gc.TINT64,
		gc.OXOR<<16 | gc.TUINT64,
		gc.OXOR<<16 | gc.TPTR64:
		a = arm64.AEOR

		// TODO(minux): handle rotates
	//case CASE(OLROT, TINT8):
	//case CASE(OLROT, TUINT8):
	//case CASE(OLROT, TINT16):
	//case CASE(OLROT, TUINT16):
	//case CASE(OLROT, TINT32):
	//case CASE(OLROT, TUINT32):
	//case CASE(OLROT, TPTR32):
	//case CASE(OLROT, TINT64):
	//case CASE(OLROT, TUINT64):
	//case CASE(OLROT, TPTR64):
	//	a = 0//???; RLDC?
	//	break;

	case gc.OLSH<<16 | gc.TINT8,
		gc.OLSH<<16 | gc.TUINT8,
		gc.OLSH<<16 | gc.TINT16,
		gc.OLSH<<16 | gc.TUINT16,
		gc.OLSH<<16 | gc.TINT32,
		gc.OLSH<<16 | gc.TUINT32,
		gc.OLSH<<16 | gc.TPTR32,
		gc.OLSH<<16 | gc.TINT64,
		gc.OLSH<<16 | gc.TUINT64,
		gc.OLSH<<16 | gc.TPTR64:
		a = arm64.ALSL

	case gc.ORSH<<16 | gc.TUINT8,
		gc.ORSH<<16 | gc.TUINT16,
		gc.ORSH<<16 | gc.TUINT32,
		gc.ORSH<<16 | gc.TPTR32,
		gc.ORSH<<16 | gc.TUINT64,
		gc.ORSH<<16 | gc.TPTR64:
		a = arm64.ALSR

	case gc.ORSH<<16 | gc.TINT8,
		gc.ORSH<<16 | gc.TINT16,
		gc.ORSH<<16 | gc.TINT32,
		gc.ORSH<<16 | gc.TINT64:
		a = arm64.AASR

		// TODO(minux): handle rotates
	//case CASE(ORROTC, TINT8):
	//case CASE(ORROTC, TUINT8):
	//case CASE(ORROTC, TINT16):
	//case CASE(ORROTC, TUINT16):
	//case CASE(ORROTC, TINT32):
	//case CASE(ORROTC, TUINT32):
	//case CASE(ORROTC, TINT64):
	//case CASE(ORROTC, TUINT64):
	//	a = 0//??? RLDC??
	//	break;

	case gc.OHMUL<<16 | gc.TINT64:
		a = arm64.ASMULH

	case gc.OHMUL<<16 | gc.TUINT64,
		gc.OHMUL<<16 | gc.TPTR64:
		a = arm64.AUMULH

	case gc.OMUL<<16 | gc.TINT8,
		gc.OMUL<<16 | gc.TINT16,
		gc.OMUL<<16 | gc.TINT32:
		a = arm64.ASMULL

	case gc.OMUL<<16 | gc.TINT64:
		a = arm64.AMUL

	case gc.OMUL<<16 | gc.TUINT8,
		gc.OMUL<<16 | gc.TUINT16,
		gc.OMUL<<16 | gc.TUINT32,
		gc.OMUL<<16 | gc.TPTR32:
		// don't use word multiply; the high 32 bits are undefined.
		a = arm64.AUMULL

	case gc.OMUL<<16 | gc.TUINT64,
		gc.OMUL<<16 | gc.TPTR64:
		a = arm64.AMUL // for 64-bit multiplies, signedness doesn't matter.

	case gc.OMUL<<16 | gc.TFLOAT32:
		a = arm64.AFMULS

	case gc.OMUL<<16 | gc.TFLOAT64:
		a = arm64.AFMULD

	case gc.ODIV<<16 | gc.TINT8,
		gc.ODIV<<16 | gc.TINT16,
		gc.ODIV<<16 | gc.TINT32,
		gc.ODIV<<16 | gc.TINT64:
		a = arm64.ASDIV

	case gc.ODIV<<16 | gc.TUINT8,
		gc.ODIV<<16 | gc.TUINT16,
		gc.ODIV<<16 | gc.TUINT32,
		gc.ODIV<<16 | gc.TPTR32,
		gc.ODIV<<16 | gc.TUINT64,
		gc.ODIV<<16 | gc.TPTR64:
		a = arm64.AUDIV

	case gc.ODIV<<16 | gc.TFLOAT32:
		a = arm64.AFDIVS

	case gc.ODIV<<16 | gc.TFLOAT64:
		a = arm64.AFDIVD
	}

	return a
}

const (
	ODynam   = 1 << 0
	OAddable = 1 << 1
)

func xgen(n *gc.Node, a *gc.Node, o int) bool {
	// TODO(minux)

	return true
}

func sudoclean() {
	return
}

/*
 * generate code to compute address of n,
 * a reference to a (perhaps nested) field inside
 * an array or struct.
 * return false on failure, true on success.
 * on success, leaves usable address in a.
 *
 * caller is responsible for calling sudoclean
 * after successful sudoaddable,
 * to release the register used for a.
 */
func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool {
	// TODO(minux)

	*a = obj.Addr{}
	return false
}
Example #23
0
File: ggen.go Project: tidatida/go
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := uint32(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 4 // trailing bytes
	q := w / 4 // 4-byte words

	if q < 4 {
		// Write sequence of MOV 0, off(base) instead of using STOSL.
		// The hope is that although the code will be slightly longer,
		// the MOVs will have no dependencies and pipeline better
		// than the unrolled STOSL loop.
		// NOTE: Must use agen, not igen, so that optimizer sees address
		// being taken. We are not writing on field boundaries.
		var n1 gc.Node
		gc.Regalloc(&n1, gc.Types[gc.Tptr], nil)

		gc.Agen(nl, &n1)
		n1.Op = gc.OINDREG
		var z gc.Node
		gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
		for ; q > 0; q-- {
			n1.Type = z.Type
			gins(x86.AMOVL, &z, &n1)
			n1.Xoffset += 4
		}

		gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
		for ; c > 0; c-- {
			n1.Type = z.Type
			gins(x86.AMOVB, &z, &n1)
			n1.Xoffset++
		}

		gc.Regfree(&n1)
		return
	}

	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.Tptr], x86.REG_DI)
	gc.Agen(nl, &n1)
	gconreg(x86.AMOVL, 0, x86.REG_AX)

	if q > 128 || (q >= 4 && gc.Nacl) {
		gconreg(x86.AMOVL, int64(q), x86.REG_CX)
		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+
	} else if q >= 4 {
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))

		// 1 and 128 = magic constants: see ../../runtime/asm_386.s
		p.To.Offset = 1 * (128 - int64(q))
	} else {
		for q > 0 {
			gins(x86.ASTOSL, nil, nil) // STOL AL,*(DI)+
			q--
		}
	}

	for c > 0 {
		gins(x86.ASTOSB, nil, nil) // STOB AL,*(DI)+
		c--
	}
}
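
clearfat above splits the object into 4-byte stores plus a byte tail, then picks a zeroing strategy by size: unrolled MOVs when there are fewer than four words, REP STOSL for very large objects (or under NaCl), and a call into duffzero in between. A standalone sketch of that split and choice, with the thresholds taken from the code above:

// clearPlan is illustrative only; it mirrors clearfat's size-based choice.
func clearPlan(w uint32, nacl bool) (q, c uint32, strategy string) {
	q = w / 4 // 4-byte words
	c = w % 4 // trailing bytes
	switch {
	case q < 4:
		strategy = "unrolled MOVL/MOVB stores"
	case q > 128 || nacl:
		strategy = "REP STOSL"
	default:
		strategy = "DUFFZERO"
	}
	return q, c, strategy
}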
Example #24
0
File: ggen.go Project: bibbyflyaway/go
func ginsnop() {
	var r gc.Node
	gc.Nodreg(&r, gc.Types[gc.TINT], arm.REG_R0)
	p := gins(arm.AAND, &r, &r)
	p.Scond = arm.C_SCOND_EQ
}
Example #25
0
File: cgen.go Project: tidatida/go
func stackcopy(n, ns *gc.Node, osrc, odst, w int64) {
	var noddi gc.Node
	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
	var nodsi gc.Node
	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)

	var nodl gc.Node
	var nodr gc.Node
	if n.Ullman >= ns.Ullman {
		gc.Agenr(n, &nodr, &nodsi)
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
	} else {
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
		gc.Agenr(n, &nodr, &nodsi)
	}

	if nodl.Val.U.Reg != x86.REG_DI {
		gmove(&nodl, &noddi)
	}
	if nodr.Val.U.Reg != x86.REG_SI {
		gmove(&nodr, &nodsi)
	}
	gc.Regfree(&nodl)
	gc.Regfree(&nodr)

	c := w % 8 // bytes
	q := w / 8 // quads

	var oldcx gc.Node
	var cx gc.Node
	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(addptr, w-1, x86.REG_SI)
			gconreg(addptr, w-1, x86.REG_DI)

			gconreg(movptr, c, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(addptr, -7, x86.REG_SI)
				gconreg(addptr, -7, x86.REG_DI)
			} else {
				gconreg(addptr, w-8, x86.REG_SI)
				gconreg(addptr, w-8, x86.REG_DI)
			}

			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		// normal direction
		if q > 128 || (gc.Nacl && q >= 4) {
			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
			p.To.Offset = 14 * (128 - q)
		} else if !gc.Nacl && c == 0 {
			// We don't need the MOVSQ side-effect of updating SI and DI,
			// and issuing a sequence of MOVQs directly is faster.
			nodsi.Op = gc.OINDREG

			noddi.Op = gc.OINDREG
			for q > 0 {
				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
				nodsi.Xoffset += 8
				noddi.Xoffset += 8
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
				q--
			}
		}

		// copy the remaining c bytes
		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
			for c > 0 {
				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
				c--
			}
		} else if w < 8 || c <= 4 {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT32]
			nodsi.Type = gc.Types[gc.TINT32]
			noddi.Type = gc.Types[gc.TINT32]
			if c > 4 {
				nodsi.Xoffset = 0
				noddi.Xoffset = 0
				gmove(&nodsi, &cx)
				gmove(&cx, &noddi)
			}

			nodsi.Xoffset = c - 4
			noddi.Xoffset = c - 4
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		} else {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT64]
			nodsi.Type = gc.Types[gc.TINT64]
			noddi.Type = gc.Types[gc.TINT64]
			nodsi.Xoffset = c - 8
			noddi.Xoffset = c - 8
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		}
	}

	restx(&cx, &oldcx)
}
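
The copy direction above is chosen by an overlap test: if the destination begins inside the source block, a forward copy would clobber bytes that have not been read yet, so the code sets the direction flag and copies backwards. A one-line standalone sketch of that test:

// mustCopyBackward mirrors the overlap check in stackcopy above.
func mustCopyBackward(osrc, odst, w int64) bool {
	return osrc < odst && odst < osrc+w
}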
Example #26
0
File: ggen.go Project: klueska/go-akaros
/*
 * generate:
 *	call f
 *	proc=-1	normal call but no return
 *	proc=0	normal call
 *	proc=1	goroutine run in new proc
 *	proc=2	defer call save away stack
 *	proc=3	normal call to C pointer (not Go func value)
 */
func ginscall(f *gc.Node, proc int) {
	if f.Type != nil {
		extra := int32(0)
		if proc == 1 || proc == 2 {
			extra = 2 * int32(gc.Widthptr)
		}
		gc.Setmaxarg(f.Type, extra)
	}

	switch proc {
	default:
		gc.Fatal("ginscall: bad proc %d", proc)

	case 0, // normal call
		-1: // normal call but no return
		if f.Op == gc.ONAME && f.Class == gc.PFUNC {
			if f == gc.Deferreturn {
				// Deferred calls will appear to be returning to
				// the CALL deferreturn(SB) that we are about to emit.
				// However, the stack trace code will show the line
				// of the instruction byte before the return PC.
				// To avoid that being an unrelated instruction,
				// insert an x86 NOP so that it will have the right line number.
				// x86 NOP 0x90 is really XCHG AX, AX; use that description
				// because the NOP pseudo-instruction will be removed by
				// the linker.
				var reg gc.Node
				gc.Nodreg(&reg, gc.Types[gc.TINT], x86.REG_AX)

				gins(x86.AXCHGL, &reg, &reg)
			}

			p := gins(obj.ACALL, nil, f)
			gc.Afunclit(&p.To, f)
			if proc == -1 || gc.Noreturn(p) {
				gins(obj.AUNDEF, nil, nil)
			}
			break
		}

		var reg gc.Node
		gc.Nodreg(&reg, gc.Types[gc.Tptr], x86.REG_DX)
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[gc.Tptr], x86.REG_BX)
		gmove(f, &reg)
		reg.Op = gc.OINDREG
		gmove(&reg, &r1)
		reg.Op = gc.OREGISTER
		gins(obj.ACALL, &reg, &r1)

	case 3: // normal call of c function pointer
		gins(obj.ACALL, nil, f)

	case 1, // call in new proc (go)
		2: // deferred call (defer)
		var stk gc.Node

		stk.Op = gc.OINDREG
		stk.Val.U.Reg = x86.REG_SP
		stk.Xoffset = 0

		// size of arguments at 0(SP)
		var con gc.Node
		gc.Nodconst(&con, gc.Types[gc.TINT32], int64(gc.Argsize(f.Type)))

		gins(x86.AMOVL, &con, &stk)

		// FuncVal* at 4(SP)
		stk.Xoffset = int64(gc.Widthptr)

		gins(x86.AMOVL, f, &stk)

		if proc == 1 {
			ginscall(gc.Newproc, 0)
		} else {
			ginscall(gc.Deferproc, 0)
		}
		if proc == 2 {
			var reg gc.Node
			gc.Nodreg(&reg, gc.Types[gc.TINT32], x86.REG_AX)
			gins(x86.ATESTL, &reg, &reg)
			p := gc.Gbranch(x86.AJEQ, nil, +1)
			cgen_ret(nil)
			gc.Patch(p, gc.Pc)
		}
	}
}
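
For proc 1 and 2 the code above builds the runtime call's arguments on the stack: the callee's argument size goes at 0(SP) and the FuncVal pointer at Widthptr(SP) before calling newproc or deferproc. The struct below is only an illustration of that 386 layout (4-byte pointers assumed); the compiler does not define such a type.

// newprocArgs sketches the two AMOVL stores made by ginscall above.
type newprocArgs struct {
	siz int32   // argument size, stored at 0(SP)
	fn  uintptr // *FuncVal, stored at 4(SP), i.e. Widthptr(SP)
}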
Example #27
0
File: gsubr.go Project: xslonepiece/goios
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		gc.Convconst(&con, t.Type, &f.Val)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

		// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

		/*
		 * integer to float
		 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

		// algorithm is:
	//	if small enough, use native int64 -> float64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}
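
The two unsigned 64-bit paths above use standard range tricks: float to uint64 subtracts 2^63 before the signed conversion and adds it back afterwards, while uint64 to float halves the value (keeping the shifted-out bit so rounding to odd stays correct), converts, and doubles. A standalone sketch of both in plain Go, with the out-of-range clamping that the generated code also performs left out:

// float64ToUint64 follows the "subtract 2^63, convert, add it back" path.
func float64ToUint64(f float64) uint64 {
	const two63 = 9223372036854775808.0 // 2^63
	if f < two63 {
		return uint64(int64(f))
	}
	return uint64(int64(f-two63)) + 1<<63
}

// uint64ToFloat64 follows the "halve rounding to odd, convert, double" path.
func uint64ToFloat64(v uint64) float64 {
	if v < 1<<63 {
		return float64(int64(v)) // fits the native signed conversion
	}
	return 2 * float64(int64(v>>1|v&1))
}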
Example #28
0
File: gsubr.go Project: xslonepiece/goios
func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a int

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		goto fatal

		/*
		 * float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if f.Op != gc.OREGISTER {
			if ft == gc.TFLOAT32 {
				gins(x86.AFMOVF, f, &r1)
			} else {
				gins(x86.AFMOVD, f, &r1)
			}
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

		// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TINT32])

		gmove(f, &t1)
		switch tt {
		default:
			gc.Fatal("gmove %v", gc.Nconv(t, 0))

		case gc.TINT8:
			gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
			p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
			gins(x86.ACMPL, &t1, ncon(0x7f))
			p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
			p3 := gc.Gbranch(obj.AJMP, nil, 0)
			gc.Patch(p1, gc.Pc)
			gc.Patch(p2, gc.Pc)
			gmove(ncon(-0x80&(1<<32-1)), &t1)
			gc.Patch(p3, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT8:
			gins(x86.ATESTL, ncon(0xffffff00), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT16:
			gins(x86.ATESTL, ncon(0xffff0000), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)
		}

		return

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		/*
		 * integer to float
		 */
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op != gc.OREGISTER {
			goto hard
		}
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		switch ft {
		case gc.TINT16:
			a = x86.AFMOVW

		case gc.TINT32:
			a = x86.AFMOVL

		default:
			a = x86.AFMOVV
		}

		// convert via int32 memory
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hardmem

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		// The way the code generator uses floating-point
	// registers, a move from F0 to F0 is intended as a no-op.
	// On the x86, it's not: it pushes a second copy of F0
	// on the floating point stack.  So toss it away here.
	// Also, F0 is the *only* register we ever evaluate
	// into, so we should only see register/register as F0/F0.
	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32,
		gc.TFLOAT64<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		a = x86.AFMOVF
		if ft == gc.TFLOAT64 {
			a = x86.AFMOVD
		}
		if gc.Ismem(t) {
			if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
				gc.Fatal("gmove %v", gc.Nconv(f, 0))
			}
			a = x86.AFMOVFP
			if ft == gc.TFLOAT64 {
				a = x86.AFMOVDP
			}
		}

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVDP, f, t)
		} else {
			gins(x86.AFMOVF, f, t)
		}
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			var r1 gc.Node
			gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
			gins(x86.AFMOVFP, f, &r1)
			gins(x86.AFMOVF, &r1, t)
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVFP, f, t)
		} else {
			gins(x86.AFMOVD, f, t)
		}
		return
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// should not happen
fatal:
	gc.Fatal("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))

	return
}
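
Conversions to the small integer types above go through an int32 temporary and then range-check it: signed values outside the target's range are forced to -0x80 (for int8), and unsigned targets are zeroed when any high bits are set. A standalone sketch of the int8 case, mirroring the CMPL/branch sequence:

// clampToInt8 is illustrative only; it matches the TINT8 branch above.
func clampToInt8(v int32) int8 {
	if v < -0x80 || v > 0x7f {
		v = -0x80
	}
	return int8(v)
}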
Example #29
0
File: ggen.go Project: tidatida/go
func bgen_float(n *gc.Node, true_ int, likely int, to *obj.Prog) {
	nl := n.Left
	nr := n.Right
	a := int(n.Op)
	if true_ == 0 {
		// brcom is not valid on floats when NaN is involved.
		p1 := gc.Gbranch(obj.AJMP, nil, 0)

		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// No need to avoid re-genning ninit.
		bgen_float(n, 1, -likely, p2)

		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p2, gc.Pc)
		return
	}

	var tmp gc.Node
	var et int
	var n2 gc.Node
	var ax gc.Node
	if !gc.Thearch.Use387 {
		if nl.Addable == 0 {
			var n1 gc.Node
			gc.Tempname(&n1, nl.Type)
			gc.Cgen(nl, &n1)
			nl = &n1
		}

		if nr.Addable == 0 {
			var tmp gc.Node
			gc.Tempname(&tmp, nr.Type)
			gc.Cgen(nr, &tmp)
			nr = &tmp
		}

		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, nil)
		gmove(nr, &n2)
		nr = &n2

		if nl.Op != gc.OREGISTER {
			var n3 gc.Node
			gc.Regalloc(&n3, nl.Type, nil)
			gmove(nl, &n3)
			nl = &n3
		}

		if a == gc.OGE || a == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			r := nr

			nr = nl
			nl = r
			a = gc.Brrev(a)
		}

		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
		if nl.Op == gc.OREGISTER {
			gc.Regfree(nl)
		}
		gc.Regfree(nr)
		goto ret
	} else {
		goto x87
	}

x87:
	a = gc.Brrev(a) // because the args are stacked
	if a == gc.OGE || a == gc.OGT {
		// only < and <= work right with NaN; reverse if needed
		r := nr

		nr = nl
		nl = r
		a = gc.Brrev(a)
	}

	gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
	gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
	gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
	et = gc.Simsimtype(nr.Type)
	if et == gc.TFLOAT64 {
		if nl.Ullman > nr.Ullman {
			gc.Cgen(nl, &tmp)
			gc.Cgen(nr, &tmp)
			gins(x86.AFXCHD, &tmp, &n2)
		} else {
			gc.Cgen(nr, &tmp)
			gc.Cgen(nl, &tmp)
		}

		gins(x86.AFUCOMIP, &tmp, &n2)
		gins(x86.AFMOVDP, &tmp, &tmp) // annoying pop but still better than STSW+SAHF
	} else {
		// TODO(rsc): The moves back and forth to memory
		// here are for truncating the value to 32 bits.
		// This handles 32-bit comparison but presumably
		// all the other ops have the same problem.
		// We need to figure out what the right general
		// solution is, besides telling people to use float64.
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TFLOAT32])

		var t2 gc.Node
		gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
		gc.Cgen(nr, &t1)
		gc.Cgen(nl, &t2)
		gmove(&t2, &tmp)
		gins(x86.AFCOMFP, &t1, &tmp)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
	}

	goto ret

ret:
	if a == gc.OEQ {
		// neither NE nor P
		p1 := gc.Gbranch(x86.AJNE, nil, -likely)

		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p1, gc.Pc)
		gc.Patch(p2, gc.Pc)
	} else if a == gc.ONE {
		// either NE or P
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)

		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
	} else {
		gc.Patch(gc.Gbranch(optoas(a, nr.Type), nil, likely), to)
	}
}
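
The operand swap above exists because every ordered comparison involving NaN is false: x > y must be rewritten as y < x with the operands exchanged, not as the negation of x <= y, and equality must additionally exclude the unordered (parity) case while inequality includes it. A tiny standalone illustration of the first point:

// nanCompareDemo: with a NaN operand, the swapped comparison and the negated
// one disagree, which is why bgen_float swaps operands instead of negating.
func nanCompareDemo() (swapped, negated bool) {
	zero := 0.0
	x := zero / zero        // NaN at run time
	return 1 < x, !(x <= 1) // false, true
}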
Example #30
0
File: ggen.go Project: bibbyflyaway/go
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width)
	}

	w := uint64(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 8 // bytes
	q := w / 8 // dwords

	if reg[arm64.REGRT1-arm64.REG_R0] > 0 {
		gc.Fatal("R%d in use during clearfat", arm64.REGRT1-arm64.REG_R0)
	}

	var r0 gc.Node
	gc.Nodreg(&r0, gc.Types[gc.TUINT64], arm64.REGZERO)
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], arm64.REGRT1)
	reg[arm64.REGRT1-arm64.REG_R0]++
	gc.Agen(nl, &dst)

	var boff uint64
	if q > 128 {
		p := gins(arm64.ASUB, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8

		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p = gins(arm64.AMOVD, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = int64(q * 8)

		p = gins(arm64.AMOVD, &r0, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 8
		p.Scond = arm64.C_XPRE
		pl := p

		p = gcmp(arm64.ACMP, &dst, &end)
		gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), pl)

		gc.Regfree(&end)

		// The loop leaves R16 on the last zeroed dword
		boff = 8
	} else if q >= 4 && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend
		p := gins(arm64.ASUB, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8
		f := gc.Sysfunc("duffzero")
		p = gins(obj.ADUFFZERO, nil, f)
		gc.Afunclit(&p.To, f)

		// 4 and 128 = magic constants: see ../../runtime/asm_arm64x.s
		p.To.Offset = int64(4 * (128 - q))

		// duffzero leaves R16 on the last zeroed dword
		boff = 8
	} else {
		var p *obj.Prog
		for t := uint64(0); t < q; t++ {
			p = gins(arm64.AMOVD, &r0, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(8 * t)
		}

		boff = 8 * q
	}

	var p *obj.Prog
	for t := uint64(0); t < c; t++ {
		p = gins(arm64.AMOVB, &r0, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(t + boff)
	}

	reg[arm64.REGRT1-arm64.REG_R0]--
}