Ejemplo n.º 1
0
/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// copy from byte to full registers
	t := gc.Types[gc.TUINT32]

	if nl.Type.IsSigned() {
		t = gc.Types[gc.TINT32]
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var nt gc.Node
	gc.Tempname(&nt, nl.Type)
	gc.Cgen(nl, &nt)
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nr, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gmove(&nt, &n2)
	a := optoas(op, t)
	gins(a, &n2, &n1)
	gc.Regfree(&n2)
	gmove(&n1, res)
	gc.Regfree(&n1)

	return true
}
Ejemplo n.º 2
0
/*
 * generate array index into res.
 * n might be any size; res is 32-bit.
 * returns Prog* to patch to panic call.
 */
func cgenindex(n *gc.Node, res *gc.Node, bounded bool) *obj.Prog {
	if !gc.Is64(n.Type) {
		gc.Cgen(n, res)
		return nil
	}

	var tmp gc.Node
	gc.Tempname(&tmp, gc.Types[gc.TINT64])
	gc.Cgen(n, &tmp)
	var lo gc.Node
	var hi gc.Node
	split64(&tmp, &lo, &hi)
	gmove(&lo, res)
	if bounded {
		splitclean()
		return nil
	}

	var n1 gc.Node
	gc.Regalloc(&n1, gc.Types[gc.TINT32], nil)
	var n2 gc.Node
	gc.Regalloc(&n2, gc.Types[gc.TINT32], nil)
	var zero gc.Node
	gc.Nodconst(&zero, gc.Types[gc.TINT32], 0)
	gmove(&hi, &n1)
	gmove(&zero, &n2)
	gins(arm.ACMP, &n1, &n2)
	gc.Regfree(&n2)
	gc.Regfree(&n1)
	splitclean()
	return gc.Gbranch(arm.ABNE, nil, -1)
}
Ejemplo n.º 3
0
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type

	t0 := t

	if t.Width < 8 {
		if t.IsSigned() {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	p1 := ginsbranch(mips.ABNE, nil, &tr, nil, 0)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	gins3(a, &tr, &tl, nil)
	gc.Regfree(&tr)
	if op == gc.ODIV {
		var lo gc.Node
		gc.Nodreg(&lo, gc.Types[gc.TUINT64], mips.REG_LO)
		gins(mips.AMOVV, &lo, &tl)
	} else { // remainder in REG_HI
		var hi gc.Node
		gc.Nodreg(&hi, gc.Types[gc.TUINT64], mips.REG_HI)
		gins(mips.AMOVV, &hi, &tl)
	}
	gmove(&tl, res)
	gc.Regfree(&tl)
}
Ejemplo n.º 4
0
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if t.IsInteger() && n1.Op == gc.OLITERAL && n2.Op != gc.OLITERAL {
		// Reverse comparison to place constant last.
		op = gc.Brrev(op)
		n1, n2 = n2, n1
	}

	var r1, r2, g1, g2 gc.Node
	gc.Regalloc(&r1, t, n1)
	gc.Regalloc(&g1, n1.Type, &r1)
	gc.Cgen(n1, &g1)
	gmove(&g1, &r1)
	if t.IsInteger() && gc.Isconst(n2, gc.CTINT) {
		ginscon2(optoas(gc.OCMP, t), &r1, n2.Int64())
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
		gcmp(optoas(gc.OCMP, t), &r1, &r2)
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	gc.Regfree(&g1)
	gc.Regfree(&r1)
	return gc.Gbranch(optoas(op, t), nil, likely)
}
Ejemplo n.º 5
0
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if t.IsInteger() && n1.Op == gc.OLITERAL && n1.Int64() == 0 && n2.Op != gc.OLITERAL {
		op = gc.Brrev(op)
		n1, n2 = n2, n1
	}
	var r1, r2, g1, g2 gc.Node
	gc.Regalloc(&r1, t, n1)
	gc.Regalloc(&g1, n1.Type, &r1)
	gc.Cgen(n1, &g1)
	gmove(&g1, &r1)
	if t.IsInteger() && n2.Op == gc.OLITERAL && n2.Int64() == 0 {
		gins(arm.ACMP, &r1, n2)
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
		gins(optoas(gc.OCMP, t), &r1, &r2)
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	gc.Regfree(&g1)
	gc.Regfree(&r1)
	return gc.Gbranch(optoas(op, t), nil, likely)
}
Ejemplo n.º 6
0
/*
 * generate high multiply
 *  res = (nl * nr) >> wordsize
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	t := nl.Type
	w := t.Width * 8
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nl, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gc.Cgen(nr, &n2)
	switch gc.Simtype[t.Etype] {
	case gc.TINT8,
		gc.TINT16:
		gins(optoas(gc.OMUL, t), &n2, &n1)
		gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(w), &n1)

	case gc.TUINT8,
		gc.TUINT16:
		gins(optoas(gc.OMUL, t), &n2, &n1)
		gshift(arm.AMOVW, &n1, arm.SHIFT_LR, int32(w), &n1)

		// perform a long multiplication.
	case gc.TINT32,
		gc.TUINT32:
		var p *obj.Prog
		if t.IsSigned() {
			p = gins(arm.AMULL, &n2, nil)
		} else {
			p = gins(arm.AMULLU, &n2, nil)
		}

		// n2 * n1 -> (n1 n2)
		p.Reg = n1.Reg

		p.To.Type = obj.TYPE_REGREG
		p.To.Reg = n1.Reg
		p.To.Offset = int64(n2.Reg)

	default:
		gc.Fatalf("cgen_hmul %v", t)
	}

	gc.Cgen(&n1, res)
	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
Ejemplo n.º 7
0
/*
 * generate
 *	as $c, n
 */
func ginscon(as obj.As, c int64, n2 *gc.Node) {
	var n1 gc.Node

	switch as {
	case x86.AADDL,
		x86.AMOVL,
		x86.ALEAL:
		gc.Nodconst(&n1, gc.Types[gc.TINT32], c)

	default:
		gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
	}

	if as != x86.AMOVQ && (c < -(1<<31) || c >= 1<<31) {
		// cannot have 64-bit immediate in ADD, etc.
		// instead, MOV into register first.
		var ntmp gc.Node
		gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil)

		gins(x86.AMOVQ, &n1, &ntmp)
		gins(as, &ntmp, n2)
		gc.Regfree(&ntmp)
		return
	}

	gins(as, &n1, n2)
}
Ejemplo n.º 8
0
/*
 * generate
 *	as n, $c (CMP/CMPU)
 */
func ginscon2(as obj.As, n2 *gc.Node, c int64) {
	var n1 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)

	switch as {
	default:
		gc.Fatalf("ginscon2")

	case ppc64.ACMP:
		if -ppc64.BIG <= c && c <= ppc64.BIG {
			rawgins(as, n2, &n1)
			return
		}

	case ppc64.ACMPU:
		if 0 <= c && c <= 2*ppc64.BIG {
			rawgins(as, n2, &n1)
			return
		}
	}

	// MOV n1 into register first
	var ntmp gc.Node
	gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil)

	rawgins(ppc64.AMOVD, &n1, &ntmp)
	rawgins(as, n2, &ntmp)
	gc.Regfree(&ntmp)
}
Ejemplo n.º 9
0
// RightShiftWithCarry generates a constant unsigned
// right shift with carry.
//
// res = n >> shift // with carry
func RightShiftWithCarry(n *gc.Node, shift uint, res *gc.Node) {
	// Extra 1 is for carry bit.
	maxshift := uint(n.Type.Width*8 + 1)
	if shift == 0 {
		gmove(n, res)
	} else if shift < maxshift {
		// 1. clear rightmost bit of target
		var n1 gc.Node
		gc.Nodconst(&n1, n.Type, 1)
		gins(optoas(gc.ORSH, n.Type), &n1, n)
		gins(optoas(gc.OLSH, n.Type), &n1, n)
		// 2. add carry flag to target
		var n2 gc.Node
		gc.Nodconst(&n1, n.Type, 0)
		gc.Regalloc(&n2, n.Type, nil)
		gins(optoas(gc.OAS, n.Type), &n1, &n2)
		gins(arm64.AADC, &n2, n)
		// 3. right rotate 1 bit
		gc.Nodconst(&n1, n.Type, 1)
		gins(arm64.AROR, &n1, n)

		// ARM64 backend doesn't eliminate shifts by 0. It is manually checked here.
		if shift > 1 {
			var n3 gc.Node
			gc.Nodconst(&n3, n.Type, int64(shift-1))
			cgen_shift(gc.ORSH, true, n, &n3, res)
		} else {
			gmove(n, res)
		}
		gc.Regfree(&n2)
	} else {
		gc.Fatalf("RightShiftWithCarry: shift(%v) is bigger than max size(%v)", shift, maxshift)
	}
}
Ejemplo n.º 10
0
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if t.IsInteger() || t.Etype == gc.Tptr {
		if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL {
			// Reverse comparison to place constant (including address constant) last.
			op = gc.Brrev(op)
			n1, n2 = n2, n1
		}
	}

	// General case.
	var r1, r2, g1, g2 gc.Node

	// A special case to make write barriers more efficient.
	// Comparing the first field of a named struct can be done directly.
	base := n1
	if n1.Op == gc.ODOT && n1.Left.Type.IsStruct() && n1.Left.Type.Field(0).Sym == n1.Sym {
		base = n1.Left
	}

	if base.Op == gc.ONAME && base.Class != gc.PAUTOHEAP || n1.Op == gc.OINDREG {
		r1 = *n1
	} else {
		gc.Regalloc(&r1, t, n1)
		gc.Regalloc(&g1, n1.Type, &r1)
		gc.Cgen(n1, &g1)
		gmove(&g1, &r1)
	}
	if n2.Op == gc.OLITERAL && t.IsInteger() || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN {
		r2 = *n2
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
	}
	gins(optoas(gc.OCMP, t), &r1, &r2)
	if r1.Op == gc.OREGISTER {
		gc.Regfree(&g1)
		gc.Regfree(&r1)
	}
	if r2.Op == gc.OREGISTER {
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	return gc.Gbranch(optoas(op, t), nil, likely)
}
Ejemplo n.º 11
0
/*
 * generate
 *	as $c, n
 */
func ginscon(as obj.As, c int64, n *gc.Node) {
	var n1 gc.Node
	gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
	var n2 gc.Node
	gc.Regalloc(&n2, gc.Types[gc.TINT32], nil)
	gmove(&n1, &n2)
	gins(as, &n2, n)
	gc.Regfree(&n2)
}
Ejemplo n.º 12
0
/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	// generate operands in "8-bit" registers.
	var n1b gc.Node
	gc.Regalloc(&n1b, nl.Type, res)

	gc.Cgen(nl, &n1b)
	var n2b gc.Node
	gc.Regalloc(&n2b, nr.Type, nil)
	gc.Cgen(nr, &n2b)

	// perform full-width multiplication.
	t := gc.Types[gc.TUINT64]

	if nl.Type.IsSigned() {
		t = gc.Types[gc.TINT64]
	}
	var n1 gc.Node
	gc.Nodreg(&n1, t, int(n1b.Reg))
	var n2 gc.Node
	gc.Nodreg(&n2, t, int(n2b.Reg))
	a := optoas(op, t)
	gins(a, &n2, &n1)

	// truncate.
	gmove(&n1, res)

	gc.Regfree(&n1b)
	gc.Regfree(&n2b)
	return true
}
Ejemplo n.º 13
0
// res = runtime.getg()
func getg(res *gc.Node) {
	var n1 gc.Node
	gc.Regalloc(&n1, res.Type, res)
	mov := optoas(gc.OAS, gc.Types[gc.Tptr])
	p := gins(mov, nil, &n1)
	p.From.Type = obj.TYPE_REG
	p.From.Reg = x86.REG_TLS
	p = gins(mov, nil, &n1)
	p.From = p.To
	p.From.Type = obj.TYPE_MEM
	p.From.Index = x86.REG_TLS
	p.From.Scale = 1
	gmove(&n1, res)
	gc.Regfree(&n1)
}
Ejemplo n.º 14
0
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := gc.GetReg(dr)
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}
Ejemplo n.º 15
0
/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := uint8(gc.GetReg(dr))

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		// TODO(marvin): Fix Node.EType type union.
		oldx.Etype = gc.EType(r) // squirrel away old r value
		gc.SetReg(dr, 1)
	}
}
Ejemplo n.º 16
0
/*
 * generate
 *	as $c, n
 */
func ginscon(as obj.As, c int64, n2 *gc.Node) {
	var n1 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)

	if as != arm64.AMOVD && (c < -arm64.BIG || c > arm64.BIG) || as == arm64.AMUL || n2 != nil && n2.Op != gc.OREGISTER {
		// cannot have more than 16-bit of immediate in ADD, etc.
		// instead, MOV into register first.
		var ntmp gc.Node
		gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil)

		gins(arm64.AMOVD, &n1, &ntmp)
		gins(as, &ntmp, n2)
		gc.Regfree(&ntmp)
		return
	}

	rawgins(as, &n1, n2)
}
Ejemplo n.º 17
0
/*
 * generate
 *	as $c, n
 */
func ginscon(as obj.As, c int64, n2 *gc.Node) {
	var n1 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)

	if as != mips.AMOVV && (c < -mips.BIG || c > mips.BIG) || n2.Op != gc.OREGISTER || as == mips.AMUL || as == mips.AMULU || as == mips.AMULV || as == mips.AMULVU {
		// cannot have more than 16-bit of immediate in ADD, etc.
		// instead, MOV into register first.
		var ntmp gc.Node
		gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil)

		rawgins(mips.AMOVV, &n1, &ntmp)
		rawgins(as, &ntmp, n2)
		gc.Regfree(&ntmp)
		return
	}

	rawgins(as, &n1, n2)
}
Ejemplo n.º 18
0
/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)
	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)
	gc.Cgen(nr, &n2)

	var ax, oldax, dx, olddx gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT32])
	savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT32])

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		var byteAH, byteDX gc.Node
		gc.Nodreg(&byteAH, t, x86.REG_AH)
		gc.Nodreg(&byteDX, t, x86.REG_DX)
		gmove(&byteAH, &byteDX)
	}

	gmove(&dx, res)

	restx(&ax, &oldax)
	restx(&dx, &olddx)
}
Ejemplo n.º 19
0
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands;
	// except 64-bit, which always copies via registers anyway.
	var a obj.As
	var r1 gc.Node
	if !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		case gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT32])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(arm.AMOVW, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT32])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(arm.AMOVW, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = gc.Simsimtype(con.Type)

		// constants can't move directly to memory
		if gc.Ismem(t) && !gc.Is64(t.Type) {
			goto hard
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatalf("gmove %v -> %v", f, t)
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8: // same size
		if !gc.Ismem(f) {
			a = arm.AMOVB
			break
		}
		fallthrough

	case gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8:
		a = arm.AMOVBS

	case gc.TUINT8<<16 | gc.TUINT8:
		if !gc.Ismem(f) {
			a = arm.AMOVB
			break
		}
		fallthrough

	case gc.TINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = arm.AMOVBU

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8:
		a = arm.AMOVBS

		goto trunc64

	case gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = arm.AMOVBU
		goto trunc64

	case gc.TINT16<<16 | gc.TINT16: // same size
		if !gc.Ismem(f) {
			a = arm.AMOVH
			break
		}
		fallthrough

	case gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16:
		a = arm.AMOVHS

	case gc.TUINT16<<16 | gc.TUINT16:
		if !gc.Ismem(f) {
			a = arm.AMOVH
			break
		}
		fallthrough

	case gc.TINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = arm.AMOVHU

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16:
		a = arm.AMOVHS

		goto trunc64

	case gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = arm.AMOVHU
		goto trunc64

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = arm.AMOVW

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Regalloc(&r1, t.Type, nil)
		gins(arm.AMOVW, &flo, &r1)
		gins(arm.AMOVW, &r1, t)
		gc.Regfree(&r1)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		var r1 gc.Node
		gc.Regalloc(&r1, flo.Type, nil)
		var r2 gc.Node
		gc.Regalloc(&r2, fhi.Type, nil)
		gins(arm.AMOVW, &flo, &r1)
		gins(arm.AMOVW, &fhi, &r2)
		gins(arm.AMOVW, &r1, &tlo)
		gins(arm.AMOVW, &r2, &thi)
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = arm.AMOVBS

		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = arm.AMOVBU

		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = arm.AMOVHS

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = arm.AMOVHU

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		var r1 gc.Node
		gc.Regalloc(&r1, tlo.Type, nil)
		var r2 gc.Node
		gc.Regalloc(&r2, thi.Type, nil)
		gmove(f, &r1)
		p1 := gins(arm.AMOVW, &r1, &r2)
		p1.From.Type = obj.TYPE_SHIFT
		p1.From.Offset = 2<<5 | 31<<7 | int64(r1.Reg)&15 // r1->31
		p1.From.Reg = 0

		//print("gmove: %v\n", p1);
		gins(arm.AMOVW, &r1, &tlo)

		gins(arm.AMOVW, &r2, &thi)
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		var r1 gc.Node
		gc.Regalloc(&r1, thi.Type, nil)
		gins(arm.AMOVW, ncon(0), &r1)
		gins(arm.AMOVW, &r1, &thi)
		gc.Regfree(&r1)
		splitclean()
		return

		//	case CASE(TFLOAT64, TUINT64):
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TUINT32,

		//	case CASE(TFLOAT32, TUINT64):

		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		fa := arm.AMOVF

		a := arm.AMOVFW
		if ft == gc.TFLOAT64 {
			fa = arm.AMOVD
			a = arm.AMOVDW
		}

		ta := arm.AMOVW
		switch tt {
		case gc.TINT8:
			ta = arm.AMOVBS

		case gc.TUINT8:
			ta = arm.AMOVBU

		case gc.TINT16:
			ta = arm.AMOVHS

		case gc.TUINT16:
			ta = arm.AMOVHU
		}

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		var r2 gc.Node
		gc.Regalloc(&r2, gc.Types[tt], t)
		gins(fa, f, &r1)        // load to fpu
		p1 := gins(a, &r1, &r1) // convert to w
		switch tt {
		case gc.TUINT8,
			gc.TUINT16,
			gc.TUINT32:
			p1.Scond |= arm.C_UBIT
		}

		gins(arm.AMOVW, &r1, &r2) // copy to cpu
		gins(ta, &r2, t)          // store
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		return

		/*
		 * integer to float
		 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT64:
		fa := arm.AMOVW

		switch ft {
		case gc.TINT8:
			fa = arm.AMOVBS

		case gc.TUINT8:
			fa = arm.AMOVBU

		case gc.TINT16:
			fa = arm.AMOVHS

		case gc.TUINT16:
			fa = arm.AMOVHU
		}

		a := arm.AMOVWF
		ta := arm.AMOVF
		if tt == gc.TFLOAT64 {
			a = arm.AMOVWD
			ta = arm.AMOVD
		}

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		var r2 gc.Node
		gc.Regalloc(&r2, gc.Types[tt], t)
		gins(fa, f, &r1)          // load to cpu
		gins(arm.AMOVW, &r1, &r2) // copy to fpu
		p1 := gins(a, &r2, &r2)   // convert
		switch ft {
		case gc.TUINT8,
			gc.TUINT16,
			gc.TUINT32:
			p1.Scond |= arm.C_UBIT
		}

		gins(ta, &r2, t) // store
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		return

	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		gc.Fatalf("gmove UINT64, TFLOAT not implemented")
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = arm.AMOVF

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = arm.AMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TFLOAT64], t)
		gins(arm.AMOVF, f, &r1)
		gins(arm.AMOVFD, &r1, &r1)
		gins(arm.AMOVD, &r1, t)
		gc.Regfree(&r1)
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TFLOAT64], t)
		gins(arm.AMOVD, f, &r1)
		gins(arm.AMOVDF, &r1, &r1)
		gins(arm.AMOVF, &r1, t)
		gc.Regfree(&r1)
		return
	}

	gins(a, f, t)
	return

	// TODO(kaib): we almost always require a register dest anyway, this can probably be
	// removed.
	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// truncate 64 bit integer
trunc64:
	var fhi gc.Node
	var flo gc.Node
	split64(f, &flo, &fhi)

	gc.Regalloc(&r1, t.Type, nil)
	gins(a, &flo, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	splitclean()
	return
}
Ejemplo n.º 20
0
/*
 * generate code to compute address of n,
 * a reference to a (perhaps nested) field inside
 * an array or struct.
 * return 0 on failure, 1 on success.
 * on success, leaves usable address in a.
 *
 * caller is responsible for calling sudoclean
 * after successful sudoaddable,
 * to release the register used for a.
 */
func sudoaddable(as obj.As, n *gc.Node, a *obj.Addr) bool {
	if n.Type == nil {
		return false
	}

	*a = obj.Addr{}

	switch n.Op {
	case gc.OLITERAL:
		if !gc.Isconst(n, gc.CTINT) {
			break
		}
		v := n.Int64()
		if v >= 32000 || v <= -32000 {
			break
		}
		switch as {
		default:
			return false

		case arm.AADD,
			arm.ASUB,
			arm.AAND,
			arm.AORR,
			arm.AEOR,
			arm.AMOVB,
			arm.AMOVBS,
			arm.AMOVBU,
			arm.AMOVH,
			arm.AMOVHS,
			arm.AMOVHU,
			arm.AMOVW:
			break
		}

		cleani += 2
		reg := &clean[cleani-1]
		reg1 := &clean[cleani-2]
		reg.Op = gc.OEMPTY
		reg1.Op = gc.OEMPTY
		gc.Naddr(a, n)
		return true

	case gc.ODOT,
		gc.ODOTPTR:
		cleani += 2
		reg := &clean[cleani-1]
		reg1 := &clean[cleani-2]
		reg.Op = gc.OEMPTY
		reg1.Op = gc.OEMPTY
		var nn *gc.Node
		var oary [10]int64
		o := gc.Dotoffset(n, oary[:], &nn)
		if nn == nil {
			sudoclean()
			return false
		}

		if nn.Addable && o == 1 && oary[0] >= 0 {
			// directly addressable set of DOTs
			n1 := *nn

			n1.Type = n.Type
			n1.Xoffset += oary[0]
			gc.Naddr(a, &n1)
			return true
		}

		gc.Regalloc(reg, gc.Types[gc.Tptr], nil)
		n1 := *reg
		n1.Op = gc.OINDREG
		if oary[0] >= 0 {
			gc.Agen(nn, reg)
			n1.Xoffset = oary[0]
		} else {
			gc.Cgen(nn, reg)
			gc.Cgen_checknil(reg)
			n1.Xoffset = -(oary[0] + 1)
		}

		for i := 1; i < o; i++ {
			if oary[i] >= 0 {
				gc.Fatalf("can't happen")
			}
			gins(arm.AMOVW, &n1, reg)
			gc.Cgen_checknil(reg)
			n1.Xoffset = -(oary[i] + 1)
		}

		a.Type = obj.TYPE_NONE
		a.Name = obj.NAME_NONE
		n1.Type = n.Type
		gc.Naddr(a, &n1)
		return true

	case gc.OINDEX:
		return false
	}

	return false
}
Ejemplo n.º 21
0
/*
 * generate comparison of nl, nr, both 64-bit.
 * nl is memory; nr is constant or memory.
 */
func cmp64(nl *gc.Node, nr *gc.Node, op gc.Op, likely int, to *obj.Prog) {
	var lo1 gc.Node
	var hi1 gc.Node
	var lo2 gc.Node
	var hi2 gc.Node
	var rr gc.Node

	split64(nl, &lo1, &hi1)
	split64(nr, &lo2, &hi2)

	// compare most significant word;
	// if they differ, we're done.
	t := hi1.Type

	if nl.Op == gc.OLITERAL || nr.Op == gc.OLITERAL {
		gins(x86.ACMPL, &hi1, &hi2)
	} else {
		gc.Regalloc(&rr, gc.Types[gc.TINT32], nil)
		gins(x86.AMOVL, &hi1, &rr)
		gins(x86.ACMPL, &rr, &hi2)
		gc.Regfree(&rr)
	}

	var br *obj.Prog
	switch op {
	default:
		gc.Fatalf("cmp64 %v %v", op, t)

		// cmp hi
	// jne L
	// cmp lo
	// jeq to
	// L:
	case gc.OEQ:
		br = gc.Gbranch(x86.AJNE, nil, -likely)

		// cmp hi
	// jne to
	// cmp lo
	// jne to
	case gc.ONE:
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)

		// cmp hi
	// jgt to
	// jlt L
	// cmp lo
	// jge to (or jgt to)
	// L:
	case gc.OGE,
		gc.OGT:
		gc.Patch(gc.Gbranch(optoas(gc.OGT, t), nil, likely), to)

		br = gc.Gbranch(optoas(gc.OLT, t), nil, -likely)

		// cmp hi
	// jlt to
	// jgt L
	// cmp lo
	// jle to (or jlt to)
	// L:
	case gc.OLE,
		gc.OLT:
		gc.Patch(gc.Gbranch(optoas(gc.OLT, t), nil, likely), to)

		br = gc.Gbranch(optoas(gc.OGT, t), nil, -likely)
	}

	// compare least significant word
	t = lo1.Type

	if nl.Op == gc.OLITERAL || nr.Op == gc.OLITERAL {
		gins(x86.ACMPL, &lo1, &lo2)
	} else {
		gc.Regalloc(&rr, gc.Types[gc.TINT32], nil)
		gins(x86.AMOVL, &lo1, &rr)
		gins(x86.ACMPL, &rr, &lo2)
		gc.Regfree(&rr)
	}

	// jump again
	gc.Patch(gc.Gbranch(optoas(op, t), nil, likely), to)

	// point first branch down here if appropriate
	if br != nil {
		gc.Patch(br, gc.Pc)
	}

	splitclean()
	splitclean()
}
Ejemplo n.º 22
0
/*
 * attempt to generate 64-bit
 *	res = n
 * return 1 on success, 0 if op not handled.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatalf("cgen64 %v of %v", n.Op, res.Op)
	}

	switch n.Op {
	default:
		gc.Fatalf("cgen64 %v", n.Op)

	case gc.OMINUS:
		gc.Cgen(n.Left, res)
		var hi1 gc.Node
		var lo1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANEGL, nil, &lo1)
		gins(x86.AADCL, ncon(0), &hi1)
		gins(x86.ANEGL, nil, &hi1)
		splitclean()
		return

	case gc.OCOM:
		gc.Cgen(n.Left, res)
		var lo1 gc.Node
		var hi1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANOTL, nil, &lo1)
		gins(x86.ANOTL, nil, &hi1)
		splitclean()
		return

		// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLROT,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR:
		break
	}

	l := n.Left
	r := n.Right
	if !l.Addable {
		var t1 gc.Node
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	var ax gc.Node
	gc.Nodreg(&ax, gc.Types[gc.TINT32], x86.REG_AX)
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)
	var dx gc.Node
	gc.Nodreg(&dx, gc.Types[gc.TINT32], x86.REG_DX)

	// Setup for binary operation.
	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)

	var lo2 gc.Node
	var hi2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	// Do op. Leave result in DX:AX.
	switch n.Op {
	// TODO: Constants
	case gc.OADD:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AADDL, &lo2, &ax)
		gins(x86.AADCL, &hi2, &dx)

		// TODO: Constants.
	case gc.OSUB:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.ASUBL, &lo2, &ax)
		gins(x86.ASBBL, &hi2, &dx)

	case gc.OMUL:
		// let's call the next three EX, FX and GX
		var ex, fx, gx gc.Node
		gc.Regalloc(&ex, gc.Types[gc.TPTR32], nil)
		gc.Regalloc(&fx, gc.Types[gc.TPTR32], nil)
		gc.Regalloc(&gx, gc.Types[gc.TPTR32], nil)

		// load args into DX:AX and EX:GX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AMOVL, &lo2, &gx)
		gins(x86.AMOVL, &hi2, &ex)

		// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
		gins(x86.AMOVL, &dx, &fx)

		gins(x86.AORL, &ex, &fx)
		p1 := gc.Gbranch(x86.AJNE, nil, 0)
		gins(x86.AMULL, &gx, nil) // implicit &ax
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// full 64x64 -> 64, from 32x32 -> 64.
		gins(x86.AIMULL, &gx, &dx)

		gins(x86.AMOVL, &ax, &fx)
		gins(x86.AIMULL, &ex, &fx)
		gins(x86.AADDL, &dx, &fx)
		gins(x86.AMOVL, &gx, &dx)
		gins(x86.AMULL, &dx, nil) // implicit &ax
		gins(x86.AADDL, &fx, &dx)
		gc.Patch(p2, gc.Pc)

		gc.Regfree(&ex)
		gc.Regfree(&fx)
		gc.Regfree(&gx)

	// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(r.Int64())

		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(x86.AMOVL, &lo1, &dx)
			gins(x86.AMOVL, &hi1, &ax)
		} else {
			gins(x86.AMOVL, &lo1, &ax)
			gins(x86.AMOVL, &hi1, &dx)
		}

		if v == 0 {
		} else // done
		{
			gins(x86.AMOVL, &dx, &cx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			p1 = gins(x86.ASHLL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_CX // double-width shift
			p1.From.Scale = 0
		}

	case gc.OLSH:
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int64())
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				gins(x86.AMOVL, ncon(0), &lo2)
				gins(x86.AMOVL, ncon(0), &hi2)
				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&lo1, &hi2)
				if v > 32 {
					gins(x86.ASHLL, ncon(uint32(v-32)), &hi2)
				}

				gins(x86.AMOVL, ncon(0), &lo2)
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			gins(x86.ASHLL, ncon(uint32(v)), &ax)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		gins(x86.AXORL, &dx, &dx)
		gins(x86.AXORL, &ax, &ax)
		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, zero low.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &ax, &dx)
		gins(x86.ASHLL, &cx, &dx) // SHLL only uses bottom 5 bits of count
		gins(x86.AXORL, &ax, &ax)
		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHLL, &cx, &dx)

		p1.From.Index = x86.REG_AX // double-width shift
		p1.From.Scale = 0
		gins(x86.ASHLL, &cx, &ax)
		gc.Patch(p2, gc.Pc)

	case gc.ORSH:
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int64())
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &lo2)
					gins(x86.ASARL, ncon(31), &lo2)
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &lo2)
					gins(x86.AMOVL, ncon(0), &hi2)
				}

				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&hi1, &lo2)
				if v > 32 {
					gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v-32)), &lo2)
				}
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &hi2)
				}
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHRL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_DX // double-width shift
			p1.From.Scale = 0
			gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v)), &dx)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero or sign-extend value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, ncon(31), &dx)
			gins(x86.AMOVL, &dx, &ax)
		} else {
			gins(x86.AXORL, &dx, &dx)
			gins(x86.AXORL, &ax, &ax)
		}

		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, sign-extend hi.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &dx, &ax)
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, &cx, &ax) // SARL only uses bottom 5 bits of count
			gins(x86.ASARL, ncon(31), &dx)
		} else {
			gins(x86.ASHRL, &cx, &ax)
			gins(x86.AXORL, &dx, &dx)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHRL, &cx, &ax)

		p1.From.Index = x86.REG_DX // double-width shift
		p1.From.Scale = 0
		gins(optoas(gc.ORSH, hi1.Type), &cx, &dx)
		gc.Patch(p2, gc.Pc)

		// make constant the right side (it usually is anyway).
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		if lo1.Op == gc.OLITERAL {
			nswap(&lo1, &lo2)
			nswap(&hi1, &hi2)
		}

		if lo2.Op == gc.OLITERAL {
			// special cases for constants.
			lv := uint32(lo2.Int64())
			hv := uint32(hi2.Int64())
			splitclean() // right side
			split64(res, &lo2, &hi2)
			switch n.Op {
			case gc.OXOR:
				gmove(&lo1, &lo2)
				gmove(&hi1, &hi2)
				switch lv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &lo2)

				default:
					gins(x86.AXORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &hi2)

				default:
					gins(x86.AXORL, ncon(hv), &hi2)
				}

			case gc.OAND:
				switch lv {
				case 0:
					gins(x86.AMOVL, ncon(0), &lo2)

				default:
					gmove(&lo1, &lo2)
					if lv != 0xffffffff {
						gins(x86.AANDL, ncon(lv), &lo2)
					}
				}

				switch hv {
				case 0:
					gins(x86.AMOVL, ncon(0), &hi2)

				default:
					gmove(&hi1, &hi2)
					if hv != 0xffffffff {
						gins(x86.AANDL, ncon(hv), &hi2)
					}
				}

			case gc.OOR:
				switch lv {
				case 0:
					gmove(&lo1, &lo2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &lo2)

				default:
					gmove(&lo1, &lo2)
					gins(x86.AORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					gmove(&hi1, &hi2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &hi2)

				default:
					gmove(&hi1, &hi2)
					gins(x86.AORL, ncon(hv), &hi2)
				}
			}

			splitclean()
			splitclean()
			return
		}

		gins(x86.AMOVL, &lo1, &ax)
		gins(x86.AMOVL, &hi1, &dx)
		gins(optoas(n.Op, lo1.Type), &lo2, &ax)
		gins(optoas(n.Op, lo1.Type), &hi2, &dx)
	}

	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	split64(res, &lo1, &hi1)
	gins(x86.AMOVL, &ax, &lo1)
	gins(x86.AMOVL, &dx, &hi1)
	splitclean()
}
Ejemplo n.º 23
0
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width)
	}

	w := uint64(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 8 // bytes
	q := w / 8 // dwords

	var r0 gc.Node
	gc.Nodreg(&r0, gc.Types[gc.TUINT64], arm64.REGZERO)
	var dst gc.Node

	// REGRT1 is reserved on arm64, see arm64/gsubr.go.
	gc.Nodreg(&dst, gc.Types[gc.Tptr], arm64.REGRT1)
	gc.Agen(nl, &dst)

	var boff uint64
	if q > 128 {
		p := gins(arm64.ASUB, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8

		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p = gins(arm64.AMOVD, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = int64(q * 8)

		p = gins(arm64.AMOVD, &r0, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 8
		p.Scond = arm64.C_XPRE
		pl := p

		p = gcmp(arm64.ACMP, &dst, &end)
		gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), pl)

		gc.Regfree(&end)

		// The loop leaves R16 on the last zeroed dword
		boff = 8
	} else if q >= 4 && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend
		p := gins(arm64.ASUB, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 8
		f := gc.Sysfunc("duffzero")
		p = gins(obj.ADUFFZERO, nil, f)
		gc.Afunclit(&p.To, f)

		// 4 and 128 = magic constants: see ../../runtime/asm_arm64x.s
		p.To.Offset = int64(4 * (128 - q))

		// duffzero leaves R16 on the last zeroed dword
		boff = 8
	} else {
		var p *obj.Prog
		for t := uint64(0); t < q; t++ {
			p = gins(arm64.AMOVD, &r0, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(8 * t)
		}

		boff = 8 * q
	}

	var p *obj.Prog
	for t := uint64(0); t < c; t++ {
		p = gins(arm64.AMOVB, &r0, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(t + boff)
	}
}
Ejemplo n.º 24
0
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, gc.FmtLong), gc.Nconv(t, gc.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var r2 gc.Node
	var r1 gc.Node
	var a obj.As
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(mips.AMOVV, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(mips.AMOVV, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// value -> value copy, first operand in memory.
	// any floating point operand requires register
	// src, so goto hard to copy to register first.
	if gc.Ismem(f) && ft != tt && (gc.Isfloat[ft] || gc.Isfloat[tt]) {
		cvt = gc.Types[ft]
		goto hard
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, gc.FmtLong), gc.Tconv(t.Type, gc.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = mips.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8, // truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = mips.AMOVBU

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = mips.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16, // truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = mips.AMOVHU

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = mips.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32, // truncate
		gc.TUINT64<<16 | gc.TUINT32:
		a = mips.AMOVWU

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = mips.AMOVV

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = mips.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = mips.AMOVBU

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = mips.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = mips.AMOVHU

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = mips.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = mips.AMOVWU

		goto rdst

		//warn("gmove: convert float to int not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native float64 -> int64 conversion.
	//	otherwise, subtract 2^63, convert, and add it back.
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32,
		gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		bignodes()

		gc.Regalloc(&r1, gc.Types[gc.TFLOAT64], nil)
		gmove(f, &r1)
		if tt == gc.TUINT64 {
			gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
			gmove(&bigf, &r2)
			gins3(mips.ACMPGED, &r1, &r2, nil)
			p1 := gc.Gbranch(mips.ABFPF, nil, 0)
			gins(mips.ASUBD, &r2, &r1)
			gc.Patch(p1, gc.Pc)
			gc.Regfree(&r2)
		}

		gc.Regalloc(&r2, gc.Types[gc.TINT64], t)
		gins(mips.ATRUNCDV, &r1, &r1)
		gins(mips.AMOVV, &r1, &r2)
		gc.Regfree(&r1)

		if tt == gc.TUINT64 {
			p1 := gc.Gbranch(mips.ABFPF, nil, 0) // use FCR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TINT64], mips.REGTMP)
			gmove(&bigi, &r1)
			gins(mips.AADDVU, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

		//warn("gmove: convert int to float not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native int64 -> float64 conversion.
	//	otherwise, halve (x -> (x>>1)|(x&1)), convert, and double.
	/*
	 * integer to float
	 */
	case gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64,
		gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		bignodes()

		var rtmp gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TINT64], nil)
		gmove(f, &r1)
		if ft == gc.TUINT64 {
			gc.Nodreg(&rtmp, gc.Types[gc.TUINT64], mips.REGTMP)
			gmove(&bigi, &rtmp)
			gins(mips.AAND, &r1, &rtmp)
			p1 := ginsbranch(mips.ABEQ, nil, &rtmp, nil, 0)
			var r3 gc.Node
			gc.Regalloc(&r3, gc.Types[gc.TUINT64], nil)
			p2 := gins3(mips.AAND, nil, &r1, &r3)
			p2.From.Type = obj.TYPE_CONST
			p2.From.Offset = 1
			p3 := gins(mips.ASRLV, nil, &r1)
			p3.From.Type = obj.TYPE_CONST
			p3.From.Offset = 1
			gins(mips.AOR, &r3, &r1)
			gc.Regfree(&r3)
			gc.Patch(p1, gc.Pc)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], t)
		gins(mips.AMOVV, &r1, &r2)
		gins(mips.AMOVVD, &r2, &r2)
		gc.Regfree(&r1)

		if ft == gc.TUINT64 {
			p1 := ginsbranch(mips.ABEQ, nil, &rtmp, nil, 0)
			gc.Nodreg(&r1, gc.Types[gc.TFLOAT64], mips.FREGTWO)
			gins(mips.AMULD, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = mips.AMOVF

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = mips.AMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = mips.AMOVFD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = mips.AMOVDF
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
Ejemplo n.º 25
0
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int64())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	var n1 gc.Node
	gc.Regalloc(&n1, nr.Type, nil) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var n2 gc.Node
	gc.Regalloc(&n2, nl.Type, res)

	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, 1)
		if op == gc.ORSH && nl.Type.IsSigned() {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
Ejemplo n.º 26
0
// blockcopy copies w bytes from &n to &res
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(s390x.AMOVD, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}
	defer gc.Regfree(&src)
	defer gc.Regfree(&dst)

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.Tptr], nil)
	defer gc.Regfree(&tmp)

	offset := int64(0)
	dir := _FORWARDS
	if osrc < odst && odst < osrc+w {
		// Reverse. Can't use MVC, fall back onto basic moves.
		dir = _BACKWARDS
		const copiesPerIter = 2
		if w >= 8*copiesPerIter {
			cnt := w - (w % (8 * copiesPerIter))
			ginscon(s390x.AADD, w, &src)
			ginscon(s390x.AADD, w, &dst)

			var end gc.Node
			gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
			p := gins(s390x.ASUB, nil, &end)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = cnt
			p.Reg = src.Reg

			var label *obj.Prog
			for i := 0; i < copiesPerIter; i++ {
				offset := int64(-8 * (i + 1))
				p := gins(s390x.AMOVD, &src, &tmp)
				p.From.Type = obj.TYPE_MEM
				p.From.Offset = offset
				if i == 0 {
					label = p
				}
				p = gins(s390x.AMOVD, &tmp, &dst)
				p.To.Type = obj.TYPE_MEM
				p.To.Offset = offset
			}

			ginscon(s390x.ASUB, 8*copiesPerIter, &src)
			ginscon(s390x.ASUB, 8*copiesPerIter, &dst)
			gins(s390x.ACMP, &src, &end)
			gc.Patch(gc.Gbranch(s390x.ABNE, nil, 0), label)
			gc.Regfree(&end)

			w -= cnt
		} else {
			offset = w
		}
	}

	if dir == _FORWARDS && w > 1024 {
		// Loop over MVCs
		cnt := w - (w % 256)

		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		add := gins(s390x.AADD, nil, &end)
		add.From.Type = obj.TYPE_CONST
		add.From.Offset = cnt
		add.Reg = src.Reg

		mvc := gins(s390x.AMVC, &src, &dst)
		mvc.From.Type = obj.TYPE_MEM
		mvc.From.Offset = 0
		mvc.To.Type = obj.TYPE_MEM
		mvc.To.Offset = 0
		mvc.From3 = new(obj.Addr)
		mvc.From3.Type = obj.TYPE_CONST
		mvc.From3.Offset = 256

		ginscon(s390x.AADD, 256, &src)
		ginscon(s390x.AADD, 256, &dst)
		gins(s390x.ACMP, &src, &end)
		gc.Patch(gc.Gbranch(s390x.ABNE, nil, 0), mvc)
		gc.Regfree(&end)

		w -= cnt
	}

	for w > 0 {
		cnt := w
		// If in reverse we can only do 8, 4, 2 or 1 bytes at a time.
		if dir == _BACKWARDS {
			switch {
			case cnt >= 8:
				cnt = 8
			case cnt >= 4:
				cnt = 4
			case cnt >= 2:
				cnt = 2
			}
		} else if cnt > 256 {
			cnt = 256
		}

		switch cnt {
		case 8, 4, 2, 1:
			op := s390x.AMOVB
			switch cnt {
			case 8:
				op = s390x.AMOVD
			case 4:
				op = s390x.AMOVW
			case 2:
				op = s390x.AMOVH
			}
			load := gins(op, &src, &tmp)
			load.From.Type = obj.TYPE_MEM
			load.From.Offset = offset

			store := gins(op, &tmp, &dst)
			store.To.Type = obj.TYPE_MEM
			store.To.Offset = offset

			if dir == _BACKWARDS {
				load.From.Offset -= cnt
				store.To.Offset -= cnt
			}

		default:
			p := gins(s390x.AMVC, &src, &dst)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = offset
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = offset
			p.From3 = new(obj.Addr)
			p.From3.Type = obj.TYPE_CONST
			p.From3.Offset = cnt
		}

		switch dir {
		case _FORWARDS:
			offset += cnt
		case _BACKWARDS:
			offset -= cnt
		}
		w -= cnt
	}
}
Ejemplo n.º 27
0
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will generate undefined result.
	// Also need to explicitly trap on division on zero,
	// the hardware will silently generate undefined result.
	// DIVW will leave unpredicable result in higher 32-bit,
	// so always use DIVD/DIVDU.
	t := nl.Type

	t0 := t
	check := 0
	if t.IsSigned() {
		check = 1
		if gc.Isconst(nl, gc.CTINT) && nl.Int64() != -(1<<uint64(t.Width*8-1)) {
			check = 0
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int64() != -1 {
			check = 0
		}
	}

	if t.Width < 8 {
		if t.IsSigned() {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
		check = 0
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	p1 := gins(optoas(gc.OCMP, t), &tr, nil)

	p1.To.Type = obj.TYPE_REG
	p1.To.Reg = s390x.REGZERO
	p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	var p2 *obj.Prog
	if check != 0 {
		var nm1 gc.Node
		gc.Nodconst(&nm1, t, -1)
		gins(optoas(gc.OCMP, t), &tr, &nm1)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &tl)

			gmove(&tl, res)
		} else {
			// a % (-1) is 0.
			var nz gc.Node
			gc.Nodconst(&nz, t, 0)

			gmove(&nz, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	p1 = gins(a, &tr, &tl)
	if op == gc.ODIV {
		gc.Regfree(&tr)
		gmove(&tl, res)
	} else {
		// A%B = A-(A/B*B)
		var tm gc.Node
		gc.Regalloc(&tm, t, nil)

		// patch div to use the 3 register form
		// TODO(minux): add gins3?
		p1.Reg = p1.To.Reg

		p1.To.Reg = tm.Reg
		gins(optoas(gc.OMUL, t), &tr, &tm)
		gc.Regfree(&tr)
		gins(optoas(gc.OSUB, t), &tm, &tl)
		gc.Regfree(&tm)
		gmove(&tl, res)
	}

	gc.Regfree(&tl)
	if check != 0 {
		gc.Patch(p2, gc.Pc)
	}
}
Ejemplo n.º 28
0
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if !t.IsFloat() && (op == gc.OLT || op == gc.OGE) {
		// swap nodes to fit SGT instruction
		n1, n2 = n2, n1
	}
	if t.IsFloat() && (op == gc.OLT || op == gc.OLE) {
		// swap nodes to fit CMPGT, CMPGE instructions and reverse relation
		n1, n2 = n2, n1
		if op == gc.OLT {
			op = gc.OGT
		} else {
			op = gc.OGE
		}
	}

	var r1, r2, g1, g2 gc.Node
	gc.Regalloc(&r1, t, n1)
	gc.Regalloc(&g1, n1.Type, &r1)
	gc.Cgen(n1, &g1)
	gmove(&g1, &r1)

	gc.Regalloc(&r2, t, n2)
	gc.Regalloc(&g2, n1.Type, &r2)
	gc.Cgen(n2, &g2)
	gmove(&g2, &r2)

	var p *obj.Prog
	var ntmp gc.Node
	gc.Nodreg(&ntmp, gc.Types[gc.TINT], mips.REGTMP)

	switch gc.Simtype[t.Etype] {
	case gc.TINT8,
		gc.TINT16,
		gc.TINT32,
		gc.TINT64:
		if op == gc.OEQ || op == gc.ONE {
			p = ginsbranch(optoas(op, t), nil, &r1, &r2, likely)
		} else {
			gins3(mips.ASGT, &r1, &r2, &ntmp)

			p = ginsbranch(optoas(op, t), nil, &ntmp, nil, likely)
		}

	case gc.TBOOL,
		gc.TUINT8,
		gc.TUINT16,
		gc.TUINT32,
		gc.TUINT64,
		gc.TPTR32,
		gc.TPTR64:
		if op == gc.OEQ || op == gc.ONE {
			p = ginsbranch(optoas(op, t), nil, &r1, &r2, likely)
		} else {
			gins3(mips.ASGTU, &r1, &r2, &ntmp)

			p = ginsbranch(optoas(op, t), nil, &ntmp, nil, likely)
		}

	case gc.TFLOAT32:
		switch op {
		default:
			gc.Fatalf("ginscmp: no entry for op=%s type=%v", op, t)

		case gc.OEQ,
			gc.ONE:
			gins3(mips.ACMPEQF, &r1, &r2, nil)

		case gc.OGE:
			gins3(mips.ACMPGEF, &r1, &r2, nil)

		case gc.OGT:
			gins3(mips.ACMPGTF, &r1, &r2, nil)
		}
		p = gc.Gbranch(optoas(op, t), nil, likely)

	case gc.TFLOAT64:
		switch op {
		default:
			gc.Fatalf("ginscmp: no entry for op=%s type=%v", op, t)

		case gc.OEQ,
			gc.ONE:
			gins3(mips.ACMPEQD, &r1, &r2, nil)

		case gc.OGE:
			gins3(mips.ACMPGED, &r1, &r2, nil)

		case gc.OGT:
			gins3(mips.ACMPGTD, &r1, &r2, nil)
		}
		p = gc.Gbranch(optoas(op, t), nil, likely)
	}

	gc.Regfree(&g2)
	gc.Regfree(&r2)
	gc.Regfree(&g1)
	gc.Regfree(&r1)

	return p
}
Ejemplo n.º 29
0
// clearfat clears (i.e. replaces with zeros) the value pointed to by nl.
func clearfat(nl *gc.Node) {
	if gc.Debug['g'] != 0 {
		fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width)
	}

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	var dst gc.Node
	gc.Regalloc(&dst, gc.Types[gc.Tptr], nil)
	gc.Agen(nl, &dst)

	var boff int64
	w := nl.Type.Width
	if w > clearLoopCutoff {
		// Generate a loop clearing 256 bytes per iteration using XCs.
		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p := gins(s390x.AMOVD, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w - (w % 256)

		p = gins(s390x.AXC, &dst, &dst)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = 0
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 0
		p.From3 = new(obj.Addr)
		p.From3.Offset = 256
		p.From3.Type = obj.TYPE_CONST
		pl := p

		ginscon(s390x.AADD, 256, &dst)
		gins(s390x.ACMP, &dst, &end)
		gc.Patch(gc.Gbranch(s390x.ABNE, nil, 0), pl)
		gc.Regfree(&end)
		w = w % 256
	}

	// Generate instructions to clear the remaining memory.
	for w > 0 {
		n := w

		// Can clear at most 256 bytes per instruction.
		if n > 256 {
			n = 256
		}

		switch n {
		// Handle very small clears using moves.
		case 8, 4, 2, 1:
			ins := s390x.AMOVB
			switch n {
			case 8:
				ins = s390x.AMOVD
			case 4:
				ins = s390x.AMOVW
			case 2:
				ins = s390x.AMOVH
			}
			p := gins(ins, nil, &dst)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = boff

		// Handle clears that would require multiple moves with a XC.
		default:
			p := gins(s390x.AXC, &dst, &dst)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = boff
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = boff
			p.From3 = new(obj.Addr)
			p.From3.Offset = n
			p.From3.Type = obj.TYPE_CONST
		}

		boff += n
		w -= n
	}

	gc.Regfree(&dst)
}
Ejemplo n.º 30
0
/*
 * attempt to generate 64-bit
 *	res = n
 * return 1 on success, 0 if op not handled.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatalf("cgen64 %v of %v", n.Op, res.Op)
	}

	l := n.Left
	var t1 gc.Node
	if !l.Addable {
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)
	switch n.Op {
	default:
		gc.Fatalf("cgen64 %v", n.Op)

	case gc.OMINUS:
		var lo2 gc.Node
		var hi2 gc.Node
		split64(res, &lo2, &hi2)

		gc.Regalloc(&t1, lo1.Type, nil)
		var al gc.Node
		gc.Regalloc(&al, lo1.Type, nil)
		var ah gc.Node
		gc.Regalloc(&ah, hi1.Type, nil)

		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)

		gmove(ncon(0), &t1)
		p1 := gins(arm.ASUB, &al, &t1)
		p1.Scond |= arm.C_SBIT
		gins(arm.AMOVW, &t1, &lo2)

		gmove(ncon(0), &t1)
		gins(arm.ASBC, &ah, &t1)
		gins(arm.AMOVW, &t1, &hi2)

		gc.Regfree(&t1)
		gc.Regfree(&al)
		gc.Regfree(&ah)
		splitclean()
		splitclean()
		return

	case gc.OCOM:
		gc.Regalloc(&t1, lo1.Type, nil)
		gmove(ncon(^uint32(0)), &t1)

		var lo2 gc.Node
		var hi2 gc.Node
		split64(res, &lo2, &hi2)
		var n1 gc.Node
		gc.Regalloc(&n1, lo1.Type, nil)

		gins(arm.AMOVW, &lo1, &n1)
		gins(arm.AEOR, &t1, &n1)
		gins(arm.AMOVW, &n1, &lo2)

		gins(arm.AMOVW, &hi1, &n1)
		gins(arm.AEOR, &t1, &n1)
		gins(arm.AMOVW, &n1, &hi2)

		gc.Regfree(&t1)
		gc.Regfree(&n1)
		splitclean()
		splitclean()
		return

		// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR,
		gc.OLROT:
		break
	}

	// setup for binary operators
	r := n.Right

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	var hi2 gc.Node
	var lo2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	var al gc.Node
	gc.Regalloc(&al, lo1.Type, nil)
	var ah gc.Node
	gc.Regalloc(&ah, hi1.Type, nil)

	// Do op. Leave result in ah:al.
	switch n.Op {
	default:
		gc.Fatalf("cgen64: not implemented: %v\n", n)

		// TODO: Constants
	case gc.OADD:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)

		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi2, &bh)
		gins(arm.AMOVW, &lo2, &bl)
		p1 := gins(arm.AADD, &bl, &al)
		p1.Scond |= arm.C_SBIT
		gins(arm.AADC, &bh, &ah)
		gc.Regfree(&bl)
		gc.Regfree(&bh)

		// TODO: Constants.
	case gc.OSUB:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)

		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo2, &bl)
		gins(arm.AMOVW, &hi2, &bh)
		p1 := gins(arm.ASUB, &bl, &al)
		p1.Scond |= arm.C_SBIT
		gins(arm.ASBC, &bh, &ah)
		gc.Regfree(&bl)
		gc.Regfree(&bh)

		// TODO(kaib): this can be done with 4 regs and does not need 6
	case gc.OMUL:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)

		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		var cl gc.Node
		gc.Regalloc(&cl, gc.Types[gc.TPTR32], nil)
		var ch gc.Node
		gc.Regalloc(&ch, gc.Types[gc.TPTR32], nil)

		// load args into bh:bl and bh:bl.
		gins(arm.AMOVW, &hi1, &bh)

		gins(arm.AMOVW, &lo1, &bl)
		gins(arm.AMOVW, &hi2, &ch)
		gins(arm.AMOVW, &lo2, &cl)

		// bl * cl -> ah al
		p1 := gins(arm.AMULLU, nil, nil)

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bl.Reg
		p1.Reg = cl.Reg
		p1.To.Type = obj.TYPE_REGREG
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(al.Reg)

		//print("%v\n", p1);

		// bl * ch + ah -> ah
		p1 = gins(arm.AMULA, nil, nil)

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bl.Reg
		p1.Reg = ch.Reg
		p1.To.Type = obj.TYPE_REGREG2
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(ah.Reg)

		//print("%v\n", p1);

		// bh * cl + ah -> ah
		p1 = gins(arm.AMULA, nil, nil)

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bh.Reg
		p1.Reg = cl.Reg
		p1.To.Type = obj.TYPE_REGREG2
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(ah.Reg)

		//print("%v\n", p1);

		gc.Regfree(&bh)

		gc.Regfree(&bl)
		gc.Regfree(&ch)
		gc.Regfree(&cl)

		// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(r.Int64())

		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(arm.AMOVW, &hi1, &bl)
			gins(arm.AMOVW, &lo1, &bh)
		} else {
			gins(arm.AMOVW, &hi1, &bh)
			gins(arm.AMOVW, &lo1, &bl)
		}

		if v == 0 {
			gins(arm.AMOVW, &bh, &ah)
			gins(arm.AMOVW, &bl, &al)
		} else {
			// rotate by 1 <= v <= 31
			//	MOVW	bl<<v, al
			//	MOVW	bh<<v, ah
			//	OR		bl>>(32-v), ah
			//	OR		bh>>(32-v), al
			gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al)

			gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah)
			gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah)
			gshift(arm.AORR, &bh, arm.SHIFT_LR, int32(32-v), &al)
		}

		gc.Regfree(&bl)
		gc.Regfree(&bh)

	case gc.OLSH:
		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)

		var p6 *obj.Prog
		var s gc.Node
		var n1 gc.Node
		var creg gc.Node
		var p1 *obj.Prog
		var p2 *obj.Prog
		var p3 *obj.Prog
		var p4 *obj.Prog
		var p5 *obj.Prog
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int64())
			if v >= 64 {
				// TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al)
				// here and below (verify it optimizes to EOR)
				gins(arm.AEOR, &al, &al)

				gins(arm.AEOR, &ah, &ah)
			} else if v > 32 {
				gins(arm.AEOR, &al, &al)

				//	MOVW	bl<<(v-32), ah
				gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v-32), &ah)
			} else if v == 32 {
				gins(arm.AEOR, &al, &al)
				gins(arm.AMOVW, &bl, &ah)
			} else if v > 0 {
				//	MOVW	bl<<v, al
				gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al)

				//	MOVW	bh<<v, ah
				gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah)

				//	OR		bl>>(32-v), ah
				gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah)
			} else {
				gins(arm.AMOVW, &bl, &al)
				gins(arm.AMOVW, &bh, &ah)
			}

			goto olsh_break
		}

		gc.Regalloc(&s, gc.Types[gc.TUINT32], nil)
		gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil)
		if gc.Is64(r.Type) {
			// shift is >= 1<<32
			var cl gc.Node
			var ch gc.Node
			split64(r, &cl, &ch)

			gmove(&ch, &s)
			gins(arm.ATST, &s, nil)
			p6 = gc.Gbranch(arm.ABNE, nil, 0)
			gmove(&cl, &s)
			splitclean()
		} else {
			gmove(r, &s)
			p6 = nil
		}

		gins(arm.ATST, &s, nil)

		// shift == 0
		p1 = gins(arm.AMOVW, &bl, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bh, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p2 = gc.Gbranch(arm.ABEQ, nil, 0)

		// shift is < 32
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		bl<<s, al
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &al)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW.LO		bh<<s, ah
		p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LL, &s, &ah)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		s, creg
		p1 = gins(arm.ASUB, &s, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	OR.LO		bl>>creg, ah
		p1 = gregshift(arm.AORR, &bl, arm.SHIFT_LR, &creg, &ah)

		p1.Scond = arm.C_SCOND_LO

		//	BLO	end
		p3 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift == 32
		p1 = gins(arm.AEOR, &al, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bl, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p4 = gc.Gbranch(arm.ABEQ, nil, 0)

		// shift is < 64
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	EOR.LO	al, al
		p1 = gins(arm.AEOR, &al, &al)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		creg, s
		p1 = gins(arm.ASUB, &creg, &s)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW	bl<<s, ah
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &ah)

		p1.Scond = arm.C_SCOND_LO

		p5 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift >= 64
		if p6 != nil {
			gc.Patch(p6, gc.Pc)
		}
		gins(arm.AEOR, &al, &al)
		gins(arm.AEOR, &ah, &ah)

		gc.Patch(p2, gc.Pc)
		gc.Patch(p3, gc.Pc)
		gc.Patch(p4, gc.Pc)
		gc.Patch(p5, gc.Pc)
		gc.Regfree(&s)
		gc.Regfree(&creg)

	olsh_break:
		gc.Regfree(&bl)
		gc.Regfree(&bh)

	case gc.ORSH:
		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)

		var p4 *obj.Prog
		var p5 *obj.Prog
		var n1 gc.Node
		var p6 *obj.Prog
		var s gc.Node
		var p1 *obj.Prog
		var p2 *obj.Prog
		var creg gc.Node
		var p3 *obj.Prog
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int64())
			if v >= 64 {
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->31, al
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al)

					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					gins(arm.AEOR, &al, &al)
					gins(arm.AEOR, &ah, &ah)
				}
			} else if v > 32 {
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->(v-32), al
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v-32), &al)

					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					//	MOVW	bh>>(v-32), al
					gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v-32), &al)

					gins(arm.AEOR, &ah, &ah)
				}
			} else if v == 32 {
				gins(arm.AMOVW, &bh, &al)
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					gins(arm.AEOR, &ah, &ah)
				}
			} else if v > 0 {
				//	MOVW	bl>>v, al
				gshift(arm.AMOVW, &bl, arm.SHIFT_LR, int32(v), &al)

				//	OR		bh<<(32-v), al
				gshift(arm.AORR, &bh, arm.SHIFT_LL, int32(32-v), &al)

				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->v, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v), &ah)
				} else {
					//	MOVW	bh>>v, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v), &ah)
				}
			} else {
				gins(arm.AMOVW, &bl, &al)
				gins(arm.AMOVW, &bh, &ah)
			}

			goto orsh_break
		}

		gc.Regalloc(&s, gc.Types[gc.TUINT32], nil)
		gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil)
		if gc.Is64(r.Type) {
			// shift is >= 1<<32
			var ch gc.Node
			var cl gc.Node
			split64(r, &cl, &ch)

			gmove(&ch, &s)
			gins(arm.ATST, &s, nil)
			var p1 *obj.Prog
			if bh.Type.Etype == gc.TINT32 {
				p1 = gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
			} else {
				p1 = gins(arm.AEOR, &ah, &ah)
			}
			p1.Scond = arm.C_SCOND_NE
			p6 = gc.Gbranch(arm.ABNE, nil, 0)
			gmove(&cl, &s)
			splitclean()
		} else {
			gmove(r, &s)
			p6 = nil
		}

		gins(arm.ATST, &s, nil)

		// shift == 0
		p1 = gins(arm.AMOVW, &bl, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bh, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p2 = gc.Gbranch(arm.ABEQ, nil, 0)

		// check if shift is < 32
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		bl>>s, al
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LR, &s, &al)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		s,creg
		p1 = gins(arm.ASUB, &s, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	OR.LO		bh<<(32-s), al
		p1 = gregshift(arm.AORR, &bh, arm.SHIFT_LL, &creg, &al)

		p1.Scond = arm.C_SCOND_LO

		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->s, ah
			p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &ah)
		} else {
			//	MOVW	bh>>s, ah
			p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &ah)
		}

		p1.Scond = arm.C_SCOND_LO

		//	BLO	end
		p3 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift == 32
		p1 = gins(arm.AMOVW, &bh, &al)

		p1.Scond = arm.C_SCOND_EQ
		if bh.Type.Etype == gc.TINT32 {
			gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
		} else {
			gins(arm.AEOR, &ah, &ah)
		}
		p4 = gc.Gbranch(arm.ABEQ, nil, 0)

		// check if shift is < 64
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		creg, s
		p1 = gins(arm.ASUB, &creg, &s)

		p1.Scond = arm.C_SCOND_LO

		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->(s-32), al
			p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &al)

			p1.Scond = arm.C_SCOND_LO
		} else {
			//	MOVW	bh>>(v-32), al
			p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &al)

			p1.Scond = arm.C_SCOND_LO
		}

		//	BLO	end
		p5 = gc.Gbranch(arm.ABLO, nil, 0)

		// s >= 64
		if p6 != nil {
			gc.Patch(p6, gc.Pc)
		}
		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->31, al
			gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al)
		} else {
			gins(arm.AEOR, &al, &al)
		}

		gc.Patch(p2, gc.Pc)
		gc.Patch(p3, gc.Pc)
		gc.Patch(p4, gc.Pc)
		gc.Patch(p5, gc.Pc)
		gc.Regfree(&s)
		gc.Regfree(&creg)

	orsh_break:
		gc.Regfree(&bl)
		gc.Regfree(&bh)

		// TODO(kaib): literal optimizations
	// make constant the right side (it usually is anyway).
	//		if(lo1.op == OLITERAL) {
	//			nswap(&lo1, &lo2);
	//			nswap(&hi1, &hi2);
	//		}
	//		if(lo2.op == OLITERAL) {
	//			// special cases for constants.
	//			lv = mpgetfix(lo2.val.u.xval);
	//			hv = mpgetfix(hi2.val.u.xval);
	//			splitclean();	// right side
	//			split64(res, &lo2, &hi2);
	//			switch(n->op) {
	//			case OXOR:
	//				gmove(&lo1, &lo2);
	//				gmove(&hi1, &hi2);
	//				switch(lv) {
	//				case 0:
	//					break;
	//				case 0xffffffffu:
	//					gins(ANOTL, N, &lo2);
	//					break;
	//				default:
	//					gins(AXORL, ncon(lv), &lo2);
	//					break;
	//				}
	//				switch(hv) {
	//				case 0:
	//					break;
	//				case 0xffffffffu:
	//					gins(ANOTL, N, &hi2);
	//					break;
	//				default:
	//					gins(AXORL, ncon(hv), &hi2);
	//					break;
	//				}
	//				break;

	//			case OAND:
	//				switch(lv) {
	//				case 0:
	//					gins(AMOVL, ncon(0), &lo2);
	//					break;
	//				default:
	//					gmove(&lo1, &lo2);
	//					if(lv != 0xffffffffu)
	//						gins(AANDL, ncon(lv), &lo2);
	//					break;
	//				}
	//				switch(hv) {
	//				case 0:
	//					gins(AMOVL, ncon(0), &hi2);
	//					break;
	//				default:
	//					gmove(&hi1, &hi2);
	//					if(hv != 0xffffffffu)
	//						gins(AANDL, ncon(hv), &hi2);
	//					break;
	//				}
	//				break;

	//			case OOR:
	//				switch(lv) {
	//				case 0:
	//					gmove(&lo1, &lo2);
	//					break;
	//				case 0xffffffffu:
	//					gins(AMOVL, ncon(0xffffffffu), &lo2);
	//					break;
	//				default:
	//					gmove(&lo1, &lo2);
	//					gins(AORL, ncon(lv), &lo2);
	//					break;
	//				}
	//				switch(hv) {
	//				case 0:
	//					gmove(&hi1, &hi2);
	//					break;
	//				case 0xffffffffu:
	//					gins(AMOVL, ncon(0xffffffffu), &hi2);
	//					break;
	//				default:
	//					gmove(&hi1, &hi2);
	//					gins(AORL, ncon(hv), &hi2);
	//					break;
	//				}
	//				break;
	//			}
	//			splitclean();
	//			splitclean();
	//			goto out;
	//		}
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		var n1 gc.Node
		gc.Regalloc(&n1, lo1.Type, nil)

		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo2, &n1)
		gins(optoas(n.Op, lo1.Type), &n1, &al)
		gins(arm.AMOVW, &hi2, &n1)
		gins(optoas(n.Op, lo1.Type), &n1, &ah)
		gc.Regfree(&n1)
	}

	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	split64(res, &lo1, &hi1)
	gins(arm.AMOVW, &al, &lo1)
	gins(arm.AMOVW, &ah, &hi1)
	splitclean()

	//out:
	gc.Regfree(&al)

	gc.Regfree(&ah)
}