コード例 #1
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if gc.Isint[t.Etype] && n1.Op == gc.OLITERAL && n2.Op != gc.OLITERAL {
		// Reverse comparison to place constant last.
		op = gc.Brrev(op)
		n1, n2 = n2, n1
	}

	var r1, r2, g1, g2 gc.Node
	gc.Regalloc(&r1, t, n1)
	gc.Regalloc(&g1, n1.Type, &r1)
	gc.Cgen(n1, &g1)
	gmove(&g1, &r1)
	if gc.Isint[t.Etype] && gc.Isconst(n2, gc.CTINT) {
		ginscon2(optoas(gc.OCMP, t), &r1, n2.Int())
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
		rawgins(optoas(gc.OCMP, t), &r1, &r2)
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	gc.Regfree(&g1)
	gc.Regfree(&r1)
	return gc.Gbranch(optoas(op, t), nil, likely)
}
コード例 #2
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if gc.Isint[t.Etype] && n1.Op == gc.OLITERAL && n1.Int() == 0 && n2.Op != gc.OLITERAL {
		op = gc.Brrev(op)
		n1, n2 = n2, n1
	}
	var r1, r2, g1, g2 gc.Node
	gc.Regalloc(&r1, t, n1)
	gc.Regalloc(&g1, n1.Type, &r1)
	gc.Cgen(n1, &g1)
	gmove(&g1, &r1)
	if gc.Isint[t.Etype] && n2.Op == gc.OLITERAL && n2.Int() == 0 {
		gins(arm.ACMP, &r1, n2)
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
		gins(optoas(gc.OCMP, t), &r1, &r2)
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	gc.Regfree(&g1)
	gc.Regfree(&r1)
	return gc.Gbranch(optoas(op, t), nil, likely)
}
コード例 #3
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	t := nl.Type

	t0 := t

	if t.Width < 8 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	p1 := ginsbranch(mips.ABNE, nil, &tr, nil, 0)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	gins3(a, &tr, &tl, nil)
	gc.Regfree(&tr)
	if op == gc.ODIV {
		var lo gc.Node
		gc.Nodreg(&lo, gc.Types[gc.TUINT64], mips.REG_LO)
		gins(mips.AMOVV, &lo, &tl)
	} else { // remainder in REG_HI
		var hi gc.Node
		gc.Nodreg(&hi, gc.Types[gc.TUINT64], mips.REG_HI)
		gins(mips.AMOVV, &hi, &tl)
	}
	gmove(&tl, res)
	gc.Regfree(&tl)
}
コード例 #4
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
// gins is called by the front end.
// It synthesizes some multiple-instruction sequences
// so the front end can stay simpler.
func gins(as int, f, t *gc.Node) *obj.Prog {
	if as >= obj.A_ARCHSPECIFIC {
		if x, ok := f.IntLiteral(); ok {
			ginscon(as, x, t)
			return nil // caller must not use
		}
	}
	return rawgins(as, f, t)
}
コード例 #5
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * n is a 64-bit value.  fill in lo and hi to refer to its 32-bit halves.
 */
func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
	if !gc.Is64(n.Type) {
		gc.Fatalf("split64 %v", n.Type)
	}

	if nsclean >= len(sclean) {
		gc.Fatalf("split64 clean")
	}
	sclean[nsclean].Op = gc.OEMPTY
	nsclean++
	switch n.Op {
	default:
		switch n.Op {
		default:
			var n1 gc.Node
			if !dotaddable(n, &n1) {
				gc.Igen(n, &n1, nil)
				sclean[nsclean-1] = n1
			}

			n = &n1

		case gc.ONAME:
			if n.Class == gc.PPARAMREF {
				var n1 gc.Node
				gc.Cgen(n.Name.Heapaddr, &n1)
				sclean[nsclean-1] = n1
				n = &n1
			}

			// nothing
		case gc.OINDREG:
			break
		}

		*lo = *n
		*hi = *n
		lo.Type = gc.Types[gc.TUINT32]
		if n.Type.Etype == gc.TINT64 {
			hi.Type = gc.Types[gc.TINT32]
		} else {
			hi.Type = gc.Types[gc.TUINT32]
		}
		hi.Xoffset += 4

	case gc.OLITERAL:
		var n1 gc.Node
		n.Convconst(&n1, n.Type)
		i := n1.Int()
		gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
		i >>= 32
		if n.Type.Etype == gc.TINT64 {
			gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
		} else {
			gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
		}
	}
}
コード例 #6
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func dotaddable(n *gc.Node, n1 *gc.Node) bool {
	if n.Op != gc.ODOT {
		return false
	}

	var oary [10]int64
	var nn *gc.Node
	o := gc.Dotoffset(n, oary[:], &nn)
	if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 {
		*n1 = *nn
		n1.Type = n.Type
		n1.Xoffset += oary[0]
		return true
	}

	return false
}
コード例 #7
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
func restx(x *gc.Node, oldx *gc.Node) {
	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT64]
		gc.SetReg(int(x.Reg), int(oldx.Etype))
		gmove(oldx, x)
		gc.Regfree(oldx)
	}
}
コード例 #8
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
func restx(x *gc.Node, oldx *gc.Node) {
	gc.Regfree(x)

	if oldx.Op != 0 {
		x.Type = gc.Types[gc.TINT32]
		gmove(oldx, x)
	}
}
コード例 #9
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
/*
 * register dr is one of the special ones (AX, CX, DI, SI, etc.).
 * we need to use it.  if it is already allocated as a temporary
 * (r > 1; can only happen if a routine like sgen passed a
 * special as cgen's res and then cgen used regalloc to reuse
 * it as its own temporary), then move it for now to another
 * register.  caller must call restx to move it back.
 * the move is not necessary if dr == res, because res is
 * known to be dead.
 */
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := uint8(gc.GetReg(dr))

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	gc.Nodreg(x, t, dr)
	if r > 1 && !gc.Samereg(x, res) {
		gc.Regalloc(oldx, gc.Types[gc.TINT64], nil)
		x.Type = gc.Types[gc.TINT64]
		gmove(x, oldx)
		x.Type = t
		// TODO(marvin): Fix Node.EType type union.
		oldx.Etype = gc.EType(r) // squirrel away old r value
		gc.SetReg(dr, 1)
	}
}
コード例 #10
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func bignodes() {
	if bignodes_did {
		return
	}
	bignodes_did = true

	gc.Nodconst(&zerof, gc.Types[gc.TINT64], 0)
	zerof.Convconst(&zerof, gc.Types[gc.TFLOAT64])

	var i big.Int
	i.SetInt64(1)
	i.Lsh(&i, 63)
	var bigi gc.Node

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two63f, gc.Types[gc.TFLOAT64])

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	i.Lsh(&i, 1)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two64f, gc.Types[gc.TFLOAT64])
}
コード例 #11
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := false
	if gc.Issigned[t.Etype] {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = false
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = false
	}

	a := optoas(op, t)

	var n3 gc.Node
	gc.Regalloc(&n3, t0, nil)
	var ax gc.Node
	var oldax gc.Node
	if nl.Ullman >= nr.Ullman {
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
		gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen
		gc.Cgen(nr, &n3)
		gc.Regfree(&ax)
	} else {
		gc.Cgen(nr, &n3)
		savex(x86.REG_AX, &ax, &oldax, res, t0)
		gc.Cgen(nl, &ax)
	}

	if t != t0 {
		// Convert
		ax1 := ax

		n31 := n3
		ax.Type = t
		n3.Type = t
		gmove(&ax1, &ax)
		gmove(&n31, &n3)
	}

	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	var p2 *obj.Prog
	if check {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n3, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, &ax)

			gmove(&ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	var olddx gc.Node
	var dx gc.Node
	savex(x86.REG_DX, &dx, &olddx, res, t)
	if !gc.Issigned[t.Etype] {
		gc.Nodconst(&n4, t, 0)
		gmove(&n4, &dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(a, &n3, nil)
	gc.Regfree(&n3)
	if op == gc.ODIV {
		gmove(&ax, res)
	} else {
		gmove(&dx, res)
	}
	restx(&dx, &olddx)
	if check {
		gc.Patch(p2, gc.Pc)
	}
	restx(&ax, &oldax)
}
コード例 #12
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = tt // so big switch will choose a simple mov

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}

			// 64-bit immediates are really 32-bit sign-extended
			// unless moving into a register.
			if gc.Isint[tt] {
				if i := con.Int(); int64(int32(i)) != i {
					goto hard
				}
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = x86.AMOVQL

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = x86.AMOVQ

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = x86.AMOVBQSX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = x86.AMOVBQZX
		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = x86.AMOVWQSX
		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = x86.AMOVWQZX
		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = x86.AMOVLQSX

		goto rdst

		// AMOVL into a register zeros the top of the register,
	// so this is not always necessary, but if we rely on AMOVL
	// the optimizer is almost certain to screw with us.
	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = x86.AMOVLQZX

		goto rdst

		/*
		* float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT32:
		a = x86.ACVTTSS2SL

		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL
		goto rdst

	case gc.TFLOAT32<<16 | gc.TINT64:
		a = x86.ACVTTSS2SQ
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT64:
		a = x86.ACVTTSD2SQ
		goto rdst

		// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hard

		// algorithm is:
	//	if small enough, use native float64 -> int64 conversion.
	//	otherwise, subtract 2^63, convert, and add it back.
	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		a := x86.ACVTTSS2SQ

		if ft == gc.TFLOAT64 {
			a = x86.ACVTTSD2SQ
		}
		bignodes()
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], nil)
		var r2 gc.Node
		gc.Regalloc(&r2, gc.Types[tt], t)
		var r3 gc.Node
		gc.Regalloc(&r3, gc.Types[ft], nil)
		var r4 gc.Node
		gc.Regalloc(&r4, gc.Types[tt], nil)
		gins(optoas(gc.OAS, f.Type), f, &r1)
		gins(optoas(gc.OCMP, f.Type), &bigf, &r1)
		p1 := gc.Gbranch(optoas(gc.OLE, f.Type), nil, +1)
		gins(a, &r1, &r2)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(optoas(gc.OAS, f.Type), &bigf, &r3)
		gins(optoas(gc.OSUB, f.Type), &r3, &r1)
		gins(a, &r1, &r2)
		gins(x86.AMOVQ, &bigi, &r4)
		gins(x86.AXORQ, &r4, &r2)
		gc.Patch(p2, gc.Pc)
		gmove(&r2, t)
		gc.Regfree(&r4)
		gc.Regfree(&r3)
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		return

		/*
		 * integer to float
		 */
	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS

		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSQ2SS
		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT64:
		a = x86.ACVTSQ2SD
		goto rdst

		// convert via int32
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hard

		// algorithm is:
	//	if small enough, use native int64 -> uint64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		a := x86.ACVTSQ2SS

		if tt == gc.TFLOAT64 {
			a = x86.ACVTSQ2SD
		}
		var zero gc.Node
		gc.Nodconst(&zero, gc.Types[gc.TUINT64], 0)
		var one gc.Node
		gc.Nodconst(&one, gc.Types[gc.TUINT64], 1)
		var r1 gc.Node
		gc.Regalloc(&r1, f.Type, f)
		var r2 gc.Node
		gc.Regalloc(&r2, t.Type, t)
		var r3 gc.Node
		gc.Regalloc(&r3, f.Type, nil)
		var r4 gc.Node
		gc.Regalloc(&r4, f.Type, nil)
		gmove(f, &r1)
		gins(x86.ACMPQ, &r1, &zero)
		p1 := gc.Gbranch(x86.AJLT, nil, +1)
		gins(a, &r1, &r2)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gmove(&r1, &r3)
		gins(x86.ASHRQ, &one, &r3)
		gmove(&r1, &r4)
		gins(x86.AANDL, &one, &r4)
		gins(x86.AORQ, &r4, &r3)
		gins(a, &r3, &r2)
		gins(optoas(gc.OADD, t.Type), &r2, &r2)
		gc.Patch(p2, gc.Pc)
		gmove(&r2, t)
		gc.Regfree(&r4)
		gc.Regfree(&r3)
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	{
		var r1 gc.Node
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	var r1 gc.Node
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
コード例 #13
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * generate code to compute address of n,
 * a reference to a (perhaps nested) field inside
 * an array or struct.
 * return 0 on failure, 1 on success.
 * on success, leaves usable address in a.
 *
 * caller is responsible for calling sudoclean
 * after successful sudoaddable,
 * to release the register used for a.
 */
func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool {
	if n.Type == nil {
		return false
	}

	*a = obj.Addr{}

	switch n.Op {
	case gc.OLITERAL:
		if !gc.Isconst(n, gc.CTINT) {
			break
		}
		v := n.Int()
		if v >= 32000 || v <= -32000 {
			break
		}
		switch as {
		default:
			return false

		case x86.AADDB,
			x86.AADDW,
			x86.AADDL,
			x86.AADDQ,
			x86.ASUBB,
			x86.ASUBW,
			x86.ASUBL,
			x86.ASUBQ,
			x86.AANDB,
			x86.AANDW,
			x86.AANDL,
			x86.AANDQ,
			x86.AORB,
			x86.AORW,
			x86.AORL,
			x86.AORQ,
			x86.AXORB,
			x86.AXORW,
			x86.AXORL,
			x86.AXORQ,
			x86.AINCB,
			x86.AINCW,
			x86.AINCL,
			x86.AINCQ,
			x86.ADECB,
			x86.ADECW,
			x86.ADECL,
			x86.ADECQ,
			x86.AMOVB,
			x86.AMOVW,
			x86.AMOVL,
			x86.AMOVQ:
			break
		}

		cleani += 2
		reg := &clean[cleani-1]
		reg1 := &clean[cleani-2]
		reg.Op = gc.OEMPTY
		reg1.Op = gc.OEMPTY
		gc.Naddr(a, n)
		return true

	case gc.ODOT,
		gc.ODOTPTR:
		cleani += 2
		reg := &clean[cleani-1]
		reg1 := &clean[cleani-2]
		reg.Op = gc.OEMPTY
		reg1.Op = gc.OEMPTY
		var nn *gc.Node
		var oary [10]int64
		o := gc.Dotoffset(n, oary[:], &nn)
		if nn == nil {
			sudoclean()
			return false
		}

		if nn.Addable && o == 1 && oary[0] >= 0 {
			// directly addressable set of DOTs
			n1 := *nn

			n1.Type = n.Type
			n1.Xoffset += oary[0]
			gc.Naddr(a, &n1)
			return true
		}

		gc.Regalloc(reg, gc.Types[gc.Tptr], nil)
		n1 := *reg
		n1.Op = gc.OINDREG
		if oary[0] >= 0 {
			gc.Agen(nn, reg)
			n1.Xoffset = oary[0]
		} else {
			gc.Cgen(nn, reg)
			gc.Cgen_checknil(reg)
			n1.Xoffset = -(oary[0] + 1)
		}

		for i := 1; i < o; i++ {
			if oary[i] >= 0 {
				gc.Fatalf("can't happen")
			}
			gins(movptr, &n1, reg)
			gc.Cgen_checknil(reg)
			n1.Xoffset = -(oary[i] + 1)
		}

		a.Type = obj.TYPE_NONE
		a.Index = obj.TYPE_NONE
		gc.Fixlargeoffset(&n1)
		gc.Naddr(a, &n1)
		return true

	case gc.OINDEX:
		return false
	}

	return false
}
コード例 #14
0
ファイル: cgen.go プロジェクト: ckeyer/gosrc
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type)

	case 1:
		op = arm.AMOVB

	case 2:
		op = arm.AMOVH

	case 4:
		op = arm.AMOVW
	}

	if w%int64(align) != 0 {
		gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type)
	}
	c := int32(w / int64(align))

	if osrc%int64(align) != 0 || odst%int64(align) != 0 {
		gc.Fatalf("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align)
	}

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align
	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 {
		var r0 gc.Node
		r0.Op = gc.OREGISTER
		r0.Reg = arm.REG_R0
		var r1 gc.Node
		r1.Op = gc.OREGISTER
		r1.Reg = arm.REG_R0 + 1
		var r2 gc.Node
		r2.Op = gc.OREGISTER
		r2.Reg = arm.REG_R0 + 2

		var src gc.Node
		gc.Regalloc(&src, gc.Types[gc.Tptr], &r1)
		var dst gc.Node
		gc.Regalloc(&dst, gc.Types[gc.Tptr], &r2)
		if n.Ullman >= res.Ullman {
			// eval n first
			gc.Agen(n, &src)

			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			gc.Agen(res, &dst)
		} else {
			// eval res first
			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			gc.Agen(res, &dst)
			gc.Agen(n, &src)
		}

		var tmp gc.Node
		gc.Regalloc(&tmp, gc.Types[gc.Tptr], &r0)
		f := gc.Sysfunc("duffcopy")
		p := gins(obj.ADUFFCOPY, nil, f)
		gc.Afunclit(&p.To, f)

		// 8 and 128 = magic constants: see ../../runtime/asm_arm.s
		p.To.Offset = 8 * (128 - int64(c))

		gc.Regfree(&tmp)
		gc.Regfree(&src)
		gc.Regfree(&dst)
		return
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(arm.AMOVW, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.TUINT32], nil)

	// set up end marker
	var nend gc.Node

	if c >= 4 {
		gc.Regalloc(&nend, gc.Types[gc.TUINT32], nil)

		p := gins(arm.AMOVW, &src, &nend)
		p.From.Type = obj.TYPE_ADDR
		if dir < 0 {
			p.From.Offset = int64(dir)
		} else {
			p.From.Offset = w
		}
	}

	// move src and dest to the end of block if necessary
	if dir < 0 {
		p := gins(arm.AMOVW, &src, &src)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)

		p = gins(arm.AMOVW, &dst, &dst)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)
	}

	// move
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		p.Scond |= arm.C_PBIT
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)
		p.Scond |= arm.C_PBIT

		p = gins(arm.ACMP, &src, nil)
		raddr(&nend, p)

		gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop)
		gc.Regfree(&nend)
	} else {
		var p *obj.Prog
		for ; c > 0; c-- {
			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)
			p.Scond |= arm.C_PBIT

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
			p.Scond |= arm.C_PBIT
		}
	}

	gc.Regfree(&dst)
	gc.Regfree(&src)
	gc.Regfree(&tmp)
}
コード例 #15
0
ファイル: cgen64.go プロジェクト: ckeyer/gosrc
/*
 * attempt to generate 64-bit
 *	res = n
 * return 1 on success, 0 if op not handled.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatalf("cgen64 %v of %v", gc.Oconv(int(n.Op), 0), gc.Oconv(int(res.Op), 0))
	}

	switch n.Op {
	default:
		gc.Fatalf("cgen64 %v", gc.Oconv(int(n.Op), 0))

	case gc.OMINUS:
		gc.Cgen(n.Left, res)
		var hi1 gc.Node
		var lo1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANEGL, nil, &lo1)
		gins(x86.AADCL, ncon(0), &hi1)
		gins(x86.ANEGL, nil, &hi1)
		splitclean()
		return

	case gc.OCOM:
		gc.Cgen(n.Left, res)
		var lo1 gc.Node
		var hi1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANOTL, nil, &lo1)
		gins(x86.ANOTL, nil, &hi1)
		splitclean()
		return

		// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLROT,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR:
		break
	}

	l := n.Left
	r := n.Right
	if !l.Addable {
		var t1 gc.Node
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	var ax gc.Node
	gc.Nodreg(&ax, gc.Types[gc.TINT32], x86.REG_AX)
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)
	var dx gc.Node
	gc.Nodreg(&dx, gc.Types[gc.TINT32], x86.REG_DX)

	// Setup for binary operation.
	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)

	var lo2 gc.Node
	var hi2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	// Do op.  Leave result in DX:AX.
	switch n.Op {
	// TODO: Constants
	case gc.OADD:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AADDL, &lo2, &ax)
		gins(x86.AADCL, &hi2, &dx)

		// TODO: Constants.
	case gc.OSUB:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.ASUBL, &lo2, &ax)
		gins(x86.ASBBL, &hi2, &dx)

	case gc.OMUL:
		// let's call the next three EX, FX and GX
		var ex, fx, gx gc.Node
		gc.Regalloc(&ex, gc.Types[gc.TPTR32], nil)
		gc.Regalloc(&fx, gc.Types[gc.TPTR32], nil)
		gc.Regalloc(&gx, gc.Types[gc.TPTR32], nil)

		// load args into DX:AX and EX:GX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AMOVL, &lo2, &gx)
		gins(x86.AMOVL, &hi2, &ex)

		// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
		gins(x86.AMOVL, &dx, &fx)

		gins(x86.AORL, &ex, &fx)
		p1 := gc.Gbranch(x86.AJNE, nil, 0)
		gins(x86.AMULL, &gx, nil) // implicit &ax
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// full 64x64 -> 64, from 32x32 -> 64.
		gins(x86.AIMULL, &gx, &dx)

		gins(x86.AMOVL, &ax, &fx)
		gins(x86.AIMULL, &ex, &fx)
		gins(x86.AADDL, &dx, &fx)
		gins(x86.AMOVL, &gx, &dx)
		gins(x86.AMULL, &dx, nil) // implicit &ax
		gins(x86.AADDL, &fx, &dx)
		gc.Patch(p2, gc.Pc)

		gc.Regfree(&ex)
		gc.Regfree(&fx)
		gc.Regfree(&gx)

	// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(r.Int())

		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(x86.AMOVL, &lo1, &dx)
			gins(x86.AMOVL, &hi1, &ax)
		} else {
			gins(x86.AMOVL, &lo1, &ax)
			gins(x86.AMOVL, &hi1, &dx)
		}

		if v == 0 {
		} else // done
		{
			gins(x86.AMOVL, &dx, &cx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			p1 = gins(x86.ASHLL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_CX // double-width shift
			p1.From.Scale = 0
		}

	case gc.OLSH:
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				gins(x86.AMOVL, ncon(0), &lo2)
				gins(x86.AMOVL, ncon(0), &hi2)
				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&lo1, &hi2)
				if v > 32 {
					gins(x86.ASHLL, ncon(uint32(v-32)), &hi2)
				}

				gins(x86.AMOVL, ncon(0), &lo2)
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			gins(x86.ASHLL, ncon(uint32(v)), &ax)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		gins(x86.AXORL, &dx, &dx)
		gins(x86.AXORL, &ax, &ax)
		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, zero low.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &ax, &dx)
		gins(x86.ASHLL, &cx, &dx) // SHLL only uses bottom 5 bits of count
		gins(x86.AXORL, &ax, &ax)
		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHLL, &cx, &dx)

		p1.From.Index = x86.REG_AX // double-width shift
		p1.From.Scale = 0
		gins(x86.ASHLL, &cx, &ax)
		gc.Patch(p2, gc.Pc)

	case gc.ORSH:
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &lo2)
					gins(x86.ASARL, ncon(31), &lo2)
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &lo2)
					gins(x86.AMOVL, ncon(0), &hi2)
				}

				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&hi1, &lo2)
				if v > 32 {
					gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v-32)), &lo2)
				}
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &hi2)
				}
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHRL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_DX // double-width shift
			p1.From.Scale = 0
			gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v)), &dx)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero or sign-extend value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, ncon(31), &dx)
			gins(x86.AMOVL, &dx, &ax)
		} else {
			gins(x86.AXORL, &dx, &dx)
			gins(x86.AXORL, &ax, &ax)
		}

		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, sign-extend hi.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &dx, &ax)
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, &cx, &ax) // SARL only uses bottom 5 bits of count
			gins(x86.ASARL, ncon(31), &dx)
		} else {
			gins(x86.ASHRL, &cx, &ax)
			gins(x86.AXORL, &dx, &dx)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHRL, &cx, &ax)

		p1.From.Index = x86.REG_DX // double-width shift
		p1.From.Scale = 0
		gins(optoas(gc.ORSH, hi1.Type), &cx, &dx)
		gc.Patch(p2, gc.Pc)

		// make constant the right side (it usually is anyway).
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		if lo1.Op == gc.OLITERAL {
			nswap(&lo1, &lo2)
			nswap(&hi1, &hi2)
		}

		if lo2.Op == gc.OLITERAL {
			// special cases for constants.
			lv := uint32(lo2.Int())
			hv := uint32(hi2.Int())
			splitclean() // right side
			split64(res, &lo2, &hi2)
			switch n.Op {
			case gc.OXOR:
				gmove(&lo1, &lo2)
				gmove(&hi1, &hi2)
				switch lv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &lo2)

				default:
					gins(x86.AXORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &hi2)

				default:
					gins(x86.AXORL, ncon(hv), &hi2)
				}

			case gc.OAND:
				switch lv {
				case 0:
					gins(x86.AMOVL, ncon(0), &lo2)

				default:
					gmove(&lo1, &lo2)
					if lv != 0xffffffff {
						gins(x86.AANDL, ncon(lv), &lo2)
					}
				}

				switch hv {
				case 0:
					gins(x86.AMOVL, ncon(0), &hi2)

				default:
					gmove(&hi1, &hi2)
					if hv != 0xffffffff {
						gins(x86.AANDL, ncon(hv), &hi2)
					}
				}

			case gc.OOR:
				switch lv {
				case 0:
					gmove(&lo1, &lo2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &lo2)

				default:
					gmove(&lo1, &lo2)
					gins(x86.AORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					gmove(&hi1, &hi2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &hi2)

				default:
					gmove(&hi1, &hi2)
					gins(x86.AORL, ncon(hv), &hi2)
				}
			}

			splitclean()
			splitclean()
			return
		}

		gins(x86.AMOVL, &lo1, &ax)
		gins(x86.AMOVL, &hi1, &dx)
		gins(optoas(n.Op, lo1.Type), &lo2, &ax)
		gins(optoas(n.Op, lo1.Type), &hi2, &dx)
	}

	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	split64(res, &lo1, &hi1)
	gins(x86.AMOVL, &ax, &lo1)
	gins(x86.AMOVL, &dx, &hi1)
	splitclean()
}
コード例 #16
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
func clearfat_tail(n1 *gc.Node, b int64) {
	if b >= 16 {
		var vec_zero gc.Node
		gc.Regalloc(&vec_zero, gc.Types[gc.TFLOAT64], nil)
		gins(x86.AXORPS, &vec_zero, &vec_zero)

		for b >= 16 {
			gins(x86.AMOVUPS, &vec_zero, n1)
			n1.Xoffset += 16
			b -= 16
		}

		// MOVUPS X0, off(base) is a few bytes shorter than MOV 0, off(base)
		if b != 0 {
			n1.Xoffset -= 16 - b
			gins(x86.AMOVUPS, &vec_zero, n1)
		}

		gc.Regfree(&vec_zero)
		return
	}

	// Write sequence of MOV 0, off(base) instead of using STOSQ.
	// The hope is that although the code will be slightly longer,
	// the MOVs will have no dependencies and pipeline better
	// than the unrolled STOSQ loop.
	var z gc.Node
	gc.Nodconst(&z, gc.Types[gc.TUINT64], 0)
	if b >= 8 {
		n1.Type = z.Type
		gins(x86.AMOVQ, &z, n1)
		n1.Xoffset += 8
		b -= 8

		if b != 0 {
			n1.Xoffset -= 8 - b
			gins(x86.AMOVQ, &z, n1)
		}
		return
	}

	if b >= 4 {
		gc.Nodconst(&z, gc.Types[gc.TUINT32], 0)
		n1.Type = z.Type
		gins(x86.AMOVL, &z, n1)
		n1.Xoffset += 4
		b -= 4

		if b != 0 {
			n1.Xoffset -= 4 - b
			gins(x86.AMOVL, &z, n1)
		}
		return
	}

	if b >= 2 {
		gc.Nodconst(&z, gc.Types[gc.TUINT16], 0)
		n1.Type = z.Type
		gins(x86.AMOVW, &z, n1)
		n1.Xoffset += 2
		b -= 2
	}

	gc.Nodconst(&z, gc.Types[gc.TUINT8], 0)
	for b > 0 {
		n1.Type = z.Type
		gins(x86.AMOVB, &z, n1)
		n1.Xoffset++
		b--
	}

}
コード例 #17
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := (*gc.Type)(t.Type)

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var r1 gc.Node
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(arm64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(arm64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// value -> value copy, first operand in memory.
	// any floating point operand requires register
	// src, so goto hard to copy to register first.
	if gc.Ismem(f) && ft != tt && (gc.Isfloat[ft] || gc.Isfloat[tt]) {
		cvt = gc.Types[ft]
		goto hard
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = arm64.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		// truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = arm64.AMOVBU

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = arm64.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		// truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = arm64.AMOVHU

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32,
		// truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = arm64.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = arm64.AMOVWU

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = arm64.AMOVD

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = arm64.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = arm64.AMOVBU

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = arm64.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = arm64.AMOVHU

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = arm64.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = arm64.AMOVWU

		goto rdst

	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32:
		a = arm64.AFCVTZSSW
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = arm64.AFCVTZSDW
		goto rdst

	case gc.TFLOAT32<<16 | gc.TINT64:
		a = arm64.AFCVTZSS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT64:
		a = arm64.AFCVTZSD
		goto rdst

	case gc.TFLOAT32<<16 | gc.TUINT32:
		a = arm64.AFCVTZUSW
		goto rdst

	case gc.TFLOAT64<<16 | gc.TUINT32:
		a = arm64.AFCVTZUDW
		goto rdst

	case gc.TFLOAT32<<16 | gc.TUINT64:
		a = arm64.AFCVTZUS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TUINT64:
		a = arm64.AFCVTZUD
		goto rdst

	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	/*
	 * integer to float
	 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT32:
		a = arm64.ASCVTFWS

		goto rdst

	case gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT64:
		a = arm64.ASCVTFWD

		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT32:
		a = arm64.ASCVTFS
		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT64:
		a = arm64.ASCVTFD
		goto rdst

	case gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT32:
		a = arm64.AUCVTFWS

		goto rdst

	case gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT64:
		a = arm64.AUCVTFWD

		goto rdst

	case gc.TUINT64<<16 | gc.TFLOAT32:
		a = arm64.AUCVTFS
		goto rdst

	case gc.TUINT64<<16 | gc.TFLOAT64:
		a = arm64.AUCVTFD
		goto rdst

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = arm64.AFMOVS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = arm64.AFMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = arm64.AFCVTSD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = arm64.AFCVTDS
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	gc.Regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
コード例 #18
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
/*
 * generate division.
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will generate undefined result.
	// Also need to explicitly trap on division on zero,
	// the hardware will silently generate undefined result.
	// DIVW will leave unpredicable result in higher 32-bit,
	// so always use DIVD/DIVDU.
	t := nl.Type

	t0 := t
	check := false
	if gc.Issigned[t.Etype] {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = false
		}
	}

	if t.Width < 8 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT64]
		} else {
			t = gc.Types[gc.TUINT64]
		}
		check = false
	}

	a := optoas(gc.ODIV, t)

	var tl gc.Node
	gc.Regalloc(&tl, t0, nil)
	var tr gc.Node
	gc.Regalloc(&tr, t0, nil)
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &tl)
		gc.Cgen(nr, &tr)
	} else {
		gc.Cgen(nr, &tr)
		gc.Cgen(nl, &tl)
	}

	if t != t0 {
		// Convert
		tl2 := tl

		tr2 := tr
		tl.Type = t
		tr.Type = t
		gmove(&tl2, &tl)
		gmove(&tr2, &tr)
	}

	// Handle divide-by-zero panic.
	p1 := gins(optoas(gc.OCMP, t), &tr, nil)
	p1.Reg = arm64.REGZERO
	p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1)
	if panicdiv == nil {
		panicdiv = gc.Sysfunc("panicdivide")
	}
	gc.Ginscall(panicdiv, -1)
	gc.Patch(p1, gc.Pc)

	var p2 *obj.Prog
	if check {
		var nm1 gc.Node
		gc.Nodconst(&nm1, t, -1)
		gcmp(optoas(gc.OCMP, t), &tr, &nm1)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), &tl, &tl)

			gmove(&tl, res)
		} else {
			// a % (-1) is 0.
			var nz gc.Node
			gc.Nodconst(&nz, t, 0)

			gmove(&nz, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	p1 = gins(a, &tr, &tl)
	if op == gc.ODIV {
		gc.Regfree(&tr)
		gmove(&tl, res)
	} else {
		// A%B = A-(A/B*B)
		var tm gc.Node
		gc.Regalloc(&tm, t, nil)

		// patch div to use the 3 register form
		// TODO(minux): add gins3?
		p1.Reg = p1.To.Reg

		p1.To.Reg = tm.Reg
		gins(optoas(gc.OMUL, t), &tr, &tm)
		gc.Regfree(&tr)
		gins(optoas(gc.OSUB, t), &tm, &tl)
		gc.Regfree(&tm)
		gmove(&tl, res)
	}

	gc.Regfree(&tl)
	if check {
		gc.Patch(p2, gc.Pc)
	}
}
コード例 #19
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	var r1 gc.Node
	var a int
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatalf("gmove %v -> %v", f, t)
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			// Implementation of conversion-free x = y for int64 or uint64 x.
			// This is generated by the code that copies small values out of closures,
			// and that code has DX live, so avoid DX and just use AX twice.
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &fhi, &r1)
			gins(x86.AMOVL, &r1, &thi)
		}

		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
コード例 #20
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func memname(n *gc.Node, t *gc.Type) {
	gc.Tempname(n, t)
	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
	n.Orig.Sym = n.Sym
}
コード例 #21
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

		// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)
		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

		/*
		 * integer to float
		 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

		// algorithm is:
	//	if small enough, use native int64 -> float64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}
コード例 #22
0
ファイル: cgen.go プロジェクト: ckeyer/gosrc
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI)
	var src gc.Node
	gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI)

	var tsrc gc.Node
	gc.Tempname(&tsrc, gc.Types[gc.Tptr])
	var tdst gc.Node
	gc.Tempname(&tdst, gc.Types[gc.Tptr])
	if !n.Addable {
		gc.Agen(n, &tsrc)
	}
	if !res.Addable {
		gc.Agen(res, &tdst)
	}
	if n.Addable {
		gc.Agen(n, &src)
	} else {
		gmove(&tsrc, &src)
	}

	if res.Op == gc.ONAME {
		gc.Gvardef(res)
	}

	if res.Addable {
		gc.Agen(res, &dst)
	} else {
		gmove(&tdst, &dst)
	}

	c := int32(w % 4) // bytes
	q := int32(w / 4) // doublewords

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && int64(odst) < int64(osrc)+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(x86.AADDL, w-1, x86.REG_SI)
			gconreg(x86.AADDL, w-1, x86.REG_DI)

			gconreg(x86.AMOVL, int64(c), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(x86.AADDL, -3, x86.REG_SI)
				gconreg(x86.AADDL, -3, x86.REG_DI)
			} else {
				gconreg(x86.AADDL, w-4, x86.REG_SI)
				gconreg(x86.AADDL, w-4, x86.REG_DI)
			}

			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		gins(x86.ACLD, nil, nil) // paranoia.  TODO(rsc): remove?

		// normal direction
		if q > 128 || (q >= 4 && gc.Nacl) {
			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 10 and 128 = magic constants: see ../../runtime/asm_386.s
			p.To.Offset = 10 * (128 - int64(q))
		} else if !gc.Nacl && c == 0 {
			var cx gc.Node
			gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)

			// We don't need the MOVSL side-effect of updating SI and DI,
			// and issuing a sequence of MOVLs directly is faster.
			src.Op = gc.OINDREG

			dst.Op = gc.OINDREG
			for q > 0 {
				gmove(&src, &cx) // MOVL x+(SI),CX
				gmove(&cx, &dst) // MOVL CX,x+(DI)
				src.Xoffset += 4
				dst.Xoffset += 4
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
				q--
			}
		}

		for c > 0 {
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
			c--
		}
	}
}
コード例 #23
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	w := nl.Type.Width

	if w > 1024 || (gc.Nacl && w >= 64) {
		var oldn1 gc.Node
		var n1 gc.Node
		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
		gc.Agen(nl, &n1)

		var ax gc.Node
		var oldax gc.Node
		savex(x86.REG_AX, &ax, &oldax, nil, gc.Types[gc.Tptr])
		gconreg(x86.AMOVL, 0, x86.REG_AX)
		gconreg(movptr, w/8, x86.REG_CX)

		gins(x86.AREP, nil, nil)   // repeat
		gins(x86.ASTOSQ, nil, nil) // STOQ AL,*(DI)+

		if w%8 != 0 {
			n1.Op = gc.OINDREG
			clearfat_tail(&n1, w%8)
		}

		restx(&n1, &oldn1)
		restx(&ax, &oldax)
		return
	}

	if w >= 64 {
		var oldn1 gc.Node
		var n1 gc.Node
		savex(x86.REG_DI, &n1, &oldn1, nil, gc.Types[gc.Tptr])
		gc.Agen(nl, &n1)

		var vec_zero gc.Node
		var old_x0 gc.Node
		savex(x86.REG_X0, &vec_zero, &old_x0, nil, gc.Types[gc.TFLOAT64])
		gins(x86.AXORPS, &vec_zero, &vec_zero)

		if di := dzDI(w); di != 0 {
			gconreg(addptr, di, x86.REG_DI)
		}
		p := gins(obj.ADUFFZERO, nil, nil)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
		p.To.Offset = dzOff(w)

		if w%16 != 0 {
			n1.Op = gc.OINDREG
			n1.Xoffset -= 16 - w%16
			gins(x86.AMOVUPS, &vec_zero, &n1)
		}

		restx(&vec_zero, &old_x0)
		restx(&n1, &oldn1)
		return
	}

	// NOTE: Must use agen, not igen, so that optimizer sees address
	// being taken. We are not writing on field boundaries.
	var n1 gc.Node
	gc.Agenr(nl, &n1, nil)
	n1.Op = gc.OINDREG

	clearfat_tail(&n1, w)

	gc.Regfree(&n1)
}
コード例 #24
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
func clearfat(nl *gc.Node) {
	/* clear a fat object */
	if gc.Debug['g'] != 0 {
		gc.Dump("\nclearfat", nl)
	}

	w := uint32(nl.Type.Width)

	// Avoid taking the address for simple enough types.
	if gc.Componentgen(nil, nl) {
		return
	}

	c := w % 4 // bytes
	q := w / 4 // quads

	var r0 gc.Node
	r0.Op = gc.OREGISTER

	r0.Reg = arm.REG_R0
	var r1 gc.Node
	r1.Op = gc.OREGISTER
	r1.Reg = arm.REG_R1
	var dst gc.Node
	gc.Regalloc(&dst, gc.Types[gc.Tptr], &r1)
	gc.Agen(nl, &dst)
	var nc gc.Node
	gc.Nodconst(&nc, gc.Types[gc.TUINT32], 0)
	var nz gc.Node
	gc.Regalloc(&nz, gc.Types[gc.TUINT32], &r0)
	gc.Cgen(&nc, &nz)

	if q > 128 {
		var end gc.Node
		gc.Regalloc(&end, gc.Types[gc.Tptr], nil)
		p := gins(arm.AMOVW, &dst, &end)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = int64(q) * 4

		p = gins(arm.AMOVW, &nz, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 4
		p.Scond |= arm.C_PBIT
		pl := p

		p = gins(arm.ACMP, &dst, nil)
		raddr(&end, p)
		gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), pl)

		gc.Regfree(&end)
	} else if q >= 4 && !gc.Nacl {
		f := gc.Sysfunc("duffzero")
		p := gins(obj.ADUFFZERO, nil, f)
		gc.Afunclit(&p.To, f)

		// 4 and 128 = magic constants: see ../../runtime/asm_arm.s
		p.To.Offset = 4 * (128 - int64(q))
	} else {
		var p *obj.Prog
		for q > 0 {
			p = gins(arm.AMOVW, &nz, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = 4
			p.Scond |= arm.C_PBIT

			//print("1. %v\n", p);
			q--
		}
	}

	var p *obj.Prog
	for c > 0 {
		p = gins(arm.AMOVB, &nz, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = 1
		p.Scond |= arm.C_PBIT

		//print("2. %v\n", p);
		c--
	}

	gc.Regfree(&dst)
	gc.Regfree(&nz)
}
コード例 #25
0
ファイル: cgen.go プロジェクト: ckeyer/gosrc
func blockcopy(n, ns *gc.Node, osrc, odst, w int64) {
	var noddi gc.Node
	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
	var nodsi gc.Node
	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)

	var nodl gc.Node
	var nodr gc.Node
	if n.Ullman >= ns.Ullman {
		gc.Agenr(n, &nodr, &nodsi)
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
	} else {
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
		gc.Agenr(n, &nodr, &nodsi)
	}

	if nodl.Reg != x86.REG_DI {
		gmove(&nodl, &noddi)
	}
	if nodr.Reg != x86.REG_SI {
		gmove(&nodr, &nodsi)
	}
	gc.Regfree(&nodl)
	gc.Regfree(&nodr)

	c := w % 8 // bytes
	q := w / 8 // quads

	var oldcx gc.Node
	var cx gc.Node
	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(addptr, w-1, x86.REG_SI)
			gconreg(addptr, w-1, x86.REG_DI)

			gconreg(movptr, c, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(addptr, -7, x86.REG_SI)
				gconreg(addptr, -7, x86.REG_DI)
			} else {
				gconreg(addptr, w-8, x86.REG_SI)
				gconreg(addptr, w-8, x86.REG_DI)
			}

			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		// normal direction
		if q > 128 || (gc.Nacl && q >= 4) || (obj.Getgoos() == "plan9" && q >= 4) {
			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
		} else if q >= 4 {
			var oldx0 gc.Node
			var x0 gc.Node
			savex(x86.REG_X0, &x0, &oldx0, nil, gc.Types[gc.TFLOAT64])

			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 64 blocks taking 14 bytes each
			// see ../../../../runtime/mkduff.go
			p.To.Offset = 14 * (64 - q/2)
			restx(&x0, &oldx0)

			if q%2 != 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
			}
		} else if !gc.Nacl && c == 0 {
			// We don't need the MOVSQ side-effect of updating SI and DI,
			// and issuing a sequence of MOVQs directly is faster.
			nodsi.Op = gc.OINDREG

			noddi.Op = gc.OINDREG
			for q > 0 {
				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
				nodsi.Xoffset += 8
				noddi.Xoffset += 8
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
				q--
			}
		}

		// copy the remaining c bytes
		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
			for c > 0 {
				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
				c--
			}
		} else if w < 8 || c <= 4 {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT32]
			nodsi.Type = gc.Types[gc.TINT32]
			noddi.Type = gc.Types[gc.TINT32]
			if c > 4 {
				nodsi.Xoffset = 0
				noddi.Xoffset = 0
				gmove(&nodsi, &cx)
				gmove(&cx, &noddi)
			}

			nodsi.Xoffset = c - 4
			noddi.Xoffset = c - 4
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		} else {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT64]
			nodsi.Type = gc.Types[gc.TINT64]
			noddi.Type = gc.Types[gc.TINT64]
			nodsi.Xoffset = c - 8
			noddi.Xoffset = c - 8
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		}
	}

	restx(&cx, &oldcx)
}
コード例 #26
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := (*gc.Type)(t.Type)

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var r2 gc.Node
	var r1 gc.Node
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(mips.AMOVV, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(mips.AMOVV, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// value -> value copy, first operand in memory.
	// any floating point operand requires register
	// src, so goto hard to copy to register first.
	if gc.Ismem(f) && ft != tt && (gc.Isfloat[ft] || gc.Isfloat[tt]) {
		cvt = gc.Types[ft]
		goto hard
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = mips.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8, // truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = mips.AMOVBU

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = mips.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16, // truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = mips.AMOVHU

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = mips.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32, // truncate
		gc.TUINT64<<16 | gc.TUINT32:
		a = mips.AMOVWU

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = mips.AMOVV

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = mips.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = mips.AMOVBU

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = mips.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = mips.AMOVHU

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = mips.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = mips.AMOVWU

		goto rdst

		//warn("gmove: convert float to int not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native float64 -> int64 conversion.
	//	otherwise, subtract 2^63, convert, and add it back.
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32,
		gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		bignodes()

		gc.Regalloc(&r1, gc.Types[gc.TFLOAT64], nil)
		gmove(f, &r1)
		if tt == gc.TUINT64 {
			gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
			gmove(&bigf, &r2)
			gins3(mips.ACMPGED, &r1, &r2, nil)
			p1 := gc.Gbranch(mips.ABFPF, nil, 0)
			gins(mips.ASUBD, &r2, &r1)
			gc.Patch(p1, gc.Pc)
			gc.Regfree(&r2)
		}

		gc.Regalloc(&r2, gc.Types[gc.TINT64], t)
		gins(mips.ATRUNCDV, &r1, &r1)
		gins(mips.AMOVV, &r1, &r2)
		gc.Regfree(&r1)

		if tt == gc.TUINT64 {
			p1 := gc.Gbranch(mips.ABFPF, nil, 0) // use FCR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TINT64], mips.REGTMP)
			gmove(&bigi, &r1)
			gins(mips.AADDVU, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

		//warn("gmove: convert int to float not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native int64 -> float64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	/*
	 * integer to float
	 */
	case gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64,
		gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		bignodes()

		var rtmp gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TINT64], nil)
		gmove(f, &r1)
		if ft == gc.TUINT64 {
			gc.Nodreg(&rtmp, gc.Types[gc.TUINT64], mips.REGTMP)
			gmove(&bigi, &rtmp)
			gins(mips.AAND, &r1, &rtmp)
			p1 := ginsbranch(mips.ABEQ, nil, &rtmp, nil, 0)
			p2 := gins(mips.ASRLV, nil, &r1)
			p2.From.Type = obj.TYPE_CONST
			p2.From.Offset = 1
			gc.Patch(p1, gc.Pc)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], t)
		gins(mips.AMOVV, &r1, &r2)
		gins(mips.AMOVVD, &r2, &r2)
		gc.Regfree(&r1)

		if ft == gc.TUINT64 {
			p1 := ginsbranch(mips.ABEQ, nil, &rtmp, nil, 0)
			gc.Nodreg(&r1, gc.Types[gc.TFLOAT64], mips.FREGTWO)
			gins(mips.AMULD, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = mips.AMOVF

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = mips.AMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = mips.AMOVFD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = mips.AMOVDF
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
コード例 #27
0
ファイル: ggen.go プロジェクト: ckeyer/gosrc
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := int(optoas(op, nl.Type))

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	var n1 gc.Node
	gc.Regalloc(&n1, nr.Type, nil) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var n2 gc.Node
	gc.Regalloc(&n2, nl.Type, res)

	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gcmp(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, tcount), nil, +1))
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
コード例 #28
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * generate code to compute address of n,
 * a reference to a (perhaps nested) field inside
 * an array or struct.
 * return 0 on failure, 1 on success.
 * on success, leaves usable address in a.
 *
 * caller is responsible for calling sudoclean
 * after successful sudoaddable,
 * to release the register used for a.
 */
func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool {
	if n.Type == nil {
		return false
	}

	*a = obj.Addr{}

	switch n.Op {
	case gc.OLITERAL:
		if !gc.Isconst(n, gc.CTINT) {
			break
		}
		v := n.Int()
		if v >= 32000 || v <= -32000 {
			break
		}
		switch as {
		default:
			return false

		case arm.AADD,
			arm.ASUB,
			arm.AAND,
			arm.AORR,
			arm.AEOR,
			arm.AMOVB,
			arm.AMOVBS,
			arm.AMOVBU,
			arm.AMOVH,
			arm.AMOVHS,
			arm.AMOVHU,
			arm.AMOVW:
			break
		}

		cleani += 2
		reg := &clean[cleani-1]
		reg1 := &clean[cleani-2]
		reg.Op = gc.OEMPTY
		reg1.Op = gc.OEMPTY
		gc.Naddr(a, n)
		return true

	case gc.ODOT,
		gc.ODOTPTR:
		cleani += 2
		reg := &clean[cleani-1]
		reg1 := &clean[cleani-2]
		reg.Op = gc.OEMPTY
		reg1.Op = gc.OEMPTY
		var nn *gc.Node
		var oary [10]int64
		o := gc.Dotoffset(n, oary[:], &nn)
		if nn == nil {
			sudoclean()
			return false
		}

		if nn.Addable && o == 1 && oary[0] >= 0 {
			// directly addressable set of DOTs
			n1 := *nn

			n1.Type = n.Type
			n1.Xoffset += oary[0]
			gc.Naddr(a, &n1)
			return true
		}

		gc.Regalloc(reg, gc.Types[gc.Tptr], nil)
		n1 := *reg
		n1.Op = gc.OINDREG
		if oary[0] >= 0 {
			gc.Agen(nn, reg)
			n1.Xoffset = oary[0]
		} else {
			gc.Cgen(nn, reg)
			gc.Cgen_checknil(reg)
			n1.Xoffset = -(oary[0] + 1)
		}

		for i := 1; i < o; i++ {
			if oary[i] >= 0 {
				gc.Fatalf("can't happen")
			}
			gins(arm.AMOVW, &n1, reg)
			gc.Cgen_checknil(reg)
			n1.Xoffset = -(oary[i] + 1)
		}

		a.Type = obj.TYPE_NONE
		a.Name = obj.NAME_NONE
		n1.Type = n.Type
		gc.Naddr(a, &n1)
		return true

	case gc.OINDEX:
		return false
	}

	return false
}
コード例 #29
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 */
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))
	}

	ft := int(gc.Simsimtype(f.Type))
	tt := int(gc.Simsimtype(t.Type))
	cvt := (*gc.Type)(t.Type)

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var r2 gc.Node
	var r1 gc.Node
	var a int
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		case gc.TINT32,
			gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(ppc64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT32,
			gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT64])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(ppc64.AMOVD, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = tt // so big switch will choose a simple mov

		// constants can't move directly to memory.
		if gc.Ismem(t) {
			goto hard
		}
	}

	// float constants come from memory.
	//if(isfloat[tt])
	//	goto hard;

	// 64-bit immediates are also from memory.
	//if(isint[tt])
	//	goto hard;
	//// 64-bit immediates are really 32-bit sign-extended
	//// unless moving into a register.
	//if(isint[tt]) {
	//	if(mpcmpfixfix(con.val.u.xval, minintval[TINT32]) < 0)
	//		goto hard;
	//	if(mpcmpfixfix(con.val.u.xval, maxintval[TINT32]) > 0)
	//		goto hard;
	//}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8:
		a = ppc64.AMOVB

	case gc.TINT8<<16 | gc.TUINT8, // same size
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		// truncate
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = ppc64.AMOVBZ

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16:
		a = ppc64.AMOVH

	case gc.TINT16<<16 | gc.TUINT16, // same size
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		// truncate
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = ppc64.AMOVHZ

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TUINT32<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TINT32,
		// truncate
		gc.TUINT64<<16 | gc.TINT32:
		a = ppc64.AMOVW

	case gc.TINT32<<16 | gc.TUINT32, // same size
		gc.TUINT32<<16 | gc.TUINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = ppc64.AMOVWZ

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = ppc64.AMOVD

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32,
		gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = ppc64.AMOVB

		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32,
		gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = ppc64.AMOVBZ

		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32,
		gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = ppc64.AMOVH

		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32,
		gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = ppc64.AMOVHZ

		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = ppc64.AMOVW

		goto rdst

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = ppc64.AMOVWZ

		goto rdst

		//warn("gmove: convert float to int not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native float64 -> int64 conversion.
	//	otherwise, subtract 2^63, convert, and add it back.
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32,
		gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		bignodes()

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		gmove(f, &r1)
		if tt == gc.TUINT64 {
			gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
			gmove(&bigf, &r2)
			gins(ppc64.AFCMPU, &r1, &r2)
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1))
			gins(ppc64.AFSUB, &r2, &r1)
			gc.Patch(p1, gc.Pc)
			gc.Regfree(&r2)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil)
		var r3 gc.Node
		gc.Regalloc(&r3, gc.Types[gc.TINT64], t)
		gins(ppc64.AFCTIDZ, &r1, &r2)
		p1 := (*obj.Prog)(gins(ppc64.AFMOVD, &r2, nil))
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = ppc64.REGSP
		p1.To.Offset = -8
		p1 = gins(ppc64.AMOVD, nil, &r3)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = ppc64.REGSP
		p1.From.Offset = -8
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		if tt == gc.TUINT64 {
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1)) // use CR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TINT64], ppc64.REGTMP)
			gins(ppc64.AMOVD, &bigi, &r1)
			gins(ppc64.AADD, &r1, &r3)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r3, t)
		gc.Regfree(&r3)
		return

		//warn("gmove: convert int to float not implemented: %N -> %N\n", f, t);
	//return;
	// algorithm is:
	//	if small enough, use native int64 -> uint64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	/*
	 * integer to float
	 */
	case gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64,
		gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		bignodes()

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TINT64], nil)
		gmove(f, &r1)
		if ft == gc.TUINT64 {
			gc.Nodreg(&r2, gc.Types[gc.TUINT64], ppc64.REGTMP)
			gmove(&bigi, &r2)
			gins(ppc64.ACMPU, &r1, &r2)
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1))
			p2 := (*obj.Prog)(gins(ppc64.ASRD, nil, &r1))
			p2.From.Type = obj.TYPE_CONST
			p2.From.Offset = 1
			gc.Patch(p1, gc.Pc)
		}

		gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], t)
		p1 := (*obj.Prog)(gins(ppc64.AMOVD, &r1, nil))
		p1.To.Type = obj.TYPE_MEM
		p1.To.Reg = ppc64.REGSP
		p1.To.Offset = -8
		p1 = gins(ppc64.AFMOVD, nil, &r2)
		p1.From.Type = obj.TYPE_MEM
		p1.From.Reg = ppc64.REGSP
		p1.From.Offset = -8
		gins(ppc64.AFCFID, &r2, &r2)
		gc.Regfree(&r1)
		if ft == gc.TUINT64 {
			p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1)) // use CR0 here again
			gc.Nodreg(&r1, gc.Types[gc.TFLOAT64], ppc64.FREGTWO)
			gins(ppc64.AFMUL, &r1, &r2)
			gc.Patch(p1, gc.Pc)
		}

		gmove(&r2, t)
		gc.Regfree(&r2)
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = ppc64.AFMOVS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = ppc64.AFMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = ppc64.AFMOVS
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = ppc64.AFRSP
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
コード例 #30
0
ファイル: gsubr.go プロジェクト: ckeyer/gosrc
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands;
	// except 64-bit, which always copies via registers anyway.
	var a int
	var r1 gc.Node
	if !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		switch tt {
		default:
			f.Convconst(&con, t.Type)

		case gc.TINT16,
			gc.TINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TINT32])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(arm.AMOVW, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return

		case gc.TUINT16,
			gc.TUINT8:
			var con gc.Node
			f.Convconst(&con, gc.Types[gc.TUINT32])
			var r1 gc.Node
			gc.Regalloc(&r1, con.Type, t)
			gins(arm.AMOVW, &con, &r1)
			gmove(&r1, t)
			gc.Regfree(&r1)
			return
		}

		f = &con
		ft = gc.Simsimtype(con.Type)

		// constants can't move directly to memory
		if gc.Ismem(t) && !gc.Is64(t.Type) {
			goto hard
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatalf("gmove %v -> %v", f, t)
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8: // same size
		if !gc.Ismem(f) {
			a = arm.AMOVB
			break
		}
		fallthrough

	case gc.TUINT8<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8:
		a = arm.AMOVBS

	case gc.TUINT8<<16 | gc.TUINT8:
		if !gc.Ismem(f) {
			a = arm.AMOVB
			break
		}
		fallthrough

	case gc.TINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = arm.AMOVBU

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8:
		a = arm.AMOVBS

		goto trunc64

	case gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = arm.AMOVBU
		goto trunc64

	case gc.TINT16<<16 | gc.TINT16: // same size
		if !gc.Ismem(f) {
			a = arm.AMOVH
			break
		}
		fallthrough

	case gc.TUINT16<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16:
		a = arm.AMOVHS

	case gc.TUINT16<<16 | gc.TUINT16:
		if !gc.Ismem(f) {
			a = arm.AMOVH
			break
		}
		fallthrough

	case gc.TINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = arm.AMOVHU

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16:
		a = arm.AMOVHS

		goto trunc64

	case gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = arm.AMOVHU
		goto trunc64

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = arm.AMOVW

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Regalloc(&r1, t.Type, nil)
		gins(arm.AMOVW, &flo, &r1)
		gins(arm.AMOVW, &r1, t)
		gc.Regfree(&r1)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		var r1 gc.Node
		gc.Regalloc(&r1, flo.Type, nil)
		var r2 gc.Node
		gc.Regalloc(&r2, fhi.Type, nil)
		gins(arm.AMOVW, &flo, &r1)
		gins(arm.AMOVW, &fhi, &r2)
		gins(arm.AMOVW, &r1, &tlo)
		gins(arm.AMOVW, &r2, &thi)
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		splitclean()
		splitclean()
		return

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16,
		gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = arm.AMOVBS

		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16,
		gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = arm.AMOVBU

		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = arm.AMOVHS

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = arm.AMOVHU

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		var r1 gc.Node
		gc.Regalloc(&r1, tlo.Type, nil)
		var r2 gc.Node
		gc.Regalloc(&r2, thi.Type, nil)
		gmove(f, &r1)
		p1 := gins(arm.AMOVW, &r1, &r2)
		p1.From.Type = obj.TYPE_SHIFT
		p1.From.Offset = 2<<5 | 31<<7 | int64(r1.Reg)&15 // r1->31
		p1.From.Reg = 0

		//print("gmove: %v\n", p1);
		gins(arm.AMOVW, &r1, &tlo)

		gins(arm.AMOVW, &r2, &thi)
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		var r1 gc.Node
		gc.Regalloc(&r1, thi.Type, nil)
		gins(arm.AMOVW, ncon(0), &r1)
		gins(arm.AMOVW, &r1, &thi)
		gc.Regfree(&r1)
		splitclean()
		return

		//	case CASE(TFLOAT64, TUINT64):
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TUINT32,

		//	case CASE(TFLOAT32, TUINT64):

		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		fa := arm.AMOVF

		a := arm.AMOVFW
		if ft == gc.TFLOAT64 {
			fa = arm.AMOVD
			a = arm.AMOVDW
		}

		ta := arm.AMOVW
		switch tt {
		case gc.TINT8:
			ta = arm.AMOVBS

		case gc.TUINT8:
			ta = arm.AMOVBU

		case gc.TINT16:
			ta = arm.AMOVHS

		case gc.TUINT16:
			ta = arm.AMOVHU
		}

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		var r2 gc.Node
		gc.Regalloc(&r2, gc.Types[tt], t)
		gins(fa, f, &r1)        // load to fpu
		p1 := gins(a, &r1, &r1) // convert to w
		switch tt {
		case gc.TUINT8,
			gc.TUINT16,
			gc.TUINT32:
			p1.Scond |= arm.C_UBIT
		}

		gins(arm.AMOVW, &r1, &r2) // copy to cpu
		gins(ta, &r2, t)          // store
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		return

		/*
		 * integer to float
		 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TUINT32<<16 | gc.TFLOAT64:
		fa := arm.AMOVW

		switch ft {
		case gc.TINT8:
			fa = arm.AMOVBS

		case gc.TUINT8:
			fa = arm.AMOVBU

		case gc.TINT16:
			fa = arm.AMOVHS

		case gc.TUINT16:
			fa = arm.AMOVHU
		}

		a := arm.AMOVWF
		ta := arm.AMOVF
		if tt == gc.TFLOAT64 {
			a = arm.AMOVWD
			ta = arm.AMOVD
		}

		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], f)
		var r2 gc.Node
		gc.Regalloc(&r2, gc.Types[tt], t)
		gins(fa, f, &r1)          // load to cpu
		gins(arm.AMOVW, &r1, &r2) // copy to fpu
		p1 := gins(a, &r2, &r2)   // convert
		switch ft {
		case gc.TUINT8,
			gc.TUINT16,
			gc.TUINT32:
			p1.Scond |= arm.C_UBIT
		}

		gins(ta, &r2, t) // store
		gc.Regfree(&r1)
		gc.Regfree(&r2)
		return

	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		gc.Fatalf("gmove UINT64, TFLOAT not implemented")
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = arm.AMOVF

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = arm.AMOVD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TFLOAT64], t)
		gins(arm.AMOVF, f, &r1)
		gins(arm.AMOVFD, &r1, &r1)
		gins(arm.AMOVD, &r1, t)
		gc.Regfree(&r1)
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[gc.TFLOAT64], t)
		gins(arm.AMOVD, f, &r1)
		gins(arm.AMOVDF, &r1, &r1)
		gins(arm.AMOVF, &r1, t)
		gc.Regfree(&r1)
		return
	}

	gins(a, f, t)
	return

	// TODO(kaib): we almost always require a register dest anyway, this can probably be
	// removed.
	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// truncate 64 bit integer
trunc64:
	var fhi gc.Node
	var flo gc.Node
	split64(f, &flo, &fhi)

	gc.Regalloc(&r1, t.Type, nil)
	gins(a, &flo, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	splitclean()
	return
}