Exemple #1
0
/*
 * generate an addressable node in res, containing the value of n.
 * n is an array index, and might be any size; res width is <= 32-bit.
 * returns Prog* to patch to panic call.
 */
func igenindex(n *gc.Node, res *gc.Node, bounded bool) *obj.Prog {
	if !gc.Is64(n.Type) {
		if n.Addable && (gc.Simtype[n.Etype] == gc.TUINT32 || gc.Simtype[n.Etype] == gc.TINT32) {
			// nothing to do.
			*res = *n
		} else {
			gc.Tempname(res, gc.Types[gc.TUINT32])
			gc.Cgen(n, res)
		}

		return nil
	}

	var tmp gc.Node
	gc.Tempname(&tmp, gc.Types[gc.TINT64])
	gc.Cgen(n, &tmp)
	var lo gc.Node
	var hi gc.Node
	split64(&tmp, &lo, &hi)
	gc.Tempname(res, gc.Types[gc.TUINT32])
	gmove(&lo, res)
	if bounded {
		splitclean()
		return nil
	}

	var zero gc.Node
	gc.Nodconst(&zero, gc.Types[gc.TINT32], 0)
	gins(x86.ACMPL, &hi, &zero)
	splitclean()
	return gc.Gbranch(x86.AJNE, nil, +1)
}
Exemple #2
0
/*
 * generate byte multiply:
 *	res = nl * nr
 * there is no 2-operand byte multiply instruction so
 * we do a full-width multiplication and truncate afterwards.
 */
func cgen_bmul(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) bool {
	if optoas(op, nl.Type) != x86.AIMULB {
		return false
	}

	// copy from byte to full registers
	t := gc.Types[gc.TUINT32]

	if gc.Issigned[nl.Type.Etype] {
		t = gc.Types[gc.TINT32]
	}

	// largest ullman on left.
	if nl.Ullman < nr.Ullman {
		nl, nr = nr, nl
	}

	var nt gc.Node
	gc.Tempname(&nt, nl.Type)
	gc.Cgen(nl, &nt)
	var n1 gc.Node
	gc.Regalloc(&n1, t, res)
	gc.Cgen(nr, &n1)
	var n2 gc.Node
	gc.Regalloc(&n2, t, nil)
	gmove(&nt, &n2)
	a := optoas(op, t)
	gins(a, &n2, &n1)
	gc.Regfree(&n2)
	gmove(&n1, res)
	gc.Regfree(&n1)

	return true
}
Exemple #3
0
/*
 * generate array index into res.
 * n might be any size; res is 32-bit.
 * returns Prog* to patch to panic call.
 */
func cgenindex(n *gc.Node, res *gc.Node, bounded bool) *obj.Prog {
	if !gc.Is64(n.Type) {
		gc.Cgen(n, res)
		return nil
	}

	var tmp gc.Node
	gc.Tempname(&tmp, gc.Types[gc.TINT64])
	gc.Cgen(n, &tmp)
	var lo gc.Node
	var hi gc.Node
	split64(&tmp, &lo, &hi)
	gmove(&lo, res)
	if bounded {
		splitclean()
		return nil
	}

	var n1 gc.Node
	gc.Regalloc(&n1, gc.Types[gc.TINT32], nil)
	var n2 gc.Node
	gc.Regalloc(&n2, gc.Types[gc.TINT32], nil)
	var zero gc.Node
	gc.Nodconst(&zero, gc.Types[gc.TINT32], 0)
	gmove(&hi, &n1)
	gmove(&zero, &n2)
	gins(arm.ACMP, &n1, &n2)
	gc.Regfree(&n2)
	gc.Regfree(&n1)
	splitclean()
	return gc.Gbranch(arm.ABNE, nil, -1)
}
Exemple #4
0
func savex(dr int, x *gc.Node, oldx *gc.Node, res *gc.Node, t *gc.Type) {
	r := gc.GetReg(dr)
	gc.Nodreg(x, gc.Types[gc.TINT32], dr)

	// save current ax and dx if they are live
	// and not the destination
	*oldx = gc.Node{}

	if r > 0 && !gc.Samereg(x, res) {
		gc.Tempname(oldx, gc.Types[gc.TINT32])
		gmove(x, oldx)
	}

	gc.Regalloc(x, t, x)
}
Exemple #5
0
/*
 * generate floating-point operation.
 */
func cgen_float(n *gc.Node, res *gc.Node) {
	nl := n.Left
	switch n.Op {
	case gc.OEQ,
		gc.ONE,
		gc.OLT,
		gc.OLE,
		gc.OGE:
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Pc
		gmove(gc.Nodbool(true), res)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
		gc.Bgen(n, true, 0, p2)
		gmove(gc.Nodbool(false), res)
		gc.Patch(p3, gc.Pc)
		return

	case gc.OPLUS:
		gc.Cgen(nl, res)
		return

	case gc.OCONV:
		if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) {
			gc.Cgen(nl, res)
			return
		}

		var n2 gc.Node
		gc.Tempname(&n2, n.Type)
		var n1 gc.Node
		gc.Mgen(nl, &n1, res)
		gmove(&n1, &n2)
		gmove(&n2, res)
		gc.Mfree(&n1)
		return
	}

	if gc.Thearch.Use387 {
		cgen_float387(n, res)
	} else {
		cgen_floatsse(n, res)
	}
}
Exemple #6
0
Fichier : ggen.go Projet : gmwu/go
/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node
	var ax gc.Node
	var dx gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)

	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)

	gc.Cgen(nr, &n2)

	// multiply.
	gc.Nodreg(&ax, t, x86.REG_AX)

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		gc.Nodreg(&ax, t, x86.REG_AH)

		gc.Nodreg(&dx, t, x86.REG_DX)
		gmove(&ax, &dx)
	}

	gc.Nodreg(&dx, t, x86.REG_DX)
	gmove(&dx, res)
}
Exemple #7
0
/*
 * generate high multiply:
 *   res = (nl*nr) >> width
 */
func cgen_hmul(nl *gc.Node, nr *gc.Node, res *gc.Node) {
	var n1 gc.Node
	var n2 gc.Node

	t := nl.Type
	a := optoas(gc.OHMUL, t)

	// gen nl in n1.
	gc.Tempname(&n1, t)
	gc.Cgen(nl, &n1)

	// gen nr in n2.
	gc.Regalloc(&n2, t, res)
	gc.Cgen(nr, &n2)

	var ax, oldax, dx, olddx gc.Node
	savex(x86.REG_AX, &ax, &oldax, res, gc.Types[gc.TUINT32])
	savex(x86.REG_DX, &dx, &olddx, res, gc.Types[gc.TUINT32])

	gmove(&n2, &ax)
	gins(a, &n1, nil)
	gc.Regfree(&n2)

	if t.Width == 1 {
		// byte multiply behaves differently.
		var byteAH, byteDX gc.Node
		gc.Nodreg(&byteAH, t, x86.REG_AH)
		gc.Nodreg(&byteDX, t, x86.REG_DX)
		gmove(&byteAH, &byteDX)
	}

	gmove(&dx, res)

	restx(&ax, &oldax)
	restx(&dx, &olddx)
}
Exemple #8
0
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI)
	var src gc.Node
	gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI)

	var tsrc gc.Node
	gc.Tempname(&tsrc, gc.Types[gc.Tptr])
	var tdst gc.Node
	gc.Tempname(&tdst, gc.Types[gc.Tptr])
	if !n.Addable {
		gc.Agen(n, &tsrc)
	}
	if !res.Addable {
		gc.Agen(res, &tdst)
	}
	if n.Addable {
		gc.Agen(n, &src)
	} else {
		gmove(&tsrc, &src)
	}

	if res.Op == gc.ONAME {
		gc.Gvardef(res)
	}

	if res.Addable {
		gc.Agen(res, &dst)
	} else {
		gmove(&tdst, &dst)
	}

	c := int32(w % 4) // bytes
	q := int32(w / 4) // doublewords

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(x86.AADDL, w-1, x86.REG_SI)
			gconreg(x86.AADDL, w-1, x86.REG_DI)

			gconreg(x86.AMOVL, int64(c), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(x86.AADDL, -3, x86.REG_SI)
				gconreg(x86.AADDL, -3, x86.REG_DI)
			} else {
				gconreg(x86.AADDL, w-4, x86.REG_SI)
				gconreg(x86.AADDL, w-4, x86.REG_DI)
			}

			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		gins(x86.ACLD, nil, nil) // paranoia.  TODO(rsc): remove?

		// normal direction
		if q > 128 || (q >= 4 && gc.Nacl) {
			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 10 and 128 = magic constants: see ../../runtime/asm_386.s
			p.To.Offset = 10 * (128 - int64(q))
		} else if !gc.Nacl && c == 0 {
			var cx gc.Node
			gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)

			// We don't need the MOVSL side-effect of updating SI and DI,
			// and issuing a sequence of MOVLs directly is faster.
			src.Op = gc.OINDREG

			dst.Op = gc.OINDREG
			for q > 0 {
				gmove(&src, &cx) // MOVL x+(SI),CX
				gmove(&cx, &dst) // MOVL CX,x+(DI)
				src.Xoffset += 4
				dst.Xoffset += 4
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
				q--
			}
		}

		for c > 0 {
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
			c--
		}
	}
}
Exemple #9
0
func igenindex(n *gc.Node, res *gc.Node, bounded bool) *obj.Prog {
	gc.Tempname(res, n.Type)
	return cgenindex(n, res, bounded)
}
Exemple #10
0
func floatmove_sse(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var cvt *gc.Type
	var a int

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)

	switch uint32(ft)<<16 | uint32(tt) {
	// should not happen
	default:
		gc.Fatalf("gmove %v -> %v", f, t)

		return

		// convert via int32.
	/*
	* float to integer
	 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TFLOAT32<<16 | gc.TINT32:
		a = x86.ACVTTSS2SL
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL
		goto rdst

		// convert via int32 memory
	/*
	 * integer to float
	 */
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS
		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// requires register destination
rdst:
	gc.Regalloc(&r1, t.Type, t)

	gins(a, f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
Exemple #11
0
func memname(n *gc.Node, t *gc.Type) {
	gc.Tempname(n, t)
	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
	n.Orig.Sym = n.Sym
}
Exemple #12
0
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

		// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)
		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMP, &f0, &f1)
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

		/*
		 * integer to float
		 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

		// algorithm is:
	//	if small enough, use native int64 -> float64 conversion.
	//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}
Exemple #13
0
func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a int

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		goto fatal

		/*
		* float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT32,
		gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT32,
		gc.TFLOAT64<<16 | gc.TINT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if f.Op != gc.OREGISTER {
			if ft == gc.TFLOAT32 {
				gins(x86.AFMOVF, f, &r1)
			} else {
				gins(x86.AFMOVD, f, &r1)
			}
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

		// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		var t1 gc.Node
		gc.Tempname(&t1, gc.Types[gc.TINT32])

		gmove(f, &t1)
		switch tt {
		default:
			gc.Fatalf("gmove %v", t)

		case gc.TINT8:
			gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1)))
			p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1)
			gins(x86.ACMPL, &t1, ncon(0x7f))
			p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1)
			p3 := gc.Gbranch(obj.AJMP, nil, 0)
			gc.Patch(p1, gc.Pc)
			gc.Patch(p2, gc.Pc)
			gmove(ncon(-0x80&(1<<32-1)), &t1)
			gc.Patch(p3, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT8:
			gins(x86.ATESTL, ncon(0xffffff00), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)

		case gc.TUINT16:
			gins(x86.ATESTL, ncon(0xffff0000), &t1)
			p1 := gc.Gbranch(x86.AJEQ, nil, +1)
			gins(x86.AMOVL, ncon(0), &t1)
			gc.Patch(p1, gc.Pc)
			gmove(&t1, t)
		}

		return

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		/*
		 * integer to float
		 */
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT32<<16 | gc.TFLOAT32,
		gc.TINT32<<16 | gc.TFLOAT64,
		gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op != gc.OREGISTER {
			goto hard
		}
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		switch ft {
		case gc.TINT16:
			a = x86.AFMOVW

		case gc.TINT32:
			a = x86.AFMOVL

		default:
			a = x86.AFMOVV
		}

		// convert via int32 memory
	case gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hardmem

		// convert via int64 memory
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hardmem

		// The way the code generator uses floating-point
	// registers, a move from F0 to F0 is intended as a no-op.
	// On the x86, it's not: it pushes a second copy of F0
	// on the floating point stack. So toss it away here.
	// Also, F0 is the *only* register we ever evaluate
	// into, so we should only see register/register as F0/F0.
	/*
	 * float to float
	 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32,
		gc.TFLOAT64<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		a = x86.AFMOVF
		if ft == gc.TFLOAT64 {
			a = x86.AFMOVD
		}
		if gc.Ismem(t) {
			if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 {
				gc.Fatalf("gmove %v", f)
			}
			a = x86.AFMOVFP
			if ft == gc.TFLOAT64 {
				a = x86.AFMOVDP
			}
		}

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 {
				goto fatal
			}
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVDP, f, t)
		} else {
			gins(x86.AFMOVF, f, t)
		}
		return

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		if gc.Ismem(f) && gc.Ismem(t) {
			goto hard
		}
		if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER {
			var r1 gc.Node
			gc.Tempname(&r1, gc.Types[gc.TFLOAT32])
			gins(x86.AFMOVFP, f, &r1)
			gins(x86.AFMOVF, &r1, t)
			return
		}

		if f.Op == gc.OREGISTER {
			gins(x86.AFMOVFP, f, t)
		} else {
			gins(x86.AFMOVD, f, t)
		}
		return
	}

	gins(a, f, t)
	return

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return

	// should not happen
fatal:
	gc.Fatalf("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong))

	return
}
Exemple #14
0
func cgen_floatsse(n *gc.Node, res *gc.Node) {
	var a obj.As

	nl := n.Left
	nr := n.Right
	switch n.Op {
	default:
		gc.Dump("cgen_floatsse", n)
		gc.Fatalf("cgen_floatsse %v", gc.Oconv(n.Op, 0))
		return

	case gc.OMINUS,
		gc.OCOM:
		nr = gc.Nodintconst(-1)
		gc.Convlit(&nr, n.Type)
		a = foptoas(gc.OMUL, nl.Type, 0)
		goto sbop

		// symmetric binary
	case gc.OADD,
		gc.OMUL:
		a = foptoas(n.Op, nl.Type, 0)

		goto sbop

		// asymmetric binary
	case gc.OSUB,
		gc.OMOD,
		gc.ODIV:
		a = foptoas(n.Op, nl.Type, 0)

		goto abop
	}

sbop: // symmetric binary
	if nl.Ullman < nr.Ullman || nl.Op == gc.OLITERAL {
		nl, nr = nr, nl
	}

abop: // asymmetric binary
	if nl.Ullman >= nr.Ullman {
		var nt gc.Node
		gc.Tempname(&nt, nl.Type)
		gc.Cgen(nl, &nt)
		var n2 gc.Node
		gc.Mgen(nr, &n2, nil)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&nt, &n1)
		gins(a, &n2, &n1)
		gmove(&n1, res)
		gc.Regfree(&n1)
		gc.Mfree(&n2)
	} else {
		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, res)
		gc.Cgen(nr, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, nil)
		gc.Cgen(nl, &n1)
		gins(a, &n2, &n1)
		gc.Regfree(&n2)
		gmove(&n1, res)
		gc.Regfree(&n1)
	}

	return
}
Exemple #15
0
func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) {
	nl := n.Left
	nr := n.Right
	op := n.Op
	if !wantTrue {
		// brcom is not valid on floats when NaN is involved.
		p1 := gc.Gbranch(obj.AJMP, nil, 0)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// No need to avoid re-genning ninit.
		bgen_float(n, true, -likely, p2)

		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p2, gc.Pc)
		return
	}

	if gc.Thearch.Use387 {
		op = gc.Brrev(op) // because the args are stacked
		if op == gc.OGE || op == gc.OGT {
			// only < and <= work right with NaN; reverse if needed
			nl, nr = nr, nl
			op = gc.Brrev(op)
		}

		var ax, n2, tmp gc.Node
		gc.Nodreg(&tmp, nr.Type, x86.REG_F0)
		gc.Nodreg(&n2, nr.Type, x86.REG_F0+1)
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)
		if gc.Simsimtype(nr.Type) == gc.TFLOAT64 {
			if nl.Ullman > nr.Ullman {
				gc.Cgen(nl, &tmp)
				gc.Cgen(nr, &tmp)
				gins(x86.AFXCHD, &tmp, &n2)
			} else {
				gc.Cgen(nr, &tmp)
				gc.Cgen(nl, &tmp)
			}
			gins(x86.AFUCOMPP, &tmp, &n2)
		} else {
			// TODO(rsc): The moves back and forth to memory
			// here are for truncating the value to 32 bits.
			// This handles 32-bit comparison but presumably
			// all the other ops have the same problem.
			// We need to figure out what the right general
			// solution is, besides telling people to use float64.
			var t1 gc.Node
			gc.Tempname(&t1, gc.Types[gc.TFLOAT32])

			var t2 gc.Node
			gc.Tempname(&t2, gc.Types[gc.TFLOAT32])
			gc.Cgen(nr, &t1)
			gc.Cgen(nl, &t2)
			gmove(&t2, &tmp)
			gins(x86.AFCOMFP, &t1, &tmp)
		}
		gins(x86.AFSTSW, nil, &ax)
		gins(x86.ASAHF, nil, nil)
	} else {
		// Not 387
		if !nl.Addable {
			nl = gc.CgenTemp(nl)
		}
		if !nr.Addable {
			nr = gc.CgenTemp(nr)
		}

		var n2 gc.Node
		gc.Regalloc(&n2, nr.Type, nil)
		gmove(nr, &n2)
		nr = &n2

		if nl.Op != gc.OREGISTER {
			var n3 gc.Node
			gc.Regalloc(&n3, nl.Type, nil)
			gmove(nl, &n3)
			nl = &n3
		}

		if op == gc.OGE || op == gc.OGT {
			// only < and <= work right with NopN; reverse if needed
			nl, nr = nr, nl
			op = gc.Brrev(op)
		}

		gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr)
		if nl.Op == gc.OREGISTER {
			gc.Regfree(nl)
		}
		gc.Regfree(nr)
	}

	switch op {
	case gc.OEQ:
		// neither NE nor P
		p1 := gc.Gbranch(x86.AJNE, nil, -likely)
		p2 := gc.Gbranch(x86.AJPS, nil, -likely)
		gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to)
		gc.Patch(p1, gc.Pc)
		gc.Patch(p2, gc.Pc)
	case gc.ONE:
		// either NE or P
		gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to)
		gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to)
	default:
		gc.Patch(gc.Gbranch(optoas(op, nr.Type), nil, likely), to)
	}
}
Exemple #16
0
/*
 * attempt to generate 64-bit
 *	res = n
 * return 1 on success, 0 if op not handled.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatal("cgen64 %v of %v", gc.Oconv(int(n.Op), 0), gc.Oconv(int(res.Op), 0))
	}

	l := n.Left
	var t1 gc.Node
	if !l.Addable {
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)
	switch n.Op {
	default:
		gc.Fatal("cgen64 %v", gc.Oconv(int(n.Op), 0))

	case gc.OMINUS:
		var lo2 gc.Node
		var hi2 gc.Node
		split64(res, &lo2, &hi2)

		gc.Regalloc(&t1, lo1.Type, nil)
		var al gc.Node
		gc.Regalloc(&al, lo1.Type, nil)
		var ah gc.Node
		gc.Regalloc(&ah, hi1.Type, nil)

		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)

		gmove(ncon(0), &t1)
		p1 := gins(arm.ASUB, &al, &t1)
		p1.Scond |= arm.C_SBIT
		gins(arm.AMOVW, &t1, &lo2)

		gmove(ncon(0), &t1)
		gins(arm.ASBC, &ah, &t1)
		gins(arm.AMOVW, &t1, &hi2)

		gc.Regfree(&t1)
		gc.Regfree(&al)
		gc.Regfree(&ah)
		splitclean()
		splitclean()
		return

	case gc.OCOM:
		gc.Regalloc(&t1, lo1.Type, nil)
		gmove(ncon(^uint32(0)), &t1)

		var lo2 gc.Node
		var hi2 gc.Node
		split64(res, &lo2, &hi2)
		var n1 gc.Node
		gc.Regalloc(&n1, lo1.Type, nil)

		gins(arm.AMOVW, &lo1, &n1)
		gins(arm.AEOR, &t1, &n1)
		gins(arm.AMOVW, &n1, &lo2)

		gins(arm.AMOVW, &hi1, &n1)
		gins(arm.AEOR, &t1, &n1)
		gins(arm.AMOVW, &n1, &hi2)

		gc.Regfree(&t1)
		gc.Regfree(&n1)
		splitclean()
		splitclean()
		return

		// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR,
		gc.OLROT:
		break
	}

	// setup for binary operators
	r := n.Right

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	var hi2 gc.Node
	var lo2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	var al gc.Node
	gc.Regalloc(&al, lo1.Type, nil)
	var ah gc.Node
	gc.Regalloc(&ah, hi1.Type, nil)

	// Do op.  Leave result in ah:al.
	switch n.Op {
	default:
		gc.Fatal("cgen64: not implemented: %v\n", n)

		// TODO: Constants
	case gc.OADD:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)

		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi2, &bh)
		gins(arm.AMOVW, &lo2, &bl)
		p1 := gins(arm.AADD, &bl, &al)
		p1.Scond |= arm.C_SBIT
		gins(arm.AADC, &bh, &ah)
		gc.Regfree(&bl)
		gc.Regfree(&bh)

		// TODO: Constants.
	case gc.OSUB:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)

		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo2, &bl)
		gins(arm.AMOVW, &hi2, &bh)
		p1 := gins(arm.ASUB, &bl, &al)
		p1.Scond |= arm.C_SBIT
		gins(arm.ASBC, &bh, &ah)
		gc.Regfree(&bl)
		gc.Regfree(&bh)

		// TODO(kaib): this can be done with 4 regs and does not need 6
	case gc.OMUL:
		var bl gc.Node
		gc.Regalloc(&bl, gc.Types[gc.TPTR32], nil)

		var bh gc.Node
		gc.Regalloc(&bh, gc.Types[gc.TPTR32], nil)
		var cl gc.Node
		gc.Regalloc(&cl, gc.Types[gc.TPTR32], nil)
		var ch gc.Node
		gc.Regalloc(&ch, gc.Types[gc.TPTR32], nil)

		// load args into bh:bl and bh:bl.
		gins(arm.AMOVW, &hi1, &bh)

		gins(arm.AMOVW, &lo1, &bl)
		gins(arm.AMOVW, &hi2, &ch)
		gins(arm.AMOVW, &lo2, &cl)

		// bl * cl -> ah al
		p1 := gins(arm.AMULLU, nil, nil)

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bl.Reg
		p1.Reg = cl.Reg
		p1.To.Type = obj.TYPE_REGREG
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(al.Reg)

		//print("%v\n", p1);

		// bl * ch + ah -> ah
		p1 = gins(arm.AMULA, nil, nil)

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bl.Reg
		p1.Reg = ch.Reg
		p1.To.Type = obj.TYPE_REGREG2
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(ah.Reg)

		//print("%v\n", p1);

		// bh * cl + ah -> ah
		p1 = gins(arm.AMULA, nil, nil)

		p1.From.Type = obj.TYPE_REG
		p1.From.Reg = bh.Reg
		p1.Reg = cl.Reg
		p1.To.Type = obj.TYPE_REGREG2
		p1.To.Reg = ah.Reg
		p1.To.Offset = int64(ah.Reg)

		//print("%v\n", p1);

		gc.Regfree(&bh)

		gc.Regfree(&bl)
		gc.Regfree(&ch)
		gc.Regfree(&cl)

		// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(r.Int())

		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(arm.AMOVW, &hi1, &bl)
			gins(arm.AMOVW, &lo1, &bh)
		} else {
			gins(arm.AMOVW, &hi1, &bh)
			gins(arm.AMOVW, &lo1, &bl)
		}

		if v == 0 {
			gins(arm.AMOVW, &bh, &ah)
			gins(arm.AMOVW, &bl, &al)
		} else {
			// rotate by 1 <= v <= 31
			//	MOVW	bl<<v, al
			//	MOVW	bh<<v, ah
			//	OR		bl>>(32-v), ah
			//	OR		bh>>(32-v), al
			gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al)

			gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah)
			gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah)
			gshift(arm.AORR, &bh, arm.SHIFT_LR, int32(32-v), &al)
		}

		gc.Regfree(&bl)
		gc.Regfree(&bh)

	case gc.OLSH:
		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)

		var p6 *obj.Prog
		var s gc.Node
		var n1 gc.Node
		var creg gc.Node
		var p1 *obj.Prog
		var p2 *obj.Prog
		var p3 *obj.Prog
		var p4 *obj.Prog
		var p5 *obj.Prog
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				// TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al)
				// here and below (verify it optimizes to EOR)
				gins(arm.AEOR, &al, &al)

				gins(arm.AEOR, &ah, &ah)
			} else if v > 32 {
				gins(arm.AEOR, &al, &al)

				//	MOVW	bl<<(v-32), ah
				gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v-32), &ah)
			} else if v == 32 {
				gins(arm.AEOR, &al, &al)
				gins(arm.AMOVW, &bl, &ah)
			} else if v > 0 {
				//	MOVW	bl<<v, al
				gshift(arm.AMOVW, &bl, arm.SHIFT_LL, int32(v), &al)

				//	MOVW	bh<<v, ah
				gshift(arm.AMOVW, &bh, arm.SHIFT_LL, int32(v), &ah)

				//	OR		bl>>(32-v), ah
				gshift(arm.AORR, &bl, arm.SHIFT_LR, int32(32-v), &ah)
			} else {
				gins(arm.AMOVW, &bl, &al)
				gins(arm.AMOVW, &bh, &ah)
			}

			goto olsh_break
		}

		gc.Regalloc(&s, gc.Types[gc.TUINT32], nil)
		gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil)
		if gc.Is64(r.Type) {
			// shift is >= 1<<32
			var cl gc.Node
			var ch gc.Node
			split64(r, &cl, &ch)

			gmove(&ch, &s)
			gins(arm.ATST, &s, nil)
			p6 = gc.Gbranch(arm.ABNE, nil, 0)
			gmove(&cl, &s)
			splitclean()
		} else {
			gmove(r, &s)
			p6 = nil
		}

		gins(arm.ATST, &s, nil)

		// shift == 0
		p1 = gins(arm.AMOVW, &bl, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bh, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p2 = gc.Gbranch(arm.ABEQ, nil, 0)

		// shift is < 32
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		bl<<s, al
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &al)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW.LO		bh<<s, ah
		p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LL, &s, &ah)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		s, creg
		p1 = gins(arm.ASUB, &s, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	OR.LO		bl>>creg, ah
		p1 = gregshift(arm.AORR, &bl, arm.SHIFT_LR, &creg, &ah)

		p1.Scond = arm.C_SCOND_LO

		//	BLO	end
		p3 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift == 32
		p1 = gins(arm.AEOR, &al, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bl, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p4 = gc.Gbranch(arm.ABEQ, nil, 0)

		// shift is < 64
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	EOR.LO	al, al
		p1 = gins(arm.AEOR, &al, &al)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		creg, s
		p1 = gins(arm.ASUB, &creg, &s)

		p1.Scond = arm.C_SCOND_LO

		//	MOVW	bl<<s, ah
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LL, &s, &ah)

		p1.Scond = arm.C_SCOND_LO

		p5 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift >= 64
		if p6 != nil {
			gc.Patch(p6, gc.Pc)
		}
		gins(arm.AEOR, &al, &al)
		gins(arm.AEOR, &ah, &ah)

		gc.Patch(p2, gc.Pc)
		gc.Patch(p3, gc.Pc)
		gc.Patch(p4, gc.Pc)
		gc.Patch(p5, gc.Pc)
		gc.Regfree(&s)
		gc.Regfree(&creg)

	olsh_break:
		gc.Regfree(&bl)
		gc.Regfree(&bh)

	case gc.ORSH:
		var bl gc.Node
		gc.Regalloc(&bl, lo1.Type, nil)
		var bh gc.Node
		gc.Regalloc(&bh, hi1.Type, nil)
		gins(arm.AMOVW, &hi1, &bh)
		gins(arm.AMOVW, &lo1, &bl)

		var p4 *obj.Prog
		var p5 *obj.Prog
		var n1 gc.Node
		var p6 *obj.Prog
		var s gc.Node
		var p1 *obj.Prog
		var p2 *obj.Prog
		var creg gc.Node
		var p3 *obj.Prog
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->31, al
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al)

					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					gins(arm.AEOR, &al, &al)
					gins(arm.AEOR, &ah, &ah)
				}
			} else if v > 32 {
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->(v-32), al
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v-32), &al)

					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					//	MOVW	bh>>(v-32), al
					gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v-32), &al)

					gins(arm.AEOR, &ah, &ah)
				}
			} else if v == 32 {
				gins(arm.AMOVW, &bh, &al)
				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->31, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
				} else {
					gins(arm.AEOR, &ah, &ah)
				}
			} else if v > 0 {
				//	MOVW	bl>>v, al
				gshift(arm.AMOVW, &bl, arm.SHIFT_LR, int32(v), &al)

				//	OR		bh<<(32-v), al
				gshift(arm.AORR, &bh, arm.SHIFT_LL, int32(32-v), &al)

				if bh.Type.Etype == gc.TINT32 {
					//	MOVW	bh->v, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_AR, int32(v), &ah)
				} else {
					//	MOVW	bh>>v, ah
					gshift(arm.AMOVW, &bh, arm.SHIFT_LR, int32(v), &ah)
				}
			} else {
				gins(arm.AMOVW, &bl, &al)
				gins(arm.AMOVW, &bh, &ah)
			}

			goto orsh_break
		}

		gc.Regalloc(&s, gc.Types[gc.TUINT32], nil)
		gc.Regalloc(&creg, gc.Types[gc.TUINT32], nil)
		if gc.Is64(r.Type) {
			// shift is >= 1<<32
			var ch gc.Node
			var cl gc.Node
			split64(r, &cl, &ch)

			gmove(&ch, &s)
			gins(arm.ATST, &s, nil)
			var p1 *obj.Prog
			if bh.Type.Etype == gc.TINT32 {
				p1 = gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
			} else {
				p1 = gins(arm.AEOR, &ah, &ah)
			}
			p1.Scond = arm.C_SCOND_NE
			p6 = gc.Gbranch(arm.ABNE, nil, 0)
			gmove(&cl, &s)
			splitclean()
		} else {
			gmove(r, &s)
			p6 = nil
		}

		gins(arm.ATST, &s, nil)

		// shift == 0
		p1 = gins(arm.AMOVW, &bl, &al)

		p1.Scond = arm.C_SCOND_EQ
		p1 = gins(arm.AMOVW, &bh, &ah)
		p1.Scond = arm.C_SCOND_EQ
		p2 = gc.Gbranch(arm.ABEQ, nil, 0)

		// check if shift is < 32
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 32)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		bl>>s, al
		p1 = gregshift(arm.AMOVW, &bl, arm.SHIFT_LR, &s, &al)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		s,creg
		p1 = gins(arm.ASUB, &s, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	OR.LO		bh<<(32-s), al
		p1 = gregshift(arm.AORR, &bh, arm.SHIFT_LL, &creg, &al)

		p1.Scond = arm.C_SCOND_LO

		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->s, ah
			p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &ah)
		} else {
			//	MOVW	bh>>s, ah
			p1 = gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &ah)
		}

		p1.Scond = arm.C_SCOND_LO

		//	BLO	end
		p3 = gc.Gbranch(arm.ABLO, nil, 0)

		// shift == 32
		p1 = gins(arm.AMOVW, &bh, &al)

		p1.Scond = arm.C_SCOND_EQ
		if bh.Type.Etype == gc.TINT32 {
			gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &ah)
		} else {
			gins(arm.AEOR, &ah, &ah)
		}
		p4 = gc.Gbranch(arm.ABEQ, nil, 0)

		// check if shift is < 64
		gc.Nodconst(&n1, gc.Types[gc.TUINT32], 64)

		gmove(&n1, &creg)
		gins(arm.ACMP, &s, &creg)

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(arm.AMOVW, &creg, arm.SHIFT_LR, 1, &creg)

		p1.Scond = arm.C_SCOND_LO

		//	SUB.LO		creg, s
		p1 = gins(arm.ASUB, &creg, &s)

		p1.Scond = arm.C_SCOND_LO

		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->(s-32), al
			p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_AR, &s, &al)

			p1.Scond = arm.C_SCOND_LO
		} else {
			//	MOVW	bh>>(v-32), al
			p1 := gregshift(arm.AMOVW, &bh, arm.SHIFT_LR, &s, &al)

			p1.Scond = arm.C_SCOND_LO
		}

		//	BLO	end
		p5 = gc.Gbranch(arm.ABLO, nil, 0)

		// s >= 64
		if p6 != nil {
			gc.Patch(p6, gc.Pc)
		}
		if bh.Type.Etype == gc.TINT32 {
			//	MOVW	bh->31, al
			gshift(arm.AMOVW, &bh, arm.SHIFT_AR, 31, &al)
		} else {
			gins(arm.AEOR, &al, &al)
		}

		gc.Patch(p2, gc.Pc)
		gc.Patch(p3, gc.Pc)
		gc.Patch(p4, gc.Pc)
		gc.Patch(p5, gc.Pc)
		gc.Regfree(&s)
		gc.Regfree(&creg)

	orsh_break:
		gc.Regfree(&bl)
		gc.Regfree(&bh)

		// TODO(kaib): literal optimizations
	// make constant the right side (it usually is anyway).
	//		if(lo1.op == OLITERAL) {
	//			nswap(&lo1, &lo2);
	//			nswap(&hi1, &hi2);
	//		}
	//		if(lo2.op == OLITERAL) {
	//			// special cases for constants.
	//			lv = mpgetfix(lo2.val.u.xval);
	//			hv = mpgetfix(hi2.val.u.xval);
	//			splitclean();	// right side
	//			split64(res, &lo2, &hi2);
	//			switch(n->op) {
	//			case OXOR:
	//				gmove(&lo1, &lo2);
	//				gmove(&hi1, &hi2);
	//				switch(lv) {
	//				case 0:
	//					break;
	//				case 0xffffffffu:
	//					gins(ANOTL, N, &lo2);
	//					break;
	//				default:
	//					gins(AXORL, ncon(lv), &lo2);
	//					break;
	//				}
	//				switch(hv) {
	//				case 0:
	//					break;
	//				case 0xffffffffu:
	//					gins(ANOTL, N, &hi2);
	//					break;
	//				default:
	//					gins(AXORL, ncon(hv), &hi2);
	//					break;
	//				}
	//				break;

	//			case OAND:
	//				switch(lv) {
	//				case 0:
	//					gins(AMOVL, ncon(0), &lo2);
	//					break;
	//				default:
	//					gmove(&lo1, &lo2);
	//					if(lv != 0xffffffffu)
	//						gins(AANDL, ncon(lv), &lo2);
	//					break;
	//				}
	//				switch(hv) {
	//				case 0:
	//					gins(AMOVL, ncon(0), &hi2);
	//					break;
	//				default:
	//					gmove(&hi1, &hi2);
	//					if(hv != 0xffffffffu)
	//						gins(AANDL, ncon(hv), &hi2);
	//					break;
	//				}
	//				break;

	//			case OOR:
	//				switch(lv) {
	//				case 0:
	//					gmove(&lo1, &lo2);
	//					break;
	//				case 0xffffffffu:
	//					gins(AMOVL, ncon(0xffffffffu), &lo2);
	//					break;
	//				default:
	//					gmove(&lo1, &lo2);
	//					gins(AORL, ncon(lv), &lo2);
	//					break;
	//				}
	//				switch(hv) {
	//				case 0:
	//					gmove(&hi1, &hi2);
	//					break;
	//				case 0xffffffffu:
	//					gins(AMOVL, ncon(0xffffffffu), &hi2);
	//					break;
	//				default:
	//					gmove(&hi1, &hi2);
	//					gins(AORL, ncon(hv), &hi2);
	//					break;
	//				}
	//				break;
	//			}
	//			splitclean();
	//			splitclean();
	//			goto out;
	//		}
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		var n1 gc.Node
		gc.Regalloc(&n1, lo1.Type, nil)

		gins(arm.AMOVW, &lo1, &al)
		gins(arm.AMOVW, &hi1, &ah)
		gins(arm.AMOVW, &lo2, &n1)
		gins(optoas(int(n.Op), lo1.Type), &n1, &al)
		gins(arm.AMOVW, &hi2, &n1)
		gins(optoas(int(n.Op), lo1.Type), &n1, &ah)
		gc.Regfree(&n1)
	}

	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	split64(res, &lo1, &hi1)
	gins(arm.AMOVW, &al, &lo1)
	gins(arm.AMOVW, &ah, &hi1)
	splitclean()

	//out:
	gc.Regfree(&al)

	gc.Regfree(&ah)
}
Exemple #17
0
/*
 * attempt to generate 64-bit
 *	res = n
 * return 1 on success, 0 if op not handled.
 */
func cgen64(n *gc.Node, res *gc.Node) {
	if res.Op != gc.OINDREG && res.Op != gc.ONAME {
		gc.Dump("n", n)
		gc.Dump("res", res)
		gc.Fatalf("cgen64 %v of %v", gc.Oconv(int(n.Op), 0), gc.Oconv(int(res.Op), 0))
	}

	switch n.Op {
	default:
		gc.Fatalf("cgen64 %v", gc.Oconv(int(n.Op), 0))

	case gc.OMINUS:
		gc.Cgen(n.Left, res)
		var hi1 gc.Node
		var lo1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANEGL, nil, &lo1)
		gins(x86.AADCL, ncon(0), &hi1)
		gins(x86.ANEGL, nil, &hi1)
		splitclean()
		return

	case gc.OCOM:
		gc.Cgen(n.Left, res)
		var lo1 gc.Node
		var hi1 gc.Node
		split64(res, &lo1, &hi1)
		gins(x86.ANOTL, nil, &lo1)
		gins(x86.ANOTL, nil, &hi1)
		splitclean()
		return

		// binary operators.
	// common setup below.
	case gc.OADD,
		gc.OSUB,
		gc.OMUL,
		gc.OLROT,
		gc.OLSH,
		gc.ORSH,
		gc.OAND,
		gc.OOR,
		gc.OXOR:
		break
	}

	l := n.Left
	r := n.Right
	if !l.Addable {
		var t1 gc.Node
		gc.Tempname(&t1, l.Type)
		gc.Cgen(l, &t1)
		l = &t1
	}

	if r != nil && !r.Addable {
		var t2 gc.Node
		gc.Tempname(&t2, r.Type)
		gc.Cgen(r, &t2)
		r = &t2
	}

	var ax gc.Node
	gc.Nodreg(&ax, gc.Types[gc.TINT32], x86.REG_AX)
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)
	var dx gc.Node
	gc.Nodreg(&dx, gc.Types[gc.TINT32], x86.REG_DX)

	// Setup for binary operation.
	var hi1 gc.Node
	var lo1 gc.Node
	split64(l, &lo1, &hi1)

	var lo2 gc.Node
	var hi2 gc.Node
	if gc.Is64(r.Type) {
		split64(r, &lo2, &hi2)
	}

	// Do op. Leave result in DX:AX.
	switch n.Op {
	// TODO: Constants
	case gc.OADD:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AADDL, &lo2, &ax)
		gins(x86.AADCL, &hi2, &dx)

		// TODO: Constants.
	case gc.OSUB:
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.ASUBL, &lo2, &ax)
		gins(x86.ASBBL, &hi2, &dx)

	case gc.OMUL:
		// let's call the next three EX, FX and GX
		var ex, fx, gx gc.Node
		gc.Regalloc(&ex, gc.Types[gc.TPTR32], nil)
		gc.Regalloc(&fx, gc.Types[gc.TPTR32], nil)
		gc.Regalloc(&gx, gc.Types[gc.TPTR32], nil)

		// load args into DX:AX and EX:GX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)
		gins(x86.AMOVL, &lo2, &gx)
		gins(x86.AMOVL, &hi2, &ex)

		// if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply.
		gins(x86.AMOVL, &dx, &fx)

		gins(x86.AORL, &ex, &fx)
		p1 := gc.Gbranch(x86.AJNE, nil, 0)
		gins(x86.AMULL, &gx, nil) // implicit &ax
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// full 64x64 -> 64, from 32x32 -> 64.
		gins(x86.AIMULL, &gx, &dx)

		gins(x86.AMOVL, &ax, &fx)
		gins(x86.AIMULL, &ex, &fx)
		gins(x86.AADDL, &dx, &fx)
		gins(x86.AMOVL, &gx, &dx)
		gins(x86.AMULL, &dx, nil) // implicit &ax
		gins(x86.AADDL, &fx, &dx)
		gc.Patch(p2, gc.Pc)

		gc.Regfree(&ex)
		gc.Regfree(&fx)
		gc.Regfree(&gx)

	// We only rotate by a constant c in [0,64).
	// if c >= 32:
	//	lo, hi = hi, lo
	//	c -= 32
	// if c == 0:
	//	no-op
	// else:
	//	t = hi
	//	shld hi:lo, c
	//	shld lo:t, c
	case gc.OLROT:
		v := uint64(r.Int())

		if v >= 32 {
			// reverse during load to do the first 32 bits of rotate
			v -= 32

			gins(x86.AMOVL, &lo1, &dx)
			gins(x86.AMOVL, &hi1, &ax)
		} else {
			gins(x86.AMOVL, &lo1, &ax)
			gins(x86.AMOVL, &hi1, &dx)
		}

		if v == 0 {
		} else // done
		{
			gins(x86.AMOVL, &dx, &cx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			p1 = gins(x86.ASHLL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_CX // double-width shift
			p1.From.Scale = 0
		}

	case gc.OLSH:
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				gins(x86.AMOVL, ncon(0), &lo2)
				gins(x86.AMOVL, ncon(0), &hi2)
				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&lo1, &hi2)
				if v > 32 {
					gins(x86.ASHLL, ncon(uint32(v-32)), &hi2)
				}

				gins(x86.AMOVL, ncon(0), &lo2)
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx)
			p1.From.Index = x86.REG_AX // double-width shift
			p1.From.Scale = 0
			gins(x86.ASHLL, ncon(uint32(v)), &ax)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		gins(x86.AXORL, &dx, &dx)
		gins(x86.AXORL, &ax, &ax)
		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, zero low.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &ax, &dx)
		gins(x86.ASHLL, &cx, &dx) // SHLL only uses bottom 5 bits of count
		gins(x86.AXORL, &ax, &ax)
		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHLL, &cx, &dx)

		p1.From.Index = x86.REG_AX // double-width shift
		p1.From.Scale = 0
		gins(x86.ASHLL, &cx, &ax)
		gc.Patch(p2, gc.Pc)

	case gc.ORSH:
		if r.Op == gc.OLITERAL {
			v := uint64(r.Int())
			if v >= 64 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				splitclean()
				split64(res, &lo2, &hi2)
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &lo2)
					gins(x86.ASARL, ncon(31), &lo2)
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &lo2)
					gins(x86.AMOVL, ncon(0), &hi2)
				}

				splitclean()
				return
			}

			if v >= 32 {
				if gc.Is64(r.Type) {
					splitclean()
				}
				split64(res, &lo2, &hi2)
				gmove(&hi1, &lo2)
				if v > 32 {
					gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v-32)), &lo2)
				}
				if hi1.Type.Etype == gc.TINT32 {
					gmove(&hi1, &hi2)
					gins(x86.ASARL, ncon(31), &hi2)
				} else {
					gins(x86.AMOVL, ncon(0), &hi2)
				}
				splitclean()
				splitclean()
				return
			}

			// general shift
			gins(x86.AMOVL, &lo1, &ax)

			gins(x86.AMOVL, &hi1, &dx)
			p1 := gins(x86.ASHRL, ncon(uint32(v)), &ax)
			p1.From.Index = x86.REG_DX // double-width shift
			p1.From.Scale = 0
			gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v)), &dx)
			break
		}

		// load value into DX:AX.
		gins(x86.AMOVL, &lo1, &ax)

		gins(x86.AMOVL, &hi1, &dx)

		// load shift value into register.
		// if high bits are set, zero value.
		var p1 *obj.Prog

		if gc.Is64(r.Type) {
			gins(x86.ACMPL, &hi2, ncon(0))
			p1 = gc.Gbranch(x86.AJNE, nil, +1)
			gins(x86.AMOVL, &lo2, &cx)
		} else {
			cx.Type = gc.Types[gc.TUINT32]
			gmove(r, &cx)
		}

		// if shift count is >=64, zero or sign-extend value
		gins(x86.ACMPL, &cx, ncon(64))

		p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		if p1 != nil {
			gc.Patch(p1, gc.Pc)
		}
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, ncon(31), &dx)
			gins(x86.AMOVL, &dx, &ax)
		} else {
			gins(x86.AXORL, &dx, &dx)
			gins(x86.AXORL, &ax, &ax)
		}

		gc.Patch(p2, gc.Pc)

		// if shift count is >= 32, sign-extend hi.
		gins(x86.ACMPL, &cx, ncon(32))

		p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		gins(x86.AMOVL, &dx, &ax)
		if hi1.Type.Etype == gc.TINT32 {
			gins(x86.ASARL, &cx, &ax) // SARL only uses bottom 5 bits of count
			gins(x86.ASARL, ncon(31), &dx)
		} else {
			gins(x86.ASHRL, &cx, &ax)
			gins(x86.AXORL, &dx, &dx)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)

		// general shift
		p1 = gins(x86.ASHRL, &cx, &ax)

		p1.From.Index = x86.REG_DX // double-width shift
		p1.From.Scale = 0
		gins(optoas(gc.ORSH, hi1.Type), &cx, &dx)
		gc.Patch(p2, gc.Pc)

		// make constant the right side (it usually is anyway).
	case gc.OXOR,
		gc.OAND,
		gc.OOR:
		if lo1.Op == gc.OLITERAL {
			nswap(&lo1, &lo2)
			nswap(&hi1, &hi2)
		}

		if lo2.Op == gc.OLITERAL {
			// special cases for constants.
			lv := uint32(lo2.Int())
			hv := uint32(hi2.Int())
			splitclean() // right side
			split64(res, &lo2, &hi2)
			switch n.Op {
			case gc.OXOR:
				gmove(&lo1, &lo2)
				gmove(&hi1, &hi2)
				switch lv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &lo2)

				default:
					gins(x86.AXORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					break

				case 0xffffffff:
					gins(x86.ANOTL, nil, &hi2)

				default:
					gins(x86.AXORL, ncon(hv), &hi2)
				}

			case gc.OAND:
				switch lv {
				case 0:
					gins(x86.AMOVL, ncon(0), &lo2)

				default:
					gmove(&lo1, &lo2)
					if lv != 0xffffffff {
						gins(x86.AANDL, ncon(lv), &lo2)
					}
				}

				switch hv {
				case 0:
					gins(x86.AMOVL, ncon(0), &hi2)

				default:
					gmove(&hi1, &hi2)
					if hv != 0xffffffff {
						gins(x86.AANDL, ncon(hv), &hi2)
					}
				}

			case gc.OOR:
				switch lv {
				case 0:
					gmove(&lo1, &lo2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &lo2)

				default:
					gmove(&lo1, &lo2)
					gins(x86.AORL, ncon(lv), &lo2)
				}

				switch hv {
				case 0:
					gmove(&hi1, &hi2)

				case 0xffffffff:
					gins(x86.AMOVL, ncon(0xffffffff), &hi2)

				default:
					gmove(&hi1, &hi2)
					gins(x86.AORL, ncon(hv), &hi2)
				}
			}

			splitclean()
			splitclean()
			return
		}

		gins(x86.AMOVL, &lo1, &ax)
		gins(x86.AMOVL, &hi1, &dx)
		gins(optoas(n.Op, lo1.Type), &lo2, &ax)
		gins(optoas(n.Op, lo1.Type), &hi2, &dx)
	}

	if gc.Is64(r.Type) {
		splitclean()
	}
	splitclean()

	split64(res, &lo1, &hi1)
	gins(x86.AMOVL, &ax, &lo1)
	gins(x86.AMOVL, &dx, &hi1)
	splitclean()
}
Exemple #18
0
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatalf("cgen_shift %v", nl.Type)
	}

	w := int(nl.Type.Width * 8)

	if op == gc.OLROT {
		v := nr.Int()
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		if w == 32 {
			gc.Cgen(nl, &n1)
			gshift(arm.AMOVW, &n1, arm.SHIFT_RR, int32(w)-int32(v), &n1)
		} else {
			var n2 gc.Node
			gc.Regalloc(&n2, nl.Type, nil)
			gc.Cgen(nl, &n2)
			gshift(arm.AMOVW, &n2, arm.SHIFT_LL, int32(v), &n1)
			gshift(arm.AORR, &n2, arm.SHIFT_LR, int32(w)-int32(v), &n1)
			gc.Regfree(&n2)

			// Ensure sign/zero-extended result.
			gins(optoas(gc.OAS, nl.Type), &n1, &n1)
		}

		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int())
		if sc == 0 {
		} else // nothing to do
		if sc >= uint64(nl.Type.Width*8) {
			if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
				gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(w), &n1)
			} else {
				gins(arm.AEOR, &n1, &n1)
			}
		} else {
			if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
				gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(sc), &n1)
			} else if op == gc.ORSH {
				gshift(arm.AMOVW, &n1, arm.SHIFT_LR, int32(sc), &n1) // OLSH
			} else {
				gshift(arm.AMOVW, &n1, arm.SHIFT_LL, int32(sc), &n1)
			}
		}

		if w < 32 && op == gc.OLSH {
			gins(optoas(gc.OAS, nl.Type), &n1, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	tr := nr.Type
	var t gc.Node
	var n1 gc.Node
	var n2 gc.Node
	var n3 gc.Node
	if tr.Width > 4 {
		var nt gc.Node
		gc.Tempname(&nt, nr.Type)
		if nl.Ullman >= nr.Ullman {
			gc.Regalloc(&n2, nl.Type, res)
			gc.Cgen(nl, &n2)
			gc.Cgen(nr, &nt)
			n1 = nt
		} else {
			gc.Cgen(nr, &nt)
			gc.Regalloc(&n2, nl.Type, res)
			gc.Cgen(nl, &n2)
		}

		var hi gc.Node
		var lo gc.Node
		split64(&nt, &lo, &hi)
		gc.Regalloc(&n1, gc.Types[gc.TUINT32], nil)
		gc.Regalloc(&n3, gc.Types[gc.TUINT32], nil)
		gmove(&lo, &n1)
		gmove(&hi, &n3)
		splitclean()
		gins(arm.ATST, &n3, nil)
		gc.Nodconst(&t, gc.Types[gc.TUINT32], int64(w))
		p1 := gins(arm.AMOVW, &t, &n1)
		p1.Scond = arm.C_SCOND_NE
		tr = gc.Types[gc.TUINT32]
		gc.Regfree(&n3)
	} else {
		if nl.Ullman >= nr.Ullman {
			gc.Regalloc(&n2, nl.Type, res)
			gc.Cgen(nl, &n2)
			gc.Regalloc(&n1, nr.Type, nil)
			gc.Cgen(nr, &n1)
		} else {
			gc.Regalloc(&n1, nr.Type, nil)
			gc.Cgen(nr, &n1)
			gc.Regalloc(&n2, nl.Type, res)
			gc.Cgen(nl, &n2)
		}
	}

	// test for shift being 0
	gins(arm.ATST, &n1, nil)

	p3 := gc.Gbranch(arm.ABEQ, nil, -1)

	// test and fix up large shifts
	// TODO: if(!bounded), don't emit some of this.
	gc.Regalloc(&n3, tr, nil)

	gc.Nodconst(&t, gc.Types[gc.TUINT32], int64(w))
	gmove(&t, &n3)
	gins(arm.ACMP, &n1, &n3)
	if op == gc.ORSH {
		var p1 *obj.Prog
		var p2 *obj.Prog
		if gc.Issigned[nl.Type.Etype] {
			p1 = gshift(arm.AMOVW, &n2, arm.SHIFT_AR, int32(w)-1, &n2)
			p2 = gregshift(arm.AMOVW, &n2, arm.SHIFT_AR, &n1, &n2)
		} else {
			p1 = gins(arm.AEOR, &n2, &n2)
			p2 = gregshift(arm.AMOVW, &n2, arm.SHIFT_LR, &n1, &n2)
		}

		p1.Scond = arm.C_SCOND_HS
		p2.Scond = arm.C_SCOND_LO
	} else {
		p1 := gins(arm.AEOR, &n2, &n2)
		p2 := gregshift(arm.AMOVW, &n2, arm.SHIFT_LL, &n1, &n2)
		p1.Scond = arm.C_SCOND_HS
		p2.Scond = arm.C_SCOND_LO
	}

	gc.Regfree(&n3)

	gc.Patch(p3, gc.Pc)

	// Left-shift of smaller word must be sign/zero-extended.
	if w < 32 && op == gc.OLSH {
		gins(optoas(gc.OAS, nl.Type), &n2, &n2)
	}
	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
Exemple #19
0
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	if nl.Type.Width > 4 {
		gc.Fatalf("cgen_shift %v", nl.Type)
	}

	w := int(nl.Type.Width * 8)

	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n2 gc.Node
		gc.Tempname(&n2, nl.Type)
		gc.Cgen(nl, &n2)
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gmove(&n2, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			gins(a, ncon(uint32(w)-1), &n1)

			gins(a, ncon(uint32(w)-1), &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	var oldcx gc.Node
	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
	if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) {
		gc.Tempname(&oldcx, gc.Types[gc.TUINT32])
		gmove(&cx, &oldcx)
	}

	var n1 gc.Node
	var nt gc.Node
	if nr.Type.Width > 4 {
		gc.Tempname(&nt, nr.Type)
		n1 = nt
	} else {
		gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)
		gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	}

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
	} else {
		gc.Cgen(nr, &n1)
		gc.Cgen(nl, &n2)
	}

	// test and fix up large shifts
	if bounded {
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			splitclean()
		}
	} else {
		var p1 *obj.Prog
		if nr.Type.Width > 4 {
			// delayed reg alloc
			gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

			gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX
			var lo gc.Node
			var hi gc.Node
			split64(&nt, &lo, &hi)
			gmove(&lo, &n1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0))
			p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1)
			gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
			splitclean()
			gc.Patch(p2, gc.Pc)
		} else {
			gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w)))
			p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1)
		}

		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gins(a, ncon(uint32(w)-1), &n2)
		} else {
			gmove(ncon(0), &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		gmove(&oldcx, &cx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
Exemple #20
0
/*
 * generate division.
 * caller must set:
 *	ax = allocated AX register
 *	dx = allocated DX register
 * generates one of:
 *	res = nl / nr
 *	res = nl % nr
 * according to op.
 */
func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) {
	// Have to be careful about handling
	// most negative int divided by -1 correctly.
	// The hardware will trap.
	// Also the byte divide instruction needs AH,
	// which we otherwise don't have to deal with.
	// Easiest way to avoid for int8, int16: use int32.
	// For int32 and int64, use explicit test.
	// Could use int64 hw for int32.
	t := nl.Type

	t0 := t
	check := false
	if gc.Issigned[t.Etype] {
		check = true
		if gc.Isconst(nl, gc.CTINT) && nl.Int() != -1<<uint64(t.Width*8-1) {
			check = false
		} else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 {
			check = false
		}
	}

	if t.Width < 4 {
		if gc.Issigned[t.Etype] {
			t = gc.Types[gc.TINT32]
		} else {
			t = gc.Types[gc.TUINT32]
		}
		check = false
	}

	var t1 gc.Node
	gc.Tempname(&t1, t)
	var t2 gc.Node
	gc.Tempname(&t2, t)
	if t0 != t {
		var t3 gc.Node
		gc.Tempname(&t3, t0)
		var t4 gc.Node
		gc.Tempname(&t4, t0)
		gc.Cgen(nl, &t3)
		gc.Cgen(nr, &t4)

		// Convert.
		gmove(&t3, &t1)

		gmove(&t4, &t2)
	} else {
		gc.Cgen(nl, &t1)
		gc.Cgen(nr, &t2)
	}

	var n1 gc.Node
	if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) {
		gc.Regalloc(&n1, t, res)
	} else {
		gc.Regalloc(&n1, t, nil)
	}
	gmove(&t2, &n1)
	gmove(&t1, ax)
	var p2 *obj.Prog
	var n4 gc.Node
	if gc.Nacl {
		// Native Client does not relay the divide-by-zero trap
		// to the executing program, so we must insert a check
		// for ourselves.
		gc.Nodconst(&n4, t, 0)

		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if panicdiv == nil {
			panicdiv = gc.Sysfunc("panicdivide")
		}
		gc.Ginscall(panicdiv, -1)
		gc.Patch(p1, gc.Pc)
	}

	if check {
		gc.Nodconst(&n4, t, -1)
		gins(optoas(gc.OCMP, t), &n1, &n4)
		p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1)
		if op == gc.ODIV {
			// a / (-1) is -a.
			gins(optoas(gc.OMINUS, t), nil, ax)

			gmove(ax, res)
		} else {
			// a % (-1) is 0.
			gc.Nodconst(&n4, t, 0)

			gmove(&n4, res)
		}

		p2 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p1, gc.Pc)
	}

	if !gc.Issigned[t.Etype] {
		var nz gc.Node
		gc.Nodconst(&nz, t, 0)
		gmove(&nz, dx)
	} else {
		gins(optoas(gc.OEXTEND, t), nil, nil)
	}
	gins(optoas(op, t), &n1, nil)
	gc.Regfree(&n1)

	if op == gc.ODIV {
		gmove(ax, res)
	} else {
		gmove(dx, res)
	}
	if check {
		gc.Patch(p2, gc.Pc)
	}
}
Exemple #21
0
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := int(optoas(op, nl.Type))

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	var n1 gc.Node
	gc.Regalloc(&n1, nr.Type, nil) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var n2 gc.Node
	gc.Regalloc(&n2, nl.Type, res)

	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, tcount), nil, +1))
		if op == gc.ORSH && gc.Issigned[nl.Type.Etype] {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}
Exemple #22
0
/*
 * generate shift according to op, one of:
 *	res = nl << nr
 *	res = nl >> nr
 */
func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) {
	a := optoas(op, nl.Type)

	if nr.Op == gc.OLITERAL {
		var n1 gc.Node
		gc.Regalloc(&n1, nl.Type, res)
		gc.Cgen(nl, &n1)
		sc := uint64(nr.Int64())
		if sc >= uint64(nl.Type.Width*8) {
			// large shift gets 2 shifts by width-1
			var n3 gc.Node
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)

			gins(a, &n3, &n1)
			gins(a, &n3, &n1)
		} else {
			gins(a, nr, &n1)
		}
		gmove(&n1, res)
		gc.Regfree(&n1)
		return
	}

	if nl.Ullman >= gc.UINF {
		var n4 gc.Node
		gc.Tempname(&n4, nl.Type)
		gc.Cgen(nl, &n4)
		nl = &n4
	}

	if nr.Ullman >= gc.UINF {
		var n5 gc.Node
		gc.Tempname(&n5, nr.Type)
		gc.Cgen(nr, &n5)
		nr = &n5
	}

	rcx := gc.GetReg(x86.REG_CX)
	var n1 gc.Node
	gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX)

	// Allow either uint32 or uint64 as shift type,
	// to avoid unnecessary conversion from uint32 to uint64
	// just to do the comparison.
	tcount := gc.Types[gc.Simtype[nr.Type.Etype]]

	if tcount.Etype < gc.TUINT32 {
		tcount = gc.Types[gc.TUINT32]
	}

	gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX
	var n3 gc.Node
	gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX

	var cx gc.Node
	gc.Nodreg(&cx, gc.Types[gc.TUINT64], x86.REG_CX)

	var oldcx gc.Node
	if rcx > 0 && !gc.Samereg(&cx, res) {
		gc.Regalloc(&oldcx, gc.Types[gc.TUINT64], nil)
		gmove(&cx, &oldcx)
	}

	cx.Type = tcount

	var n2 gc.Node
	if gc.Samereg(&cx, res) {
		gc.Regalloc(&n2, nl.Type, nil)
	} else {
		gc.Regalloc(&n2, nl.Type, res)
	}
	if nl.Ullman >= nr.Ullman {
		gc.Cgen(nl, &n2)
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
	} else {
		gc.Cgen(nr, &n1)
		gmove(&n1, &n3)
		gc.Cgen(nl, &n2)
	}

	gc.Regfree(&n3)

	// test and fix up large shifts
	if !bounded {
		gc.Nodconst(&n3, tcount, nl.Type.Width*8)
		gins(optoas(gc.OCMP, tcount), &n1, &n3)
		p1 := gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)
		if op == gc.ORSH && nl.Type.IsSigned() {
			gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1)
			gins(a, &n3, &n2)
		} else {
			gc.Nodconst(&n3, nl.Type, 0)
			gmove(&n3, &n2)
		}

		gc.Patch(p1, gc.Pc)
	}

	gins(a, &n1, &n2)

	if oldcx.Op != 0 {
		cx.Type = gc.Types[gc.TUINT64]
		gmove(&oldcx, &cx)
		gc.Regfree(&oldcx)
	}

	gmove(&n2, res)

	gc.Regfree(&n1)
	gc.Regfree(&n2)
}