/* * generate division. * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node) { t := nl.Type t0 := t if t.Width < 8 { if t.IsSigned() { t = gc.Types[gc.TINT64] } else { t = gc.Types[gc.TUINT64] } } a := optoas(gc.ODIV, t) var tl gc.Node gc.Regalloc(&tl, t0, nil) var tr gc.Node gc.Regalloc(&tr, t0, nil) if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &tl) gc.Cgen(nr, &tr) } else { gc.Cgen(nr, &tr) gc.Cgen(nl, &tl) } if t != t0 { // Convert tl2 := tl tr2 := tr tl.Type = t tr.Type = t gmove(&tl2, &tl) gmove(&tr2, &tr) } // Handle divide-by-zero panic. p1 := ginsbranch(mips.ABNE, nil, &tr, nil, 0) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) gins3(a, &tr, &tl, nil) gc.Regfree(&tr) if op == gc.ODIV { var lo gc.Node gc.Nodreg(&lo, gc.Types[gc.TUINT64], mips.REG_LO) gins(mips.AMOVV, &lo, &tl) } else { // remainder in REG_HI var hi gc.Node gc.Nodreg(&hi, gc.Types[gc.TUINT64], mips.REG_HI) gins(mips.AMOVV, &hi, &tl) } gmove(&tl, res) gc.Regfree(&tl) }
// TODO(mips): implement DUFFZERO func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = gc.Appendpp(p, mips.AMOVW, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGSP, gc.Ctxt.FixedFrameSize()+frame+lo+i) } } else { //fmt.Printf("zerorange frame:%v, lo: %v, hi:%v \n", frame ,lo, hi) // ADD $(FIXED_FRAME+frame+lo-4), SP, r1 // ADD $cnt, r1, r2 // loop: // MOVW R0, (Widthptr)r1 // ADD $Widthptr, r1 // BNE r1, r2, loop p = gc.Appendpp(p, mips.AADD, obj.TYPE_CONST, 0, gc.Ctxt.FixedFrameSize()+frame+lo-4, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = gc.Appendpp(p, mips.AADD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, mips.REGRT2, 0) p.Reg = mips.REGRT1 p = gc.Appendpp(p, mips.AMOVW, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGRT1, int64(gc.Widthptr)) p1 := p p = gc.Appendpp(p, mips.AADD, obj.TYPE_CONST, 0, int64(gc.Widthptr), obj.TYPE_REG, mips.REGRT1, 0) p = gc.Appendpp(p, mips.ABNE, obj.TYPE_REG, mips.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) p.Reg = mips.REGRT2 gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, 8+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) { p = appendpp(p, ppc64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGRT1, 0) p.Reg = ppc64.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, ppc64.REGTMP, 0) p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT1, 0) p.Reg = ppc64.REGSP p = appendpp(p, ppc64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, ppc64.REGTMP, 0) p = appendpp(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT2, 0) p.Reg = ppc64.REGRT1 p = appendpp(p, ppc64.AMOVDU, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGRT1, int64(gc.Widthptr)) p1 := p p = appendpp(p, ppc64.ACMP, obj.TYPE_REG, ppc64.REGRT1, 0, obj.TYPE_REG, ppc64.REGRT2, 0) p = appendpp(p, ppc64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = gc.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p = gc.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p = gc.Appendpp(p, arm64.AADD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, arm64.REGRT1, 0) p.Reg = arm64.REGRT1 p = gc.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) gc.Naddr(&p.To, gc.Sysfunc("duffzero")) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = gc.Appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, arm64.REGTMP, 0) p = gc.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p = gc.Appendpp(p, arm64.AADD, obj.TYPE_REG, arm64.REGTMP, 0, obj.TYPE_REG, arm64.REGRT1, 0) p.Reg = arm64.REGRT1 p = gc.Appendpp(p, arm64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, arm64.REGTMP, 0) p = gc.Appendpp(p, arm64.AADD, obj.TYPE_REG, arm64.REGTMP, 0, obj.TYPE_REG, arm64.REGRT2, 0) p.Reg = arm64.REGRT1 p = gc.Appendpp(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGRT1, int64(gc.Widthptr)) p.Scond = arm64.C_XPRE p1 := p p = gc.Appendpp(p, arm64.ACMP, obj.TYPE_REG, arm64.REGRT1, 0, obj.TYPE_NONE, 0, 0) p.Reg = arm64.REGRT2 p = gc.Appendpp(p, arm64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
/* * generate floating-point operation. */ func cgen_float(n *gc.Node, res *gc.Node) { nl := n.Left switch n.Op { case gc.OEQ, gc.ONE, gc.OLT, gc.OLE, gc.OGE: p1 := gc.Gbranch(obj.AJMP, nil, 0) p2 := gc.Pc gmove(gc.Nodbool(true), res) p3 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) gc.Bgen(n, true, 0, p2) gmove(gc.Nodbool(false), res) gc.Patch(p3, gc.Pc) return case gc.OPLUS: gc.Cgen(nl, res) return case gc.OCONV: if gc.Eqtype(n.Type, nl.Type) || gc.Noconv(n.Type, nl.Type) { gc.Cgen(nl, res) return } var n2 gc.Node gc.Tempname(&n2, n.Type) var n1 gc.Node gc.Mgen(nl, &n1, res) gmove(&n1, &n2) gmove(&n2, res) gc.Mfree(&n1) return } if gc.Thearch.Use387 { cgen_float387(n, res) } else { cgen_floatsse(n, res) } }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, mips.AMOVV, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGSP, 8+frame+lo+i) } // TODO(dfc): https://golang.org/issue/12108 // If DUFFZERO is used inside a tail call (see genwrapper) it will // overwrite the link register. } else if false && cnt <= int64(128*gc.Widthptr) { p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr)) } else { // ADDV $(8+frame+lo-8), SP, r1 // ADDV $cnt, r1, r2 // loop: // MOVV R0, (Widthptr)r1 // ADDV $Widthptr, r1 // BNE r1, r2, loop p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, mips.REGRT2, 0) p.Reg = mips.REGRT1 p = appendpp(p, mips.AMOVV, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGRT1, int64(gc.Widthptr)) p1 := p p = appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, int64(gc.Widthptr), obj.TYPE_REG, mips.REGRT1, 0) p = appendpp(p, mips.ABNE, obj.TYPE_REG, mips.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) p.Reg = mips.REGRT2 gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, r0 *uint32) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if *r0 == 0 { p = appendpp(p, arm.AMOVW, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, arm.REG_R0, 0) *r0 = 1 } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REGSP, int32(4+frame+lo+i)) } } else if !gc.Nacl && (cnt <= int64(128*gc.Widthptr)) { p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0) p.Reg = arm.REGSP p = appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) f := gc.Sysfunc("duffzero") gc.Naddr(&p.To, f) gc.Afunclit(&p.To, f) p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr)) } else { p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(4+frame+lo), obj.TYPE_REG, arm.REG_R1, 0) p.Reg = arm.REGSP p = appendpp(p, arm.AADD, obj.TYPE_CONST, 0, int32(cnt), obj.TYPE_REG, arm.REG_R2, 0) p.Reg = arm.REG_R1 p = appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REG_R1, 4) p1 := p p.Scond |= arm.C_PBIT p = appendpp(p, arm.ACMP, obj.TYPE_REG, arm.REG_R1, 0, obj.TYPE_NONE, 0, 0) p.Reg = arm.REG_R2 p = appendpp(p, arm.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, p1) } return p }
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } if cnt < int64(4*gc.Widthptr) { for i := int64(0); i < cnt; i += int64(gc.Widthptr) { p = gc.Appendpp(p, mips.AMOVV, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGSP, 8+frame+lo+i) } } else if cnt <= int64(128*gc.Widthptr) { p = gc.Appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = gc.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) gc.Naddr(&p.To, gc.Sysfunc("duffzero")) p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr)) } else { // ADDV $(8+frame+lo-8), SP, r1 // ADDV $cnt, r1, r2 // loop: // MOVV R0, (Widthptr)r1 // ADDV $Widthptr, r1 // BNE r1, r2, loop p = gc.Appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, 8+frame+lo-8, obj.TYPE_REG, mips.REGRT1, 0) p.Reg = mips.REGSP p = gc.Appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, mips.REGRT2, 0) p.Reg = mips.REGRT1 p = gc.Appendpp(p, mips.AMOVV, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGRT1, int64(gc.Widthptr)) p1 := p p = gc.Appendpp(p, mips.AADDV, obj.TYPE_CONST, 0, int64(gc.Widthptr), obj.TYPE_REG, mips.REGRT1, 0) p = gc.Appendpp(p, mips.ABNE, obj.TYPE_REG, mips.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) p.Reg = mips.REGRT2 gc.Patch(p, p1) } return p }
/* * generate comparison of nl, nr, both 64-bit. * nl is memory; nr is constant or memory. */ func cmp64(nl *gc.Node, nr *gc.Node, op gc.Op, likely int, to *obj.Prog) { var lo1 gc.Node var hi1 gc.Node var lo2 gc.Node var hi2 gc.Node var rr gc.Node split64(nl, &lo1, &hi1) split64(nr, &lo2, &hi2) // compare most significant word; // if they differ, we're done. t := hi1.Type if nl.Op == gc.OLITERAL || nr.Op == gc.OLITERAL { gins(x86.ACMPL, &hi1, &hi2) } else { gc.Regalloc(&rr, gc.Types[gc.TINT32], nil) gins(x86.AMOVL, &hi1, &rr) gins(x86.ACMPL, &rr, &hi2) gc.Regfree(&rr) } var br *obj.Prog switch op { default: gc.Fatalf("cmp64 %v %v", gc.Oconv(int(op), 0), t) // cmp hi // jne L // cmp lo // jeq to // L: case gc.OEQ: br = gc.Gbranch(x86.AJNE, nil, -likely) // cmp hi // jne to // cmp lo // jne to case gc.ONE: gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to) // cmp hi // jgt to // jlt L // cmp lo // jge to (or jgt to) // L: case gc.OGE, gc.OGT: gc.Patch(gc.Gbranch(optoas(gc.OGT, t), nil, likely), to) br = gc.Gbranch(optoas(gc.OLT, t), nil, -likely) // cmp hi // jlt to // jgt L // cmp lo // jle to (or jlt to) // L: case gc.OLE, gc.OLT: gc.Patch(gc.Gbranch(optoas(gc.OLT, t), nil, likely), to) br = gc.Gbranch(optoas(gc.OGT, t), nil, -likely) } // compare least significant word t = lo1.Type if nl.Op == gc.OLITERAL || nr.Op == gc.OLITERAL { gins(x86.ACMPL, &lo1, &lo2) } else { gc.Regalloc(&rr, gc.Types[gc.TINT32], nil) gins(x86.AMOVL, &lo1, &rr) gins(x86.ACMPL, &rr, &lo2) gc.Regfree(&rr) } // jump again gc.Patch(gc.Gbranch(optoas(op, t), nil, likely), to) // point first branch down here if appropriate if br != nil { gc.Patch(br, gc.Pc) } splitclean() splitclean() }
/* * generate move: * t = f * hard part is conversions. */ func gmove(f *gc.Node, t *gc.Node) { if gc.Debug['M'] != 0 { fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong)) } ft := int(gc.Simsimtype(f.Type)) tt := int(gc.Simsimtype(t.Type)) cvt := (*gc.Type)(t.Type) if gc.Iscomplex[ft] || gc.Iscomplex[tt] { gc.Complexmove(f, t) return } // cannot have two memory operands var r2 gc.Node var r1 gc.Node var a int if gc.Ismem(f) && gc.Ismem(t) { goto hard } // convert constant to desired type if f.Op == gc.OLITERAL { var con gc.Node switch tt { default: f.Convconst(&con, t.Type) case gc.TINT32, gc.TINT16, gc.TINT8: var con gc.Node f.Convconst(&con, gc.Types[gc.TINT64]) var r1 gc.Node gc.Regalloc(&r1, con.Type, t) gins(ppc64.AMOVD, &con, &r1) gmove(&r1, t) gc.Regfree(&r1) return case gc.TUINT32, gc.TUINT16, gc.TUINT8: var con gc.Node f.Convconst(&con, gc.Types[gc.TUINT64]) var r1 gc.Node gc.Regalloc(&r1, con.Type, t) gins(ppc64.AMOVD, &con, &r1) gmove(&r1, t) gc.Regfree(&r1) return } f = &con ft = tt // so big switch will choose a simple mov // constants can't move directly to memory. if gc.Ismem(t) { goto hard } } // float constants come from memory. //if(isfloat[tt]) // goto hard; // 64-bit immediates are also from memory. //if(isint[tt]) // goto hard; //// 64-bit immediates are really 32-bit sign-extended //// unless moving into a register. //if(isint[tt]) { // if(mpcmpfixfix(con.val.u.xval, minintval[TINT32]) < 0) // goto hard; // if(mpcmpfixfix(con.val.u.xval, maxintval[TINT32]) > 0) // goto hard; //} // value -> value copy, only one memory operand. // figure out the instruction to use. // break out of switch for one-instruction gins. // goto rdst for "destination must be register". // goto hard for "convert to cvt type first". // otherwise handle and return. switch uint32(ft)<<16 | uint32(tt) { default: gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, obj.FmtLong), gc.Tconv(t.Type, obj.FmtLong)) /* * integer copy and truncate */ case gc.TINT8<<16 | gc.TINT8, // same size gc.TUINT8<<16 | gc.TINT8, gc.TINT16<<16 | gc.TINT8, // truncate gc.TUINT16<<16 | gc.TINT8, gc.TINT32<<16 | gc.TINT8, gc.TUINT32<<16 | gc.TINT8, gc.TINT64<<16 | gc.TINT8, gc.TUINT64<<16 | gc.TINT8: a = ppc64.AMOVB case gc.TINT8<<16 | gc.TUINT8, // same size gc.TUINT8<<16 | gc.TUINT8, gc.TINT16<<16 | gc.TUINT8, // truncate gc.TUINT16<<16 | gc.TUINT8, gc.TINT32<<16 | gc.TUINT8, gc.TUINT32<<16 | gc.TUINT8, gc.TINT64<<16 | gc.TUINT8, gc.TUINT64<<16 | gc.TUINT8: a = ppc64.AMOVBZ case gc.TINT16<<16 | gc.TINT16, // same size gc.TUINT16<<16 | gc.TINT16, gc.TINT32<<16 | gc.TINT16, // truncate gc.TUINT32<<16 | gc.TINT16, gc.TINT64<<16 | gc.TINT16, gc.TUINT64<<16 | gc.TINT16: a = ppc64.AMOVH case gc.TINT16<<16 | gc.TUINT16, // same size gc.TUINT16<<16 | gc.TUINT16, gc.TINT32<<16 | gc.TUINT16, // truncate gc.TUINT32<<16 | gc.TUINT16, gc.TINT64<<16 | gc.TUINT16, gc.TUINT64<<16 | gc.TUINT16: a = ppc64.AMOVHZ case gc.TINT32<<16 | gc.TINT32, // same size gc.TUINT32<<16 | gc.TINT32, gc.TINT64<<16 | gc.TINT32, // truncate gc.TUINT64<<16 | gc.TINT32: a = ppc64.AMOVW case gc.TINT32<<16 | gc.TUINT32, // same size gc.TUINT32<<16 | gc.TUINT32, gc.TINT64<<16 | gc.TUINT32, gc.TUINT64<<16 | gc.TUINT32: a = ppc64.AMOVWZ case gc.TINT64<<16 | gc.TINT64, // same size gc.TINT64<<16 | gc.TUINT64, gc.TUINT64<<16 | gc.TINT64, gc.TUINT64<<16 | gc.TUINT64: a = ppc64.AMOVD /* * integer up-conversions */ case gc.TINT8<<16 | gc.TINT16, // sign extend int8 gc.TINT8<<16 | gc.TUINT16, gc.TINT8<<16 | gc.TINT32, gc.TINT8<<16 | gc.TUINT32, gc.TINT8<<16 | gc.TINT64, gc.TINT8<<16 | gc.TUINT64: a = ppc64.AMOVB goto rdst case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8 gc.TUINT8<<16 | gc.TUINT16, gc.TUINT8<<16 | gc.TINT32, gc.TUINT8<<16 | gc.TUINT32, gc.TUINT8<<16 | gc.TINT64, gc.TUINT8<<16 | gc.TUINT64: a = ppc64.AMOVBZ goto rdst case gc.TINT16<<16 | gc.TINT32, // sign extend int16 gc.TINT16<<16 | gc.TUINT32, gc.TINT16<<16 | gc.TINT64, gc.TINT16<<16 | gc.TUINT64: a = ppc64.AMOVH goto rdst case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16 gc.TUINT16<<16 | gc.TUINT32, gc.TUINT16<<16 | gc.TINT64, gc.TUINT16<<16 | gc.TUINT64: a = ppc64.AMOVHZ goto rdst case gc.TINT32<<16 | gc.TINT64, // sign extend int32 gc.TINT32<<16 | gc.TUINT64: a = ppc64.AMOVW goto rdst case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32 gc.TUINT32<<16 | gc.TUINT64: a = ppc64.AMOVWZ goto rdst //warn("gmove: convert float to int not implemented: %N -> %N\n", f, t); //return; // algorithm is: // if small enough, use native float64 -> int64 conversion. // otherwise, subtract 2^63, convert, and add it back. /* * float to integer */ case gc.TFLOAT32<<16 | gc.TINT32, gc.TFLOAT64<<16 | gc.TINT32, gc.TFLOAT32<<16 | gc.TINT64, gc.TFLOAT64<<16 | gc.TINT64, gc.TFLOAT32<<16 | gc.TINT16, gc.TFLOAT32<<16 | gc.TINT8, gc.TFLOAT32<<16 | gc.TUINT16, gc.TFLOAT32<<16 | gc.TUINT8, gc.TFLOAT64<<16 | gc.TINT16, gc.TFLOAT64<<16 | gc.TINT8, gc.TFLOAT64<<16 | gc.TUINT16, gc.TFLOAT64<<16 | gc.TUINT8, gc.TFLOAT32<<16 | gc.TUINT32, gc.TFLOAT64<<16 | gc.TUINT32, gc.TFLOAT32<<16 | gc.TUINT64, gc.TFLOAT64<<16 | gc.TUINT64: bignodes() var r1 gc.Node gc.Regalloc(&r1, gc.Types[ft], f) gmove(f, &r1) if tt == gc.TUINT64 { gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil) gmove(&bigf, &r2) gins(ppc64.AFCMPU, &r1, &r2) p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1)) gins(ppc64.AFSUB, &r2, &r1) gc.Patch(p1, gc.Pc) gc.Regfree(&r2) } gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], nil) var r3 gc.Node gc.Regalloc(&r3, gc.Types[gc.TINT64], t) gins(ppc64.AFCTIDZ, &r1, &r2) p1 := (*obj.Prog)(gins(ppc64.AFMOVD, &r2, nil)) p1.To.Type = obj.TYPE_MEM p1.To.Reg = ppc64.REGSP p1.To.Offset = -8 p1 = gins(ppc64.AMOVD, nil, &r3) p1.From.Type = obj.TYPE_MEM p1.From.Reg = ppc64.REGSP p1.From.Offset = -8 gc.Regfree(&r2) gc.Regfree(&r1) if tt == gc.TUINT64 { p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TFLOAT64]), nil, +1)) // use CR0 here again gc.Nodreg(&r1, gc.Types[gc.TINT64], ppc64.REGTMP) gins(ppc64.AMOVD, &bigi, &r1) gins(ppc64.AADD, &r1, &r3) gc.Patch(p1, gc.Pc) } gmove(&r3, t) gc.Regfree(&r3) return //warn("gmove: convert int to float not implemented: %N -> %N\n", f, t); //return; // algorithm is: // if small enough, use native int64 -> uint64 conversion. // otherwise, halve (rounding to odd?), convert, and double. /* * integer to float */ case gc.TINT32<<16 | gc.TFLOAT32, gc.TINT32<<16 | gc.TFLOAT64, gc.TINT64<<16 | gc.TFLOAT32, gc.TINT64<<16 | gc.TFLOAT64, gc.TINT16<<16 | gc.TFLOAT32, gc.TINT16<<16 | gc.TFLOAT64, gc.TINT8<<16 | gc.TFLOAT32, gc.TINT8<<16 | gc.TFLOAT64, gc.TUINT16<<16 | gc.TFLOAT32, gc.TUINT16<<16 | gc.TFLOAT64, gc.TUINT8<<16 | gc.TFLOAT32, gc.TUINT8<<16 | gc.TFLOAT64, gc.TUINT32<<16 | gc.TFLOAT32, gc.TUINT32<<16 | gc.TFLOAT64, gc.TUINT64<<16 | gc.TFLOAT32, gc.TUINT64<<16 | gc.TFLOAT64: bignodes() var r1 gc.Node gc.Regalloc(&r1, gc.Types[gc.TINT64], nil) gmove(f, &r1) if ft == gc.TUINT64 { gc.Nodreg(&r2, gc.Types[gc.TUINT64], ppc64.REGTMP) gmove(&bigi, &r2) gins(ppc64.ACMPU, &r1, &r2) p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1)) p2 := (*obj.Prog)(gins(ppc64.ASRD, nil, &r1)) p2.From.Type = obj.TYPE_CONST p2.From.Offset = 1 gc.Patch(p1, gc.Pc) } gc.Regalloc(&r2, gc.Types[gc.TFLOAT64], t) p1 := (*obj.Prog)(gins(ppc64.AMOVD, &r1, nil)) p1.To.Type = obj.TYPE_MEM p1.To.Reg = ppc64.REGSP p1.To.Offset = -8 p1 = gins(ppc64.AFMOVD, nil, &r2) p1.From.Type = obj.TYPE_MEM p1.From.Reg = ppc64.REGSP p1.From.Offset = -8 gins(ppc64.AFCFID, &r2, &r2) gc.Regfree(&r1) if ft == gc.TUINT64 { p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT64]), nil, +1)) // use CR0 here again gc.Nodreg(&r1, gc.Types[gc.TFLOAT64], ppc64.FREGTWO) gins(ppc64.AFMUL, &r1, &r2) gc.Patch(p1, gc.Pc) } gmove(&r2, t) gc.Regfree(&r2) return /* * float to float */ case gc.TFLOAT32<<16 | gc.TFLOAT32: a = ppc64.AFMOVS case gc.TFLOAT64<<16 | gc.TFLOAT64: a = ppc64.AFMOVD case gc.TFLOAT32<<16 | gc.TFLOAT64: a = ppc64.AFMOVS goto rdst case gc.TFLOAT64<<16 | gc.TFLOAT32: a = ppc64.AFRSP goto rdst } gins(a, f, t) return // requires register destination rdst: { gc.Regalloc(&r1, t.Type, t) gins(a, f, &r1) gmove(&r1, t) gc.Regfree(&r1) return } // requires register intermediate hard: gc.Regalloc(&r1, cvt, t) gmove(f, &r1) gmove(&r1, t) gc.Regfree(&r1) return }
/* * attempt to generate 64-bit * res = n * return 1 on success, 0 if op not handled. */ func cgen64(n *gc.Node, res *gc.Node) { if res.Op != gc.OINDREG && res.Op != gc.ONAME { gc.Dump("n", n) gc.Dump("res", res) gc.Fatalf("cgen64 %v of %v", gc.Oconv(int(n.Op), 0), gc.Oconv(int(res.Op), 0)) } switch n.Op { default: gc.Fatalf("cgen64 %v", gc.Oconv(int(n.Op), 0)) case gc.OMINUS: gc.Cgen(n.Left, res) var hi1 gc.Node var lo1 gc.Node split64(res, &lo1, &hi1) gins(x86.ANEGL, nil, &lo1) gins(x86.AADCL, ncon(0), &hi1) gins(x86.ANEGL, nil, &hi1) splitclean() return case gc.OCOM: gc.Cgen(n.Left, res) var lo1 gc.Node var hi1 gc.Node split64(res, &lo1, &hi1) gins(x86.ANOTL, nil, &lo1) gins(x86.ANOTL, nil, &hi1) splitclean() return // binary operators. // common setup below. case gc.OADD, gc.OSUB, gc.OMUL, gc.OLROT, gc.OLSH, gc.ORSH, gc.OAND, gc.OOR, gc.OXOR: break } l := n.Left r := n.Right if !l.Addable { var t1 gc.Node gc.Tempname(&t1, l.Type) gc.Cgen(l, &t1) l = &t1 } if r != nil && !r.Addable { var t2 gc.Node gc.Tempname(&t2, r.Type) gc.Cgen(r, &t2) r = &t2 } var ax gc.Node gc.Nodreg(&ax, gc.Types[gc.TINT32], x86.REG_AX) var cx gc.Node gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX) var dx gc.Node gc.Nodreg(&dx, gc.Types[gc.TINT32], x86.REG_DX) // Setup for binary operation. var hi1 gc.Node var lo1 gc.Node split64(l, &lo1, &hi1) var lo2 gc.Node var hi2 gc.Node if gc.Is64(r.Type) { split64(r, &lo2, &hi2) } // Do op. Leave result in DX:AX. switch n.Op { // TODO: Constants case gc.OADD: gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(x86.AADDL, &lo2, &ax) gins(x86.AADCL, &hi2, &dx) // TODO: Constants. case gc.OSUB: gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(x86.ASUBL, &lo2, &ax) gins(x86.ASBBL, &hi2, &dx) case gc.OMUL: // let's call the next three EX, FX and GX var ex, fx, gx gc.Node gc.Regalloc(&ex, gc.Types[gc.TPTR32], nil) gc.Regalloc(&fx, gc.Types[gc.TPTR32], nil) gc.Regalloc(&gx, gc.Types[gc.TPTR32], nil) // load args into DX:AX and EX:GX. gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(x86.AMOVL, &lo2, &gx) gins(x86.AMOVL, &hi2, &ex) // if DX and EX are zero, use 32 x 32 -> 64 unsigned multiply. gins(x86.AMOVL, &dx, &fx) gins(x86.AORL, &ex, &fx) p1 := gc.Gbranch(x86.AJNE, nil, 0) gins(x86.AMULL, &gx, nil) // implicit &ax p2 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // full 64x64 -> 64, from 32x32 -> 64. gins(x86.AIMULL, &gx, &dx) gins(x86.AMOVL, &ax, &fx) gins(x86.AIMULL, &ex, &fx) gins(x86.AADDL, &dx, &fx) gins(x86.AMOVL, &gx, &dx) gins(x86.AMULL, &dx, nil) // implicit &ax gins(x86.AADDL, &fx, &dx) gc.Patch(p2, gc.Pc) gc.Regfree(&ex) gc.Regfree(&fx) gc.Regfree(&gx) // We only rotate by a constant c in [0,64). // if c >= 32: // lo, hi = hi, lo // c -= 32 // if c == 0: // no-op // else: // t = hi // shld hi:lo, c // shld lo:t, c case gc.OLROT: v := uint64(r.Int()) if v >= 32 { // reverse during load to do the first 32 bits of rotate v -= 32 gins(x86.AMOVL, &lo1, &dx) gins(x86.AMOVL, &hi1, &ax) } else { gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) } if v == 0 { } else // done { gins(x86.AMOVL, &dx, &cx) p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx) p1.From.Index = x86.REG_AX // double-width shift p1.From.Scale = 0 p1 = gins(x86.ASHLL, ncon(uint32(v)), &ax) p1.From.Index = x86.REG_CX // double-width shift p1.From.Scale = 0 } case gc.OLSH: if r.Op == gc.OLITERAL { v := uint64(r.Int()) if v >= 64 { if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo2, &hi2) gins(x86.AMOVL, ncon(0), &lo2) gins(x86.AMOVL, ncon(0), &hi2) splitclean() return } if v >= 32 { if gc.Is64(r.Type) { splitclean() } split64(res, &lo2, &hi2) gmove(&lo1, &hi2) if v > 32 { gins(x86.ASHLL, ncon(uint32(v-32)), &hi2) } gins(x86.AMOVL, ncon(0), &lo2) splitclean() splitclean() return } // general shift gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) p1 := gins(x86.ASHLL, ncon(uint32(v)), &dx) p1.From.Index = x86.REG_AX // double-width shift p1.From.Scale = 0 gins(x86.ASHLL, ncon(uint32(v)), &ax) break } // load value into DX:AX. gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) // load shift value into register. // if high bits are set, zero value. var p1 *obj.Prog if gc.Is64(r.Type) { gins(x86.ACMPL, &hi2, ncon(0)) p1 = gc.Gbranch(x86.AJNE, nil, +1) gins(x86.AMOVL, &lo2, &cx) } else { cx.Type = gc.Types[gc.TUINT32] gmove(r, &cx) } // if shift count is >=64, zero value gins(x86.ACMPL, &cx, ncon(64)) p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) if p1 != nil { gc.Patch(p1, gc.Pc) } gins(x86.AXORL, &dx, &dx) gins(x86.AXORL, &ax, &ax) gc.Patch(p2, gc.Pc) // if shift count is >= 32, zero low. gins(x86.ACMPL, &cx, ncon(32)) p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) gins(x86.AMOVL, &ax, &dx) gins(x86.ASHLL, &cx, &dx) // SHLL only uses bottom 5 bits of count gins(x86.AXORL, &ax, &ax) p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // general shift p1 = gins(x86.ASHLL, &cx, &dx) p1.From.Index = x86.REG_AX // double-width shift p1.From.Scale = 0 gins(x86.ASHLL, &cx, &ax) gc.Patch(p2, gc.Pc) case gc.ORSH: if r.Op == gc.OLITERAL { v := uint64(r.Int()) if v >= 64 { if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo2, &hi2) if hi1.Type.Etype == gc.TINT32 { gmove(&hi1, &lo2) gins(x86.ASARL, ncon(31), &lo2) gmove(&hi1, &hi2) gins(x86.ASARL, ncon(31), &hi2) } else { gins(x86.AMOVL, ncon(0), &lo2) gins(x86.AMOVL, ncon(0), &hi2) } splitclean() return } if v >= 32 { if gc.Is64(r.Type) { splitclean() } split64(res, &lo2, &hi2) gmove(&hi1, &lo2) if v > 32 { gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v-32)), &lo2) } if hi1.Type.Etype == gc.TINT32 { gmove(&hi1, &hi2) gins(x86.ASARL, ncon(31), &hi2) } else { gins(x86.AMOVL, ncon(0), &hi2) } splitclean() splitclean() return } // general shift gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) p1 := gins(x86.ASHRL, ncon(uint32(v)), &ax) p1.From.Index = x86.REG_DX // double-width shift p1.From.Scale = 0 gins(optoas(gc.ORSH, hi1.Type), ncon(uint32(v)), &dx) break } // load value into DX:AX. gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) // load shift value into register. // if high bits are set, zero value. var p1 *obj.Prog if gc.Is64(r.Type) { gins(x86.ACMPL, &hi2, ncon(0)) p1 = gc.Gbranch(x86.AJNE, nil, +1) gins(x86.AMOVL, &lo2, &cx) } else { cx.Type = gc.Types[gc.TUINT32] gmove(r, &cx) } // if shift count is >=64, zero or sign-extend value gins(x86.ACMPL, &cx, ncon(64)) p2 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) if p1 != nil { gc.Patch(p1, gc.Pc) } if hi1.Type.Etype == gc.TINT32 { gins(x86.ASARL, ncon(31), &dx) gins(x86.AMOVL, &dx, &ax) } else { gins(x86.AXORL, &dx, &dx) gins(x86.AXORL, &ax, &ax) } gc.Patch(p2, gc.Pc) // if shift count is >= 32, sign-extend hi. gins(x86.ACMPL, &cx, ncon(32)) p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) gins(x86.AMOVL, &dx, &ax) if hi1.Type.Etype == gc.TINT32 { gins(x86.ASARL, &cx, &ax) // SARL only uses bottom 5 bits of count gins(x86.ASARL, ncon(31), &dx) } else { gins(x86.ASHRL, &cx, &ax) gins(x86.AXORL, &dx, &dx) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // general shift p1 = gins(x86.ASHRL, &cx, &ax) p1.From.Index = x86.REG_DX // double-width shift p1.From.Scale = 0 gins(optoas(gc.ORSH, hi1.Type), &cx, &dx) gc.Patch(p2, gc.Pc) // make constant the right side (it usually is anyway). case gc.OXOR, gc.OAND, gc.OOR: if lo1.Op == gc.OLITERAL { nswap(&lo1, &lo2) nswap(&hi1, &hi2) } if lo2.Op == gc.OLITERAL { // special cases for constants. lv := uint32(lo2.Int()) hv := uint32(hi2.Int()) splitclean() // right side split64(res, &lo2, &hi2) switch n.Op { case gc.OXOR: gmove(&lo1, &lo2) gmove(&hi1, &hi2) switch lv { case 0: break case 0xffffffff: gins(x86.ANOTL, nil, &lo2) default: gins(x86.AXORL, ncon(lv), &lo2) } switch hv { case 0: break case 0xffffffff: gins(x86.ANOTL, nil, &hi2) default: gins(x86.AXORL, ncon(hv), &hi2) } case gc.OAND: switch lv { case 0: gins(x86.AMOVL, ncon(0), &lo2) default: gmove(&lo1, &lo2) if lv != 0xffffffff { gins(x86.AANDL, ncon(lv), &lo2) } } switch hv { case 0: gins(x86.AMOVL, ncon(0), &hi2) default: gmove(&hi1, &hi2) if hv != 0xffffffff { gins(x86.AANDL, ncon(hv), &hi2) } } case gc.OOR: switch lv { case 0: gmove(&lo1, &lo2) case 0xffffffff: gins(x86.AMOVL, ncon(0xffffffff), &lo2) default: gmove(&lo1, &lo2) gins(x86.AORL, ncon(lv), &lo2) } switch hv { case 0: gmove(&hi1, &hi2) case 0xffffffff: gins(x86.AMOVL, ncon(0xffffffff), &hi2) default: gmove(&hi1, &hi2) gins(x86.AORL, ncon(hv), &hi2) } } splitclean() splitclean() return } gins(x86.AMOVL, &lo1, &ax) gins(x86.AMOVL, &hi1, &dx) gins(optoas(n.Op, lo1.Type), &lo2, &ax) gins(optoas(n.Op, lo1.Type), &hi2, &dx) } if gc.Is64(r.Type) { splitclean() } splitclean() split64(res, &lo1, &hi1) gins(x86.AMOVL, &ax, &lo1) gins(x86.AMOVL, &dx, &hi1) splitclean() }
func floatmove_387(f *gc.Node, t *gc.Node) { var r1 gc.Node var a int ft := gc.Simsimtype(f.Type) tt := gc.Simsimtype(t.Type) cvt := t.Type switch uint32(ft)<<16 | uint32(tt) { default: goto fatal /* * float to integer */ case gc.TFLOAT32<<16 | gc.TINT16, gc.TFLOAT32<<16 | gc.TINT32, gc.TFLOAT32<<16 | gc.TINT64, gc.TFLOAT64<<16 | gc.TINT16, gc.TFLOAT64<<16 | gc.TINT32, gc.TFLOAT64<<16 | gc.TINT64: if t.Op == gc.OREGISTER { goto hardmem } var r1 gc.Node gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0) if f.Op != gc.OREGISTER { if ft == gc.TFLOAT32 { gins(x86.AFMOVF, f, &r1) } else { gins(x86.AFMOVD, f, &r1) } } // set round to zero mode during conversion var t1 gc.Node memname(&t1, gc.Types[gc.TUINT16]) var t2 gc.Node memname(&t2, gc.Types[gc.TUINT16]) gins(x86.AFSTCW, nil, &t1) gins(x86.AMOVW, ncon(0xf7f), &t2) gins(x86.AFLDCW, &t2, nil) if tt == gc.TINT16 { gins(x86.AFMOVWP, &r1, t) } else if tt == gc.TINT32 { gins(x86.AFMOVLP, &r1, t) } else { gins(x86.AFMOVVP, &r1, t) } gins(x86.AFLDCW, &t1, nil) return // convert via int32. case gc.TFLOAT32<<16 | gc.TINT8, gc.TFLOAT32<<16 | gc.TUINT16, gc.TFLOAT32<<16 | gc.TUINT8, gc.TFLOAT64<<16 | gc.TINT8, gc.TFLOAT64<<16 | gc.TUINT16, gc.TFLOAT64<<16 | gc.TUINT8: var t1 gc.Node gc.Tempname(&t1, gc.Types[gc.TINT32]) gmove(f, &t1) switch tt { default: gc.Fatalf("gmove %v", t) case gc.TINT8: gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1))) p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1) gins(x86.ACMPL, &t1, ncon(0x7f)) p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1) p3 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) gc.Patch(p2, gc.Pc) gmove(ncon(-0x80&(1<<32-1)), &t1) gc.Patch(p3, gc.Pc) gmove(&t1, t) case gc.TUINT8: gins(x86.ATESTL, ncon(0xffffff00), &t1) p1 := gc.Gbranch(x86.AJEQ, nil, +1) gins(x86.AMOVL, ncon(0), &t1) gc.Patch(p1, gc.Pc) gmove(&t1, t) case gc.TUINT16: gins(x86.ATESTL, ncon(0xffff0000), &t1) p1 := gc.Gbranch(x86.AJEQ, nil, +1) gins(x86.AMOVL, ncon(0), &t1) gc.Patch(p1, gc.Pc) gmove(&t1, t) } return // convert via int64. case gc.TFLOAT32<<16 | gc.TUINT32, gc.TFLOAT64<<16 | gc.TUINT32: cvt = gc.Types[gc.TINT64] goto hardmem /* * integer to float */ case gc.TINT16<<16 | gc.TFLOAT32, gc.TINT16<<16 | gc.TFLOAT64, gc.TINT32<<16 | gc.TFLOAT32, gc.TINT32<<16 | gc.TFLOAT64, gc.TINT64<<16 | gc.TFLOAT32, gc.TINT64<<16 | gc.TFLOAT64: if t.Op != gc.OREGISTER { goto hard } if f.Op == gc.OREGISTER { cvt = f.Type goto hardmem } switch ft { case gc.TINT16: a = x86.AFMOVW case gc.TINT32: a = x86.AFMOVL default: a = x86.AFMOVV } // convert via int32 memory case gc.TINT8<<16 | gc.TFLOAT32, gc.TINT8<<16 | gc.TFLOAT64, gc.TUINT16<<16 | gc.TFLOAT32, gc.TUINT16<<16 | gc.TFLOAT64, gc.TUINT8<<16 | gc.TFLOAT32, gc.TUINT8<<16 | gc.TFLOAT64: cvt = gc.Types[gc.TINT32] goto hardmem // convert via int64 memory case gc.TUINT32<<16 | gc.TFLOAT32, gc.TUINT32<<16 | gc.TFLOAT64: cvt = gc.Types[gc.TINT64] goto hardmem // The way the code generator uses floating-point // registers, a move from F0 to F0 is intended as a no-op. // On the x86, it's not: it pushes a second copy of F0 // on the floating point stack. So toss it away here. // Also, F0 is the *only* register we ever evaluate // into, so we should only see register/register as F0/F0. /* * float to float */ case gc.TFLOAT32<<16 | gc.TFLOAT32, gc.TFLOAT64<<16 | gc.TFLOAT64: if gc.Ismem(f) && gc.Ismem(t) { goto hard } if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER { if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 { goto fatal } return } a = x86.AFMOVF if ft == gc.TFLOAT64 { a = x86.AFMOVD } if gc.Ismem(t) { if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 { gc.Fatalf("gmove %v", f) } a = x86.AFMOVFP if ft == gc.TFLOAT64 { a = x86.AFMOVDP } } case gc.TFLOAT32<<16 | gc.TFLOAT64: if gc.Ismem(f) && gc.Ismem(t) { goto hard } if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER { if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 { goto fatal } return } if f.Op == gc.OREGISTER { gins(x86.AFMOVDP, f, t) } else { gins(x86.AFMOVF, f, t) } return case gc.TFLOAT64<<16 | gc.TFLOAT32: if gc.Ismem(f) && gc.Ismem(t) { goto hard } if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER { var r1 gc.Node gc.Tempname(&r1, gc.Types[gc.TFLOAT32]) gins(x86.AFMOVFP, f, &r1) gins(x86.AFMOVF, &r1, t) return } if f.Op == gc.OREGISTER { gins(x86.AFMOVFP, f, t) } else { gins(x86.AFMOVD, f, t) } return } gins(a, f, t) return // requires register intermediate hard: gc.Regalloc(&r1, cvt, t) gmove(f, &r1) gmove(&r1, t) gc.Regfree(&r1) return // requires memory intermediate hardmem: gc.Tempname(&r1, cvt) gmove(f, &r1) gmove(&r1, t) return // should not happen fatal: gc.Fatalf("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong)) return }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { // determine alignment. // want to avoid unaligned access, so have to use // smaller operations for less aligned types. // for example moving [4]byte must use 4 MOVB not 1 MOVW. align := int(n.Type.Align) var op int switch align { default: gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type) case 1: op = arm.AMOVB case 2: op = arm.AMOVH case 4: op = arm.AMOVW } if w%int64(align) != 0 { gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type) } c := int32(w / int64(align)) if osrc%int64(align) != 0 || odst%int64(align) != 0 { gc.Fatalf("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align) } // if we are copying forward on the stack and // the src and dst overlap, then reverse direction dir := align if osrc < odst && int64(odst) < int64(osrc)+w { dir = -dir } if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 { var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R0 + 1 var r2 gc.Node r2.Op = gc.OREGISTER r2.Reg = arm.REG_R0 + 2 var src gc.Node gc.Regalloc(&src, gc.Types[gc.Tptr], &r1) var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r2) if n.Ullman >= res.Ullman { // eval n first gc.Agen(n, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { // eval res first if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) gc.Agen(n, &src) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.Tptr], &r0) f := gc.Sysfunc("duffcopy") p := gins(obj.ADUFFCOPY, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 8 * (128 - int64(c)) gc.Regfree(&tmp) gc.Regfree(&src) gc.Regfree(&dst) return } var dst gc.Node var src gc.Node if n.Ullman >= res.Ullman { gc.Agenr(n, &dst, res) // temporarily use dst gc.Regalloc(&src, gc.Types[gc.Tptr], nil) gins(arm.AMOVW, &dst, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agenr(res, &dst, res) gc.Agenr(n, &src, nil) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.TUINT32], nil) // set up end marker var nend gc.Node if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.TUINT32], nil) p := gins(arm.AMOVW, &src, &nend) p.From.Type = obj.TYPE_ADDR if dir < 0 { p.From.Offset = int64(dir) } else { p.From.Offset = w } } // move src and dest to the end of block if necessary if dir < 0 { p := gins(arm.AMOVW, &src, &src) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) p = gins(arm.AMOVW, &dst, &dst) p.From.Type = obj.TYPE_ADDR p.From.Offset = w + int64(dir) } // move if c >= 4 { p := gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT ploop := p p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(arm.ACMP, &src, nil) raddr(&nend, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop) gc.Regfree(&nend) } else { var p *obj.Prog for { tmp14 := c c-- if tmp14 <= 0 { break } p = gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p.Scond |= arm.C_PBIT p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p.Scond |= arm.C_PBIT } } gc.Regfree(&dst) gc.Regfree(&src) gc.Regfree(&tmp) }
/* * generate shift according to op, one of: * res = nl << nr * res = nl >> nr */ func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) { a := int(optoas(op, nl.Type)) if nr.Op == gc.OLITERAL { var n1 gc.Node gc.Regalloc(&n1, nl.Type, res) gc.Cgen(nl, &n1) sc := uint64(nr.Int()) if sc >= uint64(nl.Type.Width*8) { // large shift gets 2 shifts by width-1 var n3 gc.Node gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1) gins(a, &n3, &n1) gins(a, &n3, &n1) } else { gins(a, nr, &n1) } gmove(&n1, res) gc.Regfree(&n1) return } if nl.Ullman >= gc.UINF { var n4 gc.Node gc.Tempname(&n4, nl.Type) gc.Cgen(nl, &n4) nl = &n4 } if nr.Ullman >= gc.UINF { var n5 gc.Node gc.Tempname(&n5, nr.Type) gc.Cgen(nr, &n5) nr = &n5 } // Allow either uint32 or uint64 as shift type, // to avoid unnecessary conversion from uint32 to uint64 // just to do the comparison. tcount := gc.Types[gc.Simtype[nr.Type.Etype]] if tcount.Etype < gc.TUINT32 { tcount = gc.Types[gc.TUINT32] } var n1 gc.Node gc.Regalloc(&n1, nr.Type, nil) // to hold the shift type in CX var n3 gc.Node gc.Regalloc(&n3, tcount, &n1) // to clear high bits of CX var n2 gc.Node gc.Regalloc(&n2, nl.Type, res) if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &n2) gc.Cgen(nr, &n1) gmove(&n1, &n3) } else { gc.Cgen(nr, &n1) gmove(&n1, &n3) gc.Cgen(nl, &n2) } gc.Regfree(&n3) // test and fix up large shifts if !bounded { gc.Nodconst(&n3, tcount, nl.Type.Width*8) gins(optoas(gc.OCMP, tcount), &n1, &n3) p1 := (*obj.Prog)(gc.Gbranch(optoas(gc.OLT, tcount), nil, +1)) if op == gc.ORSH && gc.Issigned[nl.Type.Etype] { gc.Nodconst(&n3, gc.Types[gc.TUINT32], nl.Type.Width*8-1) gins(a, &n3, &n2) } else { gc.Nodconst(&n3, nl.Type, 0) gmove(&n3, &n2) } gc.Patch(p1, gc.Pc) } gins(a, &n1, &n2) gmove(&n2, res) gc.Regfree(&n1) gc.Regfree(&n2) }
/* * generate shift according to op, one of: * res = nl << nr * res = nl >> nr */ func cgen_shift(op int, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) { if nl.Type.Width > 4 { gc.Fatalf("cgen_shift %v", nl.Type) } w := int(nl.Type.Width * 8) if op == gc.OLROT { v := nr.Int() var n1 gc.Node gc.Regalloc(&n1, nl.Type, res) if w == 32 { gc.Cgen(nl, &n1) gshift(arm.AMOVW, &n1, arm.SHIFT_RR, int32(w)-int32(v), &n1) } else { var n2 gc.Node gc.Regalloc(&n2, nl.Type, nil) gc.Cgen(nl, &n2) gshift(arm.AMOVW, &n2, arm.SHIFT_LL, int32(v), &n1) gshift(arm.AORR, &n2, arm.SHIFT_LR, int32(w)-int32(v), &n1) gc.Regfree(&n2) // Ensure sign/zero-extended result. gins(optoas(gc.OAS, nl.Type), &n1, &n1) } gmove(&n1, res) gc.Regfree(&n1) return } if nr.Op == gc.OLITERAL { var n1 gc.Node gc.Regalloc(&n1, nl.Type, res) gc.Cgen(nl, &n1) sc := uint64(nr.Int()) if sc == 0 { } else // nothing to do if sc >= uint64(nl.Type.Width*8) { if op == gc.ORSH && gc.Issigned[nl.Type.Etype] { gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(w), &n1) } else { gins(arm.AEOR, &n1, &n1) } } else { if op == gc.ORSH && gc.Issigned[nl.Type.Etype] { gshift(arm.AMOVW, &n1, arm.SHIFT_AR, int32(sc), &n1) } else if op == gc.ORSH { gshift(arm.AMOVW, &n1, arm.SHIFT_LR, int32(sc), &n1) // OLSH } else { gshift(arm.AMOVW, &n1, arm.SHIFT_LL, int32(sc), &n1) } } if w < 32 && op == gc.OLSH { gins(optoas(gc.OAS, nl.Type), &n1, &n1) } gmove(&n1, res) gc.Regfree(&n1) return } tr := nr.Type var t gc.Node var n1 gc.Node var n2 gc.Node var n3 gc.Node if tr.Width > 4 { var nt gc.Node gc.Tempname(&nt, nr.Type) if nl.Ullman >= nr.Ullman { gc.Regalloc(&n2, nl.Type, res) gc.Cgen(nl, &n2) gc.Cgen(nr, &nt) n1 = nt } else { gc.Cgen(nr, &nt) gc.Regalloc(&n2, nl.Type, res) gc.Cgen(nl, &n2) } var hi gc.Node var lo gc.Node split64(&nt, &lo, &hi) gc.Regalloc(&n1, gc.Types[gc.TUINT32], nil) gc.Regalloc(&n3, gc.Types[gc.TUINT32], nil) gmove(&lo, &n1) gmove(&hi, &n3) splitclean() gins(arm.ATST, &n3, nil) gc.Nodconst(&t, gc.Types[gc.TUINT32], int64(w)) p1 := gins(arm.AMOVW, &t, &n1) p1.Scond = arm.C_SCOND_NE tr = gc.Types[gc.TUINT32] gc.Regfree(&n3) } else { if nl.Ullman >= nr.Ullman { gc.Regalloc(&n2, nl.Type, res) gc.Cgen(nl, &n2) gc.Regalloc(&n1, nr.Type, nil) gc.Cgen(nr, &n1) } else { gc.Regalloc(&n1, nr.Type, nil) gc.Cgen(nr, &n1) gc.Regalloc(&n2, nl.Type, res) gc.Cgen(nl, &n2) } } // test for shift being 0 gins(arm.ATST, &n1, nil) p3 := gc.Gbranch(arm.ABEQ, nil, -1) // test and fix up large shifts // TODO: if(!bounded), don't emit some of this. gc.Regalloc(&n3, tr, nil) gc.Nodconst(&t, gc.Types[gc.TUINT32], int64(w)) gmove(&t, &n3) gins(arm.ACMP, &n1, &n3) if op == gc.ORSH { var p1 *obj.Prog var p2 *obj.Prog if gc.Issigned[nl.Type.Etype] { p1 = gshift(arm.AMOVW, &n2, arm.SHIFT_AR, int32(w)-1, &n2) p2 = gregshift(arm.AMOVW, &n2, arm.SHIFT_AR, &n1, &n2) } else { p1 = gins(arm.AEOR, &n2, &n2) p2 = gregshift(arm.AMOVW, &n2, arm.SHIFT_LR, &n1, &n2) } p1.Scond = arm.C_SCOND_HS p2.Scond = arm.C_SCOND_LO } else { p1 := gins(arm.AEOR, &n2, &n2) p2 := gregshift(arm.AMOVW, &n2, arm.SHIFT_LL, &n1, &n2) p1.Scond = arm.C_SCOND_HS p2.Scond = arm.C_SCOND_LO } gc.Regfree(&n3) gc.Patch(p3, gc.Pc) // Left-shift of smaller word must be sign/zero-extended. if w < 32 && op == gc.OLSH { gins(optoas(gc.OAS, nl.Type), &n2, &n2) } gmove(&n2, res) gc.Regfree(&n1) gc.Regfree(&n2) }
// clearfat clears (i.e. replaces with zeros) the value pointed to by nl. func clearfat(nl *gc.Node) { if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], nil) gc.Agen(nl, &dst) var boff int64 w := nl.Type.Width if w > clearLoopCutoff { // Generate a loop clearing 256 bytes per iteration using XCs. var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p := gins(s390x.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = w - (w % 256) p = gins(s390x.AXC, &dst, &dst) p.From.Type = obj.TYPE_MEM p.From.Offset = 0 p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.From3 = new(obj.Addr) p.From3.Offset = 256 p.From3.Type = obj.TYPE_CONST pl := p ginscon(s390x.AADD, 256, &dst) gins(s390x.ACMP, &dst, &end) gc.Patch(gc.Gbranch(s390x.ABNE, nil, 0), pl) gc.Regfree(&end) w = w % 256 } // Generate instructions to clear the remaining memory. for w > 0 { n := w // Can clear at most 256 bytes per instruction. if n > 256 { n = 256 } switch n { // Handle very small clears using moves. case 8, 4, 2, 1: ins := s390x.AMOVB switch n { case 8: ins = s390x.AMOVD case 4: ins = s390x.AMOVW case 2: ins = s390x.AMOVH } p := gins(ins, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_MEM p.To.Offset = boff // Handle clears that would require multiple moves with a XC. default: p := gins(s390x.AXC, &dst, &dst) p.From.Type = obj.TYPE_MEM p.From.Offset = boff p.To.Type = obj.TYPE_MEM p.To.Offset = boff p.From3 = new(obj.Addr) p.From3.Offset = n p.From3.Type = obj.TYPE_CONST } boff += n w -= n } gc.Regfree(&dst) }
// zerorange clears the stack in the given range. func zerorange(p *obj.Prog, frame int64, lo int64, hi int64) *obj.Prog { cnt := hi - lo if cnt == 0 { return p } // Adjust the frame to account for LR. frame += gc.Ctxt.FixedFrameSize() offset := frame + lo reg := int16(s390x.REGSP) // If the offset cannot fit in a 12-bit unsigned displacement then we // need to create a copy of the stack pointer that we can adjust. // We also need to do this if we are going to loop. if offset < 0 || offset > 4096-clearLoopCutoff || cnt > clearLoopCutoff { p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset, obj.TYPE_REG, s390x.REGRT1, 0) p.Reg = int16(s390x.REGSP) reg = s390x.REGRT1 offset = 0 } // Generate a loop of large clears. if cnt > clearLoopCutoff { n := cnt - (cnt % 256) end := int16(s390x.REGRT2) p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, offset+n, obj.TYPE_REG, end, 0) p.Reg = reg p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset) p.From3 = new(obj.Addr) p.From3.Type = obj.TYPE_CONST p.From3.Offset = 256 pl := p p = appendpp(p, s390x.AADD, obj.TYPE_CONST, 0, 256, obj.TYPE_REG, reg, 0) p = appendpp(p, s390x.ACMP, obj.TYPE_REG, reg, 0, obj.TYPE_REG, end, 0) p = appendpp(p, s390x.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0) gc.Patch(p, pl) cnt -= n } // Generate remaining clear instructions without a loop. for cnt > 0 { n := cnt // Can clear at most 256 bytes per instruction. if n > 256 { n = 256 } switch n { // Handle very small clears with move instructions. case 8, 4, 2, 1: ins := s390x.AMOVB switch n { case 8: ins = s390x.AMOVD case 4: ins = s390x.AMOVW case 2: ins = s390x.AMOVH } p = appendpp(p, ins, obj.TYPE_CONST, 0, 0, obj.TYPE_MEM, reg, offset) // Handle clears that would require multiple move instructions with XC. default: p = appendpp(p, s390x.AXC, obj.TYPE_MEM, reg, offset, obj.TYPE_MEM, reg, offset) p.From3 = new(obj.Addr) p.From3.Type = obj.TYPE_CONST p.From3.Offset = n } cnt -= n offset += n } return p }
func bgen_float(n *gc.Node, wantTrue bool, likely int, to *obj.Prog) { nl := n.Left nr := n.Right op := n.Op if !wantTrue { // brcom is not valid on floats when NaN is involved. p1 := gc.Gbranch(obj.AJMP, nil, 0) p2 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) // No need to avoid re-genning ninit. bgen_float(n, true, -likely, p2) gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to) gc.Patch(p2, gc.Pc) return } if gc.Thearch.Use387 { op = gc.Brrev(op) // because the args are stacked if op == gc.OGE || op == gc.OGT { // only < and <= work right with NaN; reverse if needed nl, nr = nr, nl op = gc.Brrev(op) } var ax, n2, tmp gc.Node gc.Nodreg(&tmp, nr.Type, x86.REG_F0) gc.Nodreg(&n2, nr.Type, x86.REG_F0+1) gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX) if gc.Simsimtype(nr.Type) == gc.TFLOAT64 { if nl.Ullman > nr.Ullman { gc.Cgen(nl, &tmp) gc.Cgen(nr, &tmp) gins(x86.AFXCHD, &tmp, &n2) } else { gc.Cgen(nr, &tmp) gc.Cgen(nl, &tmp) } gins(x86.AFUCOMPP, &tmp, &n2) } else { // TODO(rsc): The moves back and forth to memory // here are for truncating the value to 32 bits. // This handles 32-bit comparison but presumably // all the other ops have the same problem. // We need to figure out what the right general // solution is, besides telling people to use float64. var t1 gc.Node gc.Tempname(&t1, gc.Types[gc.TFLOAT32]) var t2 gc.Node gc.Tempname(&t2, gc.Types[gc.TFLOAT32]) gc.Cgen(nr, &t1) gc.Cgen(nl, &t2) gmove(&t2, &tmp) gins(x86.AFCOMFP, &t1, &tmp) } gins(x86.AFSTSW, nil, &ax) gins(x86.ASAHF, nil, nil) } else { // Not 387 if !nl.Addable { nl = gc.CgenTemp(nl) } if !nr.Addable { nr = gc.CgenTemp(nr) } var n2 gc.Node gc.Regalloc(&n2, nr.Type, nil) gmove(nr, &n2) nr = &n2 if nl.Op != gc.OREGISTER { var n3 gc.Node gc.Regalloc(&n3, nl.Type, nil) gmove(nl, &n3) nl = &n3 } if op == gc.OGE || op == gc.OGT { // only < and <= work right with NopN; reverse if needed nl, nr = nr, nl op = gc.Brrev(op) } gins(foptoas(gc.OCMP, nr.Type, 0), nl, nr) if nl.Op == gc.OREGISTER { gc.Regfree(nl) } gc.Regfree(nr) } switch op { case gc.OEQ: // neither NE nor P p1 := gc.Gbranch(x86.AJNE, nil, -likely) p2 := gc.Gbranch(x86.AJPS, nil, -likely) gc.Patch(gc.Gbranch(obj.AJMP, nil, 0), to) gc.Patch(p1, gc.Pc) gc.Patch(p2, gc.Pc) case gc.ONE: // either NE or P gc.Patch(gc.Gbranch(x86.AJNE, nil, likely), to) gc.Patch(gc.Gbranch(x86.AJPS, nil, likely), to) default: gc.Patch(gc.Gbranch(optoas(op, nr.Type), nil, likely), to) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpCopy, ssa.OpMIPSMOVWconvert, ssa.OpMIPSMOVWreg: t := v.Type if t.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x == y { return } as := mips.AMOVW if isFPreg(x) && isFPreg(y) { as = mips.AMOVF if t.Size() == 8 { as = mips.AMOVD } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y if isHILO(x) && isHILO(y) || isHILO(x) && isFPreg(y) || isFPreg(x) && isHILO(y) { // cannot move between special registers, use TMP as intermediate p.To.Reg = mips.REGTMP p = gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = y } case ssa.OpMIPSMOVWnop: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } r := v.Reg() p := gc.Prog(loadByType(v.Type, r)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = r if isHILO(r) { // cannot directly load, load to TMP and move p.To.Reg = mips.REGTMP p = gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = r } case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } r := v.Args[0].Reg() if isHILO(r) { // cannot directly store, move to TMP and store p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = r p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP r = mips.REGTMP } p := gc.Prog(storeByType(v.Type, r)) p.From.Type = obj.TYPE_REG p.From.Reg = r gc.AddrAuto(&p.To, v) case ssa.OpMIPSADD, ssa.OpMIPSSUB, ssa.OpMIPSAND, ssa.OpMIPSOR, ssa.OpMIPSXOR, ssa.OpMIPSNOR, ssa.OpMIPSSLL, ssa.OpMIPSSRL, ssa.OpMIPSSRA, ssa.OpMIPSADDF, ssa.OpMIPSADDD, ssa.OpMIPSSUBF, ssa.OpMIPSSUBD, ssa.OpMIPSMULF, ssa.OpMIPSMULD, ssa.OpMIPSDIVF, ssa.OpMIPSDIVD, ssa.OpMIPSMUL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSSGT, ssa.OpMIPSSGTU: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSSGTzero, ssa.OpMIPSSGTUzero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSADDconst, ssa.OpMIPSSUBconst, ssa.OpMIPSANDconst, ssa.OpMIPSORconst, ssa.OpMIPSXORconst, ssa.OpMIPSNORconst, ssa.OpMIPSSLLconst, ssa.OpMIPSSRLconst, ssa.OpMIPSSRAconst, ssa.OpMIPSSGTconst, ssa.OpMIPSSGTUconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSMULT, ssa.OpMIPSMULTU, ssa.OpMIPSDIV, ssa.OpMIPSDIVU: // result in hi,lo p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() case ssa.OpMIPSMOVWconst: r := v.Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = r if isFPreg(r) || isHILO(r) { // cannot move into FP or special registers, use TMP as intermediate p.To.Reg = mips.REGTMP p = gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGTMP p.To.Type = obj.TYPE_REG p.To.Reg = r } case ssa.OpMIPSMOVFconst, ssa.OpMIPSMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSCMOVZ: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSCMOVZzero: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSCMPEQF, ssa.OpMIPSCMPEQD, ssa.OpMIPSCMPGEF, ssa.OpMIPSCMPGED, ssa.OpMIPSCMPGTF, ssa.OpMIPSCMPGTD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() case ssa.OpMIPSMOVWaddr: p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_ADDR var wantreg string // MOVW $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R29) // when constant is large, tmp register (R23) may be used // - base is SB: load external address with relocation switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVW $off(SP), R wantreg = "SP" p.From.Reg = mips.REGSP p.From.Offset = v.AuxInt } if reg := v.Args[0].RegName(); reg != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) } p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSMOVBload, ssa.OpMIPSMOVBUload, ssa.OpMIPSMOVHload, ssa.OpMIPSMOVHUload, ssa.OpMIPSMOVWload, ssa.OpMIPSMOVFload, ssa.OpMIPSMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSMOVBstore, ssa.OpMIPSMOVHstore, ssa.OpMIPSMOVWstore, ssa.OpMIPSMOVFstore, ssa.OpMIPSMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpMIPSMOVBstorezero, ssa.OpMIPSMOVHstorezero, ssa.OpMIPSMOVWstorezero: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpMIPSMOVBreg, ssa.OpMIPSMOVBUreg, ssa.OpMIPSMOVHreg, ssa.OpMIPSMOVHUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpMIPSMOVWreg || a.Op == ssa.OpMIPSMOVWnop { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpMIPSMOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpMIPSMOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpMIPSMOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpMIPSMOVHUreg && t.Size() == 2 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if v.Reg() == v.Args[0].Reg() { return } p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() return default: } } fallthrough case ssa.OpMIPSMOVWF, ssa.OpMIPSMOVWD, ssa.OpMIPSTRUNCFW, ssa.OpMIPSTRUNCDW, ssa.OpMIPSMOVFD, ssa.OpMIPSMOVDF, ssa.OpMIPSNEGF, ssa.OpMIPSNEGD, ssa.OpMIPSSQRTD, ssa.OpMIPSCLZ: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSNEG: // SUB from REGZERO p := gc.Prog(mips.ASUBU) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpMIPSLoweredZero: // SUBU $4, R1 // MOVW R0, 4(R1) // ADDU $4, R1 // BNE Rarg1, R1, -2(PC) // arg1 is the address of the last element to zero var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = mips.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = mips.AMOVH default: sz = 1 mov = mips.AMOVB } p := gc.Prog(mips.ASUBU) p.From.Type = obj.TYPE_CONST p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = mips.REG_R1 p2 := gc.Prog(mov) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGZERO p2.To.Type = obj.TYPE_MEM p2.To.Reg = mips.REG_R1 p2.To.Offset = sz p3 := gc.Prog(mips.AADDU) p3.From.Type = obj.TYPE_CONST p3.From.Offset = sz p3.To.Type = obj.TYPE_REG p3.To.Reg = mips.REG_R1 p4 := gc.Prog(mips.ABNE) p4.From.Type = obj.TYPE_REG p4.From.Reg = v.Args[1].Reg() p4.Reg = mips.REG_R1 p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p2) case ssa.OpMIPSLoweredMove: // SUBU $4, R1 // MOVW 4(R1), Rtmp // MOVW Rtmp, (R2) // ADDU $4, R1 // ADDU $4, R2 // BNE Rarg2, R1, -4(PC) // arg2 is the address of the last element of src var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = mips.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = mips.AMOVH default: sz = 1 mov = mips.AMOVB } p := gc.Prog(mips.ASUBU) p.From.Type = obj.TYPE_CONST p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = mips.REG_R1 p2 := gc.Prog(mov) p2.From.Type = obj.TYPE_MEM p2.From.Reg = mips.REG_R1 p2.From.Offset = sz p2.To.Type = obj.TYPE_REG p2.To.Reg = mips.REGTMP p3 := gc.Prog(mov) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_MEM p3.To.Reg = mips.REG_R2 p4 := gc.Prog(mips.AADDU) p4.From.Type = obj.TYPE_CONST p4.From.Offset = sz p4.To.Type = obj.TYPE_REG p4.To.Reg = mips.REG_R1 p5 := gc.Prog(mips.AADDU) p5.From.Type = obj.TYPE_CONST p5.From.Offset = sz p5.To.Type = obj.TYPE_REG p5.To.Reg = mips.REG_R2 p6 := gc.Prog(mips.ABNE) p6.From.Type = obj.TYPE_REG p6.From.Reg = v.Args[2].Reg() p6.Reg = mips.REG_R1 p6.To.Type = obj.TYPE_BRANCH gc.Patch(p6, p2) case ssa.OpMIPSCALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSCALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpMIPSLoweredAtomicLoad: gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicStore: gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicStorezero: gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicExchange: // SYNC // MOVW Rarg1, Rtmp // LL (Rarg0), Rout // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC gc.Prog(mips.ASYNC) p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP p1 := gc.Prog(mips.ALL) p1.From.Type = obj.TYPE_MEM p1.From.Reg = v.Args[0].Reg() p1.To.Type = obj.TYPE_REG p1.To.Reg = v.Reg0() p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicAdd: // SYNC // LL (Rarg0), Rout // ADDU Rarg1, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC // ADDU Rarg1, Rout gc.Prog(mips.ASYNC) p := gc.Prog(mips.ALL) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() p1 := gc.Prog(mips.AADDU) p1.From.Type = obj.TYPE_REG p1.From.Reg = v.Args[1].Reg() p1.Reg = v.Reg0() p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) p4 := gc.Prog(mips.AADDU) p4.From.Type = obj.TYPE_REG p4.From.Reg = v.Args[1].Reg() p4.Reg = v.Reg0() p4.To.Type = obj.TYPE_REG p4.To.Reg = v.Reg0() case ssa.OpMIPSLoweredAtomicAddconst: // SYNC // LL (Rarg0), Rout // ADDU $auxInt, Rout, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC // ADDU $auxInt, Rout gc.Prog(mips.ASYNC) p := gc.Prog(mips.ALL) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() p1 := gc.Prog(mips.AADDU) p1.From.Type = obj.TYPE_CONST p1.From.Offset = v.AuxInt p1.Reg = v.Reg0() p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) p4 := gc.Prog(mips.AADDU) p4.From.Type = obj.TYPE_CONST p4.From.Offset = v.AuxInt p4.Reg = v.Reg0() p4.To.Type = obj.TYPE_REG p4.To.Reg = v.Reg0() case ssa.OpMIPSLoweredAtomicAnd, ssa.OpMIPSLoweredAtomicOr: // SYNC // LL (Rarg0), Rtmp // AND/OR Rarg1, Rtmp // SC Rtmp, (Rarg0) // BEQ Rtmp, -3(PC) // SYNC gc.Prog(mips.ASYNC) p := gc.Prog(mips.ALL) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP p1 := gc.Prog(v.Op.Asm()) p1.From.Type = obj.TYPE_REG p1.From.Reg = v.Args[1].Reg() p1.Reg = mips.REGTMP p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ASC) p2.From.Type = obj.TYPE_REG p2.From.Reg = mips.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = v.Args[0].Reg() p3 := gc.Prog(mips.ABEQ) p3.From.Type = obj.TYPE_REG p3.From.Reg = mips.REGTMP p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) gc.Prog(mips.ASYNC) case ssa.OpMIPSLoweredAtomicCas: // MOVW $0, Rout // SYNC // LL (Rarg0), Rtmp // BNE Rtmp, Rarg1, 4(PC) // MOVW Rarg2, Rout // SC Rout, (Rarg0) // BEQ Rout, -4(PC) // SYNC p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = mips.REGZERO p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() gc.Prog(mips.ASYNC) p1 := gc.Prog(mips.ALL) p1.From.Type = obj.TYPE_MEM p1.From.Reg = v.Args[0].Reg() p1.To.Type = obj.TYPE_REG p1.To.Reg = mips.REGTMP p2 := gc.Prog(mips.ABNE) p2.From.Type = obj.TYPE_REG p2.From.Reg = v.Args[1].Reg() p2.Reg = mips.REGTMP p2.To.Type = obj.TYPE_BRANCH p3 := gc.Prog(mips.AMOVW) p3.From.Type = obj.TYPE_REG p3.From.Reg = v.Args[2].Reg() p3.To.Type = obj.TYPE_REG p3.To.Reg = v.Reg0() p4 := gc.Prog(mips.ASC) p4.From.Type = obj.TYPE_REG p4.From.Reg = v.Reg0() p4.To.Type = obj.TYPE_MEM p4.To.Reg = v.Args[0].Reg() p5 := gc.Prog(mips.ABEQ) p5.From.Type = obj.TYPE_REG p5.From.Reg = v.Reg0() p5.To.Type = obj.TYPE_BRANCH gc.Patch(p5, p1) gc.Prog(mips.ASYNC) p6 := gc.Prog(obj.ANOP) gc.Patch(p2, p6) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpMIPSLoweredNilCheck: // Issue a load which will fault if arg is nil. p := gc.Prog(mips.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = mips.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpMIPSFPFlagTrue, ssa.OpMIPSFPFlagFalse: // MOVW $1, r // CMOVF R0, r cmov := mips.ACMOVF if v.Op == ssa.OpMIPSFPFlagFalse { cmov = mips.ACMOVT } p := gc.Prog(mips.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p1 := gc.Prog(cmov) p1.From.Type = obj.TYPE_REG p1.From.Reg = mips.REGZERO p1.To.Type = obj.TYPE_REG p1.To.Reg = v.Reg() case ssa.OpMIPSLoweredGetClosurePtr: // Closure pointer is R22 (mips.REGCTXT). gc.CheckLoweredGetClosurePtr(v) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { s.SetLineno(v.Line) switch v.Op { case ssa.OpInitMem: // memory arg needs no code case ssa.OpArg: // input args need no code case ssa.OpSP, ssa.OpSB, ssa.OpGetG: // nothing to do case ssa.OpCopy, ssa.OpARMMOVWconvert, ssa.OpARMMOVWreg: if v.Type.IsMemory() { return } x := v.Args[0].Reg() y := v.Reg() if x == y { return } as := arm.AMOVW if v.Type.IsFloat() { switch v.Type.Size() { case 4: as = arm.AMOVF case 8: as = arm.AMOVD default: panic("bad float size") } } p := gc.Prog(as) p.From.Type = obj.TYPE_REG p.From.Reg = x p.To.Type = obj.TYPE_REG p.To.Reg = y case ssa.OpARMMOVWnop: if v.Reg() != v.Args[0].Reg() { v.Fatalf("input[0] and output not in same register %s", v.LongString()) } // nothing to do case ssa.OpLoadReg: if v.Type.IsFlags() { v.Fatalf("load flags not implemented: %v", v.LongString()) return } p := gc.Prog(loadByType(v.Type)) gc.AddrAuto(&p.From, v.Args[0]) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpPhi: gc.CheckLoweredPhi(v) case ssa.OpStoreReg: if v.Type.IsFlags() { v.Fatalf("store flags not implemented: %v", v.LongString()) return } p := gc.Prog(storeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() gc.AddrAuto(&p.To, v) case ssa.OpARMUDIVrtcall: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = obj.Linklookup(gc.Ctxt, "udiv", 0) case ssa.OpARMADD, ssa.OpARMADC, ssa.OpARMSUB, ssa.OpARMSBC, ssa.OpARMRSB, ssa.OpARMAND, ssa.OpARMOR, ssa.OpARMXOR, ssa.OpARMBIC, ssa.OpARMMUL, ssa.OpARMADDF, ssa.OpARMADDD, ssa.OpARMSUBF, ssa.OpARMSUBD, ssa.OpARMMULF, ssa.OpARMMULD, ssa.OpARMDIVF, ssa.OpARMDIVD: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDS, ssa.OpARMSUBS: r := v.Reg0() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_SBIT p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMSLL, ssa.OpARMSRL, ssa.OpARMSRA: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMSRAcond: // ARM shift instructions uses only the low-order byte of the shift amount // generate conditional instructions to deal with large shifts // flag is already set // SRA.HS $31, Rarg0, Rdst // shift 31 bits to get the sign bit // SRA.LO Rarg1, Rarg0, Rdst r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() p := gc.Prog(arm.ASRA) p.Scond = arm.C_SCOND_HS p.From.Type = obj.TYPE_CONST p.From.Offset = 31 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r p = gc.Prog(arm.ASRA) p.Scond = arm.C_SCOND_LO p.From.Type = obj.TYPE_REG p.From.Reg = r2 p.Reg = r1 p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDconst, ssa.OpARMADCconst, ssa.OpARMSUBconst, ssa.OpARMSBCconst, ssa.OpARMRSBconst, ssa.OpARMRSCconst, ssa.OpARMANDconst, ssa.OpARMORconst, ssa.OpARMXORconst, ssa.OpARMBICconst, ssa.OpARMSLLconst, ssa.OpARMSRLconst, ssa.OpARMSRAconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMADDSconst, ssa.OpARMSUBSconst, ssa.OpARMRSBSconst: p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_SBIT p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg0() case ssa.OpARMSRRconst: genshift(arm.AMOVW, 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_RR, v.AuxInt) case ssa.OpARMADDshiftLL, ssa.OpARMADCshiftLL, ssa.OpARMSUBshiftLL, ssa.OpARMSBCshiftLL, ssa.OpARMRSBshiftLL, ssa.OpARMRSCshiftLL, ssa.OpARMANDshiftLL, ssa.OpARMORshiftLL, ssa.OpARMXORshiftLL, ssa.OpARMBICshiftLL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LL, v.AuxInt) case ssa.OpARMADDSshiftLL, ssa.OpARMSUBSshiftLL, ssa.OpARMRSBSshiftLL: p := genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg0(), arm.SHIFT_LL, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRL, ssa.OpARMADCshiftRL, ssa.OpARMSUBshiftRL, ssa.OpARMSBCshiftRL, ssa.OpARMRSBshiftRL, ssa.OpARMRSCshiftRL, ssa.OpARMANDshiftRL, ssa.OpARMORshiftRL, ssa.OpARMXORshiftRL, ssa.OpARMBICshiftRL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LR, v.AuxInt) case ssa.OpARMADDSshiftRL, ssa.OpARMSUBSshiftRL, ssa.OpARMRSBSshiftRL: p := genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg0(), arm.SHIFT_LR, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRA, ssa.OpARMADCshiftRA, ssa.OpARMSUBshiftRA, ssa.OpARMSBCshiftRA, ssa.OpARMRSBshiftRA, ssa.OpARMRSCshiftRA, ssa.OpARMANDshiftRA, ssa.OpARMORshiftRA, ssa.OpARMXORshiftRA, ssa.OpARMBICshiftRA: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_AR, v.AuxInt) case ssa.OpARMADDSshiftRA, ssa.OpARMSUBSshiftRA, ssa.OpARMRSBSshiftRA: p := genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg0(), arm.SHIFT_AR, v.AuxInt) p.Scond = arm.C_SBIT case ssa.OpARMXORshiftRR: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR, v.AuxInt) case ssa.OpARMMVNshiftLL: genshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_LL, v.AuxInt) case ssa.OpARMMVNshiftRL: genshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_LR, v.AuxInt) case ssa.OpARMMVNshiftRA: genshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm.SHIFT_AR, v.AuxInt) case ssa.OpARMMVNshiftLLreg: genregshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LL) case ssa.OpARMMVNshiftRLreg: genregshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_LR) case ssa.OpARMMVNshiftRAreg: genregshift(v.Op.Asm(), 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_AR) case ssa.OpARMADDshiftLLreg, ssa.OpARMADCshiftLLreg, ssa.OpARMSUBshiftLLreg, ssa.OpARMSBCshiftLLreg, ssa.OpARMRSBshiftLLreg, ssa.OpARMRSCshiftLLreg, ssa.OpARMANDshiftLLreg, ssa.OpARMORshiftLLreg, ssa.OpARMXORshiftLLreg, ssa.OpARMBICshiftLLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg(), arm.SHIFT_LL) case ssa.OpARMADDSshiftLLreg, ssa.OpARMSUBSshiftLLreg, ssa.OpARMRSBSshiftLLreg: p := genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg0(), arm.SHIFT_LL) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRLreg, ssa.OpARMADCshiftRLreg, ssa.OpARMSUBshiftRLreg, ssa.OpARMSBCshiftRLreg, ssa.OpARMRSBshiftRLreg, ssa.OpARMRSCshiftRLreg, ssa.OpARMANDshiftRLreg, ssa.OpARMORshiftRLreg, ssa.OpARMXORshiftRLreg, ssa.OpARMBICshiftRLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg(), arm.SHIFT_LR) case ssa.OpARMADDSshiftRLreg, ssa.OpARMSUBSshiftRLreg, ssa.OpARMRSBSshiftRLreg: p := genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg0(), arm.SHIFT_LR) p.Scond = arm.C_SBIT case ssa.OpARMADDshiftRAreg, ssa.OpARMADCshiftRAreg, ssa.OpARMSUBshiftRAreg, ssa.OpARMSBCshiftRAreg, ssa.OpARMRSBshiftRAreg, ssa.OpARMRSCshiftRAreg, ssa.OpARMANDshiftRAreg, ssa.OpARMORshiftRAreg, ssa.OpARMXORshiftRAreg, ssa.OpARMBICshiftRAreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg(), arm.SHIFT_AR) case ssa.OpARMADDSshiftRAreg, ssa.OpARMSUBSshiftRAreg, ssa.OpARMRSBSshiftRAreg: p := genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), v.Reg0(), arm.SHIFT_AR) p.Scond = arm.C_SBIT case ssa.OpARMHMUL, ssa.OpARMHMULU: // 32-bit high multiplication p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REGREG p.To.Reg = v.Reg() p.To.Offset = arm.REGTMP // throw away low 32-bit into tmp register case ssa.OpARMMULLU: // 32-bit multiplication, results 64-bit, high 32-bit in out0, low 32-bit in out1 p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REGREG p.To.Reg = v.Reg0() // high 32-bit p.To.Offset = int64(v.Reg1()) // low 32-bit case ssa.OpARMMULA: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_REGREG2 p.To.Reg = v.Reg() // result p.To.Offset = int64(v.Args[2].Reg()) // addend case ssa.OpARMMOVWconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMMOVFconst, ssa.OpARMMOVDconst: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_FCONST p.From.Val = math.Float64frombits(uint64(v.AuxInt)) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCMP, ssa.OpARMCMN, ssa.OpARMTST, ssa.OpARMTEQ, ssa.OpARMCMPF, ssa.OpARMCMPD: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG // Special layout in ARM assembly // Comparing to x86, the operands of ARM's CMP are reversed. p.From.Reg = v.Args[1].Reg() p.Reg = v.Args[0].Reg() case ssa.OpARMCMPconst, ssa.OpARMCMNconst, ssa.OpARMTSTconst, ssa.OpARMTEQconst: // Special layout in ARM assembly p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.Reg = v.Args[0].Reg() case ssa.OpARMCMPF0, ssa.OpARMCMPD0: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() case ssa.OpARMCMPshiftLL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm.SHIFT_LL, v.AuxInt) case ssa.OpARMCMPshiftRL: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm.SHIFT_LR, v.AuxInt) case ssa.OpARMCMPshiftRA: genshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm.SHIFT_AR, v.AuxInt) case ssa.OpARMCMPshiftLLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), 0, arm.SHIFT_LL) case ssa.OpARMCMPshiftRLreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), 0, arm.SHIFT_LR) case ssa.OpARMCMPshiftRAreg: genregshift(v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg(), 0, arm.SHIFT_AR) case ssa.OpARMMOVWaddr: p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_ADDR p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() var wantreg string // MOVW $sym+off(base), R // the assembler expands it as the following: // - base is SP: add constant offset to SP (R13) // when constant is large, tmp register (R11) may be used // - base is SB: load external address from constant pool (use relocation) switch v.Aux.(type) { default: v.Fatalf("aux is of unknown type %T", v.Aux) case *ssa.ExternSymbol: wantreg = "SB" gc.AddAux(&p.From, v) case *ssa.ArgSymbol, *ssa.AutoSymbol: wantreg = "SP" gc.AddAux(&p.From, v) case nil: // No sym, just MOVW $off(SP), R wantreg = "SP" p.From.Reg = arm.REGSP p.From.Offset = v.AuxInt } if reg := v.Args[0].RegName(); reg != wantreg { v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) } case ssa.OpARMMOVBload, ssa.OpARMMOVBUload, ssa.OpARMMOVHload, ssa.OpARMMOVHUload, ssa.OpARMMOVWload, ssa.OpARMMOVFload, ssa.OpARMMOVDload: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMMOVBstore, ssa.OpARMMOVHstore, ssa.OpARMMOVWstore, ssa.OpARMMOVFstore, ssa.OpARMMOVDstore: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[1].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = v.Args[0].Reg() gc.AddAux(&p.To, v) case ssa.OpARMMOVWloadidx: // this is just shift 0 bits fallthrough case ssa.OpARMMOVWloadshiftLL: p := genshift(v.Op.Asm(), 0, v.Args[1].Reg(), v.Reg(), arm.SHIFT_LL, v.AuxInt) p.From.Reg = v.Args[0].Reg() case ssa.OpARMMOVWloadshiftRL: p := genshift(v.Op.Asm(), 0, v.Args[1].Reg(), v.Reg(), arm.SHIFT_LR, v.AuxInt) p.From.Reg = v.Args[0].Reg() case ssa.OpARMMOVWloadshiftRA: p := genshift(v.Op.Asm(), 0, v.Args[1].Reg(), v.Reg(), arm.SHIFT_AR, v.AuxInt) p.From.Reg = v.Args[0].Reg() case ssa.OpARMMOVWstoreidx: // this is just shift 0 bits fallthrough case ssa.OpARMMOVWstoreshiftLL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_SHIFT p.To.Reg = v.Args[0].Reg() p.To.Offset = int64(makeshift(v.Args[1].Reg(), arm.SHIFT_LL, v.AuxInt)) case ssa.OpARMMOVWstoreshiftRL: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_SHIFT p.To.Reg = v.Args[0].Reg() p.To.Offset = int64(makeshift(v.Args[1].Reg(), arm.SHIFT_LR, v.AuxInt)) case ssa.OpARMMOVWstoreshiftRA: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_SHIFT p.To.Reg = v.Args[0].Reg() p.To.Offset = int64(makeshift(v.Args[1].Reg(), arm.SHIFT_AR, v.AuxInt)) case ssa.OpARMMOVBreg, ssa.OpARMMOVBUreg, ssa.OpARMMOVHreg, ssa.OpARMMOVHUreg: a := v.Args[0] for a.Op == ssa.OpCopy || a.Op == ssa.OpARMMOVWreg || a.Op == ssa.OpARMMOVWnop { a = a.Args[0] } if a.Op == ssa.OpLoadReg { t := a.Type switch { case v.Op == ssa.OpARMMOVBreg && t.Size() == 1 && t.IsSigned(), v.Op == ssa.OpARMMOVBUreg && t.Size() == 1 && !t.IsSigned(), v.Op == ssa.OpARMMOVHreg && t.Size() == 2 && t.IsSigned(), v.Op == ssa.OpARMMOVHUreg && t.Size() == 2 && !t.IsSigned(): // arg is a proper-typed load, already zero/sign-extended, don't extend again if v.Reg() == v.Args[0].Reg() { return } p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() return default: } } fallthrough case ssa.OpARMMVN, ssa.OpARMCLZ, ssa.OpARMSQRTD, ssa.OpARMNEGF, ssa.OpARMNEGD, ssa.OpARMMOVWF, ssa.OpARMMOVWD, ssa.OpARMMOVFW, ssa.OpARMMOVDW, ssa.OpARMMOVFD, ssa.OpARMMOVDF: p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMMOVWUF, ssa.OpARMMOVWUD, ssa.OpARMMOVFWU, ssa.OpARMMOVDWU: p := gc.Prog(v.Op.Asm()) p.Scond = arm.C_UBIT p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCMOVWHSconst: p := gc.Prog(arm.AMOVW) p.Scond = arm.C_SCOND_HS p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCMOVWLSconst: p := gc.Prog(arm.AMOVW) p.Scond = arm.C_SCOND_LS p.From.Type = obj.TYPE_CONST p.From.Offset = v.AuxInt p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARMCALLstatic: if v.Aux.(*gc.Sym) == gc.Deferreturn.Sym { // Deferred calls will appear to be returning to // the CALL deferreturn(SB) that we are about to emit. // However, the stack trace code will show the line // of the instruction byte before the return PC. // To avoid that being an unrelated instruction, // insert an actual hardware NOP that will have the right line number. // This is different from obj.ANOP, which is a virtual no-op // that doesn't make it into the instruction stream. ginsnop() } p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(v.Aux.(*gc.Sym)) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLclosure: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLdefer: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Deferproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLgo: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Newproc.Sym) if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMCALLinter: p := gc.Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Offset = 0 p.To.Reg = v.Args[0].Reg() if gc.Maxarg < v.AuxInt { gc.Maxarg = v.AuxInt } case ssa.OpARMDUFFZERO: p := gc.Prog(obj.ADUFFZERO) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARMDUFFCOPY: p := gc.Prog(obj.ADUFFCOPY) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt case ssa.OpARMLoweredNilCheck: // Issue a load which will fault if arg is nil. p := gc.Prog(arm.AMOVB) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = arm.REGTMP if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers gc.Warnl(v.Line, "generated nil check") } case ssa.OpARMLoweredZero: // MOVW.P Rarg2, 4(R1) // CMP Rarg1, R1 // BLE -2(PC) // arg1 is the address of the last element to zero // arg2 is known to be zero // auxint is alignment var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = arm.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = arm.AMOVH default: sz = 1 mov = arm.AMOVB } p := gc.Prog(mov) p.Scond = arm.C_PBIT p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() p.To.Type = obj.TYPE_MEM p.To.Reg = arm.REG_R1 p.To.Offset = sz p2 := gc.Prog(arm.ACMP) p2.From.Type = obj.TYPE_REG p2.From.Reg = v.Args[1].Reg() p2.Reg = arm.REG_R1 p3 := gc.Prog(arm.ABLE) p3.To.Type = obj.TYPE_BRANCH gc.Patch(p3, p) case ssa.OpARMLoweredMove: // MOVW.P 4(R1), Rtmp // MOVW.P Rtmp, 4(R2) // CMP Rarg2, R1 // BLE -3(PC) // arg2 is the address of the last element of src // auxint is alignment var sz int64 var mov obj.As switch { case v.AuxInt%4 == 0: sz = 4 mov = arm.AMOVW case v.AuxInt%2 == 0: sz = 2 mov = arm.AMOVH default: sz = 1 mov = arm.AMOVB } p := gc.Prog(mov) p.Scond = arm.C_PBIT p.From.Type = obj.TYPE_MEM p.From.Reg = arm.REG_R1 p.From.Offset = sz p.To.Type = obj.TYPE_REG p.To.Reg = arm.REGTMP p2 := gc.Prog(mov) p2.Scond = arm.C_PBIT p2.From.Type = obj.TYPE_REG p2.From.Reg = arm.REGTMP p2.To.Type = obj.TYPE_MEM p2.To.Reg = arm.REG_R2 p2.To.Offset = sz p3 := gc.Prog(arm.ACMP) p3.From.Type = obj.TYPE_REG p3.From.Reg = v.Args[2].Reg() p3.Reg = arm.REG_R1 p4 := gc.Prog(arm.ABLE) p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p) case ssa.OpVarDef: gc.Gvardef(v.Aux.(*gc.Node)) case ssa.OpVarKill: gc.Gvarkill(v.Aux.(*gc.Node)) case ssa.OpVarLive: gc.Gvarlive(v.Aux.(*gc.Node)) case ssa.OpKeepAlive: gc.KeepAlive(v) case ssa.OpARMEqual, ssa.OpARMNotEqual, ssa.OpARMLessThan, ssa.OpARMLessEqual, ssa.OpARMGreaterThan, ssa.OpARMGreaterEqual, ssa.OpARMLessThanU, ssa.OpARMLessEqualU, ssa.OpARMGreaterThanU, ssa.OpARMGreaterEqualU: // generate boolean values // use conditional move p := gc.Prog(arm.AMOVW) p.From.Type = obj.TYPE_CONST p.From.Offset = 0 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() p = gc.Prog(arm.AMOVW) p.Scond = condBits[v.Op] p.From.Type = obj.TYPE_CONST p.From.Offset = 1 p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpSelect0, ssa.OpSelect1: // nothing to do case ssa.OpARMLoweredGetClosurePtr: // Closure pointer is R7 (arm.REGCTXT). gc.CheckLoweredGetClosurePtr(v) case ssa.OpARMFlagEQ, ssa.OpARMFlagLT_ULT, ssa.OpARMFlagLT_UGT, ssa.OpARMFlagGT_ULT, ssa.OpARMFlagGT_UGT: v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString()) case ssa.OpARMInvertFlags: v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString()) default: v.Fatalf("genValue not implemented: %s", v.LongString()) } }
/* * generate shift according to op, one of: * res = nl << nr * res = nl >> nr */ func cgen_shift(op gc.Op, bounded bool, nl *gc.Node, nr *gc.Node, res *gc.Node) { if nl.Type.Width > 4 { gc.Fatalf("cgen_shift %v", nl.Type) } w := int(nl.Type.Width * 8) a := optoas(op, nl.Type) if nr.Op == gc.OLITERAL { var n2 gc.Node gc.Tempname(&n2, nl.Type) gc.Cgen(nl, &n2) var n1 gc.Node gc.Regalloc(&n1, nl.Type, res) gmove(&n2, &n1) sc := uint64(nr.Int()) if sc >= uint64(nl.Type.Width*8) { // large shift gets 2 shifts by width-1 gins(a, ncon(uint32(w)-1), &n1) gins(a, ncon(uint32(w)-1), &n1) } else { gins(a, nr, &n1) } gmove(&n1, res) gc.Regfree(&n1) return } var oldcx gc.Node var cx gc.Node gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX) if gc.GetReg(x86.REG_CX) > 1 && !gc.Samereg(&cx, res) { gc.Tempname(&oldcx, gc.Types[gc.TUINT32]) gmove(&cx, &oldcx) } var n1 gc.Node var nt gc.Node if nr.Type.Width > 4 { gc.Tempname(&nt, nr.Type) n1 = nt } else { gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX) gc.Regalloc(&n1, nr.Type, &n1) // to hold the shift type in CX } var n2 gc.Node if gc.Samereg(&cx, res) { gc.Regalloc(&n2, nl.Type, nil) } else { gc.Regalloc(&n2, nl.Type, res) } if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &n2) gc.Cgen(nr, &n1) } else { gc.Cgen(nr, &n1) gc.Cgen(nl, &n2) } // test and fix up large shifts if bounded { if nr.Type.Width > 4 { // delayed reg alloc gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX) gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX var lo gc.Node var hi gc.Node split64(&nt, &lo, &hi) gmove(&lo, &n1) splitclean() } } else { var p1 *obj.Prog if nr.Type.Width > 4 { // delayed reg alloc gc.Nodreg(&n1, gc.Types[gc.TUINT32], x86.REG_CX) gc.Regalloc(&n1, gc.Types[gc.TUINT32], &n1) // to hold the shift type in CX var lo gc.Node var hi gc.Node split64(&nt, &lo, &hi) gmove(&lo, &n1) gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &hi, ncon(0)) p2 := gc.Gbranch(optoas(gc.ONE, gc.Types[gc.TUINT32]), nil, +1) gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &n1, ncon(uint32(w))) p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) splitclean() gc.Patch(p2, gc.Pc) } else { gins(optoas(gc.OCMP, nr.Type), &n1, ncon(uint32(w))) p1 = gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TUINT32]), nil, +1) } if op == gc.ORSH && gc.Issigned[nl.Type.Etype] { gins(a, ncon(uint32(w)-1), &n2) } else { gmove(ncon(0), &n2) } gc.Patch(p1, gc.Pc) } gins(a, &n1, &n2) if oldcx.Op != 0 { gmove(&oldcx, &cx) } gmove(&n2, res) gc.Regfree(&n1) gc.Regfree(&n2) }
/* * generate division. * caller must set: * ax = allocated AX register * dx = allocated DX register * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op gc.Op, nl *gc.Node, nr *gc.Node, res *gc.Node, ax *gc.Node, dx *gc.Node) { // Have to be careful about handling // most negative int divided by -1 correctly. // The hardware will trap. // Also the byte divide instruction needs AH, // which we otherwise don't have to deal with. // Easiest way to avoid for int8, int16: use int32. // For int32 and int64, use explicit test. // Could use int64 hw for int32. t := nl.Type t0 := t check := false if gc.Issigned[t.Etype] { check = true if gc.Isconst(nl, gc.CTINT) && nl.Int() != -1<<uint64(t.Width*8-1) { check = false } else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 { check = false } } if t.Width < 4 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT32] } else { t = gc.Types[gc.TUINT32] } check = false } var t1 gc.Node gc.Tempname(&t1, t) var t2 gc.Node gc.Tempname(&t2, t) if t0 != t { var t3 gc.Node gc.Tempname(&t3, t0) var t4 gc.Node gc.Tempname(&t4, t0) gc.Cgen(nl, &t3) gc.Cgen(nr, &t4) // Convert. gmove(&t3, &t1) gmove(&t4, &t2) } else { gc.Cgen(nl, &t1) gc.Cgen(nr, &t2) } var n1 gc.Node if !gc.Samereg(ax, res) && !gc.Samereg(dx, res) { gc.Regalloc(&n1, t, res) } else { gc.Regalloc(&n1, t, nil) } gmove(&t2, &n1) gmove(&t1, ax) var p2 *obj.Prog var n4 gc.Node if gc.Nacl { // Native Client does not relay the divide-by-zero trap // to the executing program, so we must insert a check // for ourselves. gc.Nodconst(&n4, t, 0) gins(optoas(gc.OCMP, t), &n1, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) } if check { gc.Nodconst(&n4, t, -1) gins(optoas(gc.OCMP, t), &n1, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if op == gc.ODIV { // a / (-1) is -a. gins(optoas(gc.OMINUS, t), nil, ax) gmove(ax, res) } else { // a % (-1) is 0. gc.Nodconst(&n4, t, 0) gmove(&n4, res) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) } if !gc.Issigned[t.Etype] { var nz gc.Node gc.Nodconst(&nz, t, 0) gmove(&nz, dx) } else { gins(optoas(gc.OEXTEND, t), nil, nil) } gins(optoas(op, t), &n1, nil) gc.Regfree(&n1) if op == gc.ODIV { gmove(ax, res) } else { gmove(dx, res) } if check { gc.Patch(p2, gc.Pc) } }
/* * generate division. * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) { // Have to be careful about handling // most negative int divided by -1 correctly. // The hardware will trap. // Also the byte divide instruction needs AH, // which we otherwise don't have to deal with. // Easiest way to avoid for int8, int16: use int32. // For int32 and int64, use explicit test. // Could use int64 hw for int32. t := nl.Type t0 := t check := 0 if gc.Issigned[t.Etype] { check = 1 if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) { check = 0 } else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 { check = 0 } } if t.Width < 4 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT32] } else { t = gc.Types[gc.TUINT32] } check = 0 } a := optoas(op, t) var n3 gc.Node gc.Regalloc(&n3, t0, nil) var ax gc.Node var oldax gc.Node if nl.Ullman >= nr.Ullman { savex(x86.REG_AX, &ax, &oldax, res, t0) gc.Cgen(nl, &ax) gc.Regalloc(&ax, t0, &ax) // mark ax live during cgen gc.Cgen(nr, &n3) gc.Regfree(&ax) } else { gc.Cgen(nr, &n3) savex(x86.REG_AX, &ax, &oldax, res, t0) gc.Cgen(nl, &ax) } if t != t0 { // Convert ax1 := ax n31 := n3 ax.Type = t n3.Type = t gmove(&ax1, &ax) gmove(&n31, &n3) } var n4 gc.Node if gc.Nacl { // Native Client does not relay the divide-by-zero trap // to the executing program, so we must insert a check // for ourselves. gc.Nodconst(&n4, t, 0) gins(optoas(gc.OCMP, t), &n3, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) } var p2 *obj.Prog if check != 0 { gc.Nodconst(&n4, t, -1) gins(optoas(gc.OCMP, t), &n3, &n4) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if op == gc.ODIV { // a / (-1) is -a. gins(optoas(gc.OMINUS, t), nil, &ax) gmove(&ax, res) } else { // a % (-1) is 0. gc.Nodconst(&n4, t, 0) gmove(&n4, res) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) } var olddx gc.Node var dx gc.Node savex(x86.REG_DX, &dx, &olddx, res, t) if !gc.Issigned[t.Etype] { gc.Nodconst(&n4, t, 0) gmove(&n4, &dx) } else { gins(optoas(gc.OEXTEND, t), nil, nil) } gins(a, &n3, nil) gc.Regfree(&n3) if op == gc.ODIV { gmove(&ax, res) } else { gmove(&dx, res) } restx(&dx, &olddx) if check != 0 { gc.Patch(p2, gc.Pc) } restx(&ax, &oldax) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], arm64.REGZERO) var dst gc.Node // REGRT1 is reserved on arm64, see arm64/gsubr.go. gc.Nodreg(&dst, gc.Types[gc.Tptr], arm64.REGRT1) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(arm64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 p.Scond = arm64.C_XPRE pl := (*obj.Prog)(p) p = gcmp(arm64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R16 on the last zeroed dword boff = 8 } else if q >= 4 && !darwin { // darwin ld64 cannot handle BR26 reloc with non-zero addend p := gins(arm64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R16 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(arm64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(arm64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } }
/* * generate division. * generates one of: * res = nl / nr * res = nl % nr * according to op. */ func dodiv(op int, nl *gc.Node, nr *gc.Node, res *gc.Node) { // Have to be careful about handling // most negative int divided by -1 correctly. // The hardware will generate undefined result. // Also need to explicitly trap on division on zero, // the hardware will silently generate undefined result. // DIVW will leave unpredicable result in higher 32-bit, // so always use DIVD/DIVDU. t := nl.Type t0 := t check := 0 if gc.Issigned[t.Etype] { check = 1 if gc.Isconst(nl, gc.CTINT) && nl.Int() != -(1<<uint64(t.Width*8-1)) { check = 0 } else if gc.Isconst(nr, gc.CTINT) && nr.Int() != -1 { check = 0 } } if t.Width < 8 { if gc.Issigned[t.Etype] { t = gc.Types[gc.TINT64] } else { t = gc.Types[gc.TUINT64] } check = 0 } a := optoas(gc.ODIV, t) var tl gc.Node gc.Regalloc(&tl, t0, nil) var tr gc.Node gc.Regalloc(&tr, t0, nil) if nl.Ullman >= nr.Ullman { gc.Cgen(nl, &tl) gc.Cgen(nr, &tr) } else { gc.Cgen(nr, &tr) gc.Cgen(nl, &tl) } if t != t0 { // Convert tl2 := tl tr2 := tr tl.Type = t tr.Type = t gmove(&tl2, &tl) gmove(&tr2, &tr) } // Handle divide-by-zero panic. p1 := gins(optoas(gc.OCMP, t), &tr, nil) p1.To.Type = obj.TYPE_REG p1.To.Reg = ppc64.REGZERO p1 = gc.Gbranch(optoas(gc.ONE, t), nil, +1) if panicdiv == nil { panicdiv = gc.Sysfunc("panicdivide") } gc.Ginscall(panicdiv, -1) gc.Patch(p1, gc.Pc) var p2 *obj.Prog if check != 0 { var nm1 gc.Node gc.Nodconst(&nm1, t, -1) gins(optoas(gc.OCMP, t), &tr, &nm1) p1 := gc.Gbranch(optoas(gc.ONE, t), nil, +1) if op == gc.ODIV { // a / (-1) is -a. gins(optoas(gc.OMINUS, t), nil, &tl) gmove(&tl, res) } else { // a % (-1) is 0. var nz gc.Node gc.Nodconst(&nz, t, 0) gmove(&nz, res) } p2 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p1, gc.Pc) } p1 = gins(a, &tr, &tl) if op == gc.ODIV { gc.Regfree(&tr) gmove(&tl, res) } else { // A%B = A-(A/B*B) var tm gc.Node gc.Regalloc(&tm, t, nil) // patch div to use the 3 register form // TODO(minux): add gins3? p1.Reg = p1.To.Reg p1.To.Reg = tm.Reg gins(optoas(gc.OMUL, t), &tr, &tm) gc.Regfree(&tr) gins(optoas(gc.OSUB, t), &tm, &tl) gc.Regfree(&tm) gmove(&tl, res) } gc.Regfree(&tl) if check != 0 { gc.Patch(p2, gc.Pc) } }
func floatmove(f *gc.Node, t *gc.Node) { var r1 gc.Node ft := gc.Simsimtype(f.Type) tt := gc.Simsimtype(t.Type) cvt := t.Type // cannot have two floating point memory operands. if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) { goto hard } // convert constant to desired type if f.Op == gc.OLITERAL { var con gc.Node f.Convconst(&con, t.Type) f = &con ft = gc.Simsimtype(con.Type) // some constants can't move directly to memory. if gc.Ismem(t) { // float constants come from memory. if gc.Isfloat[tt] { goto hard } } } // value -> value copy, only one memory operand. // figure out the instruction to use. // break out of switch for one-instruction gins. // goto rdst for "destination must be register". // goto hard for "convert to cvt type first". // otherwise handle and return. switch uint32(ft)<<16 | uint32(tt) { default: if gc.Thearch.Use387 { floatmove_387(f, t) } else { floatmove_sse(f, t) } return // float to very long integer. case gc.TFLOAT32<<16 | gc.TINT64, gc.TFLOAT64<<16 | gc.TINT64: if f.Op == gc.OREGISTER { cvt = f.Type goto hardmem } var r1 gc.Node gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0) if ft == gc.TFLOAT32 { gins(x86.AFMOVF, f, &r1) } else { gins(x86.AFMOVD, f, &r1) } // set round to zero mode during conversion var t1 gc.Node memname(&t1, gc.Types[gc.TUINT16]) var t2 gc.Node memname(&t2, gc.Types[gc.TUINT16]) gins(x86.AFSTCW, nil, &t1) gins(x86.AMOVW, ncon(0xf7f), &t2) gins(x86.AFLDCW, &t2, nil) if tt == gc.TINT16 { gins(x86.AFMOVWP, &r1, t) } else if tt == gc.TINT32 { gins(x86.AFMOVLP, &r1, t) } else { gins(x86.AFMOVVP, &r1, t) } gins(x86.AFLDCW, &t1, nil) return case gc.TFLOAT32<<16 | gc.TUINT64, gc.TFLOAT64<<16 | gc.TUINT64: if !gc.Ismem(f) { cvt = f.Type goto hardmem } bignodes() var f0 gc.Node gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0) var f1 gc.Node gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1) var ax gc.Node gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX) if ft == gc.TFLOAT32 { gins(x86.AFMOVF, f, &f0) } else { gins(x86.AFMOVD, f, &f0) } // if 0 > v { answer = 0 } gins(x86.AFMOVD, &zerof, &f0) gins(x86.AFUCOMP, &f0, &f1) gins(x86.AFSTSW, nil, &ax) gins(x86.ASAHF, nil, nil) p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0) // if 1<<64 <= v { answer = 0 too } gins(x86.AFMOVD, &two64f, &f0) gins(x86.AFUCOMP, &f0, &f1) gins(x86.AFSTSW, nil, &ax) gins(x86.ASAHF, nil, nil) p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0) gc.Patch(p1, gc.Pc) gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack var thi gc.Node var tlo gc.Node split64(t, &tlo, &thi) gins(x86.AMOVL, ncon(0), &tlo) gins(x86.AMOVL, ncon(0), &thi) splitclean() p1 = gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p2, gc.Pc) // in range; algorithm is: // if small enough, use native float64 -> int64 conversion. // otherwise, subtract 2^63, convert, and add it back. // set round to zero mode during conversion var t1 gc.Node memname(&t1, gc.Types[gc.TUINT16]) var t2 gc.Node memname(&t2, gc.Types[gc.TUINT16]) gins(x86.AFSTCW, nil, &t1) gins(x86.AMOVW, ncon(0xf7f), &t2) gins(x86.AFLDCW, &t2, nil) // actual work gins(x86.AFMOVD, &two63f, &f0) gins(x86.AFUCOMP, &f0, &f1) gins(x86.AFSTSW, nil, &ax) gins(x86.ASAHF, nil, nil) p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0) gins(x86.AFMOVVP, &f0, t) p3 := gc.Gbranch(obj.AJMP, nil, 0) gc.Patch(p2, gc.Pc) gins(x86.AFMOVD, &two63f, &f0) gins(x86.AFSUBDP, &f0, &f1) gins(x86.AFMOVVP, &f0, t) split64(t, &tlo, &thi) gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63 gc.Patch(p3, gc.Pc) splitclean() // restore rounding mode gins(x86.AFLDCW, &t1, nil) gc.Patch(p1, gc.Pc) return /* * integer to float */ case gc.TINT64<<16 | gc.TFLOAT32, gc.TINT64<<16 | gc.TFLOAT64: if t.Op == gc.OREGISTER { goto hardmem } var f0 gc.Node gc.Nodreg(&f0, t.Type, x86.REG_F0) gins(x86.AFMOVV, f, &f0) if tt == gc.TFLOAT32 { gins(x86.AFMOVFP, &f0, t) } else { gins(x86.AFMOVDP, &f0, t) } return // algorithm is: // if small enough, use native int64 -> float64 conversion. // otherwise, halve (rounding to odd?), convert, and double. case gc.TUINT64<<16 | gc.TFLOAT32, gc.TUINT64<<16 | gc.TFLOAT64: var ax gc.Node gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX) var dx gc.Node gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX) var cx gc.Node gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX) var t1 gc.Node gc.Tempname(&t1, f.Type) var tlo gc.Node var thi gc.Node split64(&t1, &tlo, &thi) gmove(f, &t1) gins(x86.ACMPL, &thi, ncon(0)) p1 := gc.Gbranch(x86.AJLT, nil, 0) // native var r1 gc.Node gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0) gins(x86.AFMOVV, &t1, &r1) if tt == gc.TFLOAT32 { gins(x86.AFMOVFP, &r1, t) } else { gins(x86.AFMOVDP, &r1, t) } p2 := gc.Gbranch(obj.AJMP, nil, 0) // simulated gc.Patch(p1, gc.Pc) gmove(&tlo, &ax) gmove(&thi, &dx) p1 = gins(x86.ASHRL, ncon(1), &ax) p1.From.Index = x86.REG_DX // double-width shift DX -> AX p1.From.Scale = 0 gins(x86.AMOVL, ncon(0), &cx) gins(x86.ASETCC, nil, &cx) gins(x86.AORL, &cx, &ax) gins(x86.ASHRL, ncon(1), &dx) gmove(&dx, &thi) gmove(&ax, &tlo) gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0) var r2 gc.Node gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1) gins(x86.AFMOVV, &t1, &r1) gins(x86.AFMOVD, &r1, &r1) gins(x86.AFADDDP, &r1, &r2) if tt == gc.TFLOAT32 { gins(x86.AFMOVFP, &r1, t) } else { gins(x86.AFMOVDP, &r1, t) } gc.Patch(p2, gc.Pc) splitclean() return } // requires register intermediate hard: gc.Regalloc(&r1, cvt, t) gmove(f, &r1) gmove(&r1, t) gc.Regfree(&r1) return // requires memory intermediate hardmem: gc.Tempname(&r1, cvt) gmove(f, &r1) gmove(&r1, t) return }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(uint64(nl.Type.Width)) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := uint64(w % 8) // bytes q := uint64(w / 8) // dwords if gc.Reginuse(ppc64.REGRT1) { gc.Fatal("%v in use during clearfat", obj.Rconv(ppc64.REGRT1)) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], ppc64.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], ppc64.REGRT1) gc.Regrealloc(&dst) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(ppc64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(ppc64.AMOVD, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(ppc64.AMOVDU, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 pl := (*obj.Prog)(p) p = gins(ppc64.ACMP, &dst, &end) gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), pl) gc.Regfree(&end) // The loop leaves R3 on the last zeroed dword boff = 8 } else if q >= 4 { p := gins(ppc64.ASUB, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := (*gc.Node)(gc.Sysfunc("duffzero")) p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_ppc64x.s p.To.Offset = int64(4 * (128 - q)) // duffzero leaves R3 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(ppc64.AMOVD, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(ppc64.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } gc.Regfree(&dst) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { gc.Dump("\nclearfat", nl) } w := uint32(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 4 // bytes q := w / 4 // quads var r0 gc.Node r0.Op = gc.OREGISTER r0.Reg = arm.REG_R0 var r1 gc.Node r1.Op = gc.OREGISTER r1.Reg = arm.REG_R1 var dst gc.Node gc.Regalloc(&dst, gc.Types[gc.Tptr], &r1) gc.Agen(nl, &dst) var nc gc.Node gc.Nodconst(&nc, gc.Types[gc.TUINT32], 0) var nz gc.Node gc.Regalloc(&nz, gc.Types[gc.TUINT32], &r0) gc.Cgen(&nc, &nz) if q > 128 { var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p := gins(arm.AMOVW, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q) * 4 p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT pl := p p = gins(arm.ACMP, &dst, nil) raddr(&end, p) gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), pl) gc.Regfree(&end) } else if q >= 4 && !gc.Nacl { f := gc.Sysfunc("duffzero") p := gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 4 and 128 = magic constants: see ../../runtime/asm_arm.s p.To.Offset = 4 * (128 - int64(q)) } else { var p *obj.Prog for q > 0 { p = gins(arm.AMOVW, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 4 p.Scond |= arm.C_PBIT //print("1. %v\n", p); q-- } } var p *obj.Prog for c > 0 { p = gins(arm.AMOVB, &nz, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 1 p.Scond |= arm.C_PBIT //print("2. %v\n", p); c-- } gc.Regfree(&dst) gc.Regfree(&nz) }
func blockcopy(n, res *gc.Node, osrc, odst, w int64) { // determine alignment. // want to avoid unaligned access, so have to use // smaller operations for less aligned types. // for example moving [4]byte must use 4 MOVB not 1 MOVW. align := int(n.Type.Align) var op int switch align { default: gc.Fatalf("sgen: invalid alignment %d for %v", align, n.Type) case 1: op = mips.AMOVB case 2: op = mips.AMOVH case 4: op = mips.AMOVW case 8: op = mips.AMOVV } if w%int64(align) != 0 { gc.Fatalf("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type) } c := int32(w / int64(align)) // if we are copying forward on the stack and // the src and dst overlap, then reverse direction dir := align if osrc < odst && odst < osrc+w { dir = -dir } var dst gc.Node var src gc.Node if n.Ullman >= res.Ullman { gc.Agenr(n, &dst, res) // temporarily use dst gc.Regalloc(&src, gc.Types[gc.Tptr], nil) gins(mips.AMOVV, &dst, &src) if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agen(res, &dst) } else { if res.Op == gc.ONAME { gc.Gvardef(res) } gc.Agenr(res, &dst, res) gc.Agenr(n, &src, nil) } var tmp gc.Node gc.Regalloc(&tmp, gc.Types[gc.Tptr], nil) // set up end marker var nend gc.Node // move src and dest to the end of block if necessary if dir < 0 { if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.Tptr], nil) gins(mips.AMOVV, &src, &nend) } p := gins(mips.AADDV, nil, &src) p.From.Type = obj.TYPE_CONST p.From.Offset = w p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = w } else { p := gins(mips.AADDV, nil, &src) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-dir) p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(-dir) if c >= 4 { gc.Regalloc(&nend, gc.Types[gc.Tptr], nil) p := gins(mips.AMOVV, &src, &nend) p.From.Type = obj.TYPE_ADDR p.From.Offset = w } } // move // TODO: enable duffcopy for larger copies. if c >= 4 { p := gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) ploop := p p = gins(mips.AADDV, nil, &src) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(dir) p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(dir) gc.Patch(ginsbranch(mips.ABNE, nil, &src, &nend, 0), ploop) gc.Regfree(&nend) } else { // TODO: Instead of generating ADDV $-8,R8; ADDV // $-8,R7; n*(MOVV 8(R8),R9; ADDV $8,R8; MOVV R9,8(R7); // ADDV $8,R7;) just generate the offsets directly and // eliminate the ADDs. That will produce shorter, more // pipeline-able code. var p *obj.Prog for ; c > 0; c-- { p = gins(op, &src, &tmp) p.From.Type = obj.TYPE_MEM p.From.Offset = int64(dir) p = gins(mips.AADDV, nil, &src) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(dir) p = gins(op, &tmp, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(dir) p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = int64(dir) } } gc.Regfree(&dst) gc.Regfree(&src) gc.Regfree(&tmp) }
func clearfat(nl *gc.Node) { /* clear a fat object */ if gc.Debug['g'] != 0 { fmt.Printf("clearfat %v (%v, size: %d)\n", nl, nl.Type, nl.Type.Width) } w := uint64(nl.Type.Width) // Avoid taking the address for simple enough types. if gc.Componentgen(nil, nl) { return } c := w % 8 // bytes q := w / 8 // dwords if gc.Reginuse(mips.REGRT1) { gc.Fatalf("%v in use during clearfat", obj.Rconv(mips.REGRT1)) } var r0 gc.Node gc.Nodreg(&r0, gc.Types[gc.TUINT64], mips.REGZERO) var dst gc.Node gc.Nodreg(&dst, gc.Types[gc.Tptr], mips.REGRT1) gc.Regrealloc(&dst) gc.Agen(nl, &dst) var boff uint64 if q > 128 { p := gins(mips.ASUBV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 var end gc.Node gc.Regalloc(&end, gc.Types[gc.Tptr], nil) p = gins(mips.AMOVV, &dst, &end) p.From.Type = obj.TYPE_ADDR p.From.Offset = int64(q * 8) p = gins(mips.AMOVV, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = 8 pl := p p = gins(mips.AADDV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 gc.Patch(ginsbranch(mips.ABNE, nil, &dst, &end, 0), pl) gc.Regfree(&end) // The loop leaves R1 on the last zeroed dword boff = 8 // TODO(dfc): https://golang.org/issue/12108 // If DUFFZERO is used inside a tail call (see genwrapper) it will // overwrite the link register. } else if false && q >= 4 { p := gins(mips.ASUBV, nil, &dst) p.From.Type = obj.TYPE_CONST p.From.Offset = 8 f := gc.Sysfunc("duffzero") p = gins(obj.ADUFFZERO, nil, f) gc.Afunclit(&p.To, f) // 8 and 128 = magic constants: see ../../runtime/asm_mips64x.s p.To.Offset = int64(8 * (128 - q)) // duffzero leaves R1 on the last zeroed dword boff = 8 } else { var p *obj.Prog for t := uint64(0); t < q; t++ { p = gins(mips.AMOVV, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(8 * t) } boff = 8 * q } var p *obj.Prog for t := uint64(0); t < c; t++ { p = gins(mips.AMOVB, &r0, &dst) p.To.Type = obj.TYPE_MEM p.To.Offset = int64(t + boff) } gc.Regfree(&dst) }