/*
 * generate:
 *	res = &n;
 * The generated code checks that the result is not nil.
 */
func agen(n *gc.Node, res *gc.Node) {
	if gc.Debug['g'] != 0 {
		gc.Dump("\nagen-res", res)
		gc.Dump("agen-r", n)
	}

	if n == nil || n.Type == nil {
		return
	}

	for n.Op == gc.OCONVNOP {
		n = n.Left
	}

	if gc.Isconst(n, gc.CTNIL) && n.Type.Width > int64(gc.Widthptr) {
		// Use of a nil interface or nil slice.
		// Create a temporary we can take the address of and read.
		// The generated code is just going to panic, so it need not
		// be terribly efficient. See issue 3670.
		var n1 gc.Node
		gc.Tempname(&n1, n.Type)

		gc.Gvardef(&n1)
		clearfat(&n1)
		var n2 gc.Node
		regalloc(&n2, gc.Types[gc.Tptr], res)
		var n3 gc.Node
		n3.Op = gc.OADDR
		n3.Left = &n1
		gins(ppc64.AMOVD, &n3, &n2)
		gmove(&n2, res)
		regfree(&n2)
		return
	}

	if n.Addable != 0 {
		var n1 gc.Node
		n1.Op = gc.OADDR
		n1.Left = n
		var n2 gc.Node
		regalloc(&n2, gc.Types[gc.Tptr], res)
		gins(ppc64.AMOVD, &n1, &n2)
		gmove(&n2, res)
		regfree(&n2)
		return
	}

	nl := n.Left

	switch n.Op {
	default:
		gc.Fatal("agen: unknown op %v", gc.Nconv(n, obj.FmtShort|obj.FmtSign))

	// TODO(minux): 5g has this: Release res so that it is available for cgen_call.
	// Pick it up again after the call for OCALLMETH and OCALLFUNC.
	case gc.OCALLMETH:
		gc.Cgen_callmeth(n, 0)
		cgen_aret(n, res)

	case gc.OCALLINTER:
		cgen_callinter(n, res, 0)
		cgen_aret(n, res)

	case gc.OCALLFUNC:
		cgen_call(n, 0)
		cgen_aret(n, res)

	case gc.OSLICE, gc.OSLICEARR, gc.OSLICESTR, gc.OSLICE3, gc.OSLICE3ARR:
		var n1 gc.Node
		gc.Tempname(&n1, n.Type)
		gc.Cgen_slice(n, &n1)
		agen(&n1, res)

	case gc.OEFACE:
		var n1 gc.Node
		gc.Tempname(&n1, n.Type)
		gc.Cgen_eface(n, &n1)
		agen(&n1, res)

	case gc.OINDEX:
		var n1 gc.Node
		agenr(n, &n1, res)
		gmove(&n1, res)
		regfree(&n1)

	// should only get here with names in this func.
	case gc.ONAME:
		if n.Funcdepth > 0 && n.Funcdepth != gc.Funcdepth {
			gc.Dump("bad agen", n)
			gc.Fatal("agen: bad ONAME funcdepth %d != %d", n.Funcdepth, gc.Funcdepth)
		}

		// should only get here for heap vars or paramref
		if n.Class&gc.PHEAP == 0 && n.Class != gc.PPARAMREF {
			gc.Dump("bad agen", n)
			gc.Fatal("agen: bad ONAME class %#x", n.Class)
		}

		cgen(n.Heapaddr, res)
		if n.Xoffset != 0 {
			ginsadd(optoas(gc.OADD, gc.Types[gc.Tptr]), n.Xoffset, res)
		}

	case gc.OIND:
		cgen(nl, res)
		gc.Cgen_checknil(res)

	case gc.ODOT:
		agen(nl, res)
		if n.Xoffset != 0 {
			ginsadd(optoas(gc.OADD, gc.Types[gc.Tptr]), n.Xoffset, res)
		}

	case gc.ODOTPTR:
		cgen(nl, res)
		gc.Cgen_checknil(res)
		if n.Xoffset != 0 {
			ginsadd(optoas(gc.OADD, gc.Types[gc.Tptr]), n.Xoffset, res)
		}
	}
}
func stackcopy(n, ns *gc.Node, osrc, odst, w int64) {
	var noddi gc.Node
	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
	var nodsi gc.Node
	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)

	var nodl gc.Node
	var nodr gc.Node
	if n.Ullman >= ns.Ullman {
		gc.Agenr(n, &nodr, &nodsi)
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
	} else {
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
		gc.Agenr(n, &nodr, &nodsi)
	}

	if nodl.Val.U.Reg != x86.REG_DI {
		gmove(&nodl, &noddi)
	}
	if nodr.Val.U.Reg != x86.REG_SI {
		gmove(&nodr, &nodsi)
	}
	gc.Regfree(&nodl)
	gc.Regfree(&nodr)

	c := w % 8 // bytes
	q := w / 8 // quads

	var oldcx gc.Node
	var cx gc.Node
	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(addptr, w-1, x86.REG_SI)
			gconreg(addptr, w-1, x86.REG_DI)

			gconreg(movptr, c, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(addptr, -7, x86.REG_SI)
				gconreg(addptr, -7, x86.REG_DI)
			} else {
				gconreg(addptr, w-8, x86.REG_SI)
				gconreg(addptr, w-8, x86.REG_DI)
			}

			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		// normal direction
		if q > 128 || (gc.Nacl && q >= 4) {
			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
			p.To.Offset = 14 * (128 - q)
		} else if !gc.Nacl && c == 0 {
			// We don't need the MOVSQ side-effect of updating SI and DI,
			// and issuing a sequence of MOVQs directly is faster.
			nodsi.Op = gc.OINDREG

			noddi.Op = gc.OINDREG
			for q > 0 {
				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
				nodsi.Xoffset += 8
				noddi.Xoffset += 8
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
				q--
			}
		}

		// copy the remaining c bytes
		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
			for c > 0 {
				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
				c--
			}
		} else if w < 8 || c <= 4 {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT32]
			nodsi.Type = gc.Types[gc.TINT32]
			noddi.Type = gc.Types[gc.TINT32]
			if c > 4 {
				nodsi.Xoffset = 0
				noddi.Xoffset = 0
				gmove(&nodsi, &cx)
				gmove(&cx, &noddi)
			}

			nodsi.Xoffset = c - 4
			noddi.Xoffset = c - 4
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		} else {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT64]
			nodsi.Type = gc.Types[gc.TINT64]
			noddi.Type = gc.Types[gc.TINT64]
			nodsi.Xoffset = c - 8
			noddi.Xoffset = c - 8
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		}
	}

	restx(&cx, &oldcx)
}
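// Illustrative sketch (not part of the compiler): the parameters that drive
// the copy above. The width w is split into 8-byte quads plus a byte
// remainder, and the copy is emitted backwards only when the source and
// destination stack regions overlap with the source below the destination.
// Function and result names are hypothetical.
func copyPlan(osrc, odst, w int64) (q, c int64, backward bool) {
	q = w / 8 // quads moved with MOVSQ
	c = w % 8 // leftover bytes handled after the quads
	backward = osrc < odst && odst < osrc+w
	return
}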
/*
 * block copy:
 *	memmove(&ns, &n, w);
 */
func sgen(n *gc.Node, ns *gc.Node, w int64) {
	var res *gc.Node = ns

	if gc.Debug['g'] != 0 {
		fmt.Printf("\nsgen w=%d\n", w)
		gc.Dump("r", n)
		gc.Dump("res", ns)
	}

	if n.Ullman >= gc.UINF && ns.Ullman >= gc.UINF {
		gc.Fatal("sgen UINF")
	}

	if w < 0 {
		gc.Fatal("sgen copy %d", w)
	}

	// If copying .args, that's all the results, so record definition sites
	// for them for the liveness analysis.
	if ns.Op == gc.ONAME && ns.Sym.Name == ".args" {
		for l := gc.Curfn.Dcl; l != nil; l = l.Next {
			if l.N.Class == gc.PPARAMOUT {
				gc.Gvardef(l.N)
			}
		}
	}

	// Avoid taking the address for simple enough types.
	//if(componentgen(n, ns))
	//	return;

	if w == 0 {
		// evaluate side effects only.
		var dst gc.Node

		regalloc(&dst, gc.Types[gc.Tptr], nil)
		agen(res, &dst)
		agen(n, &dst)
		regfree(&dst)
		return
	}

	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatal("sgen: invalid alignment %d for %v", align, gc.Tconv(n.Type, 0))

	case 1:
		op = ppc64.AMOVBU

	case 2:
		op = ppc64.AMOVHU

	case 4:
		op = ppc64.AMOVWZU // there is no lwau, only lwaux

	case 8:
		op = ppc64.AMOVDU
	}

	if w%int64(align) != 0 {
		gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, gc.Tconv(n.Type, 0))
	}
	c := int32(w / int64(align))

	// offset on the stack
	osrc := int32(stkof(n))

	odst := int32(stkof(res))
	if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) {
		// osrc and odst both on stack, and at least one is in
		// an unknown position.  Could generate code to test
		// for forward/backward copy, but instead just copy
		// to a temporary location first.
		var tmp gc.Node

		gc.Tempname(&tmp, n.Type)
		sgen(n, &tmp, w)
		sgen(&tmp, res, w)
		return
	}

	if osrc%int32(align) != 0 || odst%int32(align) != 0 {
		gc.Fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align)
	}

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align

	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		agenr(n, &dst, res) // temporarily use dst
		regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(ppc64.AMOVD, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		agenr(res, &dst, res)
		agenr(n, &src, nil)
	}

	var tmp gc.Node
	regalloc(&tmp, gc.Types[gc.Tptr], nil)

	// set up end marker
	var nend gc.Node

	// move src and dest to the end of block if necessary
	if dir < 0 {
		if c >= 4 {
			regalloc(&nend, gc.Types[gc.Tptr], nil)
			gins(ppc64.AMOVD, &src, &nend)
		}

		p := gins(ppc64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w

		p = gins(ppc64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w
	} else {
		p := gins(ppc64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		p = gins(ppc64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		if c >= 4 {
			regalloc(&nend, gc.Types[gc.Tptr], nil)
			p := gins(ppc64.AMOVD, &src, &nend)
			p.From.Type = obj.TYPE_ADDR
			p.From.Offset = w
		}
	}

	// move
	// TODO: enable duffcopy for larger copies.
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)

		p = gins(ppc64.ACMP, &src, &nend)

		gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), ploop)

		regfree(&nend)
	} else {
		// TODO(austin): Instead of generating ADD $-8,R8; ADD
		// $-8,R7; n*(MOVDU 8(R8),R9; MOVDU R9,8(R7);) just
		// generate the offsets directly and eliminate the
		// ADDs.  That will produce shorter, more
		// pipeline-able code.
		var p *obj.Prog
		for {
			tmp14 := c
			c--
			if tmp14 <= 0 {
				break
			}

			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
		}
	}

	regfree(&dst)
	regfree(&src)
	regfree(&tmp)
}
/*
 * copy a composite value by moving its individual components.
 * Slices, strings and interfaces are supported.
 * Small structs or arrays with elements of basic type are
 * also supported.
 * nr is nil when assigning a zero value.
 * return true if can do, false if can't.
 */
func componentgen(nr *gc.Node, nl *gc.Node) bool {
	var nodl gc.Node
	var nodr gc.Node

	freel := 0
	freer := 0

	switch nl.Type.Etype {
	default:
		goto no

	case gc.TARRAY:
		t := nl.Type

		// Slices are ok.
		if gc.Isslice(t) {
			break
		}

		// Small arrays are ok.
		if t.Bound > 0 && t.Bound <= 3 && !gc.Isfat(t.Type) {
			break
		}

		goto no

	// Small structs with non-fat types are ok.
	// Zero-sized structs are treated separately elsewhere.
	case gc.TSTRUCT:
		fldcount := int64(0)

		for t := nl.Type.Type; t != nil; t = t.Down {
			if gc.Isfat(t.Type) {
				goto no
			}
			if t.Etype != gc.TFIELD {
				gc.Fatal("componentgen: not a TFIELD: %v", gc.Tconv(t, obj.FmtLong))
			}
			fldcount++
		}

		if fldcount == 0 || fldcount > 4 {
			goto no
		}

	case gc.TSTRING, gc.TINTER:
		break
	}

	nodl = *nl
	if !cadable(nl) {
		if nr != nil && !cadable(nr) {
			goto no
		}
		igen(nl, &nodl, nil)
		freel = 1
	}

	if nr != nil {
		nodr = *nr
		if !cadable(nr) {
			igen(nr, &nodr, nil)
			freer = 1
		}
	} else {
		// When zeroing, prepare a register containing zero.
		var tmp gc.Node
		gc.Nodconst(&tmp, nl.Type, 0)

		regalloc(&nodr, gc.Types[gc.TUINT], nil)
		gmove(&tmp, &nodr)
		freer = 1
	}

	// nl and nr are 'cadable' which basically means they are names (variables) now.
	// If they are the same variable, don't generate any code, because the
	// VARDEF we generate will mark the old value as dead incorrectly.
	// (And also the assignments are useless.)
	if nr != nil && nl.Op == gc.ONAME && nr.Op == gc.ONAME && nl == nr {
		goto yes
	}

	switch nl.Type.Etype {
	// componentgen for arrays.
	case gc.TARRAY:
		if nl.Op == gc.ONAME {
			gc.Gvardef(nl)
		}
		t := nl.Type
		if !gc.Isslice(t) {
			nodl.Type = t.Type
			nodr.Type = nodl.Type
			for fldcount := int64(0); fldcount < t.Bound; fldcount++ {
				if nr == nil {
					gc.Clearslim(&nodl)
				} else {
					gmove(&nodr, &nodl)
				}
				nodl.Xoffset += t.Type.Width
				nodr.Xoffset += t.Type.Width
			}

			goto yes
		}

		// componentgen for slices.
		nodl.Xoffset += int64(gc.Array_array)

		nodl.Type = gc.Ptrto(nl.Type.Type)

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_array)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		nodl.Xoffset += int64(gc.Array_nel) - int64(gc.Array_array)
		nodl.Type = gc.Types[gc.Simtype[gc.TUINT]]

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_nel) - int64(gc.Array_array)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		nodl.Xoffset += int64(gc.Array_cap) - int64(gc.Array_nel)
		nodl.Type = gc.Types[gc.Simtype[gc.TUINT]]

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_cap) - int64(gc.Array_nel)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		goto yes

	case gc.TSTRING:
		if nl.Op == gc.ONAME {
			gc.Gvardef(nl)
		}
		nodl.Xoffset += int64(gc.Array_array)
		nodl.Type = gc.Ptrto(gc.Types[gc.TUINT8])

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_array)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		nodl.Xoffset += int64(gc.Array_nel) - int64(gc.Array_array)
		nodl.Type = gc.Types[gc.Simtype[gc.TUINT]]

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_nel) - int64(gc.Array_array)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		goto yes

	case gc.TINTER:
		if nl.Op == gc.ONAME {
			gc.Gvardef(nl)
		}
		nodl.Xoffset += int64(gc.Array_array)
		nodl.Type = gc.Ptrto(gc.Types[gc.TUINT8])

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_array)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		nodl.Xoffset += int64(gc.Array_nel) - int64(gc.Array_array)
		nodl.Type = gc.Ptrto(gc.Types[gc.TUINT8])

		if nr != nil {
			nodr.Xoffset += int64(gc.Array_nel) - int64(gc.Array_array)
			nodr.Type = nodl.Type
		}

		gmove(&nodr, &nodl)

		goto yes

	case gc.TSTRUCT:
		if nl.Op == gc.ONAME {
			gc.Gvardef(nl)
		}
		loffset := nodl.Xoffset
		roffset := nodr.Xoffset

		// funarg structs may not begin at offset zero.
		if nl.Type.Etype == gc.TSTRUCT && nl.Type.Funarg != 0 && nl.Type.Type != nil {
			loffset -= nl.Type.Type.Width
		}
		if nr != nil && nr.Type.Etype == gc.TSTRUCT && nr.Type.Funarg != 0 && nr.Type.Type != nil {
			roffset -= nr.Type.Type.Width
		}

		for t := nl.Type.Type; t != nil; t = t.Down {
			nodl.Xoffset = loffset + t.Width
			nodl.Type = t.Type

			if nr == nil {
				gc.Clearslim(&nodl)
			} else {
				nodr.Xoffset = roffset + t.Width
				nodr.Type = nodl.Type
				gmove(&nodr, &nodl)
			}
		}

		goto yes
	}

no:
	if freer != 0 {
		regfree(&nodr)
	}
	if freel != 0 {
		regfree(&nodl)
	}
	return false

yes:
	if freer != 0 {
		regfree(&nodr)
	}
	if freel != 0 {
		regfree(&nodl)
	}
	return true
}
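// Illustrative sketch (not part of the compiler): componentgen copies a slice
// value as three word-sized moves at the Array_array, Array_nel and Array_cap
// offsets instead of calling memmove. The type and function below only mirror
// that layout; the names and field types are hypothetical.
type sliceHeader struct {
	ptr uintptr // at offset Array_array
	len int     // at offset Array_nel
	cap int     // at offset Array_cap
}

func copySliceHeader(dst, src *sliceHeader) {
	dst.ptr = src.ptr // gmove of the data pointer word
	dst.len = src.len // gmove of the length word
	dst.cap = src.cap // gmove of the capacity word
}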
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatal("sgen: invalid alignment %d for %v", align, n.Type)

	case 1:
		op = ppc64.AMOVBU

	case 2:
		op = ppc64.AMOVHU

	case 4:
		op = ppc64.AMOVWZU // there is no lwau, only lwaux

	case 8:
		op = ppc64.AMOVDU
	}

	if w%int64(align) != 0 {
		gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type)
	}
	c := int32(w / int64(align))

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align

	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(ppc64.AMOVD, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.Tptr], nil)

	// set up end marker
	var nend gc.Node

	// move src and dest to the end of block if necessary
	if dir < 0 {
		if c >= 4 {
			gc.Regalloc(&nend, gc.Types[gc.Tptr], nil)
			gins(ppc64.AMOVD, &src, &nend)
		}

		p := gins(ppc64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w

		p = gins(ppc64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w
	} else {
		p := gins(ppc64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		p = gins(ppc64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		if c >= 4 {
			gc.Regalloc(&nend, gc.Types[gc.Tptr], nil)
			p := gins(ppc64.AMOVD, &src, &nend)
			p.From.Type = obj.TYPE_ADDR
			p.From.Offset = w
		}
	}

	// move
	// TODO: enable duffcopy for larger copies.
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)

		p = gins(ppc64.ACMP, &src, &nend)

		gc.Patch(gc.Gbranch(ppc64.ABNE, nil, 0), ploop)

		gc.Regfree(&nend)
	} else {
		// TODO(austin): Instead of generating ADD $-8,R8; ADD
		// $-8,R7; n*(MOVDU 8(R8),R9; MOVDU R9,8(R7);) just
		// generate the offsets directly and eliminate the
		// ADDs.  That will produce shorter, more
		// pipeline-able code.
		var p *obj.Prog
		for {
			tmp14 := c
			c--
			if tmp14 <= 0 {
				break
			}

			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
		}
	}

	gc.Regfree(&dst)
	gc.Regfree(&src)
	gc.Regfree(&tmp)
}
func stackcopy(n, res *gc.Node, osrc, odst, w int64) {
	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI)
	var src gc.Node
	gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI)

	var tsrc gc.Node
	gc.Tempname(&tsrc, gc.Types[gc.Tptr])
	var tdst gc.Node
	gc.Tempname(&tdst, gc.Types[gc.Tptr])

	if n.Addable == 0 {
		gc.Agen(n, &tsrc)
	}
	if res.Addable == 0 {
		gc.Agen(res, &tdst)
	}
	if n.Addable != 0 {
		gc.Agen(n, &src)
	} else {
		gmove(&tsrc, &src)
	}

	if res.Op == gc.ONAME {
		gc.Gvardef(res)
	}

	if res.Addable != 0 {
		gc.Agen(res, &dst)
	} else {
		gmove(&tdst, &dst)
	}

	c := int32(w % 4) // bytes
	q := int32(w / 4) // doublewords

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && int64(odst) < int64(osrc)+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(x86.AADDL, w-1, x86.REG_SI)
			gconreg(x86.AADDL, w-1, x86.REG_DI)

			gconreg(x86.AMOVL, int64(c), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(x86.AADDL, -3, x86.REG_SI)
				gconreg(x86.AADDL, -3, x86.REG_DI)
			} else {
				gconreg(x86.AADDL, w-4, x86.REG_SI)
				gconreg(x86.AADDL, w-4, x86.REG_DI)
			}

			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		gins(x86.ACLD, nil, nil) // paranoia.  TODO(rsc): remove?

		// normal direction
		if q > 128 || (q >= 4 && gc.Nacl) {
			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 10 and 128 = magic constants: see ../../runtime/asm_386.s
			p.To.Offset = 10 * (128 - int64(q))
		} else if !gc.Nacl && c == 0 {
			var cx gc.Node
			gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)

			// We don't need the MOVSL side-effect of updating SI and DI,
			// and issuing a sequence of MOVLs directly is faster.
			src.Op = gc.OINDREG

			dst.Op = gc.OINDREG
			for q > 0 {
				gmove(&src, &cx) // MOVL x+(SI),CX
				gmove(&cx, &dst) // MOVL CX,x+(DI)
				src.Xoffset += 4
				dst.Xoffset += 4
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
				q--
			}
		}

		for c > 0 {
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
			c--
		}
	}
}
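// Illustrative sketch (not part of the compiler): the ADUFFCOPY target offset
// computed above. duffcopy is an unrolled 128-unit copy body, and jumping
// 10*(128-q) bytes into it on 386 (14*(128-q) on amd64, per the comments in
// the two functions above) skips the units that are not needed so exactly q
// units are copied. Function and constant names are hypothetical.
func duffcopyOffset386(q int64) int64 {
	const perUnit = 10     // bytes of code per copied doubleword on 386
	const totalUnits = 128 // units copied by the full duffcopy body
	return perUnit * (totalUnits - q)
}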
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatal("sgen: invalid alignment %d for %v", align, n.Type)

	case 1:
		op = arm.AMOVB

	case 2:
		op = arm.AMOVH

	case 4:
		op = arm.AMOVW
	}

	if w%int64(align) != 0 {
		gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type)
	}
	c := int32(w / int64(align))

	if osrc%int64(align) != 0 || odst%int64(align) != 0 {
		gc.Fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align)
	}

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align

	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 {
		var r0 gc.Node
		r0.Op = gc.OREGISTER
		r0.Reg = arm.REG_R0
		var r1 gc.Node
		r1.Op = gc.OREGISTER
		r1.Reg = arm.REG_R0 + 1
		var r2 gc.Node
		r2.Op = gc.OREGISTER
		r2.Reg = arm.REG_R0 + 2

		var src gc.Node
		gc.Regalloc(&src, gc.Types[gc.Tptr], &r1)
		var dst gc.Node
		gc.Regalloc(&dst, gc.Types[gc.Tptr], &r2)
		if n.Ullman >= res.Ullman {
			// eval n first
			gc.Agen(n, &src)

			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			gc.Agen(res, &dst)
		} else {
			// eval res first
			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			gc.Agen(res, &dst)
			gc.Agen(n, &src)
		}

		var tmp gc.Node
		gc.Regalloc(&tmp, gc.Types[gc.Tptr], &r0)
		f := gc.Sysfunc("duffcopy")
		p := gins(obj.ADUFFCOPY, nil, f)
		gc.Afunclit(&p.To, f)

		// 8 and 128 = magic constants: see ../../runtime/asm_arm.s
		p.To.Offset = 8 * (128 - int64(c))

		gc.Regfree(&tmp)
		gc.Regfree(&src)
		gc.Regfree(&dst)
		return
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(arm.AMOVW, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.TUINT32], nil)

	// set up end marker
	var nend gc.Node

	if c >= 4 {
		gc.Regalloc(&nend, gc.Types[gc.TUINT32], nil)

		p := gins(arm.AMOVW, &src, &nend)
		p.From.Type = obj.TYPE_ADDR
		if dir < 0 {
			p.From.Offset = int64(dir)
		} else {
			p.From.Offset = w
		}
	}

	// move src and dest to the end of block if necessary
	if dir < 0 {
		p := gins(arm.AMOVW, &src, &src)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)

		p = gins(arm.AMOVW, &dst, &dst)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)
	}

	// move
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		p.Scond |= arm.C_PBIT
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)
		p.Scond |= arm.C_PBIT

		p = gins(arm.ACMP, &src, nil)
		raddr(&nend, p)

		gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop)
		gc.Regfree(&nend)
	} else {
		var p *obj.Prog
		for {
			tmp14 := c
			c--
			if tmp14 <= 0 {
				break
			}

			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)
			p.Scond |= arm.C_PBIT

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
			p.Scond |= arm.C_PBIT
		}
	}

	gc.Regfree(&dst)
	gc.Regfree(&src)
	gc.Regfree(&tmp)
}
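// Illustrative sketch (not part of the compiler): how the ARM block copy
// above picks its move width and element count. The copy always moves whole
// elements of the type's alignment (MOVB, MOVH or MOVW), and flips the
// stride sign when the stack regions overlap. Names are hypothetical.
func armCopyPlan(align int, osrc, odst, w int64) (elems int64, stride int) {
	elems = w / int64(align) // number of element-sized moves
	stride = align
	if osrc < odst && odst < osrc+w {
		stride = -stride // copy from the end to cope with overlap
	}
	return
}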
/*
 * block copy:
 *	memmove(&ns, &n, w);
 */
func sgen(n *gc.Node, ns *gc.Node, w int64) {
	if gc.Debug['g'] != 0 {
		fmt.Printf("\nsgen w=%d\n", w)
		gc.Dump("r", n)
		gc.Dump("res", ns)
	}

	if n.Ullman >= gc.UINF && ns.Ullman >= gc.UINF {
		gc.Fatal("sgen UINF")
	}

	if w < 0 {
		gc.Fatal("sgen copy %d", w)
	}

	// If copying .args, that's all the results, so record definition sites
	// for them for the liveness analysis.
	if ns.Op == gc.ONAME && ns.Sym.Name == ".args" {
		for l := gc.Curfn.Dcl; l != nil; l = l.Next {
			if l.N.Class == gc.PPARAMOUT {
				gc.Gvardef(l.N)
			}
		}
	}

	// Avoid taking the address for simple enough types.
	if componentgen(n, ns) {
		return
	}

	if w == 0 {
		// evaluate side effects only
		var nodr gc.Node
		regalloc(&nodr, gc.Types[gc.Tptr], nil)
		agen(ns, &nodr)
		agen(n, &nodr)
		regfree(&nodr)
		return
	}

	// offset on the stack
	osrc := stkof(n)

	odst := stkof(ns)

	if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) {
		// osrc and odst both on stack, and at least one is in
		// an unknown position.  Could generate code to test
		// for forward/backward copy, but instead just copy
		// to a temporary location first.
		var tmp gc.Node

		gc.Tempname(&tmp, n.Type)
		sgen(n, &tmp, w)
		sgen(&tmp, ns, w)
		return
	}

	var noddi gc.Node
	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
	var nodsi gc.Node
	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)

	var nodl gc.Node
	var nodr gc.Node
	if n.Ullman >= ns.Ullman {
		agenr(n, &nodr, &nodsi)
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		agenr(ns, &nodl, &noddi)
	} else {
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		agenr(ns, &nodl, &noddi)
		agenr(n, &nodr, &nodsi)
	}

	if nodl.Val.U.Reg != x86.REG_DI {
		gmove(&nodl, &noddi)
	}
	if nodr.Val.U.Reg != x86.REG_SI {
		gmove(&nodr, &nodsi)
	}
	regfree(&nodl)
	regfree(&nodr)

	c := w % 8 // bytes
	q := w / 8 // quads

	var oldcx gc.Node
	var cx gc.Node
	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(addptr, w-1, x86.REG_SI)
			gconreg(addptr, w-1, x86.REG_DI)

			gconreg(movptr, c, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(addptr, -7, x86.REG_SI)
				gconreg(addptr, -7, x86.REG_DI)
			} else {
				gconreg(addptr, w-8, x86.REG_SI)
				gconreg(addptr, w-8, x86.REG_DI)
			}

			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		// normal direction
		if q > 128 || (gc.Nacl && q >= 4) {
			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
			p.To.Offset = 14 * (128 - q)
		} else if !gc.Nacl && c == 0 {
			// We don't need the MOVSQ side-effect of updating SI and DI,
			// and issuing a sequence of MOVQs directly is faster.
			nodsi.Op = gc.OINDREG

			noddi.Op = gc.OINDREG
			for q > 0 {
				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
				nodsi.Xoffset += 8
				noddi.Xoffset += 8
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
				q--
			}
		}

		// copy the remaining c bytes
		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
			for c > 0 {
				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
				c--
			}
		} else if w < 8 || c <= 4 {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT32]
			nodsi.Type = gc.Types[gc.TINT32]
			noddi.Type = gc.Types[gc.TINT32]
			if c > 4 {
				nodsi.Xoffset = 0
				noddi.Xoffset = 0
				gmove(&nodsi, &cx)
				gmove(&cx, &noddi)
			}

			nodsi.Xoffset = c - 4
			noddi.Xoffset = c - 4
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		} else {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT64]
			nodsi.Type = gc.Types[gc.TINT64]
			noddi.Type = gc.Types[gc.TINT64]
			nodsi.Xoffset = c - 8
			noddi.Xoffset = c - 8
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		}
	}

	restx(&cx, &oldcx)
}
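// Illustrative sketch (not part of the compiler): the tail handling above.
// When more than one byte remains and the regions do not overlap backwards,
// the generator finishes with one 4- or 8-byte move that ends exactly at
// offset w, deliberately re-copying a few bytes that were already moved,
// instead of issuing c single-byte moves. Names are hypothetical.
func copyTail(dst, src []byte, w, c int) {
	rem := w - c // start of the remaining c bytes
	switch {
	case w < 4 || c <= 1:
		copy(dst[rem:w], src[rem:w]) // byte-at-a-time (MOVSB) in the real code
	case w < 8 || c <= 4:
		if c > 4 {
			copy(dst[rem:rem+4], src[rem:rem+4]) // first 4 bytes of the tail
		}
		copy(dst[w-4:w], src[w-4:w]) // last 4 bytes, may overlap the move above
	default:
		copy(dst[w-8:w], src[w-8:w]) // one 64-bit move covering the whole tail
	}
}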
/*
 * block copy:
 *	memmove(&res, &n, w);
 * NB: character copy assumed little endian architecture
 */
func sgen(n *gc.Node, res *gc.Node, w int64) {
	if gc.Debug['g'] != 0 {
		fmt.Printf("\nsgen w=%d\n", w)
		gc.Dump("r", n)
		gc.Dump("res", res)
	}

	if n.Ullman >= gc.UINF && res.Ullman >= gc.UINF {
		gc.Fatal("sgen UINF")
	}

	if w < 0 || int64(int32(w)) != w {
		gc.Fatal("sgen copy %d", w)
	}

	if n.Type == nil {
		gc.Fatal("sgen: missing type")
	}

	if w == 0 {
		// evaluate side effects only.
		var dst gc.Node

		regalloc(&dst, gc.Types[gc.Tptr], nil)
		agen(res, &dst)
		agen(n, &dst)
		regfree(&dst)
		return
	}

	// If copying .args, that's all the results, so record definition sites
	// for them for the liveness analysis.
	if res.Op == gc.ONAME && res.Sym.Name == ".args" {
		for l := gc.Curfn.Dcl; l != nil; l = l.Next {
			if l.N.Class == gc.PPARAMOUT {
				gc.Gvardef(l.N)
			}
		}
	}

	// Avoid taking the address for simple enough types.
	if componentgen(n, res) {
		return
	}

	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatal("sgen: invalid alignment %d for %v", align, gc.Tconv(n.Type, 0))

	case 1:
		op = arm.AMOVB

	case 2:
		op = arm.AMOVH

	case 4:
		op = arm.AMOVW
	}

	if w%int64(align) != 0 {
		gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, gc.Tconv(n.Type, 0))
	}
	c := int32(w / int64(align))

	// offset on the stack
	osrc := stkof(n)

	odst := stkof(res)
	if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) {
		// osrc and odst both on stack, and at least one is in
		// an unknown position.  Could generate code to test
		// for forward/backward copy, but instead just copy
		// to a temporary location first.
		var tmp gc.Node

		gc.Tempname(&tmp, n.Type)
		sgen(n, &tmp, w)
		sgen(&tmp, res, w)
		return
	}

	if osrc%int32(align) != 0 || odst%int32(align) != 0 {
		gc.Fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align)
	}

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align

	if osrc < odst && int64(odst) < int64(osrc)+w {
		dir = -dir
	}

	if op == arm.AMOVW && !gc.Nacl && dir > 0 && c >= 4 && c <= 128 {
		var r0 gc.Node
		r0.Op = gc.OREGISTER
		r0.Val.U.Reg = REGALLOC_R0
		var r1 gc.Node
		r1.Op = gc.OREGISTER
		r1.Val.U.Reg = REGALLOC_R0 + 1
		var r2 gc.Node
		r2.Op = gc.OREGISTER
		r2.Val.U.Reg = REGALLOC_R0 + 2

		var src gc.Node
		regalloc(&src, gc.Types[gc.Tptr], &r1)
		var dst gc.Node
		regalloc(&dst, gc.Types[gc.Tptr], &r2)
		if n.Ullman >= res.Ullman {
			// eval n first
			agen(n, &src)

			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			agen(res, &dst)
		} else {
			// eval res first
			if res.Op == gc.ONAME {
				gc.Gvardef(res)
			}
			agen(res, &dst)
			agen(n, &src)
		}

		var tmp gc.Node
		regalloc(&tmp, gc.Types[gc.Tptr], &r0)
		f := gc.Sysfunc("duffcopy")
		p := gins(obj.ADUFFCOPY, nil, f)
		gc.Afunclit(&p.To, f)

		// 8 and 128 = magic constants: see ../../runtime/asm_arm.s
		p.To.Offset = 8 * (128 - int64(c))

		regfree(&tmp)
		regfree(&src)
		regfree(&dst)
		return
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		agenr(n, &dst, res) // temporarily use dst
		regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(arm.AMOVW, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		agenr(res, &dst, res)
		agenr(n, &src, nil)
	}

	var tmp gc.Node
	regalloc(&tmp, gc.Types[gc.TUINT32], nil)

	// set up end marker
	var nend gc.Node

	if c >= 4 {
		regalloc(&nend, gc.Types[gc.TUINT32], nil)

		p := gins(arm.AMOVW, &src, &nend)
		p.From.Type = obj.TYPE_ADDR
		if dir < 0 {
			p.From.Offset = int64(dir)
		} else {
			p.From.Offset = w
		}
	}

	// move src and dest to the end of block if necessary
	if dir < 0 {
		p := gins(arm.AMOVW, &src, &src)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)

		p = gins(arm.AMOVW, &dst, &dst)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = w + int64(dir)
	}

	// move
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		p.Scond |= arm.C_PBIT
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)
		p.Scond |= arm.C_PBIT

		p = gins(arm.ACMP, &src, nil)
		raddr(&nend, p)

		gc.Patch(gc.Gbranch(arm.ABNE, nil, 0), ploop)
		regfree(&nend)
	} else {
		var p *obj.Prog
		for {
			tmp14 := c
			c--
			if tmp14 <= 0 {
				break
			}

			p = gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)
			p.Scond |= arm.C_PBIT

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
			p.Scond |= arm.C_PBIT
		}
	}

	regfree(&dst)
	regfree(&src)
	regfree(&tmp)
}
/*
 * address gen
 *	res = &n;
 * The generated code checks that the result is not nil.
 */
func agen(n *gc.Node, res *gc.Node) {
	if gc.Debug['g'] != 0 {
		gc.Dump("\nagen-res", res)
		gc.Dump("agen-r", n)
	}

	if n == nil || n.Type == nil || res == nil || res.Type == nil {
		gc.Fatal("agen")
	}

	for n.Op == gc.OCONVNOP {
		n = n.Left
	}

	if gc.Isconst(n, gc.CTNIL) && n.Type.Width > int64(gc.Widthptr) {
		// Use of a nil interface or nil slice.
		// Create a temporary we can take the address of and read.
		// The generated code is just going to panic, so it need not
		// be terribly efficient. See issue 3670.
		var n1 gc.Node
		gc.Tempname(&n1, n.Type)

		gc.Gvardef(&n1)
		clearfat(&n1)
		var n2 gc.Node
		regalloc(&n2, gc.Types[gc.Tptr], res)
		gins(x86.ALEAL, &n1, &n2)
		gmove(&n2, res)
		regfree(&n2)
		return
	}

	// addressable var is easy
	if n.Addable != 0 {
		if n.Op == gc.OREGISTER {
			gc.Fatal("agen OREGISTER")
		}
		var n1 gc.Node
		regalloc(&n1, gc.Types[gc.Tptr], res)
		gins(x86.ALEAL, n, &n1)
		gmove(&n1, res)
		regfree(&n1)
		return
	}

	// let's compute
	nl := n.Left

	nr := n.Right

	switch n.Op {
	default:
		gc.Fatal("agen %v", gc.Oconv(int(n.Op), 0))

	case gc.OCALLMETH:
		gc.Cgen_callmeth(n, 0)
		cgen_aret(n, res)

	case gc.OCALLINTER:
		cgen_callinter(n, res, 0)
		cgen_aret(n, res)

	case gc.OCALLFUNC:
		cgen_call(n, 0)
		cgen_aret(n, res)

	case gc.OSLICE, gc.OSLICEARR, gc.OSLICESTR, gc.OSLICE3, gc.OSLICE3ARR:
		var n1 gc.Node
		gc.Tempname(&n1, n.Type)
		gc.Cgen_slice(n, &n1)
		agen(&n1, res)

	case gc.OEFACE:
		var n1 gc.Node
		gc.Tempname(&n1, n.Type)
		gc.Cgen_eface(n, &n1)
		agen(&n1, res)

	case gc.OINDEX:
		var p2 *obj.Prog // to be patched to panicindex.
		w := uint32(n.Type.Width)

		bounded := gc.Debug['B'] != 0 || n.Bounded
		var n3 gc.Node
		var tmp gc.Node
		var n1 gc.Node
		if nr.Addable != 0 {
			// Generate &nl first, and move nr into register.
			if !gc.Isconst(nl, gc.CTSTR) {
				igen(nl, &n3, res)
			}
			if !gc.Isconst(nr, gc.CTINT) {
				p2 = igenindex(nr, &tmp, bool2int(bounded))
				regalloc(&n1, tmp.Type, nil)
				gmove(&tmp, &n1)
			}
		} else if nl.Addable != 0 {
			// Generate nr first, and move &nl into register.
			if !gc.Isconst(nr, gc.CTINT) {
				p2 = igenindex(nr, &tmp, bool2int(bounded))
				regalloc(&n1, tmp.Type, nil)
				gmove(&tmp, &n1)
			}

			if !gc.Isconst(nl, gc.CTSTR) {
				igen(nl, &n3, res)
			}
		} else {
			p2 = igenindex(nr, &tmp, bool2int(bounded))
			nr = &tmp
			if !gc.Isconst(nl, gc.CTSTR) {
				igen(nl, &n3, res)
			}
			regalloc(&n1, tmp.Type, nil)
			gins(optoas(gc.OAS, tmp.Type), &tmp, &n1)
		}

		// For fixed array we really want the pointer in n3.
		var n2 gc.Node
		if gc.Isfixedarray(nl.Type) {
			regalloc(&n2, gc.Types[gc.Tptr], &n3)
			agen(&n3, &n2)
			regfree(&n3)
			n3 = n2
		}

		// &a[0] is in n3 (allocated in res)
		// i is in n1 (if not constant)
		// len(a) is in nlen (if needed)
		// w is width

		// constant index
		if gc.Isconst(nr, gc.CTINT) {
			if gc.Isconst(nl, gc.CTSTR) {
				gc.Fatal("constant string constant index") // front end should handle
			}
			v := uint64(gc.Mpgetfix(nr.Val.U.Xval))
			if gc.Isslice(nl.Type) || nl.Type.Etype == gc.TSTRING {
				if gc.Debug['B'] == 0 && !n.Bounded {
					nlen := n3
					nlen.Type = gc.Types[gc.TUINT32]
					nlen.Xoffset += int64(gc.Array_nel)
					gc.Nodconst(&n2, gc.Types[gc.TUINT32], int64(v))
					gins(optoas(gc.OCMP, gc.Types[gc.TUINT32]), &nlen, &n2)
					p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TUINT32]), nil, +1)
					ginscall(gc.Panicindex, -1)
					gc.Patch(p1, gc.Pc)
				}
			}

			// Load base pointer in n2 = n3.
			regalloc(&n2, gc.Types[gc.Tptr], &n3)

			n3.Type = gc.Types[gc.Tptr]
			n3.Xoffset += int64(gc.Array_array)
			gmove(&n3, &n2)
			regfree(&n3)
			if v*uint64(w) != 0 {
				gc.Nodconst(&n1, gc.Types[gc.Tptr], int64(v*uint64(w)))
				gins(optoas(gc.OADD, gc.Types[gc.Tptr]), &n1, &n2)
			}

			gmove(&n2, res)
			regfree(&n2)
			break
		}

		// i is in register n1, extend to 32 bits.
		t := gc.Types[gc.TUINT32]

		if gc.Issigned[n1.Type.Etype] {
			t = gc.Types[gc.TINT32]
		}

		regalloc(&n2, t, &n1) // i
		gmove(&n1, &n2)
		regfree(&n1)

		if gc.Debug['B'] == 0 && !n.Bounded {
			// check bounds
			t := gc.Types[gc.TUINT32]

			var nlen gc.Node
			if gc.Isconst(nl, gc.CTSTR) {
				gc.Nodconst(&nlen, t, int64(len(nl.Val.U.Sval)))
			} else if gc.Isslice(nl.Type) || nl.Type.Etype == gc.TSTRING {
				nlen = n3
				nlen.Type = t
				nlen.Xoffset += int64(gc.Array_nel)
			} else {
				gc.Nodconst(&nlen, t, nl.Type.Bound)
			}

			gins(optoas(gc.OCMP, t), &n2, &nlen)
			p1 := gc.Gbranch(optoas(gc.OLT, t), nil, +1)
			if p2 != nil {
				gc.Patch(p2, gc.Pc)
			}
			ginscall(gc.Panicindex, -1)
			gc.Patch(p1, gc.Pc)
		}

		if gc.Isconst(nl, gc.CTSTR) {
			regalloc(&n3, gc.Types[gc.Tptr], res)
			p1 := gins(x86.ALEAL, nil, &n3)
			gc.Datastring(nl.Val.U.Sval, &p1.From)
			p1.From.Scale = 1
			p1.From.Index = n2.Val.U.Reg
			goto indexdone
		}

		// Load base pointer in n3.
		regalloc(&tmp, gc.Types[gc.Tptr], &n3)

		if gc.Isslice(nl.Type) || nl.Type.Etype == gc.TSTRING {
			n3.Type = gc.Types[gc.Tptr]
			n3.Xoffset += int64(gc.Array_array)
			gmove(&n3, &tmp)
		}

		regfree(&n3)
		n3 = tmp

		if w == 0 {
		} else // nothing to do
		if w == 1 || w == 2 || w == 4 || w == 8 {
			// LEAL (n3)(n2*w), n3
			p1 := gins(x86.ALEAL, &n2, &n3)

			p1.From.Scale = int16(w)
			p1.From.Type = obj.TYPE_MEM
			p1.From.Index = p1.From.Reg
			p1.From.Reg = p1.To.Reg
		} else {
			gc.Nodconst(&tmp, gc.Types[gc.TUINT32], int64(w))
			gins(optoas(gc.OMUL, gc.Types[gc.TUINT32]), &tmp, &n2)
			gins(optoas(gc.OADD, gc.Types[gc.Tptr]), &n2, &n3)
		}

	indexdone:
		gmove(&n3, res)
		regfree(&n2)
		regfree(&n3)

	// should only get here with names in this func.
	case gc.ONAME:
		if n.Funcdepth > 0 && n.Funcdepth != gc.Funcdepth {
			gc.Dump("bad agen", n)
			gc.Fatal("agen: bad ONAME funcdepth %d != %d", n.Funcdepth, gc.Funcdepth)
		}

		// should only get here for heap vars or paramref
		if n.Class&gc.PHEAP == 0 && n.Class != gc.PPARAMREF {
			gc.Dump("bad agen", n)
			gc.Fatal("agen: bad ONAME class %#x", n.Class)
		}

		cgen(n.Heapaddr, res)
		if n.Xoffset != 0 {
			var n1 gc.Node
			gc.Nodconst(&n1, gc.Types[gc.Tptr], n.Xoffset)
			gins(optoas(gc.OADD, gc.Types[gc.Tptr]), &n1, res)
		}

	case gc.OIND:
		cgen(nl, res)
		gc.Cgen_checknil(res)

	case gc.ODOT:
		agen(nl, res)
		if n.Xoffset != 0 {
			var n1 gc.Node
			gc.Nodconst(&n1, gc.Types[gc.Tptr], n.Xoffset)
			gins(optoas(gc.OADD, gc.Types[gc.Tptr]), &n1, res)
		}

	case gc.ODOTPTR:
		t := nl.Type
		if !gc.Isptr[t.Etype] {
			gc.Fatal("agen: not ptr %v", gc.Nconv(n, 0))
		}
		cgen(nl, res)
		gc.Cgen_checknil(res)
		if n.Xoffset != 0 {
			var n1 gc.Node
			gc.Nodconst(&n1, gc.Types[gc.Tptr], n.Xoffset)
			gins(optoas(gc.OADD, gc.Types[gc.Tptr]), &n1, res)
		}
	}
}
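// Illustrative sketch (not part of the compiler): the address arithmetic the
// OINDEX case above emits. For element widths of 1, 2, 4 or 8 a single scaled
// LEAL computes base + i*w; for other widths the index is multiplied and
// added explicitly. Helper name is hypothetical.
func indexAddr(base, i, w uintptr) uintptr {
	// LEAL (base)(i*w), reg   -- when w is 1, 2, 4 or 8
	// MUL $w, i; ADD i, base  -- otherwise
	return base + i*w
}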
/*
 * struct gen
 *	memmove(&res, &n, w);
 */
func sgen(n *gc.Node, res *gc.Node, w int64) {
	if gc.Debug['g'] != 0 {
		fmt.Printf("\nsgen w=%d\n", w)
		gc.Dump("r", n)
		gc.Dump("res", res)
	}

	if n.Ullman >= gc.UINF && res.Ullman >= gc.UINF {
		gc.Fatal("sgen UINF")
	}

	if w < 0 || int64(int32(w)) != w {
		gc.Fatal("sgen copy %d", w)
	}

	if w == 0 {
		// evaluate side effects only.
		var tdst gc.Node

		gc.Tempname(&tdst, gc.Types[gc.Tptr])
		agen(res, &tdst)
		agen(n, &tdst)
		return
	}

	// If copying .args, that's all the results, so record definition sites
	// for them for the liveness analysis.
	if res.Op == gc.ONAME && res.Sym.Name == ".args" {
		for l := gc.Curfn.Dcl; l != nil; l = l.Next {
			if l.N.Class == gc.PPARAMOUT {
				gc.Gvardef(l.N)
			}
		}
	}

	// Avoid taking the address for simple enough types.
	if componentgen(n, res) {
		return
	}

	// offset on the stack
	osrc := stkof(n)

	odst := stkof(res)
	if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) {
		// osrc and odst both on stack, and at least one is in
		// an unknown position.  Could generate code to test
		// for forward/backward copy, but instead just copy
		// to a temporary location first.
		var tsrc gc.Node

		gc.Tempname(&tsrc, n.Type)
		sgen(n, &tsrc, w)
		sgen(&tsrc, res, w)
		return
	}

	var dst gc.Node
	gc.Nodreg(&dst, gc.Types[gc.Tptr], x86.REG_DI)
	var src gc.Node
	gc.Nodreg(&src, gc.Types[gc.Tptr], x86.REG_SI)

	var tsrc gc.Node
	gc.Tempname(&tsrc, gc.Types[gc.Tptr])
	var tdst gc.Node
	gc.Tempname(&tdst, gc.Types[gc.Tptr])

	if n.Addable == 0 {
		agen(n, &tsrc)
	}
	if res.Addable == 0 {
		agen(res, &tdst)
	}
	if n.Addable != 0 {
		agen(n, &src)
	} else {
		gmove(&tsrc, &src)
	}

	if res.Op == gc.ONAME {
		gc.Gvardef(res)
	}

	if res.Addable != 0 {
		agen(res, &dst)
	} else {
		gmove(&tdst, &dst)
	}

	c := int32(w % 4) // bytes
	q := int32(w / 4) // doublewords

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && int64(odst) < int64(osrc)+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(x86.AADDL, w-1, x86.REG_SI)
			gconreg(x86.AADDL, w-1, x86.REG_DI)

			gconreg(x86.AMOVL, int64(c), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(x86.AADDL, -3, x86.REG_SI)
				gconreg(x86.AADDL, -3, x86.REG_DI)
			} else {
				gconreg(x86.AADDL, w-4, x86.REG_SI)
				gconreg(x86.AADDL, w-4, x86.REG_DI)
			}

			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		gins(x86.ACLD, nil, nil) // paranoia.  TODO(rsc): remove?

		// normal direction
		if q > 128 || (q >= 4 && gc.Nacl) {
			gconreg(x86.AMOVL, int64(q), x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 10 and 128 = magic constants: see ../../runtime/asm_386.s
			p.To.Offset = 10 * (128 - int64(q))
		} else if !gc.Nacl && c == 0 {
			var cx gc.Node
			gc.Nodreg(&cx, gc.Types[gc.TINT32], x86.REG_CX)

			// We don't need the MOVSL side-effect of updating SI and DI,
			// and issuing a sequence of MOVLs directly is faster.
			src.Op = gc.OINDREG

			dst.Op = gc.OINDREG
			for q > 0 {
				gmove(&src, &cx) // MOVL x+(SI),CX
				gmove(&cx, &dst) // MOVL CX,x+(DI)
				src.Xoffset += 4
				dst.Xoffset += 4
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSL, nil, nil) // MOVL *(SI)+,*(DI)+
				q--
			}
		}

		for c > 0 {
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
			c--
		}
	}
}