Esempio n. 1
0
func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
	if ctxt.Headtype == obj.Hplan9 && ctxt.Plan9privates == nil {
		ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
	}

	ctxt.Cursym = cursym

	if cursym.Text == nil || cursym.Text.Link == nil {
		return
	}

	p := cursym.Text
	autoffset := int32(p.To.Offset)
	if autoffset < 0 {
		autoffset = 0
	}

	var bpsize int
	if p.Mode == 64 && ctxt.Framepointer_enabled && autoffset > 0 && p.From3.Offset&obj.NOFRAME == 0 {
		// Make room for to save a base pointer. If autoffset == 0,
		// this might do something special like a tail jump to
		// another function, so in that case we omit this.
		bpsize = ctxt.Arch.PtrSize
		autoffset += int32(bpsize)
		p.To.Offset += int64(bpsize)
	} else {
		bpsize = 0
	}

	textarg := int64(p.To.Val.(int32))
	cursym.Args = int32(textarg)
	cursym.Locals = int32(p.To.Offset)

	// TODO(rsc): Remove.
	if p.Mode == 32 && cursym.Locals < 0 {
		cursym.Locals = 0
	}

	// TODO(rsc): Remove 'p.Mode == 64 &&'.
	if p.Mode == 64 && autoffset < obj.StackSmall && p.From3Offset()&obj.NOSPLIT == 0 {
		for q := p; q != nil; q = q.Link {
			if q.As == obj.ACALL {
				goto noleaf
			}
			if (q.As == obj.ADUFFCOPY || q.As == obj.ADUFFZERO) && autoffset >= obj.StackSmall-8 {
				goto noleaf
			}
		}

		p.From3.Offset |= obj.NOSPLIT
	noleaf:
	}

	if p.From3Offset()&obj.NOSPLIT == 0 || p.From3Offset()&obj.WRAPPER != 0 {
		p = obj.Appendp(ctxt, p)
		p = load_g_cx(ctxt, p) // load g into CX
	}

	if cursym.Text.From3Offset()&obj.NOSPLIT == 0 {
		p = stacksplit(ctxt, p, autoffset, int32(textarg)) // emit split check
	}

	if autoffset != 0 {
		if autoffset%int32(ctxt.Arch.RegSize) != 0 {
			ctxt.Diag("unaligned stack size %d", autoffset)
		}
		p = obj.Appendp(ctxt, p)
		p.As = AADJSP
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(autoffset)
		p.Spadj = autoffset
	} else {
		// zero-byte stack adjustment.
		// Insert a fake non-zero adjustment so that stkcheck can
		// recognize the end of the stack-splitting prolog.
		p = obj.Appendp(ctxt, p)

		p.As = obj.ANOP
		p.Spadj = int32(-ctxt.Arch.PtrSize)
		p = obj.Appendp(ctxt, p)
		p.As = obj.ANOP
		p.Spadj = int32(ctxt.Arch.PtrSize)
	}

	deltasp := autoffset

	if bpsize > 0 {
		// Save caller's BP
		p = obj.Appendp(ctxt, p)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_SP
		p.To.Scale = 1
		p.To.Offset = int64(autoffset) - int64(bpsize)

		// Move current frame to BP
		p = obj.Appendp(ctxt, p)

		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Scale = 1
		p.From.Offset = int64(autoffset) - int64(bpsize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BP
	}

	if cursym.Text.From3Offset()&obj.WRAPPER != 0 {
		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
		//
		//	MOVQ g_panic(CX), BX
		//	TESTQ BX, BX
		//	JEQ end
		//	LEAQ (autoffset+8)(SP), DI
		//	CMPQ panic_argp(BX), DI
		//	JNE end
		//	MOVQ SP, panic_argp(BX)
		// end:
		//	NOP
		//
		// The NOP is needed to give the jumps somewhere to land.
		// It is a liblink NOP, not an x86 NOP: it encodes to 0 instruction bytes.

		p = obj.Appendp(ctxt, p)

		p.As = AMOVQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_CX
		p.From.Offset = 4 * int64(ctxt.Arch.PtrSize) // G.panic
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
			p.As = AMOVL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_CX
		}
		if p.Mode == 32 {
			p.As = AMOVL
		}

		p = obj.Appendp(ctxt, p)
		p.As = ATESTQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_BX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_BX
		if ctxt.Headtype == obj.Hnacl || p.Mode == 32 {
			p.As = ATESTL
		}

		p = obj.Appendp(ctxt, p)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		p1 := p

		p = obj.Appendp(ctxt, p)
		p.As = ALEAQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = int64(autoffset) + int64(ctxt.Arch.RegSize)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == obj.Hnacl || p.Mode == 32 {
			p.As = ALEAL
		}

		p = obj.Appendp(ctxt, p)
		p.As = ACMPQ
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_BX
		p.From.Offset = 0 // Panic.argp
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_DI
		if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
			p.As = ACMPL
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = REG_R15
			p.From.Scale = 1
			p.From.Index = REG_BX
		}
		if p.Mode == 32 {
			p.As = ACMPL
		}

		p = obj.Appendp(ctxt, p)
		p.As = AJNE
		p.To.Type = obj.TYPE_BRANCH
		p2 := p

		p = obj.Appendp(ctxt, p)
		p.As = AMOVQ
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = REG_BX
		p.To.Offset = 0 // Panic.argp
		if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
			p.As = AMOVL
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = REG_R15
			p.To.Scale = 1
			p.To.Index = REG_BX
		}
		if p.Mode == 32 {
			p.As = AMOVL
		}

		p = obj.Appendp(ctxt, p)
		p.As = obj.ANOP
		p1.Pcond = p
		p2.Pcond = p
	}

	var a int
	var pcsize int
	for ; p != nil; p = p.Link {
		pcsize = int(p.Mode) / 8
		a = int(p.From.Name)
		if a == obj.NAME_AUTO {
			p.From.Offset += int64(deltasp) - int64(bpsize)
		}
		if a == obj.NAME_PARAM {
			p.From.Offset += int64(deltasp) + int64(pcsize)
		}
		if p.From3 != nil {
			a = int(p.From3.Name)
			if a == obj.NAME_AUTO {
				p.From3.Offset += int64(deltasp) - int64(bpsize)
			}
			if a == obj.NAME_PARAM {
				p.From3.Offset += int64(deltasp) + int64(pcsize)
			}
		}
		a = int(p.To.Name)
		if a == obj.NAME_AUTO {
			p.To.Offset += int64(deltasp) - int64(bpsize)
		}
		if a == obj.NAME_PARAM {
			p.To.Offset += int64(deltasp) + int64(pcsize)
		}

		switch p.As {
		default:
			continue

		case APUSHL, APUSHFL:
			deltasp += 4
			p.Spadj = 4
			continue

		case APUSHQ, APUSHFQ:
			deltasp += 8
			p.Spadj = 8
			continue

		case APUSHW, APUSHFW:
			deltasp += 2
			p.Spadj = 2
			continue

		case APOPL, APOPFL:
			deltasp -= 4
			p.Spadj = -4
			continue

		case APOPQ, APOPFQ:
			deltasp -= 8
			p.Spadj = -8
			continue

		case APOPW, APOPFW:
			deltasp -= 2
			p.Spadj = -2
			continue

		case obj.ARET:
			break
		}

		if autoffset != deltasp {
			ctxt.Diag("unbalanced PUSH/POP")
		}

		if autoffset != 0 {
			if bpsize > 0 {
				// Restore caller's BP
				p.As = AMOVQ

				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REG_SP
				p.From.Scale = 1
				p.From.Offset = int64(autoffset) - int64(bpsize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REG_BP
				p = obj.Appendp(ctxt, p)
			}

			p.As = AADJSP
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = int64(-autoffset)
			p.Spadj = -autoffset
			p = obj.Appendp(ctxt, p)
			p.As = obj.ARET

			// If there are instructions following
			// this ARET, they come from a branch
			// with the same stackframe, so undo
			// the cleanup.
			p.Spadj = +autoffset
		}

		if p.To.Sym != nil { // retjmp
			p.As = obj.AJMP
		}
	}
}

func indir_cx(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
		a.Type = obj.TYPE_MEM
		a.Reg = REG_R15
		a.Index = REG_CX
		a.Scale = 1
		return
	}

	a.Type = obj.TYPE_MEM
	a.Reg = REG_CX
}

// Append code to p to load g into cx.
// Overwrites p with the first instruction (no first appendp).
// Overwriting p is unusual but it lets use this in both the
// prologue (caller must call appendp first) and in the epilogue.
// Returns last new instruction.
func load_g_cx(ctxt *obj.Link, p *obj.Prog) *obj.Prog {
	p.As = AMOVQ
	if ctxt.Arch.PtrSize == 4 {
		p.As = AMOVL
	}
	p.From.Type = obj.TYPE_MEM
	p.From.Reg = REG_TLS
	p.From.Offset = 0
	p.To.Type = obj.TYPE_REG
	p.To.Reg = REG_CX

	next := p.Link
	progedit(ctxt, p)
	for p.Link != next {
		p = p.Link
	}

	if p.From.Index == REG_TLS {
		p.From.Scale = 2
	}

	return p
}

// Append code to p to check for stack split.
// Appends to (does not overwrite) p.
// Assumes g is in CX.
// Returns last new instruction.
func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32, textarg int32) *obj.Prog {
	cmp := ACMPQ
	lea := ALEAQ
	mov := AMOVQ
	sub := ASUBQ

	if ctxt.Headtype == obj.Hnacl || p.Mode == 32 {
		cmp = ACMPL
		lea = ALEAL
		mov = AMOVL
		sub = ASUBL
	}

	var q1 *obj.Prog
	if framesize <= obj.StackSmall {
		// small stack: SP <= stackguard
		//	CMPQ SP, stackguard
		p = obj.Appendp(ctxt, p)

		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SP
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if ctxt.Cursym.Cfunc {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else if framesize <= obj.StackBig {
		// large stack: SP-framesize <= stackguard-StackSmall
		//	LEAQ -xxx(SP), AX
		//	CMPQ AX, stackguard
		p = obj.Appendp(ctxt, p)

		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = -(int64(framesize) - obj.StackSmall)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(ctxt, p)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		indir_cx(ctxt, p, &p.To)
		p.To.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if ctxt.Cursym.Cfunc {
			p.To.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
	} else {
		// Such a large stack we need to protect against wraparound.
		// If SP is close to zero:
		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
		// The +StackGuard on both sides is required to keep the left side positive:
		// SP is allowed to be slightly below stackguard. See stack.h.
		//
		// Preemption sets stackguard to StackPreempt, a very large value.
		// That breaks the math above, so we have to check for that explicitly.
		//	MOVQ	stackguard, CX
		//	CMPQ	CX, $StackPreempt
		//	JEQ	label-of-call-to-morestack
		//	LEAQ	StackGuard(SP), AX
		//	SUBQ	CX, AX
		//	CMPQ	AX, $(framesize+(StackGuard-StackSmall))

		p = obj.Appendp(ctxt, p)

		p.As = mov
		indir_cx(ctxt, p, &p.From)
		p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
		if ctxt.Cursym.Cfunc {
			p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_SI

		p = obj.Appendp(ctxt, p)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = obj.StackPreempt
		if p.Mode == 32 {
			p.To.Offset = int64(uint32(obj.StackPreempt & (1<<32 - 1)))
		}

		p = obj.Appendp(ctxt, p)
		p.As = AJEQ
		p.To.Type = obj.TYPE_BRANCH
		q1 = p

		p = obj.Appendp(ctxt, p)
		p.As = lea
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = REG_SP
		p.From.Offset = obj.StackGuard
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(ctxt, p)
		p.As = sub
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_SI
		p.To.Type = obj.TYPE_REG
		p.To.Reg = REG_AX

		p = obj.Appendp(ctxt, p)
		p.As = cmp
		p.From.Type = obj.TYPE_REG
		p.From.Reg = REG_AX
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = int64(framesize) + (obj.StackGuard - obj.StackSmall)
	}

	// common
	jls := obj.Appendp(ctxt, p)
	jls.As = AJLS
	jls.To.Type = obj.TYPE_BRANCH

	var last *obj.Prog
	for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link {
	}

	spfix := obj.Appendp(ctxt, last)
	spfix.As = obj.ANOP
	spfix.Spadj = -framesize

	call := obj.Appendp(ctxt, spfix)
	call.Lineno = ctxt.Cursym.Text.Lineno
	call.Mode = ctxt.Cursym.Text.Mode
	call.As = obj.ACALL
	call.To.Type = obj.TYPE_BRANCH
	call.To.Name = obj.NAME_EXTERN
	morestack := "runtime.morestack"
	switch {
	case ctxt.Cursym.Cfunc:
		morestack = "runtime.morestackc"
	case ctxt.Cursym.Text.From3Offset()&obj.NEEDCTXT == 0:
		morestack = "runtime.morestack_noctxt"
	}
	call.To.Sym = obj.Linklookup(ctxt, morestack, 0)
	// When compiling 386 code for dynamic linking, the call needs to be adjusted
	// to follow PIC rules. This in turn can insert more instructions, so we need
	// to keep track of the start of the call (where the jump will be to) and the
	// end (which following instructions are appended to).
	callend := call
	progedit(ctxt, callend)
	for ; callend.Link != nil; callend = callend.Link {
		progedit(ctxt, callend.Link)
	}

	jmp := obj.Appendp(ctxt, callend)
	jmp.As = obj.AJMP
	jmp.To.Type = obj.TYPE_BRANCH
	jmp.Pcond = ctxt.Cursym.Text.Link
	jmp.Spadj = +framesize

	jls.Pcond = call
	if q1 != nil {
		q1.Pcond = call
	}

	return jls
}

func follow(ctxt *obj.Link, s *obj.LSym) {
	ctxt.Cursym = s

	firstp := ctxt.NewProg()
	lastp := firstp
	xfol(ctxt, s.Text, &lastp)
	lastp.Link = nil
	s.Text = firstp.Link
}

func nofollow(a obj.As) bool {
	switch a {
	case obj.AJMP,
		obj.ARET,
		AIRETL,
		AIRETQ,
		AIRETW,
		ARETFL,
		ARETFQ,
		ARETFW,
		obj.AUNDEF:
		return true
	}

	return false
}

func pushpop(a obj.As) bool {
	switch a {
	case APUSHL,
		APUSHFL,
		APUSHQ,
		APUSHFQ,
		APUSHW,
		APUSHFW,
		APOPL,
		APOPFL,
		APOPQ,
		APOPFQ,
		APOPW,
		APOPFW:
		return true
	}

	return false
}

func relinv(a obj.As) obj.As {
	switch a {
	case AJEQ:
		return AJNE
	case AJNE:
		return AJEQ
	case AJLE:
		return AJGT
	case AJLS:
		return AJHI
	case AJLT:
		return AJGE
	case AJMI:
		return AJPL
	case AJGE:
		return AJLT
	case AJPL:
		return AJMI
	case AJGT:
		return AJLE
	case AJHI:
		return AJLS
	case AJCS:
		return AJCC
	case AJCC:
		return AJCS
	case AJPS:
		return AJPC
	case AJPC:
		return AJPS
	case AJOS:
		return AJOC
	case AJOC:
		return AJOS
	}

	log.Fatalf("unknown relation: %s", obj.Aconv(a))
	return 0
}

func xfol(ctxt *obj.Link, p *obj.Prog, last **obj.Prog) {
	var q *obj.Prog
	var i int
	var a obj.As

loop:
	if p == nil {
		return
	}
	if p.As == obj.AJMP {
		q = p.Pcond
		if q != nil && q.As != obj.ATEXT {
			/* mark instruction as done and continue layout at target of jump */
			p.Mark |= DONE

			p = q
			if p.Mark&DONE == 0 {
				goto loop
			}
		}
	}

	if p.Mark&DONE != 0 {
		/*
		 * p goes here, but already used it elsewhere.
		 * copy up to 4 instructions or else branch to other copy.
		 */
		i = 0
		q = p
		for ; i < 4; i, q = i+1, q.Link {
			if q == nil {
				break
			}
			if q == *last {
				break
			}
			a = q.As
			if a == obj.ANOP {
				i--
				continue
			}

			if nofollow(a) || pushpop(a) {
				break // NOTE(rsc): arm does goto copy
			}
			if q.Pcond == nil || q.Pcond.Mark&DONE != 0 {
				continue
			}
			if a == obj.ACALL || a == ALOOP {
				continue
			}
			for {
				if p.As == obj.ANOP {
					p = p.Link
					continue
				}

				q = obj.Copyp(ctxt, p)
				p = p.Link
				q.Mark |= DONE
				(*last).Link = q
				*last = q
				if q.As != a || q.Pcond == nil || q.Pcond.Mark&DONE != 0 {
					continue
				}

				q.As = relinv(q.As)
				p = q.Pcond
				q.Pcond = q.Link
				q.Link = p
				xfol(ctxt, q.Link, last)
				p = q.Link
				if p.Mark&DONE != 0 {
					return
				}
				goto loop
				/* */
			}
		}
		q = ctxt.NewProg()
		q.As = obj.AJMP
		q.Lineno = p.Lineno
		q.To.Type = obj.TYPE_BRANCH
		q.To.Offset = p.Pc
		q.Pcond = p
		p = q
	}

	/* emit p */
	p.Mark |= DONE

	(*last).Link = p
	*last = p
	a = p.As

	/* continue loop with what comes after p */
	if nofollow(a) {
		return
	}
	if p.Pcond != nil && a != obj.ACALL {
		/*
		 * some kind of conditional branch.
		 * recurse to follow one path.
		 * continue loop on the other.
		 */
		q = obj.Brchain(ctxt, p.Pcond)
		if q != nil {
			p.Pcond = q
		}
		q = obj.Brchain(ctxt, p.Link)
		if q != nil {
			p.Link = q
		}
		if p.From.Type == obj.TYPE_CONST {
			if p.From.Offset == 1 {
				/*
				 * expect conditional jump to be taken.
				 * rewrite so that's the fall-through case.
				 */
				p.As = relinv(a)

				q = p.Link
				p.Link = p.Pcond
				p.Pcond = q
			}
		} else {
			q = p.Link
			if q.Mark&DONE != 0 {
				if a != ALOOP {
					p.As = relinv(a)
					p.Link = p.Pcond
					p.Pcond = q
				}
			}
		}

		xfol(ctxt, p.Link, last)
		if p.Pcond.Mark&DONE != 0 {
			return
		}
		p = p.Pcond
		goto loop
	}

	p = p.Link
	goto loop
}

var unaryDst = map[obj.As]bool{
	ABSWAPL:    true,
	ABSWAPQ:    true,
	ACMPXCHG8B: true,
	ADECB:      true,
	ADECL:      true,
	ADECQ:      true,
	ADECW:      true,
	AINCB:      true,
	AINCL:      true,
	AINCQ:      true,
	AINCW:      true,
	ANEGB:      true,
	ANEGL:      true,
	ANEGQ:      true,
	ANEGW:      true,
	ANOTB:      true,
	ANOTL:      true,
	ANOTQ:      true,
	ANOTW:      true,
	APOPL:      true,
	APOPQ:      true,
	APOPW:      true,
	ASETCC:     true,
	ASETCS:     true,
	ASETEQ:     true,
	ASETGE:     true,
	ASETGT:     true,
	ASETHI:     true,
	ASETLE:     true,
	ASETLS:     true,
	ASETLT:     true,
	ASETMI:     true,
	ASETNE:     true,
	ASETOC:     true,
	ASETOS:     true,
	ASETPC:     true,
	ASETPL:     true,
	ASETPS:     true,
	AFFREE:     true,
	AFLDENV:    true,
	AFSAVE:     true,
	AFSTCW:     true,
	AFSTENV:    true,
	AFSTSW:     true,
	AFXSAVE:    true,
	AFXSAVE64:  true,
	ASTMXCSR:   true,
}

var Linkamd64 = obj.LinkArch{
	Arch:       sys.ArchAMD64,
	Preprocess: preprocess,
	Assemble:   span6,
	Follow:     follow,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

var Linkamd64p32 = obj.LinkArch{
	Arch:       sys.ArchAMD64P32,
	Preprocess: preprocess,
	Assemble:   span6,
	Follow:     follow,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}

var Link386 = obj.LinkArch{
	Arch:       sys.Arch386,
	Preprocess: preprocess,
	Assemble:   span6,
	Follow:     follow,
	Progedit:   progedit,
	UnaryDst:   unaryDst,
}
Esempio n. 2
0
func sched(ctxt *obj.Link, p0, pe *obj.Prog) {
	var sch [NSCHED]Sch

	/*
	 * build side structure
	 */
	s := sch[:]
	for p := p0; ; p = p.Link {
		s[0].p = *p
		markregused(ctxt, &s[0])
		if p == pe {
			break
		}
		s = s[1:]
	}
	se := s

	for i := cap(sch) - cap(se); i >= 0; i-- {
		s = sch[i:]
		if s[0].p.Mark&DELAY == 0 {
			continue
		}
		if -cap(s) < -cap(se) {
			if !conflict(&s[0], &s[1]) {
				continue
			}
		}

		var t []Sch
		var j int
		for j = cap(sch) - cap(s) - 1; j >= 0; j-- {
			t = sch[j:]
			if t[0].comp {
				if s[0].p.Mark&BRANCH != 0 {
					goto no2
				}
			}
			if t[0].p.Mark&DELAY != 0 {
				if -cap(s) >= -cap(se) || conflict(&t[0], &s[1]) {
					goto no2
				}
			}
			for u := t[1:]; -cap(u) <= -cap(s); u = u[1:] {
				if depend(ctxt, &u[0], &t[0]) {
					goto no2
				}
			}
			goto out2
		no2:
		}

		if s[0].p.Mark&BRANCH != 0 {
			s[0].nop = 1
		}
		continue

	out2:
		// t[0] is the instruction being moved to fill the delay
		stmp := t[0]
		copy(t[:i-j], t[1:i-j+1])
		s[0] = stmp

		if t[i-j-1].p.Mark&BRANCH != 0 {
			// t[i-j] is being put into a branch delay slot
			// combine its Spadj with the branch instruction
			t[i-j-1].p.Spadj += t[i-j].p.Spadj
			t[i-j].p.Spadj = 0
		}

		i--
	}

	/*
	 * put it all back
	 */
	var p *obj.Prog
	var q *obj.Prog
	for s, p = sch[:], p0; -cap(s) <= -cap(se); s, p = s[1:], q {
		q = p.Link
		if q != s[0].p.Link {
			*p = s[0].p
			p.Link = q
		}
		for s[0].nop != 0 {
			s[0].nop--
			addnop(ctxt, p)
		}
	}
}

func markregused(ctxt *obj.Link, s *Sch) {
	p := &s.p
	s.comp = compound(ctxt, p)
	s.nop = 0
	if s.comp {
		s.set.ireg |= 1 << (REGTMP - REG_R0)
		s.used.ireg |= 1 << (REGTMP - REG_R0)
	}

	ar := 0  /* dest is really reference */
	ad := 0  /* source/dest is really address */
	ld := 0  /* opcode is load instruction */
	sz := 20 /* size of load/store for overlap computation */

	/*
	 * flags based on opcode
	 */
	switch p.As {
	case obj.ATEXT:
		ctxt.Autosize = int32(p.To.Offset + 8)
		ad = 1

	case AJAL:
		c := p.Reg
		if c == 0 {
			c = REGLINK
		}
		s.set.ireg |= 1 << uint(c-REG_R0)
		ar = 1
		ad = 1

	case ABGEZAL,
		ABLTZAL:
		s.set.ireg |= 1 << (REGLINK - REG_R0)
		fallthrough
	case ABEQ,
		ABGEZ,
		ABGTZ,
		ABLEZ,
		ABLTZ,
		ABNE:
		ar = 1
		ad = 1

	case ABFPT,
		ABFPF:
		ad = 1
		s.used.cc |= E_FCR

	case ACMPEQD,
		ACMPEQF,
		ACMPGED,
		ACMPGEF,
		ACMPGTD,
		ACMPGTF:
		ar = 1
		s.set.cc |= E_FCR
		p.Mark |= FCMP

	case AJMP:
		ar = 1
		ad = 1

	case AMOVB,
		AMOVBU:
		sz = 1
		ld = 1

	case AMOVH,
		AMOVHU:
		sz = 2
		ld = 1

	case AMOVF,
		AMOVW,
		AMOVWL,
		AMOVWR:
		sz = 4
		ld = 1

	case AMOVD,
		AMOVV,
		AMOVVL,
		AMOVVR:
		sz = 8
		ld = 1

	case ADIV,
		ADIVU,
		AMUL,
		AMULU,
		AREM,
		AREMU,
		ADIVV,
		ADIVVU,
		AMULV,
		AMULVU,
		AREMV,
		AREMVU:
		s.set.cc = E_HILO
		fallthrough
	case AADD,
		AADDU,
		AADDV,
		AADDVU,
		AAND,
		ANOR,
		AOR,
		ASGT,
		ASGTU,
		ASLL,
		ASRA,
		ASRL,
		ASLLV,
		ASRAV,
		ASRLV,
		ASUB,
		ASUBU,
		ASUBV,
		ASUBVU,
		AXOR,

		AADDD,
		AADDF,
		AADDW,
		ASUBD,
		ASUBF,
		ASUBW,
		AMULF,
		AMULD,
		AMULW,
		ADIVF,
		ADIVD,
		ADIVW:
		if p.Reg == 0 {
			if p.To.Type == obj.TYPE_REG {
				p.Reg = p.To.Reg
			}
			//if(p->reg == NREG)
			//	print("botch %P\n", p);
		}
	}

	/*
	 * flags based on 'to' field
	 */
	c := int(p.To.Class)
	if c == 0 {
		c = aclass(ctxt, &p.To) + 1
		p.To.Class = int8(c)
	}
	c--
	switch c {
	default:
		fmt.Printf("unknown class %d %v\n", c, p)

	case C_ZCON,
		C_SCON,
		C_ADD0CON,
		C_AND0CON,
		C_ADDCON,
		C_ANDCON,
		C_UCON,
		C_LCON,
		C_NONE,
		C_SBRA,
		C_LBRA,
		C_ADDR,
		C_TEXTSIZE:
		break

	case C_HI,
		C_LO:
		s.set.cc |= E_HILO

	case C_FCREG:
		s.set.cc |= E_FCR

	case C_MREG:
		s.set.cc |= E_MCR

	case C_ZOREG,
		C_SOREG,
		C_LOREG:
		c = int(p.To.Reg)
		s.used.ireg |= 1 << uint(c-REG_R0)
		if ad != 0 {
			break
		}
		s.size = uint8(sz)
		s.soffset = regoff(ctxt, &p.To)

		m := uint32(ANYMEM)
		if c == REGSB {
			m = E_MEMSB
		}
		if c == REGSP {
			m = E_MEMSP
		}

		if ar != 0 {
			s.used.cc |= m
		} else {
			s.set.cc |= m
		}

	case C_SACON,
		C_LACON:
		s.used.ireg |= 1 << (REGSP - REG_R0)

	case C_SECON,
		C_LECON:
		s.used.ireg |= 1 << (REGSB - REG_R0)

	case C_REG:
		if ar != 0 {
			s.used.ireg |= 1 << uint(p.To.Reg-REG_R0)
		} else {
			s.set.ireg |= 1 << uint(p.To.Reg-REG_R0)
		}

	case C_FREG:
		if ar != 0 {
			s.used.freg |= 1 << uint(p.To.Reg-REG_F0)
		} else {
			s.set.freg |= 1 << uint(p.To.Reg-REG_F0)
		}
		if ld != 0 && p.From.Type == obj.TYPE_REG {
			p.Mark |= LOAD
		}

	case C_SAUTO,
		C_LAUTO:
		s.used.ireg |= 1 << (REGSP - REG_R0)
		if ad != 0 {
			break
		}
		s.size = uint8(sz)
		s.soffset = regoff(ctxt, &p.To)

		if ar != 0 {
			s.used.cc |= E_MEMSP
		} else {
			s.set.cc |= E_MEMSP
		}

	case C_SEXT,
		C_LEXT:
		s.used.ireg |= 1 << (REGSB - REG_R0)
		if ad != 0 {
			break
		}
		s.size = uint8(sz)
		s.soffset = regoff(ctxt, &p.To)

		if ar != 0 {
			s.used.cc |= E_MEMSB
		} else {
			s.set.cc |= E_MEMSB
		}
	}

	/*
	 * flags based on 'from' field
	 */
	c = int(p.From.Class)
	if c == 0 {
		c = aclass(ctxt, &p.From) + 1
		p.From.Class = int8(c)
	}
	c--
	switch c {
	default:
		fmt.Printf("unknown class %d %v\n", c, p)

	case C_ZCON,
		C_SCON,
		C_ADD0CON,
		C_AND0CON,
		C_ADDCON,
		C_ANDCON,
		C_UCON,
		C_LCON,
		C_NONE,
		C_SBRA,
		C_LBRA,
		C_ADDR,
		C_TEXTSIZE:
		break

	case C_HI,
		C_LO:
		s.used.cc |= E_HILO

	case C_FCREG:
		s.used.cc |= E_FCR

	case C_MREG:
		s.used.cc |= E_MCR

	case C_ZOREG,
		C_SOREG,
		C_LOREG:
		c = int(p.From.Reg)
		s.used.ireg |= 1 << uint(c-REG_R0)
		if ld != 0 {
			p.Mark |= LOAD
		}
		s.size = uint8(sz)
		s.soffset = regoff(ctxt, &p.From)

		m := uint32(ANYMEM)
		if c == REGSB {
			m = E_MEMSB
		}
		if c == REGSP {
			m = E_MEMSP
		}

		s.used.cc |= m

	case C_SACON,
		C_LACON:
		c = int(p.From.Reg)
		if c == 0 {
			c = REGSP
		}
		s.used.ireg |= 1 << uint(c-REG_R0)

	case C_SECON,
		C_LECON:
		s.used.ireg |= 1 << (REGSB - REG_R0)

	case C_REG:
		s.used.ireg |= 1 << uint(p.From.Reg-REG_R0)

	case C_FREG:
		s.used.freg |= 1 << uint(p.From.Reg-REG_F0)
		if ld != 0 && p.To.Type == obj.TYPE_REG {
			p.Mark |= LOAD
		}

	case C_SAUTO,
		C_LAUTO:
		s.used.ireg |= 1 << (REGSP - REG_R0)
		if ld != 0 {
			p.Mark |= LOAD
		}
		if ad != 0 {
			break
		}
		s.size = uint8(sz)
		s.soffset = regoff(ctxt, &p.From)

		s.used.cc |= E_MEMSP

	case C_SEXT:
	case C_LEXT:
		s.used.ireg |= 1 << (REGSB - REG_R0)
		if ld != 0 {
			p.Mark |= LOAD
		}
		if ad != 0 {
			break
		}
		s.size = uint8(sz)
		s.soffset = regoff(ctxt, &p.From)

		s.used.cc |= E_MEMSB
	}

	c = int(p.Reg)
	if c != 0 {
		if REG_F0 <= c && c <= REG_F31 {
			s.used.freg |= 1 << uint(c-REG_F0)
		} else {
			s.used.ireg |= 1 << uint(c-REG_R0)
		}
	}
	s.set.ireg &^= (1 << (REGZERO - REG_R0)) /* R0 can't be set */
}

/*
 * test to see if two instructions can be
 * interchanged without changing semantics
 */
func depend(ctxt *obj.Link, sa, sb *Sch) bool {
	if sa.set.ireg&(sb.set.ireg|sb.used.ireg) != 0 {
		return true
	}
	if sb.set.ireg&sa.used.ireg != 0 {
		return true
	}

	if sa.set.freg&(sb.set.freg|sb.used.freg) != 0 {
		return true
	}
	if sb.set.freg&sa.used.freg != 0 {
		return true
	}

	/*
	 * special case.
	 * loads from same address cannot pass.
	 * this is for hardware fifo's and the like
	 */
	if sa.used.cc&sb.used.cc&E_MEM != 0 {
		if sa.p.Reg == sb.p.Reg {
			if regoff(ctxt, &sa.p.From) == regoff(ctxt, &sb.p.From) {
				return true
			}
		}
	}

	x := (sa.set.cc & (sb.set.cc | sb.used.cc)) | (sb.set.cc & sa.used.cc)
	if x != 0 {
		/*
		 * allow SB and SP to pass each other.
		 * allow SB to pass SB iff doffsets are ok
		 * anything else conflicts
		 */
		if x != E_MEMSP && x != E_MEMSB {
			return true
		}
		x = sa.set.cc | sb.set.cc | sa.used.cc | sb.used.cc
		if x&E_MEM != 0 {
			return true
		}
		if offoverlap(sa, sb) {
			return true
		}
	}

	return false
}

func offoverlap(sa, sb *Sch) bool {
	if sa.soffset < sb.soffset {
		if sa.soffset+int32(sa.size) > sb.soffset {
			return true
		}
		return false
	}
	if sb.soffset+int32(sb.size) > sa.soffset {
		return true
	}
	return false
}

/*
 * test 2 adjacent instructions
 * and find out if inserted instructions
 * are desired to prevent stalls.
 */
func conflict(sa, sb *Sch) bool {
	if sa.set.ireg&sb.used.ireg != 0 {
		return true
	}
	if sa.set.freg&sb.used.freg != 0 {
		return true
	}
	if sa.set.cc&sb.used.cc != 0 {
		return true
	}
	return false
}

func compound(ctxt *obj.Link, p *obj.Prog) bool {
	o := oplook(ctxt, p)
	if o.size != 4 {
		return true
	}
	if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSB {
		return true
	}
	return false
}

func follow(ctxt *obj.Link, s *obj.LSym) {
	ctxt.Cursym = s

	firstp := ctxt.NewProg()
	lastp := firstp
	xfol(ctxt, s.Text, &lastp)
	lastp.Link = nil
	s.Text = firstp.Link
}

func xfol(ctxt *obj.Link, p *obj.Prog, last **obj.Prog) {
	var q *obj.Prog
	var r *obj.Prog
	var i int

loop:
	if p == nil {
		return
	}
	a := p.As
	if a == AJMP {
		q = p.Pcond
		if (p.Mark&NOSCHED != 0) || q != nil && (q.Mark&NOSCHED != 0) {
			p.Mark |= FOLL
			(*last).Link = p
			*last = p
			p = p.Link
			xfol(ctxt, p, last)
			p = q
			if p != nil && p.Mark&FOLL == 0 {
				goto loop
			}
			return
		}

		if q != nil {
			p.Mark |= FOLL
			p = q
			if p.Mark&FOLL == 0 {
				goto loop
			}
		}
	}

	if p.Mark&FOLL != 0 {
		i = 0
		q = p
		for ; i < 4; i, q = i+1, q.Link {
			if q == *last || (q.Mark&NOSCHED != 0) {
				break
			}
			a = q.As
			if a == obj.ANOP {
				i--
				continue
			}

			if a == AJMP || a == ARET || a == ARFE {
				goto copy
			}
			if q.Pcond == nil || (q.Pcond.Mark&FOLL != 0) {
				continue
			}
			if a != ABEQ && a != ABNE {
				continue
			}

		copy:
			for {
				r = ctxt.NewProg()
				*r = *p
				if r.Mark&FOLL == 0 {
					fmt.Printf("can't happen 1\n")
				}
				r.Mark |= FOLL
				if p != q {
					p = p.Link
					(*last).Link = r
					*last = r
					continue
				}

				(*last).Link = r
				*last = r
				if a == AJMP || a == ARET || a == ARFE {
					return
				}
				r.As = ABNE
				if a == ABNE {
					r.As = ABEQ
				}
				r.Pcond = p.Link
				r.Link = p.Pcond
				if r.Link.Mark&FOLL == 0 {
					xfol(ctxt, r.Link, last)
				}
				if r.Pcond.Mark&FOLL == 0 {
					fmt.Printf("can't happen 2\n")
				}
				return
			}
		}

		a = AJMP
		q = ctxt.NewProg()
		q.As = a
		q.Lineno = p.Lineno
		q.To.Type = obj.TYPE_BRANCH
		q.To.Offset = p.Pc
		q.Pcond = p
		p = q
	}

	p.Mark |= FOLL
	(*last).Link = p
	*last = p
	if a == AJMP || a == ARET || a == ARFE {
		if p.Mark&NOSCHED != 0 {
			p = p.Link
			goto loop
		}

		return
	}

	if p.Pcond != nil {
		if a != AJAL && p.Link != nil {
			xfol(ctxt, p.Link, last)
			p = p.Pcond
			if p == nil || (p.Mark&FOLL != 0) {
				return
			}
			goto loop
		}
	}

	p = p.Link
	goto loop
}

var Linkmips64 = obj.LinkArch{
	Arch:       sys.ArchMIPS64,
	Preprocess: preprocess,
	Assemble:   span0,
	Follow:     follow,
	Progedit:   progedit,
}

var Linkmips64le = obj.LinkArch{
	Arch:       sys.ArchMIPS64LE,
	Preprocess: preprocess,
	Assemble:   span0,
	Follow:     follow,
	Progedit:   progedit,
}