code: 9ferno

ref: b502a62da2ec6058923db94f87ecc2d29db2fa77
dir: /utils/ql/sched.c/

View raw version
#include	"l.h"

enum
{
	E_ICC	= 1<<0,
	E_FCC	= 1<<1,
	E_MEM	= 1<<2,
	E_MEMSP	= 1<<3,	/* uses offset and size */
	E_MEMSB	= 1<<4,	/* uses offset and size */
	E_LR	= 1<<5,
	E_CR = 1<<6,
	E_CTR = 1<<7,
	E_XER = 1<<8,

	E_CR0 = 0xF<<0,
	E_CR1 = 0xF<<4,

	ANYMEM	= E_MEM|E_MEMSP|E_MEMSB,
	ALL	= ~0,
};

typedef	struct	Sch	Sch;
typedef	struct	Dep	Dep;

struct	Dep
{
	ulong	ireg;
	ulong	freg;
	ulong	cc;
	ulong	cr;
};
struct	Sch
{
	Prog	p;
	Dep	set;
	Dep	used;
	long	soffset;
	char	size;
	char	comp;
};

void	regused(Sch*, Prog*);
int	depend(Sch*, Sch*);
int	conflict(Sch*, Sch*);
int	offoverlap(Sch*, Sch*);
void	dumpbits(Sch*, Dep*);

void
sched(Prog *p0, Prog *pe)
{
	Prog *p, *q;
	Sch sch[NSCHED], *s, *t, *u, *se, stmp;

	if(!debug['Q'])
		return;
	/*
	 * build side structure
	 */
	s = sch;
	for(p=p0;; p=p->link) {
		memset(s, 0, sizeof(*s));
		s->p = *p;
		regused(s, p);
		if(debug['X']) {
			Bprint(&bso, "%P\tset", &s->p);
			dumpbits(s, &s->set);
			Bprint(&bso, "; used");
			dumpbits(s, &s->used);
			if(s->comp)
				Bprint(&bso, "; compound");
			if(s->p.mark & LOAD)
				Bprint(&bso, "; load");
			if(s->p.mark & BRANCH)
				Bprint(&bso, "; branch");
			if(s->p.mark & FCMP)
				Bprint(&bso, "; fcmp");
			Bprint(&bso, "\n");
		}
		s++;
		if(p == pe)
			break;
	}
	se = s;

	for(s=se-1; s>=sch; s--) {

		/*
		 * load delay. interlocked.
		 */
		if(s->p.mark & LOAD) {
			if(s >= se-1)
				continue;
			if(!conflict(s, (s+1)))
				continue;
			/*
			 * s is load, s+1 is immediate use of result
			 * t is the trial instruction to insert between s and s+1
			 */
			for(t=s-1; t>=sch; t--) {
				if(t->p.mark & BRANCH)
					goto no2;
				if(t->p.mark & FCMP)
					if((s+1)->p.mark & BRANCH)
						goto no2;
				if(t->p.mark & LOAD)
					if(conflict(t, (s+1)))
						goto no2;
				for(u=t+1; u<=s; u++)
					if(depend(u, t))
						goto no2;
				goto out2;
			no2:;
			}
			if(debug['X'])
				Bprint(&bso, "?l%P\n", &s->p);
			continue;
		out2:
			if(debug['X']) {
				Bprint(&bso, "!l%P\n", &t->p);
				Bprint(&bso, "%P\n", &s->p);
			}
			stmp = *t;
			memmove(t, t+1, (uchar*)s - (uchar*)t);
			*s = stmp;
			s--;
			continue;
		}

		/*
		 * fop2 delay.
		 */
		if(s->p.mark & FCMP) {
			if(s >= se-1)
				continue;
			if(!((s+1)->p.mark & BRANCH))
				continue;
			/* t is the trial instruction to use */
			for(t=s-1; t>=sch; t--) {
				for(u=t+1; u<=s; u++)
					if(depend(u, t))
						goto no3;
				goto out3;
			no3:;
			}
			if(debug['X'])
				Bprint(&bso, "?f%P\n", &s->p);
			continue;
		out3:
			if(debug['X']) {
				Bprint(&bso, "!f%P\n", &t->p);
				Bprint(&bso, "%P\n", &s->p);
			}
			stmp = *t;
			memmove(t, t+1, (uchar*)s - (uchar*)t);
			*s = stmp;
			s--;
			continue;
		}
	}

	/*
	 * put it all back
	 */
	for(s=sch, p=p0; s<se; s++, p=q) {
		q = p->link;
		if(q != s->p.link) {
			*p = s->p;
			p->link = q;
		}
	}
	if(debug['X'])
		Bprint(&bso, "\n");
}

void
regused(Sch *s, Prog *realp)
{
	int c, ar, ad, ld, sz, nr, upd;
	ulong m;
	Prog *p;

	p = &s->p;
	s->comp = compound(p);
	if(s->comp) {
		s->set.ireg |= 1<<REGTMP;
		s->used.ireg |= 1<<REGTMP;
	}
	ar = 0;		/* dest is really reference */
	ad = 0;		/* source/dest is really address */
	ld = 0;		/* opcode is load instruction */
	sz = 32*4;		/* size of load/store for overlap computation */
	nr = 0;	/* source/dest is not really reg */
	upd = 0;	/* move with update; changes reg */

/*
 * flags based on opcode
 */
	switch(p->as) {
	case ATEXT:
		curtext = realp;
		autosize = p->to.offset + 4;
		ad = 1;
		break;
	case ABL:
		s->set.cc |= E_LR;
		ar = 1;
		ad = 1;
		break;
	case ABR:
		ar = 1;
		ad = 1;
		break;
	case ACMP:
		s->set.cc |= E_ICC;
		if(p->reg == 0)
			s->set.cr |= E_CR0;
		else
			s->set.cr |= (0xF<<((p->reg&7)*4));
		ar = 1;
		break;
	case AFCMPO:
	case AFCMPU:
		s->set.cc |= E_FCC;
		if(p->reg == 0)
			s->set.cr |= E_CR0;
		else
			s->set.cr |= (0xF<<((p->reg&7)*4));
		ar = 1;
		break;
	case ACRAND:
	case ACRANDN:
	case ACREQV:
	case ACRNAND:
	case ACRNOR:
	case ACROR:
	case ACRORN:
	case ACRXOR:
		s->used.cr |= 1<<p->from.reg;
		s->set.cr |= 1<<p->to.reg;
		nr = 1;
		break;
	case ABCL:	/* tricky */
		s->used.cc |= E_FCC|E_ICC;
		s->used.cr = ALL;
		s->set.cc |= E_LR;
		ar = 1;
		break;
	case ABC:		/* tricky */
		s->used.cc |= E_FCC|E_ICC;
		s->used.cr = ALL;
		ar = 1;
		break;
	case ABEQ:
	case ABGE:
	case ABGT:
	case ABLE:
	case ABLT:
	case ABNE:
	case ABVC:
	case ABVS:
		s->used.cc |= E_ICC;
		s->used.cr |= E_CR0;
		ar = 1;
		break;
	case ALSW:
	case AMOVMW:
		/* could do better */
		sz = 32*4;
		ld = 1;
		break;
	case AMOVBU:
	case AMOVBZU:
		upd = 1;
		sz = 1;
		ld = 1;
		break;
	case AMOVB:
	case AMOVBZ:
		sz = 1;
		ld = 1;
		break;
	case AMOVHU:
	case AMOVHZU:
		upd = 1;
		sz = 2;
		ld = 1;
		break;
	case AMOVH:
	case AMOVHBR:
	case AMOVHZ:
		sz = 2;
		ld = 1;
		break;
	case AFMOVSU:
	case AMOVWU:
		upd = 1;
		sz = 4;
		ld = 1;
		break;
	case AFMOVS:
	case AMOVW:
	case AMOVWBR:
	case ALWAR:
		sz = 4;
		ld = 1;
		break;
	case AFMOVDU:
		upd = 1;
		sz = 8;
		ld = 1;
		break;
	case AFMOVD:
		sz = 8;
		ld = 1;
		break;
	case AFMOVDCC:
		sz = 8;
		ld = 1;
		s->set.cc |= E_FCC;
		s->set.cr |= E_CR1;
		break;
	case AMOVFL:
	case AMOVCRFS:
	case AMTFSB0:
	case AMTFSB0CC:
	case AMTFSB1:
	case AMTFSB1CC:
		s->set.ireg = ALL;
		s->set.freg = ALL;
		s->set.cc = ALL;
		s->set.cr = ALL;
		break;
	case AADDCC:
	case AADDVCC:
	case AADDCCC:
	case AADDCVCC:
	case AADDMECC:
	case AADDMEVCC:
	case AADDECC:
	case AADDEVCC:
	case AADDZECC:
	case AADDZEVCC:
	case AANDCC:
	case AANDNCC:
	case ACNTLZWCC:
	case ADIVWCC:
	case ADIVWVCC:
	case ADIVWUCC:
	case ADIVWUVCC:
	case AEQVCC:
	case AEXTSBCC:
	case AEXTSHCC:
	case AMULHWCC:
	case AMULHWUCC:
	case AMULLWCC:
	case AMULLWVCC:
	case ANANDCC:
	case ANEGCC:
	case ANEGVCC:
	case ANORCC:
	case AORCC:
	case AORNCC:
	case AREMCC:
	case AREMVCC:
	case AREMUCC:
	case AREMUVCC:
	case ARLWMICC:
	case ARLWNMCC:
	case ASLWCC:
	case ASRAWCC:
	case ASRWCC:
	case ASTWCCC:
	case ASUBCC:
	case ASUBVCC:
	case ASUBCCC:
	case ASUBCVCC:
	case ASUBMECC:
	case ASUBMEVCC:
	case ASUBECC:
	case ASUBEVCC:
	case ASUBZECC:
	case ASUBZEVCC:
	case AXORCC:
		s->set.cc |= E_ICC;
		s->set.cr |= E_CR0;
		break;
	case AFABSCC:
	case AFADDCC:
	case AFADDSCC:
	case AFCTIWCC:
	case AFCTIWZCC:
	case AFDIVCC:
	case AFDIVSCC:
	case AFMADDCC:
	case AFMADDSCC:
	case AFMSUBCC:
	case AFMSUBSCC:
	case AFMULCC:
	case AFMULSCC:
	case AFNABSCC:
	case AFNEGCC:
	case AFNMADDCC:
	case AFNMADDSCC:
	case AFNMSUBCC:
	case AFNMSUBSCC:
	case AFRSPCC:
	case AFSUBCC:
	case AFSUBSCC:
		s->set.cc |= E_FCC;
		s->set.cr |= E_CR1;
		break;
	}

/*
 * flags based on 'to' field
 */
	c = p->to.class;
	if(c == 0) {
		c = aclass(&p->to) + 1;
		p->to.class = c;
	}
	c--;
	switch(c) {
	default:
		print("unknown class %d %D\n", c, &p->to);

	case C_NONE:
	case C_ZCON:
	case C_SCON:
	case C_UCON:
	case C_LCON:
	case C_ADDCON:
	case C_ANDCON:
	case C_SBRA:
	case C_LBRA:
		break;
	case C_CREG:
		c = p->to.reg;
		if(c == NREG)
			s->set.cr = ALL;
		else
			s->set.cr |= (0xF << ((p->from.reg&7)*4));
		s->set.cc = ALL;
		break;
	case C_SPR:
	case C_SREG:
	case C_FPSCR:
	case C_MSR:
	case C_XER:
		s->set.ireg = ALL;
		s->set.freg = ALL;
		s->set.cc = ALL;
		s->set.cr = ALL;
		break;
	case C_LR:
		s->set.cc |= E_LR;
		break;
	case C_CTR:
		s->set.cc |= E_CTR;
		break;
	case C_ZOREG:
	case C_SOREG:
	case C_LOREG:
		c = p->to.reg;
		s->used.ireg |= 1<<c;
		if(upd)
			s->set.ireg |= 1<<c;
		if(ad)
			break;
		s->size = sz;
		s->soffset = regoff(&p->to);

		m = ANYMEM;
		if(c == REGSB)
			m = E_MEMSB;
		if(c == REGSP)
			m = E_MEMSP;

		if(ar)
			s->used.cc |= m;
		else
			s->set.cc |= m;
		break;
	case C_SACON:
	case C_LACON:
		s->used.ireg |= 1<<REGSP;
		if(upd)
			s->set.ireg |= 1<<c;
		break;
	case C_SECON:
	case C_LECON:
		s->used.ireg |= 1<<REGSB;
		if(upd)
			s->set.ireg |= 1<<c;
		break;
	case C_REG:
		if(nr)
			break;
		if(ar)
			s->used.ireg |= 1<<p->to.reg;
		else
			s->set.ireg |= 1<<p->to.reg;
		break;
	case C_FREG:
		if(ar)
			s->used.freg |= 1<<p->to.reg;
		else
			s->set.freg |= 1<<p->to.reg;
		break;
	case C_SAUTO:
	case C_LAUTO:
		s->used.ireg |= 1<<REGSP;
		if(upd)
			s->set.ireg |= 1<<c;
		if(ad)
			break;
		s->size = sz;
		s->soffset = regoff(&p->to);

		if(ar)
			s->used.cc |= E_MEMSP;
		else
			s->set.cc |= E_MEMSP;
		break;
	case C_SEXT:
	case C_LEXT:
		s->used.ireg |= 1<<REGSB;
		if(upd)
			s->set.ireg |= 1<<c;
		if(ad)
			break;
		s->size = sz;
		s->soffset = regoff(&p->to);

		if(ar)
			s->used.cc |= E_MEMSB;
		else
			s->set.cc |= E_MEMSB;
		break;
	}

/*
 * flags based on 'from' field
 */
	c = p->from.class;
	if(c == 0) {
		c = aclass(&p->from) + 1;
		p->from.class = c;
	}
	c--;
	switch(c) {
	default:
		print("unknown class %d %D\n", c, &p->from);

	case C_NONE:
	case C_ZCON:
	case C_SCON:
	case C_UCON:
	case C_LCON:
	case C_ADDCON:
	case C_ANDCON:
	case C_SBRA:
	case C_LBRA:
		c = p->from.reg;
		if(c != NREG)
			s->used.ireg |= 1<<c;
		break;
	case C_CREG:
		c = p->from.reg;
		if(c == NREG)
			s->used.cr = ALL;
		else
			s->used.cr |= (0xF << ((p->from.reg&7)*4));
		s->used.cc = ALL;
		break;
	case C_SPR:
	case C_SREG:
	case C_FPSCR:
	case C_MSR:
	case C_XER:
		s->set.ireg = ALL;
		s->set.freg = ALL;
		s->set.cc = ALL;
		s->set.cr = ALL;
		break;
	case C_LR:
		s->used.cc |= E_LR;
		break;
	case C_CTR:
		s->used.cc |= E_CTR;
		break;
	case C_ZOREG:
	case C_SOREG:
	case C_LOREG:
		c = p->from.reg;
		s->used.ireg |= 1<<c;
		if(ld)
			p->mark |= LOAD;
		if(ad)
			break;
		s->size = sz;
		s->soffset = regoff(&p->from);

		m = ANYMEM;
		if(c == REGSB)
			m = E_MEMSB;
		if(c == REGSP)
			m = E_MEMSP;

		s->used.cc |= m;
		break;
	case C_SACON:
	case C_LACON:
		s->used.ireg |= 1<<REGSP;
		break;
	case C_SECON:
	case C_LECON:
		s->used.ireg |= 1<<REGSB;
		break;
	case C_REG:
		if(nr)
			break;
		s->used.ireg |= 1<<p->from.reg;
		break;
	case C_FREG:
		s->used.freg |= 1<<p->from.reg;
		break;
	case C_SAUTO:
	case C_LAUTO:
		s->used.ireg |= 1<<REGSP;
		if(ld)
			p->mark |= LOAD;
		if(ad)
			break;
		s->size = sz;
		s->soffset = regoff(&p->from);

		s->used.cc |= E_MEMSP;
		break;
	case C_SEXT:
	case C_LEXT:
		s->used.ireg |= 1<<REGSB;
		if(ld)
			p->mark |= LOAD;
		if(ad)
			break;
		s->size = sz;
		s->soffset = regoff(&p->from);

		s->used.cc |= E_MEMSB;
		break;
	}
	
	c = p->reg;
	if(c != NREG) {
		if(p->from.type == D_FREG || p->to.type == D_FREG)
			s->used.freg |= 1<<c;
		else
			s->used.ireg |= 1<<c;
	}
}

/*
 * test to see if 2 instrictions can be
 * interchanged without changing semantics
 */
int
depend(Sch *sa, Sch *sb)
{
	ulong x;

	if(sa->set.ireg & (sb->set.ireg|sb->used.ireg))
		return 1;
	if(sb->set.ireg & sa->used.ireg)
		return 1;

	if(sa->set.freg & (sb->set.freg|sb->used.freg))
		return 1;
	if(sb->set.freg & sa->used.freg)
		return 1;

	if(sa->set.cr & (sb->set.cr|sb->used.cr))
		return 1;
	if(sb->set.cr & sa->used.cr)
		return 1;


	x = (sa->set.cc & (sb->set.cc|sb->used.cc)) |
		(sb->set.cc & sa->used.cc);
	if(x) {
		/*
		 * allow SB and SP to pass each other.
		 * allow SB to pass SB iff doffsets are ok
		 * anything else conflicts
		 */
		if(x != E_MEMSP && x != E_MEMSB)
			return 1;
		x = sa->set.cc | sb->set.cc |
			sa->used.cc | sb->used.cc;
		if(x & E_MEM)
			return 1;
		if(offoverlap(sa, sb))
			return 1;
	}

	return 0; 
}

int
offoverlap(Sch *sa, Sch *sb)
{

	if(sa->soffset < sb->soffset) {
		if(sa->soffset+sa->size > sb->soffset)
			return 1;
		return 0;
	}
	if(sb->soffset+sb->size > sa->soffset)
		return 1;
	return 0;
}

/*
 * test 2 adjacent instructions
 * and find out if inserted instructions
 * are desired to prevent stalls.
 * first instruction is a load instruction.
 */
int
conflict(Sch *sa, Sch *sb)
{

	if(sa->set.ireg & sb->used.ireg)
		return 1;
	if(sa->set.freg & sb->used.freg)
		return 1;
	if(sa->set.cr & sb->used.cr)
		return 1;
	return 0;
}

int
compound(Prog *p)
{
	Optab *o;

	o = oplook(p);
	if(o->size != 4)
		return 1;
	if(p->to.type == D_REG && p->to.reg == REGSB)
		return 1;
	return 0;
}

void
dumpbits(Sch *s, Dep *d)
{
	int i;

	for(i=0; i<32; i++)
		if(d->ireg & (1<<i))
			Bprint(&bso, " R%d", i);
	for(i=0; i<32; i++)
		if(d->freg & (1<<i))
			Bprint(&bso, " F%d", i);
	for(i=0; i<32; i++)
		if(d->cr & (1<<i))
			Bprint(&bso, " C%d", i);
	for(i=0; i<32; i++)
		switch(d->cc & (1<<i)) {
		default:
			break;
		case E_ICC:
			Bprint(&bso, " ICC");
			break;
		case E_FCC:
			Bprint(&bso, " FCC");
			break;
		case E_LR:
			Bprint(&bso, " LR");
			break;
		case E_CR:
			Bprint(&bso, " CR");
			break;
		case E_CTR:
			Bprint(&bso, " CTR");
			break;
		case E_XER:
			Bprint(&bso, " XER");
			break;
		case E_MEM:
			Bprint(&bso, " MEM%d", s->size);
			break;
		case E_MEMSB:
			Bprint(&bso, " SB%d", s->size);
			break;
		case E_MEMSP:
			Bprint(&bso, " SP%d", s->size);
			break;
		}
}