#include "l.h" enum { E_ICC = 1<<0, E_FCC = 1<<1, E_MEM = 1<<2, E_MEMSP = 1<<3, /* uses offset and size */ E_MEMSB = 1<<4, /* uses offset and size */ E_LR = 1<<5, E_CR = 1<<6, E_CTR = 1<<7, E_XER = 1<<8, E_CR0 = 0xF<<0, E_CR1 = 0xF<<4, ANYMEM = E_MEM|E_MEMSP|E_MEMSB, ALL = ~0, }; typedef struct Sch Sch; typedef struct Dep Dep; struct Dep { ulong ireg; ulong freg; ulong cc; ulong cr; }; struct Sch { Prog p; Dep set; Dep used; long soffset; char size; char comp; }; void regused(Sch*, Prog*); int depend(Sch*, Sch*); int conflict(Sch*, Sch*); int offoverlap(Sch*, Sch*); void dumpbits(Sch*, Dep*); void sched(Prog *p0, Prog *pe) { Prog *p, *q; Sch sch[NSCHED], *s, *t, *u, *se, stmp; if(!debug['Q']) return; /* * build side structure */ s = sch; for(p=p0;; p=p->link) { memset(s, 0, sizeof(*s)); s->p = *p; regused(s, p); if(debug['X']) { Bprint(&bso, "%P\tset", &s->p); dumpbits(s, &s->set); Bprint(&bso, "; used"); dumpbits(s, &s->used); if(s->comp) Bprint(&bso, "; compound"); if(s->p.mark & LOAD) Bprint(&bso, "; load"); if(s->p.mark & BRANCH) Bprint(&bso, "; branch"); if(s->p.mark & FCMP) Bprint(&bso, "; fcmp"); Bprint(&bso, "\n"); } s++; if(p == pe) break; } se = s; for(s=se-1; s>=sch; s--) { /* * load delay. interlocked. */ if(s->p.mark & LOAD) { if(s >= se-1) continue; if(!conflict(s, (s+1))) continue; /* * s is load, s+1 is immediate use of result * t is the trial instruction to insert between s and s+1 */ for(t=s-1; t>=sch; t--) { if(t->p.mark & BRANCH) goto no2; if(t->p.mark & FCMP) if((s+1)->p.mark & BRANCH) goto no2; if(t->p.mark & LOAD) if(conflict(t, (s+1))) goto no2; for(u=t+1; u<=s; u++) if(depend(u, t)) goto no2; goto out2; no2:; } if(debug['X']) Bprint(&bso, "?l%P\n", &s->p); continue; out2: if(debug['X']) { Bprint(&bso, "!l%P\n", &t->p); Bprint(&bso, "%P\n", &s->p); } stmp = *t; memmove(t, t+1, (uchar*)s - (uchar*)t); *s = stmp; s--; continue; } /* * fop2 delay. */ if(s->p.mark & FCMP) { if(s >= se-1) continue; if(!((s+1)->p.mark & BRANCH)) continue; /* t is the trial instruction to use */ for(t=s-1; t>=sch; t--) { for(u=t+1; u<=s; u++) if(depend(u, t)) goto no3; goto out3; no3:; } if(debug['X']) Bprint(&bso, "?f%P\n", &s->p); continue; out3: if(debug['X']) { Bprint(&bso, "!f%P\n", &t->p); Bprint(&bso, "%P\n", &s->p); } stmp = *t; memmove(t, t+1, (uchar*)s - (uchar*)t); *s = stmp; s--; continue; } } /* * put it all back */ for(s=sch, p=p0; slink; if(q != s->p.link) { *p = s->p; p->link = q; } } if(debug['X']) Bprint(&bso, "\n"); } void regused(Sch *s, Prog *realp) { int c, ar, ad, ld, sz, nr, upd; ulong m; Prog *p; p = &s->p; s->comp = compound(p); if(s->comp) { s->set.ireg |= 1<used.ireg |= 1<as) { case ATEXT: curtext = realp; autosize = p->to.offset + 4; ad = 1; break; case ABL: s->set.cc |= E_LR; ar = 1; ad = 1; break; case ABR: ar = 1; ad = 1; break; case ACMP: s->set.cc |= E_ICC; if(p->reg == 0) s->set.cr |= E_CR0; else s->set.cr |= (0xF<<((p->reg&7)*4)); ar = 1; break; case AFCMPO: case AFCMPU: s->set.cc |= E_FCC; if(p->reg == 0) s->set.cr |= E_CR0; else s->set.cr |= (0xF<<((p->reg&7)*4)); ar = 1; break; case ACRAND: case ACRANDN: case ACREQV: case ACRNAND: case ACRNOR: case ACROR: case ACRORN: case ACRXOR: s->used.cr |= 1<from.reg; s->set.cr |= 1<to.reg; nr = 1; break; case ABCL: /* tricky */ s->used.cc |= E_FCC|E_ICC; s->used.cr = ALL; s->set.cc |= E_LR; ar = 1; break; case ABC: /* tricky */ s->used.cc |= E_FCC|E_ICC; s->used.cr = ALL; ar = 1; break; case ABEQ: case ABGE: case ABGT: case ABLE: case ABLT: case ABNE: case ABVC: case ABVS: s->used.cc |= E_ICC; s->used.cr |= E_CR0; ar = 1; break; case ALSW: case AMOVMW: /* could do better */ sz = 32*4; ld = 1; break; case AMOVBU: case AMOVBZU: upd = 1; sz = 1; ld = 1; break; case AMOVB: case AMOVBZ: sz = 1; ld = 1; break; case AMOVHU: case AMOVHZU: upd = 1; sz = 2; ld = 1; break; case AMOVH: case AMOVHBR: case AMOVHZ: sz = 2; ld = 1; break; case AFMOVSU: case AMOVWU: upd = 1; sz = 4; ld = 1; break; case AFMOVS: case AMOVW: case AMOVWBR: case ALWAR: sz = 4; ld = 1; break; case AFMOVDU: upd = 1; sz = 8; ld = 1; break; case AFMOVD: sz = 8; ld = 1; break; case AFMOVDCC: sz = 8; ld = 1; s->set.cc |= E_FCC; s->set.cr |= E_CR1; break; case AMOVFL: case AMOVCRFS: case AMTFSB0: case AMTFSB0CC: case AMTFSB1: case AMTFSB1CC: s->set.ireg = ALL; s->set.freg = ALL; s->set.cc = ALL; s->set.cr = ALL; break; case AADDCC: case AADDVCC: case AADDCCC: case AADDCVCC: case AADDMECC: case AADDMEVCC: case AADDECC: case AADDEVCC: case AADDZECC: case AADDZEVCC: case AANDCC: case AANDNCC: case ACNTLZWCC: case ADIVWCC: case ADIVWVCC: case ADIVWUCC: case ADIVWUVCC: case AEQVCC: case AEXTSBCC: case AEXTSHCC: case AMULHWCC: case AMULHWUCC: case AMULLWCC: case AMULLWVCC: case ANANDCC: case ANEGCC: case ANEGVCC: case ANORCC: case AORCC: case AORNCC: case AREMCC: case AREMVCC: case AREMUCC: case AREMUVCC: case ARLWMICC: case ARLWNMCC: case ASLWCC: case ASRAWCC: case ASRWCC: case ASTWCCC: case ASUBCC: case ASUBVCC: case ASUBCCC: case ASUBCVCC: case ASUBMECC: case ASUBMEVCC: case ASUBECC: case ASUBEVCC: case ASUBZECC: case ASUBZEVCC: case AXORCC: s->set.cc |= E_ICC; s->set.cr |= E_CR0; break; case AFABSCC: case AFADDCC: case AFADDSCC: case AFCTIWCC: case AFCTIWZCC: case AFDIVCC: case AFDIVSCC: case AFMADDCC: case AFMADDSCC: case AFMSUBCC: case AFMSUBSCC: case AFMULCC: case AFMULSCC: case AFNABSCC: case AFNEGCC: case AFNMADDCC: case AFNMADDSCC: case AFNMSUBCC: case AFNMSUBSCC: case AFRSPCC: case AFSUBCC: case AFSUBSCC: s->set.cc |= E_FCC; s->set.cr |= E_CR1; break; } /* * flags based on 'to' field */ c = p->to.class; if(c == 0) { c = aclass(&p->to) + 1; p->to.class = c; } c--; switch(c) { default: print("unknown class %d %D\n", c, &p->to); case C_NONE: case C_ZCON: case C_SCON: case C_UCON: case C_LCON: case C_ADDCON: case C_ANDCON: case C_SBRA: case C_LBRA: break; case C_CREG: c = p->to.reg; if(c == NREG) s->set.cr = ALL; else s->set.cr |= (0xF << ((p->from.reg&7)*4)); s->set.cc = ALL; break; case C_SPR: case C_SREG: case C_FPSCR: case C_MSR: case C_XER: s->set.ireg = ALL; s->set.freg = ALL; s->set.cc = ALL; s->set.cr = ALL; break; case C_LR: s->set.cc |= E_LR; break; case C_CTR: s->set.cc |= E_CTR; break; case C_ZOREG: case C_SOREG: case C_LOREG: c = p->to.reg; s->used.ireg |= 1<set.ireg |= 1<size = sz; s->soffset = regoff(&p->to); m = ANYMEM; if(c == REGSB) m = E_MEMSB; if(c == REGSP) m = E_MEMSP; if(ar) s->used.cc |= m; else s->set.cc |= m; break; case C_SACON: case C_LACON: s->used.ireg |= 1<set.ireg |= 1<used.ireg |= 1<set.ireg |= 1<used.ireg |= 1<to.reg; else s->set.ireg |= 1<to.reg; break; case C_FREG: if(ar) s->used.freg |= 1<to.reg; else s->set.freg |= 1<to.reg; break; case C_SAUTO: case C_LAUTO: s->used.ireg |= 1<set.ireg |= 1<size = sz; s->soffset = regoff(&p->to); if(ar) s->used.cc |= E_MEMSP; else s->set.cc |= E_MEMSP; break; case C_SEXT: case C_LEXT: s->used.ireg |= 1<set.ireg |= 1<size = sz; s->soffset = regoff(&p->to); if(ar) s->used.cc |= E_MEMSB; else s->set.cc |= E_MEMSB; break; } /* * flags based on 'from' field */ c = p->from.class; if(c == 0) { c = aclass(&p->from) + 1; p->from.class = c; } c--; switch(c) { default: print("unknown class %d %D\n", c, &p->from); case C_NONE: case C_ZCON: case C_SCON: case C_UCON: case C_LCON: case C_ADDCON: case C_ANDCON: case C_SBRA: case C_LBRA: c = p->from.reg; if(c != NREG) s->used.ireg |= 1<from.reg; if(c == NREG) s->used.cr = ALL; else s->used.cr |= (0xF << ((p->from.reg&7)*4)); s->used.cc = ALL; break; case C_SPR: case C_SREG: case C_FPSCR: case C_MSR: case C_XER: s->set.ireg = ALL; s->set.freg = ALL; s->set.cc = ALL; s->set.cr = ALL; break; case C_LR: s->used.cc |= E_LR; break; case C_CTR: s->used.cc |= E_CTR; break; case C_ZOREG: case C_SOREG: case C_LOREG: c = p->from.reg; s->used.ireg |= 1<mark |= LOAD; if(ad) break; s->size = sz; s->soffset = regoff(&p->from); m = ANYMEM; if(c == REGSB) m = E_MEMSB; if(c == REGSP) m = E_MEMSP; s->used.cc |= m; break; case C_SACON: case C_LACON: s->used.ireg |= 1<used.ireg |= 1<used.ireg |= 1<from.reg; break; case C_FREG: s->used.freg |= 1<from.reg; break; case C_SAUTO: case C_LAUTO: s->used.ireg |= 1<mark |= LOAD; if(ad) break; s->size = sz; s->soffset = regoff(&p->from); s->used.cc |= E_MEMSP; break; case C_SEXT: case C_LEXT: s->used.ireg |= 1<mark |= LOAD; if(ad) break; s->size = sz; s->soffset = regoff(&p->from); s->used.cc |= E_MEMSB; break; } c = p->reg; if(c != NREG) { if(p->from.type == D_FREG || p->to.type == D_FREG) s->used.freg |= 1<used.ireg |= 1<set.ireg & (sb->set.ireg|sb->used.ireg)) return 1; if(sb->set.ireg & sa->used.ireg) return 1; if(sa->set.freg & (sb->set.freg|sb->used.freg)) return 1; if(sb->set.freg & sa->used.freg) return 1; if(sa->set.cr & (sb->set.cr|sb->used.cr)) return 1; if(sb->set.cr & sa->used.cr) return 1; x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | (sb->set.cc & sa->used.cc); if(x) { /* * allow SB and SP to pass each other. * allow SB to pass SB iff doffsets are ok * anything else conflicts */ if(x != E_MEMSP && x != E_MEMSB) return 1; x = sa->set.cc | sb->set.cc | sa->used.cc | sb->used.cc; if(x & E_MEM) return 1; if(offoverlap(sa, sb)) return 1; } return 0; } int offoverlap(Sch *sa, Sch *sb) { if(sa->soffset < sb->soffset) { if(sa->soffset+sa->size > sb->soffset) return 1; return 0; } if(sb->soffset+sb->size > sa->soffset) return 1; return 0; } /* * test 2 adjacent instructions * and find out if inserted instructions * are desired to prevent stalls. * first instruction is a load instruction. */ int conflict(Sch *sa, Sch *sb) { if(sa->set.ireg & sb->used.ireg) return 1; if(sa->set.freg & sb->used.freg) return 1; if(sa->set.cr & sb->used.cr) return 1; return 0; } int compound(Prog *p) { Optab *o; o = oplook(p); if(o->size != 4) return 1; if(p->to.type == D_REG && p->to.reg == REGSB) return 1; return 0; } void dumpbits(Sch *s, Dep *d) { int i; for(i=0; i<32; i++) if(d->ireg & (1<freg & (1<cr & (1<cc & (1<size); break; case E_MEMSB: Bprint(&bso, " SB%d", s->size); break; case E_MEMSP: Bprint(&bso, " SP%d", s->size); break; } }