/* * Intel 16 bit assembler for Plan 9 * Reference: Intel `The 8086 Family User's Manual, October 1979' * While there are more instructions that the modern Intel chips * understand, these are the safe ones. * * The assmebly language is of the Ritchie PDP-11 style. * * Copyright (c) 2007, Brantley Coile * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Brantley Coile ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include enum { NSYM = 1000, NRELO = 1000, NTMP = 500, NSSPACE = 50000, }; #define dbprint print typedef struct Sym Sym; typedef struct Exec Exec; typedef struct Operand Operand; typedef struct Itab Itab; typedef struct Tmp Tmp; typedef struct Relo Relo; typedef struct R R; typedef struct Name Name; struct Name /* format in .o */ { int name; int type; int value; }; struct Sym /* internal format */ { Sym *next; char *name; int type; int addr; }; #define N_UNDEF 0x0 #define N_ABS 0x2 #define N_TEXT 0x4 #define N_DATA 0x6 #define N_BSS 0x8 #define N_REG 0xa #define N_KW 0xe #define N_INST 0x10 #define N_FN 0x1f #define N_EXT 01 #define N_TYPE 0x1e struct Relo { int r_addr; uint r_symbolnum:24, r_pcrel:1, r_length:2, r_extern:1, :4; }; struct R { R *next; Sym *what; Relo r; }; struct Exec { int magic; int text; int data; int bss; int symsz; int entry; int trelo; int drelo; }; struct Tmp { int loc; int label; int type; }; struct Operand { int mode; int type; Sym *sym; int offset; char reg; char base; char index; char scale; }; struct Itab { char *name; char *fmt; int noperands; int mod; /* index to mod/rm operand */ int omask1, omask2; }; enum { Fxreg = 1<<0, Fareg = 1<<1, /* AL, AX, or EAX */ Fdisp = 1<<2, /* displayment */ Fimm = 1<<3, Fmodrm = 1<<4, /* requires mod/rm byte */ Fsreg = 1<<9, /* Seg Register */ Fabs = 1<<10, /* *prefix to operand */ Freg = Fxreg|Fareg, Fmod = Fareg|Freg|Fdisp|Fmodrm, BX = 013, BP = 015, SI = 016, DI = 017, Oindex = (1< 0) { print("%08p: ", f); for (i = 0; i < 8; i++) print("%08x ", *f++); print("\n"); n -= 4*8; } } void print_undefs(void) { Sym *sp; for (sp = last_sys_sym->next; sp; sp = sp->next) if ((sp->type&~N_EXT) == N_UNDEF) fprint(2, "as: %s: undefined\n", sp->name); } void fixup_undefs(void) /* make all undefined external */ { Sym *sp; for (sp = last_sys_sym->next; sp; sp = sp->next) if (sp->type == N_UNDEF) sp->type |= N_EXT; } int xwrite(Biobuf *bp, void *where, int len) { return Bwrite(bp, where, len); } /* * we can edit .Ls out because no relocation record points to them */ void edit_names(void) /* squeeze out .L */ { Sym *sp, **sq; sp = last_sys_sym->next; sq = &last_sys_sym->next; while (sp) { if (strncmp(sp->name, ".L", 2) == 0) *sq = sp->next; else sq = &sp->next; sp = sp->next; } } void set_relo(Sym *sp, int snum) /* find an set snum in relocation table */ { R *rp; for (rp = trelo; rp; rp = rp->next) if (sp == rp->what) rp->r.r_symbolnum = snum; for (rp = drelo; rp; rp = rp->next) if (sp == rp->what) rp->r.r_symbolnum = snum; } int pack_names(int *nsize) /* put strings into string space */ { char *cp; Sym *sp; Name *np; int ne = 0, slen = 0, cnt; /* scan symbol table counting entries and string sizes */ for (sp = last_sys_sym->next; sp; sp = sp->next) { ne++; slen += strlen(sp->name)+1; } sspace = malloc(slen + 4); nspace = malloc(ne * sizeof (Name)); *nsize = ne * sizeof (Name); cp = sspace; cp += 4; /* save room for total string length */ np = nspace; cnt = 0; for (sp = last_sys_sym->next; sp; sp = sp->next) { strcpy(cp, sp->name); np->type = sp->type; np->value = sp->addr; np->name = cp - sspace; set_relo(sp, cnt++); cp += strlen(cp)+1; np++; } *(int *)sspace = cp - sspace; return cp - sspace; } int copyout_relo(Biobuf *bp, R *r) /* format and copyout relocation information */ { int nr = 0; R *rp; Relo *p; for (rp = r; rp; rp = rp->next) { /* if (rp->what && rp->r.r_symbolnum == 0) fatal("%s: relocation reference", rp->what->name); */ nr++; } rspace = (Relo *)malloc(nr * sizeof *rspace); p = rspace; for (rp = r; rp; rp = rp->next) *p++ = rp->r; Bwrite(bp, rspace, sizeof *rspace * nr); free(rspace); return nr * sizeof *rspace; } void store_image(void) { Exec e; int i, nsyms; if (!Lflag) edit_names(); /* take out .L symbols */ i = pack_names(&nsyms); /* get strings packed up to go */ e.magic = 0407; Bseek(outbp, sizeof e, 0); e.text = block_copyout(outbp, tbp); e.data = block_copyout(outbp, dbp); e.trelo = copyout_relo(outbp, trelo); e.drelo = copyout_relo(outbp, drelo); xwrite(outbp, nspace, nsyms); xwrite(outbp, sspace, i); e.bss = (locs[2] + 511) & ~0x1FF; e.entry = 0x1023; e.symsz = nsyms; Bseek(outbp, 0, 0); Bwrite(outbp, &e, sizeof e); } void dot_byte(char *); void dot_word(char *); void dot_long(char *); void dot_text(char *); void dot_bss(char *); void dot_data(char *); void dot_align(char *); void dot_space(char *); void dot_globl(char *); void dot_lcomm(char *); void dot_comm(char *); void not_yet(char *); void do_imul(char *); void do_pop(char *); void do_push(char *); void do_rot(char *); void do_shift(char *); void dot_set(char *); void do_ljmp(char *); void do_lcall(char *); void init_kw(void) /* put things in the symbol table */ { Itab *ip; /* * registers use octal incoding of classes of register * The register value is the least sig 3 bits. * 1 16 * 2 8 bit register * 3 segment register */ install("ax", 010, N_REG); install("cx", 011, N_REG); install("dx", 012, N_REG); install("bx", 013, N_REG); install("sp", 014, N_REG); install("bp", 015, N_REG); install("si", 016, N_REG); install("di", 017, N_REG); install("al", 020, N_REG); install("cl", 021, N_REG); install("dl", 022, N_REG); install("bl", 023, N_REG); install("ah", 024, N_REG); install("ch", 025, N_REG); install("dh", 026, N_REG); install("bh", 027, N_REG); install("es", 030, N_REG); install("cs", 031, N_REG); install("ss", 032, N_REG); install("ds", 033, N_REG); install("fs", 034, N_REG); /* with care */ install(".align", (int)dot_align, N_KW); install(".bss", (int)dot_bss, N_KW); install(".byte", (int)dot_byte, N_KW); install(".comm", (int)dot_comm, N_KW); install(".data", (int)dot_data, N_KW); install(".globl", (int)dot_globl, N_KW); install(".lcomm", (int)dot_lcomm, N_KW); install(".set", (int)dot_set, N_KW); install(".size", (int)not_yet, N_KW); install(".space", (int)dot_space, N_KW); install(".text", (int)dot_text, N_KW); install(".type", (int)not_yet, N_KW); install(".ident", (int)not_yet, N_KW); install(".word", (int)dot_word, N_KW); /* instructions that don't fit the pattern matching algorithim */ install("imul", (int)do_imul, N_KW); install("imulb", (int)do_imul, N_KW); install("lcall", (int)do_lcall, N_KW); install("ljmp", (int)do_ljmp, N_KW); install("pop", (int)do_pop, N_KW); install("pushb", (int)do_push, N_KW); install("push", (int)do_push, N_KW); install("salb", (int) do_shift, N_KW); install("sal", (int) do_shift, N_KW); install("sarb", (int) do_shift, N_KW); install("sar", (int) do_shift, N_KW); install("shlb", (int) do_shift, N_KW); install("shl", (int) do_shift, N_KW); install("shrb", (int) do_shift, N_KW); install("shr", (int) do_shift, N_KW); install("rclb", (int) do_rot, N_KW); install("rcl", (int) do_rot, N_KW); install("rcrb", (int) do_rot, N_KW); install("rcr", (int) do_rot, N_KW); install("rolb", (int) do_rot, N_KW); install("rol", (int) do_rot, N_KW); install("rorb", (int) do_rot, N_KW); install("ror", (int) do_rot, N_KW); for (ip = itab; ip->name; ip++) install(ip->name, (int)ip, N_INST); last_sys_sym = last_sym; } void install(char *name, ulong value, int type) /* insert symbol table entry */ { Sym *sp; allow_reg = 1; sp = lookup(name); allow_reg = 0; sp->type = type; sp->addr = value; } void fatal(char *msg, ...) { va_list arg; fprint(2, "Fatal error: %s:%d: ", filename, lineno); va_start(arg, msg); vfprint(2, msg, arg); va_end(arg); fprint(2, "\n"); *(int *)0 = 0; exits("fatal"); } void error(char *msg, ...) // nonfatal error message { va_list arg; errcnt++; fprint(2, "Error: %s:%d: ", filename, lineno); va_start(arg, msg); vfprint(2, msg, arg); va_end(arg); fprint(2, "\n"); longjmp(env, 1); } void warning(char *msg, ...) { va_list arg; fprint(2, "Warning: %s:%d: ", filename, lineno); va_start(arg, msg); vfprint(2, msg, arg); va_end(arg); fprint(2, "\n"); } int get_num(void) { int val = 0; if (*lexp == '0') { lexp++; if (*lexp == 'x' || *lexp == 'X') { lexp++; while (isxdigit(*lexp)) { val <<= 4; if (*lexp >= '0' && *lexp <= '9') val += *lexp - '0'; else if (*lexp >= 'a' && *lexp <= 'f') val += *lexp - 'a' + 10; else val += *lexp - 'A' + 10; lexp++; } return val; } while (isdigit(*lexp)) { val <<= 3; val += *lexp++ - '0'; } return val; } while (isdigit(*lexp)) { val *= 10; val += *lexp++ - '0'; } return val; } char * get_ident(void) { char *p, buf[512]; p = buf; *p++ = *lexp++; while (isalnum(*lexp) || *lexp == '_' || *lexp == '.') *p++ = *lexp++; *p = 0; return strdup(buf); } char * get_string(int closec) { char *p, buf[512]; int val = 0; p = buf; while (*lexp && *lexp != closec) { if (*lexp != '\\') *p++ = *lexp++; else if (lexp[1] == '\\') *p++ = '\\'; else switch (lexp[1]) { case 'b': *p++ = '\b'; lexp += 2; break; case 't': *p++ = '\t'; lexp += 2; break; case 'r': *p++ = '\r'; lexp += 2; break; case 'n': *p++ = '\n'; lexp += 2; break; case '>': *p++ = '>'; lexp += 2; break; case '0': lexp += 2; while (isdigit(*lexp)) { val <<= 3; val += *lexp - '0'; } *p++ = val; break; default: error("bad char in string"); } } *p = 0; slen = p - buf; if (*lexp == 0) error("newline in string"); lexp++; return strdup(buf); } int get_quoted(void) { int val = 0; lexp++; if (*lexp != '\\') val = *lexp++; else if (lexp[1] == '\\') { lexp += 2; val = '\\'; } else switch (lexp[1]) { case 'b': val = '\b'; lexp += 2; break; case 't': val = '\t'; lexp += 2; break; case 'r': val = '\r'; lexp += 2; break; case 'n': val = '\n'; lexp += 2; break; case '"': val = '"'; lexp += 2; break; case '0': lexp += 2; while (isdigit(*lexp)) { val <<= 3; val += *lexp - '0'; lexp++; } break; default: error("bad char in char const"); } if (*lexp != '\'') error("buggered-up character constant"); else lexp++; return val; } char line[256]; int op; /* true if we know we are in an operator; for '-' */ void rlex(void) { if (eof) { tok = TEOF; return; } if (lexp == 0) { read_again: lexp = Brdline(src, '\n'); if (lexp == 0) { tok = TEOF; eof = 1; return; } lineno++; } again: while (*lexp == ' ' || *lexp == '\t') lexp++; switch (*lexp) { case 0: goto read_again; case '-': case ',': case '(': case ')': case ':': case '=': case '+': case '*': case '@': case '"': /* XXX should parse string */ case '$': tok = *lexp++; return; case '/': if (lexp[1] == '*') { /* comments */ lexp += 2; for (;;) { while (*lexp != '*' && *lexp != '\n') lexp++; if (*lexp == '\n') { lexp = Brdline(src, '\n'); if (lexp == 0) { tok = TEOF; eof = 1; return; } lineno++; } else if (lexp[1] == '/') { lexp += 2; goto again; } else lexp++; } } while (*lexp != '\n' && *lexp != ';') lexp++; tok = '\n'; lexp = 0; return; case '<': if (lexp[1] == '<') { lexp += 2; tok = TLSHIFT; return; } tok = *lexp++; return; case '>': if (lexp[1] == '>') { lexp += 2; tok = TRSHIFT; return; } tok = *lexp++; return; case '\'': value = get_quoted(); tok = TCONST; return; case '\n': tok = *lexp; lexp = 0; return; case '\\': if (lexp[1] == '/') { lexp += 2; tok = '/'; return; } goto bad_char; case ';': lexp++; tok = '\n'; return; default: if (isdigit(*lexp)) { value = get_num(); tok = TCONST; return; } if (isalpha(*lexp) || *lexp == '_' || *lexp == '.' || *lexp == '%') { name = get_ident(); tok = TNAME; return; } bad_char: fprint(2, "%s:%d illegal character %c\n", filename, lineno, *lexp); lexp++; goto again; } } void pr_tok(void) { print("%s:%d: ", filename, lineno); switch (tok) { case ',': case '(': case ')': case ':': case ';': case '=': case '+': case '-': case '*': case '#': case '$': print(" %c\n", tok); break; case TCONST: print(" TCONST %d\n", value); break; case TNAME: print(" TNAME %s\n", name); break; case TRSHIFT: print(" TRSHIFT\n"); break; case TLSHIFT: print(" TLSHIFT\n"); break; case '\n': print(" NEWLINE\n"); break; case TEOF: print("EOF\n"); break; default: print("??? %c\n", tok); break; } } void lex(void) { rlex(); if (pass_no == 1 && lineno == 25000000) pr_tok(); } Sym * lookup(char *name) /* find non-instruction */ { Sym *sp; for (sp = symtab; sp; sp = sp->next) if (strcmp(sp->name, name) == 0) if (sp->type != N_INST) if (sp->type != N_KW) return sp; sp = (Sym *)malloc(sizeof *sp); if (last_sym) last_sym->next = sp; else symtab = sp; last_sym = sp; sp->name = strdup(name); sp->type = 0; sp->addr = 0; sp->next = 0; return sp; } Sym * lookupi(char *name) /* find instruction */ { Sym *sp; for (sp = symtab; sp; sp = sp->next) if (strcmp(sp->name, name) == 0) if (sp->type == N_INST || sp->type == N_KW) return sp; return 0; } Itab * omatch(char *name) { Operand *p; Itab *ip; for (ip = itab; ip->name; ip++) if (strcmp(ip->name, name) == 0) break; if (ip->name == 0) error("illegal opcode <%s>", name); for ( ; ip->name && strcmp(ip->name, name) == 0; ip++) { if (noprn != ip->noperands) continue; p = oprn; if (noprn > 0 && ((p->mode & ip->omask1) == 0 || p->mode & ~ip->omask1)) continue; if ((p->mode & Fabs) && (ip->omask1 & Fabs) == 0) continue; if (noprn > 1 && ((p[1].mode & ip->omask2) == 0 || p[1].mode & ~ip->omask2)) continue; if ((p[1].mode & Fabs) && (ip->omask2 & Fabs) == 0) continue; return ip; } pr_oper(); error("illegal operand"); return 0; } /* * build the symbol table */ void pass1(void) { char *np; Sym *sp, *sp2; int n; pass_no = 1; lineno = 0; locs[0] = locs[1] = locs[2] = 0; cur_loc = locs; if (setjmp(env)) { while (!eof && tok != '\n') lex(); } lex(); loop: if (tok == TEOF) return; if (tok == '\n') { lex(); goto loop; } if (tok == TNAME) { np = name; lex(); if (tok == ':') { sp = lookup(np); if ((sp->type&N_TYPE) != N_UNDEF) error("symbol already defined!"); sp->type = map_type(cur_loc - locs); sp->addr = *cur_loc; lex(); goto loop; } if (tok == '=') { sp = lookup(np); if (sp->type != N_UNDEF) error("symbol already defined!"); lex(); sp->addr = expr(&sp->type, &sp2); goto loop; } sp = lookupi(np); if (sp == 0) error("illegal instruction"); if (sp->type == N_KW) { (*(void(*)(char *))sp->addr)(np); goto loop; } if (sp->type == N_INST) { parseo(); mk_code(np); if (tok != '\n') error("extra stuff"); goto loop; } error("illegal instruction"); } if (tok == TCONST) { n = value; lex(); if (value >= 0 && value < 10) if (tok == ':') { mk_tmp(n); lex(); goto loop; } *cur_loc += 4; goto loop; } if (tok == '<') { get_string('>'); *cur_loc += slen; lex(); goto loop; } error("syntax"); } /* * pass over text again, this time generating the binary */ void pass2(void) { char *np; Sym *sp, *sp2; int n; pass_no = 2; lineno = 0; locs[0] = locs[1] = locs[2] = 0; cur_loc = locs; cur_bp = &tbp; cur_relo = &trelo; if (setjmp(env)) { while (tok != '\n') lex(); } lex(); loop: if (tok == TEOF) return; if (tok == '\n') { lex(); goto loop; } if (tok == TNAME) { np = name; lex(); if (tok == ':') { sp = lookup(np); if (sp->addr != *cur_loc && errcnt == 0) fatal("out of phase: %08x %08x", sp->addr, *cur_loc); lex(); goto loop; } if (tok == '=') { sp = lookup(np); lex(); sp->addr = expr(&sp->type, &sp2); goto loop; } sp = lookupi(np); if (sp == 0) error("illegal instruction"); if (sp->type == N_KW) { (*(void(*)(char *))sp->addr)(np); goto loop; } if (sp->type == N_INST) { parseo(); mk_code(np); if (tok != '\n') error("extra stuff"); goto loop; } } if (tok == TCONST) { n = value; lex(); if (value >= 0 && value < 10) if (tok == ':') { /* maybe I could check that the labels match */ lex(); goto loop; } put_const(n, 4); write_instr(); goto loop; } if (tok == '<') { char *cp; cp = get_string('>'); write_string(*cur_bp, cp, slen); *cur_loc += slen; lex(); goto loop; } error("syntax"); } void mk_tmp(int n) { Tmp *p; assert(nxt_tmp < NTMP); p = &tmp[nxt_tmp++]; p->loc = *cur_loc; p->type = map_type(cur_loc - locs); p->label = n; } int map_type(int loc_sub) /* map location counter subscript into type */ { return (loc_sub << 1) + N_TEXT; } int lookup_tmp(int n, int before, int *type) { Tmp *p, *q, *e; q = 0; e = &tmp[nxt_tmp]; if (before) { for (p = tmp; p < e; p++) { if (!cur_seg(p->type)) continue; if (p->loc > *cur_loc) break; if (p->label == n) q = p; } if (q == 0) error("missing temporary label %db", n); *type = q->type; return q->loc; } /* else after */ if (pass_no == 1) { *type = N_UNDEF; return 0; } for (p = tmp; p < e; p++) { if (!cur_seg(p->type)) continue; if (p->loc >= *cur_loc) break; } if (p >= e) error("temporary %df not found", n); while (p < e) { if (p->label == n) { *type = p->type; return p->loc; } p++; } error("temporary %df not found", n); return 0; } int term(int *type_p, Sym **spp) /* parse and evaluate expression */ { Sym *sp; int n; *spp = 0; if (tok == '-') { lex(); if (tok != TCONST) error("can only do unary minus with constant for now"); *type_p = N_ABS; n = value; lex(); return -n; } if (tok == TNAME) { sp = lookup(name); *type_p = sp->type; switch (sp->type & N_TYPE) { case N_TEXT: case N_DATA: case N_BSS: case N_ABS: case N_UNDEF: break; default: if (pass_no > 1) error("illegal type in expression"); } *spp = sp; lex(); if (sp->type & N_EXT) return 0; return sp->addr; } if (tok == TCONST) { if (value >= 0 && value <= 9) { n = value; lex(); if (tok == TNAME) if (name[0] == 'f' || name[0] == 'b') if (name[1] == 0) { lex(); if (pass_no != 2) return 0; return lookup_tmp(n, *name == 'b', type_p); } *type_p = N_ABS; return n; } *type_p = N_ABS; lex(); return value; } pr_tok(); error("syntax in expression"); return 0; } int prop_type(int t1, int t2, int op) /* propagate type */ { int a, b; if (pass_no == 1) return t1; a = t1 & N_TYPE; b = t2 & N_TYPE; if (a == N_UNDEF || b == N_UNDEF) return N_UNDEF; if (a == N_ABS && b == N_ABS) return N_ABS; switch (op) { case '+': if (a == N_TEXT || a == N_DATA || a == N_BSS || t1 == N_UNDEF+N_EXT) { if (b != N_ABS) error("illegal expression"); return t1; } if (b == N_TEXT || b == N_DATA || b == N_BSS || t2 == N_UNDEF+N_EXT) { if (a != N_ABS) error("illegal expression"); return t2; } error("illegal expression"); case '-': if (a == N_TEXT || a == N_DATA || a == N_BSS) { if (b == N_ABS) return t1; if (b == N_TEXT || b == N_DATA || b == N_BSS) return N_ABS; } /* fall thru */ default: error("illegal expression"); } return 0; } /* for now, we cheat and use the type of the first term */ int expr(int *type_p, Sym **spp) /* evaluate simple expressions */ { int t1, t2, v1, v2; Sym *sp1, *sp2; v1 = term(&t1, &sp1); op = 1; again: switch (tok) { case '+': lex(); op = 0; v2 = term(&t2, &sp2); t1 = prop_type(t1, t2, '+'); v1 += v2; if (sp1 && sp2) sp1 = 0; if (sp2) sp1 = sp2; break; case '-': lex(); op = 0; v2 = term(&t2, &sp2); t1 = prop_type(t1, t2, '-'); v1 -= v2; if (sp1 && sp2) sp1 = 0; if (sp2) sp1 = sp2; break; case TLSHIFT: lex(); op = 0; v2 = term(&t2, &sp2); t1 = prop_type(t1, t2, 0); v1 <<= v2; sp1 = 0; break; case TRSHIFT: lex(); op = 0; v2 = term(&t2, &sp2); t1 = prop_type(t1, t2, 0); v1 >>= v2; sp1 = 0; break; default: *type_p = t1; *spp = sp1; return v1; } goto again; } /* * Intel 486 syntax */ void parseo(void) /* parse zero, one or two operands */ { int i; Operand *p; Sym *sp; memset(oprn, 0, sizeof oprn); p = oprn; noprn = 0; i = 0; if (tok == '\n') return; for (;;) { if (tok == '*') { p->mode |= Fabs; lex(); } switch (tok) { case TNAME: sp = lookup(name); if (sp->type != N_REG) goto offset; if (sp->type == N_UNDEF) error("illegal register name:%s", name); p->type = N_REG; switch (sp->addr >> 3 & 7) { case 1: case 2: p->mode |= (sp->addr & 7) == 0 ? Fareg : Fxreg; break; case 3: p->mode |= Fsreg; break; default: error("illegal value for register (%s)", name); } p->reg = sp->addr; lex(); break; case '$': lex(); if (tok != TCONST && tok != TNAME && tok != '-') error("syntax"); p->mode |= Fimm; p->offset = expr(&p->type, &p->sym); break; case TCONST: case '-': offset: p->mode |= Fdisp; p->offset = expr(&p->type, &p->sym); if (tok != '(') break; /* fall thru */ case '(': p->index = 0; lex(); if (tok == TNAME) { sp = lookup(name); if (sp->type != N_REG) error("syntax0"); if ((sp->addr & 070) != 010) error("syntax1"); p->mode |= Fmodrm; p->base = sp->addr; lex(); if (!((1<base) & Oindex)) error("illegal index"); } if (tok == ',') { lex(); if (tok != TNAME) error("syntax2"); sp = lookup(name); if (sp->type != N_REG) error("synxtax3"); if ((sp->addr & 070) != 010) error("syntax4"); p->index = sp->addr; if (!((1<index) & Oindex)) error("illegal index"); if (p->base == p->index) error("duplicate index registers"); lex(); } if (tok != ')') error("syntax"); lex(); break; default: error("syntax in operand"); } if (p < &oprn[NOPRN]) p++; i++; if (tok != ',') break; lex(); } noprn = i; } void dbwrite(int loc, char *where, int len) { print("%8.8ux: %d: ", loc, len); while (len--) print("%2.2x", *where++ & 0xFF); print("\n"); } void mk_code(char *name) { Itab *ip; ip = omatch(name); format(ip); write_instr(); } void write_string(Blkptr *bp, char *where, int len) { block_write(bp, where, len); } void write_instr(void) { if (pass_no == 2) block_write(*cur_bp, instr, in_offset); *cur_loc += in_offset; in_offset = 0; } int cur_seg(int type) /* is this type the same as the current segment? */ { return cur_loc == &locs[type - N_TEXT >> 1]; } int mag(ulong v, int bytes) { ulong m; m = ~((1<<(bytes << 3-1)) - 1); /* msb in mask */ v &= m; if (v != 0 && (v ^ m) != 0) return 0; // too big return 1; } void /* make note of relocation requirement */ note_relo(Sym *sp, int type, int bytes, int pcrel) { R *rp; if (pass_no != 2) return; rp = (R *)malloc(sizeof *rp); memset(rp, 0, sizeof *rp); assert (bytes == 1 || bytes == 2); rp->r.r_pcrel = pcrel; rp->r.r_length = (bytes == 1 ? 0 : bytes == 2 ? 1 : 3); rp->r.r_addr = *cur_loc + in_offset; rp->r.r_symbolnum = 0; if (sp && sp->type & N_EXT) { rp->what = sp; rp->r.r_extern = sp->type & N_EXT; } else rp->r.r_symbolnum = type; rp->next = *cur_relo; *cur_relo = rp; } void put_const(int value, int bytes) /* stick into instruction space */ { assert(in_offset < sizeof instr); switch (bytes) { /* little endian */ case 2: instr[in_offset++] = value; value >>= 8; case 1: instr[in_offset++] = value; } } void rput(int offset, int type, int mode, Sym *sym, int n) { assert (n == 1 || n == 2); if (pass_no == 1) { in_offset += n; return; } type &= N_TYPE; if (mode & (Fdisp | Fimm)) if (type == N_TEXT || type == N_DATA || type == N_BSS || type == N_UNDEF) note_relo(sym, type, n, 0); put_const(offset, n); } void put(Operand *p, int n) { rput(p->offset, p->type, p->mode, p->sym, n); } void disp8(Operand *p) /* must be in same segment */ { int d, v; Sym *sp = p->sym; if (pass_no == 1) { put_const(0, 1); return; } if (sp) { if ((sp->type & N_TYPE) == N_UNDEF) error("disp8 undefined"); if (!cur_seg(sp->type & N_TYPE)) error("symbol in different segment"); v = sp->addr; } else v = p->offset; d = v - (*cur_loc+in_offset+1); if (d < -128 || d > 127) error("too far"); put_const(d, 1); } void disp_16(Operand *p) { int d, v; Sym *sp = p->sym; if (pass_no == 1) { put_const(0, 2); /* just to advance location counter */ return; } if (sp) { if (!(sp->type & N_EXT) && !cur_seg(sp->type & N_TYPE)) error("symbol in different segment"); v = sp->addr; } else v = p->offset; d = v - (*cur_loc+in_offset+2); if (d < -0x7fff || d > 0x7fff) error("disp too large"); if (sp && sp->type & N_EXT) { note_relo(sp, sp->type, 2, 1); put_const(0, 2); } else put_const(d, 2); } typedef struct { int mask; int rm; } RM; RM rm[] = { { 1<mode & (Fdisp|Fmodrm))) { b0 |= p->reg & 7; b0 |= 0300; put_const(b0, 1); return 1; } if (!(p->mode & Fmodrm)) { b0 |= 6; put_const(b0, 1); put(p, 2); return 3; } /* [disp](base[,index]) */ if (p->index == 0 && p->base == BP && !(p->mode & Fdisp)) { b0 |= 0106; put_const(b0, 1); put_const(0, 1); return 2; } m = 1<base; if (p->index != 0) m |= 1<index; for (rp = rm; rp->mask > 0; rp++) if (m == rp->mask) break; assert(rp->rm != -1); b0 |= rp->rm; if (p->mode & Fdisp) { b0 |= 0200; put_const(b0, 1); put(p, 2); return 3; } put_const(b0, 1); return 1; } int map_size(char let) { switch (let) { case 'b': return 1; case 'w': return 2; default: assert(0); } return 0; } /* * this function fills in the instruction buffer by * scanning the fmt string in the intstruction entry */ #define Fall_regs (Freg|Fsreg) void format(Itab *ip) /* format instruction using operands */ { Operand *reg = 0, *mod = 0, *imm, *disp = 0; char *cp; uchar byte; /* * this is a little tricky. * deduce where various things are. */ if (ip->mod > -1) { mod = &oprn[ip->mod]; if (mod->mode & Fdisp) disp = mod; if (ip->noperands > 1) if (oprn[ip->mod ^ 1].mode & Fall_regs) /* assume only two operands */ reg = &oprn[ip->mod ^ 1]; } else disp = &oprn[ip->noperands - 1]; /* always last */ imm = oprn; /* always first */ if (ip->noperands == 1) reg = oprn; if (ip->noperands == 2 && oprn->mode & Fimm && oprn[1].mode & Freg) reg = &oprn[1]; if (reg == 0) if (ip->omask1 & Fxreg) reg = oprn; else if (ip->omask2 & Fxreg) reg = &oprn[1]; /* now, loop over format string */ for (cp = ip->fmt; *cp; cp++) { switch (*cp) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': byte = *cp <= '9' ? *cp - '0' : *cp - 'A' + 10; byte <<= 4; byte |= *++cp <= '9' ? *cp - '0' : *cp - 'A' + 10; put_const(byte, 1); break; case ' ': break; case '/': assert(mod); switch (*++cp) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': put_modrm(*cp - '0', mod); break; case 'r': assert(reg); put_modrm(reg->reg, mod); break; default: assert(0); } break; case 'i': /* immediate stuff at the end */ assert(imm); switch (*++cp) { case 'b': if (imm->offset <0 || imm->offset > 0xff) error("immediate too large"); put(imm, map_size(*cp)); break; case 'w': if (imm->offset < -0x7fff || imm->offset > 0xffff) error("immediate too large"); put(imm, map_size(*cp)); break; case 'd': put(imm, map_size(*cp)); break; default: assert(0); } break; case 'c': /* call/jmp offsets */ assert(disp); /* * could make backward jmp's smaller by * doing some sort of checks here */ switch (*++cp) { case 'b': if (disp->mode & Fabs) error("too short for abs"); disp8(disp); break; case 'w': disp_16(disp); break; default: assert(0); } break; case '+': cp++; switch (*cp) { case 'r': assert(reg); instr[in_offset-1] += reg->reg & 7; break; default: assert(0); } break; default: assert(0); } } } /* * pseudo instruction */ void dot_word(char *) { int t, n; Sym *sp; for(;;) { n = expr(&t, &sp); rput(n, t, Fdisp, sp, 2); write_instr(); if (tok != ',') break; lex(); }; } void dot_byte(char *) { int t, n; Sym *sp; for (;;) { n = expr(&t, &sp); put_const(n, 1); write_instr(); if (tok != ',') break; lex(); } } /* * by aligning with 0x90 (nop) we can reliabily dis-assemble the text * segments */ void dot_align(char *) /* .align */ { int n; if (tok != TCONST) error("align expects constant"); if (value < 1 || value > 16 * 1024) error("align value out of range"); n = value; if (n == 0) return; if (pass_no == 2 && cur_loc != &locs[2]) while (*cur_loc % n) { block_write(*cur_bp, "\x90", 1); (*cur_loc)++; } else while (*cur_loc % n) (*cur_loc)++; lex(); } void dot_lcomm(char *) { Sym *sp; if (cur_loc != &locs[2]) error(".lcomm not in bss"); if (tok != TNAME) error("missing name in .lcomm"); sp = lookup(name); sp->addr = *cur_loc; sp->type = N_BSS; lex(); if (tok != ',') error("syntax"); lex(); if (tok == TNAME) { sp = lookup(name); if ((sp->type & N_TYPE) == N_UNDEF) error(".lcomm size must be defined in first pass"); *cur_loc += sp->addr; return; } if (tok == TCONST) { *cur_loc += value; lex(); return; } error("syntax"); } void dot_set(char *) { Sym *sp0, *sp; if (tok != TNAME) error("missing name in .set"); sp = lookup(name); lex(); if (tok != ',') error("syntax"); lex(); sp->addr = expr(&sp->type, &sp0); } void dot_space(char *) { Sym *sp; int len, size; SET(size); if (tok == TNAME) { sp = lookup(name); if ((sp->type & N_TYPE) == N_UNDEF) error(".space parameter must be defined in first pass"); size = sp->addr; } else if (tok == TCONST) size = value; else error("syntax"); if (pass_no == 2 && cur_loc != &locs[2]) { for (len = size; len > 4; len -= 4) block_write(*cur_bp, "\0\0\0\0", 4); if (len) block_write(*cur_bp, "\0\0\0\0", len); } *cur_loc += size; lex(); } void dot_globl(char *) { Sym *sp; while (tok == TNAME) { sp = lookup(name); sp->type |= N_EXT; lex(); if (tok != ',') break; lex(); } } void dot_comm(char *) { Sym *sp, *sp2; int type; if (pass_no == 2) { while (tok != '\n') lex(); return; } if (tok != TNAME) error(".comm expected name"); sp = lookup(name); if ((sp->type & N_TYPE) != N_UNDEF) error("%s already defined", name); lex(); if (tok != ',') error(".comm expected ','"); lex(); sp->addr = expr(&type, &sp2); sp->type |= N_EXT; } void dot_text(char *) { cur_bp = &tbp; cur_relo = &trelo; cur_loc = locs; } void dot_data(char *) { cur_bp = &dbp; cur_relo = &drelo; cur_loc = &locs[1]; } void dot_bss(char *) { cur_bp = 0; cur_loc = &locs[2]; } void not_yet(char *) { while (tok != '\n') lex(); } void lusyms(void) /* list the user's symbols */ { Sym *sp; for (sp = last_sys_sym->next; sp; sp = sp->next) print("%08p(%04ld): %08x %08x %s\n", sp, sp - symtab, sp->type, sp->addr, sp->name); } /* stuff to save temporary code and relocation bits */ Blkptr * block_open(void) /* get new open block */ { Blkptr *bp; for (bp = blkptrs; bp->first; bp++) ; if (bp == &blkptrs[NBLKPTRS]) abort(); bp->last = bp->first = malloc(sizeof *bp->first); bp->last->len = 0; bp->last->next = 0; return bp; } void block_write(Blkptr *bp, void *what, int len) /* write into block */ { Block *p; p = bp->last; if (len <= BLKLEN - p->len) { memcpy(&p->block[p->len], what, len); p->len += len; return; } bp->last->next = malloc(sizeof *bp->last->next); bp->last = bp->last->next; p = bp->last; p->len = len; p->next = 0; memcpy(p->block, what, len); } int block_copyout(Biobuf *bp, Blkptr *blkp) /* put block into file */ { int size; Block *p; size = 0; for (p = blkp->first; p; p = p->next) { size += p->len; Bwrite(bp, p->block, p->len); } return size; } void dumpb(char *p, int len) { while (len--) print("%02x%c", *p++ & 0xff, ((int)p & 0xF) == 0 ? '\n' : ' '); print("\n"); } void pr_oper(void) { int n; Operand *p; for (n = noprn, p = oprn; n--; p++) print("mode 0x%x, type %x, sym %s, offset %x, reg %o, base %o, index %o, scale %d\n", p->mode, p->type, p->sym ? p->sym->name : "nil", p->offset, p->reg, p->base, p->index, p->scale); } /* * instructions that don't fit well into the search algorithm */ void do_imul(char *name) { int size, m0, m1; if (strcmp(name, "imul") == 0) size = 'w'; else size = name[strlen(name)-1]; parseo(); switch (noprn) { case 1: switch (size) { case 'b': put_const(0xF6, 1); put_modrm(5, oprn); break; case 'w': put_const(0xF7, 1); put_modrm(5, oprn); break; default: assert(0); } break; case 2: m0 = oprn[0].mode; m1 = oprn[1].mode; if (oprn->mode == Fimm && (oprn[1].mode == Fareg || oprn[1].mode == Fxreg)) { switch (size) { case 'w': put_const(0x69, 1); put_modrm(oprn[1].reg, &oprn[1]); put(oprn, size == 'l' ? 4 : 2); break; default: goto err; } } else if ((m1 == Fareg || m1 == Fxreg) && (m0 & Fmod)) { switch (size) { case 'w': put_const(0x0F, 1); put_const(0xAF, 1); put_modrm(oprn[1].reg, oprn); break; default: goto err; } } else goto err; break; case 3: if (oprn->mode != Fimm) goto err; if ((oprn[1].mode & Fmod) == 0 || (oprn[1].mode & ~Fmod)) goto err; if (oprn[2].mode != Fareg && oprn[2].mode != Fxreg) goto err; if (size != 'w') if (size != 'l') goto err; put_const(0x69, 1); put_modrm(oprn[2].reg, &oprn[1]); put(oprn, size == 'l' ? 4 : 2); break; default: err: pr_oper(); error("illegal operand"); } write_instr(); } void do_pop(char *) { int size; size = 'w'; parseo(); if (noprn != 1) error("illegal operands"); if (oprn->mode == Fxreg || oprn->mode == Fareg) put_const(0x58 + (oprn->reg & 7), 1); else if ((oprn->mode & Fmod) && (oprn->mode & ~Fmod) == 0) { if (size == 'b') error("illegal operand"); put_const(0x8f, 1); put_modrm(0, oprn); } else if (oprn->mode == Fsreg) { switch (oprn->reg) { case 030: /* ES */ put_const(7, 1); break; case 031: /* CS */ error("illegal operand"); case 032: /* SS */ put_const(0x17, 1); break; case 033: /* DS */ put_const(0x1F, 1); break; case 034: /* FS */ put_const(0xF, 1); put_const(0xA1, 1); break; case 035: /* GS */ put_const(0xf, 1); put_const(0xA9, 1); break; default: error("illegal operand"); } } else error("illegal operand"); write_instr(); } void do_push(char *name) { int size; size = name[strlen(name)-1]; if (size != 'b') size = 'w'; parseo(); if (noprn != 1) error("illegal operands"); if (oprn->mode == Fxreg || oprn->mode == Fareg) { put_const(0x50 + (oprn->reg & 7), 1); } else if ((oprn->mode & Fmod) && (oprn->mode & ~Fmod) == 0) { if (size == 'b') error("illegal operand"); put_const(0xff, 1); put_modrm(6, oprn); } else if (oprn->mode == Fsreg) { if (size != 'l') error("illegal operand"); switch (oprn->reg) { case 030: /* ES */ put_const(6, 1); break; case 031: /* CS */ put_const(0xe, 1); break; case 032: /* SS */ put_const(0x16, 1); break; case 033: /* DS */ put_const(0x1e, 1); break; case 034: /* FS */ put_const(0xF, 1); put_const(0xA0, 1); break; case 035: /* GS */ put_const(0xf, 1); put_const(0xA8, 1); break; default: error("illegal operand"); } } else if (oprn->mode == Fimm) { switch (size) { case 'b': put_const(0x6a, 1); put(oprn, 1); break; case 'w': put_const(0x68, 1); put(oprn, size == 'w' ? 2 : 4); break; } } else error("illegal operand"); write_instr(); } void do_rot(char *name) { int op, s; s = name[strlen(name)-1]; if (s != 'b') s = 'w'; parseo(); if (noprn != 2) goto err; if (strncmp(name, "rcl", 3) == 0) op = 2; else if (strncmp(name, "rcr", 3) == 0) op = 3; else if (strncmp(name, "rol", 3) == 0) op = 0; else if (strncmp(name, "ror", 3) == 0) op = 1; else goto err; if ((oprn[1].mode & Fmod) == 0 || oprn[1].mode & ~Fmod) goto err; if (oprn->mode & Fimm) { if (oprn->offset == 1) { switch (s) { case 'b': put_const(0xd0, 1); put_modrm(op, &oprn[1]); break; case 'w': put_const(0xd1, 1); put_modrm(op, &oprn[1]); break; } } else if (oprn->offset < 128) { switch (s) { case 'b': put_const(0xc0, 1); put_modrm(op, &oprn[1]); put(oprn, 1); break; case 'w': put_const(0xc1, 1); put_modrm(op, &oprn[1]); put(oprn, 1); break; } } else goto err; } else if (oprn->mode & Fxreg && oprn->reg == 021) { /* CL */ switch (s) { case 'b': put_const(0xd2, 1); put_modrm(op, &oprn[1]); break; case 'w': put_const(0xd3, 1); put_modrm(op, &oprn[1]); break; } } else goto err; write_instr(); return; err: error("illegal operand"); } void do_shift(char *name) { int op, s; s = name[strlen(name)-1]; if (s != 'b') s = 'w'; parseo(); if (noprn != 2) goto err; if (strncmp(name, "sal", 3) == 0) op = 4; else if (strncmp(name, "sar", 3) == 0) op = 7; else if (strncmp(name, "shl", 3) == 0) op = 4; else if (strncmp(name, "shr", 3) == 0) op = 5; else goto err; if ((oprn[1].mode & Fmod) == 0 || oprn[1].mode & ~Fmod) goto err; if (oprn->mode & Fimm) { if (oprn->offset == 1) { switch (s) { case 'b': put_const(0xd0, 1); put_modrm(op, &oprn[1]); break; case 'w': put_const(0xd1, 1); put_modrm(op, &oprn[1]); break; } } else if (oprn->offset < 128) { switch (s) { case 'b': put_const(0xc0, 1); put_modrm(op, &oprn[1]); put(oprn, 1); break; case 'w': put_const(0xc1, 1); put_modrm(op, &oprn[1]); put(oprn, 1); break; } } else goto err; } else if (oprn->mode & Fxreg && oprn->reg == 021) { /* CL */ switch (s) { case 'b': put_const(0xd2, 1); put_modrm(op, &oprn[1]); break; case 'w': put_const(0xd3, 1); put_modrm(op, &oprn[1]); break; } } else goto err; write_instr(); return; err: error("illegal operand"); } void do_ljmp(char *) { int sel; Sym *sp; if (tok != TCONST) goto err; sel = value; lex(); if (tok != ':') goto err; lex(); if (tok != TNAME) goto err; sp = lookup(name); put_const(0xEA, 1); rput(sp->addr, sp->type, Fdisp, sp, 2); put_const(sel, 2); while (tok != '\n') lex(); write_instr(); return; err: error("syntax"); } void do_lcall(char *) { int sel; Sym *sp; if (tok != TCONST) goto err; sel = value; lex(); if (tok != ':') goto err; lex(); if (tok != TNAME) goto err; sp = lookup(name); put_const(0x9A, 1); rput(sp->addr, sp->type, Fdisp, sp, 2); put_const(sel, 2); while (tok != '\n') lex(); write_instr(); return; err: error("syntax"); }