📄 assemble.c
字号:
/* assemble.c code generation for the Netwide Assembler * * The Netwide Assembler is copyright (C) 1996 Simon Tatham and * Julian Hall. All rights reserved. The software is * redistributable under the licence given in the file "Licence" * distributed in the NASM archive. * * the actual codes (C syntax, i.e. octal): * \0 - terminates the code. (Unless it's a literal of course.) * \1, \2, \3 - that many literal bytes follow in the code stream * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS * (POP is never used for CS) depending on operand 0 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending * on operand 0 * \10, \11, \12 - a literal byte follows in the code stream, to be added * to the register value of operand 0, 1 or 2 * \17 - encodes the literal byte 0. (Some compilers don't take * kindly to a zero byte in the _middle_ of a compile time * string constant, so I had to put this hack in.) * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit * assembly mode or the operand-size override on the operand * \37 - a word constant, from the _segment_ part of operand 0 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2 * \44, \45, \46 - select between \3[012] and \4[012] depending on 16/32 bit * assembly mode or the address-size override on the operand * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit * assembly mode or the operand-size override on the operand * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2 * \1ab - a ModRM, calculated on EA in operand a, with the spare * field the register value of operand b. * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 * is a signed byte rather than a word. * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 * is a signed byte rather than a dword. * \2ab - a ModRM, calculated on EA in operand a, with the spare * field equal to digit b. * \30x - might be an 0x67 byte, depending on the address size of * the memory reference in operand x. * \310 - indicates fixed 16-bit address size, i.e. optional 0x67. * \311 - indicates fixed 32-bit address size, i.e. optional 0x67. * \312 - (disassembler only) marker on LOOP, LOOPxx instructions. * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66. * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66. * \322 - indicates that this instruction is only valid when the * operand size is the default (instruction to disassembler, * generates no code in the assembler) * \330 - a literal byte follows in the code stream, to be added * to the condition code value of the instruction. * \331 - instruction not valid with REP prefix. Hint for * disassembler only; for SSE instructions. * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep. * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded * as a literal byte in order to aid the disassembler. * \340 - reserve <operand 0> bytes of uninitialised storage. * Operand 0 had better be a segmentless constant. * \370,\371,\372 - match only if operand 0 meets byte jump criteria. * 370 is used for Jcc, 371 is used for JMP. * \373 - assemble 0x03 if bits==16, 0x05 if bits==32; * used for conditional jump over longer jump */#include <stdio.h>#include <string.h>#include "nasm.h"#include "nasmlib.h"#include "assemble.h"#include "insns.h"#include "preproc.h"extern struct itemplate *nasm_instructions[];typedef struct { int sib_present; /* is a SIB byte necessary? */ int bytes; /* # of bytes of offset needed */ int size; /* lazy - this is sib+bytes+1 */ unsigned char modrm, sib; /* the bytes themselves */} ea;static unsigned long cpu; /* cpu level received from nasm.c */static efunc errfunc;static struct ofmt *outfmt;static ListGen *list;static long calcsize (long, long, int, insn *, const char *);static void gencode (long, long, int, insn *, const char *, long);static int regval (operand *o);static int matches (struct itemplate *, insn *);static ea * process_ea (operand *, ea *, int, int, int);static int chsize (operand *, int);/* * This routine wrappers the real output format's output routine, * in order to pass a copy of the data off to the listing file * generator at the same time. */static void out (long offset, long segto, const void *data, unsigned long type, long segment, long wrt) { static long lineno = 0; /* static!!! */ static char *lnfname = NULL; if ((type & OUT_TYPMASK) == OUT_ADDRESS) { if (segment != NO_SEG || wrt != NO_SEG) { /* * This address is relocated. We must write it as * OUT_ADDRESS, so there's no work to be done here. */ list->output (offset, data, type); } else { unsigned char p[4], *q = p; /* * This is a non-relocated address, and we're going to * convert it into RAWDATA format. */ if ((type & OUT_SIZMASK) == 4) { WRITELONG (q, * (long *) data); list->output (offset, p, OUT_RAWDATA+4); } else { WRITESHORT (q, * (long *) data); list->output (offset, p, OUT_RAWDATA+2); } } } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) { list->output (offset, data, type); } else if ((type & OUT_TYPMASK) == OUT_RESERVE) { list->output (offset, NULL, type); } else if ((type & OUT_TYPMASK) == OUT_REL2ADR || (type & OUT_TYPMASK) == OUT_REL4ADR) { list->output (offset, data, type); } /* * this call to src_get determines when we call the * debug-format-specific "linenum" function * it updates lineno and lnfname to the current values * returning 0 if "same as last time", -2 if lnfname * changed, and the amount by which lineno changed, * if it did. thus, these variables must be static */ if (src_get(&lineno,&lnfname)) { outfmt->current_dfmt->linenum(lnfname,lineno,segto); } outfmt->output (segto, data, type, segment, wrt);}static int jmp_match (long segment, long offset, int bits, insn *ins, const char *code){ long isize; unsigned char c = code[0]; if (c != 0370 && c != 0371) return 0; if (ins->oprs[0].opflags & OPFLAG_FORWARD) { if ((optimizing<0 || (ins->oprs[0].type & STRICT)) && c==0370) return 1; else return (pass0==0); /* match a forward reference */ } isize = calcsize (segment, offset, bits, ins, code); if (ins->oprs[0].segment != segment) return 0; isize = ins->oprs[0].offset - offset - isize; /* isize is now the delta */ if (isize >= -128L && isize <= 127L) return 1; /* it is byte size */ return 0;} long assemble (long segment, long offset, int bits, unsigned long cp, insn *instruction, struct ofmt *output, efunc error, ListGen *listgen) { struct itemplate *temp; int j; int size_prob; long insn_end; long itimes; long start = offset; long wsize = 0; /* size for DB etc. */ errfunc = error; /* to pass to other functions */ cpu = cp; outfmt = output; /* likewise */ list = listgen; /* and again */ switch (instruction->opcode) { case -1: return 0; case I_DB: wsize = 1; break; case I_DW: wsize = 2; break; case I_DD: wsize = 4; break; case I_DQ: wsize = 8; break; case I_DT: wsize = 10; break; } if (wsize) { extop * e; long t = instruction->times; if (t < 0) errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t); while (t--) /* repeat TIMES times */ { for (e = instruction->eops; e; e = e->next) { if (e->type == EOT_DB_NUMBER) { if (wsize == 1) { if (e->segment != NO_SEG) errfunc (ERR_NONFATAL, "one-byte relocation attempted"); else { unsigned char out_byte = e->offset; out (offset, segment, &out_byte, OUT_RAWDATA+1, NO_SEG, NO_SEG); } } else if (wsize > 5) { errfunc (ERR_NONFATAL, "integer supplied to a D%c" " instruction", wsize==8 ? 'Q' : 'T'); } else out (offset, segment, &e->offset, OUT_ADDRESS+wsize, e->segment, e->wrt); offset += wsize; } else if (e->type == EOT_DB_STRING) { int align; out (offset, segment, e->stringval, OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG); align = e->stringlen % wsize; if (align) { align = wsize - align; out (offset, segment, "\0\0\0\0\0\0\0\0", OUT_RAWDATA+align, NO_SEG, NO_SEG); } offset += e->stringlen + align; } } if (t > 0 && t == instruction->times-1) { /* * Dummy call to list->output to give the offset to the * listing module. */ list->output (offset, NULL, OUT_RAWDATA); list->uplevel (LIST_TIMES); } } if (instruction->times > 1) list->downlevel (LIST_TIMES); return offset - start; } if (instruction->opcode == I_INCBIN) { static char fname[FILENAME_MAX]; FILE * fp; long len; char *prefix = "", *combine; char** pPrevPath = NULL; len = FILENAME_MAX-1; if (len > instruction->eops->stringlen) len = instruction->eops->stringlen; strncpy (fname, instruction->eops->stringval, len); fname[len] = '\0'; while (1) /* added by alexfru: 'incbin' uses include paths */ { combine = nasm_malloc(strlen(prefix) + len + 1); strcpy(combine, prefix); strcat(combine, fname); if ( (fp = fopen(combine, "rb")) != NULL) { nasm_free(combine); break; } nasm_free(combine); pPrevPath = pp_get_include_path_ptr (pPrevPath); if (pPrevPath == NULL) break; prefix = *pPrevPath; } if (fp == NULL) error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname); else if (fseek(fp, 0L, SEEK_END) < 0) error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'", fname); else { static char buf[2048]; long t = instruction->times; long base = 0; len = ftell (fp); if (instruction->eops->next) { base = instruction->eops->next->offset; len -= base; if (instruction->eops->next->next && len > instruction->eops->next->next->offset) len = instruction->eops->next->next->offset; } /* * Dummy call to list->output to give the offset to the * listing module. */ list->output (offset, NULL, OUT_RAWDATA); list->uplevel(LIST_INCBIN); while (t--) { long l; fseek (fp, base, SEEK_SET); l = len; while (l > 0) { long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l), fp); if (!m) { /* * This shouldn't happen unless the file * actually changes while we are reading * it. */ error (ERR_NONFATAL, "`incbin': unexpected EOF while" " reading file `%s'", fname); t=0; /* Try to exit cleanly */ break; } out (offset, segment, buf, OUT_RAWDATA+m, NO_SEG, NO_SEG); l -= m; } } list->downlevel(LIST_INCBIN); if (instruction->times > 1) { /* * Dummy call to list->output to give the offset to the * listing module. */ list->output (offset, NULL, OUT_RAWDATA); list->uplevel(LIST_TIMES); list->downlevel(LIST_TIMES); } fclose (fp); return instruction->times * len; } return 0; /* if we're here, there's an error */ } size_prob = FALSE; temp = nasm_instructions[instruction->opcode]; while (temp->opcode != -1) { int m = matches (temp, instruction); if (m == 99) m += jmp_match(segment, offset, bits, instruction, temp->code); if (m == 100) /* matches! */ { const char *codes = temp->code; long insn_size = calcsize(segment, offset, bits, instruction, codes); itimes = instruction->times; if (insn_size < 0) /* shouldn't be, on pass two */ error (ERR_PANIC, "errors made it through from pass one"); else while (itimes--) { for (j=0; j<instruction->nprefix; j++) { unsigned char c=0; switch (instruction->prefixes[j]) { case P_LOCK: c = 0xF0; break; case P_REPNE: case P_REPNZ: c = 0xF2; break; case P_REPE: case P_REPZ: case P_REP: c = 0xF3; break; case R_CS: c = 0x2E; break; case R_DS: c = 0x3E; break; case R_ES: c = 0x26; break; case R_FS: c = 0x64; break; case R_GS: c = 0x65; break; case R_SS: c = 0x36; break; case R_SEGR6: case R_SEGR7: error (ERR_NONFATAL, "segr6 and segr7 cannot be used as prefixes"); break; case P_A16: if (bits != 16) c = 0x67; break; case P_A32: if (bits != 32) c = 0x67; break; case P_O16: if (bits != 16) c = 0x66; break; case P_O32: if (bits != 32) c = 0x66; break; default: error (ERR_PANIC, "invalid instruction prefix"); } if (c != 0) { out (offset, segment, &c, OUT_RAWDATA+1, NO_SEG, NO_SEG); offset++; } } insn_end = offset + insn_size; gencode (segment, offset, bits, instruction, codes, insn_end); offset += insn_size; if (itimes > 0 && itimes == instruction->times-1) { /* * Dummy call to list->output to give the offset to the * listing module. */ list->output (offset, NULL, OUT_RAWDATA); list->uplevel (LIST_TIMES); } } if (instruction->times > 1) list->downlevel (LIST_TIMES); return offset - start; } else if (m > 0 && m > size_prob) { size_prob = m; } temp++; } if (temp->opcode == -1) { /* didn't match any instruction */ if (size_prob == 1) /* would have matched, but for size */ error (ERR_NONFATAL, "operation size not specified"); else if (size_prob == 2) error (ERR_NONFATAL, "mismatch in operand sizes"); else if (size_prob == 3) error (ERR_NONFATAL, "no instruction for this cpu level"); else error (ERR_NONFATAL, "invalid combination of opcode and operands"); } return 0;}long insn_size (long segment, long offset, int bits, unsigned long cp, insn *instruction, efunc error) { struct itemplate *temp; errfunc = error; /* to pass to other functions */ cpu = cp; if (instruction->opcode == -1) return 0; if (instruction->opcode == I_DB || instruction->opcode == I_DW || instruction->opcode == I_DD || instruction->opcode == I_DQ || instruction->opcode == I_DT) { extop *e; long isize, osize, wsize = 0; /* placate gcc */ isize = 0; switch (instruction->opcode) { case I_DB: wsize = 1; break; case I_DW: wsize = 2; break; case I_DD: wsize = 4; break; case I_DQ: wsize = 8; break; case I_DT: wsize = 10; break; } for (e = instruction->eops; e; e = e->next) { long align; osize = 0; if (e->type == EOT_DB_NUMBER) osize = 1; else if (e->type == EOT_DB_STRING) osize = e->stringlen; align = (-osize) % wsize;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -