⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assemble.c

📁 开源的nasm编译器源码,研究编译器原理很有帮助
💻 C
📖 第 1 页 / 共 3 页
字号:
/* assemble.c   code generation for the Netwide Assembler * * The Netwide Assembler is copyright (C) 1996 Simon Tatham and * Julian Hall. All rights reserved. The software is * redistributable under the licence given in the file "Licence" * distributed in the NASM archive. * * the actual codes (C syntax, i.e. octal): * \0            - terminates the code. (Unless it's a literal of course.) * \1, \2, \3    - that many literal bytes follow in the code stream * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS *                 (POP is never used for CS) depending on operand 0 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending *                 on operand 0 * \10, \11, \12 - a literal byte follows in the code stream, to be added *                 to the register value of operand 0, 1 or 2 * \17           - encodes the literal byte 0. (Some compilers don't take *                 kindly to a zero byte in the _middle_ of a compile time *                 string constant, so I had to put this hack in.) 
* \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit *                 assembly mode or the operand-size override on the operand * \37           - a word constant, from the _segment_ part of operand 0 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2 * \44, \45, \46 - select between \3[012] and \4[012] depending on 16/32 bit *                 assembly mode or the address-size override on the operand * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit *                 assembly mode or the operand-size override on the operand * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2 * \1ab          - a ModRM, calculated on EA in operand a, with the spare *                 field the register value of operand b. * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 *		    is a signed byte rather than a word. * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2 *		    is a signed byte rather than a dword. * \2ab          - a ModRM, calculated on EA in operand a, with the spare *                 field equal to digit b. * \30x          - might be an 0x67 byte, depending on the address size of *                 the memory reference in operand x. * \310          - indicates fixed 16-bit address size, i.e. optional 0x67. * \311          - indicates fixed 32-bit address size, i.e. optional 0x67. 
* \312		 - (disassembler only) marker on LOOP, LOOPxx instructions. * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66. * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66. * \322          - indicates that this instruction is only valid when the *                 operand size is the default (instruction to disassembler, *                 generates no code in the assembler) * \330          - a literal byte follows in the code stream, to be added *                 to the condition code value of the instruction. * \331		 - instruction not valid with REP prefix.  Hint for *                 disassembler only; for SSE instructions. * \332		 - disassemble a rep (0xF3 byte) prefix as repe not rep. * \333		 - REP prefix (0xF3 byte); for SSE instructions.  Not encoded *		   as a literal byte in order to aid the disassembler. * \340          - reserve <operand 0> bytes of uninitialised storage. *                 Operand 0 had better be a segmentless constant. * \370,\371,\372 - match only if operand 0 meets byte jump criteria. *		   370 is used for Jcc, 371 is used for JMP. * \373		 - assemble 0x03 if bits==16, 0x05 if bits==32; *		   used for conditional jump over longer jump */#include <stdio.h>#include <string.h>#include "nasm.h"#include "nasmlib.h"#include "assemble.h"#include "insns.h"#include "preproc.h"extern struct itemplate *nasm_instructions[];typedef struct {    int sib_present;		       /* is a SIB byte necessary? 
*/    int bytes;			       /* # of bytes of offset needed */    int size;			       /* lazy - this is sib+bytes+1 */    unsigned char modrm, sib;	       /* the bytes themselves */} ea;static unsigned long cpu;		/* cpu level received from nasm.c */static efunc errfunc;static struct ofmt *outfmt;static ListGen *list;static long calcsize (long, long, int, insn *, const char *);static void gencode (long, long, int, insn *, const char *, long);static int  regval (operand *o);static int  matches (struct itemplate *, insn *);static ea * process_ea (operand *, ea *, int, int, int);static int  chsize (operand *, int);/* * This routine wrappers the real output format's output routine, * in order to pass a copy of the data off to the listing file * generator at the same time. */static void out (long offset, long segto, const void *data, unsigned long type,		 long segment, long wrt) {    static long lineno = 0;       /* static!!! */    static char *lnfname = NULL;    if ((type & OUT_TYPMASK) == OUT_ADDRESS) {	if (segment != NO_SEG || wrt != NO_SEG) {	    /*	     * This address is relocated. We must write it as	     * OUT_ADDRESS, so there's no work to be done here.	     */	    list->output (offset, data, type);	} 	else {	    unsigned char p[4], *q = p;	    /*	     * This is a non-relocated address, and we're going to	     * convert it into RAWDATA format.	     
*/	    if ((type & OUT_SIZMASK) == 4) {		WRITELONG (q, * (long *) data);		list->output (offset, p, OUT_RAWDATA+4);	    } 	    else {		WRITESHORT (q, * (long *) data);		list->output (offset, p, OUT_RAWDATA+2);	    }	}    }     else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {	list->output (offset, data, type);    }     else if ((type & OUT_TYPMASK) == OUT_RESERVE) {	list->output (offset, NULL, type);    }     else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||	       (type & OUT_TYPMASK) == OUT_REL4ADR) {	list->output (offset, data, type);    }    /*     * this call to src_get determines when we call the     * debug-format-specific "linenum" function     * it updates lineno and lnfname to the current values     * returning 0 if "same as last time", -2 if lnfname     * changed, and the amount by which lineno changed,     * if it did. thus, these variables must be static     */    if (src_get(&lineno,&lnfname))    {	outfmt->current_dfmt->linenum(lnfname,lineno,segto);    }    outfmt->output (segto, data, type, segment, wrt);}static int jmp_match (long segment, long offset, int bits,		insn *ins, const char *code){   long isize;    unsigned char c = code[0];    if (c != 0370 && c != 0371) return 0;    if (ins->oprs[0].opflags & OPFLAG_FORWARD) {	if ((optimizing<0 || (ins->oprs[0].type & STRICT))	    && c==0370) return 1;	else return (pass0==0);	/* match a forward reference */    }    isize = calcsize (segment, offset, bits, ins, code);    if (ins->oprs[0].segment != segment) return 0;    isize = ins->oprs[0].offset - offset - isize;	/* isize is now the delta */    if (isize >= -128L && isize <= 127L) return 1;	/* it is byte size */    return 0;}		long assemble (long segment, long offset, int bits, unsigned long cp,	       insn *instruction, struct ofmt *output, efunc error,	       ListGen *listgen) {    struct itemplate *temp;    int    j;    int    size_prob;    long   insn_end;    long   itimes;    long   start = offset;    long   wsize = 0;		       /* size for DB etc. 
*/    errfunc = error;		       /* to pass to other functions */    cpu = cp;    outfmt = output;		       /* likewise */    list = listgen;		       /* and again */    switch (instruction->opcode)     {	case   -1: return 0;	case I_DB: wsize = 1; break;	case I_DW: wsize = 2; break;	case I_DD: wsize = 4; break;	case I_DQ: wsize = 8; break;	case I_DT: wsize = 10; break;    }    if (wsize) {	extop  * e;	long   t = instruction->times;	if (t < 0)	    errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);	while (t--) 		       /* repeat TIMES times */	{	    for (e = instruction->eops; e; e = e->next) 	    {		if (e->type == EOT_DB_NUMBER) 		{		    if (wsize == 1) {			if (e->segment != NO_SEG)			    errfunc (ERR_NONFATAL,				     "one-byte relocation attempted");			else {			    unsigned char out_byte = e->offset;			    out (offset, segment, &out_byte, OUT_RAWDATA+1,				 NO_SEG, NO_SEG);			}		    } 		    else if (wsize > 5) {			errfunc (ERR_NONFATAL, "integer supplied to a D%c"				 " instruction", wsize==8 ? 'Q' : 'T');		    } 		    else			out (offset, segment, &e->offset,			     OUT_ADDRESS+wsize, e->segment,			     e->wrt);		    offset += wsize;		} 		else if (e->type == EOT_DB_STRING) 		{		    int align;		    out (offset, segment, e->stringval,			 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);		    align = e->stringlen % wsize;		    if (align) {			align = wsize - align;			out (offset, segment, "\0\0\0\0\0\0\0\0",			     OUT_RAWDATA+align, NO_SEG, NO_SEG);			}		    offset += e->stringlen + align;		}	    }	    if (t > 0 && t == instruction->times-1) 	    {		/*		 * Dummy call to list->output to give the offset to the		 * listing module.		 
*/		list->output (offset, NULL, OUT_RAWDATA);		list->uplevel (LIST_TIMES);	    }	}	if (instruction->times > 1)	    list->downlevel (LIST_TIMES);	return offset - start;    }    if (instruction->opcode == I_INCBIN)     {	static char fname[FILENAME_MAX];	FILE        * fp;	long        len;        char *prefix = "", *combine;        char** pPrevPath = NULL;	len = FILENAME_MAX-1;	if (len > instruction->eops->stringlen)	    len = instruction->eops->stringlen;	strncpy (fname, instruction->eops->stringval, len);	fname[len] = '\0';        while (1)             /* added by alexfru: 'incbin' uses include paths */        {          combine = nasm_malloc(strlen(prefix) + len + 1);          strcpy(combine, prefix);          strcat(combine, fname);          if ( (fp = fopen(combine, "rb")) != NULL)          {              nasm_free(combine);              break;          }          nasm_free(combine);          pPrevPath = pp_get_include_path_ptr (pPrevPath);          if (pPrevPath == NULL)              break;          prefix = *pPrevPath;        }        if (fp == NULL)	    error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);	else if (fseek(fp, 0L, SEEK_END) < 0)	    error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",		   fname);	else 	{	    static char buf[2048];	    long t = instruction->times;	    long base = 0;	    len = ftell (fp);	    if (instruction->eops->next) {		base = instruction->eops->next->offset;		len -= base;		if (instruction->eops->next->next &&		    len > instruction->eops->next->next->offset)		    len = instruction->eops->next->next->offset;	    }	    /*	     * Dummy call to list->output to give the offset to the	     * listing module.	     
*/	    list->output (offset, NULL, OUT_RAWDATA);	    list->uplevel(LIST_INCBIN);	    while (t--) 	    {		long l;		fseek (fp, base, SEEK_SET);				l = len;		while (l > 0) {		    long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),				    fp);		    if (!m) {			/*			 * This shouldn't happen unless the file			 * actually changes while we are reading			 * it.			 */			error (ERR_NONFATAL, "`incbin': unexpected EOF while"			       " reading file `%s'", fname);			t=0;  /* Try to exit cleanly */			break;		    }		    out (offset, segment, buf, OUT_RAWDATA+m,			 NO_SEG, NO_SEG);		    l -= m;		}	    }	    list->downlevel(LIST_INCBIN);	    if (instruction->times > 1) {		/*		 * Dummy call to list->output to give the offset to the		 * listing module.		 */		list->output (offset, NULL, OUT_RAWDATA);		list->uplevel(LIST_TIMES);		list->downlevel(LIST_TIMES);	    }	    fclose (fp);	    return instruction->times * len;	}	return 0;		       /* if we're here, there's an error */    }    size_prob = FALSE;    temp = nasm_instructions[instruction->opcode];    while (temp->opcode != -1) {	int m = matches (temp, instruction);	if (m == 99)	    m += jmp_match(segment, offset, bits, instruction, temp->code);	if (m == 100) 		       /* matches! 
*/	{	    const char *codes = temp->code;	    long insn_size = calcsize(segment, offset, bits,				      instruction, codes);	    itimes = instruction->times;	    if (insn_size < 0)	       /* shouldn't be, on pass two */	    	error (ERR_PANIC, "errors made it through from pass one");	    else while (itimes--) {		for (j=0; j<instruction->nprefix; j++) {		    unsigned char c=0;		    switch (instruction->prefixes[j]) {		      case P_LOCK:			c = 0xF0; break;		      case P_REPNE: case P_REPNZ:			c = 0xF2; break;		      case P_REPE: case P_REPZ: case P_REP:			c = 0xF3; break;		      case R_CS: c = 0x2E; break;		      case R_DS: c = 0x3E; break;		      case R_ES: c = 0x26; break;		      case R_FS: c = 0x64; break;		      case R_GS: c = 0x65; break;		      case R_SS: c = 0x36; break;		      case R_SEGR6:		      case R_SEGR7:			error (ERR_NONFATAL, "segr6 and segr7 cannot be used as prefixes");			break;		      case P_A16:			if (bits != 16)			    c = 0x67;			break;		      case P_A32:			if (bits != 32)			    c = 0x67;			break;		      case P_O16:			if (bits != 16)			    c = 0x66;			break;		      case P_O32:			if (bits != 32)			    c = 0x66;			break;		      default:			error (ERR_PANIC,			       "invalid instruction prefix");		    }		    if (c != 0) {			out (offset, segment, &c, OUT_RAWDATA+1,			     NO_SEG, NO_SEG);			offset++;		    }		}		insn_end = offset + insn_size;		gencode (segment, offset, bits, instruction, codes, insn_end);		offset += insn_size;		if (itimes > 0 && itimes == instruction->times-1) {		    /*		     * Dummy call to list->output to give the offset to the		     * listing module.		     
*/		    list->output (offset, NULL, OUT_RAWDATA);		    list->uplevel (LIST_TIMES);		}	    }	    if (instruction->times > 1)		list->downlevel (LIST_TIMES);	    return offset - start;	} else if (m > 0  &&  m > size_prob) {	    size_prob = m;	}	temp++;    }    if (temp->opcode == -1) {	       /* didn't match any instruction */	if (size_prob == 1)	       /* would have matched, but for size */	    error (ERR_NONFATAL, "operation size not specified");	else if (size_prob == 2)	    error (ERR_NONFATAL, "mismatch in operand sizes");	else if (size_prob == 3)	    error (ERR_NONFATAL, "no instruction for this cpu level");	else	    error (ERR_NONFATAL,		   "invalid combination of opcode and operands");    }    return 0;}long insn_size (long segment, long offset, int bits, unsigned long cp,		insn *instruction, efunc error) {    struct itemplate *temp;    errfunc = error;		       /* to pass to other functions */    cpu = cp;    if (instruction->opcode == -1)    	return 0;    if (instruction->opcode == I_DB ||	instruction->opcode == I_DW ||	instruction->opcode == I_DD ||	instruction->opcode == I_DQ ||	instruction->opcode == I_DT)     {	extop *e;	long isize, osize, wsize = 0;  /* placate gcc */	isize = 0;	switch (instruction->opcode) 	{	  case I_DB: wsize = 1; break;	  case I_DW: wsize = 2; break;	  case I_DD: wsize = 4; break;	  case I_DQ: wsize = 8; break;	  case I_DT: wsize = 10; break;	}	for (e = instruction->eops; e; e = e->next) 	{	    long align;	    osize = 0;	    if (e->type == EOT_DB_NUMBER)		osize = 1;	    else if (e->type == EOT_DB_STRING)		osize = e->stringlen;	    align = (-osize) % wsize;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -