⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assemble.c

📁 一个汇编语言编译器源码
💻 C
📖 第 1 页 / 共 3 页
字号:
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \331		 - instruction not valid with REP prefix.  Hint for
 *                 disassembler only; for SSE instructions.
 * \332		 - disassemble a rep (0xF3 byte) prefix as repe not rep.
 * \333		 - REP prefix (0xF3 byte); for SSE instructions.  Not encoded
 *		   as a literal byte in order to aid the disassembler.
 * \340          - reserve <operand 0> bytes of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 */

#include <stdio.h>
#include <string.h>

#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"

/*
 * Instruction template table, defined outside this file.  assemble()
 * indexes it by instruction->opcode and then walks the resulting array
 * of templates until it reaches an entry whose opcode field is -1.
 */
extern struct itemplate *nasm_instructions[];

/*
 * Description of an encoded effective address: the ModRM byte, the
 * optional SIB byte, and how many offset bytes accompany them.
 */
typedef struct {
    /* non-zero when a SIB byte is necessary */
    int sib_present;
    /* number of bytes of offset needed */
    int bytes;
    /* convenience total, kept so callers need not recompute it:
     * sib + bytes + 1 */
    int size;
    /* the ModRM byte itself */
    unsigned char modrm;
    /* the SIB byte itself */
    unsigned char sib;
} ea;

/*
 * File-scope copies of the callbacks/handles handed to the public entry
 * points, so that the static helpers below can reach them without extra
 * parameters.  errfunc is set by both assemble() and insn_size();
 * outfmt and list are set by assemble().
 */
static efunc errfunc;
static struct ofmt *outfmt;
static ListGen *list;

/*
 * Forward declarations of the static helpers.
 *
 * calcsize - called by assemble() as (segment, offset, bits, instruction,
 *            codes); a negative result is treated there as an error that
 *            should have been caught on pass one.
 * gencode  - called by assemble() as (segment, offset, bits, instruction,
 *            codes, insn_end) to emit the instruction body.
 * matches  - compares a template against an instruction; assemble() treats
 *            100 as a full match and reports the values 1 and 2 as
 *            "operation size not specified" and "mismatch in operand
 *            sizes" respectively.
 * regval, process_ea, chsize - not called in the part of the file visible
 *            here; presumably used by calcsize/gencode/matches (TODO
 *            confirm against their definitions).
 */
static long calcsize (long, long, int, insn *, char *);
static void gencode (long, long, int, insn *, char *, long);
static int  regval (operand *o);
static int  matches (struct itemplate *, insn *);
static ea * process_ea (operand *, ea *, int, int, int);
static int  chsize (operand *, int);

/*
 * Wrapper around the real output format's output routine.  Every piece
 * of output is first shown to the listing generator (list->output) and
 * then handed, unmodified, to outfmt->output.
 *
 *   offset  - offset reported to the listing generator
 *   segto   - segment being written to (passed to outfmt->output and to
 *             the debug format's linenum hook)
 *   data    - payload; for OUT_ADDRESS it is read as a pointer to long
 *   type    - OUT_* type in the OUT_TYPMASK bits plus a size in the
 *             OUT_SIZMASK bits
 *   segment, wrt - relocation information; both NO_SEG means the value
 *             is not relocated
 */
static void out (long offset, long segto, void *data, unsigned long type,
		 long segment, long wrt) 
{
    static long cur_line;
    static char *cur_file;
    unsigned long kind = type & OUT_TYPMASK;

    if (kind == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
	/*
	 * A non-relocated address: the listing gets it converted into
	 * RAWDATA format, as either four or two bytes.
	 */
	unsigned char raw[4], *cursor = raw;
	long value = * (long *) data;

	if ((type & OUT_SIZMASK) == 4) {
	    WRITELONG (cursor, value);
	    list->output (offset, raw, OUT_RAWDATA+4);
	} else {
	    WRITESHORT (cursor, value);
	    list->output (offset, raw, OUT_RAWDATA+2);
	}
    } else if (kind == OUT_RESERVE) {
	/* Reserved space carries no data for the listing. */
	list->output (offset, NULL, type);
    } else if (kind == OUT_ADDRESS || kind == OUT_RAWDATA ||
	       kind == OUT_REL2ADR || kind == OUT_REL4ADR) {
	/*
	 * Relocated addresses (which must stay OUT_ADDRESS), raw data
	 * and relative addresses are passed to the listing untouched.
	 */
	list->output (offset, data, type);
    }

    /*
     * When src_get() returns non-zero, tell the debug format about the
     * file name and line number it filled in (presumably this means the
     * source position has changed - confirm against src_get).
     */
    if (src_get(&cur_line,&cur_file))
	outfmt->current_dfmt->linenum(cur_file,cur_line,segto);

    outfmt->output (segto, data, type, segment, wrt);
}

/*
 * Assemble one parsed instruction and emit its bytes through out().
 *
 *   segment, offset - where the instruction is placed; offset is advanced
 *                     locally as bytes are emitted
 *   bits            - assembly mode; compared against 16 and 32 to decide
 *                     whether an address/operand-size prefix needs a byte
 *   instruction     - the parsed instruction (opcode, times, eops,
 *                     prefixes and operands are read)
 *   output, error, listgen - output format, error callback and listing
 *                     generator; stored in file statics for the helpers
 *
 * Three kinds of instruction are handled:
 *   - DB/DW/DD/DQ/DT: each extended operand is written, repeated
 *     instruction->times times;
 *   - INCBIN: the named file (optionally from a start offset and limited
 *     to a maximum length) is copied to the output;
 *   - everything else: the first template in nasm_instructions[] for
 *     which matches() returns 100 is encoded via gencode().
 *
 * Returns the number of bytes generated (times * len for INCBIN), or 0
 * when the opcode is -1 or an error was reported.
 */
long assemble (long segment, long offset, int bits,
	       insn *instruction, struct ofmt *output, efunc error,
	       ListGen *listgen) 
{
    struct itemplate *temp;
    int    j;
    int    size_prob;
    long   insn_end;
    long   itimes;
    long   start = offset;
    long   wsize = 0;		       /* size for DB etc. */

    errfunc = error;		       /* to pass to other functions */
    outfmt = output;		       /* likewise */
    list = listgen;		       /* and again */

    switch (instruction->opcode) 
    {
	case   -1: return 0;
	case I_DB: wsize = 1; break;
	case I_DW: wsize = 2; break;
	case I_DD: wsize = 4; break;
	case I_DQ: wsize = 8; break;
	case I_DT: wsize = 10; break;
	default:   break;	       /* not a data-definition opcode */
    }

    if (wsize) {
	extop  * e;
	long   t = instruction->times;
	if (t < 0)
	    errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);

	while (t--) 		       /* repeat TIMES times */
	{
	    for (e = instruction->eops; e; e = e->next) 
	    {
		if (e->type == EOT_DB_NUMBER) 
		{
		    if (wsize == 1) {
			if (e->segment != NO_SEG)
			    errfunc (ERR_NONFATAL,
				     "one-byte relocation attempted");
			else {
			    unsigned char out_byte = e->offset;
			    out (offset, segment, &out_byte, OUT_RAWDATA+1,
				 NO_SEG, NO_SEG);
			}
		    } 
		    else if (wsize > 5) {
			/* only DQ (8) and DT (10) can get here */
			errfunc (ERR_NONFATAL, "integer supplied to a D%c"
				 " instruction", wsize==8 ? 'Q' : 'T');
		    } 
		    else
			out (offset, segment, &e->offset,
			     OUT_ADDRESS+wsize, e->segment,
			     e->wrt);
		    /* the offset advances even when an error was reported */
		    offset += wsize;
		} 
		else if (e->type == EOT_DB_STRING) 
		{
		    int align;

		    out (offset, segment, e->stringval,
			 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
		    align = e->stringlen % wsize;

		    if (align) {
			/*
			 * Pad the string with zero bytes up to a multiple
			 * of the element size.  The literal supplies nine
			 * zero bytes (eight plus the terminator), enough
			 * for the largest pad, 9, needed by DT.
			 */
			align = wsize - align;
			out (offset, segment, "\0\0\0\0\0\0\0\0",
			     OUT_RAWDATA+align, NO_SEG, NO_SEG);
		    }
		    offset += e->stringlen + align;
		}
	    }
	    if (t > 0 && t == instruction->times-1) 
	    {
		/*
		 * Dummy call to list->output to give the offset to the
		 * listing module.  Done once, after the first repetition,
		 * when more repetitions follow.
		 */
		list->output (offset, NULL, OUT_RAWDATA);
		list->uplevel (LIST_TIMES);
	    }
	}
	if (instruction->times > 1)
	    list->downlevel (LIST_TIMES);
	return offset - start;
    }

    if (instruction->opcode == I_INCBIN) 
    {
	static char fname[FILENAME_MAX];
	FILE        * fp;
	long        len;

	/* Copy the (length-counted) file name, truncating if necessary. */
	len = FILENAME_MAX-1;
	if (len > instruction->eops->stringlen)
	    len = instruction->eops->stringlen;
	strncpy (fname, instruction->eops->stringval, len);
	fname[len] = '\0';

	if ( (fp = fopen(fname, "rb")) == NULL)
	    error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
	else if (fseek(fp, 0L, SEEK_END) < 0) {
	    error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
		   fname);
	    fclose (fp);	       /* the file was opened: don't leak it */
	}
	else 
	{
	    static char buf[2048];
	    long t = instruction->times;
	    long base = 0;

	    len = ftell (fp);
	    if (instruction->eops->next) {
		/* second operand: starting offset within the file */
		base = instruction->eops->next->offset;
		len -= base;
		/* third operand: upper limit on the number of bytes */
		if (instruction->eops->next->next &&
		    len > instruction->eops->next->next->offset)
		    len = instruction->eops->next->next->offset;
	    }
	    /*
	     * Dummy call to list->output to give the offset to the
	     * listing module.
	     */
	    list->output (offset, NULL, OUT_RAWDATA);
	    list->uplevel(LIST_INCBIN);
	    while (t--) 
	    {
		long l;

		fseek (fp, base, SEEK_SET);		
		l = len;
		while (l > 0) {
		    long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
				    fp);
		    if (!m) {
			/*
			 * This shouldn't happen unless the file
			 * actually changes while we are reading
			 * it.
			 */
			error (ERR_NONFATAL, "`incbin': unexpected EOF while"
			       " reading file `%s'", fname);
			t=0;  /* Try to exit cleanly */
			break;
		    }
		    /*
		     * NOTE(review): offset is not advanced between chunks,
		     * so every chunk is reported at the same offset.
		     * Left as is; confirm whether the listing relies on it.
		     */
		    out (offset, segment, buf, OUT_RAWDATA+m,
			 NO_SEG, NO_SEG);
		    l -= m;
		}
	    }
	    list->downlevel(LIST_INCBIN);
	    if (instruction->times > 1) {
		/*
		 * Dummy call to list->output to give the offset to the
		 * listing module.
		 */
		list->output (offset, NULL, OUT_RAWDATA);
		list->uplevel(LIST_TIMES);
		list->downlevel(LIST_TIMES);
	    }
	    fclose (fp);
	    return instruction->times * len;
	}
	return 0;		       /* if we're here, there's an error */
    }

    size_prob = FALSE;
    temp = nasm_instructions[instruction->opcode];
    while (temp->opcode != -1) {
	int m = matches (temp, instruction);

	if (m == 100) 		       /* matches! */
	{
	    char *codes = temp->code;
	    long insn_size = calcsize(segment, offset, bits,
				      instruction, codes);
	    itimes = instruction->times;
	    if (insn_size < 0)	       /* shouldn't be, on pass two */
	    	error (ERR_PANIC, "errors made it through from pass one");
	    else while (itimes--) {
		for (j=0; j<instruction->nprefix; j++) {
		    unsigned char c=0;
		    switch (instruction->prefixes[j]) {
		      case P_LOCK:
			c = 0xF0; break;
		      case P_REPNE: case P_REPNZ:
			c = 0xF2; break;
		      case P_REPE: case P_REPZ: case P_REP:
			c = 0xF3; break;
		      case R_CS: c = 0x2E; break;
		      case R_DS: c = 0x3E; break;
		      case R_ES: c = 0x26; break;
		      case R_FS: c = 0x64; break;
		      case R_GS: c = 0x65; break;
		      case R_SS: c = 0x36; break;
		      /*
		       * Size prefixes only produce a byte when they differ
		       * from the current assembly mode.
		       */
		      case P_A16:
			if (bits != 16)
			    c = 0x67;
			break;
		      case P_A32:
			if (bits != 32)
			    c = 0x67;
			break;
		      case P_O16:
			if (bits != 16)
			    c = 0x66;
			break;
		      case P_O32:
			if (bits != 32)
			    c = 0x66;
			break;
		      default:
			error (ERR_PANIC,
			       "invalid instruction prefix");
		    }
		    if (c != 0) {
			out (offset, segment, &c, OUT_RAWDATA+1,
			     NO_SEG, NO_SEG);
			offset++;
		    }
		}
		/*
		 * The end of the instruction must be computed after the
		 * prefix bytes have moved offset along; computing it before
		 * the loop left it short by one for every prefix byte
		 * emitted, so it disagreed with the offset reached after
		 * "offset += insn_size" below.
		 */
		insn_end = offset + insn_size;
		gencode (segment, offset, bits, instruction, codes, insn_end);
		offset += insn_size;
		if (itimes > 0 && itimes == instruction->times-1) {
		    /*
		     * Dummy call to list->output to give the offset to the
		     * listing module.
		     */
		    list->output (offset, NULL, OUT_RAWDATA);
		    list->uplevel (LIST_TIMES);
		}
	    }
	    if (instruction->times > 1)
		list->downlevel (LIST_TIMES);
	    return offset - start;
	} else if (m > 0) {
	    /* remember why the template was rejected, for the error below */
	    size_prob = m;
	}
	temp++;
    }

    if (temp->opcode == -1) {	       /* didn't match any instruction */
	if (size_prob == 1)	       /* would have matched, but for size */
	    error (ERR_NONFATAL, "operation size not specified");
	else if (size_prob == 2)
	    error (ERR_NONFATAL, "mismatch in operand sizes");
	else
	    error (ERR_NONFATAL,
		   "invalid combination of opcode and operands");
    }
    return 0;
}

long insn_size (long segment, long offset, int bits,
		insn *instruction, efunc error) 
{
    struct itemplate *temp;

    errfunc = error;		       /* to pass to other functions */

    if (instruction->opcode == -1)
    	return 0;

    if (instruction->opcode == I_DB ||
	instruction->opcode == I_DW ||
	instruction->opcode == I_DD ||
	instruction->opcode == I_DQ ||
	instruction->opcode == I_DT) 
    {
	extop *e;
	long isize, osize, wsize = 0;  /* placate gcc */

	isize = 0;
	switch (instruction->opcode) 
	{
	  case I_DB: wsize = 1; break;
	  case I_DW: wsize = 2; break;
	  case I_DD: wsize = 4; break;
	  case I_DQ: wsize = 8; break;
	  case I_DT: wsize = 10; break;
	}

	for (e = instruction->eops; e; e = e->next) 
	{
	    long align;

	    osize = 0;
	    if (e->type == EOT_DB_NUMBER)
		osize = 1;
	    else if (e->type == EOT_DB_STRING)
		osize = e->stringlen;

	    align = (-osize) % wsize;
	    if (align < 0)
		align += wsize;
	    isize += osize + align;
	}
	return isize * instruction->times;
    }

    if (instruction->opcode == I_INCBIN) 
    {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -