⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assemble.c

📁 一个免费的汇编语言编译器的源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the operand-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \44, \45, \46 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the operand-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2
 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
 *		    is a signed byte rather than a word.
 * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2
 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
 *		    is a signed byte rather than a dword.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312		 - (disassembler only) marker on LOOP, LOOPxx instructions.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \331		 - instruction not valid with REP prefix.  Hint for
 *                 disassembler only; for SSE instructions.
 * \332		 - disassemble a rep (0xF3 byte) prefix as repe not rep.
 * \333		 - REP prefix (0xF3 byte); for SSE instructions.  Not encoded
 *		   as a literal byte in order to aid the disassembler.
 * \340          - reserve <operand 0> bytes of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 *		   370 is used for Jcc, 371 is used for JMP.
 * \373		 - assemble 0x03 if bits==16, 0x05 if bits==32;
 *		   used for conditional jump over longer jump
 */

#include <stdio.h>
#include <string.h>

#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"
#include "preproc.h"

/*
 * Instruction template table, defined in another translation unit.
 * assemble() indexes it by instruction->opcode and then walks the
 * resulting template list until it reaches an entry whose opcode
 * field is -1.
 */
extern struct itemplate *nasm_instructions[];

/*
 * Description of an encoded effective address: the ModRM byte, the
 * optional SIB byte, and how many offset bytes accompany them.
 */
typedef struct {
    /* non-zero when a SIB byte is necessary */
    int sib_present;
    /* number of bytes of offset needed */
    int bytes;
    /* total size, kept for convenience: sib + bytes + 1 */
    int size;
    /* the ModRM byte itself */
    unsigned char modrm;
    /* the SIB byte itself */
    unsigned char sib;
} ea;

/*
 * File-scope state.  assemble() copies its `cp', `error', `output'
 * and `listgen' arguments into these variables at the start of every
 * call so that the other routines in this file can reach them.
 */
static unsigned long cpu;		/* cpu level received from nasm.c */
/* error reporting function; set from assemble()'s `error' argument */
static efunc errfunc;
/* output format driver; out() calls its output and debug-format hooks */
static struct ofmt *outfmt;
/* listing generator; out() and assemble() call its output hook */
static ListGen *list;

/*
 * Forward declarations of the static helpers used in this file.
 *
 * calcsize() is called by jmp_match() as
 * calcsize(segment, offset, bits, ins, code), and its result is used
 * there as the size of the instruction.
 *
 * matches() is called by assemble() for each candidate template: a
 * result of 100 is treated as a match, and a result of 99 is turned
 * into a match only if jmp_match() returns 1.
 *
 * NOTE(review): gencode(), regval(), process_ea() and chsize() are
 * not defined or called in the part of the file visible here; see
 * their definitions for their contracts.
 */
static long calcsize (long, long, int, insn *, const char *);
static void gencode (long, long, int, insn *, const char *, long);
static int  regval (operand *o);
static int  matches (struct itemplate *, insn *);
static ea * process_ea (operand *, ea *, int, int, int);
static int  chsize (operand *, int);

/*
 * Wrapper around the real output format's output routine.  Every
 * piece of output is first handed to the listing file generator,
 * then the debug format is told about any change of source position,
 * and finally the data goes to the output format itself.
 *
 *   offset        position reported to the listing generator
 *   segto         segment handed to the debug format and output format
 *   data          the data to emit (read as a long for OUT_ADDRESS)
 *   type          an OUT_* type combined with a size
 *   segment, wrt  relocation information; when both are NO_SEG an
 *                 OUT_ADDRESS is listed as plain raw bytes
 */
static void out (long offset, long segto, const void *data, unsigned long type,
                 long segment, long wrt)
{
    /*
     * These two must be static: src_get() updates them to the current
     * values and reports what changed relative to the previous call.
     */
    static long lineno = 0;
    static char *lnfname = NULL;

    /* First, give the listing generator its copy of the data. */
    switch (type & OUT_TYPMASK) {
    case OUT_ADDRESS:
        if (segment == NO_SEG && wrt == NO_SEG) {
            /*
             * A non-relocated address: show it in the listing as
             * raw data of the appropriate width.
             */
            unsigned char raw[4], *cursor = raw;

            if ((type & OUT_SIZMASK) == 4) {
                WRITELONG (cursor, * (long *) data);
                list->output (offset, raw, OUT_RAWDATA+4);
            } else {
                WRITESHORT (cursor, * (long *) data);
                list->output (offset, raw, OUT_RAWDATA+2);
            }
        } else {
            /*
             * A relocated address has to stay an OUT_ADDRESS, so
             * it is passed through untouched.
             */
            list->output (offset, data, type);
        }
        break;

    case OUT_RESERVE:
        /* Reserved space carries no data. */
        list->output (offset, NULL, type);
        break;

    case OUT_RAWDATA:
    case OUT_REL2ADR:
    case OUT_REL4ADR:
        list->output (offset, data, type);
        break;

    default:
        /* Any other type is not listed. */
        break;
    }

    /*
     * src_get() decides when the debug-format-specific "linenum"
     * function is called.  It returns 0 for "same as last time", -2
     * if lnfname changed, and otherwise the amount by which lineno
     * changed.
     */
    if (src_get (&lineno, &lnfname) != 0)
        outfmt->current_dfmt->linenum (lnfname, lineno, segto);

    /* Finally, the real output. */
    outfmt->output (segto, data, type, segment, wrt);
}

/*
 * Decide whether a template whose code string starts with \370 (used
 * for Jcc) or \371 (used for JMP) may be used for `ins', i.e. whether
 * operand 0 meets the byte jump criteria.
 *
 * Returns 0 if the code string starts with neither \370 nor \371.
 * For a forward reference the result is 1 when the code is \370 and
 * either optimizing is negative or the operand is marked STRICT;
 * otherwise it is 1 only when pass0 is 0.  For any other operand the
 * result is 1 when the target is in `segment' and the distance from
 * the end of the instruction to the target fits in a signed byte.
 */
static int jmp_match (long segment, long offset, int bits,
                      insn *ins, const char *code)
{
    const unsigned char lead = code[0];
    long insn_len;
    long delta;

    if (lead != 0370 && lead != 0371)
        return 0;

    if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
        if (lead == 0370 &&
            (optimizing < 0 || (ins->oprs[0].type & STRICT)))
            return 1;
        /* match a forward reference */
        return (pass0 == 0);
    }

    /* The size is computed before the segment check, as it always was. */
    insn_len = calcsize (segment, offset, bits, ins, code);
    if (ins->oprs[0].segment != segment)
        return 0;

    /* Distance from the end of this instruction to the target. */
    delta = ins->oprs[0].offset - offset - insn_len;

    /* Byte size if and only if the distance fits in a signed byte. */
    return (delta >= -128L && delta <= 127L) ? 1 : 0;
}


long assemble (long segment, long offset, int bits, unsigned long cp,
	       insn *instruction, struct ofmt *output, efunc error,
	       ListGen *listgen) 
{
    struct itemplate *temp;
    int    j;
    int    size_prob;
    long   insn_end;
    long   itimes;
    long   start = offset;
    long   wsize = 0;		       /* size for DB etc. */

    errfunc = error;		       /* to pass to other functions */
    cpu = cp;
    outfmt = output;		       /* likewise */
    list = listgen;		       /* and again */

    switch (instruction->opcode) 
    {
	case   -1: return 0;
	case I_DB: wsize = 1; break;
	case I_DW: wsize = 2; break;
	case I_DD: wsize = 4; break;
	case I_DQ: wsize = 8; break;
	case I_DT: wsize = 10; break;
    }

    if (wsize) {
	extop  * e;
	long   t = instruction->times;
	if (t < 0)
	    errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);

	while (t--) 		       /* repeat TIMES times */
	{
	    for (e = instruction->eops; e; e = e->next) 
	    {
		if (e->type == EOT_DB_NUMBER) 
		{
		    if (wsize == 1) {
			if (e->segment != NO_SEG)
			    errfunc (ERR_NONFATAL,
				     "one-byte relocation attempted");
			else {
			    unsigned char out_byte = e->offset;
			    out (offset, segment, &out_byte, OUT_RAWDATA+1,
				 NO_SEG, NO_SEG);
			}
		    } 
		    else if (wsize > 5) {
			errfunc (ERR_NONFATAL, "integer supplied to a D%c"
				 " instruction", wsize==8 ? 'Q' : 'T');
		    } 
		    else
			out (offset, segment, &e->offset,
			     OUT_ADDRESS+wsize, e->segment,
			     e->wrt);
		    offset += wsize;
		} 
		else if (e->type == EOT_DB_STRING) 
		{
		    int align;

		    out (offset, segment, e->stringval,
			 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
		    align = e->stringlen % wsize;

		    if (align) {
			align = wsize - align;
			out (offset, segment, "\0\0\0\0\0\0\0\0",
			     OUT_RAWDATA+align, NO_SEG, NO_SEG);
			}
		    offset += e->stringlen + align;
		}
	    }
	    if (t > 0 && t == instruction->times-1) 
	    {
		/*
		 * Dummy call to list->output to give the offset to the
		 * listing module.
		 */
		list->output (offset, NULL, OUT_RAWDATA);
		list->uplevel (LIST_TIMES);
	    }
	}
	if (instruction->times > 1)
	    list->downlevel (LIST_TIMES);
	return offset - start;
    }

    if (instruction->opcode == I_INCBIN) 
    {
	static char fname[FILENAME_MAX];
	FILE        * fp;
	long        len;
        char *prefix = "", *combine;
        char** pPrevPath = NULL;

	len = FILENAME_MAX-1;
	if (len > instruction->eops->stringlen)
	    len = instruction->eops->stringlen;
	strncpy (fname, instruction->eops->stringval, len);
	fname[len] = '\0';

        while (1)             /* added by alexfru: 'incbin' uses include paths */
        {
          combine = nasm_malloc(strlen(prefix) + len + 1);
          strcpy(combine, prefix);
          strcat(combine, fname);

          if ( (fp = fopen(combine, "rb")) != NULL)
          {
              nasm_free(combine);
              break;
          }

          nasm_free(combine);
          pPrevPath = pp_get_include_path_ptr (pPrevPath);
          if (pPrevPath == NULL)
              break;
          prefix = *pPrevPath;
        }

        if (fp == NULL)
	    error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
	else if (fseek(fp, 0L, SEEK_END) < 0)
	    error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
		   fname);
	else 
	{
	    static char buf[2048];
	    long t = instruction->times;
	    long base = 0;

	    len = ftell (fp);
	    if (instruction->eops->next) {
		base = instruction->eops->next->offset;
		len -= base;
		if (instruction->eops->next->next &&
		    len > instruction->eops->next->next->offset)
		    len = instruction->eops->next->next->offset;
	    }
	    /*
	     * Dummy call to list->output to give the offset to the
	     * listing module.
	     */
	    list->output (offset, NULL, OUT_RAWDATA);
	    list->uplevel(LIST_INCBIN);
	    while (t--) 
	    {
		long l;

		fseek (fp, base, SEEK_SET);		
		l = len;
		while (l > 0) {
		    long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
				    fp);
		    if (!m) {
			/*
			 * This shouldn't happen unless the file
			 * actually changes while we are reading
			 * it.
			 */
			error (ERR_NONFATAL, "`incbin': unexpected EOF while"
			       " reading file `%s'", fname);
			t=0;  /* Try to exit cleanly */
			break;
		    }
		    out (offset, segment, buf, OUT_RAWDATA+m,
			 NO_SEG, NO_SEG);
		    l -= m;
		}
	    }
	    list->downlevel(LIST_INCBIN);
	    if (instruction->times > 1) {
		/*
		 * Dummy call to list->output to give the offset to the
		 * listing module.
		 */
		list->output (offset, NULL, OUT_RAWDATA);
		list->uplevel(LIST_TIMES);
		list->downlevel(LIST_TIMES);
	    }
	    fclose (fp);
	    return instruction->times * len;
	}
	return 0;		       /* if we're here, there's an error */
    }

    size_prob = FALSE;
    temp = nasm_instructions[instruction->opcode];
    while (temp->opcode != -1) {
	int m = matches (temp, instruction);
	if (m == 99)
	    m += jmp_match(segment, offset, bits, instruction, temp->code);

	if (m == 100) 		       /* matches! */
	{
	    const char *codes = temp->code;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -