⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assemble.c

📁 nasm的全套源代码,有些我做了些修改,以方便您更方便更容易调试成功,方便学习做编译器
💻 C
📖 第 1 页 / 共 5 页
字号:
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10, \11, \12 - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0, 1 or 2
 * \17           - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the operand-size override on the operand
 * \37           - a word constant, from the _segment_ part of operand 0
 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
 * \44, \45, \46 - select between \3[012] and \4[012] depending on 16/32 bit
 *                 assembly mode or the address-size override on the operand
 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
 *                 assembly mode or the operand-size override on the operand
 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2
 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
 *		    is a signed byte rather than a word.
 * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2
 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
 *		    is a signed byte rather than a dword.
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \30x          - might be an 0x67 byte, depending on the address size of
 *                 the memory reference in operand x.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312		 - (disassembler only) marker on LOOP, LOOPxx instructions.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \331		 - instruction not valid with REP prefix.  Hint for
 *                 disassembler only; for SSE instructions.
 * \332		 - disassemble a rep (0xF3 byte) prefix as repe not rep.
 * \333		 - REP prefix (0xF3 byte); for SSE instructions.  Not encoded
 *		   as a literal byte in order to aid the disassembler.
 * \340          - reserve <operand 0> bytes of uninitialised storage.
 *                 Operand 0 had better be a segmentless constant.
 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 *		   370 is used for Jcc, 371 is used for JMP.
 * \373		 - assemble 0x03 if bits==16, 0x05 if bits==32;
 *		   used for conditional jump over longer jump
 */

#include <stdio.h>
#include <string.h>

#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"
#include "preproc.h"

/* Array of pointers to instruction templates; defined in another file. */
extern struct itemplate *nasm_instructions[];

/*
 * Encoded effective address: the ModRM byte, the optional SIB byte,
 * and how many offset bytes are needed.
 */
typedef struct {
    int sib_present;            /* non-zero when a SIB byte is required */
    int bytes;                  /* number of offset bytes needed */
    int size;                   /* cached total: sib + bytes + 1 */
    unsigned char modrm;        /* the ModRM byte itself */
    unsigned char sib;          /* the SIB byte itself */
} ea;

/*
 * File-scope state. assemble() stores its `cp`, `error`, `output`
 * and `listgen` arguments here at the start of each call so that the
 * other routines in this file can reach them.
 */
static unsigned long cpu;       /* cpu level received from nasm.c */
/* Error-reporting callback; invoked as errfunc(severity, fmt, ...). */
static efunc errfunc;
/* Output format driver; out() calls its output and linenum routines. */
static struct ofmt *outfmt;
/* Listing generator; out() forwards each emitted item to list->output. */
static ListGen *list;

/*
 * Forward declarations of file-local helpers, so that they can be
 * called before their definitions (jmp_match() calls calcsize(),
 * for example).
 */
static long calcsize(long, long, int, insn *, const char *);
static void gencode(long, long, int, insn *, const char *, long);
static int regval(operand * o);
static int matches(struct itemplate *, insn *);
static ea *process_ea(operand *, ea *, int, int, int);
static int chsize(operand *, int);

/*
 * Wrapper around the real output format's output routine. Every item
 * is first handed to the listing file generator and then passed on,
 * unchanged, to the output format itself.
 *
 *   offset        - offset reported to the listing generator
 *   segto         - segment the data is written to
 *   data          - the item to emit
 *   type          - OUT_* type combined with a size
 *   segment, wrt  - relocation information; NO_SEG when not relocated
 */
static void out(long offset, long segto, const void *data,
                unsigned long type, long segment, long wrt)
{
    /* Both must persist between calls: src_get() updates them in place. */
    static long cur_line = 0;
    static char *cur_file = NULL;
    const unsigned long kind = type & OUT_TYPMASK;

    if (kind == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
        /*
         * A non-relocated address: the listing generator receives it
         * converted into RAWDATA format, as either 4 or 2 bytes.
         */
        unsigned char raw[4];
        unsigned char *cursor = raw;
        long value = *(const long *)data;

        if ((type & OUT_SIZMASK) == 4) {
            WRITELONG(cursor, value);
            list->output(offset, raw, OUT_RAWDATA + 4);
        } else {
            WRITESHORT(cursor, value);
            list->output(offset, raw, OUT_RAWDATA + 2);
        }
    } else if (kind == OUT_ADDRESS || kind == OUT_RAWDATA) {
        /*
         * Raw data, or a relocated address which has to stay an
         * OUT_ADDRESS: nothing to convert, list it as it stands.
         */
        list->output(offset, data, type);
    } else if (kind == OUT_RESERVE) {
        /* Reserved space carries no data bytes to list. */
        list->output(offset, NULL, type);
    } else if (kind == OUT_REL2ADR || kind == OUT_REL4ADR) {
        list->output(offset, data, type);
    }

    /*
     * src_get() refreshes cur_line and cur_file with the current
     * values. It returns 0 for "same as last time", -2 if the file
     * name changed, and otherwise the amount by which the line number
     * changed. Any non-zero result triggers the debug-format-specific
     * "linenum" function.
     */
    if (src_get(&cur_line, &cur_file))
        outfmt->current_dfmt->linenum(cur_file, cur_line, segto);

    outfmt->output(segto, data, type, segment, wrt);
}

/*
 * Test the byte-jump criteria for a template whose code string begins
 * with \370 (used for Jcc) or \371 (used for JMP).
 *
 *   segment, offset - location of the instruction being assembled
 *   bits            - passed through to calcsize()
 *   ins             - the instruction; operand 0 is the jump target
 *   code            - the template's code string
 *
 * Returns 1 if the template matches, 0 if it does not.
 */
static int jmp_match(long segment, long offset, int bits,
                     insn * ins, const char *code)
{
    unsigned char marker = code[0];
    long length;
    long delta;

    /* Only the \370 and \371 markers are handled here. */
    if (marker != 0370 && marker != 0371)
        return 0;

    if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
        /*
         * Forward reference. A \370 template matches outright when
         * optimizing is negative or the operand is marked STRICT;
         * in every other case it matches only while pass0 is 0.
         */
        if (marker == 0370 &&
            (optimizing < 0 || (ins->oprs[0].type & STRICT)))
            return 1;
        return (pass0 == 0);
    }

    /* The instruction length is computed before the segment is checked. */
    length = calcsize(segment, offset, bits, ins, code);

    /* A target in a different segment never matches. */
    if (ins->oprs[0].segment != segment)
        return 0;

    /* Distance from the end of this instruction to the target. */
    delta = ins->oprs[0].offset - offset - length;
    if (delta < -128L || delta > 127L)
        return 0;

    return 1;                   /* it is byte size */
}

long assemble(long segment, long offset, int bits, unsigned long cp,
              insn * instruction, struct ofmt *output, efunc error,
              ListGen * listgen)
{
    struct itemplate *temp;
    int j;
    int size_prob;
    long insn_end;
    long itimes;
    long start = offset;
    long wsize = 0;             /* size for DB etc. */

    errfunc = error;            /* to pass to other functions */
    cpu = cp;
    outfmt = output;            /* likewise */
    list = listgen;             /* and again */

    switch (instruction->opcode) {
    case -1:
        return 0;
    case I_DB:
        wsize = 1;
        break;
    case I_DW:
        wsize = 2;
        break;
    case I_DD:
        wsize = 4;
        break;
    case I_DQ:
        wsize = 8;
        break;
    case I_DT:
        wsize = 10;
        break;
    }

    if (wsize) {
        extop *e;
        long t = instruction->times;
        if (t < 0)
            errfunc(ERR_PANIC,
                    "instruction->times < 0 (%ld) in assemble()", t);

        while (t--) {           /* repeat TIMES times */
            for (e = instruction->eops; e; e = e->next) {
                if (e->type == EOT_DB_NUMBER) {
                    if (wsize == 1) {
                        if (e->segment != NO_SEG)
                            errfunc(ERR_NONFATAL,
                                    "one-byte relocation attempted");
                        else {
                            unsigned char out_byte = e->offset;
                            out(offset, segment, &out_byte,
                                OUT_RAWDATA + 1, NO_SEG, NO_SEG);
                        }
                    } else if (wsize > 5) {
                        errfunc(ERR_NONFATAL, "integer supplied to a D%c"
                                " instruction", wsize == 8 ? 'Q' : 'T');
                    } else
                        out(offset, segment, &e->offset,
                            OUT_ADDRESS + wsize, e->segment, e->wrt);
                    offset += wsize;
                } else if (e->type == EOT_DB_STRING) {
                    int align;

                    out(offset, segment, e->stringval,
                        OUT_RAWDATA + e->stringlen, NO_SEG, NO_SEG);
                    align = e->stringlen % wsize;

                    if (align) {
                        align = wsize - align;
                        out(offset, segment, "\0\0\0\0\0\0\0\0",
                            OUT_RAWDATA + align, NO_SEG, NO_SEG);
                    }
                    offset += e->stringlen + align;
                }
            }
            if (t > 0 && t == instruction->times - 1) {
                /*
                 * Dummy call to list->output to give the offset to the
                 * listing module.
                 */
                list->output(offset, NULL, OUT_RAWDATA);
                list->uplevel(LIST_TIMES);
            }
        }
        if (instruction->times > 1)
            list->downlevel(LIST_TIMES);
        return offset - start;
    }

    if (instruction->opcode == I_INCBIN) {
        static char fname[FILENAME_MAX];
        FILE *fp;
        long len;
        char *prefix = "", *combine;
        char **pPrevPath = NULL;

        len = FILENAME_MAX - 1;
        if (len > instruction->eops->stringlen)
            len = instruction->eops->stringlen;
        strncpy(fname, instruction->eops->stringval, len);
        fname[len] = '\0';

        while (1) {             /* added by alexfru: 'incbin' uses include paths */
            combine = nasm_malloc(strlen(prefix) + len + 1);
            strcpy(combine, prefix);
            strcat(combine, fname);

            if ((fp = fopen(combine, "rb")) != NULL) {
                nasm_free(combine);
                break;
            }

            nasm_free(combine);
            pPrevPath = pp_get_include_path_ptr(pPrevPath);
            if (pPrevPath == NULL)
                break;
            prefix = *pPrevPath;
        }

        if (fp == NULL)
            error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
                  fname);
        else if (fseek(fp, 0L, SEEK_END) < 0)
            error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
                  fname);
        else {
            static char buf[2048];
            long t = instruction->times;
            long base = 0;

            len = ftell(fp);
            if (instruction->eops->next) {
                base = instruction->eops->next->offset;
                len -= base;
                if (instruction->eops->next->next &&
                    len > instruction->eops->next->next->offset)
                    len = instruction->eops->next->next->offset;
            }
            /*
             * Dummy call to list->output to give the offset to the
             * listing module.
             */
            list->output(offset, NULL, OUT_RAWDATA);
            list->uplevel(LIST_INCBIN);
            while (t--) {
                long l;

                fseek(fp, base, SEEK_SET);
                l = len;
                while (l > 0) {
                    long m =
                        fread(buf, 1, (l > sizeof(buf) ? sizeof(buf) : l),
                              fp);
                    if (!m) {
                        /*
                         * This shouldn't happen unless the file
                         * actually changes while we are reading
                         * it.
                         */
                        error(ERR_NONFATAL,
                              "`incbin': unexpected EOF while"
                              " reading file `%s'", fname);
                        t = 0;  /* Try to exit cleanly */
                        break;
                    }
                    out(offset, segment, buf, OUT_RAWDATA + m,
                        NO_SEG, NO_SEG);
                    l -= m;
                }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -