⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assemble.c

📁 nasm早期的源代码,比较简单是学习汇编和编译原理的好例子
💻 C
📖 第 1 页 / 共 5 页
字号:
/* assemble.c   code generation for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * the actual codes (C syntax, i.e. octal):
 * \0            - terminates the code. (Unless it's a literal of course.)
 * \1, \2, \3    - that many literal bytes follow in the code stream
 * \4, \6        - the POP/PUSH (respectively) codes for CS, DS, ES, SS
 *                 (POP is never used for CS) depending on operand 0
 * \5, \7        - the second byte of POP/PUSH codes for FS, GS, depending
 *                 on operand 0
 * \10..\13      - a literal byte follows in the code stream, to be added
 *                 to the register value of operand 0..3
 * \14..\17      - a signed byte immediate operand, from operand 0..3
 * \20..\23      - a byte immediate operand, from operand 0..3
 * \24..\27      - an unsigned byte immediate operand, from operand 0..3
 * \30..\33      - a word immediate operand, from operand 0..3
 * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
 *                 assembly mode or the operand-size override on the operand
 * \40..\43      - a long immediate operand, from operand 0..3
 * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
 *		   depending on assembly mode or the address-size override
 *		   on the operand.
 * \50..\53      - a byte relative operand, from operand 0..3
 * \54..\57      - a qword immediate operand, from operand 0..3
 * \60..\63      - a word relative operand, from operand 0..3
 * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
 *                 assembly mode or the operand-size override on the operand
 * \70..\73      - a long relative operand, from operand 0..3
 * \74..\77       - a word constant, from the _segment_ part of operand 0..3
 * \1ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field the register value of operand b.
 * \140..\143    - an immediate word or signed byte for operand 0..3
 * \144..\147    - or 2 (s-field) into next opcode byte if operand 0..3
 *		    is a signed byte rather than a word.
 * \150..\153     - an immediate dword or signed byte for operand 0..3
 * \154..\157     - or 2 (s-field) into next opcode byte if operand 0..3
 *		    is a signed byte rather than a dword.
 * \160..\163    - this instruction uses DREX rather than REX, with the
 *		   OC0 field set to 0, and the dest field taken from
 *                 operand 0..3.
 * \164..\167    - this instruction uses DREX rather than REX, with the
 *		   OC0 field set to 1, and the dest field taken from
 *                 operand 0..3.
 * \170          - encodes the literal byte 0. (Some compilers don't take
 *                 kindly to a zero byte in the _middle_ of a compile time
 *                 string constant, so I had to put this hack in.)
 * \171		 - placement of DREX suffix in the absence of an EA
 * \2ab          - a ModRM, calculated on EA in operand a, with the spare
 *                 field equal to digit b.
 * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 * \312          - (disassembler only) marker on LOOP, LOOPxx instructions.
 * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 * \322          - indicates that this instruction is only valid when the
 *                 operand size is the default (instruction to disassembler,
 *                 generates no code in the assembler)
 * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 * \324          - indicates 64-bit operand size requiring REX prefix.
 * \330          - a literal byte follows in the code stream, to be added
 *                 to the condition code value of the instruction.
 * \331          - instruction not valid with REP prefix.  Hint for
 *                 disassembler only; for SSE instructions.
 * \332          - REP prefix (0xF2 byte) used as opcode extension.
 * \333          - REP prefix (0xF3 byte) used as opcode extension.
 * \334          - LOCK prefix used instead of REX.R
 * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 * \340          - reserve <operand 0> bytes of uninitialized storage.
 *                 Operand 0 had better be a segmentless constant.
 * \364          - operand-size prefix (0x66) not permitted
 * \365          - address-size prefix (0x67) not permitted
 * \366          - operand-size prefix (0x66) used as opcode extension
 * \367          - address-size prefix (0x67) used as opcode extension
 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 *		   370 is used for Jcc, 371 is used for JMP.
 * \373		 - assemble 0x03 if bits==16, 0x05 if bits==32;
 *		   used for conditional jump over longer jump
 */

#include "compiler.h"

#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"
#include "preproc.h"
#include "regflags.c"
#include "regvals.c"

/*
 * Description of an encoded effective address: which pieces are
 * needed, how many bytes they occupy, and the byte values themselves.
 */
typedef struct {
    int sib_present;    /* non-zero if a SIB byte is necessary */
    int bytes;          /* number of offset bytes needed */
    int size;           /* convenience total: sib + bytes + 1 */

    /* the bytes themselves */
    uint8_t modrm;
    uint8_t sib;
    uint8_t rex;
    uint8_t rip;
} ea;

/*
 * Module-wide state.  assemble() stores its cp, error, output and
 * listgen arguments here on entry so that the static helpers in this
 * file can use them without taking extra parameters.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error-reporting callback */
static struct ofmt *outfmt;     /* output format driver */
static ListGen *list;           /* listing file generator */

/* Forward declarations of the file-local helper routines. */
static int32_t calcsize(int32_t, int32_t, int, insn *, const char *);
static void gencode(int32_t, int32_t, int, insn *, const char *, int32_t);
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int32_t, int);
static void add_asp(insn *, int);

/*
 * Report whether an instruction carries a particular prefix.
 *
 * Walks the first ins->nprefix entries of ins->prefixes and stops at
 * the first one equal to `prefix'.  Returns 1 if such an entry
 * exists, 0 otherwise.
 */
static int has_prefix(insn * ins, enum prefixes prefix)
{
    int found = 0;
    int idx = 0;

    while (!found && idx < ins->nprefix) {
        found = (ins->prefixes[idx] == prefix);
        idx++;
    }

    return found;
}

/*
 * Complain (non-fatally, through errfunc) when the instruction
 * carries the given prefix.  Does nothing if the prefix is absent.
 */
static void assert_no_prefix(insn * ins, enum prefixes prefix)
{
    if (!has_prefix(ins, prefix))
        return;

    errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(prefix));
}

/*
 * Wrapper around the real output format's output routine.  Besides
 * forwarding the data to the output driver, it hands a copy to the
 * listing file generator and lets the debug format know when the
 * source position has changed.
 *
 *   offset   - offset passed to the listing generator
 *   segto    - segment passed to the debug format and output driver
 *   data     - payload; its interpretation depends on `type'
 *   type     - OUT_* type bits combined with a size (see OUT_TYPMASK
 *              and OUT_SIZMASK)
 *   segment,
 *   wrt      - relocation information; both are NO_SEG for a
 *              non-relocated address
 */
static void out(int32_t offset, int32_t segto, const void *data,
                uint32_t type, int32_t segment, int32_t wrt)
{
    /*
     * These two must be static: src_get() updates them in place and
     * compares against the values left over from the previous call.
     */
    static int32_t lineno = 0;
    static char *lnfname = NULL;

    const uint32_t kind = type & OUT_TYPMASK;
    const int relocated = (segment != NO_SEG || wrt != NO_SEG);

    if (kind == OUT_ADDRESS && !relocated) {
        /*
         * A non-relocated address: the listing gets it converted
         * into RAWDATA format.  Anything that is not 4 or 8 bytes
         * wide is listed as a 2-byte value.
         */
        uint8_t raw[8];
        uint8_t *wp = raw;

        switch (type & OUT_SIZMASK) {
        case 4:
            WRITELONG(wp, *(int32_t *)data);
            list->output(offset, raw, OUT_RAWDATA + 4);
            break;
        case 8:
            WRITEDLONG(wp, *(int64_t *)data);
            list->output(offset, raw, OUT_RAWDATA + 8);
            break;
        default:
            WRITESHORT(wp, *(int32_t *)data);
            list->output(offset, raw, OUT_RAWDATA + 2);
            break;
        }
    } else if (kind == OUT_ADDRESS || kind == OUT_RAWDATA) {
        /*
         * Either a relocated address, which has to stay in
         * OUT_ADDRESS form, or raw data: pass it on untouched.
         */
        list->output(offset, data, type);
    } else if (kind == OUT_RESERVE) {
        list->output(offset, NULL, type);
    } else if (kind == OUT_REL2ADR || kind == OUT_REL4ADR) {
        list->output(offset, data, type);
    }

    /*
     * src_get() decides when the debug-format-specific "linenum"
     * function gets called.  It refreshes lineno and lnfname and
     * returns 0 for "same as last time", -2 if lnfname changed, or
     * the amount by which lineno changed.
     */
    if (src_get(&lineno, &lnfname))
        outfmt->current_dfmt->linenum(lnfname, lineno, segto);

    outfmt->output(segto, data, type, segment, wrt);
}

/*
 * Decide whether the jump template `code' matches the instruction,
 * i.e. whether operand 0 meets the byte jump criteria.
 *
 * Only templates whose first code byte is \370 or \371 are
 * considered; anything else returns 0.
 *
 * For a forward reference the answer is 1 for a \370 template when
 * optimizing < 0 or the operand is marked STRICT, and otherwise
 * depends on whether pass0 is 0.
 *
 * Otherwise the instruction size is computed with calcsize(), the
 * target must lie in `segment', and the result is 1 exactly when the
 * distance from the end of the instruction to the target fits in a
 * signed byte (-128..127).
 */
static int jmp_match(int32_t segment, int32_t offset, int bits,
                     insn * ins, const char *code)
{
    const uint8_t opc = code[0];
    int32_t length;
    int32_t delta;

    if (opc != 0370 && opc != 0371)
        return 0;

    if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
        if (opc == 0370 &&
            (optimizing < 0 || (ins->oprs[0].type & STRICT)))
            return 1;
        return (pass0 == 0);    /* match a forward reference */
    }

    /* The size is computed before the segment check, as it always was. */
    length = calcsize(segment, offset, bits, ins, code);
    if (ins->oprs[0].segment != segment)
        return 0;

    /* distance from the end of this instruction to the target */
    delta = ins->oprs[0].offset - offset - length;

    return (delta >= -128L && delta <= 127L) ? 1 : 0;
}

int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
              insn * instruction, struct ofmt *output, efunc error,
              ListGen * listgen)
{
    const struct itemplate *temp;
    int j;
    int size_prob;
    int32_t insn_end;
    int32_t itimes;
    int32_t start = offset;
    int32_t wsize = 0;             /* size for DB etc. */

    errfunc = error;            /* to pass to other functions */
    cpu = cp;
    outfmt = output;            /* likewise */
    list = listgen;             /* and again */

    switch (instruction->opcode) {
    case -1:
        return 0;
    case I_DB:
        wsize = 1;
        break;
    case I_DW:
        wsize = 2;
        break;
    case I_DD:
        wsize = 4;
        break;
    case I_DQ:
        wsize = 8;
        break;
    case I_DT:
        wsize = 10;
        break;
    case I_DO:
	wsize = 16;
	break;
    default:
	break;
    }

    if (wsize) {
        extop *e;
        int32_t t = instruction->times;
        if (t < 0)
            errfunc(ERR_PANIC,
                    "instruction->times < 0 (%ld) in assemble()", t);

        while (t--) {           /* repeat TIMES times */
            for (e = instruction->eops; e; e = e->next) {
                if (e->type == EOT_DB_NUMBER) {
                    if (wsize == 1) {
                        if (e->segment != NO_SEG)
                            errfunc(ERR_NONFATAL,
                                    "one-byte relocation attempted");
                        else {
                            uint8_t out_byte = e->offset;
                            out(offset, segment, &out_byte,
                                OUT_RAWDATA + 1, NO_SEG, NO_SEG);
                        }
                    } else if (wsize > 8) {
                        errfunc(ERR_NONFATAL, "integer supplied to a DT or DO"
                                " instruction");
                    } else
                        out(offset, segment, &e->offset,
                            OUT_ADDRESS + wsize, e->segment, e->wrt);
                    offset += wsize;
                } else if (e->type == EOT_DB_STRING) {
                    int align;

                    out(offset, segment, e->stringval,
                        OUT_RAWDATA + e->stringlen, NO_SEG, NO_SEG);
                    align = e->stringlen % wsize;

                    if (align) {
                        align = wsize - align;
                        out(offset, segment, "\0\0\0\0\0\0\0\0",
                            OUT_RAWDATA + align, NO_SEG, NO_SEG);
                    }
                    offset += e->stringlen + align;
                }
            }
            if (t > 0 && t == instruction->times - 1) {
                /*
                 * Dummy call to list->output to give the offset to the
                 * listing module.
                 */
                list->output(offset, NULL, OUT_RAWDATA);
                list->uplevel(LIST_TIMES);
            }
        }
        if (instruction->times > 1)
            list->downlevel(LIST_TIMES);
        return offset - start;
    }

    if (instruction->opcode == I_INCBIN) {
        static char fname[FILENAME_MAX];
        FILE *fp;
        int32_t len;
        char *prefix = "", *combine;
        char **pPrevPath = NULL;

        len = FILENAME_MAX - 1;
        if (len > instruction->eops->stringlen)
            len = instruction->eops->stringlen;
        strncpy(fname, instruction->eops->stringval, len);
        fname[len] = '\0';

        while (1) {         /* added by alexfru: 'incbin' uses include paths */
            combine = nasm_malloc(strlen(prefix) + len + 1);
            strcpy(combine, prefix);
            strcat(combine, fname);

            if ((fp = fopen(combine, "rb")) != NULL) {
                nasm_free(combine);
                break;
            }

            nasm_free(combine);
            pPrevPath = pp_get_include_path_ptr(pPrevPath);
            if (pPrevPath == NULL)
                break;
            prefix = *pPrevPath;
        }

        if (fp == NULL)
            error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
                  fname);
        else if (fseek(fp, 0L, SEEK_END) < 0)
            error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
                  fname);
        else {
            static char buf[2048];
            int32_t t = instruction->times;
            int32_t base = 0;

            len = ftell(fp);
            if (instruction->eops->next) {
                base = instruction->eops->next->offset;
                len -= base;
                if (instruction->eops->next->next &&
                    len > instruction->eops->next->next->offset)
                    len = instruction->eops->next->next->offset;
            }
            /*
             * Dummy call to list->output to give the offset to the
             * listing module.
             */
            list->output(offset, NULL, OUT_RAWDATA);
            list->uplevel(LIST_INCBIN);
            while (t--) {
                int32_t l;

                fseek(fp, base, SEEK_SET);
                l = len;
                while (l > 0) {
                    int32_t m =
                        fread(buf, 1, (l > sizeof(buf) ? sizeof(buf) : l),
                              fp);
                    if (!m) {
                        /*
                         * This shouldn't happen unless the file
                         * actually changes while we are reading
                         * it.
                         */
                        error(ERR_NONFATAL,
                              "`incbin': unexpected EOF while"
                              " reading file `%s'", fname);
                        t = 0;  /* Try to exit cleanly */
                        break;
                    }
                    out(offset, segment, buf, OUT_RAWDATA + m,
                        NO_SEG, NO_SEG);
                    l -= m;
                }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -