📄 assemble.c
字号:
/* assemble.c code generation for the Netwide Assembler
*
* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
* Julian Hall. All rights reserved. The software is
* redistributable under the licence given in the file "Licence"
* distributed in the NASM archive.
*
* the actual codes (C syntax, i.e. octal):
* \0 - terminates the code. (Unless it's a literal of course.)
* \1, \2, \3 - that many literal bytes follow in the code stream
* \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
* (POP is never used for CS) depending on operand 0
* \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
* on operand 0
* \10..\13 - a literal byte follows in the code stream, to be added
* to the register value of operand 0..3
* \14..\17 - a signed byte immediate operand, from operand 0..3
* \20..\23 - a byte immediate operand, from operand 0..3
* \24..\27 - an unsigned byte immediate operand, from operand 0..3
* \30..\33 - a word immediate operand, from operand 0..3
* \34..\37 - select between \3[0-3] and \4[0-3] depending on 16/32 bit
* assembly mode or the operand-size override on the operand
* \40..\43 - a long immediate operand, from operand 0..3
* \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7]
* depending on assembly mode or the address-size override
* on the operand.
* \50..\53 - a byte relative operand, from operand 0..3
* \54..\57 - a qword immediate operand, from operand 0..3
* \60..\63 - a word relative operand, from operand 0..3
* \64..\67 - select between \6[0-3] and \7[0-3] depending on 16/32 bit
* assembly mode or the operand-size override on the operand
* \70..\73 - a long relative operand, from operand 0..3
* \74..\77 - a word constant, from the _segment_ part of operand 0..3
* \1ab - a ModRM, calculated on EA in operand a, with the spare
* field the register value of operand b.
* \140..\143 - an immediate word or signed byte for operand 0..3
* \144..\147 - or 2 (s-field) into next opcode byte if operand 0..3
* is a signed byte rather than a word.
* \150..\153 - an immediate dword or signed byte for operand 0..3
* \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3
* is a signed byte rather than a dword.
* \160..\163 - this instruction uses DREX rather than REX, with the
* OC0 field set to 0, and the dest field taken from
* operand 0..3.
* \164..\167 - this instruction uses DREX rather than REX, with the
* OC0 field set to 1, and the dest field taken from
* operand 0..3.
* \170 - encodes the literal byte 0. (Some compilers don't take
* kindly to a zero byte in the _middle_ of a compile time
* string constant, so I had to put this hack in.)
* \171 - placement of DREX suffix in the absence of an EA
* \2ab - a ModRM, calculated on EA in operand a, with the spare
* field equal to digit b.
* \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
* \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
* \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
* \313 - indicates fixed 64-bit address size, 0x67 invalid.
* \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
* \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
* \322 - indicates that this instruction is only valid when the
* operand size is the default (instruction to disassembler,
* generates no code in the assembler)
* \323 - indicates fixed 64-bit operand size, REX on extensions only.
* \324 - indicates 64-bit operand size requiring REX prefix.
* \330 - a literal byte follows in the code stream, to be added
* to the condition code value of the instruction.
* \331 - instruction not valid with REP prefix. Hint for
* disassembler only; for SSE instructions.
* \332 - REP prefix (0xF2 byte) used as opcode extension.
* \333 - REP prefix (0xF3 byte) used as opcode extension.
* \334 - LOCK prefix used instead of REX.R
* \335 - disassemble a rep (0xF3 byte) prefix as repe not rep.
* \340 - reserve <operand 0> bytes of uninitialized storage.
* Operand 0 had better be a segmentless constant.
* \364 - operand-size prefix (0x66) not permitted
* \365 - address-size prefix (0x67) not permitted
* \366 - operand-size prefix (0x66) used as opcode extension
* \367 - address-size prefix (0x67) used as opcode extension
* \370,\371,\372 - match only if operand 0 meets byte jump criteria.
* \370 is used for Jcc, \371 is used for JMP.
* \373 - assemble 0x03 if bits==16, 0x05 if bits==32;
* used for conditional jump over longer jump
*/
#include "compiler.h"
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"
#include "preproc.h"
#include "regflags.c"
#include "regvals.c"
/*
 * Encoded form of an effective address: the bytes that were computed
 * for it plus the bookkeeping needed to size the instruction.
 */
typedef struct {
    int sib_present;    /* nonzero when a SIB byte is necessary */
    int bytes;          /* number of offset bytes needed */
    int size;           /* convenience total: sib + bytes + 1 */
    /* The encoded bytes themselves. */
    uint8_t modrm;
    uint8_t sib;
    uint8_t rex;
    uint8_t rip;        /* NOTE(review): presumably a RIP-relative marker - confirm */
} ea;
/*
 * File-scope state.  assemble() stores its `cp', `error', `output' and
 * `listgen' arguments here on entry so that the other routines in this
 * file can reach them without extra parameters.
 */
static uint32_t cpu; /* cpu level received from nasm.c */
/* Error-reporting callback (assemble()'s `error' argument). */
static efunc errfunc;
/* Output format driver (assemble()'s `output' argument). */
static struct ofmt *outfmt;
/* Listing-file generator (assemble()'s `listgen' argument). */
static ListGen *list;
/*
 * Forward declarations of file-local helpers; their definitions are not
 * in this part of the file.  jmp_match() calls calcsize() as
 * calcsize(segment, offset, bits, ins, code).
 */
static int32_t calcsize(int32_t, int32_t, int, insn *, const char *);
static void gencode(int32_t, int32_t, int, insn *, const char *, int32_t);
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int32_t, int);
static void add_asp(insn *, int);
/*
 * Tell whether `prefix' is among the prefixes recorded on `ins'.
 *
 * Scans ins->prefixes[0 .. ins->nprefix - 1] and returns 1 on the first
 * match, or 0 if no entry equals `prefix'.
 */
static int has_prefix(insn * ins, enum prefixes prefix)
{
    int idx = 0;

    while (idx < ins->nprefix) {
        if (ins->prefixes[idx] == prefix)
            return 1;
        idx++;
    }

    return 0;
}
/*
 * Raise a non-fatal "invalid ... prefix" error through errfunc if `ins'
 * carries `prefix'; do nothing otherwise.
 */
static void assert_no_prefix(insn * ins, enum prefixes prefix)
{
    if (!has_prefix(ins, prefix))
        return;

    errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(prefix));
}
/*
 * This routine wraps the real output format's output routine so that
 * a copy of the data is also handed to the listing file generator.
 *
 * The listing generator is called first, then (if the source position
 * changed) the debug format's linenum hook, and finally the output
 * format's own output routine with the caller's arguments.
 */
static void out(int32_t offset, int32_t segto, const void *data,
                uint32_t type, int32_t segment, int32_t wrt)
{
    /*
     * Both of these must be static: src_get() compares them with the
     * current source position and updates them in place.
     */
    static int32_t last_line = 0;
    static char *last_file = NULL;
    const uint32_t kind = type & OUT_TYPMASK;

    if (kind == OUT_ADDRESS) {
        if (segment == NO_SEG && wrt == NO_SEG) {
            /*
             * Non-relocated address: the listing gets it converted
             * into RAWDATA form of the matching width.
             */
            uint8_t raw[8], *w = raw;
            const uint32_t width = type & OUT_SIZMASK;

            if (width == 4) {
                WRITELONG(w, *(int32_t *)data);
                list->output(offset, raw, OUT_RAWDATA + 4);
            } else if (width == 8) {
                WRITEDLONG(w, *(int64_t *)data);
                list->output(offset, raw, OUT_RAWDATA + 8);
            } else {
                WRITESHORT(w, *(int32_t *)data);
                list->output(offset, raw, OUT_RAWDATA + 2);
            }
        } else {
            /*
             * Relocated address: it has to stay OUT_ADDRESS, so it
             * is passed to the listing unchanged.
             */
            list->output(offset, data, type);
        }
    } else if (kind == OUT_RAWDATA) {
        list->output(offset, data, type);
    } else if (kind == OUT_RESERVE) {
        /* Reserved space carries no data for the listing. */
        list->output(offset, NULL, type);
    } else if (kind == OUT_REL2ADR || kind == OUT_REL4ADR) {
        list->output(offset, data, type);
    }

    /*
     * src_get() decides when the debug-format-specific "linenum"
     * function is called.  It brings last_line and last_file up to
     * date and returns 0 for "same as last time", -2 if the file name
     * changed, and otherwise the amount by which the line number
     * changed.
     */
    if (src_get(&last_line, &last_file))
        outfmt->current_dfmt->linenum(last_file, last_line, segto);

    outfmt->output(segto, data, type, segment, wrt);
}
/*
 * Decide whether the byte-jump criteria of codes \370 / \371 are met
 * for operand 0 of `ins'.
 *
 * Returns 0 immediately when code[0] is neither 0370 nor 0371.
 *
 * For a forward reference (OPFLAG_FORWARD set on operand 0) the result
 * is 1 when the code is 0370 and either optimizing < 0 or the operand
 * is STRICT; otherwise the result is (pass0 == 0).
 *
 * In all other cases the instruction size is computed with calcsize(),
 * and the result is 1 only if the target lies in `segment' and the
 * distance from the end of the instruction to the target fits in a
 * signed byte (-128 .. 127).
 */
static int jmp_match(int32_t segment, int32_t offset, int bits,
                     insn * ins, const char *code)
{
    const uint8_t opc = code[0];
    int32_t insn_len;
    int32_t delta;

    if (opc != 0370 && opc != 0371)
        return 0;

    if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
        int take_short = (opc == 0370) &&
            (optimizing < 0 || (ins->oprs[0].type & STRICT));

        if (take_short)
            return 1;
        return (pass0 == 0);    /* match a forward reference */
    }

    /* The size is computed before the segment check, as it always was. */
    insn_len = calcsize(segment, offset, bits, ins, code);

    if (ins->oprs[0].segment != segment)
        return 0;

    /* Displacement measured from the end of this instruction. */
    delta = ins->oprs[0].offset - offset - insn_len;

    return (delta >= -128L && delta <= 127L);   /* 1 if it is byte size */
}
int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
insn * instruction, struct ofmt *output, efunc error,
ListGen * listgen)
{
const struct itemplate *temp;
int j;
int size_prob;
int32_t insn_end;
int32_t itimes;
int32_t start = offset;
int32_t wsize = 0; /* size for DB etc. */
errfunc = error; /* to pass to other functions */
cpu = cp;
outfmt = output; /* likewise */
list = listgen; /* and again */
switch (instruction->opcode) {
case -1:
return 0;
case I_DB:
wsize = 1;
break;
case I_DW:
wsize = 2;
break;
case I_DD:
wsize = 4;
break;
case I_DQ:
wsize = 8;
break;
case I_DT:
wsize = 10;
break;
case I_DO:
wsize = 16;
break;
default:
break;
}
if (wsize) {
extop *e;
int32_t t = instruction->times;
if (t < 0)
errfunc(ERR_PANIC,
"instruction->times < 0 (%ld) in assemble()", t);
while (t--) { /* repeat TIMES times */
for (e = instruction->eops; e; e = e->next) {
if (e->type == EOT_DB_NUMBER) {
if (wsize == 1) {
if (e->segment != NO_SEG)
errfunc(ERR_NONFATAL,
"one-byte relocation attempted");
else {
uint8_t out_byte = e->offset;
out(offset, segment, &out_byte,
OUT_RAWDATA + 1, NO_SEG, NO_SEG);
}
} else if (wsize > 8) {
errfunc(ERR_NONFATAL, "integer supplied to a DT or DO"
" instruction");
} else
out(offset, segment, &e->offset,
OUT_ADDRESS + wsize, e->segment, e->wrt);
offset += wsize;
} else if (e->type == EOT_DB_STRING) {
int align;
out(offset, segment, e->stringval,
OUT_RAWDATA + e->stringlen, NO_SEG, NO_SEG);
align = e->stringlen % wsize;
if (align) {
align = wsize - align;
out(offset, segment, "\0\0\0\0\0\0\0\0",
OUT_RAWDATA + align, NO_SEG, NO_SEG);
}
offset += e->stringlen + align;
}
}
if (t > 0 && t == instruction->times - 1) {
/*
* Dummy call to list->output to give the offset to the
* listing module.
*/
list->output(offset, NULL, OUT_RAWDATA);
list->uplevel(LIST_TIMES);
}
}
if (instruction->times > 1)
list->downlevel(LIST_TIMES);
return offset - start;
}
if (instruction->opcode == I_INCBIN) {
static char fname[FILENAME_MAX];
FILE *fp;
int32_t len;
char *prefix = "", *combine;
char **pPrevPath = NULL;
len = FILENAME_MAX - 1;
if (len > instruction->eops->stringlen)
len = instruction->eops->stringlen;
strncpy(fname, instruction->eops->stringval, len);
fname[len] = '\0';
while (1) { /* added by alexfru: 'incbin' uses include paths */
combine = nasm_malloc(strlen(prefix) + len + 1);
strcpy(combine, prefix);
strcat(combine, fname);
if ((fp = fopen(combine, "rb")) != NULL) {
nasm_free(combine);
break;
}
nasm_free(combine);
pPrevPath = pp_get_include_path_ptr(pPrevPath);
if (pPrevPath == NULL)
break;
prefix = *pPrevPath;
}
if (fp == NULL)
error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
fname);
else if (fseek(fp, 0L, SEEK_END) < 0)
error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
fname);
else {
static char buf[2048];
int32_t t = instruction->times;
int32_t base = 0;
len = ftell(fp);
if (instruction->eops->next) {
base = instruction->eops->next->offset;
len -= base;
if (instruction->eops->next->next &&
len > instruction->eops->next->next->offset)
len = instruction->eops->next->next->offset;
}
/*
* Dummy call to list->output to give the offset to the
* listing module.
*/
list->output(offset, NULL, OUT_RAWDATA);
list->uplevel(LIST_INCBIN);
while (t--) {
int32_t l;
fseek(fp, base, SEEK_SET);
l = len;
while (l > 0) {
int32_t m =
fread(buf, 1, (l > sizeof(buf) ? sizeof(buf) : l),
fp);
if (!m) {
/*
* This shouldn't happen unless the file
* actually changes while we are reading
* it.
*/
error(ERR_NONFATAL,
"`incbin': unexpected EOF while"
" reading file `%s'", fname);
t = 0; /* Try to exit cleanly */
break;
}
out(offset, segment, buf, OUT_RAWDATA + m,
NO_SEG, NO_SEG);
l -= m;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -