📄 assemble.c
字号:
/* assemble.c code generation for the Netwide Assembler
*
* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
* Julian Hall. All rights reserved. The software is
* redistributable under the license given in the file "LICENSE"
* distributed in the NASM archive.
*
* the actual codes (C syntax, i.e. octal):
* \0 - terminates the code. (Unless it's a literal of course.)
* \1, \2, \3 - that many literal bytes follow in the code stream
* \10..\13 - a literal byte follows in the code stream, to be added
* to the register value of operand 0..3
* \14..\17 - a signed byte immediate operand, from operand 0..3
* \20..\23 - a byte immediate operand, from operand 0..3
* \24..\27 - an unsigned byte immediate operand, from operand 0..3
* \30..\33 - a word immediate operand, from operand 0..3
* \34..\37 - select between \3[0-3] and \4[0-3] depending on 16/32 bit
* assembly mode or the operand-size override on the operand
* \40..\43 - a long immediate operand, from operand 0..3
* \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7]
* depending on the address size of the instruction.
* \50..\53 - a byte relative operand, from operand 0..3
* \54..\57 - a qword immediate operand, from operand 0..3
* \60..\63 - a word relative operand, from operand 0..3
* \64..\67 - select between \6[0-3] and \7[0-3] depending on 16/32 bit
* assembly mode or the operand-size override on the operand
* \70..\73 - a long relative operand, from operand 0..3
* \74..\77 - a word constant, from the _segment_ part of operand 0..3
* \1ab - a ModRM, calculated on EA in operand a, with the spare
* field the register value of operand b.
* \140..\143 - an immediate word or signed byte for operand 0..3
* \144..\147 - or 2 (s-field) into opcode byte if operand 0..3
* is a signed byte rather than a word. Opcode byte follows.
* \150..\153 - an immediate dword or signed byte for operand 0..3
* \154..\157 - or 2 (s-field) into opcode byte if operand 0..3
* is a signed byte rather than a dword. Opcode byte follows.
* \160..\163 - this instruction uses DREX rather than REX, with the
* OC0 field set to 0, and the dest field taken from
* operand 0..3.
* \164..\167 - this instruction uses DREX rather than REX, with the
* OC0 field set to 1, and the dest field taken from
* operand 0..3.
* \171 - placement of DREX suffix in the absence of an EA
* \172\ab - the register number from operand a in bits 7..4, with
* the 4-bit immediate from operand b in bits 3..0.
* \173\xab - the register number from operand a in bits 7..4, with
* the value b in bits 3..0.
* \174\a - the register number from operand a in bits 7..4, and
* an arbitrary value in bits 3..0 (assembled as zero.)
* \2ab - a ModRM, calculated on EA in operand a, with the spare
* field equal to digit b.
* \250..\253 - same as \150..\153, except warn if the 64-bit operand
* is not equal to the truncated and sign-extended 32-bit
* operand; used for 32-bit immediates in 64-bit mode.
* \254..\257 - a signed 32-bit operand to be extended to 64 bits.
* \260..\263 - this instruction uses VEX rather than REX, with the
* V field taken from operand 0..3.
* \270 - this instruction uses VEX rather than REX, with the
* V field set to 1111b.
*
* VEX prefixes are followed by the sequence:
* \mm\wlp where mm is the M field; and wlp is:
* 00 0ww lpp
* [w0] ww = 0 for W = 0
* [w1] ww = 1 for W = 1
* [wx] ww = 2 for W don't care (always assembled as 0)
* [ww] ww = 3 for W used as REX.W
*
*
* \274..\277 - a signed byte immediate operand, from operand 0..3,
* which is to be extended to the operand size.
* \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
* \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
* \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
* \313 - indicates fixed 64-bit address size, 0x67 invalid.
* \314 - (disassembler only) invalid with REX.B
* \315 - (disassembler only) invalid with REX.X
* \316 - (disassembler only) invalid with REX.R
* \317 - (disassembler only) invalid with REX.W
* \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
* \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
* \322 - indicates that this instruction is only valid when the
* operand size is the default (instruction to disassembler,
* generates no code in the assembler)
* \323 - indicates fixed 64-bit operand size, REX on extensions only.
* \324 - indicates 64-bit operand size requiring REX prefix.
* \330 - a literal byte follows in the code stream, to be added
* to the condition code value of the instruction.
* \331 - instruction not valid with REP prefix. Hint for
* disassembler only; for SSE instructions.
* \332 - REP prefix (0xF2 byte) used as opcode extension.
* \333 - REP prefix (0xF3 byte) used as opcode extension.
* \334 - LOCK prefix used instead of REX.R
* \335 - disassemble a rep (0xF3 byte) prefix as repe not rep.
* \336 - force a REP(E) prefix (0xF2) even if not specified.
* \337 - force a REPNE prefix (0xF3) even if not specified.
* \336-\337 are still listed as prefixes in the disassembler.
* \340 - reserve <operand 0> bytes of uninitialized storage.
* Operand 0 had better be a segmentless constant.
* \344,\345 - the PUSH/POP (respectively) codes for CS, DS, ES, SS
* (POP is never used for CS) depending on operand 0
* \346,\347 - the second byte of PUSH/POP codes for FS, GS, depending
* on operand 0
* \360 - no SSE prefix (== \364\331)
* \361 - 66 SSE prefix (== \366\331)
* \362 - F2 SSE prefix (== \364\332)
* \363 - F3 SSE prefix (== \364\333)
* \364 - operand-size prefix (0x66) not permitted
* \365 - address-size prefix (0x67) not permitted
* \366 - operand-size prefix (0x66) used as opcode extension
* \367 - address-size prefix (0x67) used as opcode extension
* \370,\371,\372 - match only if operand 0 meets byte jump criteria.
* 370 is used for Jcc, 371 is used for JMP.
* \373 - assemble 0x03 if bits==16, 0x05 if bits==32;
* used for conditional jump over longer jump
*/
#include "compiler.h"
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"
#include "tables.h"
/* Initialized to zero by the C standard */
static const uint8_t const_zero_buf[256];
typedef struct {
int sib_present; /* is a SIB byte necessary? */
int bytes; /* # of bytes of offset needed */
int size; /* lazy - this is sib+bytes+1 */
uint8_t modrm, sib, rex, rip; /* the bytes themselves */
} ea;
static uint32_t cpu; /* cpu level received from nasm.c */
static efunc errfunc;
static struct ofmt *outfmt;
static ListGen *list;
static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
static void gencode(int32_t segment, int64_t offset, int bits,
insn * ins, const struct itemplate *temp,
int64_t insn_end);
static int matches(const struct itemplate *, insn *, int bits);
static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
static ea *process_ea(operand *, ea *, int, int, int, int32_t);
static void add_asp(insn *, int);
static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
{
return ins->prefixes[pos] == prefix;
}
static void assert_no_prefix(insn * ins, enum prefix_pos pos)
{
if (ins->prefixes[pos])
errfunc(ERR_NONFATAL, "invalid %s prefix",
prefix_name(ins->prefixes[pos]));
}
static const char *size_name(int size)
{
switch (size) {
case 1:
return "byte";
case 2:
return "word";
case 4:
return "dword";
case 8:
return "qword";
case 10:
return "tword";
case 16:
return "oword";
case 32:
return "yword";
default:
return "???";
}
}
static void warn_overflow(int size, const struct operand *o)
{
if (size < 8 && o->wrt == NO_SEG && o->segment == NO_SEG) {
int64_t lim = ((int64_t)1 << (size*8))-1;
int64_t data = o->offset;
if (data < ~lim || data > lim)
errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
"%s data exceeds bounds", size_name(size));
}
}
/*
* This routine wrappers the real output format's output routine,
* in order to pass a copy of the data off to the listing file
* generator at the same time.
*/
static void out(int64_t offset, int32_t segto, const void *data,
enum out_type type, uint64_t size,
int32_t segment, int32_t wrt)
{
static int32_t lineno = 0; /* static!!! */
static char *lnfname = NULL;
uint8_t p[8];
if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
/*
* This is a non-relocated address, and we're going to
* convert it into RAWDATA format.
*/
uint8_t *q = p;
if (size > 8) {
errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
return;
}
WRITEADDR(q, *(int64_t *)data, size);
data = p;
type = OUT_RAWDATA;
}
list->output(offset, data, type, size);
/*
* this call to src_get determines when we call the
* debug-format-specific "linenum" function
* it updates lineno and lnfname to the current values
* returning 0 if "same as last time", -2 if lnfname
* changed, and the amount by which lineno changed,
* if it did. thus, these variables must be static
*/
if (src_get(&lineno, &lnfname)) {
outfmt->current_dfmt->linenum(lnfname, lineno, segto);
}
outfmt->output(segto, data, type, size, segment, wrt);
}
static bool jmp_match(int32_t segment, int64_t offset, int bits,
insn * ins, const uint8_t *code)
{
int64_t isize;
uint8_t c = code[0];
if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
return false;
if (!optimizing)
return false;
if (optimizing < 0 && c == 0371)
return false;
isize = calcsize(segment, offset, bits, ins, code);
if (ins->oprs[0].segment != segment)
return false;
isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
return (isize >= -128 && isize <= 127); /* is it byte size? */
}
int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
insn * instruction, struct ofmt *output, efunc error,
ListGen * listgen)
{
const struct itemplate *temp;
int j;
int size_prob;
int64_t insn_end;
int32_t itimes;
int64_t start = offset;
int64_t wsize = 0; /* size for DB etc. */
errfunc = error; /* to pass to other functions */
cpu = cp;
outfmt = output; /* likewise */
list = listgen; /* and again */
switch (instruction->opcode) {
case -1:
return 0;
case I_DB:
wsize = 1;
break;
case I_DW:
wsize = 2;
break;
case I_DD:
wsize = 4;
break;
case I_DQ:
wsize = 8;
break;
case I_DT:
wsize = 10;
break;
case I_DO:
wsize = 16;
break;
case I_DY:
wsize = 32;
break;
default:
break;
}
if (wsize) {
extop *e;
int32_t t = instruction->times;
if (t < 0)
errfunc(ERR_PANIC,
"instruction->times < 0 (%ld) in assemble()", t);
while (t--) { /* repeat TIMES times */
for (e = instruction->eops; e; e = e->next) {
if (e->type == EOT_DB_NUMBER) {
if (wsize == 1) {
if (e->segment != NO_SEG)
errfunc(ERR_NONFATAL,
"one-byte relocation attempted");
else {
uint8_t out_byte = e->offset;
out(offset, segment, &out_byte,
OUT_RAWDATA, 1, NO_SEG, NO_SEG);
}
} else if (wsize > 8) {
errfunc(ERR_NONFATAL,
"integer supplied to a DT, DO or DY"
" instruction");
} else
out(offset, segment, &e->offset,
OUT_ADDRESS, wsize, e->segment, e->wrt);
offset += wsize;
} else if (e->type == EOT_DB_STRING ||
e->type == EOT_DB_STRING_FREE) {
int align;
out(offset, segment, e->stringval,
OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
align = e->stringlen % wsize;
if (align) {
align = wsize - align;
out(offset, segment, const_zero_buf,
OUT_RAWDATA, align, NO_SEG, NO_SEG);
}
offset += e->stringlen + align;
}
}
if (t > 0 && t == instruction->times - 1) {
/*
* Dummy call to list->output to give the offset to the
* listing module.
*/
list->output(offset, NULL, OUT_RAWDATA, 0);
list->uplevel(LIST_TIMES);
}
}
if (instruction->times > 1)
list->downlevel(LIST_TIMES);
return offset - start;
}
if (instruction->opcode == I_INCBIN) {
const char *fname = instruction->eops->stringval;
FILE *fp;
fp = fopen(fname, "rb");
if (!fp) {
error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
fname);
} else if (fseek(fp, 0L, SEEK_END) < 0) {
error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
fname);
} else {
static char buf[4096];
size_t t = instruction->times;
size_t base = 0;
size_t len;
len = ftell(fp);
if (instruction->eops->next) {
base = instruction->eops->next->offset;
len -= base;
if (instruction->eops->next->next &&
len > (size_t)instruction->eops->next->next->offset)
len = (size_t)instruction->eops->next->next->offset;
}
/*
* Dummy call to list->output to give the offset to the
* listing module.
*/
list->output(offset, NULL, OUT_RAWDATA, 0);
list->uplevel(LIST_INCBIN);
while (t--) {
size_t l;
fseek(fp, base, SEEK_SET);
l = len;
while (l > 0) {
int32_t m =
fread(buf, 1, (l > (int32_t) sizeof(buf) ? (int32_t) sizeof(buf) : l),
fp);
if (!m) {
/*
* This shouldn't happen unless the file
* actually changes while we are reading
* it.
*/
error(ERR_NONFATAL,
"`incbin': unexpected EOF while"
" reading file `%s'", fname);
t = 0; /* Try to exit cleanly */
break;
}
out(offset, segment, buf, OUT_RAWDATA, m,
NO_SEG, NO_SEG);
l -= m;
}
}
list->downlevel(LIST_INCBIN);
if (instruction->times > 1) {
/*
* Dummy call to list->output to give the offset to the
* listing module.
*/
list->output(offset, NULL, OUT_RAWDATA, 0);
list->uplevel(LIST_TIMES);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -