📄 assemble.c
字号:
/* assemble.c code generation for the Netwide Assembler
*
* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
* Julian Hall. All rights reserved. The software is
* redistributable under the licence given in the file "Licence"
* distributed in the NASM archive.
*
* the actual codes (C syntax, i.e. octal):
* \0 - terminates the code. (Unless it's a literal of course.)
* \1, \2, \3 - that many literal bytes follow in the code stream
* \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
* (POP is never used for CS) depending on operand 0
* \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
* on operand 0
* \10, \11, \12 - a literal byte follows in the code stream, to be added
* to the register value of operand 0, 1 or 2
* \17 - encodes the literal byte 0. (Some compilers don't take
* kindly to a zero byte in the _middle_ of a compile time
* string constant, so I had to put this hack in.)
* \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
* \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
* \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
* \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
* \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
* assembly mode or the address-size override on the operand
* \37 - a word constant, from the _segment_ part of operand 0
* \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
* \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
* \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
* \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
* assembly mode or the address-size override on the operand
* \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
* \1ab - a ModRM, calculated on EA in operand a, with the spare
* field the register value of operand b.
* \2ab - a ModRM, calculated on EA in operand a, with the spare
* field equal to digit b.
* \30x - might be an 0x67 byte, depending on the address size of
* the memory reference in operand x.
* \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
* \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
* \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
* \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
* \322 - indicates that this instruction is only valid when the
* operand size is the default (instruction to disassembler,
* generates no code in the assembler)
* \330 - a literal byte follows in the code stream, to be added
* to the condition code value of the instruction.
* \331 - instruction not valid with REP prefix. Hint for
* disassembler only; for SSE instructions.
* \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
* \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
* as a literal byte in order to aid the disassembler.
* \340 - reserve <operand 0> bytes of uninitialised storage.
* Operand 0 had better be a segmentless constant.
*/
#include <stdio.h>
#include <string.h>
#include "nasm.h"
#include "nasmlib.h"
#include "assemble.h"
#include "insns.h"
/*
 * Instruction template tables, defined outside this file.  The array is
 * indexed by an instruction's opcode; each entry points to a list of
 * templates that is terminated by a template whose opcode field is -1.
 */
extern struct itemplate *nasm_instructions[];
/*
 * Encoded form of an effective address: the ModRM byte, the optional
 * SIB byte, and a note of how many offset bytes accompany them.
 */
typedef struct {
    int sib_present;        /* non-zero if a SIB byte is required */
    int bytes;              /* number of offset bytes needed */
    int size;               /* convenience total: sib + bytes + 1 */
    unsigned char modrm;    /* the ModRM byte itself */
    unsigned char sib;      /* the SIB byte itself */
} ea;
/*
 * File-scope state shared with the helper routines.  errfunc is set from
 * the `error' argument on entry to both assemble() and insn_size();
 * outfmt and list are set from the `output' and `listgen' arguments of
 * assemble().
 */
static efunc errfunc;
static struct ofmt *outfmt;
static ListGen *list;
/*
 * Forward declarations of the static helpers.
 *
 * calcsize is called from assemble() as (segment, offset, bits,
 * instruction, codes); a negative result is treated there as an error.
 * gencode is called with the same arguments plus the offset of the end
 * of the instruction.
 * matches is called as (template, instruction); assemble() treats 100 as
 * a match and 1 or 2 as "would have matched but for operand size".
 * regval, process_ea and chsize are not called in the part of the file
 * shown here -- NOTE(review): see their definitions for their contracts.
 */
static long calcsize (long, long, int, insn *, char *);
static void gencode (long, long, int, insn *, char *, long);
static int regval (operand *o);
static int matches (struct itemplate *, insn *);
static ea * process_ea (operand *, ea *, int, int, int);
static int chsize (operand *, int);
/*
 * This routine wraps the real output format's output routine, so that
 * a copy of every piece of data is handed to the listing file generator
 * before the data goes to the output driver itself.
 *
 * offset  - offset reported to the listing generator
 * segto   - segment the data is being written to
 * data    - the data (for address types, points at a long)
 * type    - OUT_xxx type code combined with a size
 * segment - segment the data refers to, or NO_SEG
 * wrt     - WRT segment the data refers to, or NO_SEG
 */
static void out (long offset, long segto, void *data, unsigned long type,
                 long segment, long wrt)
{
    /* static: src_get() is handed the same two objects on every call */
    static long cur_line;
    static char *cur_file;
    const unsigned long kind = type & OUT_TYPMASK;

    if (kind == OUT_ADDRESS) {
        if (segment == NO_SEG && wrt == NO_SEG) {
            /*
             * Not relocated: the listing gets the value converted
             * into RAWDATA form, two or four bytes long.
             */
            unsigned char raw[4];
            unsigned char *cursor = raw;

            if ((type & OUT_SIZMASK) != 4) {
                WRITESHORT (cursor, * (long *) data);
                list->output (offset, raw, OUT_RAWDATA+2);
            } else {
                WRITELONG (cursor, * (long *) data);
                list->output (offset, raw, OUT_RAWDATA+4);
            }
        } else {
            /*
             * Relocated address: it has to be listed as OUT_ADDRESS,
             * so it is passed through untouched.
             */
            list->output (offset, data, type);
        }
    } else if (kind == OUT_RAWDATA) {
        list->output (offset, data, type);
    } else if (kind == OUT_RESERVE) {
        /* reserved space has no data to show */
        list->output (offset, NULL, type);
    } else if (kind == OUT_REL2ADR || kind == OUT_REL4ADR) {
        list->output (offset, data, type);
    }

    /*
     * NOTE(review): src_get() presumably returns non-zero when the
     * source position has changed -- confirm against its definition.
     */
    if (src_get(&cur_line, &cur_file))
        outfmt->current_dfmt->linenum(cur_file, cur_line, segto);

    outfmt->output (segto, data, type, segment, wrt);
}
/*
 * assemble: generate the output for one parsed instruction.
 *
 * segment     - segment the output is written to
 * offset      - offset within that segment at which output starts
 * bits        - assembly mode (compared against 16 and 32 below)
 * instruction - the parsed instruction, including TIMES count, prefixes
 *               and (for Dx/INCBIN) the extended operand list
 * output      - output format driver, stored in `outfmt' for out()
 * error       - error reporting routine, stored in `errfunc'
 * listgen     - listing generator, stored in `list'
 *
 * Three kinds of instruction are handled:
 *   - DB/DW/DD/DQ/DT: each extended operand is emitted, strings being
 *     zero-padded up to a multiple of the element size;
 *   - INCBIN: the named file (optionally starting at a base offset and
 *     limited to a maximum length) is copied to the output;
 *   - everything else: the template list for the opcode is searched for
 *     a match, prefixes are emitted and gencode() produces the bytes.
 *
 * Returns the number of bytes by which the offset advanced.  For INCBIN
 * that is instruction->times * len.  Returns 0 when the opcode is -1,
 * when INCBIN cannot open or seek the file, or when no template matches.
 */
long assemble (long segment, long offset, int bits,
               insn *instruction, struct ofmt *output, efunc error,
               ListGen *listgen)
{
    struct itemplate *temp;
    int j;
    int size_prob;
    long insn_end;
    long itimes;
    long start = offset;
    long wsize = 0;                    /* size for DB etc. */

    errfunc = error;                   /* to pass to other functions */
    outfmt = output;                   /* likewise */
    list = listgen;                    /* and again */

    switch (instruction->opcode)
    {
      case -1: return 0;
      case I_DB: wsize = 1; break;
      case I_DW: wsize = 2; break;
      case I_DD: wsize = 4; break;
      case I_DQ: wsize = 8; break;
      case I_DT: wsize = 10; break;
    }

    if (wsize) {
        extop * e;
        long t = instruction->times;
        if (t < 0)
            errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);

        while (t--)                    /* repeat TIMES times */
        {
            for (e = instruction->eops; e; e = e->next)
            {
                if (e->type == EOT_DB_NUMBER)
                {
                    if (wsize == 1) {
                        if (e->segment != NO_SEG)
                            errfunc (ERR_NONFATAL,
                                     "one-byte relocation attempted");
                        else {
                            unsigned char out_byte = e->offset;
                            out (offset, segment, &out_byte, OUT_RAWDATA+1,
                                 NO_SEG, NO_SEG);
                        }
                    }
                    else if (wsize > 5) {
                        /* only DQ (8) and DT (10) are wider than 5 */
                        errfunc (ERR_NONFATAL, "integer supplied to a D%c"
                                 " instruction", wsize==8 ? 'Q' : 'T');
                    }
                    else
                        out (offset, segment, &e->offset,
                             OUT_ADDRESS+wsize, e->segment,
                             e->wrt);
                    offset += wsize;
                }
                else if (e->type == EOT_DB_STRING)
                {
                    int align;

                    out (offset, segment, e->stringval,
                         OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
                    align = e->stringlen % wsize;

                    if (align) {
                        /* pad the string to a multiple of wsize bytes */
                        align = wsize - align;
                        out (offset, segment, "\0\0\0\0\0\0\0\0",
                             OUT_RAWDATA+align, NO_SEG, NO_SEG);
                    }
                    offset += e->stringlen + align;
                }
            }
            /*
             * True only after the first repetition, and only when
             * further repetitions remain.
             */
            if (t > 0 && t == instruction->times-1)
            {
                /*
                 * Dummy call to list->output to give the offset to the
                 * listing module.
                 */
                list->output (offset, NULL, OUT_RAWDATA);
                list->uplevel (LIST_TIMES);
            }
        }
        if (instruction->times > 1)
            list->downlevel (LIST_TIMES);
        return offset - start;
    }

    if (instruction->opcode == I_INCBIN)
    {
        static char fname[FILENAME_MAX];
        FILE * fp;
        long len;

        /* copy the file name, truncated to fit, and terminate it */
        len = FILENAME_MAX-1;
        if (len > instruction->eops->stringlen)
            len = instruction->eops->stringlen;
        strncpy (fname, instruction->eops->stringval, len);
        fname[len] = '\0';

        if ( (fp = fopen(fname, "rb")) == NULL)
            error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
        else if (fseek(fp, 0L, SEEK_END) < 0) {
            error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
                   fname);
            /* the file was opened: close it so the stream is not leaked */
            fclose (fp);
        }
        else
        {
            static char buf[2048];
            long t = instruction->times;
            long base = 0;

            len = ftell (fp);          /* positioned at EOF: file length */
            if (instruction->eops->next) {
                /* second operand: starting offset within the file */
                base = instruction->eops->next->offset;
                len -= base;
                /* third operand: upper limit on the number of bytes */
                if (instruction->eops->next->next &&
                    len > instruction->eops->next->next->offset)
                    len = instruction->eops->next->next->offset;
            }
            /*
             * Dummy call to list->output to give the offset to the
             * listing module.
             */
            list->output (offset, NULL, OUT_RAWDATA);
            list->uplevel(LIST_INCBIN);
            while (t--)
            {
                long l;

                fseek (fp, base, SEEK_SET);
                l = len;
                while (l > 0) {
                    long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
                                    fp);
                    if (!m) {
                        /*
                         * This shouldn't happen unless the file
                         * actually changes while we are reading
                         * it.
                         */
                        error (ERR_NONFATAL, "`incbin': unexpected EOF while"
                               " reading file `%s'", fname);
                        t=0;           /* Try to exit cleanly */
                        break;
                    }
                    out (offset, segment, buf, OUT_RAWDATA+m,
                         NO_SEG, NO_SEG);
                    l -= m;
                }
            }
            list->downlevel(LIST_INCBIN);
            if (instruction->times > 1) {
                /*
                 * Dummy call to list->output to give the offset to the
                 * listing module.
                 */
                list->output (offset, NULL, OUT_RAWDATA);
                list->uplevel(LIST_TIMES);
                list->downlevel(LIST_TIMES);
            }
            fclose (fp);
            return instruction->times * len;
        }
        return 0;                      /* if we're here, there's an error */
    }

    size_prob = FALSE;
    temp = nasm_instructions[instruction->opcode];
    while (temp->opcode != -1) {
        int m = matches (temp, instruction);

        if (m == 100)                  /* matches! */
        {
            char *codes = temp->code;
            long insn_size = calcsize(segment, offset, bits,
                                      instruction, codes);
            itimes = instruction->times;
            if (insn_size < 0)         /* shouldn't be, on pass two */
                error (ERR_PANIC, "errors made it through from pass one");
            else while (itimes--) {
                insn_end = offset + insn_size;
                for (j=0; j<instruction->nprefix; j++) {
                    /*
                     * c stays zero when no prefix byte is needed, e.g.
                     * a size override that matches the current mode.
                     */
                    unsigned char c=0;
                    switch (instruction->prefixes[j]) {
                      case P_LOCK:
                        c = 0xF0; break;
                      case P_REPNE: case P_REPNZ:
                        c = 0xF2; break;
                      case P_REPE: case P_REPZ: case P_REP:
                        c = 0xF3; break;
                      case R_CS: c = 0x2E; break;
                      case R_DS: c = 0x3E; break;
                      case R_ES: c = 0x26; break;
                      case R_FS: c = 0x64; break;
                      case R_GS: c = 0x65; break;
                      case R_SS: c = 0x36; break;
                      case P_A16:
                        if (bits != 16)
                            c = 0x67;
                        break;
                      case P_A32:
                        if (bits != 32)
                            c = 0x67;
                        break;
                      case P_O16:
                        if (bits != 16)
                            c = 0x66;
                        break;
                      case P_O32:
                        if (bits != 32)
                            c = 0x66;
                        break;
                      default:
                        error (ERR_PANIC,
                               "invalid instruction prefix");
                    }
                    if (c != 0) {
                        out (offset, segment, &c, OUT_RAWDATA+1,
                             NO_SEG, NO_SEG);
                        offset++;
                    }
                }
                gencode (segment, offset, bits, instruction, codes, insn_end);
                offset += insn_size;
                /*
                 * True only after the first repetition, and only when
                 * further repetitions remain.
                 */
                if (itimes > 0 && itimes == instruction->times-1) {
                    /*
                     * Dummy call to list->output to give the offset to the
                     * listing module.
                     */
                    list->output (offset, NULL, OUT_RAWDATA);
                    list->uplevel (LIST_TIMES);
                }
            }
            if (instruction->times > 1)
                list->downlevel (LIST_TIMES);
            return offset - start;
        } else if (m > 0) {
            /* remember why the closest template failed to match */
            size_prob = m;
        }
        temp++;
    }

    if (temp->opcode == -1) {          /* didn't match any instruction */
        if (size_prob == 1)            /* would have matched, but for size */
            error (ERR_NONFATAL, "operation size not specified");
        else if (size_prob == 2)
            error (ERR_NONFATAL, "mismatch in operand sizes");
        else
            error (ERR_NONFATAL,
                   "invalid combination of opcode and operands");
    }
    return 0;
}
long insn_size (long segment, long offset, int bits,
insn *instruction, efunc error)
{
struct itemplate *temp;
errfunc = error; /* to pass to other functions */
if (instruction->opcode == -1)
return 0;
if (instruction->opcode == I_DB ||
instruction->opcode == I_DW ||
instruction->opcode == I_DD ||
instruction->opcode == I_DQ ||
instruction->opcode == I_DT)
{
extop *e;
long isize, osize, wsize = 0; /* placate gcc */
isize = 0;
switch (instruction->opcode)
{
case I_DB: wsize = 1; break;
case I_DW: wsize = 2; break;
case I_DD: wsize = 4; break;
case I_DQ: wsize = 8; break;
case I_DT: wsize = 10; break;
}
for (e = instruction->eops; e; e = e->next)
{
long align;
osize = 0;
if (e->type == EOT_DB_NUMBER)
osize = 1;
else if (e->type == EOT_DB_STRING)
osize = e->stringlen;
align = (-osize) % wsize;
if (align < 0)
align += wsize;
isize += osize + align;
}
return isize * instruction->times;
}
if (instruction->opcode == I_INCBIN)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -