uasm.c

来自「MMURTL(tm) Computer Operating System Ver」· C语言 代码 · 共 716 行 · 第 1/2 页

C
716
字号
/* This is the MMURTL Debugger disassembler.
*/

/*
  MMURTL Operating System Source Code
  Copyright 1991,1992,1993, Richard A. Burgess
  ALL RIGHTS RESERVED
  Version x0.8
*/


/* Integer shorthands used throughout the disassembler: U = unsigned,
   S = signed, followed by the intended width in bits.
   NOTE(review): the 32-bit names expand to "long", so they are 32 bits wide
   only on targets where long is 32 bits - confirm for the compiler in use. */
#define U32 unsigned long
#define U16 unsigned short
#define U8 unsigned char
#define S32 signed long
#define S16 signed short
#define S8 signed char

/* Forward declarations for the routines of this file. */

/* Code-stream access: next byte, and the cached ModRM / SIB bytes. */
U8 getbyte(void);
U8 modrm(void);
U8 sib(void);
/* Operand size in bytes for a size-code letter from the opcode tables. */
int bytes(char c);
void ohex(char c, int extend, int optional, int defsize);
void reg_name(U8 which, char size);
void escape(char c, char t);
void decode(char *s);
void do_sib(int m);
void do_modrm(char t);
U32 disassemble(U32 Addr);

extern long xprintf(char *fmt, ...);		/* From Monitor.c */

/* NOTE(review): SEGSIZE is not referenced in the visible part of the file;
   presumably the default segment size in bits - confirm. */
#define SEGSIZE  32
/* Address of the next code byte; getbyte() reads through it and then
   increments it. */
U8 *addrIn;

/*  The Intel 386 DX Programmer's Reference Manual provides a table that
uses the following codes to assist in disassembly of 386 code (page A-3).
The letters below are the same as the codes in the manual.  The ~ (tilde)
is an escape character to signify expansion of the codes is
required when the string is being output.

Tilde tokens in strings:
   First char after '~':
 A - Direct address
 C - Reg of R/M picks control register
 D - Reg of R/M picks debug register
 E - R/M picks operand
 F - Flag register
 G - Reg of R/M selects a general register
 I - Immediate data
 J - Relative IP offset
 M - R/M picks memory
 O - No R/M, offset only
 R - Mod of R/M picks register only
 S - Reg of R/M picks segment register
 T - reg of R/M picks test register
 X - DS:ESI
 Y - ES:EDI
 2 - prefix of two-byte opcode
 e - put in 'e' if use32 (second char is part of reg name)
     put in 'w' for use16 or 'd' for use32 (second char is 'w')
 f - Floating point (second char is esc value)
 g - do R/M group 'n'
 p - prefix
 s - Size override (second char is a,o)

   Second char after '~':
 a - Two words in memory (BOUND)
 b - Byte
 c - Byte or word
 d - DWord
 p - 32 or 48 bit pointer
 s - Six byte pseudo-descriptor
 v - Word or DWord
 w - Word
 1-8 - group number, esc value, etc
*/

/* Primary (one-byte) opcode map: opmap1[op] is the output template for
   opcode byte 'op' (256 entries, 16 per row group below).  Templates use
   the '~' escape codes described in the comment above this table.  An
   entry of 0 means no template is defined for that opcode here.

   Corrections relative to the earlier table:
     72h/73h  were swapped; 72h is JC (JB) and 73h is JNC (JNB).
     82h      is an alias of 80h (group 1, Eb,Ib), not "MOV AL,Ib"; the old
              entry did not consume the ModRM byte.
     99h      mnemonic is CWD, not "CDW".
     D4h/D5h  AAM/AAD carry an immediate byte which must be consumed so the
              next instruction is decoded from the right address.
*/
char *opmap1[] = {
/* 0 */
  "ADD ~Eb,~Gb", "ADD ~Ev,~Gv",  "ADD ~Gb,~Eb", "ADD ~Gv,~Ev",
  "ADD AL,~Ib",  "ADD ~eAX,~Iv", "PUSH ES",     "POP ES",
  "OR ~Eb,~Gb",  "OR ~Ev,~Gv",   "OR ~Gb,~Eb",  "OR ~Gv,~Ev",
  "OR AL,~Ib",   "OR ~eAX,~Iv",  "PUSH CS",     "~2 ",
/* 1 */
  "ADC ~Eb,~Gb", "ADC ~Ev,~Gv",  "ADC ~Gb,~Eb", "ADC ~Gv,~Ev",
  "ADC AL,~Ib",  "ADC ~eAX,~Iv", "PUSH SS",     "POP SS",
  "SBB ~Eb,~Gb", "SBB ~Ev,~Gv",  "SBB ~Gb,~Eb", "SBB ~Gv,~Ev",
  "SBB AL,~Ib",  "SBB ~eAX,~Iv", "PUSH DS",     "POP DS",
/* 2 */
  "AND ~Eb,~Gb", "AND ~Ev,~Gv",  "AND ~Gb,~Eb", "AND ~Gv,~Ev",
  "AND AL,~Ib",  "AND ~eAX,~Iv", "~pE",         "DAA",
  "SUB ~Eb,~Gb", "SUB ~Ev,~Gv",  "SUB ~Gb,~Eb", "SUB ~Gv,~Ev",
  "SUB AL,~Ib",  "SUB ~eAX,~Iv", "~pC",         "DAS",
/* 3 */
  "XOR ~Eb,~Gb", "XOR ~Ev,~Gv",  "XOR ~Gb,~Eb", "XOR ~Gv,~Ev",
  "XOR AL,~Ib",  "XOR ~eAX,~Iv", "~pS",         "AAA",
  "CMP ~Eb,~Gb", "CMP ~Ev,~Gv",  "CMP ~Gb,~Eb", "CMP ~Gv,~Ev",
  "CMP AL,~Ib",  "CMP ~eAX,~Iv", "~pD",         "AAS",
/* 4 */
  "INC ~eAX",    "INC ~eCX",     "INC ~eDX",    "INC ~eBX",
  "INC ~eSP",    "INC ~eBP",     "INC ~eSI",    "INC ~eDI",
  "DEC ~eAX",    "DEC ~eCX",     "DEC ~eDX",    "DEC ~eBX",
  "DEC ~eSP",    "DEC ~eBP",     "DEC ~eSI",    "DEC ~eDI",
/* 5 */
  "PUSH ~eAX",   "PUSH ~eCX",    "PUSH ~eDX",   "PUSH ~eBX",
  "PUSH ~eSP",   "PUSH ~eBP",    "PUSH ~eSI",   "PUSH ~eDI",
  "POP ~eAX",    "POP ~eCX",     "POP ~eDX",    "POP ~eBX",
  "POP ~eSP",    "POP ~eBP",     "POP ~eSI",    "POP ~eDI",
/* 6 */
  "PUSHA",       "POPA",         "BOUND ~Gv,~Ma", "ARPL ~Ew,~Rw",
  "~pF",         "~pG",          "~so",           "~sa",
  "PUSH ~Iv",    "IMUL ~Gv=~Ev*~Iv", "PUSH ~Ib",  "IMUL ~Gv=~Ev*~Ib",
  "INSB ~Yb,DX", "INS~ew ~Yv,DX", "OUTSB DX,~Xb", "OUTS~ew DX,~Xv",
/* 7 */
  "JO ~Jb",      "JNO ~Jb",       "JC ~Jb",       "JNC ~Jb",
  "JZ ~Jb",      "JNZ ~Jb",       "JBE ~Jb",      "JNBE ~Jb",
  "JS ~Jb",      "JNS ~Jb",       "JPE ~Jb",      "JPO ~Jb",
  "JL ~Jb",      "JGE ~Jb",       "JLE ~Jb",      "JG ~Jb",
/* 8 */
  "~g1 ~Eb,~Ib",  "~g1 ~Ev,~Iv",  "~g1 ~Eb,~Ib",  "~g1 ~Ev,~Ib",
  "TEST ~Eb,~Gb", "TEST ~Ev,~Gv", "XCHG ~Eb,~Gb", "XCHG ~Ev,~Gv",
  "MOV ~Eb,~Gb",  "MOV ~Ev,~Gv",  "MOV ~Gb,~Eb",  "MOV ~Gv,~Ev",
  "MOV ~Ew,~Sw",  "LEA ~Gv,~M ",  "MOV ~Sw,~Ew",  "POP ~Ev",
/* 9 */
  "NOP",            "XCHG ~eAX,~eCX", "XCHG ~eAX,~eDX", "XCHG ~eAX,~eBX",
  "XCHG ~eAX,~eSP", "XCHG ~eAX,~eBP", "XCHG ~eAX,~eSI", "XCHG ~eAX,~eDI",
  "CBW",            "CWD",            "CALL ~Ap",       "FWAIT",
  "PUSH ~eflags",   "POP ~eflags",    "SAHF",           "LAHF",
/* a */
  "MOV AL,~Ov",     "MOV ~eAX,~Ov",     "MOV ~Ov,al",    "MOV ~Ov,~eAX",
  "MOVSB ~Xb,~Yb",  "MOVS~ew ~Xv,~Yv",  "CMPSB ~Xb,~Yb", "CMPS~ew ~Xv,~Yv",
  "TEST AL,~Ib",    "TEST ~eAX,~Iv",    "STOSB ~Yb,AL",  "STOS~ew ~Yv,~eAX",
  "LODSB AL,~Xb",   "LODS~ew ~eAX,~Xv", "SCASB AL,~Xb",  "SCAS~ew ~eAX,~Xv",
/* b */
  "MOV AL,~Ib",   "MOV CL,~Ib",   "MOV DL,~Ib",   "MOV BL,~Ib",
  "MOV AH,~Ib",   "MOV CH,~Ib",   "MOV DH,~Ib",   "MOV BH,~Ib",
  "MOV ~eAX,~Iv", "MOV ~eCX,~Iv", "MOV ~eDX,~Iv", "MOV ~eBX,~Iv",
  "MOV ~eSP,~Iv", "MOV ~eBP,~Iv", "MOV ~eSI,~Iv", "MOV ~eDI,~Iv",
/* c */
  "~g2 ~Eb,~Ib",   "~g2 ~Ev,~Ib",  "RET ~Iw",      "RET",
  "LES ~Gv,~Mp",   "LDS ~Gv,~Mp",  "MOV ~Eb,~Ib",  "MOV ~Ev,~Iv",
  "ENTER ~Iw,~Ib", "LEAVE",        "RETF ~Iw",     "retf",
  "INT 3",         "INT ~Ib",      "INTO",         "IRET",
/* d */
  "~g2 ~Eb,1", "~g2 ~Ev,1", "~g2 ~Eb,cl", "~g2 ~Ev,cl",
  "AAM ~Ib", "AAD ~Ib", 0, "XLAT",

  /* D8h-DFh: coprocessor escapes, expanded through the ~f code */
  "~f0", "~f1", "~f2", "~f3",
  "~f4", "~f5", "~f6", "~f7",

/* e */
  "LOOPNE ~Jb", "LOOPE ~Jb", "LOOP ~Jb", "JCXZ ~Jb",
  "IN AL,~Ib", "IN ~eAX,~Ib", "OUT ~Ib,AL", "OUT ~Ib,~eAX",
  "CALL ~Jv", "JMP ~Jv", "JMP ~Ap", "JMP ~Jb",
  "IN AL,DX", "IN ~eAX,DX", "OUT DX,AL", "OUT DX,~eAX",
/* f */
  "LOCK~p ", 0, "REPNE~p ", "REP(e)~p ",
  "HLT", "CMC", "~g3", "~g0",
  "CLC", "STC", "CLI", "STI",
  "CLD", "STD", "~g4", "~g5"
  };

/* Templates for the first sixteen second-byte opcodes (row 0 of the
   two-byte map).  Slots holding 0 have no template. */
char *SecOp00[] = {
  /* 0 */ "~g6",
  /* 1 */ "~g7",
  /* 2 */ "LAR ~Gv,~Ew",
  /* 3 */ "LSL ~Gv,~Ew",
  /* 4 */ 0,
  /* 5 */ 0,
  /* 6 */ "CLTS",
  /* 7 */ 0,
  /* 8 - f */
  0, 0, 0, 0,
  0, 0, 0, 0
  };

/* Templates for row 2 of the two-byte opcode map: moves between general
   registers (~R) and the control (~C), debug (~D) and test (~T) registers.
   Slots holding 0 have no template.
   The initializer is now terminated with ';' (it was missing, which made
   the following declaration a syntax error). */
char *SecOp20[] = {
/* 2 */
  "MOV ~Rd,~Cd", "MOV ~Rd,~Dd", "MOV ~Cd,~Rd", "MOV ~Dd,~Rd",
  "MOV ~Rd,~Td", 0, "MOV ~Td,~Rd", 0,
  0, 0, 0, 0, 0, 0, 0, 0
  };

/* Templates for rows 8 through b of the two-byte opcode map (64 entries;
   index 0 corresponds to second byte 80h).  Slots holding 0 have no
   template.

   Corrections relative to the earlier table:
     92h/93h  were swapped; 92h is SETC and 93h is SETNC, matching the
              JC/JNC order in row 8.
     B2h/B4h/B5h  LSS/LFS/LGS now show the destination register (~Gv),
              as LES/LDS do in the one-byte map.
     BDh      "BSR" was missing the space before its operands.
*/
char *SecOp80[] = {
/* 8 */
  "JO ~Jv", "JNO ~Jv", "JC ~Jv", "JNC ~Jv",
  "JZ ~Jv", "JNZ ~Jv", "JBE ~Jv", "JNBE ~Jv",
  "JS ~Jv", "JNS ~Jv", "JPE ~Jv", "JPO ~Jv",
  "JL ~Jv", "JGE ~Jv", "JLE ~Jv", "JG ~Jv",
/* 9 */
  "SETO ~Eb", "SETNO ~Eb", "SETC ~Eb", "SETNC ~Eb",
  "SETZ ~Eb", "SETNZ ~Eb", "SETBE ~Eb", "SETNBE ~Eb",
  "SETS ~Eb", "SETNS ~Eb", "SETP ~Eb", "SETNP ~Eb",
  "SETL ~Eb", "SETGE ~Eb", "SETLE ~Eb", "SETG ~Eb",
/* a */
  "PUSH FS",          "POP FS",          0,          "BT ~Ev,~Gv",
  "SHLD ~Ev,~Gv,~Ib", "SHLD ~Ev,~Gv,cl", 0,           0,
  "PUSH GS",          "POP GS",          0,          "BTS ~Ev,~Gv",
  "SHRD ~Ev,~Gv,~Ib", "SHRD ~Ev,~Gv,cl", 0,          "IMUL ~Gv,~Ev",
/* b */
  0, 0, "LSS ~Gv,~Mp", "BTR ~Ev,~Gv",
  "LFS ~Gv,~Mp", "LGS ~Gv,~Mp", "MOVZX ~Gv,~Eb", "MOVZX ~Gv,~Ew",
  0, 0, "~g8 ~Ev,~Ib", "BTC ~Ev,~Gv",
  "BSF ~Gv,~Ev", "BSR ~Gv,~Ev", "MOVSX ~Gv,~Eb", "MOVSX ~Gv,~Ew",
  };
/* NOTE: a second opcode byte above 0xBF is invalid for two-byte opcodes. */


/* Templates for the ModRM "group" opcodes: groups[g][n] is entry n of
   group g, as selected by the ~g<digit> code in the opcode tables.
   Entries of groups 1, 2 and 8 are bare mnemonics because the operands
   follow the ~g code in the referring table entry; the other groups carry
   their own operands.  Slots holding 0 have no template.

   Group 0 is the ~Ev form of group 3 (see opcodes F6h/F7h in opmap1).

   Fix: the second TEST entry of groups 0 and 3 had a stray ',' inside the
   string, which would have been printed after the operands. */
char *groups[9][8] = {   /* group 0 is group 3 for ~Ev set */
  { "TEST ~Ev,~Iv", "TEST ~Ev,~Iv", "NOT ~Ev", "NEG ~Ev",
    "MUL ~eAX,~Ev", "IMUL ~eAX,~Ev", "DIV ~eAX,~Ev", "IDIV ~eAX,~Ev" },
  { "ADD", "OR", "ADC", "SBB", "AND", "SUB", "XOR", "CMP" },
  { "ROL", "ROR", "RCL", "RCR", "SHL", "SHR", "SHL", "SAR" },
  { "TEST ~Eb,~Ib", "TEST ~Eb,~Ib", "NOT ~Eb", "NEG ~Eb",
    "MUL AL,~Eb", "IMUL AL,~Eb", "DIV AL,~Eb", "IDIV AL,~Eb" },
  { "INC ~Eb", "DEC ~Eb", 0, 0, 0, 0, 0, 0 },
  { "INC ~Ev", "DEC ~Ev", "CALL ~Ev", "CALL ~Ep",
    "JMP ~Ev", "JMP ~Ep", "PUSH ~Ev", 0 },
  { "SLDT ~Ew", "STR ~Ew", "LLDT ~Ew", "LTR ~Ew",
    "VERR ~Ew", "VERW ~Ew", 0, 0 },
  { "SGDT ~Ms", "SIDT ~Ms", "LGDT ~Ms", "LIDT ~Ms",
    "SMSW ~Ew", 0, "LMSW ~Ew", 0 },
  { 0, 0, 0, 0, "BT", "BTS", "BTR", "BTC" }
  };

	/* for display */
/* Segment register names (six entries). */
char *seg_names[] = {
  "ES", "CS", "SS", "DS", "FS", "GS"
  };

/* Byte-sized general register names (eight entries). */
char *breg_names[] = {
  "AL", "CL", "DL", "BL",
  "AH", "CH", "DH", "BH"
  };

/* Word-sized general register names (eight entries). */
char *wreg_names[] = {
  "AX", "CX", "DX", "BX",
  "SP", "BP", "SI", "DI"
  };

/* Dword-sized general register names (eight entries). */
char *dreg_names[] = {
  "EAX", "ECX", "EDX", "EBX",
  "ESP", "EBP", "ESI", "EDI"
  };

/* Decoder state shared by the routines below. */

/* NOTE(review): not referenced in the visible part of the file; presumably
   records the prefix seen for the current instruction - confirm. */
S16 prefix;
/* ModRM byte cached by modrm(). */
U8 modrmv;
/* Nonzero once modrmv has been fetched; modrm() reads a new byte only
   while this is zero. */
S8 fmodrmv;
/* SIB byte cached by sib(). */
U8 sibv;
/* Nonzero once sibv has been fetched; sib() reads a new byte only while
   this is zero. */
S8 fsibv;
/* Operand size; bytes() treats 32 as four-byte 'v' operands and any other
   value as two-byte. */
S16 opsize;

/*****************************************************
Fetches the next byte to disassemble and advances addrIn.

The inline assembly loads addrIn into EAX, reads one byte
from that address with a CS segment override, and stores
it at [EBP-1].
NOTE(review): [EBP-1] is presumably where the compiler
places the local 'b' - confirm against the compiler's
frame layout before adding or reordering locals.

Returns: the byte that was read.
******************************************************/

U8 getbyte(void)
{
U8 b;
;
#asm
	MOV EAX, _addrIn
	MOV AL, CS:[EAX]
	MOV [EBP-1], AL
#endasm

 /* step past the byte just read */
 ++addrIn;
 return b;
}


/*************************************************/
/* Returns the Mod/RM byte of the current instruction.
   The byte is read from the code stream only on the first
   call (while fmodrmv is zero); later calls return the
   cached value in modrmv. */

U8 modrm(void)
{
  if (fmodrmv)
    return modrmv;		/* already fetched */

  modrmv = getbyte();
  fmodrmv = 1;
  return modrmv;
}


/*************************************************/
/* Returns the 'scale-index-base' byte of the current
   instruction.  The byte is read from the code stream only
   on the first call (while fsibv is zero); later calls
   return the cached value in sibv. */

U8 sib(void)
{
  if (fsibv)
    return sibv;		/* already fetched */

  sibv = getbyte();
  fsibv = 1;
  return sibv;
}

/**********************************************************/
/* The register number is encoded in bits 3, 4 and 5 of the byte:
   xxRRRxxx
   reg(a) extracts it by shifting 'a' right three places and masking
   to three bits, giving a value 0..7.  Used in several places.
*/

#define reg(a)	(((a)>>3)&7)

/*------------------------------------------------------------------------*/

/*------------------------------------------------------------------------*/
/* Converts an operand-size letter from the opcode tables into a byte
   count:
     'b' -> 1, 'w' -> 2, 'd' -> 4,
     'v' -> 4 when opsize is 32, otherwise 2.
   Any other letter yields 0.
*/

int bytes(char c)
{
  if (c == 'b')
    return 1;
  if (c == 'w')
    return 2;
  if (c == 'd')
    return 4;
  if (c == 'v')
    return (opsize == 32) ? 4 : 2;

  return 0;
}

/**************************************************************
Get the correct number of bytes for immediate data from the
code stream and output it as hex.
***************************************************************/

void ohex(char c, int extend, int optional, int defsize)
{
int n, s, i;
unsigned char buf[6];

  n=0;
  s=0;

  switch (c)
  {
    case 'a':
      break;
    case 'b':	/* byte */
      n = 1;
      break;
    case 'w':	/* word */
      n = 2;
      break;
    case 'd':	/* dword */
      n = 4;
      break;
    case 's':	/* fword */
      n = 6;
      break;
    case 'c':
    case 'v':
      if (defsize == 32)
        n = 4;
      else
        n = 2;
      break;
    case 'p':	/* 32 or 48 bit pointer */
      if (defsize == 32)
        n = 6;
      else
        n = 4;
      s = 1;
      break;
  }

  for (i=0; i<n; i++)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?