📄 x86id.re
字号:
/*
 * x86 identifier recognition and instruction handling
 *
 * Copyright (C) 2002 Peter Johnson
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <util.h>
RCSID("$Id: x86id.re 1163 2004-10-13 03:55:04Z peter $");

#define YASM_LIB_INTERNAL
#define YASM_BC_INTERNAL
#define YASM_EXPR_INTERNAL
#include <libyasm.h>

#include "modules/arch/x86/x86arch.h"

/* Opcode modifiers.  The opcode bytes are in "reverse" order because the
 * parameters are read from the arch-specific data in LSB->MSB order.
 * (only for aesthetic reasons in the lexer code below, no practical reason).
 *
 * Each flag occupies one bit (bits 0-11) of an instruction form's modifier
 * word.
 */
#define MOD_Gap0    (1UL<<0)    /* Eats a parameter */
#define MOD_Op2Add  (1UL<<1)    /* Parameter adds to opcode byte 2 */
#define MOD_Gap1    (1UL<<2)    /* Eats a parameter */
#define MOD_Op1Add  (1UL<<3)    /* Parameter adds to opcode byte 1 */
#define MOD_Gap2    (1UL<<4)    /* Eats a parameter */
#define MOD_Op0Add  (1UL<<5)    /* Parameter adds to opcode byte 0 */
#define MOD_PreAdd  (1UL<<6)    /* Parameter adds to "special" prefix */
#define MOD_SpAdd   (1UL<<7)    /* Parameter adds to "spare" value */
#define MOD_OpSizeR (1UL<<8)    /* Parameter replaces opersize */
#define MOD_Imm8    (1UL<<9)    /* Parameter is included as immediate byte */
#define MOD_AdSizeR (1UL<<10)   /* Parameter replaces addrsize (jmp only) */
#define MOD_DOpS64R (1UL<<11)   /* Parameter replaces default 64-bit opersize */

/* Modifiers that aren't actually used as modifiers.  Rather, if set, bits
 * 20-27 in the modifier are used as an index into an array.
 * Obviously, only one of these may be set at a time.
 *
 * The selector itself lives in bits 28-31 (MOD_Ext_MASK).
 */
#define MOD_ExtNone (0UL<<28)   /* No extended modifier */
#define MOD_ExtErr  (1UL<<28)   /* Extended error: index into error strings */
#define MOD_ExtWarn (2UL<<28)   /* Extended warning: index into warning strs */
#define MOD_Ext_MASK (0xFUL<<28)
/* Position and width of the 8-bit array index (bits 20-27). */
#define MOD_ExtIndex_SHIFT 20
/* Moves an index value into the index field; no masking is applied here. */
#define MOD_ExtIndex(indx) (((unsigned long)(indx))<<MOD_ExtIndex_SHIFT)
#define MOD_ExtIndex_MASK (0xFFUL<<MOD_ExtIndex_SHIFT)

/* Operand types.  These are more detailed than the "general" types for all
 * architectures, as they include the size, for instance.
 * Bit Breakdown (from LSB to MSB):
 *  - 5 bits (bits 0-4) = general type (must be exact match, except for =3):
 *            0 = immediate
 *            1 = any general purpose or FPU register
 *            2 = memory
 *            3 = any general purpose or FPU register OR memory
 *            4 = any MMX or XMM register
 *            5 = any MMX or XMM register OR memory
 *            6 = any segment register
 *            7 = any CR register
 *            8 = any DR register
 *            9 = any TR register
 *            A = ST0
 *            B = AL/AX/EAX/RAX (depending on size)
 *            C = CL/CX/ECX/RCX (depending on size)
 *            D = DL/DX/EDX/RDX (depending on size)
 *            E = CS
 *            F = DS
 *            10 = ES
 *            11 = FS
 *            12 = GS
 *            13 = SS
 *            14 = CR4
 *            15 = memory offset (an EA, but with no registers allowed)
 *                 [special case for MOV opcode]
 *  - 3 bits (bits 5-7) = size (user-specified, or from register size):
 *            0 = any size acceptable/no size spec acceptable (dep. on strict)
 *            1/2/3/4 = 8/16/32/64 bits (from user or reg size)
 *            5/6 = 80/128 bits (from user)
 *  - 1 bit (bit 8) = size implicit or explicit ("strictness" of size
 *    matching on non-registers -- registers are always strictly matched):
 *            0 = user size must exactly match size above.
 *            1 = user size either unspecified or exactly match size above.
 *  - 3 bits (bits 10-12) = target modification.
 *            0 = no target mod acceptable
 *            1 = NEAR
 *            2 = SHORT
 *            3 = FAR
 *            4 = TO
 *  - 1 bit (bit 9) = effective address size (note: this flag sits below the
 *    target modification bits, even though it is listed after them here)
 *            0 = any address size allowed except for 64-bit
 *            1 = only 64-bit address size allowed
 *
 * Bits more significant than the above are actions: what to do with the
 * operand if the instruction matches.  Essentially describes what part of the
 * output bytecode gets the operand.  This may require conversion (e.g. a
 * register going into an ea field).  Naturally, only one of each of these may
 * be contained in the operands of a single insn_info structure.
 *  - 4 bits (bits 13-16) = action:
 *            0 = does nothing (operand data is discarded)
 *            1 = operand data goes into ea field
 *            2 = operand data goes into imm field
 *            3 = operand data goes into sign-extended imm field
 *            4 = operand data goes into "spare" field
 *            5 = operand data is added to opcode byte 0
 *            6 = operand data is added to opcode byte 1
 *            7 = operand data goes into BOTH ea and spare
 *                [special case for imul opcode]
 *            8 = relative jump (outputs a jmp instead of normal insn)
 *            9 = operand size goes into address size (jmp only)
 * The below describes postponed actions: actions which can't be completed at
 * parse-time due to things like EQU and complex expressions.  For these, some
 * additional data (stored in the second byte of the opcode with a one-byte
 * opcode) is passed to later stages of the assembler with flags set to
 * indicate postponed actions.
 *  - 3 bits (bits 17-19) = postponed action:
 *            0 = none
 *            1 = shift operation with a ,1 short form (instead of imm8).
 *            2 = large imm16/32 that can become a sign-extended imm8.
* 3 = can be far jump * 4 = could become a short opcode mov with bits=64 and a32 prefix */#define OPT_Imm 0x0#define OPT_Reg 0x1#define OPT_Mem 0x2#define OPT_RM 0x3#define OPT_SIMDReg 0x4#define OPT_SIMDRM 0x5#define OPT_SegReg 0x6#define OPT_CRReg 0x7#define OPT_DRReg 0x8#define OPT_TRReg 0x9#define OPT_ST0 0xA#define OPT_Areg 0xB#define OPT_Creg 0xC#define OPT_Dreg 0xD#define OPT_CS 0xE#define OPT_DS 0xF#define OPT_ES 0x10#define OPT_FS 0x11#define OPT_GS 0x12#define OPT_SS 0x13#define OPT_CR4 0x14#define OPT_MemOffs 0x15#define OPT_MASK 0x1F#define OPS_Any (0UL<<5)#define OPS_8 (1UL<<5)#define OPS_16 (2UL<<5)#define OPS_32 (3UL<<5)#define OPS_64 (4UL<<5)#define OPS_80 (5UL<<5)#define OPS_128 (6UL<<5)#define OPS_MASK (7UL<<5)#define OPS_SHIFT 5#define OPS_Relaxed (1UL<<8)#define OPS_RMASK (1UL<<8)#define OPEAS_Not64 (0UL<<9)#define OPEAS_64 (1UL<<9)#define OPEAS_MASK (1UL<<9)#define OPTM_None (0UL<<10)#define OPTM_Near (1UL<<10)#define OPTM_Short (2UL<<10)#define OPTM_Far (3UL<<10)#define OPTM_To (4UL<<10)#define OPTM_MASK (7UL<<10)#define OPA_None (0UL<<13)#define OPA_EA (1UL<<13)#define OPA_Imm (2UL<<13)#define OPA_SImm (3UL<<13)#define OPA_Spare (4UL<<13)#define OPA_Op0Add (5UL<<13)#define OPA_Op1Add (6UL<<13)#define OPA_SpareEA (7UL<<13)#define OPA_JmpRel (8UL<<13)#define OPA_AdSizeR (9UL<<13)#define OPA_MASK (0xFUL<<13)#define OPAP_None (0UL<<17)#define OPAP_ShiftOp (1UL<<17)#define OPAP_SImm8Avail (2UL<<17)#define OPAP_JmpFar (3UL<<17)#define OPAP_ShortMov (4UL<<17)#define OPAP_MASK (7UL<<17)typedef struct x86_insn_info { /* The CPU feature flags needed to execute this instruction. This is OR'ed * with arch-specific data[2]. This combined value is compared with * cpu_enabled to see if all bits set here are set in cpu_enabled--if so, * the instruction is available on this CPU. */ unsigned long cpu; /* Opcode modifiers for variations of instruction. 
As each modifier reads * its parameter in LSB->MSB order from the arch-specific data[1] from the * lexer data, and the LSB of the arch-specific data[1] is reserved for the * count of insn_info structures in the instruction grouping, there can * only be a maximum of 3 modifiers. */ unsigned long modifiers; /* Operand Size */ unsigned char opersize; /* Default operand size in 64-bit mode (0 = 32-bit for readability). */ unsigned char def_opersize_64; /* A special instruction prefix, used for some of the Intel SSE and SSE2 * instructions. Intel calls these 3-byte opcodes, but in AMD64's 64-bit * mode, they're treated like normal prefixes (e.g. the REX prefix needs * to be *after* the F2/F3/66 "prefix"). * (0=no special prefix) */ unsigned char special_prefix; /* The length of the basic opcode */ unsigned char opcode_len; /* The basic 1-3 byte opcode (not including the special instruction * prefix). */ unsigned char opcode[3]; /* The 3-bit "spare" value (extended opcode) for the R/M byte field */ unsigned char spare; /* The number of operands this form of the instruction takes */ unsigned char num_operands; /* The types of each operand, see above */ unsigned long operands[3];} x86_insn_info;/* Define lexer arch-specific data with 0-3 modifiers. */#define DEF_INSN_DATA(group, mod, cpu) do { \ data[0] = (unsigned long)group##_insn; \ data[1] = ((mod)<<8) | \ ((unsigned char)(sizeof(group##_insn)/sizeof(x86_insn_info))); \ data[2] = cpu; \ } while (0)#define RET_INSN(group, mod, cpu) do { \ DEF_INSN_DATA(group, mod, cpu); \ return YASM_ARCH_CHECK_ID_INSN; \ } while (0)/* * General instruction groupings *//* Placeholder for instructions invalid in 64-bit mode */static const x86_insn_info not64_insn[] = { { CPU_Not64, 0, 0, 0, 0, 0, {0, 0, 0}, 0, 0, {0, 0, 0} }};/* One byte opcode instructions with no operands */static const x86_insn_info onebyte_insn[] = {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -