📄 ljit_x86.dash
字号:
|//
|// Common DynASM definitions and macros for x86 CPUs.
|// Copyright (C) 2005-2007 Mike Pall. See Copyright Notice in luajit.h
|//
|
|// Standard DynASM declarations.
|.arch x86
|.section code, deopt, tail, mfmap
|
|// Type definitions with (almost) global validity.
|.type L,         lua_State,     esi   // L.
|.type BASE,      TValue,        ebx   // L->base.
|.type TOP,       TValue,        edi   // L->top (calls/open ops).
|.type CI,        CallInfo,      ecx   // L->ci (calls, locally).
|.type LCL,       LClosure,      eax   // func->value (calls).
|
|// Type definitions with local validity.
|.type GL,        global_State
|.type TVALUE,    TValue
|.type VALUE,     Value
|.type CINFO,     CallInfo
|.type GCOBJECT,  GCObject
|.type TSTRING,   TString
|.type TABLE,     Table
|.type CCLOSURE,  CClosure
|.type PROTO,     Proto
|.type UPVAL,     UpVal
|.type NODE,      Node
|
|// Definitions copied to DynASM domain to avoid unnecessary constant args.
|// CHECK: must match with the definitions in lua.h!
|.define LUA_TNIL,            0
|.define LUA_TBOOLEAN,        1
|.define LUA_TLIGHTUSERDATA,  2
|.define LUA_TNUMBER,         3
|.define LUA_TSTRING,         4
|.define LUA_TTABLE,          5
|.define LUA_TFUNCTION,       6
|.define LUA_TUSERDATA,       7
|.define LUA_TTHREAD,         8
|
|// Combined type tags: one hex digit per tag, first operand in the highest
|// digit. These are the values the isnumber2/isnumber3 macros compare with.
|.define LUA_TNUM_NUM,        0x33
|.define LUA_TNUM_NUM_NUM,    0x333
|.define LUA_TSTR_STR,        0x44
|.define LUA_TSTR_NUM,        0x43
|.define LUA_TSTR_NUM_NUM,    0x433
|.define LUA_TTABLE_NUM,      0x53
|.define LUA_TTABLE_STR,      0x54
|
|// Macros to test, set and copy stack slots.
|// The is* macros only perform the compare; the caller branches on the flags.
|.macro istt, idx, tp; cmp dword BASE[idx].tt, tp; .endmacro
|.macro isnil, idx; istt idx, LUA_TNIL; .endmacro
|.macro isnumber, idx; istt idx, LUA_TNUMBER; .endmacro
|.macro isstring, idx; istt idx, LUA_TSTRING; .endmacro
|.macro istable, idx; istt idx, LUA_TTABLE; .endmacro
|.macro isfunction, idx; istt idx, LUA_TFUNCTION; .endmacro
|
|// Test two slots at once: both tags are packed into reg (shifted by 4 bits
|// each) and compared against the combined constant. Modifies reg.
|// NOTE(review): this assumes every tag value fits into 4 bits -- confirm.
|.macro isnumber2, idx1, idx2, reg
|  mov reg, BASE[idx1].tt;  shl reg, 4;  or reg, BASE[idx2].tt
|  cmp reg, LUA_TNUM_NUM
|.endmacro
|.macro isnumber2, idx1, idx2; isnumber2, idx1, idx2, eax; .endmacro
|
|// Same as isnumber2, but for three slots. Modifies reg (default: eax).
|.macro isnumber3, idx1, idx2, idx3, reg
|  mov reg, BASE[idx1].tt;  shl reg, 4;  or reg, BASE[idx2].tt
|  shl reg, 4;  or reg, BASE[idx3].tt;  cmp reg, LUA_TNUM_NUM_NUM
|.endmacro
|.macro isnumber3, idx1, idx2, idx3; isnumber3, idx1, idx2, idx3, eax; .endmacro
|
|.macro tvisnil, tv; cmp dword tv.tt, LUA_TNIL; .endmacro
|
|.macro settt, tv, tp; mov dword tv.tt, tp; .endmacro
|.macro setnilvalue, tv; settt tv, LUA_TNIL; .endmacro
|
|// Store a boolean constant. val is a C expression evaluated when the code
|// is generated, so only one of the two variants is emitted.
|.macro setbvalue, tv, val             // May use edx.
||if (val) {  /* true */
|  mov edx, LUA_TBOOLEAN
|  mov dword tv.value, edx             // Assumes: LUA_TBOOLEAN == 1
|  settt tv, edx
||} else {  /* false */
|  mov dword tv.value, 0
|  settt tv, LUA_TBOOLEAN
||}
|.endmacro
|
|// Push a number constant onto the x87 stack. The constants 0 and 1 get
|// the dedicated fldz/fld1 instructions instead of a memory load.
|.macro loadnvaluek, vptr
||if ((vptr)->n == (lua_Number)0) {
|  fldz
||} else if ((vptr)->n == (lua_Number)1) {
|  fld1
||} else {
|  fld qword [vptr]
||}
|.endmacro
|
|.macro setnvaluek, tv, vptr           // Pass a Value *! With permanent addr.
|  // SSE2 does not pay off here (I tried).
|  loadnvaluek vptr
|  fstp qword tv.value
|  settt tv, LUA_TNUMBER
|.endmacro
|
|// The two halves of the number are embedded as immediates, so the Value
|// only needs to be readable while the code is generated.
|.macro setnvalue, tv, vptr            // Pass a Value *! Temporary ok.
|  mov dword tv.value, (vptr)->na[0]
|  mov dword tv.value.na[1], (vptr)->na[1]
|  settt tv, LUA_TNUMBER
|.endmacro
|
|.macro setsvalue, tv, vptr
|  mov aword tv.value, vptr
|  settt tv, LUA_TSTRING
|.endmacro
|
|.macro sethvalue, tv, vptr
|  mov aword tv.value, vptr
|  settt tv, LUA_TTABLE
|.endmacro
|
|// Copy a slot from S to D: the tag goes through R1, the 8 byte value
|// through xmm0.
|.macro copyslotSSE, D, S, R1          // May use xmm0.
|  mov R1, S.tt;  movq xmm0, qword S.value
|  mov D.tt, R1;  movq qword D.value, xmm0
|.endmacro
|
|// Copy a slot with three scratch registers. With SSE2 only R1 (and xmm0)
|// is used, otherwise all three registers are modified.
|.macro copyslot, D, S, R1, R2, R3
||if (J->flags & JIT_F_CPU_SSE2) {
|  copyslotSSE D, S, R1
||} else {
|  mov R1, S.value;  mov R2, S.value.na[1];  mov R3, S.tt
|  mov D.value, R1;  mov D.value.na[1], R2;  mov D.tt, R3
||}
|.endmacro
|
|// Copy a slot with two scratch registers. Without SSE2, R1 is reused for
|// the tag after the low half of the value has been stored.
|.macro copyslot, D, S, R1, R2
||if (J->flags & JIT_F_CPU_SSE2) {
|  copyslotSSE D, S, R1
||} else {
|  mov R1, S.value;  mov R2, S.value.na[1];  mov D.value, R1
|  mov R1, S.tt;  mov D.value.na[1], R2;  mov D.tt, R1
||}
|.endmacro
|
|// Default variant: uses ecx, edx and eax as scratch registers.
|.macro copyslot, D, S
|  copyslot D, S, ecx, edx, eax
|.endmacro
|
|// Store the constant tvk into tv. The switch is evaluated when the code is
|// generated. Only nil, boolean, number and string constants are handled;
|// any other type hits lua_assert(0).
|.macro copyconst, tv, tvk             // May use edx.
||switch (ttype(tvk)) {
||case LUA_TNIL:
|  setnilvalue tv
||  break;
||case LUA_TBOOLEAN:
|  setbvalue tv, bvalue(tvk)           // May use edx.
||  break;
||case LUA_TNUMBER: {
|  setnvaluek tv, &(tvk)->value
||  break;
||}
||case LUA_TSTRING:
|  setsvalue tv, &gcvalue(tvk)
||  break;
||default: lua_assert(0); break;
||}
|.endmacro
|
|// Macros to get Lua structures.
|// Load the current closure into reg (default: eax). For non-vararg
|// functions it is read from BASE[-1], otherwise it is fetched via
|// L->ci->func, which modifies CI (ecx) and TOP (edi).
|.macro getLCL, reg                    // May use CI and TOP (edi).
||if (!J->pt->is_vararg) {
|  mov LCL:reg, BASE[-1].value
||} else {
|  mov CI, L->ci
|  mov TOP, CI->func
|  mov LCL:reg, TOP->value
||}
|.endmacro
|.macro getLCL; getLCL eax; .endmacro
|
|// Macros to handle variants.
|// Advance/rewind a typed register by idx elements. Nothing is emitted
|// when idx is zero.
|.macro addidx, type, idx
||if (idx) {
|  add type, idx*#type
||}
|.endmacro
|
|.macro subidx, type, idx
||if (idx) {
|  sub type, idx*#type
||}
|.endmacro
|
|// Annoying x87 stuff: support for two compare variants.
|.macro fcomparepp                     // Compare and pop st0 >< st1.
||if (J->flags & JIT_F_CPU_CMOV) {
|  fucomip st1
|  fpop
||} else {
|  fucompp
|  fnstsw ax                           // eax modified!
|  sahf
|  // Sometimes test ah, imm8 would be faster.
|  // But all following compares need to be changed then.
|  // Don't bother since this is only compatibility stuff for old CPUs.
||}
|.endmacro
|
|// If you change LUA_TVALUE_ALIGN, be sure to change the Makefile, too:
|//   DASMFLAGS= -D TVALUE_SIZE=...
|// Then rerun make. Or change the default below:
|.if not TVALUE_SIZE; .define TVALUE_SIZE, 16; .endif
|
|// TValuemul: slot index -> byte offset. TValuediv: byte offset -> slot index.
|// Nodemul: multiplies by the constant noted next to each variant.
|.if TVALUE_SIZE == 16
|  .macro TValuemul, reg;  sal reg, 4;  .endmacro  // *16
|  .macro TValuediv, reg;  sar reg, 4;  .endmacro  // /16
|  .macro Nodemul, reg;  sal reg, 5;  .endmacro  // *32
|.elif TVALUE_SIZE == 12
|  .macro TValuemul, reg;  sal reg, 2;  lea reg, [reg+reg*2];  .endmacro  // *12
|  // Exact division by 12: shift right by 2 (/4), then multiply by
|  // 0xaaaaaaab, the inverse of 3 modulo 2^32 (3 * 0xaaaaaaab == 2^33 + 1).
|  // Only correct if reg holds an exact multiple of 12.
|  .macro TValuediv, reg;  sar reg, 2;  imul reg, 0xaaaaaaab;  .endmacro  // /12
|  .macro Nodemul, reg;  imul reg, 28;  .endmacro  // *28
|.else
|  .fatal Unsupported TValue size `TVALUE_SIZE'.
|.endif
|
|//
|// x86 C calling conventions and stack frame layout during a JIT call:
|//
|// ebp+aword*4  CARG3     nresults
|// ebp+aword*3  CARG2     func      (also used as SAVER3 for L)
|// ebp+aword*2  CARG1     L
|// -------------------------------  call to GATE_LJ
|// ebp+aword*1  retaddr
|// ebp+aword*0  frameptr  ebp
|// ebp-aword*1  SAVER1    TOP       edi
|// ebp-aword*2  SAVER2    BASE      ebx
|// -------------------------------
|//              GATE_LJ retaddr
|// esp+aword*2  ARG3
|// esp+aword*1  ARG2
|// esp+aword*0  ARG1                <-- esp for first JIT frame
|// -------------------------------
|//              1st JIT frame retaddr
|// esp+aword*2  ARG3
|// esp+aword*1  ARG2
|// esp+aword*0  ARG1                <-- esp for second JIT frame
|// -------------------------------
|//              2nd JIT frame retaddr
|//
|// We could omit the fixed frame pointer (ebp) and have one more register
|// available. But there is no pressing need (could use it for CI).
|// And it's easier for debugging (gdb is still confused -- why?).
|//
|// The stack is aligned to 4 awords (16 bytes). Calls to C functions
|// with up to 3 arguments do not need any stack pointer adjustment.
|//
|
|.define CARG3,            [ebp+aword*4]
|.define CARG2,            [ebp+aword*3]
|.define CARG1,            [ebp+aword*2]
|.define SAVER1,           [ebp-aword*1]
|.define SAVER2,           [ebp-aword*2]
|.define ARG7,             aword [esp+aword*6]  // Requires large frame.
|.define ARG6,             aword [esp+aword*5]  // Requires large frame.
|.define ARG5,             aword [esp+aword*4]  // Requires large frame.
|.define ARG4,             aword [esp+aword*3]  // Requires large frame.
|.define ARG3,             aword [esp+aword*2]
|.define ARG2,             aword [esp+aword*1]
|.define ARG1,             aword [esp]
|.define FRAME_RETADDR,    aword [esp+aword*3]
|.define TMP3,             [esp+aword*2]
|.define TMP2,             [esp+aword*1]
|.define TMP1,             [esp]
|.define FPARG2,           qword [esp+qword*1]  // Requires large frame.
|.define FPARG1,           qword [esp]
|.define LJFRAME_OFFSET,   aword*2  // 16 byte aligned with retaddr + ebp.
|.define FRAME_OFFSET,     aword*3  // 16 byte aligned with retaddr.
|.define LFRAME_OFFSET,    aword*7  // 16 byte aligned with retaddr.
|.define S2LFRAME_OFFSET,  aword*4  // Delta to large frame.
|
|// Call helpers. Up to three arguments are stored into the ARG1..ARG3
|// slots of the current frame.
|.macro call, target, a1
|  mov ARG1, a1;  call target;  .endmacro
|.macro call, target, a1, a2
|  mov ARG1, a1;  mov ARG2, a2;  call target;  .endmacro
|.macro call, target, a1, a2, a3
|  mov ARG1, a1;  mov ARG2, a2;  mov ARG3, a3;  call target;  .endmacro
|// Four and five arguments: a1..a4 are pushed, which moves esp down by
|// 4 awords (S2LFRAME_OFFSET); this is undone after the call.
|.macro call, target, a1, a2, a3, a4
|  push a4;  push a3;  push a2;  push a1
|  call target;  add esp, S2LFRAME_OFFSET;  .endmacro
|// a5 is stored to [esp] before the pushes, so it ends up in the fifth
|// argument slot ([esp+aword*4]) once a1..a4 have been pushed.
|.macro call, target, a1, a2, a3, a4, a5
|  mov ARG1, a5;  push a4;  push a3;  push a2;  push a1
|  call target;  add esp, S2LFRAME_OFFSET;  .endmacro
|
|// The following macros require a large frame.
|.macro call_LFRAME, target, a1, a2, a3, a4
|  mov ARG1, a1;  mov ARG2, a2;  mov ARG3, a3;  mov ARG4, a4
|  call target;  .endmacro
|.macro call_LFRAME, target, a1, a2, a3, a4, a5
|  mov ARG1, a1;  mov ARG2, a2;  mov ARG3, a3;  mov ARG4, a4;  mov ARG5, a5
|  call target;  .endmacro
|
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -