📄 ljit_x86.dasc
字号:
/*** Bytecode to machine code translation for x86 CPUs.** Copyright (C) 2005-2008 Mike Pall. See Copyright Notice in luajit.h*/|// Include common definitions and macros.|.include ljit_x86.dash||// Place actionlist and globals here at the top of the file.|.actionlist jit_actionlist|.globals JSUB_/* ------------------------------------------------------------------------ *//* Arch string. */const char luaJIT_arch[] = "x86";/* Forward declarations for C functions called from jsubs. */static void jit_hookins(lua_State *L, const Instruction *newpc);static void jit_gettable_fb(lua_State *L, Table *t, StkId dest);static void jit_settable_fb(lua_State *L, Table *t, StkId val);/* ------------------------------------------------------------------------ *//* Detect CPU features and set JIT flags. */static int jit_cpudetect(jit_State *J){ void *mcode; size_t sz; int status; /* Some of the jsubs need the flags. So compile this separately. */ unsigned int feature; dasm_setup(Dst, jit_actionlist); | // Check for CPUID support first. | pushfd | pop edx | mov ecx, edx | xor edx, 0x00200000 // Toggle ID bit in flags. | push edx | popfd | pushfd | pop edx | xor eax, eax // Zero means no features supported. | cmp ecx, edx | jz >1 // No ID toggle means no CPUID support. | | inc eax // CPUID function 1. | push ebx // Callee-save ebx modified by CPUID. | cpuid | pop ebx | mov eax, edx // Return feature support bits. |1: | ret (void)dasm_checkstep(Dst, DASM_SECTION_CODE); status = luaJIT_link(J, &mcode, &sz); if (status != JIT_S_OK) return status; /* Check feature bits. See the Intel/AMD manuals for the bit definitions. */ feature = ((unsigned int (*)(void))mcode)(); if (feature & (1<<15)) J->flags |= JIT_F_CPU_CMOV; if (feature & (1<<26)) J->flags |= JIT_F_CPU_SSE2; luaJIT_freemcode(J, mcode, sz); /* We don't need this code anymore. */ return JIT_S_OK;}/* Check some assumptions. Should compile to nop. */static int jit_consistency_check(jit_State *J){ do { /* Force a compiler error for inconsistent structure sizes. */ /* Check LUA_TVALUE_ALIGN in luaconf.h, too. */ ||int check_TVALUE_SIZE_in_ljit_x86_dash[1+TVALUE_SIZE-sizeof(TValue)]; ||int check_TVALUE_SIZE_in_ljit_x86_dash_[1+sizeof(TValue)-TVALUE_SIZE]; ((void)check_TVALUE_SIZE_in_ljit_x86_dash[0]); ((void)check_TVALUE_SIZE_in_ljit_x86_dash_[0]); if (LUA_TNIL != 0 || LUA_TBOOLEAN != 1 || PCRLUA != 0) break; if ((int)&(((Node *)0)->i_val) != (int)&(((StkId)0)->value)) break; return JIT_S_OK; } while (0); J->dasmstatus = 999999999; /* Recognizable error. */ return JIT_S_COMPILER_ERROR;}/* Compile JIT subroutines (once). */static int jit_compile_jsub(jit_State *J){ int status = jit_consistency_check(J); if (status != JIT_S_OK) return status; status = jit_cpudetect(J); if (status != JIT_S_OK) return status; dasm_setup(Dst, jit_actionlist); |// Macros to reorder and combine JIT subroutine definitions. |.macro .jsub, name |.capture JSUB // Add the entry point. ||//----------------------------------------------------------------------- ||//->name: | .align 16 |->name: |.endmacro |.macro .endjsub; .endcapture; .endmacro |.macro .dumpjsub; .dumpcapture JSUB; .endmacro | |.code |//----------------------------------------------------------------------- | .align 16 | // Must be the first JSUB defined or used. |->STACKPTR: // Get stack pointer (for jit.util.*). | lea eax, [esp+aword*1] // But adjust for the return address. | ret | |//----------------------------------------------------------------------- | .align 16 |->GATE_LJ: // Lua -> JIT gate. (L, func, nresults) | push ebp | mov ebp, esp | sub esp, LJFRAME_OFFSET | mov SAVER1, BASE | mov BASE, CARG2 // func | mov CARG2, L // Arg used as savereg. Avoids aword*8 stack frame. | mov L, CARG1 // L | mov SAVER2, TOP | mov TOP, L->top | mov LCL, BASE->value | mov CI, L->ci | // Prevent stackless yields. No limit check -- this is not a real C call. | inc word L->nCcalls // short | | call aword LCL->jit_gate // Call the compiled code. | | mov CI, L->ci | mov L->top, TOP // Only correct for LUA_MULTRET. | mov edx, CI->savedpc | mov eax, CARG3 // nresults | mov L->savedpc, edx // L->savedpc = CI->savedpc | mov edx, CI->base | test eax, eax | mov L->base, edx // L->base = CI->base | js >2 // Skip for nresults == LUA_MULTRET. | | TValuemul eax | add BASE, eax | xor ecx, ecx | mov L->top, BASE // L->top = &func[nresults] |1: // No initial check. May use EXTRA_STACK (once). | mov TOP->tt, ecx // Clear unset stack slots. | add TOP, #TOP | cmp TOP, BASE | jb <1 | |2: | dec word L->nCcalls // short | mov eax, PCRC | mov TOP, SAVER2 | mov BASE, SAVER1 | mov L, CARG2 | mov esp, ebp | pop ebp | ret | |//----------------------------------------------------------------------- | .align 16 |->GATE_JL: // JIT -> Lua callgate. | mov PROTO:edx, LCL->p | cmp dword PROTO:edx->jit_status, JIT_S_OK | jne >1 // Already compiled? | | // Yes, copy callgate to closure (so GATE_JL is not called again). | mov edx, PROTO:edx->jit_mcode | mov LCL->jit_gate, edx | jmp edx // Chain to compiled code. | |1: // Let luaD_precall do the hard work: compile & run or fallback. | sub esp, FRAME_OFFSET | mov eax, CI->savedpc | mov L->ci, CI // May not be in sync for tailcalls. | mov L->top, TOP | mov ARG3, -1 // LUA_MULTRET | mov L->savedpc, eax // luaD_precall expects it there. | mov ARG2, BASE | sub BASE, L->stack // Preserve old BASE (= func). | mov ARG1, L | call &luaD_precall // luaD_precall(L, func, nresults) | test eax,eax // Assumes: PCRLUA == 0 | jnz >2 // PCRC? PCRYIELD cannot happen. | | // Returned PCRLUA: need to call the bytecode interpreter. | call &luaV_execute, L, 1 | // Indirect yield (L->status == LUA_YIELD) cannot happen. | |2: // Returned PCRC: compile & run done. Frame is already unwound. | add esp, FRAME_OFFSET | add BASE, L->stack // Restore stack-relative pointers BASE and TOP. | mov TOP, L->top | ret | |//----------------------------------------------------------------------- | .align 16 |->GATE_JC: // JIT -> C callgate. | lea eax, TOP[LUA_MINSTACK] | sub esp, FRAME_OFFSET | cmp eax, L->stack_last | jae ->GROW_STACK // Stack overflow? | cmp CI, L->end_ci | lea CI, CI[1] | je ->GROW_CI // CI overflow? | mov L->ci, CI | mov CI->func, BASE | mov CI->top, eax | mov CCLOSURE:edx, BASE->value | add BASE, #BASE | mov L->top, TOP | mov L->base, BASE | mov CI->base, BASE | // ci->nresults is not set because we don't use luaD_poscall(). | |->GATE_JC_PATCH: // Patch mark for jmp to GATE_JC_DEBUG. | | call aword CCLOSURE:edx->f, L // Call the C function. | |2: // Label used below! | add esp, FRAME_OFFSET | mov CI, L->ci | TValuemul eax // eax = nresults*sizeof(TValue) | mov TOP, CI->func | jz >4 // Skip loop if nresults == 0. | // Yield (-1) cannot happen. | mov BASE, L->top | mov edx, BASE | sub BASE, eax // BASE = &L->top[-nresults] |3: // Relocate [L->top-nresults, L->top) -> [ci->func, ci->func+nresults) | mov eax, [BASE] | add BASE, aword*1 | mov [TOP], eax | add TOP, aword*1 | cmp BASE, edx | jb <3 | |4: | mov BASE, CI->func | sub CI, #CI | mov L->ci, CI | ret | |//----------------------------------------------------------------------- | nop; nop; nop; nop; nop; nop // Save area. See DEBUGPATCH_SIZE. | .align 16 |->GATE_JC_DEBUG: // JIT -> C callgate for debugging. | test byte L->hookmask, LUA_MASKCALL // Need to call hook? | jnz >7 |6: | call aword CCLOSURE:edx->f, L // Call the C function. | | test byte L->hookmask, LUA_MASKRET // Need to call hook? | jz <2 | | // Return hook. TODO: LUA_HOOKTAILRET is not called since tailcalls == 0. | mov BASE, eax // BASE (ebx) is callee-save. | call &luaD_callhook, L, LUA_HOOKRET, -1 | mov eax, BASE | jmp <2 | |7: // Call hook. | mov BASE, CCLOSURE:edx // BASE (ebx) is callee-save. | call &luaD_callhook, L, LUA_HOOKCALL, -1 | mov CCLOSURE:edx, BASE | jmp <6 | |//----------------------------------------------------------------------- | .align 16 |->GROW_STACK: // Grow stack. Jump from/to prologue. | sub eax, TOP | TValuediv eax // eax = (eax-TOP)/sizeof(TValue). | mov L->top, TOP | sub BASE, L->stack | mov ARG3, CI | call &luaD_growstack, L, eax | mov CI, ARG3 // CI may not be in sync with L->ci. | add BASE, L->stack // Restore stack-relative pointers. | mov TOP, L->top | mov LCL, BASE->value | add esp, FRAME_OFFSET // Undo esp adjust of prologue/GATE_JC. | jmp aword LCL->jit_gate // Retry prologue. | |//----------------------------------------------------------------------- | .align 16 |->GROW_CI: // Grow CI. Jump from/to prologue. | mov L->top, TOP // May throw LUA_ERRMEM, so save TOP. | call &luaD_growCI, L | lea CI, CINFO:eax[-1] // Undo ci++ (L->ci reset in prologue). | mov LCL, BASE->value | mov L->ci, CI | add esp, FRAME_OFFSET // Undo esp adjust of prologue/GATE_JC. | jmp aword LCL->jit_gate // Retry prologue. | |//----------------------------------------------------------------------- |.dumpjsub // Dump all captured .jsub's. | |// Uncritical jsubs follow. No need to align them. |//----------------------------------------------------------------------- |->DEOPTIMIZE_CALLER: // Deoptimize calling instruction. | pop edx | jmp ->DEOPTIMIZE | |->DEOPTIMIZE_OPEN: // Deoptimize open instruction. | mov L->top, TOP // Save TOP. | |->DEOPTIMIZE: // Deoptimize instruction. | mov L->savedpc, edx // &J->nextins expected in edx. | call &luaJIT_deoptimize, L | mov BASE, L->base | mov TOP, L->top // Restore TOP for open ins. | jmp eax // Continue with new mcode addr. | | .align 16 |//----------------------------------------------------------------------- (void)dasm_checkstep(Dst, DASM_SECTION_CODE); status = luaJIT_link(J, &J->jsubmcode, &J->szjsubmcode); if (status != JIT_S_OK) return status; /* Copy the callgates from the globals to the global state. */ G(J->L)->jit_gateLJ = (luaJIT_GateLJ)J->jsub[JSUB_GATE_LJ]; G(J->L)->jit_gateJL = (lua_CFunction)J->jsub[JSUB_GATE_JL]; G(J->L)->jit_gateJC = (lua_CFunction)J->jsub[JSUB_GATE_JC]; return JIT_S_OK;}/* Match with number of nops above. Avoid confusing the instruction decoder. */#define DEBUGPATCH_SIZE 6/* Notify backend that the debug mode may have changed. */void luaJIT_debugnotify(jit_State *J){ unsigned char *patch = (unsigned char *)J->jsub[JSUB_GATE_JC_PATCH]; unsigned char *target = (unsigned char *)J->jsub[JSUB_GATE_JC_DEBUG]; /* Yep, this is self-modifying code -- don't tell anyone. */ if (patch[0] == 0xe9) { /* Debug patch is active. */ if (!(J->flags & JIT_F_DEBUG_CALL)) /* Deactivate it. */ memcpy(patch, target-DEBUGPATCH_SIZE, DEBUGPATCH_SIZE); } else { /* Debug patch is inactive. */ if (J->flags & JIT_F_DEBUG_CALL) { /* Activate it. */ int rel = target-(patch+5); memcpy(target-DEBUGPATCH_SIZE, patch, DEBUGPATCH_SIZE); patch[0] = 0xe9; /* jmp */ memcpy(patch+1, &rel, 4); /* Relative address. */ memset(patch+5, 0x90, DEBUGPATCH_SIZE-5); /* nop */ } }}/* Patch a jmp into existing mcode. */static void jit_patch_jmp(jit_State *J, void *mcode, void *to){ unsigned char *patch = (unsigned char *)mcode; int rel = ((unsigned char *)to)-(patch+5); patch[0] = 0xe9; /* jmp */ memcpy((void *)(patch+1), &rel, 4); /* Relative addr. */}/* ------------------------------------------------------------------------ *//* Call line/count hook. */static void jit_hookins(lua_State *L, const Instruction *newpc){ Proto *pt = ci_func(L->ci)->l.p; int pc = luaJIT_findpc(pt, newpc); /* Sloooow with mcode addrs. */ const Instruction *savedpc = L->savedpc; L->savedpc = pt->code + pc + 1; if (L->hookmask > LUA_MASKLINE && L->hookcount == 0) { resethookcount(L); luaD_callhook(L, LUA_HOOKCOUNT, -1); } if (L->hookmask & LUA_MASKLINE) { int newline = getline(pt, pc); if (pc != 0) { int oldpc = luaJIT_findpc(pt, savedpc); if (!(pc <= oldpc || newline != getline(pt, oldpc))) return; } luaD_callhook(L, LUA_HOOKLINE, newline); }}/* Insert hook check for each instruction in full debug mode. */static void jit_ins_debug(jit_State *J, int openop){ if (openop) { | mov L->top, TOP } |// TODO: Passing bytecode addrs would speed this up (but use more space). | call ->HOOKINS |.jsub HOOKINS | test byte L->hookmask, LUA_MASKLINE|LUA_MASKCOUNT | jz >2 | dec dword L->hookcount | jz >1 | test byte L->hookmask, LUA_MASKLINE | jz >2 |1: | mov eax, [esp] // Current machine code address. | sub esp, FRAME_OFFSET | call &jit_hookins, L, eax | add esp, FRAME_OFFSET | mov BASE, L->base // Restore stack-relative pointers. | mov TOP, L->top |2: | ret |.endjsub}/* Called before every instruction. */static void jit_ins_start(jit_State *J){ |// Always emit PC labels, even for dead code (but not for combined JMP). |=>J->nextpc:}/* Chain to another instruction. */static void jit_ins_chainto(jit_State *J, int pc){ | jmp =>pc}/* Set PC label. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -