📄 ljit_x86.dasc
字号:
static void jit_ins_setpc(jit_State *J, int pc, void *target){ |.label =>pc, &target}/* Called after the last instruction has been encoded. */static void jit_ins_last(jit_State *J, int lastpc, int sizemfm){ if (J->tflags & JIT_TF_USED_DEOPT) { /* Deopt section has been used? */ |.deopt | jmp ->DEOPTIMIZE // Yes, need to add final jmp. |.code } |=>lastpc+1: // Extra label at the end of .code. |.tail |=>lastpc+2: // And at the end of .deopt/.tail. | .align word // Keep next section word aligned. | .word 0xffff // Terminate mfm with JIT_MFM_STOP. |.mfmap | // <-- Deoptimization hints are inserted here. | .space sizemfm // To be filled in with inverse mfm. | .aword 0, 0 // Next mcode block pointer and size. | // The previous two awords are only word, but not aword aligned. | // Copying them is easier than aligning them and adjusting mfm handling. |.code}/* Add a deoptimize target for the current instruction. */static void jit_deopt_target(jit_State *J, int nargs){ |.define L_DEOPTLABEL, 9 // Local deopt label. |.define L_DEOPTIMIZE, <9 // Local deopt target. Use after call. |.define L_DEOPTIMIZEF, >9 // Local deopt target. Use before call. if (nargs != -1) { |// Alas, x86 doesn't have conditional calls. So branch to the .deopt |// section to load J->nextins and jump to JSUB_DEOPTIMIZE. |// Only a single jump is added at the end (if needed) and any |// intervening code sequences are shadowed (lea trick). |.deopt // Occupies 6 bytes in .deopt section. | .byte 0x8d // Shadow mov with lea edi, [edx+ofs]. |L_DEOPTLABEL: | mov edx, &J->nextins // Current instruction + 1. |.code J->tflags |= JIT_TF_USED_DEOPT; } else { |.tail // Occupies 10 bytes in .tail section. |L_DEOPTLABEL: | mov edx, &J->nextins | jmp ->DEOPTIMIZE_OPEN // Open ins need to save TOP, too. | // And TOP (edi) would be overwritten by the lea trick. | // So checking for open ops later on wouldn't suffice. Sigh. |.code }}/* luaC_checkGC() inlined. Destroys caller-saves + TOP (edi). Uses label 7:. *//* Use this only at the _end_ of an instruction. */static void jit_checkGC(jit_State *J){ | mov GL:ecx, L->l_G | mov eax, GL:ecx->totalbytes // size_t | mov TOP, >7 | cmp eax, GL:ecx->GCthreshold // size_t | jae ->GCSTEP |7: |.jsub GCSTEP | call &luaC_step, L | mov BASE, L->base | jmp TOP |.endjsub}/* ------------------------------------------------------------------------ */|// JIT->JIT calling conventions:|//|// Register/Type | Call Setup | Prologue | Epilogue | Call Finish|// ===========================================================================|// eax | LCL | = BASE->value| | * | *|// ecx | CI | = L->ci | L->ci = ++CI | * | *|// edx | * | * | * | * | *|// ---------------------------------------------------------------------------|// esi | L | | | ||// ebx | BASE | += f | ++ | -- | -= f|// edi | TOP | += f+1+nargs | = BASE+maxst | = f+nresults | = BASE+maxst|// ---------------------------------------------------------------------------|// L->base | | = BASE | | = BASE|// L->top | | = TOP | | = TOP|// L->ci | | ++, -> = ... | -- ||// L->ci->savedpc| = &code[pc] | [ L-> = ] | ||// ---------------------------------------------------------------------------|// args + vars | | setnil | ||// results | | | move | setnil|// ---------------------------------------------------------------------------|// Include support for function inlining.|.include ljit_x86_inline.dash#ifdef LUA_COMPAT_VARARGstatic void jit_vararg_table(lua_State *L){ Table *tab; StkId base, func; int i, num, numparams; luaC_checkGC(L); base = L->base; func = L->ci->func; numparams = clvalue(func)->l.p->numparams; num = base - func - numparams - 1; tab = luaH_new(L, num, 1); for (i = 0; i < num; i++) setobj2n(L, luaH_setnum(L, tab, i+1), base - num + i); setnvalue(luaH_setstr(L, tab, luaS_newliteral(L, "n")), (lua_Number)num); sethvalue(L, base + numparams, tab);}#endif/* Encode JIT function prologue. */static void jit_prologue(jit_State *J){ Proto *pt = J->pt; int numparams = pt->numparams; int stacksize = pt->maxstacksize; |// Note: the order of the following instructions has been carefully tuned. | lea eax, TOP[stacksize] | sub esp, FRAME_OFFSET | cmp eax, L->stack_last | jae ->GROW_STACK // Stack overflow? | // This is a slight overallocation (BASE[1+stacksize] would be enough). | // We duplicate luaD_precall() behaviour so we can use luaD_growstack(). | cmp CI, L->end_ci | lea CI, CI[1] | je ->GROW_CI // CI overflow? | xor eax, eax // Assumes: LUA_TNIL == 0 | mov CI->func, BASE | add BASE, #BASE | mov L->ci, CI if (numparams > 0) { | lea edx, BASE[numparams] | cmp TOP, edx // L->top >< L->base+numparams ? } if (!pt->is_vararg) { /* Fixarg function. */ /* Must cap L->top at L->base+numparams because 1st LOADNIL is omitted. */ if (numparams == 0) { | mov TOP, BASE } else if (J->flags & JIT_F_CPU_CMOV) { | cmova TOP, edx } else { | jna >1 | mov TOP, edx |1: } | lea edx, BASE[stacksize] // New ci->top. | mov CI->tailcalls, eax // 0 | mov CI->top, edx | mov L->top, edx | mov L->base, BASE | mov CI->base, BASE } else { /* Vararg function. */ int i; if (numparams > 0) { |// If some fixargs are missing we need to clear them and |// bump TOP to get a consistent frame layout for OP_VARARG. | jb >5 |4: |.tail |5: // This is uncommon. So move it to .tail and use a loop. | mov TOP->tt, eax | add TOP, #TOP | cmp TOP, edx | jb <5 | jmp <4 |.code } | mov L->base, TOP // New base is after last arg. | mov CI->base, TOP | mov CI->tailcalls, eax // 0 for (i = 0; i < numparams; i++) { /* Move/clear fixargs. */ |// Inline this. Vararg funcs usually have very few fixargs. | copyslot TOP[i], BASE[i], ecx, edx | mov BASE[i].tt, eax // Clear old fixarg slot (help the GC). } if (numparams > 0) { | mov CI, L->ci // Reload CI = ecx (used by move). } | mov BASE, TOP | lea edx, BASE[stacksize] // New ci->top. | lea TOP, BASE[numparams] // Start of vars to clear. | mov CI->top, edx | mov L->top, edx stacksize -= numparams; /* Fixargs are already cleared. */ } /* Clear undefined args and all vars. Still assumes eax = LUA_TNIL = 0. */ /* Note: cannot clear only args because L->top has grown. */ if (stacksize <= EXTRA_STACK) { /* Loopless clear. May use EXTRA_STACK. */ int i; for (i = 0; i < stacksize; i++) { | mov TOP[i].tt, eax } } else { /* Standard loop. */ |2: // Unrolled for 2 stack slots. No initial check. May use EXTRA_STACK. | mov TOP[0].tt, eax | mov TOP[1].tt, eax | add TOP, 2*#TOP | cmp TOP, edx | jb <2 |// Note: TOP is undefined now. TOP is only valid across calls/open ins. }#ifdef LUA_COMPAT_VARARG if (pt->is_vararg & VARARG_NEEDSARG) { | call &jit_vararg_table, L }#endif /* Call hook check. */ if (J->flags & JIT_F_DEBUG_CALL) { | test byte L->hookmask, LUA_MASKCALL | jz >9 | call ->HOOKCALL |9: |.jsub HOOKCALL | mov CI, L->ci | mov TOP, CI->func | mov LCL, TOP->value | mov PROTO:edi, LCL->p // clvalue(L->ci->func)->l.p | mov eax, PROTO:edi->code | add eax, 4 // Hooks expect incremented PC. | mov L->savedpc, eax | sub esp, FRAME_OFFSET | call &luaD_callhook, L, LUA_HOOKCALL, -1 | add esp, FRAME_OFFSET | mov eax, PROTO:edi->code // PROTO:edi is callee-save. | mov L->savedpc, eax // jit_hookins needs previous PC. | mov BASE, L->base | ret |.endjsub }}/* Check if we can combine 'return const'. */static int jit_return_k(jit_State *J){ if (!J->combine) return 0; /* COMBINE hint set? */ /* May need to close open upvalues. */ if (!fhint_isset(J, NOCLOSE)) { | call &luaF_close, L, BASE } if (!J->pt->is_vararg) { /* Fixarg function. */ | sub aword L->ci, #CI | mov TOP, BASE | sub BASE, #BASE | add esp, FRAME_OFFSET } else { /* Vararg function. */ | mov CI, L->ci | mov BASE, CI->func | sub CI, #CI | mov L->ci, CI | lea TOP, BASE[1] | add esp, FRAME_OFFSET } jit_assert(J->combine == 1); /* Required to skip next RETURN instruction. */ return 1;}static void jit_op_return(jit_State *J, int rbase, int nresults){ /* Return hook check. */ if (J->flags & JIT_F_DEBUG_CALL) { if (nresults < 0 && !(J->flags & JIT_F_DEBUG_INS)) { | mov L->top, TOP } |// TODO: LUA_HOOKTAILRET (+ ci->tailcalls counting) or changed debug API. | test byte L->hookmask, LUA_MASKRET | jz >7 | call ->HOOKRET |7: if (J->flags & JIT_F_DEBUG_INS) { | mov eax, FRAME_RETADDR | mov L->savedpc, eax } |.jsub HOOKRET | mov eax, [esp] // Current machine code address. | mov L->savedpc, eax | sub esp, FRAME_OFFSET | call &luaD_callhook, L, LUA_HOOKRET, -1 | add esp, FRAME_OFFSET | mov BASE, L->base // Restore stack-relative pointers. | mov TOP, L->top | ret |.endjsub } /* May need to close open upvalues. */ if (!fhint_isset(J, NOCLOSE)) { | call &luaF_close, L, BASE } /* Previous op was open: 'return f()' or 'return ...' */ if (nresults < 0) { |// Relocate [BASE+rbase, TOP) -> [ci->func, *). | mov CI, L->ci | addidx BASE, rbase | mov edx, CI->func | cmp BASE, TOP | jnb >2 |1: | mov eax, [BASE] | add BASE, aword*1 | mov [edx], eax | add edx, aword*1 | cmp BASE, TOP | jb <1 |2: | add esp, FRAME_OFFSET | mov BASE, CI->func | sub CI, #CI | mov TOP, edx // Relocated TOP. | mov L->ci, CI | ret return; } if (!J->pt->is_vararg) { /* Fixarg function, nresults >= 0. */ int i; | sub aword L->ci, #CI |// Relocate [BASE+rbase,BASE+rbase+nresults) -> [BASE-1, *). |// TODO: loop for large nresults? | sub BASE, #BASE for (i = 0; i < nresults; i++) { | copyslot BASE[i], BASE[rbase+i+1] } | add esp, FRAME_OFFSET | lea TOP, BASE[nresults] | ret } else { /* Vararg function, nresults >= 0. */ int i; |// Relocate [BASE+rbase,BASE+rbase+nresults) -> [ci->func, *). | mov CI, L->ci | mov TOP, CI->func | sub CI, #CI | mov L->ci, CI // CI = ecx is used by copyslot. for (i = 0; i < nresults; i++) { | copyslot TOP[i], BASE[rbase+i] } | add esp, FRAME_OFFSET | mov BASE, TOP | addidx TOP, nresults | ret }}static void jit_op_call(jit_State *J, int func, int nargs, int nresults){ int cltype = jit_inline_call(J, func, nargs, nresults); if (cltype < 0) return; /* Inlined? */ |// Note: the order of the following instructions has been carefully tuned. | addidx BASE, func | mov CI, L->ci | isfunction 0 // BASE[0] is L->base[func]. if (nargs >= 0) { /* Previous op was not open and did not set TOP. */ | lea TOP, BASE[1+nargs] } | mov LCL, BASE->value | mov edx, &J->nextins | mov CI->savedpc, edx if (cltype == LUA_TFUNCTION) { if (nargs == -1) { | jne ->DEOPTIMIZE_OPEN // TYPE hint was wrong (open op)? } else { | jne ->DEOPTIMIZE // TYPE hint was wrong? } } else { | je >1 // Skip __call handling for functions. | call ->METACALL |1: |.jsub METACALL // CALL to __call metamethod. | sub esp, FRAME_OFFSET | mov L->savedpc, edx // May throw errors. Save PC and TOP. | mov L->top, TOP | call &luaD_tryfuncTM, L, BASE // Resolve __call metamethod. | add esp, FRAME_OFFSET | mov BASE, eax // Restore stack-relative pointers. | mov TOP, L->top | mov LCL, BASE->value | mov CI, L->ci | ret |.endjsub } | call aword LCL->jit_gate // Call JIT func or GATE_JL/GATE_JC. | subidx BASE, func | mov L->base, BASE /* Clear undefined results TOP <= o < func+nresults. */ if (nresults > 0) { | xor eax, eax if (nresults <= EXTRA_STACK) { /* Loopless clear. May use EXTRA_STACK. */ int i; for (i = 0; i < nresults; i++) { | mov TOP[i].tt, eax } } else { /* Standard loop. TODO: move to .tail? */ | lea edx, BASE[func+nresults] |1: // Unrolled for 2 stack slots. No initial check. May use EXTRA_STACK. | mov TOP[0].tt, eax // LUA_TNIL | mov TOP[1].tt, eax // LUA_TNIL | add TOP, 2*#TOP | cmp TOP, edx | jb <1 } } if (nresults >= 0) { /* Not an open ins. Restore L->top. */ | lea TOP, BASE[J->pt->maxstacksize] // Faster than getting L->ci->top.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -