📄 latency.cpp
字号:
//---------------------------------------------------------------------------
//
// Latency.cpp: CLatencyFunc Class Implementation
//
//---------------------------------------------------------------------------
#include "StdAfx.h"
#include "Latency.h"
//---------------------------------------------------------------------------
// Allocates the executable buffer that holds the run-time generated routines
// and assigns their entry points.
//
// The buffer consists of five consecutive 4096-byte slots:
//   slot 0..3 -> ReadLatency0 .. ReadLatency3
//   slot 4    -> TestNop1 (its code is generated here, once)
//
// Returns S_OK on success, or the Win32 error from VirtualAlloc wrapped in
// an HRESULT on failure (lpMemBlock is NULL in that case).
//---------------------------------------------------------------------------
HRESULT __fastcall CLatencyFunc::Initialize()
{
    const DWORD slotSize  = 4096;
    const DWORD slotCount = 5;      // 5 * 4096 = 20480 bytes, as released by Terminate()

    // Reserve and commit in one call. Doing it in two steps and storing both
    // results in lpMemBlock loses the reserved address when the commit fails,
    // so the reservation could never be released again.
    lpMemBlock = ::VirtualAlloc(NULL, slotSize * slotCount,
                                MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    if (! lpMemBlock)
    {
        return HRESULT_FROM_WIN32(::GetLastError());
    }

    // Byte-pointer arithmetic instead of a round trip through DWORD, which
    // would truncate the address wherever pointers are wider than 32 bits.
    BYTE* const base = (BYTE*)lpMemBlock;
    ReadLatency0 = (void (__cdecl *)(void*, DWORD))(base);
    ReadLatency1 = (void (__cdecl *)(void*, DWORD))(base + 1 * slotSize);
    ReadLatency2 = (void (__cdecl *)(void*, DWORD))(base + 2 * slotSize);
    ReadLatency3 = (void (__cdecl *)(void*, DWORD))(base + 3 * slotSize);
    TestNop1     = (void (__cdecl *)())(base + 4 * slotSize);

    // TestNop1 takes no parameters, so its code can be emitted right away.
    GenerateTestNopCode1();
    return S_OK;
}
//---------------------------------------------------------------------------
// Releases the code buffer (if one is held) and clears every entry point
// that pointed into it. Always returns S_OK; the results of VirtualFree are
// not inspected.
//---------------------------------------------------------------------------
HRESULT __fastcall CLatencyFunc::Terminate()
{
    // Nothing allocated: nothing to do.
    if (lpMemBlock == NULL)
    {
        return S_OK;
    }

    // Decommit the 20480-byte range, then release the whole reservation.
    ::VirtualFree(lpMemBlock, 20480, MEM_DECOMMIT);
    ::VirtualFree(lpMemBlock, 0, MEM_RELEASE);

    // The routines lived inside the freed block, so drop the stale pointers.
    TestNop1     = NULL;
    ReadLatency3 = NULL;
    ReadLatency2 = NULL;
    ReadLatency1 = NULL;
    ReadLatency0 = NULL;
    lpMemBlock   = NULL;

    return S_OK;
}
//---------------------------------------------------------------------------
// Emits the x86 machine code of the ReadLatency0 routine.
//
// Generated routine (stack arguments: pointer at [esp+4], count at [esp+8]):
//       <16-byte prologue>
//   top: mov eax, [eax]
//        or  eax, edx      ; repeated NopCount times (edx is zero)
//        mov eax, [eax]
//        or  eax, edx      ; repeated NopCount times
//        sub ecx, 2
//        jnz top
//        pop ebx / ret / nop
//
// NopCount - number of "or eax, edx" instructions placed after each load.
//
// Does nothing when ReadLatency0 is NULL. Emits 32 + 4 * NopCount bytes.
// NOTE(review): the size is not checked against the space available at
// ReadLatency0 (Initialize() places the entry points 4096 bytes apart) -
// confirm that callers bound NopCount.
//---------------------------------------------------------------------------
void __fastcall CLatencyFunc::GenerateCode0(DWORD NopCount)
{
    BYTE* const entry = (BYTE*)ReadLatency0;
    if (entry == NULL)
    {
        return;
    }

    // Prologue: exactly 16 bytes, so the loop starts at entry + 16.
    static const BYTE prologue[16] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi (2-byte filler)
    };

    BYTE* p = entry;                // write cursor
    DWORD k;
    for (k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }
    BYTE* const loopTop = p;

    // Loop body: two identical halves, each a dependent load followed by
    // NopCount filler instructions that keep eax in the dependency chain.
    for (int half = 0; half < 2; ++half)
    {
        *p++ = 0x8B; *p++ = 0x00;   // mov eax, [eax]
        for (k = 0; k < NopCount; ++k)
        {
            *p++ = 0x09; *p++ = 0xD0;   // or eax, edx
        }
    }

    // The branch displacement is relative to the instruction after the jnz,
    // which lies 9 bytes ahead (3-byte sub + 6-byte jnz).
    // Works out to -(4 * NopCount + 13).
    const DWORD rel = DWORD(int(loopTop - (p + 9)));

    *p++ = 0x83; *p++ = 0xE9; *p++ = 0x02;  // sub ecx, 2
    *p++ = 0x0F; *p++ = 0x85;               // jnz rel32 ...
    *p++ = BYTE(rel);                       // ... little-endian displacement
    *p++ = BYTE(rel >> 8);
    *p++ = BYTE(rel >> 16);
    *p++ = BYTE(rel >> 24);

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
    *p++ = 0x90;                    // nop
}
//---------------------------------------------------------------------------
// Emits the x86 machine code of the ReadLatency1 routine.
//
// Generated routine (stack arguments: pointer at [esp+4], count at [esp+8]):
//       <16-byte prologue>
//   top: add ebx, edx          ; repeated FIXED_NOP_COUNT times
//        mov eax, [eax+ebx]
//        add ebx, edx          ; repeated NopCount times
//        and ebx, eax          ; makes ebx depend on the loaded value
//        sub ecx, 1
//        jnz top
//        pop ebx / ret
//
// NopCount - number of filler instructions placed after the load.
//
// Does nothing when ReadLatency1 is NULL. Emits
// 32 + 2 * FIXED_NOP_COUNT + 2 * NopCount bytes.
// NOTE(review): the size is not checked against the space available at
// ReadLatency1 (Initialize() places the entry points 4096 bytes apart) -
// confirm that callers bound NopCount.
//---------------------------------------------------------------------------
void __fastcall CLatencyFunc::GenerateCode1(DWORD NopCount)
{
    BYTE* const entry = (BYTE*)ReadLatency1;
    if (entry == NULL)
    {
        return;
    }

    // Prologue: exactly 16 bytes, so the loop starts at entry + 16.
    static const BYTE prologue[16] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi (2-byte filler)
    };

    BYTE* p = entry;                // write cursor
    DWORD k;
    for (k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }
    BYTE* const loopTop = p;

    // Fixed filler in front of the load.
    for (k = 0; k < FIXED_NOP_COUNT; ++k)
    {
        *p++ = 0x01; *p++ = 0xD3;   // add ebx, edx
    }

    // The load itself.
    *p++ = 0x8B; *p++ = 0x04; *p++ = 0x03;  // mov eax, [eax+ebx]

    // Variable filler behind the load. The bytes 01 D3 encode
    // "add ebx, edx", the same instruction as the fixed filler.
    for (k = 0; k < NopCount; ++k)
    {
        *p++ = 0x01; *p++ = 0xD3;   // add ebx, edx
    }

    // Dependency: feed the loaded value back into ebx.
    *p++ = 0x21; *p++ = 0xC3;       // and ebx, eax

    // The branch displacement is relative to the instruction after the jnz,
    // which lies 9 bytes ahead (3-byte sub + 6-byte jnz).
    // Works out to -(2 * FIXED_NOP_COUNT + 2 * NopCount + 14).
    const DWORD rel = DWORD(int(loopTop - (p + 9)));

    *p++ = 0x83; *p++ = 0xE9; *p++ = 0x01;  // sub ecx, 1
    *p++ = 0x0F; *p++ = 0x85;               // jnz rel32 ...
    *p++ = BYTE(rel);                       // ... little-endian displacement
    *p++ = BYTE(rel >> 8);
    *p++ = BYTE(rel >> 16);
    *p++ = BYTE(rel >> 24);

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
}
//---------------------------------------------------------------------------
// Emits the x86 machine code of the ReadLatency2 routine.
//
// Generated routine (stack arguments: pointer at [esp+4], count at [esp+8]):
//       <16-byte prologue>
//   top: mov eax, [eax]
//        or  eax, ebx      ; repeated SyncNopCount times (ebx is zero)
//        mov eax, [eax]
//        or  eax, edx      ; repeated NopCount times (edx is zero)
//        sub ecx, 1
//        jnz top
//        pop ebx / ret / nop
//
// NopCount     - number of "or eax, edx" instructions after the second load.
// SyncNopCount - number of "or eax, ebx" instructions after the first load.
//
// Does nothing when ReadLatency2 is NULL. Emits
// 32 + 2 * SyncNopCount + 2 * NopCount bytes.
// NOTE(review): the size is not checked against the space available at
// ReadLatency2 (Initialize() places the entry points 4096 bytes apart) -
// confirm that callers bound both counts.
// NOTE(review): the counter drops by 1 per iteration although the body holds
// two loads (GenerateCode0 and GenerateCode3 subtract 2) - confirm intended.
//---------------------------------------------------------------------------
void __fastcall CLatencyFunc::GenerateCode2(DWORD NopCount, DWORD SyncNopCount)
{
    BYTE* const entry = (BYTE*)ReadLatency2;
    if (entry == NULL)
    {
        return;
    }

    // Prologue: exactly 16 bytes, so the loop starts at entry + 16.
    static const BYTE prologue[16] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi (2-byte filler)
    };

    BYTE* p = entry;                // write cursor
    DWORD k;
    for (k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }
    BYTE* const loopTop = p;

    // First load and its filler.
    *p++ = 0x8B; *p++ = 0x00;       // mov eax, [eax]
    for (k = 0; k < SyncNopCount; ++k)
    {
        *p++ = 0x09; *p++ = 0xD8;   // or eax, ebx
    }

    // Second load and its filler.
    *p++ = 0x8B; *p++ = 0x00;       // mov eax, [eax]
    for (k = 0; k < NopCount; ++k)
    {
        *p++ = 0x09; *p++ = 0xD0;   // or eax, edx
    }

    // The branch displacement is relative to the instruction after the jnz,
    // which lies 9 bytes ahead (3-byte sub + 6-byte jnz).
    // Works out to -(2 * SyncNopCount + 2 * NopCount + 13).
    const DWORD rel = DWORD(int(loopTop - (p + 9)));

    *p++ = 0x83; *p++ = 0xE9; *p++ = 0x01;  // sub ecx, 1
    *p++ = 0x0F; *p++ = 0x85;               // jnz rel32 ...
    *p++ = BYTE(rel);                       // ... little-endian displacement
    *p++ = BYTE(rel >> 8);
    *p++ = BYTE(rel >> 16);
    *p++ = BYTE(rel >> 24);

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
    *p++ = 0x90;                    // nop
}
//---------------------------------------------------------------------------
// Emits the x86 machine code of the ReadLatency3 routine.
//
// Generated routine (stack arguments: pointer at [esp+4], count at [esp+8]):
//       <16-byte prologue>
//   top: mov eax, [eax]
//        nop               ; repeated NopCount times
//        mov eax, [eax]
//        nop               ; repeated NopCount times
//        sub ecx, 2
//        jnz top
//        pop ebx / ret
//
// NopCount - number of one-byte nop instructions placed after each load.
//
// Does nothing when ReadLatency3 is NULL. Emits 31 + 2 * NopCount bytes.
// NOTE(review): the size is not checked against the space available at
// ReadLatency3 (Initialize() places the entry points 4096 bytes apart) -
// confirm that callers bound NopCount.
//---------------------------------------------------------------------------
void __fastcall CLatencyFunc::GenerateCode3(DWORD NopCount)
{
    BYTE* const entry = (BYTE*)ReadLatency3;
    if (entry == NULL)
    {
        return;
    }

    // Prologue: exactly 16 bytes, so the loop starts at entry + 16.
    static const BYTE prologue[16] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi (2-byte filler)
    };

    BYTE* p = entry;                // write cursor
    DWORD k;
    for (k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }
    BYTE* const loopTop = p;

    // Loop body: two identical halves, each a dependent load followed by
    // NopCount independent one-byte nops.
    for (int half = 0; half < 2; ++half)
    {
        *p++ = 0x8B; *p++ = 0x00;   // mov eax, [eax]
        for (k = 0; k < NopCount; ++k)
        {
            *p++ = 0x90;            // nop
        }
    }

    // The branch displacement is relative to the instruction after the jnz,
    // which lies 9 bytes ahead (3-byte sub + 6-byte jnz).
    // Works out to -(2 * NopCount + 13).
    const DWORD rel = DWORD(int(loopTop - (p + 9)));

    *p++ = 0x83; *p++ = 0xE9; *p++ = 0x02;  // sub ecx, 2
    *p++ = 0x0F; *p++ = 0x85;               // jnz rel32 ...
    *p++ = BYTE(rel);                       // ... little-endian displacement
    *p++ = BYTE(rel >> 8);
    *p++ = BYTE(rel >> 16);
    *p++ = BYTE(rel >> 24);

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
}
//---------------------------------------------------------------------------
// Emits the x86 machine code of the parameterless TestNop1 routine.
//
// Generated routine:
//        mov ecx, 0x00080000           ; iteration counter
//        xor edx, edx
//        lea ebx, [ebx*1+0x00000000]   ; 7-byte filler, ebx unchanged
//        mov esi, esi                  ; 2-byte filler
//   top: add eax, edx                  ; repeated FIXED_NOP_COUNT times
//        and eax, edx
//        sub ecx, 1
//        jnz top
//        ret / nop / nop
//
// Does nothing when TestNop1 is NULL. Emits 2 * FIXED_NOP_COUNT + 30 bytes.
//---------------------------------------------------------------------------
void __fastcall CLatencyFunc::GenerateTestNopCode1()
{
    BYTE* const entry = (BYTE*)TestNop1;
    if (entry == NULL)
    {
        return;
    }

    // Set-up code: exactly 16 bytes, so the loop starts at entry + 16.
    static const BYTE setup[16] =
    {
        0xB9, 0x00, 0x00, 0x08, 0x00,               // mov ecx, 0x00080000
        0x31, 0xD2,                                 // xor edx, edx
        0x8D, 0x1C, 0x1D, 0x00, 0x00, 0x00, 0x00,   // lea ebx, [ebx*1+0x00000000]
        0x8B, 0xF6                                  // mov esi, esi
    };

    BYTE* p = entry;                // write cursor
    DWORD k;
    for (k = 0; k < sizeof(setup); ++k)
    {
        *p++ = setup[k];
    }
    BYTE* const loopTop = p;

    // Fixed filler chain on eax.
    for (k = 0; k < FIXED_NOP_COUNT; ++k)
    {
        *p++ = 0x01; *p++ = 0xD0;   // add eax, edx
    }

    // Dependency
    *p++ = 0x21; *p++ = 0xD0;       // and eax, edx

    // The branch displacement is relative to the instruction after the jnz,
    // which lies 9 bytes ahead (3-byte sub + 6-byte jnz).
    // Works out to -(2 * FIXED_NOP_COUNT + 11).
    const DWORD rel = DWORD(int(loopTop - (p + 9)));

    *p++ = 0x83; *p++ = 0xE9; *p++ = 0x01;  // sub ecx, 1
    *p++ = 0x0F; *p++ = 0x85;               // jnz rel32 ...
    *p++ = BYTE(rel);                       // ... little-endian displacement
    *p++ = BYTE(rel >> 8);
    *p++ = BYTE(rel >> 16);
    *p++ = BYTE(rel >> 24);

    // Epilogue
    *p++ = 0xC3;                    // ret
    *p++ = 0x90;                    // nop
    *p++ = 0x90;                    // nop
}
//---------------------------------------------------------------------------
// Walks a chain of pointers: starting at src, repeatedly replaces the
// current address with the 32-bit value stored at that address.
//
// src   - address of the first link of the chain
// count - number of loads to perform; two loads are done per iteration and
//         the loop ends only when the counter reaches exactly zero, so the
//         value is expected to be a non-zero multiple of 2
//
// The function is naked: the body below is the entire routine and reads its
// arguments straight from the stack. eax and ecx are overwritten.
//---------------------------------------------------------------------------
void __declspec(naked) __cdecl CLatencyFunc::ReadTLB(void* src, DWORD count)
{
__asm
{
// [esp+4] holds 'this', so src is at [esp+8] and count at [esp+12]
mov eax, [esp+8] // first parameter is 'this'
mov ecx, [esp+12]
ALIGN 16
$loop:
// each load produces the address used by the next one
mov eax, [eax]
mov eax, [eax]
// two loads done: decrement the counter by 2 and repeat until it hits zero
sub ecx, 2
jnz $loop
ret
}
}
void __declspec(naked) __cdecl CLatencyFunc::TestNop0()
{
__asm
{
xor eax, eax
xor edx, edx
mov ecx, 0x10000000
ALIGN 16
$loop:
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
or eax, edx
sub ecx, 64
jnz $loop
ret
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -