⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 latency.cpp

📁 获得多核硬件所有的相关信息。汇编源代码。Visual Studio2003、2005环境皆可。速度快。
💻 CPP
字号:
//---------------------------------------------------------------------------
//
// Latency.cpp: CLatencyFunc Class Implementation
//
//---------------------------------------------------------------------------

#include "StdAfx.h"
#include "Latency.h"

// Allocates one executable memory block and carves it into five 4096-byte
// slots, one per run-time generated routine (ReadLatency0..ReadLatency3 and
// TestNop1, in that order), then emits the TestNop1 code.
//
// Returns S_OK on success, or the HRESULT form of GetLastError() when the
// allocation fails (in which case no member other than lpMemBlock is touched).
HRESULT __fastcall CLatencyFunc::Initialize()
{
    const DWORD SLOT_SIZE  = 4096;  // bytes set aside for each generated routine
    const DWORD SLOT_COUNT = 5;     // ReadLatency0..3 + TestNop1

    // Reserve and commit in a single call. Doing it in two steps and storing
    // the second result over the first would lose the reserved address (and
    // therefore leak the reservation) whenever the commit step fails.
    lpMemBlock = ::VirtualAlloc(NULL, SLOT_SIZE * SLOT_COUNT,
                                MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    if (! lpMemBlock)
    {
        return HRESULT_FROM_WIN32(::GetLastError());
    }

    // Byte-pointer arithmetic instead of round-tripping the address through a
    // DWORD, which would truncate it wherever pointers are wider than 32 bits.
    BYTE* base = (BYTE*)lpMemBlock;

    ReadLatency0 = (void (__cdecl *)(void*, DWORD))(base);
    ReadLatency1 = (void (__cdecl *)(void*, DWORD))(base + 1 * SLOT_SIZE);
    ReadLatency2 = (void (__cdecl *)(void*, DWORD))(base + 2 * SLOT_SIZE);
    ReadLatency3 = (void (__cdecl *)(void*, DWORD))(base + 3 * SLOT_SIZE);
    TestNop1 = (void (__cdecl *)())(base + 4 * SLOT_SIZE);

    // TestNop1 does not depend on any run-time parameter, so emit it now.
    GenerateTestNopCode1();

    return S_OK;
}

// Releases the code block obtained by Initialize() and clears every pointer
// that referred into it. Does nothing when no block is held. Always returns
// S_OK; the results of the VirtualFree calls are not inspected.
HRESULT __fastcall CLatencyFunc::Terminate()
{
    if (lpMemBlock == NULL)
    {
        return S_OK;
    }

    // Decommit the 20480-byte range first, then release the whole region.
    ::VirtualFree(lpMemBlock, 20480, MEM_DECOMMIT);
    ::VirtualFree(lpMemBlock, 0, MEM_RELEASE);

    // The routines lived inside the freed block, so their entry points are
    // now invalid as well.
    TestNop1 = NULL;
    ReadLatency3 = NULL;
    ReadLatency2 = NULL;
    ReadLatency1 = NULL;
    ReadLatency0 = NULL;

    lpMemBlock = NULL;

    return S_OK;
}

// Emits the machine code for ReadLatency0, a __cdecl routine taking
// (void* src, DWORD count). The generated code is:
//
//         mov eax, [esp+4]          ; src
//         mov ecx, [esp+8]          ; count
//         push ebx / nop / xor edx, edx / xor ebx, ebx / mov esi, esi
//   loop: mov eax, [eax]
//         NopCount x  or eax, edx   ; edx is 0, so eax is left unchanged
//         mov eax, [eax]
//         NopCount x  or eax, edx
//         sub ecx, 2
//         jnz loop
//         pop ebx / ret / nop
//
// The loop only ends when ecx reaches exactly zero, so the generated routine
// must be called with a non-zero count that is a multiple of 2.
//
// NopCount: number of "or eax, edx" instructions placed after each load.
// If ReadLatency0 is NULL, or the sequence would not fit in the routine's
// slot, nothing is written and any previously generated code is left in place.
void __fastcall CLatencyFunc::GenerateCode0(DWORD NopCount)
{
    // Generating code block
    BYTE* cb = (BYTE*)ReadLatency0;
    if (! cb) return;

    // Initialize() places the generated routines 4096 bytes apart. Refuse a
    // NopCount whose code would spill into the next routine (or past the end
    // of the allocation) instead of silently overwriting it.
    const DWORD SLOT_SIZE = 4096;
    const DWORD OVERHEAD  = 32;     // 16 prologue + 2 x 2 loads + 9 loop control + 3 epilogue
    if (NopCount > (SLOT_SIZE - OVERHEAD) / 4) return;   // 4 filler bytes per NopCount

    // Code prologue (16 bytes)
    (DWORD&)cb[0] = 0x0424448B;    // mov eax, [esp+4]
    (DWORD&)cb[4] = 0x08244C8B;    // mov ecx, [esp+8]
    (DWORD&)cb[8] = 0xD2319053;    // push ebx; nop; xor edx, edx
    (DWORD&)cb[12] = 0xF68BDB31;   // xor ebx, ebx; mov esi, esi

    // Main latency code; the loop entry is at offset 16
    (WORD&)cb[16] = 0x008B;                     // mov eax, [eax]

    DWORD i;
    for (i = 0; i < NopCount; ++i)
    {
        (WORD&)cb[18 + (i << 1)] = 0xD009;      // or eax, edx
    }

    DWORD off = 18 + (NopCount << 1);
    (WORD&)cb[off] = 0x008B;                    // mov eax, [eax]

    off += 2;
    for (i = 0; i < NopCount; ++i)
    {
        (WORD&)cb[off + (i << 1)] = 0xD009;     // or eax, edx
    }

    off = 20 + (NopCount << 2);

    // The jnz displacement is relative to the end of the jnz instruction
    // (off + 9) and must land on the loop entry (16):
    // 16 - (20 + 4 * NopCount + 9) = -(4 * NopCount + 13)
    const int joff = -(int(NopCount << 2) + 13);

    // Conditional jump-code
    cb[off] = 0x83;
    cb[off + 1] = 0xE9;
    cb[off + 2] = 0x02;             // sub ecx, 2
    cb[off + 3] = 0x0F;
    cb[off + 4] = 0x85;
    cb[off + 5] = BYTE(joff & 0x000000FF);
    cb[off + 6] = BYTE((joff & 0x0000FF00) >> 8);
    cb[off + 7] = BYTE((joff & 0x00FF0000) >> 16);
    cb[off + 8] = BYTE((joff & 0xFF000000) >> 24);  // jnz -jump_offset

    // Code epilogue
    cb[off + 9] = 0x5B;             // pop ebx
    cb[off + 10] = 0xC3;            // ret
    cb[off + 11] = 0x90;            // nop

    // Freshly written code must be flushed from the instruction cache before
    // it is executed.
    ::FlushInstructionCache(::GetCurrentProcess(), cb, off + 12);
}

// Emits the machine code for ReadLatency1, a __cdecl routine taking
// (void* src, DWORD count). The generated code is:
//
//         mov eax, [esp+4]          ; src
//         mov ecx, [esp+8]          ; count
//         push ebx / nop / xor edx, edx / xor ebx, ebx / mov esi, esi
//   loop: FIXED_NOP_COUNT x  add ebx, edx
//         mov eax, [eax+ebx]
//         NopCount x  add ebx, edx
//         and ebx, eax              ; makes ebx depend on the loaded value
//         sub ecx, 1
//         jnz loop
//         pop ebx / ret
//
// ebx and edx are both zeroed by the prologue, so the adds and the and leave
// ebx at zero and the load address is simply eax.
//
// NopCount: number of "add ebx, edx" instructions placed after the load.
// If ReadLatency1 is NULL, or the sequence would not fit in the routine's
// slot, nothing is written and any previously generated code is left in place.
void __fastcall CLatencyFunc::GenerateCode1(DWORD NopCount)
{
    // Generating code block
    BYTE* cb = (BYTE*)ReadLatency1;
    if (! cb) return;

    // Initialize() places the generated routines 4096 bytes apart. Refuse a
    // NopCount whose code would spill into the next routine (or past the end
    // of the allocation) instead of silently overwriting it.
    const DWORD SLOT_SIZE  = 4096;
    const DWORD OVERHEAD   = 32;    // 16 prologue + 3 load + 2 and + 9 loop control + 2 epilogue
    const DWORD fixedBytes = DWORD(FIXED_NOP_COUNT) << 1;
    if (fixedBytes > SLOT_SIZE - OVERHEAD) return;
    if (NopCount > (SLOT_SIZE - OVERHEAD - fixedBytes) / 2) return;    // 2 filler bytes per NopCount

    // Code prologue (16 bytes)
    (DWORD&)cb[0] = 0x0424448B;     // mov eax, [esp+4]
    (DWORD&)cb[4] = 0x08244C8B;     // mov ecx, [esp+8]
    (DWORD&)cb[8] = 0xD2319053;     // push ebx; nop; xor edx, edx
    (DWORD&)cb[12] = 0xF68BDB31;    // xor ebx, ebx; mov esi, esi

    // Main latency code; the loop entry is at offset 16
    // Fixed NOPs
    DWORD i;
    for (i = 0; i < FIXED_NOP_COUNT; ++i)
    {
        (WORD&)cb[16 + (i << 1)] = 0xD301;      // add ebx, edx
    }

    cb[16 + (FIXED_NOP_COUNT << 1)] = 0x8B;
    cb[17 + (FIXED_NOP_COUNT << 1)] = 0x04;
    cb[18 + (FIXED_NOP_COUNT << 1)] = 0x03;     // mov eax, [eax+ebx]

    // Variable NOPs
    for (i = 0; i < NopCount; ++i)
    {
        cb[19 + (FIXED_NOP_COUNT << 1) + (i << 1)] = 0x01;
        cb[20 + (FIXED_NOP_COUNT << 1) + (i << 1)] = 0xD3;  // add ebx, edx (01 D3)
    }
    DWORD off = 19 + (FIXED_NOP_COUNT << 1) + (NopCount << 1);

    // Dependency
    cb[off] = 0x21;
    cb[off + 1] = 0xC3;                         // and ebx, eax

    // The jnz displacement is relative to the end of the jnz instruction
    // (off + 11) and must land on the loop entry (16):
    // -(9 + 3 + 2 + FIXED_NOP_COUNT * 2 + NopCount * 2)
    const int joff = -(int((FIXED_NOP_COUNT << 1) + (NopCount << 1)) + 14);

    // Conditional jump-code
    cb[off + 2] = 0x83;
    cb[off + 3] = 0xE9;
    cb[off + 4] = 0x01;                         // sub ecx, 1
    cb[off + 5] = 0x0F;
    cb[off + 6] = 0x85;
    cb[off + 7] = BYTE(joff & 0x000000FF);
    cb[off + 8] = BYTE((joff & 0x0000FF00) >> 8);
    cb[off + 9] = BYTE((joff & 0x00FF0000) >> 16);
    cb[off + 10] = BYTE((joff & 0xFF000000) >> 24); // jnz -jump_offset

    // Code epilogue
    cb[off + 11] = 0x5B;            // pop ebx
    cb[off + 12] = 0xC3;            // ret

    // Freshly written code must be flushed from the instruction cache before
    // it is executed.
    ::FlushInstructionCache(::GetCurrentProcess(), cb, off + 13);
}

// Emits the machine code for ReadLatency2, a __cdecl routine taking
// (void* src, DWORD count). The generated code is:
//
//         mov eax, [esp+4]            ; src
//         mov ecx, [esp+8]            ; count
//         push ebx / nop / xor edx, edx / xor ebx, ebx / mov esi, esi
//   loop: mov eax, [eax]
//         SyncNopCount x  or eax, ebx ; ebx is 0, so eax is left unchanged
//         mov eax, [eax]
//         NopCount x  or eax, edx     ; edx is 0, so eax is left unchanged
//         sub ecx, 1
//         jnz loop
//         pop ebx / ret / nop
//
// NOTE(review): unlike the other two-load generators in this file, the loop
// counter is decremented by 1 per pass, not 2 - confirm this is intended.
//
// NopCount:     number of "or eax, edx" instructions after the second load.
// SyncNopCount: number of "or eax, ebx" instructions after the first load.
// If ReadLatency2 is NULL, or the sequence would not fit in the routine's
// slot, nothing is written and any previously generated code is left in place.
void __fastcall CLatencyFunc::GenerateCode2(DWORD NopCount, DWORD SyncNopCount)
{
    // Generating code block
    BYTE* cb = (BYTE*)ReadLatency2;
    if (! cb) return;

    // Initialize() places the generated routines 4096 bytes apart. Refuse
    // counts whose code would spill into the next routine (or past the end of
    // the allocation) instead of silently overwriting it.
    const DWORD SLOT_SIZE  = 4096;
    const DWORD OVERHEAD   = 32;    // 16 prologue + 2 x 2 loads + 9 loop control + 3 epilogue
    const DWORD MAX_FILLER = (SLOT_SIZE - OVERHEAD) / 2;    // 2 bytes per filler instruction
    if (SyncNopCount > MAX_FILLER) return;
    if (NopCount > MAX_FILLER - SyncNopCount) return;

    // Code prologue (16 bytes)
    (DWORD&)cb[0] = 0x0424448B;     // mov eax, [esp+4]
    (DWORD&)cb[4] = 0x08244C8B;     // mov ecx, [esp+8]
    (DWORD&)cb[8] = 0xD2319053;     // push ebx; nop; xor edx, edx
    (DWORD&)cb[12] = 0xF68BDB31;    // xor ebx, ebx; mov esi, esi

    // Main latency code; the loop entry is at offset 16
    (WORD&)cb[16] = 0x008B;                     // mov eax, [eax]

    DWORD i;
    for (i = 0; i < SyncNopCount; ++i)
    {
        (WORD&)cb[18 + (i << 1)] = 0xD809;      // or eax, ebx
    }

    DWORD off = 18 + (SyncNopCount << 1);
    (WORD&)cb[off] = 0x008B;                    // mov eax, [eax]
    off += 2;
    for (i = 0; i < NopCount; ++i)
    {
        (WORD&)cb[off + (i << 1)] = 0xD009;     // or eax, edx
    }
    off = 20 + (SyncNopCount << 1) + (NopCount << 1);

    // The jnz displacement is relative to the end of the jnz instruction
    // (off + 9) and must land on the loop entry (16):
    // -(9 + (NopCount + 1) * 2 + (SyncNopCount + 1) * 2)
    const int joff = -(int((SyncNopCount << 1) + (NopCount << 1)) + 13);

    // Conditional jump-code
    cb[off] = 0x83;
    cb[off + 1] = 0xE9;
    cb[off + 2] = 0x01;             // sub ecx, 1
    cb[off + 3] = 0x0F;
    cb[off + 4] = 0x85;
    cb[off + 5] = BYTE(joff & 0x000000FF);
    cb[off + 6] = BYTE((joff & 0x0000FF00) >> 8);
    cb[off + 7] = BYTE((joff & 0x00FF0000) >> 16);
    cb[off + 8] = BYTE((joff & 0xFF000000) >> 24);  // jnz -jump_offset

    // Code epilogue
    cb[off + 9] = 0x5B;             // pop ebx
    cb[off + 10] = 0xC3;            // ret
    cb[off + 11] = 0x90;            // nop

    // Freshly written code must be flushed from the instruction cache before
    // it is executed.
    ::FlushInstructionCache(::GetCurrentProcess(), cb, off + 12);
}

// Emits the machine code for ReadLatency3, a __cdecl routine taking
// (void* src, DWORD count). The generated code is:
//
//         mov eax, [esp+4]          ; src
//         mov ecx, [esp+8]          ; count
//         push ebx / nop / xor edx, edx / xor ebx, ebx / mov esi, esi
//   loop: mov eax, [eax]
//         NopCount x  nop
//         mov eax, [eax]
//         NopCount x  nop
//         sub ecx, 2
//         jnz loop
//         pop ebx / ret
//
// The loop only ends when ecx reaches exactly zero, so the generated routine
// must be called with a non-zero count that is a multiple of 2.
//
// NopCount: number of single-byte nop instructions placed after each load.
// If ReadLatency3 is NULL, or the sequence would not fit in the routine's
// slot, nothing is written and any previously generated code is left in place.
void __fastcall CLatencyFunc::GenerateCode3(DWORD NopCount)
{
    // Generating code block
    BYTE* cb = (BYTE*)ReadLatency3;
    if (! cb) return;

    // Initialize() places the generated routines 4096 bytes apart. Refuse a
    // NopCount whose code would spill into the next routine (or past the end
    // of the allocation) instead of silently overwriting it.
    const DWORD SLOT_SIZE = 4096;
    const DWORD OVERHEAD  = 31;     // 16 prologue + 2 x 2 loads + 9 loop control + 2 epilogue
    if (NopCount > (SLOT_SIZE - OVERHEAD) / 2) return;   // 2 filler bytes per NopCount

    // Code prologue (16 bytes)
    (DWORD&)cb[0] = 0x0424448B;    // mov eax, [esp+4]
    (DWORD&)cb[4] = 0x08244C8B;    // mov ecx, [esp+8]
    (DWORD&)cb[8] = 0xD2319053;    // push ebx; nop; xor edx, edx
    (DWORD&)cb[12] = 0xF68BDB31;   // xor ebx, ebx; mov esi, esi

    // Main latency code; the loop entry is at offset 16
    (WORD&)cb[16] = 0x008B;                     // mov eax, [eax]

    DWORD off = 18;

    DWORD i;
    for (i = 0; i < NopCount; ++i)
    {
        cb[off + i] = 0x90;                     // nop
    }

    off += NopCount;

    cb[off] = 0x8B;
    cb[off + 1] = 0x00;                         // mov eax, [eax]

    off += 2;

    for (i = 0; i < NopCount; ++i)
    {
        cb[off + i] = 0x90;                     // nop
    }

    off += NopCount;

    // The jnz displacement is relative to the end of the jnz instruction
    // (off + 9) and must land on the loop entry (16):
    // -(9 + 4 + NopCount * 2)
    const int joff = -(int(NopCount << 1) + 13);

    // Conditional jump-code
    cb[off] = 0x83;
    cb[off + 1] = 0xE9;
    cb[off + 2] = 0x02;             // sub ecx, 2
    cb[off + 3] = 0x0F;
    cb[off + 4] = 0x85;
    cb[off + 5] = BYTE(joff & 0x000000FF);
    cb[off + 6] = BYTE((joff & 0x0000FF00) >> 8);
    cb[off + 7] = BYTE((joff & 0x00FF0000) >> 16);
    cb[off + 8] = BYTE((joff & 0xFF000000) >> 24);  // jnz -jump_offset

    // Code epilogue
    cb[off + 9] = 0x5B;             // pop ebx
    cb[off + 10] = 0xC3;            // ret

    // Freshly written code must be flushed from the instruction cache before
    // it is executed.
    ::FlushInstructionCache(::GetCurrentProcess(), cb, off + 11);
}

// Emits the machine code for TestNop1, a parameterless __cdecl routine.
// The generated code is:
//
//         mov ecx, 0x00080000       ; iteration count
//         xor edx, edx
//         lea ebx, [ebx*1+0]        ; 7-byte instruction, ebx unchanged
//         mov esi, esi              ; 2-byte instruction, esi unchanged
//   loop: FIXED_NOP_COUNT x  add eax, edx
//         and eax, edx
//         sub ecx, 1
//         jnz loop
//         ret / nop / nop
//
// The setup sequence is exactly 16 bytes long, so the loop entry sits at
// offset 16 of the routine. Does nothing when TestNop1 is NULL.
void __fastcall CLatencyFunc::GenerateTestNopCode1()
{
    // Generating code block
    BYTE* cb = (BYTE*)TestNop1;
    if (! cb) return;

    // Counter setup. The four DWORD stores below are one contiguous byte
    // stream; instruction boundaries do not line up with the DWORD boundaries:
    //   B9 00 00 08 00          mov ecx, 0x00080000
    //   31 D2                   xor edx, edx
    //   8D 1C 1D 00 00 00 00    lea ebx, [ebx*1+0x00000000]
    //   8B F6                   mov esi, esi
    (DWORD&)cb[0] = 0x080000B9;
    (DWORD&)cb[4] = 0x8DD23100;
    (DWORD&)cb[8] = 0x00001D1C;
    (DWORD&)cb[12] = 0xF68B0000;

    // Fixed NOPs
    for (DWORD i = 0; i < FIXED_NOP_COUNT; ++i)
    {
        (WORD&)cb[16 + (i << 1)] = 0xD001;      // add eax, edx
    }

    // Dependency
    DWORD off = 16 + (FIXED_NOP_COUNT << 1);
    (WORD&)cb[off] = 0xD021;        // and eax, edx
    off += 2;

    // The jnz displacement is relative to the end of the jnz instruction
    // (off + 9) and must land on the loop entry (16):
    // -(9 + 2 + FIXED_NOP_COUNT * 2)
    const int joff = -(int(FIXED_NOP_COUNT << 1) + 11);

    // Conditional jump-code
    cb[off] = 0x83;
    cb[off + 1] = 0xE9;
    cb[off + 2] = 0x01;                         // sub ecx, 1
    cb[off + 3] = 0x0F;
    cb[off + 4] = 0x85;
    cb[off + 5] = BYTE(joff & 0x000000FF);
    cb[off + 6] = BYTE((joff & 0x0000FF00) >> 8);
    cb[off + 7] = BYTE((joff & 0x00FF0000) >> 16);
    cb[off + 8] = BYTE((joff & 0xFF000000) >> 24);  // jnz -jump_offset

    // Code epilogue
    cb[off + 9] = 0xC3;     // ret
    cb[off + 10] = 0x90;    // nop
    cb[off + 11] = 0x90;    // nop

    // Freshly written code must be flushed from the instruction cache before
    // it is executed.
    ::FlushInstructionCache(::GetCurrentProcess(), cb, off + 12);
}

// Pointer-chasing loop written directly in assembly. Starting from src, it
// repeatedly replaces the current pointer with the DWORD it points to, two
// loads per pass, decrementing count by 2 each pass until it reaches zero.
//
// src:   address of the first link of the chain; every value loaded must in
//        turn be a readable address.
// count: number of loads to perform. The loop only ends when the counter is
//        exactly zero, so it must be a non-zero multiple of 2.
//
// The function is naked: the compiler emits no prologue or epilogue, so the
// arguments are read straight off the stack and the routine ends with its own
// ret. Only eax and ecx are modified.
void __declspec(naked) __cdecl CLatencyFunc::ReadTLB(void* src, DWORD count)
{
    __asm
    {
        mov         eax, [esp+8]    // src - [esp+4] holds the first parameter, 'this'
        mov         ecx, [esp+12]   // count
        ALIGN       16              // start the loop on a 16-byte boundary
$loop:
        mov         eax, [eax]      // follow the chain: each load needs the previous result
        mov         eax, [eax]
        sub         ecx, 2          // two loads were done in this pass
        jnz         $loop
        ret
    }
}

// Fixed-work loop written directly in assembly: executes "or eax, edx"
// 0x10000000 times in total, as a 64-times unrolled loop body that
// decrements the counter by 64 per pass.
//
// eax and edx are both cleared first, so every "or" leaves eax at zero while
// still reading the result of the one before it.
//
// The function is naked: the compiler emits no prologue or epilogue and the
// routine ends with its own ret. eax, ecx and edx are modified.
void __declspec(naked) __cdecl CLatencyFunc::TestNop0()
{
    __asm
    {
        xor         eax, eax
        xor         edx, edx
        mov         ecx, 0x10000000     // total number of "or" instructions to execute
        ALIGN       16                  // start the loop on a 16-byte boundary
$loop:
        // 64 identical instructions; the count must match the "sub ecx, 64" below
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        sub         ecx, 64             // one pass executed 64 instructions
        jnz         $loop
        ret
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -