latency.cpp

来自「获得多核硬件所有的相关信息。汇编源代码。Visual Studio2003、20」· C++ 代码 · 共 403 行

CPP
403
字号
//---------------------------------------------------------------------------
//
// Latency.cpp: CLatencyFunc Class Implementation
//
//---------------------------------------------------------------------------

#include "StdAfx.h"
#include "Latency.h"

// Allocates the executable scratch buffer used for run-time generated code
// and carves it into five 4096-byte slots:
//   slot 0..3 -> ReadLatency0..ReadLatency3
//   slot 4    -> TestNop1 (its code is generated here, once)
//
// Returns S_OK on success, or the Win32 error from VirtualAlloc wrapped as
// an HRESULT when the allocation fails (no function pointer is set then).
HRESULT __fastcall CLatencyFunc::Initialize()
{
    const DWORD SLOT_SIZE = 4096;
    const DWORD SLOT_COUNT = 5;

    // Reserve and commit in one call.  Doing it in two steps and storing both
    // results in lpMemBlock loses the reserved address when the commit fails,
    // leaking the reservation.
    lpMemBlock = ::VirtualAlloc(NULL, SLOT_SIZE * SLOT_COUNT,
                                MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);

    if (! lpMemBlock)
    {
        // Capture the error once: HRESULT_FROM_WIN32 may be a macro that
        // evaluates its argument more than one time.
        const DWORD lastError = ::GetLastError();
        return HRESULT_FROM_WIN32(lastError);
    }

    // Byte-pointer arithmetic instead of casting the address through DWORD.
    BYTE* const base = (BYTE*)lpMemBlock;

    ReadLatency0 = (void (__cdecl *)(void*, DWORD))(base);
    ReadLatency1 = (void (__cdecl *)(void*, DWORD))(base + 1 * SLOT_SIZE);
    ReadLatency2 = (void (__cdecl *)(void*, DWORD))(base + 2 * SLOT_SIZE);
    ReadLatency3 = (void (__cdecl *)(void*, DWORD))(base + 3 * SLOT_SIZE);
    TestNop1 = (void (__cdecl *)())(base + 4 * SLOT_SIZE);

    // TestNop1 takes no generation parameters, so it can be emitted right away.
    GenerateTestNopCode1();

    return S_OK;
}

// Releases the code buffer obtained in Initialize() and clears every pointer
// that referred into it.  Does nothing when no buffer is held.
// Always returns S_OK.
HRESULT __fastcall CLatencyFunc::Terminate()
{
    if (! lpMemBlock)
    {
        return S_OK;
    }

    // Decommit the pages, then release the whole region.
    ::VirtualFree(lpMemBlock, 20480, MEM_DECOMMIT);
    ::VirtualFree(lpMemBlock, 0, MEM_RELEASE);
    lpMemBlock = NULL;

    // The generated routines lived inside the freed region.
    TestNop1 = NULL;
    ReadLatency3 = NULL;
    ReadLatency2 = NULL;
    ReadLatency1 = NULL;
    ReadLatency0 = NULL;

    return S_OK;
}

// Emits the x86 machine code of ReadLatency0 into its buffer.
//
// Generated routine:
//       mov eax, [esp+4] / mov ecx, [esp+8] / push ebx / ... (16-byte prologue)
//   loop:
//       mov eax, [eax]   ; NopCount x "or eax, edx"
//       mov eax, [eax]   ; NopCount x "or eax, edx"
//       sub ecx, 2
//       jnz loop
//       pop ebx / ret / nop
//
// NopCount - number of "or eax, edx" fillers placed after each load.
// The routine occupies 32 + 4 * NopCount bytes; no bounds check is made.
// Does nothing when ReadLatency0 is NULL.
void __fastcall CLatencyFunc::GenerateCode0(DWORD NopCount)
{
    BYTE* const code = (BYTE*)ReadLatency0;
    if (! code)
    {
        return;
    }

    static const BYTE prologue[] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi
    };

    BYTE* p = code;
    for (DWORD k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }

    // Loop body: two identical halves, each a dependent load plus fillers.
    for (int half = 0; half < 2; ++half)
    {
        *p++ = 0x8B;
        *p++ = 0x00;                // mov eax, [eax]

        for (DWORD n = 0; n < NopCount; ++n)
        {
            *p++ = 0x09;
            *p++ = 0xD0;            // or eax, edx
        }
    }

    // rel32 of the jnz: from the byte after the jump back to offset 16,
    // i.e. -(9 + 4 + NopCount * 4).
    const int rel = -(int(NopCount << 2) + 13);
    const DWORD relBits = DWORD(rel);

    *p++ = 0x83;
    *p++ = 0xE9;
    *p++ = 0x02;                    // sub ecx, 2
    *p++ = 0x0F;
    *p++ = 0x85;                    // jnz rel32 (little-endian displacement follows)
    for (int shift = 0; shift < 32; shift += 8)
    {
        *p++ = BYTE(relBits >> shift);
    }

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
    *p++ = 0x90;                    // nop
}

// Emits the x86 machine code of ReadLatency1 into its buffer.
//
// Generated routine:
//       16-byte prologue (eax = [esp+4], ecx = [esp+8], edx = ebx = 0)
//   loop:
//       FIXED_NOP_COUNT x "add ebx, edx"
//       mov eax, [eax+ebx]
//       NopCount x "add ebx, edx"
//       and ebx, eax
//       sub ecx, 1
//       jnz loop
//       pop ebx / ret
//
// NopCount - number of variable fillers placed after the load.
// The routine occupies 32 + 2 * FIXED_NOP_COUNT + 2 * NopCount bytes; no
// bounds check is made.  Does nothing when ReadLatency1 is NULL.
void __fastcall CLatencyFunc::GenerateCode1(DWORD NopCount)
{
    BYTE* const code = (BYTE*)ReadLatency1;
    if (! code)
    {
        return;
    }

    static const BYTE prologue[] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi
    };

    BYTE* p = code;
    for (DWORD k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }

    const DWORD fixedCount = FIXED_NOP_COUNT;
    DWORD n;

    // Fixed fillers in front of the load
    for (n = 0; n < fixedCount; ++n)
    {
        *p++ = 0x01;
        *p++ = 0xD3;                // add ebx, edx
    }

    *p++ = 0x8B;
    *p++ = 0x04;
    *p++ = 0x03;                    // mov eax, [eax+ebx]

    // Variable fillers behind the load (same encoding as the fixed ones)
    for (n = 0; n < NopCount; ++n)
    {
        *p++ = 0x01;
        *p++ = 0xD3;                // add ebx, edx
    }

    // Makes ebx depend on the loaded value
    *p++ = 0x21;
    *p++ = 0xC3;                    // and ebx, eax

    // rel32 of the jnz back to offset 16:
    // -(9 + 3 + 2 + FIXED_NOP_COUNT * 2 + NopCount * 2)
    const int rel = -(int((fixedCount << 1) + (NopCount << 1)) + 14);
    const DWORD relBits = DWORD(rel);

    *p++ = 0x83;
    *p++ = 0xE9;
    *p++ = 0x01;                    // sub ecx, 1
    *p++ = 0x0F;
    *p++ = 0x85;                    // jnz rel32 (little-endian displacement follows)
    for (int shift = 0; shift < 32; shift += 8)
    {
        *p++ = BYTE(relBits >> shift);
    }

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
}

// Emits the x86 machine code of ReadLatency2 into its buffer.
//
// Generated routine:
//       16-byte prologue (eax = [esp+4], ecx = [esp+8], edx = ebx = 0)
//   loop:
//       mov eax, [eax]   ; SyncNopCount x "or eax, ebx"
//       mov eax, [eax]   ; NopCount     x "or eax, edx"
//       sub ecx, 1
//       jnz loop
//       pop ebx / ret / nop
//
// NopCount     - number of fillers after the second load.
// SyncNopCount - number of fillers after the first load.
// The routine occupies 32 + 2 * SyncNopCount + 2 * NopCount bytes; no bounds
// check is made.  Does nothing when ReadLatency2 is NULL.
void __fastcall CLatencyFunc::GenerateCode2(DWORD NopCount, DWORD SyncNopCount)
{
    BYTE* const code = (BYTE*)ReadLatency2;
    if (! code)
    {
        return;
    }

    static const BYTE prologue[] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi
    };

    BYTE* p = code;
    for (DWORD k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }

    DWORD n;

    // First load, followed by the "sync" fillers
    *p++ = 0x8B;
    *p++ = 0x00;                    // mov eax, [eax]
    for (n = 0; n < SyncNopCount; ++n)
    {
        *p++ = 0x09;
        *p++ = 0xD8;                // or eax, ebx
    }

    // Second load, followed by the regular fillers
    *p++ = 0x8B;
    *p++ = 0x00;                    // mov eax, [eax]
    for (n = 0; n < NopCount; ++n)
    {
        *p++ = 0x09;
        *p++ = 0xD0;                // or eax, edx
    }

    // rel32 of the jnz back to offset 16:
    // -(9 + (NopCount + 1) * 2 + (SyncNopCount + 1) * 2)
    const int rel = -(int((SyncNopCount << 1) + (NopCount << 1)) + 13);
    const DWORD relBits = DWORD(rel);

    *p++ = 0x83;
    *p++ = 0xE9;
    *p++ = 0x01;                    // sub ecx, 1
    *p++ = 0x0F;
    *p++ = 0x85;                    // jnz rel32 (little-endian displacement follows)
    for (int shift = 0; shift < 32; shift += 8)
    {
        *p++ = BYTE(relBits >> shift);
    }

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
    *p++ = 0x90;                    // nop
}

// Emits the x86 machine code of ReadLatency3 into its buffer.
//
// Generated routine:
//       16-byte prologue (eax = [esp+4], ecx = [esp+8], edx = ebx = 0)
//   loop:
//       mov eax, [eax]   ; NopCount x "nop"
//       mov eax, [eax]   ; NopCount x "nop"
//       sub ecx, 2
//       jnz loop
//       pop ebx / ret
//
// NopCount - number of one-byte NOPs placed after each load.
// The routine occupies 31 + 2 * NopCount bytes; no bounds check is made.
// Does nothing when ReadLatency3 is NULL.
void __fastcall CLatencyFunc::GenerateCode3(DWORD NopCount)
{
    BYTE* const code = (BYTE*)ReadLatency3;
    if (! code)
    {
        return;
    }

    static const BYTE prologue[] =
    {
        0x8B, 0x44, 0x24, 0x04,     // mov eax, [esp+4]
        0x8B, 0x4C, 0x24, 0x08,     // mov ecx, [esp+8]
        0x53,                       // push ebx
        0x90,                       // nop
        0x31, 0xD2,                 // xor edx, edx
        0x31, 0xDB,                 // xor ebx, ebx
        0x8B, 0xF6                  // mov esi, esi
    };

    BYTE* p = code;
    for (DWORD k = 0; k < sizeof(prologue); ++k)
    {
        *p++ = prologue[k];
    }

    // Loop body: two identical halves, each a dependent load plus NOP padding.
    for (int half = 0; half < 2; ++half)
    {
        *p++ = 0x8B;
        *p++ = 0x00;                // mov eax, [eax]

        for (DWORD n = 0; n < NopCount; ++n)
        {
            *p++ = 0x90;            // nop
        }
    }

    // rel32 of the jnz back to offset 16: -(9 + 4 + NopCount * 2)
    const int rel = -(int(NopCount << 1) + 13);
    const DWORD relBits = DWORD(rel);

    *p++ = 0x83;
    *p++ = 0xE9;
    *p++ = 0x02;                    // sub ecx, 2
    *p++ = 0x0F;
    *p++ = 0x85;                    // jnz rel32 (little-endian displacement follows)
    for (int shift = 0; shift < 32; shift += 8)
    {
        *p++ = BYTE(relBits >> shift);
    }

    // Epilogue
    *p++ = 0x5B;                    // pop ebx
    *p++ = 0xC3;                    // ret
}

// Emits the x86 machine code of TestNop1 into its buffer.
//
// Generated routine:
//       mov ecx, 0x00080000 / xor edx, edx / padding up to offset 16
//   loop:
//       FIXED_NOP_COUNT x "add eax, edx"
//       and eax, edx
//       sub ecx, 1
//       jnz loop
//       ret / nop / nop
//
// The routine occupies 30 + 2 * FIXED_NOP_COUNT bytes.
// Does nothing when TestNop1 is NULL.
void __fastcall CLatencyFunc::GenerateTestNopCode1()
{
    BYTE* const code = (BYTE*)TestNop1;
    if (! code)
    {
        return;
    }

    // Counter setup; the lea and mov are no-op padding that puts the loop
    // entry at offset 16.
    static const BYTE setup[] =
    {
        0xB9, 0x00, 0x00, 0x08, 0x00,               // mov ecx, 0x00080000
        0x31, 0xD2,                                 // xor edx, edx
        0x8D, 0x1C, 0x1D, 0x00, 0x00, 0x00, 0x00,   // lea ebx, [ebx*1+0x00000000]
        0x8B, 0xF6                                  // mov esi, esi
    };

    BYTE* p = code;
    for (DWORD k = 0; k < sizeof(setup); ++k)
    {
        *p++ = setup[k];
    }

    const DWORD fixedCount = FIXED_NOP_COUNT;

    // Fixed fillers
    for (DWORD n = 0; n < fixedCount; ++n)
    {
        *p++ = 0x01;
        *p++ = 0xD0;                // add eax, edx
    }

    // Dependency
    *p++ = 0x21;
    *p++ = 0xD0;                    // and eax, edx

    // rel32 of the jnz back to offset 16: -(9 + 2 + FIXED_NOP_COUNT * 2)
    const int rel = -(int(fixedCount << 1) + 11);
    const DWORD relBits = DWORD(rel);

    *p++ = 0x83;
    *p++ = 0xE9;
    *p++ = 0x01;                    // sub ecx, 1
    *p++ = 0x0F;
    *p++ = 0x85;                    // jnz rel32 (little-endian displacement follows)
    for (int shift = 0; shift < 32; shift += 8)
    {
        *p++ = BYTE(relBits >> shift);
    }

    // Epilogue
    *p++ = 0xC3;                    // ret
    *p++ = 0x90;                    // nop
    *p++ = 0x90;                    // nop
}

// Pointer-chasing read loop written as a naked function (the body is the
// whole function; the arguments are fetched from the stack by hand).
//
// src   - address of the first link; every loaded DWORD is used as the
//         address of the next load.
// count - number of loads to perform.  Two loads are done per iteration and
//         the loop only exits when the counter reaches exactly zero, so an
//         odd count never terminates and a count of 0 wraps around.
void __declspec(naked) __cdecl CLatencyFunc::ReadTLB(void* src, DWORD count)
{
    __asm
    {
        mov         eax, [esp+8]    // eax = src; [esp+4] is skipped because the first parameter is 'this'
        mov         ecx, [esp+12]   // ecx = count
        ALIGN       16
$loop:
        mov         eax, [eax]      // dependent load #1
        mov         eax, [eax]      // dependent load #2
        sub         ecx, 2          // two loads consumed per iteration
        jnz         $loop
        ret
    }
}

// Fixed reference loop written as a naked function: executes a chain of
// "or eax, edx" instructions, each one reading the eax written by the
// previous one.  The loop body holds 64 of them and the counter is decreased
// by 64 per pass, so 0x10000000 "or" instructions run in total.
// Takes no arguments; eax, ecx and edx are overwritten.
void __declspec(naked) __cdecl CLatencyFunc::TestNop0()
{
    __asm
    {
        xor         eax, eax
        xor         edx, edx
        mov         ecx, 0x10000000     // total number of "or" instructions to execute
        ALIGN       16
$loop:
        // 64 x "or eax, edx"
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        or          eax, edx
        sub         ecx, 64             // matches the 64 instructions above
        jnz         $loop
        ret
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?