📄 memorytestimpl.cpp
字号:
ReadCacheLineFwd = &ReadCacheLineFwd256;
WriteCacheLineFwd = &WriteCacheLineFwd256;
ReadCacheLineBwd = &ReadCacheLineBwd256;
WriteCacheLineBwd = &WriteCacheLineBwd256;
break;
case 9:
ReadCacheLineFwd = &ReadCacheLineFwd512;
WriteCacheLineFwd = &WriteCacheLineFwd512;
ReadCacheLineBwd = &ReadCacheLineBwd512;
WriteCacheLineBwd = &WriteCacheLineBwd512;
break;
}
}
// Times the NOP routine of LatencyFunc and stores the derived value in
// NopLatency.
//
// lm selects the routine: LM_METHOD_1 times LatencyFunc.TestNop0(), any
// other value times LatencyFunc.TestNop1(). The routine is executed four
// times, each run bracketed by ReadTSC() samples. FIND_MIN (defined
// elsewhere) is applied to the four deltas and its first argument is used
// as the result afterwards (presumably the smallest delta -- confirm
// against the macro). SerializeLatency is subtracted from that result
// before it is scaled into NopLatency.
void __fastcall CMemoryTest::MeasureNopLatency(LATENCYMEASUREMENT lm)
{
    const bool useMethod1 = (lm == LM_METHOD_1);

    // Five time-stamp samples around four back-to-back runs. The call
    // sequence is kept strictly linear so nothing extra is timed.
    __int64 t0, t1, t2, t3, t4;
    if (useMethod1)
    {
        t0 = ReadTSC();
        LatencyFunc.TestNop0();
        t1 = ReadTSC();
        LatencyFunc.TestNop0();
        t2 = ReadTSC();
        LatencyFunc.TestNop0();
        t3 = ReadTSC();
        LatencyFunc.TestNop0();
        t4 = ReadTSC();
    }
    else
    {
        t0 = ReadTSC();
        LatencyFunc.TestNop1();
        t1 = ReadTSC();
        LatencyFunc.TestNop1();
        t2 = ReadTSC();
        LatencyFunc.TestNop1();
        t3 = ReadTSC();
        LatencyFunc.TestNop1();
        t4 = ReadTSC();
    }

    // Duration of each individual run.
    __int64 run1 = t1 - t0;
    __int64 run2 = t2 - t1;
    __int64 run3 = t3 - t2;
    __int64 run4 = t4 - t3;

    // After FIND_MIN the result is taken from the first argument.
    FIND_MIN(run1, run2, run3, run4);
    run1 -= SerializeLatency;

    // Method 1 scales by 268435456 (2^28); the other method scales by
    // 524288 (2^19) times (FIXED_NOP_COUNT + 1).
    const double divisor = useMethod1
        ? 268435456.0
        : (524288.0 * float(FIXED_NOP_COUNT + 1));
    NopLatency = double(run1) / divisor;
}
// Resets the stored measurement results and status flags of this object:
// per-test point counts become -1, graph/axis data and size limits become
// zero, yMax becomes 0.1f, NopLatency becomes 0.0 and the three status
// flags are set to their starting values.
void __fastcall CMemoryTest::CleanUp()
{
    // Mark every test as having no recorded points.
    for (UINT testIdx = 0; testIdx < MAX_TESTS; ++testIdx)
    {
        NumPoints[testIdx] = -1;
    }

    // Size range and axis scaling.
    MinSize = 0;
    MaxSize = 0;
    NumMajorPoints = 0;
    MinIncrementUnit = 0;
    yMax = 0.1f;

    // Major axis points.
    for (UINT majorIdx = 0; majorIdx < MAX_MAJOR_POINTS; ++majorIdx)
    {
        xMainVal[majorIdx] = 0;
        yMainVal[majorIdx] = 0;
    }

    // Per-point sizes.
    for (UINT sizeIdx = 0; sizeIdx < MAX_POINTS; ++sizeIdx)
    {
        sizeVal[sizeIdx] = 0;
    }

    // Every graph point of every test.
    for (UINT row = 0; row < MAX_TESTS; ++row)
    {
        for (UINT col = 0; col < MAX_POINTS; ++col)
        {
            GraphData[row][col].Size = 0;
            GraphData[row][col].xVal = 0;
            GraphData[row][col].Value = 0.0f;
        }
    }

    // Invalidate NopLatency
    NopLatency = 0.0;

    // Status flags.
    IsWrapAround = false;
    IsUpdateNeeded = true;
    IsTerminatePending = false;
}
// Estimates the cache line size for the given cache level and stores its
// base-2 logarithm in LogCLSize[dwLevel].
//
// dwLevel: 0 uses POST_L1_REGION as the block size, 1 uses POST_L2_REGION;
//          any other value is rejected.
//
// The routine installs a test configuration via SetData(), clears
// NopLatency, and calls Measure() once per probed second-DWORD offset
// (three offsets around each of 16, 32, 64, 128 and 256). It then reads the
// results back with GetResults() and picks the point with the largest
// increase in Value over its predecessor.
//
// Returns:
//   E_INVALIDARG  - dwLevel > 1
//   failure code  - propagated unchanged from the first failing Measure()
//   E_FAIL        - no qualifying point was found
//   S_OK          - LogCLSize[dwLevel] has been updated
HRESULT __stdcall CMemoryTest::MeasureCacheLineSize(DWORD dwLevel)
{
    // @WARNING: CMemoryTest object must be initialized
    if (dwLevel > 1)
    {
        return E_INVALIDARG;
    }
    DWORD blockSize = (dwLevel == 0) ? POST_L1_REGION : POST_L2_REGION;

    // Preparing the Cache Line Size determination test
    TESTDATA td =
    {
        TT_DATA_CACHE_ARRIVAL,
        ST_RANDOM,
        VP_SECOND_DWORD_OFFSET,
        (1 << 26), // data set size = 64M
        9, 9, // LogStrideSize[2]
        blockSize, blockSize, // block size[2]
        // Memory Bandwidth tests
        RT_64_BIT_MMX, // unused
        PREFETCHTYPE(LM_METHOD_1), // unused
        FALSE, // unused
        FALSE, // unused
        0, 0, // unused
        0, 0, // unused
        // Memory Latency tests
        1, 1, // SegmentsCount[2]
        64, 64, // NopCount[2]
        16, 16, // SyncNopCount[2]
        0, 0, // FirstOffset[2]
        4, 272, // SecondOffset[2]
        // Decode Bandwidth test
        IT_NOP_1BYTE, // unused
        // Misc
        FALSE,
        FALSE,
        0, 0,
        0
    };
    SetData(&td);

    MEASUREMENTDATA md =
    {
        TM_DCACHE_ARRIVAL_RANDOM, // test type
        td.BlockSize[0], // block size
        DWORD(float(td.DataSetSize) / float(td.BlockSize[0])), // repeat count
        td.LogStrideSize[0], // stride size
        td.NopCount[0], // NOP count
        td.SyncNopCount[0], // SyncNOP count
        td.SegmentsCount[0], // Segments count, unused
        0, // unused
        0, // unused
        td.FirstOffset[0], // First DWORD offset = 0
        td.SecondOffset[0] // Second DWORD offset, variable
    };

    // This is faster, as NopLatency isn't actually needed
    NopLatency = 0.0;

    // Second-DWORD offsets to probe, in measurement order: one offset just
    // below, one at, and one just above each candidate line size.
    static const DWORD probeOffsets[] =
    {
        12,  16,  20,   // probing for 16 bytes
        28,  32,  36,   // probing for 32 bytes
        60,  64,  68,   // probing for 64 bytes
        124, 128, 132,  // probing for 128 bytes
        252, 256, 260   // probing for 256 bytes
    };
    const int probeCount = int(sizeof(probeOffsets) / sizeof(probeOffsets[0]));

    for (int probe = 0; probe < probeCount; ++probe)
    {
        md.SecondOffset = probeOffsets[probe];
        HRESULT hr = Measure(md);
        if (FAILED(hr))
        {
            return hr;
        }
    }

    TESTRESULTS tr;
    GetResults(&tr, DWORD(TM_DCACHE_ARRIVAL_RANDOM));

    // Finding the cache line size: look for the largest step up between
    // consecutive points. Each candidate also consults the point that
    // follows it, so the scan stops one short of the last recorded point;
    // going further would read GraphData[NumPoints], which is not a
    // measured point.
    DWORD CLSize = 0;
    float dfmax = 0.0f;
    for (int j = 1; j + 1 < tr.NumPoints; ++j)
    {
        float df = tr.GraphData[j].Value - tr.GraphData[j - 1].Value;
        if ((df > dfmax) && ((tr.GraphData[j + 1].Value - df) > 0.0f))
        {
            dfmax = df;
            CLSize = tr.GraphData[j].Size;
        }
    }

    if (CLSize == 0)
    {
        // No point showed a qualifying step.
        return E_FAIL;
    }

    LogCLSize[DWORD(dwLevel)] = DWORD(roundf(log2f(float(CLSize))));
    return S_OK;
}
HRESULT __stdcall CMemoryTest::Initialize(MEMORYALLOCATION memAlloc, char* pError)
{
// Clean up
CleanUp();
// Get the CPU description
sprintf(CpuDesc, "%s (%s) %.1lf MHz", CpuModel, CpuCore, CpuFrequency);
// Get the RAM description
char szCL[4], szRCD[4], szRP[4], szRAS[4];
if (! MemoryTimings[0])
{
strcpy(szCL, "N/A");
}
else
{
sprintf(szCL, "%.1f", float(MemoryTimings[0]) * 0.1f);
}
if (! MemoryTimings[1])
{
strcpy(szRCD, "N/A");
}
else
{
sprintf(szRCD, "%u", MemoryTimings[1]);
}
if (! MemoryTimings[2])
{
strcpy(szRP, "N/A");
}
else
{
sprintf(szRP, "%u", MemoryTimings[2]);
}
if (! MemoryTimings[3])
{
strcpy(szRAS, "N/A");
}
else
{
sprintf(szRAS, "%u", MemoryTimings[3]);
}
sprintf(RamDesc, "%s (%s-%s-%s-%s) %.1lf MHz", RamType, szCL, szRCD, szRP, szRAS, MemoryFrequency);
// Fill the system information structure
::GetSystemInfo(&sSysInfo);
// @WARNING:
if (sSysInfo.dwPageSize != DEF_PAGE_SIZE)
{
if (pError) sprintf(pError, "Unsupported page size %u.", sSysInfo.dwPageSize);
return E_FAIL;
}
// Initialize CLatencyFunc object
if FAILED(LatencyFunc.Initialize())
{
return E_FAIL;
}
// TRICKY: on a multi-processor systems, make sure that the memory block
// is always allocated by CPU0. This is needed for a proper operation on
// a NUMA platforms.
if (sSysInfo.dwNumberOfProcessors > 1)
{
if (! ::SetThreadAffinityMask(::GetCurrentThread(), 1))
{
return HRESULT_FROM_WIN32(::GetLastError());
}
}
__try {
switch (memAlloc)
{
case MA_STANDARD:
{
// Allocate memory
lpMemReserved = malloc(MAX_MEMORY_RESERVED_SIZE);
if (! lpMemReserved)
{
if (pError) strcpy(pError, "malloc() failed.");
return E_OUTOFMEMORY;
}
// main aligned pointer to the allocated memory
ptr[0] = (DWORD *)((DWORD(lpMemReserved) + 4092) & DWORD(-4096L));
break;
}
case MA_VIRTUALLOCK:
{
// Get the process working set size
if (! ::GetProcessWorkingSetSize(::GetCurrentProcess(),
&dwMinimumWorkingSetSize,
&dwMaximumWorkingSetSize))
{
DWORD err = ::GetLastError();
if (pError) sprintf(pError, "GetProcessWorkingSetSize() failed, error %u.", err);
return HRESULT_FROM_WIN32(err);
}
// Set the process working set size
if (! ::SetProcessWorkingSetSize(::GetCurrentProcess(),
MAX_MEMORY_BLOCK_SIZE + 1048576,
MAX_MEMORY_BLOCK_SIZE + 2097152))
{
DWORD err = ::GetLastError();
if (pError) sprintf(pError, "SetProcessWorkingSetSize() failed, error %u.", err);
return HRESULT_FROM_WIN32(err);
}
lpMemReserved = ::VirtualAlloc(NULL,
MAX_MEMORY_RESERVED_SIZE,
MEM_RESERVE,
PAGE_EXECUTE_READWRITE);
if (! lpMemReserved)
{
DWORD err = ::GetLastError();
if (pError) sprintf(pError, "VirtualAlloc() failed to reserve memory, error %u.", err);
return HRESULT_FROM_WIN32(err);
}
lpMemReserved = ::VirtualAlloc(lpMemReserved,
MAX_MEMORY_RESERVED_SIZE,
MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
if (! lpMemReserved)
{
DWORD err = ::GetLastError();
if (pError) sprintf(pError, "VirtualAlloc() failed to commit memory, error %u.", err);
return HRESULT_FROM_WIN32(err);
}
// main aligned pointer to the allocated memory
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -