/* hxfprofiler.c -- listing captured from a web code viewer (viewer UI label "font size" removed from code path) */
/* ************************************************************************* *\
**
** INTEL Corporation Proprietary Information
**
** This listing is supplied under the terms of a license
** agreement with INTEL Corporation and may not be copied
** nor disclosed except in accordance with the terms of
** that agreement.
**
** Copyright (c) 2003 Intel Corporation.
** All Rights Reserved.
**
** ************************************************************************* **
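** FILE: HXFProfiler.c
** DESCRIPTION: PMU-based profiler for the OpenGL ES driver. Sets up the PMU
**              event profile on first initialization and, at shutdown, writes
**              the accumulated per-bin counters to libglescl.csv.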
**
** AUTHOR: Cian Montgomery
** CREATED: November 30, 2003
**
* $Date: 7/19/04 5:13p $ $Revision: 6 $
* $Log: /Intel_Development/Drivers/Marathon/WinCE42/opengles/HXFProfiler.c $
*
* 6 7/19/04 5:13p Cmdoan
* changed ARM to not use delta calculations (always resets PMU), and
* added I64 to %u output for large numbers.
*
* 5 2/03/04 8:36p Clmontgo
* Slaveport Rewrite and partial Fix(HACK)
*
* 4 1/30/04 2:47p Clmontgo
* Fix calculations of TnL/Vertex calculation.
*
* 3 1/30/04 10:36a Clmontgo
*
* 2 1/26/04 6:46p Clmontgo
* Added Profiler, Converted Lighting procs to Fixed point, Worked on
* screen rotation code, Added wMMX debug Macros to Lighting.
*
* ************************************************************************* */
#include "HXFTypes.h"
#include "HXFLocal.h"
#include "HXFProfiler.h"
#include <windows.h>
#include <stdio.h>
#ifdef HXF_PROFILE // Entire file only included if Profiling is enabled
/*
 * Defined outside this file. Called once from HXFProfilerInitialize() with a
 * row of s_PMUProfiles.
 * NOTE(review): presumably programs the PMU event selection from that list --
 * confirm against the definition. The caller passes a PMU_EVENT array where
 * this prototype expects HUINT32; verify both have the same size on the
 * target compiler.
 */
extern void HXFSetupPMU(const HUINT32* pEvents);
/*
 * PMU event selectors. The numeric value of each entry is also used as the
 * index into s_EventNames when the report labels are printed.
 */
typedef enum _PMU_EVENT
{
    PMU_EVENT_ICACHE_MISS          = 0x0, // ICache miss -> fetch from external memory (counts events)
    PMU_EVENT_ISTALL               = 0x1, // Instruction stall due to DCache miss or
                                          // branch mispredict (counts cycles)
    PMU_EVENT_DATASTALL            = 0x2, // Stall due to data dependency (counts cycles)
    PMU_EVENT_ITLB_MISS            = 0x3, // Instruction TLB miss (counts events)
    PMU_EVENT_DTLB_MISS            = 0x4, // Data TLB miss (counts events)
    PMU_EVENT_BRANCHES             = 0x5, // Number of branches
    PMU_EVENT_BRANCH_MISS          = 0x6, // Branch mispredict (counts events)
    PMU_EVENT_INST_X               = 0x7, // Instruction executed
    PMU_EVENT_DBUFFERS_FULL_CYCLES = 0x8, // Stall due to full data cache buffers (counts cycles)
    PMU_EVENT_DBUFFERS_FULL        = 0x9, // Stall due to full data cache buffers (counts events)
    PMU_EVENT_DCACHE_ACCESSES      = 0xA, // Data cache accesses (counts events)
    PMU_EVENT_DCACHE_MISSES        = 0xB, // Data cache misses (counts events)
    PMU_EVENT_DCACHE_WB            = 0xC, // Number of 4-word fills to the cache
                                          // NOTE(review): the name says write-back, the
                                          // original note says fills -- confirm which
    PMU_EVENT_SW_PC_CHANGED        = 0xD, // Number of SW changes to the PC,
                                          // including mov, b, bl, blx, ldm, ldr
    PMU_EVENT_CYCLES               = 0xE  // FAKE entry so the name lookup works for
                                          // the cycle-count column
} PMU_EVENT;
/*
 * Hard-coded, ready-made profiling setups. Each value selects one row of
 * s_PMUProfiles; every row starts with PMU_EVENT_CYCLES followed by the four
 * events listed below.
 */
typedef enum
{
    HXF_EVENT_PROFILE_ICACHE_PERFORMANCE,   // 1) PMU_EVENT_INST_X
                                            // 2) PMU_EVENT_ICACHE_MISS
                                            // 3) PMU_EVENT_ITLB_MISS
                                            // 4) PMU_EVENT_ISTALL
    HXF_EVENT_PROFILE_DCACHE_PERFORMANCE,   // 1) PMU_EVENT_DCACHE_ACCESSES
                                            // 2) PMU_EVENT_DCACHE_MISSES
                                            // 3) PMU_EVENT_DBUFFERS_FULL
                                            // 4) PMU_EVENT_DTLB_MISS
    HXF_EVENT_PROFILE_CACHE_ACTIVITY,       // 1) PMU_EVENT_DCACHE_MISSES
                                            // 2) PMU_EVENT_DCACHE_WB
                                            // 3) PMU_EVENT_ICACHE_MISS
                                            // 4) PMU_EVENT_INST_X
    HXF_EVENT_PROFILE_BRANCH_PERFORMANCE,   // 1) PMU_EVENT_BRANCHES
                                            // 2) PMU_EVENT_BRANCH_MISS
                                            // 3) PMU_EVENT_SW_PC_CHANGED
                                            // 4) PMU_EVENT_INST_X
    HXF_EVENT_PROFILE_STALLS,               // 1) PMU_EVENT_ISTALL
                                            // 2) PMU_EVENT_DATASTALL
                                            // 3) PMU_EVENT_BRANCH_MISS
                                            // 4) PMU_EVENT_DBUFFERS_FULL_CYCLES
    HXF_EVENT_PROFILE_NUM_PROFILES          // Number of profiles; sizes s_PMUProfiles
} HXF_EVENT_PROFILE;
/*
 * Event list for each HXF_EVENT_PROFILE value (one row per profile).
 * Column 0 is always the PMU_EVENT_CYCLES placeholder; columns 1-4 are the
 * events being counted. The selected row is handed to HXFSetupPMU() and is
 * also used to look up the row labels of the report in s_EventNames.
 */
static const PMU_EVENT s_PMUProfiles[HXF_EVENT_PROFILE_NUM_PROFILES][5] =
{
    {   // HXF_EVENT_PROFILE_ICACHE_PERFORMANCE
        PMU_EVENT_CYCLES,
        PMU_EVENT_INST_X,
        PMU_EVENT_ICACHE_MISS,
        PMU_EVENT_ITLB_MISS,
        PMU_EVENT_ISTALL
    },
    {   // HXF_EVENT_PROFILE_DCACHE_PERFORMANCE
        PMU_EVENT_CYCLES,
        PMU_EVENT_DCACHE_ACCESSES,
        PMU_EVENT_DCACHE_MISSES,
        PMU_EVENT_DBUFFERS_FULL,
        PMU_EVENT_DTLB_MISS
    },
    {   // HXF_EVENT_PROFILE_CACHE_ACTIVITY
        PMU_EVENT_CYCLES,
        PMU_EVENT_DCACHE_MISSES,
        PMU_EVENT_DCACHE_WB,
        PMU_EVENT_ICACHE_MISS,
        PMU_EVENT_INST_X
    },
    {   // HXF_EVENT_PROFILE_BRANCH_PERFORMANCE
        PMU_EVENT_CYCLES,
        PMU_EVENT_BRANCHES,
        PMU_EVENT_BRANCH_MISS,
        PMU_EVENT_SW_PC_CHANGED,
        PMU_EVENT_INST_X
    },
    {   // HXF_EVENT_PROFILE_STALLS
        PMU_EVENT_CYCLES,
        PMU_EVENT_ISTALL,
        PMU_EVENT_DATASTALL,
        PMU_EVENT_BRANCH_MISS,
        PMU_EVENT_DBUFFERS_FULL_CYCLES
    }
};
/*
 * Report title for each profile; indexed by HXF_EVENT_PROFILE value
 * (LogResults prints s_ProfileNames[HXFPROFILER_EVENT_PROFILE]).
 */
static const char* s_ProfileNames[] =
{
    "ICache Performance",   // HXF_EVENT_PROFILE_ICACHE_PERFORMANCE
    "DCache Performance",   // HXF_EVENT_PROFILE_DCACHE_PERFORMANCE
    "Cache Activity",       // HXF_EVENT_PROFILE_CACHE_ACTIVITY
    "Branch Performance",   // HXF_EVENT_PROFILE_BRANCH_PERFORMANCE
    "Stalls",               // HXF_EVENT_PROFILE_STALLS
};
/*
 * Row labels for the report tables; indexed by PMU_EVENT value, so the order
 * here has to follow the numeric order of that enum.
 */
static const char* s_EventNames[] =
{
    "ICache Miss(events)",              // 0x0 PMU_EVENT_ICACHE_MISS
    "Instruction Stall(cycles)",        // 0x1 PMU_EVENT_ISTALL
    "Data Dependency Stall(cycles)",    // 0x2 PMU_EVENT_DATASTALL
    "Instruction TLB Miss(events)",     // 0x3 PMU_EVENT_ITLB_MISS
    "Data TLB Miss(events)",            // 0x4 PMU_EVENT_DTLB_MISS
    "Branches(events)",                 // 0x5 PMU_EVENT_BRANCHES
    "Branch Misspredicts(events)",      // 0x6 PMU_EVENT_BRANCH_MISS
    "Instructions Executed(events)",    // 0x7 PMU_EVENT_INST_X
    "Data Buffers Full(clocks)",        // 0x8 PMU_EVENT_DBUFFERS_FULL_CYCLES
    "Data Buffers Full(events)",        // 0x9 PMU_EVENT_DBUFFERS_FULL
    "DCache Access(events)",            // 0xA PMU_EVENT_DCACHE_ACCESSES
    "DCache Miss(events)",              // 0xB PMU_EVENT_DCACHE_MISSES
    "DCache Write(events)",             // 0xC PMU_EVENT_DCACHE_WB
    "Software PC Change(events)",       // 0xD PMU_EVENT_SW_PC_CHANGED
    "Cycles"                            // 0xE PMU_EVENT_CYCLES
};
/*
 * Column labels for the profiling bins; LogResults indexes this table with
 * the same bin index it uses for g_Accumulators.
 * NOTE(review): the order presumably mirrors the HXFPROFILE_* bin enumeration
 * declared in the profiler header -- confirm when adding bins.
 */
static const char* s_BinNames[] =
{
    // State bins
    "Null",
    "Fog",
    "Matrix",
    "Light",
    "Vertex",

    // Draw bins
    "Setup",
    "Prepare",
    "Validate",
    "TnL",
    "Assemble",
    "Clip",
    "Slaveport",
};
// Number of counter slots kept per bin. Matches the 5-entry rows of
// s_PMUProfiles (cycle-count column plus four events).
// Original note: "Number of event counters on bulverde".
#define PMU_NUMBER_COUNTERS 5

/*
 * Running totals for one profiling bin.
 */
typedef struct _Accumulator
{
    HUINT64 Count;                          // Used by LogResults as the per-bin divisor
                                            // for the "per Count" columns
    HUINT64 Events[PMU_NUMBER_COUNTERS];    // One total per column of the active
                                            // s_PMUProfiles row; [0] is read as cycles
} Accumulator;
// Compile-time choice of which s_PMUProfiles row is programmed and reported.
#define HXFPROFILER_EVENT_PROFILE HXF_EVENT_PROFILE_STALLS

// File-private state.
static HBOOL   s_bHXFProfile = HFALSE;  // Set to HTRUE by HXFProfilerInitialize; not read in this file
static HUINT32 s_NumContexts = 0;       // Number of HXFProfilerInitialize calls so far
static HUINT64 s_StartTime   = 0;       // QueryPerformanceCounter value taken at first initialize
static HUINT64 s_EndTime     = 0;       // QueryPerformanceCounter value taken at shutdown

// Externally visible state.
Accumulator g_Accumulators[HXFPROFILE_NUM_BINS];    // Per-bin totals reported by LogResults
Accumulator g_AccumulatorStartValue;                // Not referenced in this file
                                                    // NOTE(review): presumably used by the
                                                    // HXFProfilerStart/Stop implementation -- confirm
HUINT32     g_CurrentAccumulator;                   // Not referenced in this file (see note above)
HUINT32     g_ProfileCounters[HXFPROFILE_NUM_COUNTERS] = { 0 };  // Global counters (draws,
                                                                 // primitives, vertices, ...)
/*
 * Registers one more profiler user.
 *
 * The first call (s_NumContexts == 0) programs the PMU with the event row
 * selected by HXFPROFILER_EVENT_PROFILE, records the start timestamp used for
 * the report's total time, and sets s_bHXFProfile. Every call increments
 * s_NumContexts.
 *
 * Return: nothing.
 */
void HXFProfilerInitialize(void)
{
    // Post-increment: the test sees the count before this call is added.
    if (s_NumContexts++ != 0)
    {
        return; // Already set up by an earlier call
    }

    // Setup the PMU
    HXFSetupPMU(s_PMUProfiles[HXFPROFILER_EVENT_PROFILE]);

    QueryPerformanceCounter((LARGE_INTEGER*)&s_StartTime);
    s_bHXFProfile = HTRUE;
}
/*
 * Report column types. Within this file only RT_NONE is referenced: it is
 * the "no per-unit column" sentinel compared against s_RptTypes entries.
 */
typedef enum _RPT_TYPE
{
    RT_NONE,    // 0: bin has no per-unit column
    RT_COUNT,
    RT_VERTS,
    RT_TRIS,
    NUM_RT
} RPT_TYPE;
/*
 * Unit names for the "<bin>\<unit>" header columns. LogResults indexes this
 * table with an s_RptTypes entry, i.e. with an HXFProfileCounter value (not
 * with RPT_TYPE).
 */
static const char* s_RTNames[] =
{
    "",                 // 0
    "Draw",             // 1
    "Primitive",        // 2
    "Vertex",           // 3
    "SP Bytes",         // 4
    "SP Polls",         // 5
    "Count",            // 6
    "BAD NUM COUNTERS"  // 7
};
/*
 * For each profiling bin, the counter that LogResults divides the bin's
 * event totals by to produce the per-unit column. Bins marked
 * HXFPROFILE_COUNTER_COUNT are divided by their own Accumulator.Count.
 */
static const HXFProfileCounter s_RptTypes[HXFPROFILE_NUM_BINS] =
{
    // State bins
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_STATE_NULL,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_STATE_FOG,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_STATE_MATRIX,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_STATE_LIGHT,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_STATE_VERTEX,

    // Draw bins
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_DRAW_SETUP,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_DRAW_PREPARE,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_DRAW_VALIDATE,
    HXFPROFILE_COUNTER_NUM_VERTICES,    // HXFPROFILE_DRAW_TNL,
    HXFPROFILE_COUNTER_NUM_PRIMITIVES,  // HXFPROFILE_DRAW_ASSEMBLE,
    HXFPROFILE_COUNTER_COUNT,           // HXFPROFILE_DRAW_CLIP,
    HXFPROFILE_COUNTER_COUNT            // HXFPROFILE_DRAW_SLAVEPORT,
};
void LogResults(void)
{
FILE* f = NULL;
float cpufreq = 0;
float ftotaltime = 0;
float RTVal[HXFPROFILE_NUM_COUNTERS];
HUINT32 i = 0;
f = fopen("\\release\\libglescl.csv", "wt+");
if(!f)
{
f = fopen("\\libglescl.csv", "wt+");
if(!f)
{
return;
}
}
for( i = 0; i < HXFPROFILE_NUM_COUNTERS; ++i)
{
RTVal[i] = (float)g_ProfileCounters[i];
}
{
float ffreq = 0;
HUINT64 freq = 0, et = 0;
QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
ffreq = (float)freq;
et = s_EndTime - s_StartTime;
{
HUINT64 s = 0, e = 0;
HXFProfilerStart(HXFPROFILE_NULL);
QueryPerformanceCounter((LARGE_INTEGER*)&s);
e = s +freq;
while(s < e) // spin lock till 1 second elapses
{
QueryPerformanceCounter((LARGE_INTEGER*)&s);
}
HXFProfilerStop(HXFPROFILE_NULL);
}
cpufreq = (float)g_Accumulators[HXFPROFILE_NULL].Events[0];
ftotaltime = (et)/ffreq;
}
/* Dump out the data */
if(g_ProfileCounters[HXFPROFILE_COUNTER_NUM_DRAWS])
{
HUINT32 s = 0, e = 0, j = 0;
fprintf(f, "********************************************************************************\n");
// Write log header
fprintf(f, "%s Profile\n", s_ProfileNames[HXFPROFILER_EVENT_PROFILE]);
#ifdef DEBUG
fprintf(f, "Debug Build\n\n");
#endif // DEBUG
// Context summary
fprintf(f, "-------------------------------------------------------------------------------\n");
fprintf(f, "Context Summary\n");
fprintf(f, "-------------------------------------------------------------------------------\n\n");
fprintf(f, "CPU Frequency, %8fhz, %8fmhz\n\n", (cpufreq), (cpufreq/1000000.0));
fprintf(f, "Draw Commands, Primitives, Vertices\n");
fprintf(f, "%u, %u, %u\n\n", g_ProfileCounters[HXFPROFILE_COUNTER_NUM_DRAWS],
g_ProfileCounters[HXFPROFILE_COUNTER_NUM_PRIMITIVES],
g_ProfileCounters[HXFPROFILE_COUNTER_NUM_VERTICES]);
// State Report
s = HXFPROFILE_STATE_FOG;
// HXFPROFILE_STATE_MATRIX,
// HXFPROFILE_STATE_LIGHT;
e = HXFPROFILE_STATE_VERTEX;
// Print Headers
fprintf(f, "State Change Report\n");
for(i = s; i <= e; ++i)
{
fprintf(f, ", %s", s_BinNames[i]);
if(s_RptTypes[i] != RT_NONE)
{
fprintf(f, ", %s\\%s", s_BinNames[i], s_RTNames[s_RptTypes[i]]);
}
}
fprintf(f, "\n");
// Print Data Table
for(j = 0; j < PMU_NUMBER_COUNTERS; ++j)
{
float val = 0;
//label
fprintf(f, "%s", s_EventNames[s_PMUProfiles[HXFPROFILER_EVENT_PROFILE][j]]);
for(i = s; i <= e; ++i)
{
fprintf(f, ", %I64u", g_Accumulators[i].Events[j]);
if(s_RptTypes[i] != RT_NONE)
{
RTVal[HXFPROFILE_COUNTER_COUNT] = (float)g_Accumulators[i].Count;
val = (float)g_Accumulators[i].Events[j];
fprintf(f, ", %8.3f", (val/RTVal[s_RptTypes[i]]));
}
}
fprintf(f, "\n");
}
fprintf(f, "\n");
// Draw
s = HXFPROFILE_DRAW_SETUP;
// HXFPROFILE_DRAW_PREPARE,
// HXFPROFILE_DRAW_VALIDATE,
// HXFPROFILE_DRAW_TNL,
// HXFPROFILE_DRAW_ASSEMBLE,
// HXFPROFILE_DRAW_CLIP,
e = HXFPROFILE_DRAW_SLAVEPORT;
// Print Headers
fprintf(f, "Draw Commands Report\n");
for(i = s; i <= e; ++i)
{
fprintf(f, ", %s", s_BinNames[i]);
if(s_RptTypes[i] != RT_NONE)
{
fprintf(f, ", %s\\%s", s_BinNames[i], s_RTNames[s_RptTypes[i]]);
}
fprintf(f, ", %% of Draw Time" );
if(i == HXFPROFILE_DRAW_SLAVEPORT)
{
fprintf(f, ", MB/S, Polls/Count" );
}
}
fprintf(f, "\n");
// Print Data Table
for(j = 0; j < PMU_NUMBER_COUNTERS; ++j)
{
float total = 0;
float val = 0;
//label
fprintf(f, "%s", s_EventNames[s_PMUProfiles[HXFPROFILER_EVENT_PROFILE][j]]);
for(i = s; i <= e; ++i)
{
total += g_Accumulators[i].Events[j];
}
for(i = s; i <= e; ++i)
{
val = (float)g_Accumulators[i].Events[j];
fprintf(f, ", %I64u",g_Accumulators[i].Events[j] );
if(s_RptTypes[i] != RT_NONE)
{
RTVal[HXFPROFILE_COUNTER_COUNT] = (float)g_Accumulators[i].Count;
fprintf(f, ", %8.3f", (val/RTVal[s_RptTypes[i]]));
}
fprintf(f, ", %8.3f", (val/total));
if(j == 0 && i == HXFPROFILE_DRAW_SLAVEPORT)
{
fprintf(f, ", %8.3f, %8.3f",
RTVal[HXFPROFILE_COUNTER_SP_BYTES] / (1024.0f*1024.0f) / (val/cpufreq),
RTVal[HXFPROFILE_COUNTER_SP_POLLS] / RTVal[HXFPROFILE_COUNTER_COUNT]);
}
}
fprintf(f, "\n");
}
fprintf(f, "\n");
{
float t = 0;
float val = 0;
float pt = 0;
float ot = 1.0f;
fprintf(f, "Title, Time, %% of Total\n");
s = HXFPROFILE_STATE_FOG;
// HXFPROFILE_STATE_MATRIX,
// HXFPROFILE_STATE_LIGHT;
e = HXFPROFILE_STATE_VERTEX;
for(i = s; i <= e; ++i)
{
val = (float)g_Accumulators[i].Events[0];
t =(val/cpufreq);
pt = t/ftotaltime;
ot -= pt;
fprintf(f, "%s Time, %8.3f, %8.3f\n", s_BinNames[i], t, (pt));
}
s = HXFPROFILE_DRAW_SETUP;
// HXFPROFILE_DRAW_PREPARE,
// HXFPROFILE_DRAW_VALIDATE,
// HXFPROFILE_DRAW_TNL,
// HXFPROFILE_DRAW_ASSEMBLE,
// HXFPROFILE_DRAW_CLIP,
e = HXFPROFILE_DRAW_SLAVEPORT;
val = 0.0f;
for(i = s; i <= e; ++i)
{
val += (float)g_Accumulators[i].Events[0];
}
t = val/cpufreq;
pt = t/ftotaltime;
ot -= pt;
fprintf(f, "Draw Time, %8.3f, %8.3f\n", t, pt);
pt = ftotaltime * ot;
fprintf(f, "Other Time, %8.3f, %8.3f\n", pt, ot);
fprintf(f, "\nTotal Time, %8.3f, 1.0\n", ftotaltime);
}
fprintf(f, "\n");
fprintf(f, "********************************************************************************\n");
}
fclose(f);
}
/*
 * Ends the profiling run: records the end timestamp used for the report's
 * total time, then writes the report via LogResults().
 *
 * NOTE(review): HXFProfilerInitialize() counts its callers in s_NumContexts,
 * but nothing here decrements that count, so every call writes the report
 * and the PMU is never set up again after the first initialize. Whether
 * shutdown is called once per context or once per process is not visible
 * from this file -- confirm before balancing the count.
 */
void HXFProfilerShutdown(void)
{
    QueryPerformanceCounter((LARGE_INTEGER*)&s_EndTime);

    LogResults();
}
#endif // HXF_PROFILE
/* ************************************************************************* *\
** ************************************************************************* **
** EOF
** ************************************************************************* **
\* ************************************************************************* */
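/*
 * Web code-viewer residue (not part of the source): keyboard shortcut help.
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */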