// docpu.h
// (web code-viewer caption "Font size:" -- not part of the original header)
/*----------------------------------------------------------------------------
*
* D 0 C P U
* S D K
* ================================
*
* "DoCPU CLOCK" P r o f i l e r V e r s i o n 1.1
* =================================================
*
* for internal use
* with the "Code Optimization: Effective Memory Usage"
* book by Kris Kaspersky kpnc@programme.ru
kk@sendmail.ru
-------------------------------------------------------------------------- */
/*============================================================================
*
* GLOBAL COMMAND-LINE KEYS
*
* $Fcpu:xxx - Forcedly set CPU speed
* $NoSort - Do not sort
*
* $DEBUG.print.Fcpu - Debug print of the CPU speed
*
*
*
============================================================================*/
/*============================================================================
*
*
* A_NITER - number of times to run the profiled fragment
* _NORDTSC - if defined, L_BEGIN/L_END measure with the system timer (clock) instead of RDTSC
* TITLE - Title displayed by the PRINT_TITLE macro
============================================================================*/
// built-in libraries
#include <math.h>
#include <malloc.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// DoCPU libraries
//#define __DOS__
#include "CPU.ini"
#include "PC.ini"
// Global macros
// -------------
// _TEXT is defined further down in the SCREEN OUTPUT section; any definition
// that is already active at this point is discarded first.
#undef _TEXT

// Banner text printed by PRINT_TITLE. The __DOS__ and non-__DOS__ builds used
// byte-identical strings, so one unconditional definition is equivalent.
#define TITLE "Utility for the \"Code Optimization: Effective Memory Usage\" "

// Fallback text produced by _TEXT when CharToOemA reports failure.
#define ERR_OEM "-ERR translates a string into the OEM-defined character set"

// Hint printed by PRINT_PIPE (already wrapped in _TEXT).
#define TEXT_PIPE _TEXT("* Program output can be redirected into the file\n")

// A_NITER: how many times to run the profiled fragment.
// May be pre-defined by the including program; defaults to 8.
#if !defined(A_NITER)
#define A_NITER 8
#endif

// DoCPU_BUFF_N: maximum number of measured buffers (checkpoint streams).
// May be pre-defined by the including program; defaults to 0x10.
#if !defined(DoCPU_BUFF_N)
#define DoCPU_BUFF_N 0x10
#endif
// Non-configurable definitions
// ----------------------------
// Buffer geometry: one result slot per run of the profiled fragment.
#define DoCPU_BUFF_SIZE A_NITER
// Capacity of the DoCPU_s string buffer, in chars.
#define MAX_STR_LEN 256

// NOTE(review): TIME_TO_WAIT is not used in the visible part of this header;
// presumably a delay (in ms) for Sleep -- confirm against its users.
#define TIME_TO_WAIT 100

// NOTE(review): presumably the width and fill character of a text-mode bar
// graph; their users are outside the visible part of this header.
#define MAX_GRAPH_LEN 80
#define GRAPH_CHAR ":"

// Debugger breakpoint: emits "int 3" through inline assembly.
#define A_BREAK __asm{int 0x3}
/*----------------------------------------------------------------------------
*
* FUNCTION PROTOTYPES
*
----------------------------------------------------------------------------*/
// Declaring kernel functions of the profiler from DoCPU.obj
// ------------------------------------------------------
// All three use the __cdecl convention and C linkage, so that C++ programs
// resolve the same (unmangled) symbols as C programs do.
//
//   A1 - called by _BEGIN_RDTSC with the address of the checkpoint's
//        DoCPU_t slot.
//   A2 - called by _END_RDTSC with the same slot; its return value is stored
//        as the measurement.
//   DoCPU - NOTE(review): not called in the visible part of this header;
//           semantics of the argument and result need confirming.
#ifdef __cplusplus
extern "C" {
#endif

extern int* __cdecl DoCPU(void* x);
extern void __cdecl A1(unsigned int *t);
extern int __cdecl A2(unsigned int *t);

#ifdef __cplusplus
}
#endif
// To avoid loading windows.h declare everything here
//
// The declarations are skipped when _WINDOWS_ is already defined (presumably
// because windows.h was included before this header).
//
// They are wrapped in extern "C" for C++ builds, like the DoCPU kernel
// functions above: without it a C++ compiler mangles the names and the
// calls cannot be resolved against the system import libraries.
//
// NOTE(review): the Windows SDK declares these with different types (Sleep
// returns void and takes a DWORD; GetTickCount returns DWORD; CharToOemA
// takes a const source pointer and returns BOOL). The simplified forms are
// kept unchanged so existing callers keep compiling.
#ifndef _WINDOWS_
#ifdef __cplusplus
extern "C" {
#endif
extern int __stdcall Sleep(int a);
extern int __stdcall GetTickCount(void);
extern int __stdcall CharToOemA(char *src, char *dst);
#ifdef __cplusplus
}
#endif
#endif
// Another header may already define a macro named ERROR. Drop it
// unconditionally (#undef of an undefined name is a no-op) so this header's
// own ERROR(s) macro can be defined later without a redefinition clash.
#undef ERROR
// NOTE(review): _null is declared but never used in the visible part of this
// header, and its definition is not shown -- confirm it is still needed.
extern void _null();
/*----------------------------------------------------------------------------
*
* STATIC VARIABLES
* ================
* for use in clock.h
*
* Attention:
* do not use these variables in your programs, since
* they might be changed in future releases!
----------------------------------------------------------------------------*/
// Index of the current run inside an AL_BEGIN..AL_END loop; selects the
// slot of DoCPU_buff[t] that the _END_* macros write to.
static unsigned int DoCPU_AL_Rool; // AL runs loop
// Scratch values; DoCPU_tmp receives the latest measurement in _END_*.
static unsigned int DoCPU_tmp, DoCPU_tmp0, DoCPU_tmp1; // temporary variables
// Per-checkpoint start stamp, written by _BEGIN_* and read by _END_*.
static unsigned int DoCPU_t[DoCPU_BUFF_N];
// Measurement results: one row per checkpoint, one column per run.
static unsigned int DoCPU_buff[DoCPU_BUFF_N][DoCPU_BUFF_SIZE];
// NOTE(review): presumably the frames of a text progress spinner -- its user
// is outside the visible part of this header.
static char DoCPU_v[]="|/-\\";
// Destination buffer shared by every _TEXT() conversion.
static char DoCPU_s[MAX_STR_LEN]; // string buffer
// Given internal linkage like every other variable in this section: as an
// external definition it produced duplicate-symbol link errors as soon as
// the header was included from two translation units (C++, or C built
// without common symbols).
static float DoCPU_float_tmp;
// NOTE(review): presumably the CPU speed referred to by the $Fcpu key --
// confirm against the code that assigns it.
static int CPU_CLOCK = 0;
static unsigned int DoCPU_vcp=0;
static int* p_cache=0;
/* ---------------------------------------------------------------------------
*
* "PHYSICAL" CONSTANTS
*
----------------------------------------------------------------------------*/
// Binary size multipliers. K, M and G are int constant expressions.
#define K 1024
#define M (1024*K)
#define G (1024*M)
// 2^40 does not fit into a 32-bit int: the multiplication is done in
// long long so that T no longer overflows (previously signed overflow,
// i.e. undefined behaviour). Needs a compiler with long long support.
#define T (1024LL*G)
// Decimal million (e.g. for MHz conversions).
#define Mega 1000000
// NOTE(review): presumably the number of bits per byte -- confirm with users.
#define Bit 8
/* ---------------------------------------------------------------------------
*
* MEMORY BLOCK SIZES
*
----------------------------------------------------------------------------*/
// Block sizes, smallest to largest. The cache sizes and MAX_AVIAL_MEM are
// not defined in this file (presumably they come from CPU.ini / PC.ini).
#define _SMALL (L1_CACHE_SIZE/2)      // half of the L1 cache
#define _MEDIUM (L2_CACHE_SIZE/2)     // half of the L2 cache
#define _NORMAL (L2_CACHE_SIZE*10)    // ten times the L2 cache
#define _LARGE (MAX_AVIAL_MEM)        // all of MAX_AVIAL_MEM
#define _HUGE (512*M)                 // fixed 512 MB

// The longer of the two cache line sizes.
#define MAX_CACHE_LINE_SIZE MAX(L1_CACHE_LINE_SIZE, L2_CACHE_LINE_SIZE)
// MAX(a,b) / MIN(a,b): larger / smaller of two values.
// Every use of an argument is parenthesized so that operators inside the
// arguments cannot re-associate with the comparison or the ?: (previously
// MIN(4 & 6, 3) evaluated to 4). When both are equal, MAX yields b and MIN
// yields a, as before.
// NOTE: each argument is evaluated twice -- do not pass expressions with
// side effects (i++, function calls with state, ...).
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)>(b))?(b):(a))
/* -----------------------------------------------------------------------
DEFINING BLOCK SIZE
---------------------------------------------------------------------- */
// Defining the size of the processed block
// TERMINOLOGY:
// _SMALL_BLOCKS_ (SMALL) - blocks that fit within L1 cache
// _MEDIUM_BLOCKS_ (MEDIUM) - blocks that fit within L2 cache
// _LARGE_BLOCKS_ (LARGE) - blocks exceeding L2 cache
// _HUGE_BLOCKS_ (HUGE) - blocks exceeding RAM size
//
// _BLOCK_SIZE is derived from one of the keys
// [__HUGE|__LARGE|__MEDIUM|__SMALL] and the cache size.
// The large block is used when no key is given.
// A key can be set with #define before including this header, or with the
// /D command-line key of the VC compiler.
// For example: cl bla-bla-bla.c /D__MEDIUM
//
// If several keys are defined at once, the smallest block wins
// (__SMALL, then __MEDIUM, then __LARGE, then __HUGE). This is the value the
// previous sequence of independent #ifdef blocks ended up with, but the
// chain below reaches it without redefining _BLOCK_SIZE, so conflicting
// keys no longer trigger macro-redefinition diagnostics.
#if defined(__SMALL)
#define _BLOCK_SIZE _SMALL
#elif defined(__MEDIUM)
#define _BLOCK_SIZE _MEDIUM
#elif defined(__LARGE)
#define _BLOCK_SIZE _LARGE
#elif defined(__HUGE)
#define _BLOCK_SIZE _HUGE
#elif !defined(_BLOCK_SIZE)
// Large blocks by default
#define _BLOCK_SIZE _LARGE
#endif
/*----------------------------------------------------------------------------
*
* MEASUREMENTS OF EXECUTION TIME
*
----------------------------------------------------------------------------*/
// BASE MACROS FOR MEASURING EXECUTION TIME
// ----------------------------------------
//
// The _[BEGIN | END]_[RDTSC|CLOCK] macros are wrappers around the kernel
// functions (A1/A2) or clock() that time the profiled fragment.
// These are low-level macros! Do not call them directly from your program.
//
// _BEGIN_x(t) stores the start stamp of checkpoint t in DoCPU_t[t].
// _END_x(t)   computes the elapsed value, leaves it in DoCPU_tmp and writes
//             it to DoCPU_buff[t] at the position given by DoCPU_AL_Rool.
//
// The two-statement _END_x macros are wrapped in do { } while (0) so that
// they stay one statement under an unbraced if/else/loop; previously only
// the first statement was controlled and the buffer write always ran.
// Every macro still ends with its own ';', so existing call sites written
// with or without a trailing semicolon keep compiling.
// NOTE: t is evaluated twice in the _END_x macros.
#define _BEGIN_RDTSC(t) A1(&DoCPU_t[t]);
#define _END_RDTSC(t) do {\
DoCPU_tmp = A2(&DoCPU_t[t]);\
DoCPU_buff[t][DoCPU_AL_Rool] = DoCPU_tmp;\
} while (0);
#define _BEGIN_CLOCK(t) DoCPU_t[t] = clock();
#define _END_CLOCK(t) do {\
DoCPU_tmp = clock() - DoCPU_t[t];\
DoCPU_buff[t][DoCPU_AL_Rool] = DoCPU_tmp;\
} while (0);
// RUN-TIME CHECKPOINTS
// ---------------------
// L_BEGIN(t) marks the start of a measurement for checkpoint t and
// L_END(t) marks its end.
//
// By default the RDTSC-based macros are used. If _NORDTSC is defined, the
// system timer (clock) is used instead.
//
// L_BEGIN/L_END are meant to be used only inside an AL_BEGIN -- AL_END loop
// body. Use them when some preparation has to be done before each of the
// A_NITER runs of the profiled fragment and the time of that preparation
// must not be counted. In most cases the A_BEGIN/A_END macros are the
// better choice.
#ifndef _NORDTSC
#define L_BEGIN(t) _BEGIN_RDTSC(t)
#define L_END(t) _END_RDTSC(t)
#else
#define L_BEGIN(t) _BEGIN_CLOCK(t)
#define L_END(t) _END_CLOCK(t)
#endif
// UL_BEGIN/UL_END work like L_BEGIN/L_END, but the measurement strategy is
// an argument, so it can be chosen at run time as well as at compile time.
//
// ARG:
// t  - index of the stream (checkpoint) the result is written to
// tt - measurement strategy
//      0   : RDTSC is used
//      !=0 : system timer is used
//
// NOTE: each macro expands to a complete if/else statement; do not put it
// directly in front of an else that belongs to an outer if.
#define UL_BEGIN(t,tt) \
    if (!(tt)) { _BEGIN_RDTSC(t); } \
    else { _BEGIN_CLOCK(t); }
#define UL_END(t,tt) \
    if (!(tt)) { _END_RDTSC(t); } \
    else { _END_CLOCK(t); }
// CYCLIC PROFILING: RUNNING THE PROFILED FRAGMENT A_NITER TIMES
// ---------------------------------------------------------------------
// AL_BEGIN opens and AL_END closes a loop that runs the enclosed fragment
// A_NITER times. The loop counter is DoCPU_AL_Rool, which is also the slot
// that [U]L_BEGIN/[U]L_END write their result to.
// AL_BEGIN leaves a '{' open; it must be paired with exactly one AL_END.
#define AL_BEGIN for (DoCPU_AL_Rool = 0; DoCPU_AL_Rool < A_NITER; ++DoCPU_AL_Rool) {
#define AL_END }

// A_BEGIN/A_END are the ready-to-use checkpoint pair: they run the enclosed
// fragment A_NITER times and time every run into stream t.
//
// ATTENTION: MEASUREMENTS CANNOT BE NESTED!!!!
// (all loops share the single counter DoCPU_AL_Rool)
#define A_BEGIN(t) AL_BEGIN; L_BEGIN(t);
#define A_END(t) L_END(t); AL_END;
// READING AND PROCESSING MEASUREMENT RESULTS
// ---------------------------------------------
// Aggregates over all runs of checkpoint t. Each one passes the result row
// DoCPU_buff[t] and a literal 0 to a cycle_* helper that is not defined in
// the visible part of this header.
// NOTE(review): the meaning of the second argument (0) is not visible here.
//
// Ax_GET(t)     - average execution time
// Ax_GET_MIN(t) - minimum execution time
// Ax_GET_MOD(t) - mode of the execution time
#define Ax_GET(t) cycle_mid(DoCPU_buff[(t)], 0)
#define Ax_GET_MIN(t) cycle_min(DoCPU_buff[(t)], 0)
#define Ax_GET_MOD(t) cycle_mod(DoCPU_buff[(t)], 0)

// Single-run accessors (both expand to an lvalue).
//
// Lx_GET(t)     - result slot of checkpoint t for the _current_ run, i.e.
//                 the one selected by DoCPU_AL_Rool. ATTENTION: do not use
//                 this macro unless you are absolutely sure what you are
//                 doing!
// ALx_GET(t,tt) - result slot of checkpoint t for run number tt.
//                 DoCPU_AL_Rool is multiplied by zero there, so it does not
//                 influence which slot is selected.
#define Lx_GET(t) DoCPU_buff[(t)][DoCPU_AL_Rool]
#define ALx_GET(t,tt) DoCPU_buff[(t)][DoCPU_AL_Rool*0 + (tt)]
/*----------------------------------------------------------------------------
*
* SCREEN OUTPUT
*
----------------------------------------------------------------------------*/
// WIN --> MS-DOS conversion
// ----------------------------
//
// - before version 1.1 this conversion had to be applied explicitly to any
//   console text that contained national-language characters and had been
//   typed in a Windows editor
//
// - starting with version 1.1 it is transparent for programmers
//
// ATTENTION: reverse conversion distorts the text!
//
// _TEXT(a) passes a to CharToOemA with the shared buffer DoCPU_s as the
// destination and yields a pointer to that buffer, or ERR_OEM when
// CharToOemA returns zero.
// NOTE: all expansions share DoCPU_s, so the result is overwritten by the
// next _TEXT(); no length is passed to CharToOemA, so the caller must keep
// the string (including its terminator) within MAX_STR_LEN chars.
#define _TEXT(a) (CharToOemA((a), DoCPU_s) ? DoCPU_s : ERR_OEM)
// MACROS FOR SCREEN OUTPUT
// ------------------------------
// PRINT(s) converts the string s with _TEXT and writes it to stderr, so its
// output does not follow a redirection of the program's standard output.
// To produce redirectable output, use the built-in printf function.
#define PRINT(s) fputs(_TEXT(s),stderr);
// ERROR(s) prints s, emits the BEL character ("\x7") and leaves the
// enclosing function with "return 0" -- so it can only be used inside a
// function whose return type accepts 0.
#define ERROR(s) { PRINT(s); PRINT("\x7"); return 0; }
// PRINT_TITLE prints "* ", the TITLE banner and a newline.
// The three PRINT statements are wrapped in do { } while (0) so the macro
// is a single statement: under an unbraced if only the first PRINT used to
// be conditional. The trailing ';' is kept so that call sites written with
// or without their own semicolon keep compiling.
#define PRINT_TITLE do { PRINT("* "); PRINT(TITLE);PRINT("\n"); } while (0);
// PRINT_PIPE tells the user that the program output can be redirected into
// a file. This is useful for applications that generate tables to be
// imported into MS Graph later.
// NOTE(review): TEXT_PIPE is already wrapped in _TEXT, so the text goes
// through the OEM conversion twice here; left unchanged.
#define PRINT_PIPE PRINT(TEXT_PIPE);
// MACROS FOR OUTPUT OF THE MEASUREMENT RESULTS ON THE SCREEN
// ----------------------------------------------------------
// L1_OUT(s) prints the string s followed by the label "100%".
// It is normally combined with Lx_OUT to show the relative execution speed
// of the profiled fragments of the program.
#define L1_OUT(s) printf("%s : 100%%\n",_TEXT(s));
// Lx_OUT(s,base,val) prints s followed by val/base as a percentage with one
// decimal place.
// val and base are parenthesized before the float casts, so whole
// expressions may be passed: previously Lx_OUT(s, a+b, c+d) cast only the
// first operand of each argument and computed c + d/a + b*100.
// NOTE: base == 0 is not checked.
#define Lx_OUT(s,base,val) printf("%s : %3.1f%%\n",\
_TEXT(s),(float)(val)/(float)(base)*100);
// The A1_OUT macro does the same thing as L1_OUT
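// ---------------------------------------------------------------------------
// NOTE: what follows in this listing is web code-viewer residue (a keyboard
// shortcut legend), not part of docpu.h:
//   Copy code: Ctrl + C | Search code: Ctrl + F | Full screen: F11
//   Toggle theme: Ctrl + Shift + D | Show shortcuts: ?
//   Increase font size: Ctrl + = | Decrease font size: Ctrl + -
// ---------------------------------------------------------------------------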