⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 docpu.h

📁 代码优化,有效使用内存,透视优化技术,对比优化方法,如果你在追求代码效率的最大化,该资源你不能不读.
💻 H
📖 第 1 页 / 共 2 页
字号:
/*----------------------------------------------------------------------------
 *
 *							D	0		C	P	U
 *                      S               D              K
 *                      ================================
 *
 *				"DoCPU CLOCK" P r o f i l e r  V e r s i o n  1.1
 *              =================================================
 *
 *										for internal use
 *										with the "Code Optimization: Effective Memory Usage"
 *										book by Kris Kaspersky	kpnc@programme.ru
															kk@sendmail.ru
-------------------------------------------------------------------------- */


/*============================================================================
 *
 *						GLOBAL COMMAND-LINE KEYS
 *
 *	$Fcpu:xxx				-	Forcedly set CPU speed
 *	$NoSort					-	Do not sort
 *
 *	$DEBUG.print.Fcpu		-	Debug print of the CPU speed
 *
 *
 *
============================================================================*/


/*============================================================================
 *
 *
 *	A_NITER				-	number of times to run the profiled fragment

 *	_NORDTSC			-	if defined, the system timer is used for measurements instead of RDTSC
 * TITLE				-	Title displayed by the PRINT_TITLE macro
============================================================================*/

// built-in libraries
#include <math.h>
#include <malloc.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

// DoCPU libraries
//#define __DOS__
#include "CPU.ini"
#include "PC.ini"


// Global macros
// Any earlier definition of _TEXT is dropped here; this header supplies its
// own _TEXT(a) further down.
#undef _TEXT
// TITLE is the banner string printed by PRINT_TITLE.
// Both branches currently define exactly the same text, so __DOS__ has no
// effect on it.
#ifdef __DOS__
    #define TITLE "Utility for the \"Code Optimization: Effective Memory Usage\"  "
#else
    #define TITLE "Utility for the \"Code Optimization: Effective Memory Usage\"  "
#endif

// ERR_OEM is the fallback string _TEXT yields when CharToOemA returns zero.
#define ERR_OEM	"-ERR translates a string into the OEM-defined character set"
// TEXT_PIPE is the hint printed by PRINT_PIPE. It expands to a _TEXT(...) call,
// i.e. the conversion happens at every place the macro is used.
#define TEXT_PIPE	_TEXT("* Program output can be redirected into the file\n")

// A_NITER: number of times the profiled fragment is run.
// A value defined before this point takes precedence over the default of 8.
#if !defined(A_NITER)
#	define A_NITER			8
#endif


// DoCPU_BUFF_N: maximum number of measured buffers (checkpoints).
// A value defined before this point takes precedence over the default of 0x10.
#if !defined(DoCPU_BUFF_N)
#	define DoCPU_BUFF_N	0x10
#endif

// Non-configurable definitions
// TIME_TO_WAIT is not used in this part of the file; its unit is not shown here.
#define TIME_TO_WAIT		100
// MAX_STR_LEN is the size, in chars, of the DoCPU_s string buffer.
#define MAX_STR_LEN			256
// One measurement slot per iteration of the AL_BEGIN loop (see DoCPU_buff).
#define DoCPU_BUFF_SIZE		A_NITER

// Graph drawing parameters (not used in this part of the file).
#define	MAX_GRAPH_LEN	80
#define	GRAPH_CHAR		":"



// A_BREAK emits an "int 0x3" instruction through the compiler's __asm{} block
// syntax (presumably a debugger breakpoint on x86 — compiler-specific).
#define A_BREAK __asm{int 0x3}

/*----------------------------------------------------------------------------
 *
 *						FUNCTION PROTOTYPES
 *
----------------------------------------------------------------------------*/

// Entry points of the profiler kernel; their bodies are supplied by DoCPU.obj.
// When the header is compiled as C++, the declarations are given C linkage.
// ------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif

	extern int*		__cdecl	DoCPU(void* x);
	extern void		__cdecl	A1(unsigned int *t);
	extern int		__cdecl	A2(unsigned int *t);

#ifdef __cplusplus
}
#endif

// To avoid loading windows.h declare everything here
// The declarations are skipped when _WINDOWS_ is already defined
// (presumably the include guard of windows.h — TODO confirm).
// NOTE(review): these hand-written prototypes use plain int / char*. The Win32
// SDK declares Sleep as returning void and taking a DWORD, GetTickCount as
// returning a DWORD, and CharToOemA as BOOL (LPCSTR, LPSTR); confirm that the
// differences are harmless for the compilers this header targets.
#ifndef _WINDOWS_
	extern int __stdcall Sleep(int a);
	extern int __stdcall GetTickCount(void);
	extern int __stdcall CharToOemA(char *src, char *dst);
#endif

// ERROR may already exist as a macro; remove it so that the ERROR(s) macro
// of this header can be defined without a redefinition clash.
// (#undef of a name that is not currently a macro is simply ignored.)
#undef ERROR

// ???
// External function that is neither defined nor called in this part of the
// file. The empty parameter list leaves its arguments unspecified in C.
// Purpose unknown — TODO confirm against the rest of the project.
extern void _null();


/*----------------------------------------------------------------------------
 *
 *						STATIC VARIABLES
 *						================
 *										for use in clock.h
 *
 * Attention:
 *			do not use these variables in your programs, since
 *			they might be changed in future releases!
----------------------------------------------------------------------------*/
// Index of the current iteration of the AL_BEGIN loop; also the column of
// DoCPU_buff that the _END_* macros write to.
static unsigned int	DoCPU_AL_Rool;						// AL runs loop
static unsigned int	DoCPU_tmp, DoCPU_tmp0, DoCPU_tmp1;	// temporary variables

// DoCPU_t[t]    : per-checkpoint scratch value written by the _BEGIN_* macros.
// DoCPU_buff[t] : the DoCPU_BUFF_SIZE (== A_NITER) results of checkpoint t.
static unsigned int	DoCPU_t[DoCPU_BUFF_N];
static unsigned int	DoCPU_buff[DoCPU_BUFF_N][DoCPU_BUFF_SIZE];

// Four characters "|/-\" (presumably frames of a progress spinner — not used
// in this part of the file).
static char			DoCPU_v[]="|/-\\";
// Destination buffer of _TEXT(); every _TEXT() use overwrites it.
static char			DoCPU_s[MAX_STR_LEN];				// string buffer

// NOTE(review): unlike its neighbours this variable is NOT static, so every
// translation unit that includes this header defines an external symbol with
// the same name. Confirm whether external linkage is intended.
float				DoCPU_float_tmp;
static int			CPU_CLOCK = 0;

// Not used in this part of the file.
static unsigned int			DoCPU_vcp=0;
static int*			p_cache=0;



/* ---------------------------------------------------------------------------
 *
 *							"PHYSICAL" CONSTANTS
 *
----------------------------------------------------------------------------*/
// Binary size multipliers: K = 2^10, M = 2^20, G = 2^30, T = 2^40.
// K, M and G fit into a 32-bit int. T does not: 1024*G evaluated in int is a
// signed overflow, so T is computed in long long.
#define	K			1024
#define	M			(1024*K)
#define G			(1024*M)
#define T			(1024LL*G)
// Decimal million.
#define	Mega		1000000
// Presumably the number of bits per byte.
#define	Bit			8



/* ---------------------------------------------------------------------------
 *
 *							MEMORY BLOCK SIZES
 *
----------------------------------------------------------------------------*/
// Candidate values for _BLOCK_SIZE.
// MAX_AVIAL_MEM, L1_CACHE_SIZE and L2_CACHE_SIZE are not defined in this part
// of the file (presumably they come from CPU.ini / PC.ini — TODO confirm).
#define _HUGE		(512*M)
#define _LARGE		(MAX_AVIAL_MEM)
#define	_NORMAL		(L2_CACHE_SIZE*10)
#define _MEDIUM		(L2_CACHE_SIZE/2)
#define _SMALL		(L1_CACHE_SIZE/2)

// Larger of the two cache line sizes. MAX is defined after this line, which
// is fine because macros are expanded at the point of use.
#define MAX_CACHE_LINE_SIZE												\
					MAX(L1_CACHE_LINE_SIZE, L2_CACHE_LINE_SIZE)

// MAX / MIN yield the larger / smaller of their two arguments.
// Every use of an argument, and the expansion as a whole, is parenthesized so
// that arguments containing low-precedence operators (|, &, ?:, ...) group the
// way the caller wrote them; e.g. MIN(1, 0|2) is 1.
// NOTE: each argument is still evaluated more than once, so do not pass
// expressions with side effects (i++, calls that change state).
#define MAX(a,b)	(((a)>(b))?(a):(b))
#define MIN(a,b)	(((a)>(b))?(b):(a))


/*  -----------------------------------------------------------------------

                        DEFINING BLOCK SIZE

    ---------------------------------------------------------------------- */
//  Defining the size of the processed block
//  TERMINOLOGY:
//  _SMALL_BLOCKS_  (SMALL)  -   blocks that fit within L1 cache
//  _MEDIUM_BLOCKS_  (MEDIUM) -   blocks that fit within L2 cache
//  _LARGE_BLOCKS_    (LARGE)  -   blocks exceeding L2 cache
//  _HUGE_BLOCKS_ (HUGE)   -   blocks exceeding RAM size

// This macro creates the  _BLOCK_SIZE definition according to one
// of the defined keys [__LARGE|__MEDIUM|__SMALL] and cache size
// Large block is specified by default
// Block size definitions can be set via #define in the code of the program
// or by means of using the /D command-line key of the VC compiler
// For example: cl bla-bla-bla.c /D__MEDIUM


// Select _BLOCK_SIZE from the size keys __HUGE / __LARGE / __MEDIUM / __SMALL.
//
// Exactly one branch is taken, so defining several keys at once (or a key
// together with a pre-set _BLOCK_SIZE) no longer redefines the macro.
// The order of the tests keeps the priority a chain of plain re-definitions
// would give: __SMALL, then __MEDIUM, then __LARGE, then __HUGE.
#if defined(__SMALL)
	#undef  _BLOCK_SIZE
	#define _BLOCK_SIZE _SMALL
#elif defined(__MEDIUM)
	#undef  _BLOCK_SIZE
	#define _BLOCK_SIZE _MEDIUM
#elif defined(__LARGE)
	#undef  _BLOCK_SIZE
	#define _BLOCK_SIZE _LARGE
#elif defined(__HUGE)
	#undef  _BLOCK_SIZE
	#define _BLOCK_SIZE _HUGE
#endif

// Large blocks by default: used when no key is given and _BLOCK_SIZE was not
// defined by the including program.
#ifndef _BLOCK_SIZE
	#define _BLOCK_SIZE _LARGE
#endif



/*----------------------------------------------------------------------------
 *
 *							MEASUREMENTS OF EXECUTION TIME
 *
----------------------------------------------------------------------------*/

// BASE MACRO FOR MEASURING EXECUTION TIME
// --------------------------------------
//
//		Macros such as _[BEGIN | END]_[RDTSC|CLOCK]_  are wrappers for kernel 
// functions for measuring execution times of the profiled fragment of the program.
// These are low-level functions! Therefore do not call them from your program
//
//		Measurements are written into the DoCPU_buff buffer  specified
// by the t argument into the position defined by the value of the DoCPU_AL_Rool variable.
// NOTE: each of these macros already ends with ';', and the _END_* macros
// expand to two statements.

// _BEGIN_RDTSC(t): hands the address of DoCPU_t[t] to the kernel routine A1.
#define	_BEGIN_RDTSC(t)	A1(&DoCPU_t[t]);
// _END_RDTSC(t): calls the kernel routine A2 on the same slot and stores its
// return value, via DoCPU_tmp, in DoCPU_buff[t][DoCPU_AL_Rool].
#define	_END_RDTSC(t)	DoCPU_tmp = A2(&DoCPU_t[t]);\
						DoCPU_buff[t][DoCPU_AL_Rool] = DoCPU_tmp;

// _BEGIN_CLOCK(t): saves the current clock() value in DoCPU_t[t].
#define	_BEGIN_CLOCK(t)	DoCPU_t[t] = clock();
// _END_CLOCK(t): stores the clock() difference since _BEGIN_CLOCK(t), via
// DoCPU_tmp, in DoCPU_buff[t][DoCPU_AL_Rool].
#define	_END_CLOCK(t)	DoCPU_tmp = clock() - DoCPU_t[t];\
						DoCPU_buff[t][DoCPU_AL_Rool] = DoCPU_tmp;


// RUN-TIME CHECKPOINTS
// ---------------------


//		The   L_BEGIN   macro sets the checkpoint for starting the measurement,
// while L_END sets the checkpoint for the end of the execution time measurement.
//
//		If  _NORDTSC  is defined, then the system timer (clock) is
// used for measurements, otherwise the RDTSC-based macros are used (default)
//
//		The L_BEGIN/L_END macros are recommended for use only within
// the AL_BEGIN -- AL_END loop body! It makes sense to use these macros only
// in cases when it is necessary to perform specific actions before each
// of the A_NITER runs of the profiled fragment, and the execution time of these
// actions must not be taken into account. The A_BEGIN/A_END macros are used
// more frequently
#ifdef _NORDTSC
	#define L_BEGIN(t)	_BEGIN_CLOCK(t)
	#define L_END(t)	_END_CLOCK(t)
#else
	#define L_BEGIN(t)	_BEGIN_RDTSC(t)
	#define L_END(t)	_END_RDTSC(t)
#endif

//		The UL_BEGIN/UL_END macros are the same as L_BEGIN/
// L_END, however the choice of the measurement strategy here can be done both
// at compile time and at run time.
//
//	ARG:
//		t	-	stream index for writing measurment results
//		tt	-	measurement strategy. 
//				  0 : RDTSC is used
//				!=0 : system timer is used
// UL_BEGIN / UL_END pick the measurement method by testing tt when the
// statement executes: non-zero -> clock(), zero -> the RDTSC kernel calls.
// NOTE: each expands to a complete if/else statement, so do not place one as
// the unbraced body of an outer "if" that has its own "else".
#define	UL_BEGIN(t,tt)	if (tt) {_BEGIN_CLOCK(t);}	else	{_BEGIN_RDTSC(t);}
#define	UL_END(t,tt)	if (tt) {_END_CLOCK(t);	}	else	{_END_RDTSC(t);  }



// CYCLIC PROFILING  RUNNING THE PROFILED FRAGMENT A_NITER times
// ---------------------------------------------------------------------

//		The  AL_BEGIN/AL_END  macros run the marked fragment A_NITER times
// and write the number of the current iteration into the DoCPU_AL_Rool variable,
// defining the current position for writing the measurement results of the [U]L_BEGIN/
// [U]L_END macros
//		AL_BEGIN opens the for-loop and its '{'; AL_END supplies the matching '}',
// so the two must always be used as a pair.
#define	AL_BEGIN		for(DoCPU_AL_Rool = 0;\
						DoCPU_AL_Rool < A_NITER; DoCPU_AL_Rool++ )\
						{
#define	AL_END			}


//		The  A_BEGIN/A_END   macros represent ready-to-use tool for
// creating the measurement checkpoints.  They run the profiled fragment
// A_NITER times, measuring execution time for each iteration
//
//		ATTENTION: MEASUREMENTS CANNOT BE NESTED!!!!
// A_BEGIN(t) opens the A_NITER loop and starts the measurement for checkpoint t;
// A_END(t) ends the measurement and closes the loop. The code between them is
// the loop body, so it is executed (and timed) once per iteration.
#define	A_BEGIN(t)		AL_BEGIN; L_BEGIN(t);
#define	A_END(t)		L_END(t); AL_END;


// READING AND PROCESSING MEASUREMENT RESULTS
// ---------------------------------------------

//		The Ax_GET macro returns the average execution time of the fragment
// marked by the t checkpoint.
//		(cycle_mid, cycle_min and cycle_mod are not defined in this part of the
// file; each is called with the result row of checkpoint t and 0.)
#define	Ax_GET(t)		cycle_mid(DoCPU_buff[t],0)

//		The  Lx_GET  macro returns the  _current_  value of the t checkpoint
// measurement, i.e. the slot of the iteration DoCPU_AL_Rool currently points to.
// ATTENTION: do not use this macro unless you are absolutely
// sure what you are doing!
#define	Lx_GET(t)		DoCPU_buff[t][DoCPU_AL_Rool]

//		The   Ax_GET_MIN   macro returns the minimum execution time
// for the profiled fragment marked by the t checkpoint.
#define	Ax_GET_MIN(t)	cycle_min(DoCPU_buff[t],0)

//		The Ax_GET_MOD macro returns the execution time mode for the
// profiled fragment marked by the t checkpoint.
#define	Ax_GET_MOD(t)	cycle_mod(DoCPU_buff[t],0)


//		The ALx_GET macro returns the measured time for the profiled
// fragment specified by the t checkpoint in iteration tt.
//		(DoCPU_AL_Rool*0 contributes nothing to the index.)
#define	ALx_GET(t,tt)	DoCPU_buff[t][DoCPU_AL_Rool*0 + tt]


/*----------------------------------------------------------------------------
 *
 *								SCREEN OUTPUT
 *
----------------------------------------------------------------------------*/

// WIN --> MS-DOS conversion
// ----------------------------
//
//	-	before version 1.1 this conversion had to be used for console output
//		of text containing national language characters
//      and typed in using any Windows editor
//
//	-	starting with version 1.1 the conversion is transparent for programmers
//		ATTENTION: reverse conversion distorts the text!
//
//	_TEXT(a) converts a with CharToOemA into the shared buffer DoCPU_s and
//	yields a pointer to that buffer, or ERR_OEM when CharToOemA returns zero.
//	Every use overwrites DoCPU_s, so a result is valid only until the next
//	_TEXT. No length is passed to CharToOemA; presumably a must fit into
//	MAX_STR_LEN chars including the terminator — TODO confirm.
#define _TEXT(a)		(CharToOemA(a,DoCPU_s)?&DoCPU_s[0]:ERR_OEM)


// MACROS FOR SCREEN OUTPUT
// ------------------------------

//		The  PRINT  macro translates the s string into DOS-encoding (via _TEXT) and
// writes it to stderr, so its output is not captured when only the standard
// output is redirected into a file.
//		To produce redirectable output, use the built-in printf function
//		NOTE: the expansion already ends with ';'.
#define	PRINT(s)		fputs(_TEXT(s),stderr);

//		The ERROR macro displays the s string on the terminal, outputs the "\x7"
// (bell) character and executes "return 0;" in the function that uses it.
//		It can therefore only be used inside functions for which "return 0;" is valid.
#define	ERROR(s)		{ PRINT(s); PRINT("\x7"); return 0; }

//		The PRINT_TITLE macro displays the service header and (c):
// the text "* ", then TITLE, then a newline.
//		NOTE: it expands to three separate PRINT statements that are not enclosed
// in braces, so as the unbraced body of an "if" only the first one is guarded.
#define	PRINT_TITLE		PRINT("* "); PRINT(TITLE);PRINT("\n");

//		The  PRINT_PIPE  macro informs the user that the application output can be
// redirected into a file. This is useful for applications that generate output
// tables that can later be imported into MS Graph
//		TEXT_PIPE already expands to a _TEXT(...) call, so it is written with fputs
// directly; going through PRINT would wrap it in a second _TEXT and convert the
// already converted text once more.
#define	PRINT_PIPE		fputs(TEXT_PIPE,stderr);



// MACROS FOR OUTPUT OF THE MEASUREMENT RESULTS ON THE SCREEN
// ----------------------------------------------------------

//		The L1_OUT macro prints the s string (converted by _TEXT) followed by
// " : 100%" and a newline to the standard output.
// Usually it is used in combination with Lx_OUT for output of the relative execution speed
// of profiled fragments of the program
//		NOTE: the expansion already ends with ';'.
#define	L1_OUT(s)			printf("%s : 100%%\n",_TEXT(s));

//		The Lx_OUT macro prints the s string (converted by _TEXT) and the val/base
// ratio as a percentage with one decimal place to the standard output.
//		val and base are parenthesized before the cast, so compound arguments such
// as Lx_OUT(s, a+b, c-d) are converted and divided as a whole.
//		NOTE: the expansion already ends with ';'.
#define	Lx_OUT(s,base,val)	printf("%s : %3.1f%%\n",\
								_TEXT(s),(float)(val)/(float)(base)*100);

//		The A1_OUT macro does the same thing as L1_OUT

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -