📄 column.big.c

📁 代码优化,有效使用内存,透视优化技术,对比优化方法,如果你在追求代码效率的最大化,该资源你不能不读.
💻 C
字号:
/*----------------------------------------------------------------------------
 *
 *						THE EXAMPLE DEMONSTRATING EFFICIENCY 
 *					OF PROCESSING LARGE TWO-DIMENSIONAL ARRAYS
 *					============================================
 *
 * Build 0x001 07.08.2002
----------------------------------------------------------------------------*/
// Build switch: uncomment the next line to select the padded row length
// below, which is the variant meant to make FOR_ROW (the strided traversal)
// cache-friendly.
// NOTE(review): __OPTIMIZE__ is a reserved identifier, and GCC-compatible
// compilers predefine it whenever optimization is enabled (-O1 and above).
// Such builds would take the padded branch even with this line commented
// out -- confirm against the compiler in use.
//#define __OPTIMIZE__
#ifdef __OPTIMIZE__
	#define N_COL				(512+16)	// row length in ints (second array dimension);
											// per the original author this is the optimal
											// value: it is not a multiple of the cache bank
											// size, so the whole cache gets used
#else
	#define N_COL				(512)		// row length in ints (second array dimension);
											// per the original author this is the non-optimal
											// value: it is a multiple of the cache bank size,
											// so the cache is not used entirely
#endif

#define N_ROW					(512)		// number of rows (first array dimension)

#include <stdlib.h>

#include <DoCPU.h>

/*----------------------------------------------------------------------------
 *
 *		SEQUENTIAL PROCESSING OF THE ARRAY, COLUMN INDEX RUNNING FASTEST
 *
 *	Adds up every element of the N_ROW x N_COL matrix. Each row is walked
 *	from its first to its last element before moving to the next row, so
 *	consecutive reads touch adjacent ints in memory.
 *
 *	foo		- pointer to the first row of the matrix
 *	returns	- the sum of all N_ROW * N_COL elements
 *
----------------------------------------------------------------------------*/
int FOR_COL(int (*foo)[N_COL])
{
	int	total = 0;
	int	row = 0;

	while (row < N_ROW)
	{
		// walk the current row with a pointer instead of a second index
		const int	*cell = foo[row];
		const int	*row_end = cell + N_COL;

		while (cell != row_end)
			total += *cell++;

		row++;
	}

	return total;
}


/*----------------------------------------------------------------------------
 *
 *		SEQUENTIAL PROCESSING OF THE ARRAY, ROW INDEX RUNNING FASTEST
 *
 *	Adds up every element of the N_ROW x N_COL matrix, one column at a time:
 *	for a fixed column the row index runs from 0 to N_ROW-1, so consecutive
 *	reads are N_COL ints apart in memory.
 *
 *	foo		- pointer to the first row of the matrix
 *	returns	- the sum of all N_ROW * N_COL elements
 *
----------------------------------------------------------------------------*/
int FOR_ROW(int (*foo)[N_COL])
{
	int	total = 0;
	int	col;

	for (col = 0; col != N_COL; ++col)
	{
		// attention (original author's note): if the column height is a
		// multiple of the cache bank size, the usable cache capacity drops
		// significantly because of the cache's limited associativity. The
		// cache may then be too small for the data, causing constant misses!
		int	(*row)[N_COL] = foo;
		int	remaining = N_ROW;

		while (remaining-- > 0)
		{
			total += (*row)[col];
			row++;			// step down to the same column of the next row
		}
	}

	return total;
}

// Tuning constants used by FOR_32. Both are divided by sizeof(int) there to
// turn them into element counts.
//
// _CACHE_SIZE: size of the portion of a row that FOR_32 handles at a time.
//              K is not defined in this file; presumably 1024 and supplied by
//              DoCPU.h -- TODO confirm.
// _LINE_SIZE:  distance between the elements FOR_32 touches in its first
//              pass; presumably the cache-line size in bytes of the target
//              CPU -- TODO confirm.
#define _CACHE_SIZE	(64*K)
#define _LINE_SIZE	(32)


/*----------------------------------------------------------------------------
 *
 *				PARALLEL PROCESSING OF THE ARRAY BY COLUMNS
 *
 *	Adds up every element of the N_ROW x N_COL matrix, like a plain row-major
 *	sum, but reads each row in two passes over portions of at most
 *	_CACHE_SIZE bytes:
 *
 *	  pass 1 reads only the first int of every _LINE_SIZE-byte group; the
 *	         intent is to start loading all the cache lines of the portion
 *	         back to back;
 *	  pass 2 reads the remaining ints of every group, which by then are
 *	         expected to be in the cache already.
 *
 *	foo		- pointer to the first row of the matrix
 *	returns	- the sum of all N_ROW * N_COL elements
 *
----------------------------------------------------------------------------*/
int FOR_32(int (*foo)[N_COL])
{
	// element counts derived from the byte sizes
	const int	chunk_len = (int) (_CACHE_SIZE / sizeof(int));
	const int	line_len  = (int) (_LINE_SIZE / sizeof(int));

	int	x, y;
	int	k, w;
	int	z = 0;

	// testing rows one by one
	for (x = 0; x < N_ROW; x++)
	{
		// processing the row by portions that entirely fit
		// within L1 cache (on P-III and especially P-4 - L2 cache)
		for (k = 0; k < N_COL; k += chunk_len)
		{
			// one past the last element of this portion, clamped to the row
			int	chunk_end = k + chunk_len;
			if (chunk_end > N_COL)
				chunk_end = N_COL;

			// pass 1: initiating parallel loading of data into the cache.
			// (A stray ';' after this loop header used to leave the loop
			// empty and run its body once with y past the portion's end,
			// i.e. outside the row.)
			for (y = k; y < chunk_end; y += line_len)
			{
				z += foo[x][y];
			}

			// pass 2: reading the data that are already loaded into the cache
			for (y = k; y < chunk_end; y += line_len)
			{
				// rest of the group; clamped so a partial last group
				// cannot run past the portion
				int	line_end = y + line_len;
				if (line_end > chunk_end)
					line_end = chunk_end;

				for (w = y + 1; w < line_end; w++)
					z += foo[x][w];
			}
		}
	}

	return z;
}



/*----------------------------------------------------------------------------
 *
 *	Benchmark driver: allocates one N_ROW x N_COL matrix of ints, runs
 *	FOR_COL, FOR_ROW and FOR_32 over it inside the A_BEGIN/A_END measurement
 *	brackets from DoCPU.h, and prints the comparisons via Lx_OUT.
 *
 *	returns	- 0 on success, 1 if the matrix could not be allocated
 *
----------------------------------------------------------------------------*/
int main(void)
{
	// allocating memory for 2D-array; calloc zero-fills it, so the summing
	// routines never read indeterminate values, and it checks the
	// N_ROW * row-size multiplication for overflow
	int (*foo)[N_COL] = calloc(N_ROW, sizeof *foo);

	if (!foo)
	{
		PRINT("-ERR: not enough memory for the test array\n");
		return 1;
	}

	// TITLE
	PRINT("= = = Specific features of processing large two-dimensional arrays = = =\n");
	PRINT_TITLE;

	// processing by columns
	A_BEGIN(0)
		FOR_COL(foo);
	A_END(0)

	// processing by rows
	A_BEGIN(1)
		FOR_ROW(foo);
	A_END(1)

	// processing by columns in two passes (preload, then read)
	A_BEGIN(2)
		FOR_32(foo);
	A_END(2)

	// console output of the measurement results
	// NOTE(review): the second label names PARALLEL first while the arguments
	// pass the serial measurement (0) first -- confirm against Lx_OUT's
	// argument order in DoCPU.h.
	Lx_OUT("SERIAL\t COL vs SERIAL ROW",	Ax_GET(0), Ax_GET(1));
	Lx_OUT("PARALLEL COL vs SERIAL COL",	Ax_GET(0), Ax_GET(2));

	free(foo);
	return 0;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -