📄 dct8x8_gold.cpp

📁 cuda开发环境下的矩阵运算
💻 CPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
*  predefined quantization matrices.
*
* \param fSrcDst		[IN/OUT] - Coefficients plane
* \param Stride			[IN] - Stride of SrcDst
* \param Size			[IN] - Size of the plane
*  
* \return None
*/
void quantizeGold(float* fSrcDst, int Stride, ROI Size)
{
	// In-place quantization of the whole plane:
	//   sample = round(sample / q) * q
	// where q is the entry of Q selected by the sample's position
	// inside its BLOCK_SIZE x BLOCK_SIZE block.
	for (int row = 0; row < Size.height; row++)
	{
		// Row offset into Q depends only on the row, so compute it once per row.
		const int qRowOffset = (row % BLOCK_SIZE) << BLOCK_SIZE_LOG2;

		for (int col = 0; col < Size.width; col++)
		{
			const int sampleIdx = row * Stride + col;
			const int qIdx = qRowOffset + (col % BLOCK_SIZE);

			float level = round_f(fSrcDst[sampleIdx] / Q[qIdx]);
			fSrcDst[sampleIdx] = level * Q[qIdx];
		}
	}
}


// Rotation factors of the 8-point DCT butterflies, each equal to sqrt(2) * cos(k * pi / 16).
// Every factor is read by both SubroutineDCTvector and SubroutineIDCTvector.
// NOTE(review): defined as mutable globals although the visible code only reads them - confirm before adding const.
float C_a = 1.387039845322148f; //!< a = sqrt(2) * cos(1 * pi / 16)
float C_b = 1.306562964876377f; //!< b = sqrt(2) * cos(2 * pi / 16) = sqrt(2) * cos(pi / 8)
float C_c = 1.175875602419359f; //!< c = sqrt(2) * cos(3 * pi / 16)
float C_d = 0.785694958387102f; //!< d = sqrt(2) * cos(5 * pi / 16)
float C_e = 0.541196100146197f; //!< e = sqrt(2) * cos(6 * pi / 16) = sqrt(2) * cos(3 * pi / 8)
float C_f = 0.275899379282943f; //!< f = sqrt(2) * cos(7 * pi / 16)


/**
*  Normalization constant, 1 / sqrt(8), that scales every output element
*  written by the forward and inverse 8-point DCT subroutines.
*/
float C_norm = 0.3535533905932737f; // 1 / (8^0.5)


/**
**************************************************************************
*  Computes the forward DCT of a strided vector of 8 elements.
*
*  All eight inputs are loaded before the first output is stored, so the
*  routine can be used in place (FirstOut == FirstIn with equal steps).
*
* \param FirstIn		[IN] - Pointer to element 0 of the input vector
* \param StepIn			[IN] - Offset, in elements, between consecutive input elements
* \param FirstOut		[OUT] - Pointer to element 0 of the output vector
* \param StepOut		[IN] - Offset, in elements, between consecutive output elements
*
* \return None
*/
void SubroutineDCTvector(float *FirstIn, int StepIn, float *FirstOut, int StepOut)
{
	// Load the whole input vector first; nothing is written until every sample has been read.
	const float in0 = FirstIn[0 * StepIn];
	const float in1 = FirstIn[1 * StepIn];
	const float in2 = FirstIn[2 * StepIn];
	const float in3 = FirstIn[3 * StepIn];
	const float in4 = FirstIn[4 * StepIn];
	const float in5 = FirstIn[5 * StepIn];
	const float in6 = FirstIn[6 * StepIn];
	const float in7 = FirstIn[7 * StepIn];

	// First butterfly stage: mirrored pairs. Sums feed the even outputs ...
	const float sum07 = in0 + in7;
	const float sum16 = in1 + in6;
	const float sum25 = in2 + in5;
	const float sum34 = in3 + in4;

	// ... and differences feed the odd outputs (note the operand order of 61 and 43).
	const float diff07 = in0 - in7;
	const float diff61 = in6 - in1;
	const float diff25 = in2 - in5;
	const float diff43 = in4 - in3;

	// Second butterfly stage on the sums.
	const float outerSum  = sum07 + sum34;
	const float outerDiff = sum07 - sum34;
	const float innerSum  = sum16 + sum25;
	const float innerDiff = sum16 - sum25;

	// Even-indexed coefficients.
	FirstOut[0 * StepOut] = C_norm * (outerSum + innerSum);
	FirstOut[2 * StepOut] = C_norm * (C_b * outerDiff + C_e * innerDiff);
	FirstOut[4 * StepOut] = C_norm * (outerSum - innerSum);
	FirstOut[6 * StepOut] = C_norm * (C_e * outerDiff - C_b * innerDiff);

	// Odd-indexed coefficients.
	FirstOut[1 * StepOut] = C_norm * (C_a * diff07 - C_c * diff61 + C_d * diff25 - C_f * diff43);
	FirstOut[3 * StepOut] = C_norm * (C_c * diff07 + C_f * diff61 - C_a * diff25 + C_d * diff43);
	FirstOut[5 * StepOut] = C_norm * (C_d * diff07 + C_a * diff61 + C_f * diff25 - C_c * diff43);
	FirstOut[7 * StepOut] = C_norm * (C_f * diff07 + C_d * diff61 + C_c * diff25 + C_a * diff43);
}


/**
**************************************************************************
*  Computes the inverse DCT of a strided vector of 8 coefficients.
*
*  All eight inputs are loaded before the first output is stored, so the
*  routine can be used in place (FirstOut == FirstIn with equal steps).
*
* \param FirstIn		[IN] - Pointer to element 0 of the input vector
* \param StepIn			[IN] - Offset, in elements, between consecutive input elements
* \param FirstOut		[OUT] - Pointer to element 0 of the output vector
* \param StepOut		[IN] - Offset, in elements, between consecutive output elements
*
* \return None
*/
void SubroutineIDCTvector(float *FirstIn, int StepIn, float *FirstOut, int StepOut)
{
	// Load the whole coefficient vector first; nothing is written until every value has been read.
	const float c0 = FirstIn[0 * StepIn];
	const float c1 = FirstIn[1 * StepIn];
	const float c2 = FirstIn[2 * StepIn];
	const float c3 = FirstIn[3 * StepIn];
	const float c4 = FirstIn[4 * StepIn];
	const float c5 = FirstIn[5 * StepIn];
	const float c6 = FirstIn[6 * StepIn];
	const float c7 = FirstIn[7 * StepIn];

	// Even-coefficient contributions.
	const float sum04  = c0 + c4;
	const float diff04 = c0 - c4;
	const float rot26P = C_b * c2 + C_e * c6;
	const float rot26M = C_e * c2 - C_b * c6;

	const float even07 = sum04 + rot26P;	// shared by outputs 0 and 7
	const float even34 = sum04 - rot26P;	// shared by outputs 3 and 4
	const float even16 = diff04 + rot26M;	// shared by outputs 1 and 6
	const float even25 = diff04 - rot26M;	// shared by outputs 2 and 5

	// Odd-coefficient contributions, one per mirrored output pair.
	const float odd07 = C_f * c7 + C_a * c1 + C_c * c3 + C_d * c5;
	const float odd34 = C_a * c7 - C_f * c1 + C_d * c3 - C_c * c5;
	const float odd16 = C_c * c1 - C_d * c7 - C_f * c3 - C_a * c5;
	const float odd25 = C_d * c1 + C_c * c7 - C_a * c3 + C_f * c5;

	// Each mirrored output pair is even +/- odd, scaled by the normalization constant.
	FirstOut[0 * StepOut] = C_norm * (even07 + odd07);
	FirstOut[7 * StepOut] = C_norm * (even07 - odd07);
	FirstOut[4 * StepOut] = C_norm * (even34 + odd34);
	FirstOut[3 * StepOut] = C_norm * (even34 - odd34);

	FirstOut[1 * StepOut] = C_norm * (even16 + odd16);
	FirstOut[5 * StepOut] = C_norm * (even25 - odd25);
	FirstOut[2 * StepOut] = C_norm * (even25 + odd25);
	FirstOut[6 * StepOut] = C_norm * (even16 - odd16);
}


/**
**************************************************************************
*  Block-wise forward Discrete Cosine Transform (8x8 blocks) of an image
*  plane, written to a plane of coefficients.
*  2nd version.
*
*  Each block is transformed separably: a row pass from fSrc into fDst,
*  then an in-place column pass over fDst. Only blocks that fit completely
*  inside Size are processed.
*
* \param fSrc		[IN] - Source image plane
* \param fDst		[OUT] - Destination coefficients plane
* \param Stride		[IN] - Stride of both planes
* \param Size		[IN] - Size of planes
*
* \return None
*/
void computeDCT8x8Gold2(const float* fSrc, float* fDst, int Stride, ROI Size)
{
	for (int blockY = 0; blockY + BLOCK_SIZE - 1 < Size.height; blockY += BLOCK_SIZE)
	{
		for (int blockX = 0; blockX + BLOCK_SIZE - 1 < Size.width; blockX += BLOCK_SIZE)
		{
			// Row pass: transform each row of the block from the source plane into the destination plane.
			for (int r = 0; r < BLOCK_SIZE; r++)
			{
				const float *srcRow = fSrc + (blockY + r) * Stride + blockX;
				float *dstRow = fDst + (blockY + r) * Stride + blockX;
				// The subroutine takes a non-const input pointer; it only reads through it.
				SubroutineDCTvector((float*)srcRow, 1, dstRow, 1);
			}

			// Column pass: transform each column of the row-pass result in place.
			for (int c = 0; c < BLOCK_SIZE; c++)
			{
				float *column = fDst + blockY * Stride + (blockX + c);
				SubroutineDCTvector(column, Stride, column, Stride);
			}
		}
	}
}


/**
**************************************************************************
*  Block-wise inverse Discrete Cosine Transform (8x8 blocks) of a
*  coefficients plane, written to an image plane.
*  2nd version.
*
*  Each block is transformed separably: a row pass from fSrc into fDst,
*  then an in-place column pass over fDst. Only blocks that fit completely
*  inside Size are processed.
*
* \param fSrc		[IN] - Source coefficients plane
* \param fDst		[OUT] - Destination image plane
* \param Stride		[IN] - Stride of both planes
* \param Size		[IN] - Size of planes
*
* \return None
*/
void computeIDCT8x8Gold2(const float* fSrc, float* fDst, int Stride, ROI Size)
{
	for (int blockY = 0; blockY + BLOCK_SIZE - 1 < Size.height; blockY += BLOCK_SIZE)
	{
		for (int blockX = 0; blockX + BLOCK_SIZE - 1 < Size.width; blockX += BLOCK_SIZE)
		{
			// Row pass: inverse-transform each row of the block from the source plane into the destination plane.
			for (int r = 0; r < BLOCK_SIZE; r++)
			{
				const float *srcRow = fSrc + (blockY + r) * Stride + blockX;
				float *dstRow = fDst + (blockY + r) * Stride + blockX;
				// The subroutine takes a non-const input pointer; it only reads through it.
				SubroutineIDCTvector((float*)srcRow, 1, dstRow, 1);
			}

			// Column pass: inverse-transform each column of the row-pass result in place.
			for (int c = 0; c < BLOCK_SIZE; c++)
			{
				float *column = fDst + blockY * Stride + (blockX + c);
				SubroutineIDCTvector(column, Stride, column, Stride);
			}
		}
	}
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -