📄 dct8x8_gold.cpp

📁 cuda开发环境下的矩阵运算
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright 1993-2007 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.  Users and possessors of this source code
 * are hereby granted a nonexclusive, royalty-free license to use this code
 * in individual and commercial software.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users.   This source code is a "commercial item" as
 * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
 * "commercial computer  software"  and "commercial computer software
 * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 *
 * Any use of this source code in individual and commercial software must
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 */

/**
**************************************************************************
* \file DCT8x8_Gold.cpp
* \brief Contains DCT, IDCT and quantization routines, used in JPEG internal 
* data processing. Host code.
*
* This sample applies the forward and inverse Discrete Cosine Transform to 8x8
* blocks of image pixels, as in the JPEG standard. All data processing is done
* in floating point.
* A routine that quantizes the coefficients is also included.
*/

#include "Common.h"


/**
*  Orthonormal 8-point DCT basis, written out as 8 rows of 8 floats.
*
*  The entry in row n, column k follows c(k) * cos((2n + 1) * k * pi / 16),
*  with c(0) = 1/sqrt(8) and c(k) = 1/2 for k > 0, i.e. each COLUMN is one
*  DCT basis vector (column 0 is the constant DC vector).
*
*  Multiplying a block by this matrix from the right (X * DCTv8matrix)
*  therefore applies the 1D DCT to every row of X, the matrix on the left.
*  Multiplying from the left (DCTv8matrix * X) applies the inverse transform
*  to every column of X, because the matrix is unitary.
*
*  NOTE(review): BLOCK_SIZE2 comes from Common.h and is not visible here;
*  the initializer supplies 64 values, so it is presumably 64 - confirm.
*/
const float DCTv8matrix[BLOCK_SIZE2] = {
	0.3535533905932738f,  0.4903926402016152f,  0.4619397662556434f,  0.4157348061512726f,  0.3535533905932738f,  0.2777851165098011f,  0.1913417161825449f,  0.0975451610080642f, 
	0.3535533905932738f,  0.4157348061512726f,  0.1913417161825449f, -0.0975451610080641f, -0.3535533905932737f, -0.4903926402016152f, -0.4619397662556434f, -0.2777851165098011f, 
	0.3535533905932738f,  0.2777851165098011f, -0.1913417161825449f, -0.4903926402016152f, -0.3535533905932738f,  0.0975451610080642f,  0.4619397662556433f,  0.4157348061512727f, 
	0.3535533905932738f,  0.0975451610080642f, -0.4619397662556434f, -0.2777851165098011f,  0.3535533905932737f,  0.4157348061512727f, -0.1913417161825450f, -0.4903926402016153f, 
	0.3535533905932738f, -0.0975451610080641f, -0.4619397662556434f,  0.2777851165098009f,  0.3535533905932738f, -0.4157348061512726f, -0.1913417161825453f,  0.4903926402016152f, 
	0.3535533905932738f, -0.2777851165098010f, -0.1913417161825452f,  0.4903926402016153f, -0.3535533905932733f, -0.0975451610080649f,  0.4619397662556437f, -0.4157348061512720f, 
	0.3535533905932738f, -0.4157348061512727f,  0.1913417161825450f,  0.0975451610080640f, -0.3535533905932736f,  0.4903926402016152f, -0.4619397662556435f,  0.2777851165098022f, 
	0.3535533905932738f, -0.4903926402016152f,  0.4619397662556433f, -0.4157348061512721f,  0.3535533905932733f, -0.2777851165098008f,  0.1913417161825431f, -0.0975451610080625f
};


/**
*  Transpose of DCTv8matrix, written out as 8 rows of 8 floats.
*
*  Here each ROW is one DCT basis vector: row 0 is the constant DC vector
*  (all 1/sqrt(8)), row k > 0 holds (1/2) * cos((2n + 1) * k * pi / 16) for
*  n = 0..7.
*
*  Multiplying a block by this matrix from the left (DCTv8matrixT * X)
*  applies the 1D DCT to every column of X, the matrix on the right.
*  Multiplying from the right (X * DCTv8matrixT) applies the inverse
*  transform to every row of X, because the matrix is unitary.
*
*  NOTE(review): BLOCK_SIZE2 comes from Common.h and is not visible here;
*  the initializer supplies 64 values, so it is presumably 64 - confirm.
*/
const float DCTv8matrixT[BLOCK_SIZE2] = {
	0.3535533905932738f,  0.3535533905932738f,  0.3535533905932738f,  0.3535533905932738f,  0.3535533905932738f,  0.3535533905932738f,  0.3535533905932738f,  0.3535533905932738f, 
	0.4903926402016152f,  0.4157348061512726f,  0.2777851165098011f,  0.0975451610080642f, -0.0975451610080641f, -0.2777851165098010f, -0.4157348061512727f, -0.4903926402016152f, 
	0.4619397662556434f,  0.1913417161825449f, -0.1913417161825449f, -0.4619397662556434f, -0.4619397662556434f, -0.1913417161825452f,  0.1913417161825450f,  0.4619397662556433f, 
	0.4157348061512726f, -0.0975451610080641f, -0.4903926402016152f, -0.2777851165098011f,  0.2777851165098009f,  0.4903926402016153f,  0.0975451610080640f, -0.4157348061512721f, 
	0.3535533905932738f, -0.3535533905932737f, -0.3535533905932738f,  0.3535533905932737f,  0.3535533905932738f, -0.3535533905932733f, -0.3535533905932736f,  0.3535533905932733f, 
	0.2777851165098011f, -0.4903926402016152f,  0.0975451610080642f,  0.4157348061512727f, -0.4157348061512726f, -0.0975451610080649f,  0.4903926402016152f, -0.2777851165098008f, 
	0.1913417161825449f, -0.4619397662556434f,  0.4619397662556433f, -0.1913417161825450f, -0.1913417161825453f,  0.4619397662556437f, -0.4619397662556435f,  0.1913417161825431f, 
	0.0975451610080642f, -0.2777851165098011f,  0.4157348061512727f, -0.4903926402016153f,  0.4903926402016152f, -0.4157348061512720f,  0.2777851165098022f, -0.0975451610080625f
};


/**
*  Quantization table, written out as 8 rows of 8 floats and labelled by the
*  original authors as the JPEG "quality = 0 of 12" matrix.
*
*  The table is symmetric: the entry at (row, col) equals the one at
*  (col, row). The largest steps sit along the first row and first column
*  (peaking at index 3); everything further in settles at 12.
*
*  The array is deliberately left non-const here; its consumers are not
*  visible in this part of the file - presumably the quantization routines
*  announced in the file header (TODO confirm).
*/
float Q[BLOCK_SIZE2] = {
	32.f,  33.f,  51.f,  81.f,  66.f,  39.f,  34.f,  17.f,
	33.f,  36.f,  48.f,  47.f,  28.f,  23.f,  12.f,  12.f,
	51.f,  48.f,  47.f,  28.f,  23.f,  12.f,  12.f,  12.f,
	81.f,  47.f,  28.f,  23.f,  12.f,  12.f,  12.f,  12.f,
	66.f,  28.f,  23.f,  12.f,  12.f,  12.f,  12.f,  12.f,
	39.f,  23.f,  12.f,  12.f,  12.f,  12.f,  12.f,  12.f,
	34.f,  12.f,  12.f,  12.f,  12.f,  12.f,  12.f,  12.f,
	17.f,  12.f,  12.f,  12.f,  12.f,  12.f,  12.f,  12.f 
};


/**
**************************************************************************
*  Computes the matrix product Mres = M1 * M2 for BLOCK_SIZE x BLOCK_SIZE
*  (8x8) operands.
*
*  Every matrix is addressed as ptr[row * stride + col], so each operand can
*  be a tightly packed block or a window inside a larger plane.
*  Result elements are stored one at a time while the inputs are still being
*  read, so Mres must not overlap M1 or M2.
*
* \param M1				[IN] - Pointer to the left operand
* \param M1Stride		[IN] - Distance, in elements, between rows of M1
* \param M2				[IN] - Pointer to the right operand
* \param M2Stride		[IN] - Distance, in elements, between rows of M2
* \param Mres			[OUT] - Pointer to the product
* \param MresStride		[IN] - Distance, in elements, between rows of Mres
*  
* \return None
*/
void mult8x8(const float* M1, int M1Stride, const float* M2, int M2Stride, float* Mres, int MresStride)
{
	for (int row = 0; row < BLOCK_SIZE; ++row)
	{
		// Current row of the left operand and of the output
		const float* lhsRow = M1 + row * M1Stride;
		float* outRow = Mres + row * MresStride;

		for (int col = 0; col < BLOCK_SIZE; ++col)
		{
			// Top element of the current column of the right operand
			const float* rhsCol = M2 + col;

			// Dot product of lhsRow with that column, summed in index order
			float dot = 0;
			for (int k = 0; k < BLOCK_SIZE; ++k)
			{
				dot += lhsRow[k] * rhsCol[k * M2Stride];
			}
			outRow[col] = dot;
		}
	}
}


/**
**************************************************************************
*  Performs 8x8 block-wise Forward Discrete Cosine Transform of the given 
*  image plane and outputs result to the plane of coefficients.
*  1st version.
*
* \param fSrc		[IN] - Source image plane
* \param fDst		[OUT] - Destination coefficients plane
* \param Stride		[IN] - Stride of both planes
* \param Size		[IN] - Size of planes
*  
* \return None
*/
void computeDCT8x8Gold1(const float* fSrc, float* fDst, int Stride, ROI Size)
{
	float tmpblock[BLOCK_SIZE2];

	//perform block wise DCT
	//DCT(A) = DCTv8matrixT * A * DCTv8matrix
	for (int i = 0; i + BLOCK_SIZE - 1 < Size.height; i += BLOCK_SIZE)
	{
		for (int j = 0; j + BLOCK_SIZE - 1 < Size.width; j += BLOCK_SIZE)
		{
			// tmpblock = DCTv8matrixT * A
			mult8x8(DCTv8matrixT, BLOCK_SIZE, fSrc + i * Stride + j, Stride, tmpblock, BLOCK_SIZE);
			// DCT(A) = tmpblock * DCTv8matrix
			mult8x8(tmpblock, BLOCK_SIZE, DCTv8matrix, BLOCK_SIZE, fDst + i * Stride + j, Stride);
		}
	}
}


/**
**************************************************************************
*  Performs 8x8 block-wise Inverse Discrete Cosine Transform of the given 
*  coefficients plane and outputs result to the image plane.
*  1st version.
*
* \param fSrc		[IN] - Source coefficients plane
* \param fDst		[OUT] - Destination image plane
* \param Stride		[IN] - Stride of both planes
* \param Size		[IN] - Size of planes
*  
* \return None
*/
void computeIDCT8x8Gold1(const float* fSrc, float* fDst, int Stride, ROI Size)
{
	float tmpblock[BLOCK_SIZE2];

	//perform block wise IDCT
	//IDCT(A) = DCTv8matrix * A * DCTv8matrixT
	for (int i = 0; i + BLOCK_SIZE - 1 < Size.height; i += BLOCK_SIZE)
	{
		for (int j = 0; j + BLOCK_SIZE - 1 < Size.width; j += BLOCK_SIZE)
		{
			// tmpblock = DCTv8matrix * A
			mult8x8(DCTv8matrix, BLOCK_SIZE, fSrc + i * Stride + j, Stride, tmpblock, BLOCK_SIZE);
			// DCT(A) = tmpblock * DCTv8matrixT;
			mult8x8(tmpblock, BLOCK_SIZE, DCTv8matrixT, BLOCK_SIZE, fDst + i * Stride + j, Stride);
		}
	}
}


/**
**************************************************************************
*  Performs in-place quantization of given coefficients plane using
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -