📄 imagedenoising.cu
字号:
/*
* Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws. Users and possessors of this source code
* are hereby granted a nonexclusive, royalty-free license to use this code
* in individual and commercial software.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
/*
* This sample demonstrates two adaptive image denoising techniques:
* KNN and NLM, based on computation of both geometric and color distance
* between texels. While both techniques are already implemented in the
* DirectX SDK using shaders, a massively sped-up variation
* of the latter technique, taking advantage of shared memory, is implemented
* in addition to the DirectX counterparts.
* See supplied whitepaper for more explanations.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/glew.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <cutil.h>
#include <cutil_gl_error.h>
//Pixel color packed into a single unsigned int. make_color() in this file
//builds these values with r in bits 0-7, g in bits 8-15, b in bits 16-23
//and a in bits 24-31.
typedef unsigned int TColor;
////////////////////////////////////////////////////////////////////////////////
// Small BMP loading utility
////////////////////////////////////////////////////////////////////////////////
//Declared here only (C linkage); the definition is not part of this file.
//main() calls it as LoadBMPFile(&h_Src, &imageW, &imageH, image_path).
//NOTE(review): presumably it allocates the pixel buffer and stores the image
//width and height through the two int pointers -- confirm against its definition.
extern "C" void LoadBMPFile(uchar4 **, int *, int *, const char *);
////////////////////////////////////////////////////////////////////////////////
// Filter configuration
////////////////////////////////////////////////////////////////////////////////
//Window radii for the two filters. A radius R yields the
//(2 * R + 1) x (2 * R + 1) window whose area is computed below.
#define KNN_WINDOW_RADIUS 3
#define NLM_WINDOW_RADIUS 3
//Not referenced in this file.
//NOTE(review): presumably consumed by the included NLM kernel sources -- confirm.
#define NLM_BLOCK_RADIUS 3
//Number of elements in a full window: 7 * 7 = 49 for a radius of 3
#define KNN_WINDOW_AREA ( (2 * KNN_WINDOW_RADIUS + 1) * (2 * KNN_WINDOW_RADIUS + 1) )
#define NLM_WINDOW_AREA ( (2 * NLM_WINDOW_RADIUS + 1) * (2 * NLM_WINDOW_RADIUS + 1) )
//Reciprocals of the window areas, as floats
#define INV_KNN_WINDOW_AREA ( 1.0f / (float)KNN_WINDOW_AREA )
#define INV_NLM_WINDOW_AREA ( 1.0f / (float)NLM_WINDOW_AREA )
//Tuning thresholds. None of them is referenced in this file.
//NOTE(review): presumably read by the included KNN/NLM kernel sources;
//their exact meaning must be confirmed there.
#define KNN_WEIGHT_THRESHOLD 0.02f
#define KNN_LERP_THRESHOLD 0.79f
#define NLM_WEIGHT_THRESHOLD 0.10f
#define NLM_LERP_THRESHOLD 0.10f
////////////////////////////////////////////////////////////////////////////////
// Helper functions
////////////////////////////////////////////////////////////////////////////////
//Returns the larger of x and y; y is returned when x is not strictly greater.
float Max(float x, float y){
    if(x > y)
        return x;
    return y;
}
//Returns the smaller of x and y; y is returned when x is not strictly smaller.
float Min(float x, float y){
    if(x < y)
        return x;
    return y;
}
//Integer division of a by b that adds one to the quotient whenever the
//division leaves a remainder (i.e. rounds up for positive operands).
//displayFunc() uses it to size the launch grid so the whole image is covered.
int iDivUp(int a, int b){
    int quotient = a / b;
    if(a % b)
        ++quotient;
    return quotient;
}
//Linear interpolation between a and b: returns a for c == 0 and
//a + (b - a) for c == 1. c is not clamped.
__device__ float lerpf(float a, float b, float c){
    const float span = b - a;
    return a + span * c;
}
//Squared Euclidean distance between the x, y and z components of a and b.
//Despite the name, no square root is taken, and the w components are ignored.
__device__ float vecLen(float4 a, float4 b){
    const float dx = b.x - a.x;
    const float dy = b.y - a.y;
    const float dz = b.z - a.z;
    return dx * dx + dy * dy + dz * dz;
}
//Packs four float channels into one TColor.
//Each channel is clamped to [0, 1], scaled by 255 and truncated to an integer;
//the result holds r in bits 0-7, g in bits 8-15, b in bits 16-23 and a in
//bits 24-31.
//Clamping keeps an out-of-range channel (e.g. slightly above 1 after
//filtering) from spilling into the neighbouring channel's bits, and the
//unsigned arithmetic avoids shifting a value into the sign bit of an int.
//For inputs already within [0, 1] the packed value is unchanged.
__device__ TColor make_color(float r, float g, float b, float a){
    //fmaxf returns the non-NaN operand, so a NaN channel becomes 0
    const unsigned int ri = (unsigned int)(fminf(fmaxf(r, 0.0f), 1.0f) * 255.0f);
    const unsigned int gi = (unsigned int)(fminf(fmaxf(g, 0.0f), 1.0f) * 255.0f);
    const unsigned int bi = (unsigned int)(fminf(fmaxf(b, 0.0f), 1.0f) * 255.0f);
    const unsigned int ai = (unsigned int)(fminf(fmaxf(a, 0.0f), 1.0f) * 255.0f);
    return (ai << 24) | (bi << 16) | (gi << 8) | ri;
}
////////////////////////////////////////////////////////////////////////////////
// Global data handlers and parameters
////////////////////////////////////////////////////////////////////////////////
//Texture reference and channel descriptor for image texture
//2D uchar4 texture reference declared with cudaReadModeNormalizedFloat.
//displayFunc() binds it to a_Src before launching a kernel and unbinds it afterwards.
texture<uchar4, 2, cudaReadModeNormalizedFloat> texImage;
//Channel format descriptor for uchar4, used by main() when allocating a_Src
cudaChannelFormatDesc uchar4tex = cudaCreateChannelDesc<uchar4>();
//OpenGL PBO and texture "names"
//Both are generated in main() and deleted in shutDown();
//gl_PBO is additionally registered with CUDA in main().
GLuint gl_PBO, gl_Tex;
//Source image on the host side
//Filled in through LoadBMPFile() in main() and released with free() in shutDown()
uchar4 *h_Src;
//CUDA array descriptor
//Allocated and filled from h_Src in main(); freed in shutDown()
cudaArray *a_Src;
//Original image width and height
int imageW, imageH;
////////////////////////////////////////////////////////////////////////////////
// Filtering kernels
////////////////////////////////////////////////////////////////////////////////
//Thread block dimensions (8 x 8 threads) used by displayFunc() for every kernel launch
#define BLOCKDIM_X 8
#define BLOCKDIM_Y 8
//Kernel sources are pulled in by textual inclusion, after the macros,
//device helpers and texImage declared above.
//NOTE(review): the Copy, KNN/KNNdiag, NLM/NLMdiag and NLM2/NLM2diag kernels
//launched in displayFunc() are presumably defined in these files -- confirm.
#include "imageDenoising_copy_kernel.cu"
#include "imageDenoising_knn_kernel.cu"
#include "imageDenoising_nlm_kernel.cu"
#include "imageDenoising_nlm2_kernel.cu"
////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
//Active filter, selected with keys 1-4 in shutDown():
//0 = passthrough, 1 = KNN, 2 = NLM, 3 = quick NLM (NLM2)
int g_Kernel = 0;
//Set by the f/F key; displayFunc() prints the frame rate once and clears it
bool g_FPS = false;
//Toggled by the space key; when set, displayFunc() launches the *diag kernel variants
bool g_Diag = false;
//cutil timer handle: created and started in main(), reset in displayFunc(),
//stopped and deleted in shutDown()
unsigned int hTimer;
//Algorithms global parameters
//Increment applied to knnNoise/nlmNoise by the n/N keys
const float noiseStep = 0.025f;
//Increment applied to lerpC by the l/L keys
const float lerpStep = 0.025f;
//Noise levels; displayFunc() passes 1 / (noise * noise) to the KNN kernels
//(knnNoise) and to the NLM/NLM2 kernels (nlmNoise)
static float knnNoise = 0.32f;
static float nlmNoise = 1.45f;
//LERP coefficient passed to the KNN/NLM/NLM2 kernels; shutDown() keeps it within [0, 1]
static float lerpC = 0.2f;
//Number of frames in one timing batch
const int frameN = 24;
//Frames rendered in the current batch; displayFunc() resets it to 0 when it reaches frameN
int frameCounter = 0;
//Expresses byte offset i as a pointer; displayFunc() passes BUFFER_DATA(0)
//as the pixel-data argument of glTexSubImage2D
#define BUFFER_DATA(i) ((char *)0 + i)
//GLUT display/idle callback: runs the currently selected filter into the
//PBO, uploads the PBO into the bound texture, draws it, and optionally
//reports the frame rate once per frameN frames.
void displayFunc(void){
    //Restart the timer on the first frame of every frameN-frame batch
    const bool firstFrameOfBatch = (frameCounter == 0);
    frameCounter++;
    if(firstFrameOfBatch)
        cutResetTimer(hTimer);

    //Obtain a device pointer to the PBO and attach the source image to the texture
    TColor *d_dst = NULL;
    CUDA_SAFE_CALL( cudaGLMapBufferObject((void**)&d_dst, gl_PBO) );
    CUDA_SAFE_CALL( cudaBindTextureToArray(texImage, a_Src) );

    //One thread per pixel; the grid is rounded up to cover the whole image
    const dim3 blockShape(BLOCKDIM_X, BLOCKDIM_Y);
    const dim3 gridShape(iDivUp(imageW, BLOCKDIM_X), iDivUp(imageH, BLOCKDIM_Y));

    //Any other g_Kernel value launches nothing
    if(g_Kernel == 0){
        Copy<<<gridShape, blockShape>>>(d_dst, imageW, imageH);
    }
    else if(g_Kernel == 1){
        const float invKnnNoiseSq = 1.0f / (knnNoise * knnNoise);
        if(g_Diag)
            KNNdiag<<<gridShape, blockShape>>>(d_dst, imageW, imageH, invKnnNoiseSq, lerpC);
        else
            KNN<<<gridShape, blockShape>>>(d_dst, imageW, imageH, invKnnNoiseSq, lerpC);
    }
    else if(g_Kernel == 2){
        const float invNlmNoiseSq = 1.0f / (nlmNoise * nlmNoise);
        if(g_Diag)
            NLMdiag<<<gridShape, blockShape>>>(d_dst, imageW, imageH, invNlmNoiseSq, lerpC);
        else
            NLM<<<gridShape, blockShape>>>(d_dst, imageW, imageH, invNlmNoiseSq, lerpC);
    }
    else if(g_Kernel == 3){
        const float invNlmNoiseSq = 1.0f / (nlmNoise * nlmNoise);
        if(g_Diag)
            NLM2diag<<<gridShape, blockShape>>>(d_dst, imageW, imageH, invNlmNoiseSq, lerpC);
        else
            NLM2<<<gridShape, blockShape>>>(d_dst, imageW, imageH, invNlmNoiseSq, lerpC);
    }
    CUT_CHECK_ERROR("Filtering kernel execution failed.\n");

    //Release the texture binding and hand the PBO back to OpenGL
    CUDA_SAFE_CALL( cudaUnbindTexture(texImage) );
    CUDA_SAFE_CALL( cudaGLUnmapBufferObject(gl_PBO) );

    //Pixel data comes from offset 0 of the unpack PBO bound in main()
    glTexSubImage2D( GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0) );

    //A single oversized triangle; texture coordinates [0, 1] land on the
    //[-1, 1] x [-1, 1] square
    glBegin(GL_TRIANGLES);
    glTexCoord2f(0, 0);
    glVertex2f(-1, -1);
    glTexCoord2f(2, 0);
    glVertex2f(+3, -1);
    glTexCoord2f(0, 2);
    glVertex2f(-1, +3);
    glEnd();
    glFinish();

    //End of a batch: restart the frame count and honour a pending FPS request
    if(frameCounter != frameN)
        return;
    frameCounter = 0;
    if(!g_FPS)
        return;
    //The timer value is scaled by 0.001 before dividing (presumably ms -> s)
    const double elapsed = cutGetTimerValue(hTimer) * 0.001;
    printf("FPS: %3.1f\n", frameN / elapsed );
    g_FPS = false;
}
//GLUT keyboard callback. Despite its name it handles every key of the demo:
//  ESC, q, Q : release timer, GL and CUDA resources, then exit(0)
//  1 - 4     : select passthrough / KNN / NLM / quick NLM (NLM2)
//  space     : toggle LERP highlighting (diagnostic) mode
//  n / N     : decrease / increase both noise levels by noiseStep
//  l / L     : decrease / increase lerpC by lerpStep, kept within [0, 1]
//  f / F     : request a one-off frame rate printout
//  ?         : print the current lerpC, knnNoise and nlmNoise
//k is the key pressed; x and y (mouse position supplied by GLUT) are unused.
void shutDown(unsigned char k, int x, int y){
    (void)x;
    (void)y;
    switch (k){
        case '\033':
        case 'q':
        case 'Q':
            printf("Shutting down...\n");
            CUT_SAFE_CALL( cutStopTimer(hTimer) );
            CUT_SAFE_CALL( cutDeleteTimer(hTimer) );
            //Unregister the PBO from CUDA before OpenGL deletes it
            CUDA_SAFE_CALL( cudaGLUnregisterBufferObject(gl_PBO) );
            glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
            glDeleteBuffers(1, &gl_PBO);
            glDeleteTextures(1, &gl_Tex);
            CUDA_SAFE_CALL( cudaFreeArray(a_Src) );
            free(h_Src);
            printf("Shutdown done.\n");
            exit(0);
            break;
        case '1':
            printf("Passthrough.\n");
            g_Kernel = 0;
            break;
        case '2':
            printf("KNN method \n");
            g_Kernel = 1;
            break;
        case '3':
            printf("NLM method\n");
            g_Kernel = 2;
            break;
        case '4':
            printf("Quick NLM(NLM2) method\n");
            g_Kernel = 3;
            break;
        case ' ':
            //Toggle first so that the message names the mode being entered,
            //not the one being left
            g_Diag = !g_Diag;
            printf(g_Diag ? "LERP highlighting mode.\n" : "Normal mode.\n");
            break;
        case 'n':
            printf("Decrease noise level.\n");
            //Keep the noise levels strictly positive: displayFunc() passes
            //1 / (noise * noise) to the kernels, so reaching zero would
            //produce an infinite parameter
            knnNoise = Max(knnNoise - noiseStep, noiseStep);
            nlmNoise = Max(nlmNoise - noiseStep, noiseStep);
            break;
        case 'N':
            printf("Increase noise level.\n");
            knnNoise += noiseStep;
            nlmNoise += noiseStep;
            break;
        case 'l':
            printf("Decrease LERP quotent.\n");
            lerpC = Max(lerpC - lerpStep, 0.0f);
            break;
        case 'L':
            printf("Increase LERP quotent.\n");
            lerpC = Min(lerpC + lerpStep, 1.0f);
            break;
        case 'f' : case 'F':
            //Printed by displayFunc() at the end of the current frameN-frame batch
            g_FPS = true;
            break;
        case '?':
            printf("lerpC = %5.5f\n", lerpC);
            printf("knnNoise = %5.5f\n", knnNoise);
            printf("nlmNoise = %5.5f\n", nlmNoise);
            break;
    }
}
//Program entry point: loads the noisy source image into a CUDA array,
//creates the GLUT window, the display texture and the pixel buffer object
//shared with CUDA, registers the callbacks and enters the GLUT main loop.
//Returns EXIT_FAILURE if the image cannot be located or loaded, or if the
//required OpenGL features are missing.
int main(int argc, char **argv){
    CUT_DEVICE_INIT();

    printf("Allocating host and CUDA memory and loading image file...\n");
    const char *image_path = cutFindFilePath("portrait_noise.bmp", argv[0]);
    //Do not hand a null path to the loader when the file cannot be found
    if(image_path == NULL){
        fprintf(stderr, "ERROR: Unable to locate image file portrait_noise.bmp.\n");
        fflush(stderr);
        return EXIT_FAILURE;
    }
    LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
    //h_Src, imageW and imageH are zero-initialized globals, so they still
    //hold zero if the loader returned without filling them in
    if(h_Src == NULL || imageW <= 0 || imageH <= 0){
        fprintf(stderr, "ERROR: Failed to load image file %s.\n", image_path);
        fflush(stderr);
        return EXIT_FAILURE;
    }
    CUDA_SAFE_CALL( cudaMallocArray(&a_Src, &uchar4tex, imageW, imageH) );
    CUDA_SAFE_CALL( cudaMemcpyToArray(
                        a_Src, 0, 0,
                        h_Src, imageW * imageH * sizeof(uchar4),
                        cudaMemcpyHostToDevice
                    )
    );
    printf("Data init done.\n");

    printf("Initializing GLUT...\n");
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_SINGLE);
    glutInitWindowSize(imageW, imageH);
    glutInitWindowPosition(512 - imageW / 2, 384 - imageH / 2);
    glutCreateWindow(argv[0]);
    printf("Loading extensions: %s\n", glewGetErrorString(glewInit()));
    if(!glewIsSupported(
        "GL_VERSION_2_0 "
        "GL_ARB_pixel_buffer_object "
        "GL_EXT_framebuffer_object "
    )){
        fprintf(stderr, "ERROR: Support for necessary OpenGL extensions missing.\n");
        fflush(stderr);
        //Report the failure through the exit status. CUTFalse is a boolean
        //constant rather than an exit code.
        //NOTE(review): it is believed to be 0, i.e. "success" -- confirm in cutil.h.
        return EXIT_FAILURE;
    }
    printf("OpenGL window created.\n");

    printf("Creating GL texture...\n");
    glEnable(GL_TEXTURE_2D);
    glGenTextures(1, &gl_Tex);
    glBindTexture(GL_TEXTURE_2D, gl_Tex);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, imageW, imageH, 0, GL_RGBA, GL_UNSIGNED_BYTE, h_Src);
    printf("Texture created.\n");

    printf("Creating PBO...\n");
    glGenBuffers(1, &gl_PBO);
    //The PBO stays bound as the unpack buffer; displayFunc() relies on this
    //when it uploads the texture from buffer offset 0
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);
    glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, imageW * imageH * 4, h_Src, GL_STREAM_COPY);
    //While a PBO is registered to CUDA, it can't be used
    //as the destination for OpenGL drawing calls.
    //But in our particular case OpenGL is only used
    //to display the content of the PBO, specified by CUDA kernels,
    //so we need to register/unregister it only once.
    CUDA_SAFE_CALL( cudaGLRegisterBufferObject(gl_PBO) );
    CUT_CHECK_ERROR_GL();
    printf("PBO created.\n");

    printf("Starting GLUT main loop...\n");
    printf("Press [1] to view noisy image\n");
    printf("Press [2] to view image restored with knn filter\n");
    printf("Press [3] to view image restored with nlm filter\n");
    printf("Press [4] to view image restored with modified nlm filter\n");
    printf("Press [ ] to view smooth/edgy areas [RED/BLUE] Ct's\n");
    printf("Press [f] to print frame rate\n");
    printf("Press [?] to print Noise and Lerp Ct's\n");
    printf("Press [q] to exit\n");

    //displayFunc doubles as the idle callback so frames are rendered continuously
    glutIdleFunc(displayFunc);
    glutDisplayFunc(displayFunc);
    glutKeyboardFunc(shutDown);
    CUT_SAFE_CALL( cutCreateTimer(&hTimer) );
    CUT_SAFE_CALL( cutStartTimer(hTimer) );
    glutMainLoop();

    CUT_EXIT(argc, argv);
    return EXIT_SUCCESS;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -