⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 oceanfft.cu

📁 采用GPU通用计算API实现快速傅立叶变换
💻 CU
📖 第 1 页 / 共 2 页
字号:
/*
 * Copyright 1993-2007 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.  Users and possessors of this source code
 * are hereby granted a nonexclusive, royalty-free license to use this code
 * in individual and commercial software.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users.   This source code is a "commercial item" as
 * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
 * "commercial computer  software"  and "commercial computer software
 * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 *
 * Any use of this source code in individual and commercial software must
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 */

/* 
    FFT-based Ocean simulation
    based on original code by Yury Uralsky

    This sample demonstrates how to use CUFFT to synthesize and
    render an ocean surface in real-time.

    See Jerry Tessendorf's Siggraph course notes for more details:
    http://www.finelightvisualtechnology.com/pages/coursematerials.php    

    It also serves as an example of how to generate multiple vertex
    buffer streams from CUDA and render them using GLSL shaders.
*/

#ifdef _WIN32
// WIN32_LEAN_AND_MEAN is the macro <windows.h> actually checks to leave out
// rarely used APIs; the previous spelling (WINDOWS_LEAN_AND_MEAN) is not
// recognised by the header and therefore had no effect.
#  ifndef WIN32_LEAN_AND_MEAN
#    define WIN32_LEAN_AND_MEAN
#  endif
// NOMINMAX stops <windows.h> from defining min()/max() as macros.
#  define NOMINMAX
#  include <windows.h>
#endif

// includes
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <GL/glew.h>
#include <cutil.h>
#include <cutil_gl_error.h>
#include <cuda_gl_interop.h>
#include <cufft.h>
#include <math_constants.h>

#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/glut.h>
#endif

////////////////////////////////////////////////////////////////////////////////
// file-level state (only the mesh size and the simulation parameters marked
// const are true constants; the rest is mutable)

// Initial window size handed to glutInitWindowSize() in runTest().
unsigned int windowW = 512, windowH = 512;
// Simulation mesh resolution; also the dimensions of the 2D FFT plan.
const unsigned int meshW = 256,  meshH = 256;

// Dimensions of the complex FFT input, set in runTest():
// fftInputW = meshW/2 + 1, fftInputH = meshH.
unsigned int fftInputW, fftInputH;
// Size of one FFT input buffer in BYTES (fftInputW*fftInputH*sizeof(float2)).
unsigned int fftInputSize;

// OpenGL vertex buffers
// posVertexBuffer / indexBuffer are filled by createMeshPositionVBO() /
// createMeshIndexBuffer(); heightVertexBuffer (meshW*meshH floats) and
// slopeVertexBuffer (meshW*meshH float2) are registered with CUDA and written
// by runCuda().
GLuint posVertexBuffer;
GLuint heightVertexBuffer, slopeVertexBuffer;
GLuint indexBuffer;
GLuint shaderProg;
// Paths located with cutFindFilePath() in runTest(); 0 until then.
char* vertShaderPath = 0, *fragShaderPath = 0;

// mouse controls
int mouseOldX, mouseOldY;
int mouseButtons = 0;
float rotateX = 20.0f, rotateY = 0.0f;
float translateX = 0.0f, translateY = 0.0f, translateZ = -2.0f;

// display toggles; `animate` gates the per-frame runCuda() call in display().
// NOTE(review): drawPoints / wireFrame are consumed outside the visible part
// of the file (the startup help text says 'w' toggles wireframe) - confirm.
bool animate = true;
bool drawPoints = false;
bool wireFrame = false;

// FFT data
cufftHandle fftPlan;
// d_h0: device copy of the base spectrum uploaded from h_h0.
// d_ht: spectrum written by generateSpectrumKernel and fed to cufftExecC2R.
float2 *d_h0 = 0, *d_ht = 0;
// h_h0: host-side base spectrum filled by generate_h0().
float2 *h_h0 = 0;
// d_slope: meshW*meshH float2 device buffer allocated in runTest().
// NOTE(review): no use of d_slope is visible in this part of the file.
float2 *d_slope = 0;

// simulation parameters
// gravitational constant (presumably m/s^2 - units are not stated in the code)
const float g = 9.81;
// wave scale factor (passed to phillips() as its amplitude argument)
const float A = 2*.00000000775f;
// patch size (divides the wave-vector components in generate_h0())
const float patchSize = 100;
// wind speed and wind direction (angle in radians) passed to phillips()
float windSpeed = 10.0f;
float windDir = CUDART_PI_F/3.0f;

// cutil timer handle, created and started in runTest().
unsigned int timer;
// animTime is passed to generateSpectrumKernel on every runCuda() call;
// prevTime is initialised from the timer in runTest().
// NOTE(review): the code that advances animTime (and uses animationRate) is
// not in the visible part of the file - confirm how they are applied.
float animTime = 0.0f;
float prevTime = 0.0f;
float animationRate = -0.0005f;

////////////////////////////////////////////////////////////////////////////////
// kernels
#include <oceanFFT_kernel.cu>

////////////////////////////////////////////////////////////////////////////////
// forward declarations

// Sets up CUDA, CUFFT, OpenGL/GLUT and starts the render loop (called by main).
void runTest(int argc, char** argv);

// GL functionality
// initGL(): runTest() aborts its setup when this returns CUTFalse.
CUTBoolean initGL();
// createVBO(): runTest() passes the buffer size in bytes as `size`.
void createVBO(GLuint* vbo, int size);
void deleteVBO(GLuint* vbo);
// runTest() calls both mesh helpers with (meshW, meshH) as (w, h).
void createMeshIndexBuffer(GLuint *id, int w, int h);
void createMeshPositionVBO(GLuint *id, int w, int h);
GLuint loadGLSLProgram(const char *vertFileName, const char *fragFileName);

// rendering callbacks (registered with GLUT at the end of runTest())
void display();
void keyboard(unsigned char key, int x, int y);
void mouse(int button, int state, int x, int y);
void motion(int x, int y);
void reshape(int w, int h);
void idle();

// Cuda functionality
// runCuda(): one simulation step - spectrum kernel, inverse FFT, slope kernel.
void runCuda();
// generate_h0(): fills the host-side base spectrum h_h0.
void generate_h0();
// NOTE(review): no definition or call of generateFftInput() appears in the
// visible part of the file - confirm it is still needed.
void generateFftInput();

////////////////////////////////////////////////////////////////////////////////
// Program entry point: print the control summary, then run the simulation
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    // Console banner describing the mouse and keyboard controls.
    static const char usage[] =
        "CUDA FFT Ocean\n\n"
        "Left mouse button          - rotate\n"
        "Middle mouse button        - pan\n"
        "Left + middle mouse button - zoom\n"
        "'w' key                    - toggle wireframe\n";
    printf("%s", usage);

    // All CUDA / OpenGL setup and the render loop live in runTest().
    runTest(argc, argv);

    CUT_EXIT(argc, argv);
}

////////////////////////////////////////////////////////////////////////////////
//! Run test
//!
//! Initialises the CUDA device, creates the CUFFT plan, allocates and fills the
//! spectrum buffers, starts the timer, creates the GLUT window and the vertex
//! buffers, runs one simulation step and then hands control to glutMainLoop().
//!
//! @param argc  argument count, forwarded to CUT_DEVICE_INIT and glutInit
//! @param argv  argument vector; argv[0] is also used to locate the shaders
//!
//! Exits the process with EXIT_FAILURE if host memory cannot be allocated or
//! the shader files cannot be found; returns early if initGL() fails.
////////////////////////////////////////////////////////////////////////////////
void runTest(int argc, char** argv)
{
    // Cuda init
    CUT_DEVICE_INIT(argc, argv);

    // create FFT plan (complex-to-real, meshW x meshH)
    CUFFT_SAFE_CALL(cufftPlan2d(&fftPlan, meshW, meshH, CUFFT_C2R) );

    // allocate memory
    // The C2R input is (meshW/2 + 1) x meshH complex values; fftInputSize is
    // the size of one such buffer in bytes.
    fftInputW = (meshW / 2)+1;
    fftInputH = meshH;
    fftInputSize = (fftInputW*fftInputH)*sizeof(float2);

    CUDA_SAFE_CALL(cudaMalloc((void **)&d_h0, fftInputSize) );
    CUDA_SAFE_CALL(cudaMalloc((void **)&d_ht, fftInputSize) );
    h_h0 = (float2 *) malloc(fftInputSize);
    // generate_h0() writes through h_h0 and the copy below reads from it, so
    // a failed allocation must be caught here rather than crash later.
    if (h_h0 == 0) {
        fprintf(stderr, "Error allocating host memory for h0!\n");
        exit(EXIT_FAILURE);
    }
    generate_h0();
    CUDA_SAFE_CALL(cudaMemcpy(d_h0, h_h0, fftInputSize, cudaMemcpyHostToDevice) );

    CUDA_SAFE_CALL(cudaMalloc((void **)&d_slope, meshW*meshH*sizeof(float2)) );

    // start the timer and remember its initial reading
    cutCreateTimer(&timer);
    cutStartTimer(timer);
    prevTime = cutGetTimerValue(timer);

    // Create GL context
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH);
    glutInitWindowSize(windowW, windowH);
    glutCreateWindow("CUDA FFT Ocean Simulation");

    // locate the GLSL shader sources relative to the executable
    vertShaderPath = cutFindFilePath("ocean.vert", argv[0]);
    fragShaderPath = cutFindFilePath("ocean.frag", argv[0]);
    if (vertShaderPath == 0 || fragShaderPath == 0) {
        fprintf(stderr, "Error finding shader files!\n");
        exit(EXIT_FAILURE);
    }

    // initialize GL
    if(CUTFalse == initGL()) {
        return;
    }

    // create vertex buffers and register with CUDA
    // (one float per vertex for height, one float2 per vertex for slope)
    createVBO(&heightVertexBuffer, meshW*meshH*sizeof(float));
    CUDA_SAFE_CALL(cudaGLRegisterBufferObject(heightVertexBuffer));

    createVBO(&slopeVertexBuffer, meshW*meshH*sizeof(float2));
    CUDA_SAFE_CALL(cudaGLRegisterBufferObject(slopeVertexBuffer));

    // create vertex and index buffer for mesh
    createMeshPositionVBO(&posVertexBuffer, meshW, meshH);
    createMeshIndexBuffer(&indexBuffer, meshW, meshH);

    // run one simulation step so the buffers hold data before the first frame
    runCuda();

    // register callbacks
    glutDisplayFunc(display);
    glutKeyboardFunc(keyboard);
    glutMouseFunc(mouse);
    glutMotionFunc(motion);
    glutReshapeFunc(reshape);
    glutIdleFunc(idle);

    // start rendering mainloop
    glutMainLoop();
}

// Phillips spectrum
// Kx, Ky - wave vector components
// Vdir   - wind angle in radians
// V      - wind speed
// A      - amplitude scale factor
// Returns the spectrum value for (Kx, Ky); 0 for the zero wave vector.
// Reads the file-level constant g.
float phillips(float Kx, float Ky, float Vdir, float V, float A)
{
    float k_squared = Kx * Kx + Ky * Ky;

    // The zero wave vector has no direction: return before normalising so the
    // 0/0 division below is never evaluated.
    if (k_squared == 0) return 0;

    // unit vector in the direction of k
    float k_len = sqrtf(k_squared);
    float k_x = Kx / k_len;
    float k_y = Ky / k_len;

    float L = V * V / g;

    // cosine of the angle between the wind direction and k
    float w_dot_k = k_x * cosf(Vdir) + k_y * sinf(Vdir);

    // float literal keeps the whole expression in single precision
    return A * expf(-1.0f / (k_squared * L * L)) / (k_squared * k_squared) * w_dot_k * w_dot_k;
}

// Generate base heightfield in frequency space
// Fills the host array h_h0 (fftInputW x fftInputH entries, row-major) with
// random complex amplitudes scaled by the square root of the Phillips spectrum
// for the current windDir / windSpeed / A. h_h0 must already be allocated.
void generate_h0()
{
    // 1/sqrt(2) does not depend on the loop variables; compute it once.
    const float invSqrt2 = 1.0f / sqrtf(2.0f);

    for (unsigned int y = 0; y<fftInputH; y++) {
        for (unsigned int x = 0; x<fftInputW; x++) {
            // NOTE(review): kx is scaled by pi while ky is scaled by 2*pi.
            // Presumably this matches the wave-vector computation in
            // generateSpectrumKernel (not visible here) - confirm before
            // changing either side.
            float kx = CUDART_PI_F * x / (float) patchSize;
            float ky = 2.0f * CUDART_PI_F * y / (float) patchSize;

            // note - these random numbers should be from a Gaussian distribution really
            // (here: uniform in [-1, 1]; real part is drawn first, then imaginary)
            float Er = 2.0f * rand() / (float) RAND_MAX - 1.0f;
            float Ei = 2.0f * rand() / (float) RAND_MAX - 1.0f;

            // single-precision square root of the spectrum value
            float P = sqrtf(phillips(kx, ky, windDir, windSpeed, A));

            // row-major index; kept unsigned like the loop counters
            unsigned int i = y*fftInputW+x;
            h_h0[i].x = invSqrt2 * Er * P;
            h_h0[i].y = invSqrt2 * Ei * P;
        }
    }
}

// Ceiling division: for positive a and b, returns the smallest integer that is
// >= a / b (e.g. how many blocks of width b are needed to cover a elements).
int iDivUp(int a, int b)
{
    // Adding (b - 1) first makes the truncating integer division round up.
    const int padded = a + (b - 1);
    return padded / b;
}

////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda kernels
////////////////////////////////////////////////////////////////////////////////
void runCuda()
{
    // generate wave spectrum in frequency domain
    dim3 block(8, 8, 1);
    dim3 grid(iDivUp(fftInputW, block.x), iDivUp(fftInputH, block.y), 1);
    generateSpectrumKernel<<<grid, block>>>(d_h0, d_ht, fftInputW, fftInputH, animTime, patchSize);

    // execute inverse FFT to convert to spatial domain
    float *hptr;
    CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&hptr, heightVertexBuffer));
    CUFFT_SAFE_CALL( cufftExecC2R(fftPlan, (cufftComplex *) d_ht, hptr) );

    // calculate slope for shading
    float2 *sptr;
    CUDA_SAFE_CALL(cudaGLMapBufferObject((void**)&sptr, slopeVertexBuffer));
    dim3 grid2(iDivUp(meshW, block.x), iDivUp(meshH, block.y), 1);
    calculateSlopeKernel<<<grid2, block>>>(hptr, sptr, meshW, meshH);

    CUDA_SAFE_CALL(cudaGLUnmapBufferObject(slopeVertexBuffer));
    CUDA_SAFE_CALL(cudaGLUnmapBufferObject(heightVertexBuffer));
}

////////////////////////////////////////////////////////////////////////////////
//! Display callback
////////////////////////////////////////////////////////////////////////////////
void display()
{
    // run CUDA kernel to generate vertex positions
    if (animate) {
        runCuda();
    }

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    // set view matrix
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -