⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cusvd.cu

📁 在nvidia G80以上GPU上进行奇异值分解的程序。
💻 CU
字号:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cutil.h>
#define  NUM    (512)		//NUM must be a multiple of 16 to obtain the best performance

#include "cuSVD_kernel.cu"

/*
 * Host driver for the GPU SVD experiment.
 *
 * Steps:
 *   1. Fill a NUM x NUM matrix with pseudo-random values in [0, 1] and build
 *      an identity matrix of the same size.
 *   2. Upload both, plus a NUM-entry index table, to the device.
 *   3. Launch the bjrot kernel repeatedly, ping-ponging between the primary
 *      and the *_temp device buffers, and time the whole sequence.
 *   4. Download w and u, compute the Euclidean norm of every row of w, sort
 *      the norms in descending order and print them, followed by the elapsed
 *      time in milliseconds.
 *
 * NOTE(review): bjrot is presumably defined in the included cuSVD_kernel.cu;
 * the launch geometry (NUM/2 blocks of 256 threads) and the layout of the
 * index table are taken as-is from the original driver and must match what
 * that kernel expects.
 */
int main(int argc, char** argv)
{
    CUT_DEVICE_INIT();

    const unsigned int num2      = NUM * NUM;       // elements in one NUM x NUM matrix
    const unsigned int iteration = 2 * (NUM - 1);   // trips through the launch loop below
    const size_t       matBytes  = sizeof(float) * num2;
    const size_t       idxBytes  = sizeof(float) * NUM;

    printf("\n");

    dim3 grid   (NUM >> 1, 1, 1);
    dim3 threads(256, 1, 1);

    float * d_w      = NULL;
    float * d_u      = NULL;
    float * d_w_temp = NULL;
    float * d_u_temp = NULL;
    float * d_index  = NULL;

    // Index table handed to bjrot. Values are stored as float because the
    // device buffer is float*. Odd slots hold i-2, even slots hold i+2, with
    // three boundary entries overridden afterwards.
    float index[NUM];
    for (int i = 0; i < NUM; i++)
    {
        index[i] = (i % 2) ? (i - 2.0f) : (i + 2.0f);
    }
    index[0]       = 0.0f;
    index[1]       = 2.0f;
    index[NUM - 2] = NUM - 1;

    // Host buffers: w/u receive the results, orign is the input matrix,
    // unit is the identity used to seed u on the device.
    float * w     = (float*)malloc(matBytes);
    float * u     = (float*)malloc(matBytes);
    float * orign = (float*)malloc(matBytes);
    float * unit  = (float*)malloc(matBytes);
    if (w == NULL || u == NULL || orign == NULL || unit == NULL)
    {
        fprintf(stderr, "host memory allocation failed\n");
        free(w);
        free(u);
        free(orign);
        free(unit);
        return EXIT_FAILURE;
    }

    // The input matrix is generated with rand(). (The original source carried
    // a disabled code path that read it from a binary file instead; its
    // leftover, unconditional "cannot open file" message has been removed.)
    for (int i = 0; i < NUM; i++)
    {
        for (int j = 0; j < NUM; j++)
        {
            orign[i*NUM + j] = (float)rand() / (float)RAND_MAX;
            w[i*NUM + j] = unit[i*NUM + j] = 0.0f;
        }
        unit[i*NUM + i] = 1.0f;
    }

    unsigned int timer = 0;
    float elapsedTimeInMs = 0.0f;

    CUDA_SAFE_CALL( cudaMalloc((void**)&d_w,      matBytes));
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_u,      matBytes));
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_w_temp, matBytes));
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_u_temp, matBytes));
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_index,  idxBytes));

    CUDA_SAFE_CALL( cudaMemcpy(d_index, index, idxBytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL( cudaMemcpy(d_u, unit,  matBytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL( cudaMemcpy(d_w, orign, matBytes, cudaMemcpyHostToDevice));

    CUT_SAFE_CALL( cutCreateTimer( &timer ) );
    CUT_SAFE_CALL( cutStartTimer( timer));

    // Each trip runs bjrot twice: primary -> temp, then temp -> primary, so
    // the latest result is always back in d_w / d_u when the loop ends.
    for (unsigned int i = 0; i < iteration; i++)
    {
        bjrot<<<grid, threads, 0>>>(d_w_temp, d_w, d_u_temp, d_u, d_index);
        bjrot<<<grid, threads, 0>>>(d_w, d_w_temp, d_u, d_u_temp, d_index);
    }

    // Kernel launches do not return a status; pick up launch-configuration
    // errors explicitly.
    cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess)
    {
        fprintf(stderr, "bjrot launch failed: %s\n", cudaGetErrorString(launchStatus));
        exit(EXIT_FAILURE);
    }

    // Launches are asynchronous: wait for the device to finish before stopping
    // the timer, otherwise only the enqueue cost is measured. The call also
    // reports errors raised while the kernels were executing.
    // (cudaThreadSynchronize is used to stay compatible with the cutil-era
    // toolkit; on current toolkits cudaDeviceSynchronize is the equivalent.)
    cudaError_t syncStatus = cudaThreadSynchronize();
    if (syncStatus != cudaSuccess)
    {
        fprintf(stderr, "bjrot execution failed: %s\n", cudaGetErrorString(syncStatus));
        exit(EXIT_FAILURE);
    }

    CUT_SAFE_CALL( cutStopTimer( timer));
    elapsedTimeInMs = cutGetTimerValue( timer);
    CUT_SAFE_CALL( cutDeleteTimer( timer));

    CUDA_SAFE_CALL( cudaMemcpy(u, d_u, matBytes, cudaMemcpyDeviceToHost));
    CUDA_SAFE_CALL( cudaMemcpy(w, d_w, matBytes, cudaMemcpyDeviceToHost));

    CUDA_SAFE_CALL( cudaFree(d_w));
    CUDA_SAFE_CALL( cudaFree(d_u));
    CUDA_SAFE_CALL( cudaFree(d_w_temp));
    CUDA_SAFE_CALL( cudaFree(d_u_temp));
    CUDA_SAFE_CALL( cudaFree(d_index));

    // wi[i] = Euclidean norm of row i of the downloaded w.
    // NOTE(review): presumably these are the singular values; that depends on
    // what bjrot leaves in w.
    float wi[NUM];
    for (int i = 0; i < NUM; i++)
    {
        float sumSq = 0.0f;
        for (int j = 0; j < NUM; j++)
        {
            sumSq += w[i*NUM + j] * w[i*NUM + j];
        }
        wi[i] = sqrtf(sumSq);
    }

    // Full-range exchange sort; with the '>' comparison over all (i, j) pairs
    // the array ends up in descending order.
    for (int i = 0; i < NUM; i++)
    {
        for (int j = 0; j < NUM; j++)
        {
            if (wi[i] > wi[j])
            {
                float sorttemp = wi[i];
                wi[i] = wi[j];
                wi[j] = sorttemp;
            }
        }
    }

    for (int i = 0; i < NUM; i++)
    {
        printf("%f\t", wi[i]);
    }
    printf("\n");

    free(w);
    free(u);
    free(orign);
    free(unit);

    printf("\n");
    printf("%f", elapsedTimeInMs);
    CUT_EXIT(argc, argv);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -