⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 computetime.cu

📁 基于NV的CUDA(计算统一构架)的一段程序
💻 CU
字号:
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil.h>

// Number of int elements in the input array (1048576 = 2^20).
#define DATA_SIZE 1048576
// Threads per block used for the kernel launch in main().
#define THREAD_NUM 512
// Number of blocks used for the kernel launch in main().
#define BLOCK_NUM 16

// Host-side input buffer; main() fills it with random digits and copies it to the device.
int data[DATA_SIZE];


/*
 * Computes per-block partial sums of the squares of the DATA_SIZE ints in `num`.
 *
 * Launch requirements: 1-D grid of 1-D blocks; the dynamic shared-memory size
 * (third launch argument) must be at least blockDim.x * sizeof(int).
 *
 * num    - device array of DATA_SIZE ints (only read).
 * result - device array of gridDim.x ints; result[b] receives block b's partial sum.
 * time   - device array of 2 * gridDim.x clock_t values; time[b] receives the
 *          clock() value sampled by thread 0 of block b on entry, and
 *          time[b + gridDim.x] the value sampled after block b's result
 *          has been written.
 */
__global__ static void sumOfSquares(int* num, int* result, clock_t* time)
{
    extern __shared__ int shared[];
    const int tid = threadIdx.x;
    const int bid = blockIdx.x;
    // Derive the stride from the actual launch shape so the kernel stays
    // correct if the host changes the block or grid size.
    const int threads = blockDim.x;
    const int blocks = gridDim.x;

    if (tid == 0) time[bid] = clock();

    // Grid-stride loop: consecutive threads read consecutive elements.
    // Accumulate in a register and touch shared memory only once per thread.
    int partial = 0;
    for (int i = bid * threads + tid; i < DATA_SIZE; i += blocks * threads) {
        const int v = num[i];
        partial += v * v;
    }
    shared[tid] = partial;
    __syncthreads();

    // Tree reduction in shared memory. Each step folds the upper part of the
    // active range onto the lower part; rounding `half` up keeps it correct
    // for block sizes that are not a power of two. The loop bounds depend only
    // on blockDim.x, so every thread reaches each __syncthreads().
    for (int active = threads; active > 1; ) {
        const int half = (active + 1) / 2;
        if (tid + half < active) {
            shared[tid] += shared[tid + half];
        }
        __syncthreads();
        active = half;
    }

    if (tid == 0) {
        result[bid] = shared[0];
        time[bid + blocks] = clock();
    }
}

/*
 * Fills the first `size` entries of `number` with pseudo-random values in
 * the range 0..9, drawing one rand() value per entry in index order.
 * Writes nothing when `size` is zero or negative.
 */
void GenerateNumber(int* number, int size)
{
    int idx = 0;
    while (idx < size) {
        const int digit = rand() % 10;
        number[idx] = digit;
        ++idx;
    }
}

/*
 * Prints a diagnostic and terminates the process when a CUDA runtime call
 * did not return cudaSuccess. `what` names the failed operation in the message.
 */
static void abortOnCudaError(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Fills `data` with random digits, computes the sum of their squares on the
 * GPU (per-block partial sums added up on the host), prints that sum together
 * with the device clock span covered by all blocks, then prints the same sum
 * computed on the CPU for comparison.
 */
int main(int argc, char** argv)
{
	int *gpudata = NULL, *result = NULL;
	clock_t *time = NULL;

	GenerateNumber(data, DATA_SIZE);

	abortOnCudaError(cudaMalloc((void**)&gpudata, sizeof(int) * DATA_SIZE), "cudaMalloc(gpudata)");
	abortOnCudaError(cudaMalloc((void**)&result, sizeof(int) * BLOCK_NUM), "cudaMalloc(result)");
	abortOnCudaError(cudaMalloc((void**)&time, sizeof(clock_t) * BLOCK_NUM * 2), "cudaMalloc(time)");

	abortOnCudaError(cudaMemcpy(gpudata, data, sizeof(int) * DATA_SIZE, cudaMemcpyHostToDevice),
	                 "cudaMemcpy(host to device)");

	sumOfSquares<<<BLOCK_NUM, THREAD_NUM, THREAD_NUM * sizeof(int)>>>(gpudata, result, time);
	// A kernel launch returns no status itself; bad launch configurations
	// are reported through cudaGetLastError().
	abortOnCudaError(cudaGetLastError(), "sumOfSquares launch");

	int sum[BLOCK_NUM];
	clock_t time_used[BLOCK_NUM * 2];

	// These blocking copies also wait for the kernel, so errors raised while
	// the kernel was running are reported here.
	abortOnCudaError(cudaMemcpy(sum, result, sizeof(int) * BLOCK_NUM, cudaMemcpyDeviceToHost),
	                 "cudaMemcpy(result)");
	abortOnCudaError(cudaMemcpy(time_used, time, sizeof(clock_t) * BLOCK_NUM * 2, cudaMemcpyDeviceToHost),
	                 "cudaMemcpy(time)");

	abortOnCudaError(cudaFree(gpudata), "cudaFree(gpudata)");
	abortOnCudaError(cudaFree(result), "cudaFree(result)");
	abortOnCudaError(cudaFree(time), "cudaFree(time)");

	// Combine the per-block partial sums.
	int Final_Sum = 0;
	for (int i = 0; i < BLOCK_NUM; i++)
	{
		Final_Sum += sum[i];
	}

	// time_used[0..BLOCK_NUM) holds start stamps, time_used[BLOCK_NUM..2*BLOCK_NUM)
	// end stamps; report the span from the earliest start to the latest end.
	clock_t min_time = time_used[0];
	clock_t max_time = time_used[BLOCK_NUM];
	for (int i = 0; i < BLOCK_NUM; i++)
	{
		if (min_time > time_used[i]) min_time = time_used[i];
		if (max_time < time_used[i + BLOCK_NUM]) max_time = time_used[i + BLOCK_NUM];
	}

	printf("sum:%d \n", Final_Sum);
	// clock_t is not guaranteed to be long; cast so the argument matches %ld.
	printf("time_used:%ld \n", (long)(max_time - min_time));

	// CPU reference for the same computation.
	Final_Sum = 0;
	for (int i = 0; i < DATA_SIZE; i++) {
		Final_Sum += data[i] * data[i];
	}
	printf("sum (CPU): %d\n", Final_Sum);

	CUT_EXIT(argc, argv);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -