⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pmlseg.br

📁 用于GPU通用计算的编程语言BrookGPU 0.4
💻 BR
字号:
// This is code based of the work on Sherbondy et al. that does a// Perona Malik segmentation across an image.#include <stdio.h>#include <stdlib.h>#include "main.h"#include "PMLSeg.h"//packed version (float4 in/out)kernel void test_iter( float4 img[][], float diff_strength,                       float inv_grad_cutoff, float loww, float highw,                       iter float2 it0<>, iter float2 it1<>,                        iter float2 it2<>, iter float2 it3<>,                        iter float2 it4<>, out float4 o_img<> ) {    o_img.x = it0.x;    o_img.y = it0.y;    o_img.z = it1.x;    o_img.w = it1.y;}//packed version (float4 in/out)kernel void process_img_pack( float4 img[][], float diff_strength,                              float inv_grad_cutoff, float loww, float highw,                              iter float2 it0<>, iter float2 it1<>,                               iter float2 it2<>, iter float2 it3<>,                               iter float2 it4<>, out float4 o_img<> ) {    float e = 2.7182817f;    float4 R0;    float4 R1;    float4 R2;    float4 R3;    float4 R4;    float4 R5;    float4 R6;    float4 R7;    float4 R8;    R0 = img[it0];    R1 = img[it1]; //-1,0 West    R2 = img[it2]; //+1,0 East    R3 = img[it3]; //0,-1 South    R4 = img[it4]; //0,+1 North    //Image differentials    R6.x = (R3.x - R0.x); //-d1s    R6.y = (R1.z - R0.x); //-d1w    R6.z = (R4.x - R0.x); //d1n    R6.w = (R0.z - R0.x); //d1e    R7.x = (R3.z - R0.z); //-d2s    R7.y = (R0.x - R0.z); //-d2w    R7.z = (R4.z - R0.z); //d2n    R7.w = (R2.x - R0.z); //d2e	    //Segmentation differentials    R5.x = (R3.y - R0.y);    R5.y = (R1.w - R0.y);    R5.z = (R4.y - R0.y);	    R5.w = (R0.w - R0.y);    R5 = R5*diff_strength;    R8.x = (R3.w - R0.w);	    R8.y = (R0.y - R0.w);	    R8.z = (R4.w - R0.w);    R8.w = (R2.y - R0.w);    R8 = R8*diff_strength;    R6 = R6*R6*inv_grad_cutoff;    R7 = R7*R7*inv_grad_cutoff;    R6.x = pow(e, -R6.x);    R6.y = pow(e, -R6.y);    R6.z = pow(e, -R6.z);    R6.w = pow(e, -R6.w);    R7.x = pow(e, -R7.x);    R7.y = pow(e, -R7.y);    R7.z = pow(e, -R7.z);    R7.w = pow(e, -R7.w);    R6 = dot(R6, R5);    R7 = dot(R7, R8);    R0.y = R0.y+R6.x;    R0.w = R0.w+R7.x;    R4.y = R0.x - loww;    o_img.y = R4.y < 0 ? 0.0 : R0.y;    R4.y = R0.x - highw;    o_img.y = R4.y < 0 ? R0.y : 0.0;    R4.y = R0.z - loww;    o_img.w = R4.y < 0 ? 0.0 : R0.w;    R4.y = R0.z - highw;    o_img.w = R4.y < 0 ? R0.w : 0.0;    o_img.x = R0.x;    o_img.z = R0.z;}int PMLSeg_main(int argc, char* argv[]) {    float* output = NULL;    float* input = NULL;    int i, j;    float time_in_ms = 0.0f;    int xsize, ysize, loop_count;    float xsizef, ysizef;    float diff_strength = 0.20f;    float inv_grad_cutoff = 0.0f;//0.025f;    float loww = 0.0f;//1200./65535.;    float highw = 1.0f;//2000./65535.0f;    if(argc < 4){        char buf[255];        fprintf(stderr, "Usage: %s <size x> <size y> <loop count>\n", argv[0]);        fprintf(stderr, "Size in X: ");        fgets(buf, 254, stdin);        xsize = atoi(buf);        fprintf(stderr, "Size in Y: ");        fgets(buf, 254, stdin);        ysize = atoi(buf);        xsize = xsize/2 + xsize%2; //round up        fprintf(stderr, "# of loops: ");        fgets(buf, 254, stdin);        loop_count = atoi(buf);        xsizef = (float)xsize;        ysizef = (float)ysize;    }    else{        xsize = atoi(argv[1]);        ysize = atoi(argv[2]);        xsize = xsize/2 + xsize%2; //round up        xsizef = (float)xsize;        ysizef = (float)ysize;        loop_count = atoi(argv[3]);    }            //run sanity check first!    // MCH: It makes me ill that I have to open a new scope to get this to work.    {        float test[80];        float ep = 0.005;        int error = 0;                // Setup the iterators to do a north/south/east/west/local lookup into the image        iter float2 it0<8,5> = iter( float2(0.0f,  0.0f), float2(5.0,      8.0) );        iter float2 it1<8,5> = iter( float2(-1.0f, 0.0f), float2(5.0-1.0f, 8.0) );        iter float2 it2<8,5> = iter( float2(1.0f,  0.0f), float2(5.0+1.0f, 8.0) );        iter float2 it3<8,5> = iter( float2(0.0f, -1.0f), float2(5.0,      8.0-1.0f) );        iter float2 it4<8,5> = iter( float2(0.0f,  1.0f), float2(5.0,      8.0+1.0f) );        float4 img<8,5>;        float4 o_img<8,5>;                input  = (float*)malloc(2*8*10*sizeof(float));        output = (float*)malloc(2*8*10*sizeof(float));                // Our test should return these values +/- epsilon        test[0] = 0.0;        test[1] = 0.0;        test[2] = 0.0;        test[3] = 0.0;        test[4] = 0.006;        test[5] = 0.006;        test[6] = 0.006;        test[7] = 0.0;        test[8] = 0.0;        test[9] = 0.0;                test[10] = 0.0;        test[11] = 0.0;        test[12] = 0.0;        test[13] = 0.010;        test[14] = 0.019;        test[15] = 0.035;        test[16] = 0.019;        test[17] = 0.010;        test[18] = 0.0;        test[19] = 0.0;                test[20] = 0.0;        test[21] = 0.0;        test[22] = 0.006;        test[23] = 0.019;        test[24] = 0.058;        test[25] = 0.064;        test[26] = 0.058;        test[27] = 0.019;        test[28] = 0.006;        test[29] = 0.0;                test[30] = 0.0;        test[31] = 0.0;        test[32] = 0.006;        test[33] = 0.035;        test[34] = 0.064;        test[35] = 0.098;        test[36] = 0.064;        test[37] = 0.035;        test[38] = 0.006;        test[39] = 0.003;                test[40] = 0.0;        test[41] = 0.0;        test[42] = 0.006;        test[43] = 0.019;        test[44] = 0.058;        test[45] = 0.064;        test[46] = 0.058;        test[47] = 0.019;        test[48] = 0.006;        test[49] = 0.0;                test[50] = 0.0;        test[51] = 0.0;        test[52] = 0.0;        test[53] = 0.010;        test[54] = 0.019;        test[55] = 0.035;        test[56] = 0.019;        test[57] = 0.010;        test[58] = 0.0;        test[59] = 0.0;                test[60] = 0.0;        test[61] = 0.0;        test[62] = 0.0;        test[63] = 0.0;        test[64] = 0.006;        test[65] = 0.006;        test[66] = 0.006;        test[67] = 0.0;        test[68] = 0.0;        test[69] = 0.0;                test[70] = 0.0;        test[71] = 0.0;        test[72] = 0.0;        test[73] = 0.0;        test[74] = 0.0;        test[75] = 0.003;        test[76] = 0.0;        test[77] = 0.0;        test[78] = 0.0;        test[79] = 0.0;                // Fill an image with random data and set up the seed point in the middle        for(i=0; i<2*8*10; i++){            input[i] = 0.0f;        }         //Let's insert some seed data in the middle                input[35*2+1] = 1.0f;                //Build the input stream out of the image and seed data        streamRead(img, input);                //Do the requested number of iterations        for(i=0; i<4; i++){            //We can't use the input and output buffers without hosing things            //so we'll need to "ping-pong" between them            if(i%2==0)                process_img_pack(img, diff_strength, inv_grad_cutoff,                                  loww, highw,                                  it0, it1, it2, it3, it4, o_img );            else                process_img_pack(o_img, diff_strength, inv_grad_cutoff,                                  loww, highw,                                 it0, it1, it2, it3, it4, img );                    }        if(i%2==0)            streamWrite(img, output);        else            streamWrite(o_img, output);                //Let's check the results!        for(i=0; i<80; i++){            if((output[2*i+1]>(test[i]+ep)) || (output[2*i+1]<(test[i]-ep))){                fprintf(stderr, "Mismatch @ %d; expected %1.4f, got %1.4f\n", i, test[i], output[2*i+1]);                error = 1;            }            }        if(error){            printf("\n");        }    }    //Run the actual test!    {	 // Setup the iterators to do a north/south/east/west/local lookup into the image	 iter float2 it0<ysize,xsize> = iter( float2(0.0f,  0.0f), float2(xsizef,      ysizef) );	 iter float2 it1<ysize,xsize> = iter( float2(-1.0f, 0.0f), float2(xsizef-1.0f, ysizef) );	 iter float2 it2<ysize,xsize> = iter( float2(1.0f,  0.0f), float2(xsizef+1.0f, ysizef) );	 iter float2 it3<ysize,xsize> = iter( float2(0.0f, -1.0f), float2(xsizef,      ysizef-1.0f) );	 iter float2 it4<ysize,xsize> = iter( float2(0.0f,  1.0f), float2(xsizef,      ysizef+1.0f) );	 float4 img<ysize,xsize>;	 float4 o_img<ysize,xsize>;	 	 input  = (float*)malloc(xsize*ysize*4*sizeof(float));	 output = (float*)malloc(xsize*ysize*4*sizeof(float));	 	 // Fill an image with random data and set up the seed point in the middle	 for(i=0; i<xsize*ysize*4; i++){	      input[i] = 0.0f;	 }   	 for(i=0; i<xsize*ysize*4; i+=2){	   input[i] = rand()/(float)INT_MAX;	 }    	 //Let's insert some seed data in the middle	 for(i=xsize-1; i<xsize+1; i++){	      for(j=ysize/2-1; j<ysize/2+1; j++){		   input[ (j*8)+(i*2)+1] = 1.0f;	      }	 }	 	 //Start timing	 start = GetTime();	 	 //Build the input stream out of the image and seed data	 streamRead(img, input);	 	 //Do the requested number of iterations	 for(i=0; i<loop_count; i++){	      //We can't use the input and output buffers without hosing things	      //so we'll need to "ping-pong" between them	      if(i%2==0)		   process_img_pack(img, diff_strength, inv_grad_cutoff, loww, highw, 				    it0, it1, it2, it3, it4, o_img );	      else		   process_img_pack(o_img, diff_strength, inv_grad_cutoff, loww, highw,				    it0, it1, it2, it3, it4, img );	      	 }	 if(i%2==0)	      streamWrite(img, output);	 else	      streamWrite(o_img, output);	 	 //Stop time	 stop = GetTime();	 	 time_in_ms = (float)(stop-start)/1000.;	 fprintf(stderr, "That took %.2fms total\n", time_in_ms);	 fprintf(stderr, "That's %.2fms per loop\n", time_in_ms/loop_count);	 fprintf(stderr, "That's %.2fMFlops\n", (64*xsize*ysize)/(time_in_ms*1000./loop_count));	 fprintf(stderr, "Internal read bandwidth is %.2fMB/s\n", 		 (4*5*4*xsize*ysize/(1024.*1024.))/((time_in_ms/loop_count)/1000.));	 fprintf(stderr, "Internal write bandwidth is %.2fMB/s\n", 		 (4*4*xsize*ysize/(1024.*1024.))/((time_in_ms/loop_count)/1000.));    }    return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -