📄 pmlseg.br
字号:
// This is code based of the work on Sherbondy et al. that does a// Perona Malik segmentation across an image.#include <stdio.h>#include <stdlib.h>#include "main.h"#include "PMLSeg.h"//packed version (float4 in/out)kernel void test_iter( float4 img[][], float diff_strength, float inv_grad_cutoff, float loww, float highw, iter float2 it0<>, iter float2 it1<>, iter float2 it2<>, iter float2 it3<>, iter float2 it4<>, out float4 o_img<> ) { o_img.x = it0.x; o_img.y = it0.y; o_img.z = it1.x; o_img.w = it1.y;}//packed version (float4 in/out)kernel void process_img_pack( float4 img[][], float diff_strength, float inv_grad_cutoff, float loww, float highw, iter float2 it0<>, iter float2 it1<>, iter float2 it2<>, iter float2 it3<>, iter float2 it4<>, out float4 o_img<> ) { float e = 2.7182817f; float4 R0; float4 R1; float4 R2; float4 R3; float4 R4; float4 R5; float4 R6; float4 R7; float4 R8; R0 = img[it0]; R1 = img[it1]; //-1,0 West R2 = img[it2]; //+1,0 East R3 = img[it3]; //0,-1 South R4 = img[it4]; //0,+1 North //Image differentials R6.x = (R3.x - R0.x); //-d1s R6.y = (R1.z - R0.x); //-d1w R6.z = (R4.x - R0.x); //d1n R6.w = (R0.z - R0.x); //d1e R7.x = (R3.z - R0.z); //-d2s R7.y = (R0.x - R0.z); //-d2w R7.z = (R4.z - R0.z); //d2n R7.w = (R2.x - R0.z); //d2e //Segmentation differentials R5.x = (R3.y - R0.y); R5.y = (R1.w - R0.y); R5.z = (R4.y - R0.y); R5.w = (R0.w - R0.y); R5 = R5*diff_strength; R8.x = (R3.w - R0.w); R8.y = (R0.y - R0.w); R8.z = (R4.w - R0.w); R8.w = (R2.y - R0.w); R8 = R8*diff_strength; R6 = R6*R6*inv_grad_cutoff; R7 = R7*R7*inv_grad_cutoff; R6.x = pow(e, -R6.x); R6.y = pow(e, -R6.y); R6.z = pow(e, -R6.z); R6.w = pow(e, -R6.w); R7.x = pow(e, -R7.x); R7.y = pow(e, -R7.y); R7.z = pow(e, -R7.z); R7.w = pow(e, -R7.w); R6 = dot(R6, R5); R7 = dot(R7, R8); R0.y = R0.y+R6.x; R0.w = R0.w+R7.x; R4.y = R0.x - loww; o_img.y = R4.y < 0 ? 0.0 : R0.y; R4.y = R0.x - highw; o_img.y = R4.y < 0 ? R0.y : 0.0; R4.y = R0.z - loww; o_img.w = R4.y < 0 ? 0.0 : R0.w; R4.y = R0.z - highw; o_img.w = R4.y < 0 ? R0.w : 0.0; o_img.x = R0.x; o_img.z = R0.z;}int PMLSeg_main(int argc, char* argv[]) { float* output = NULL; float* input = NULL; int i, j; float time_in_ms = 0.0f; int xsize, ysize, loop_count; float xsizef, ysizef; float diff_strength = 0.20f; float inv_grad_cutoff = 0.0f;//0.025f; float loww = 0.0f;//1200./65535.; float highw = 1.0f;//2000./65535.0f; if(argc < 4){ char buf[255]; fprintf(stderr, "Usage: %s <size x> <size y> <loop count>\n", argv[0]); fprintf(stderr, "Size in X: "); fgets(buf, 254, stdin); xsize = atoi(buf); fprintf(stderr, "Size in Y: "); fgets(buf, 254, stdin); ysize = atoi(buf); xsize = xsize/2 + xsize%2; //round up fprintf(stderr, "# of loops: "); fgets(buf, 254, stdin); loop_count = atoi(buf); xsizef = (float)xsize; ysizef = (float)ysize; } else{ xsize = atoi(argv[1]); ysize = atoi(argv[2]); xsize = xsize/2 + xsize%2; //round up xsizef = (float)xsize; ysizef = (float)ysize; loop_count = atoi(argv[3]); } //run sanity check first! // MCH: It makes me ill that I have to open a new scope to get this to work. { float test[80]; float ep = 0.005; int error = 0; // Setup the iterators to do a north/south/east/west/local lookup into the image iter float2 it0<8,5> = iter( float2(0.0f, 0.0f), float2(5.0, 8.0) ); iter float2 it1<8,5> = iter( float2(-1.0f, 0.0f), float2(5.0-1.0f, 8.0) ); iter float2 it2<8,5> = iter( float2(1.0f, 0.0f), float2(5.0+1.0f, 8.0) ); iter float2 it3<8,5> = iter( float2(0.0f, -1.0f), float2(5.0, 8.0-1.0f) ); iter float2 it4<8,5> = iter( float2(0.0f, 1.0f), float2(5.0, 8.0+1.0f) ); float4 img<8,5>; float4 o_img<8,5>; input = (float*)malloc(2*8*10*sizeof(float)); output = (float*)malloc(2*8*10*sizeof(float)); // Our test should return these values +/- epsilon test[0] = 0.0; test[1] = 0.0; test[2] = 0.0; test[3] = 0.0; test[4] = 0.006; test[5] = 0.006; test[6] = 0.006; test[7] = 0.0; test[8] = 0.0; test[9] = 0.0; test[10] = 0.0; test[11] = 0.0; test[12] = 0.0; test[13] = 0.010; test[14] = 0.019; test[15] = 0.035; test[16] = 0.019; test[17] = 0.010; test[18] = 0.0; test[19] = 0.0; test[20] = 0.0; test[21] = 0.0; test[22] = 0.006; test[23] = 0.019; test[24] = 0.058; test[25] = 0.064; test[26] = 0.058; test[27] = 0.019; test[28] = 0.006; test[29] = 0.0; test[30] = 0.0; test[31] = 0.0; test[32] = 0.006; test[33] = 0.035; test[34] = 0.064; test[35] = 0.098; test[36] = 0.064; test[37] = 0.035; test[38] = 0.006; test[39] = 0.003; test[40] = 0.0; test[41] = 0.0; test[42] = 0.006; test[43] = 0.019; test[44] = 0.058; test[45] = 0.064; test[46] = 0.058; test[47] = 0.019; test[48] = 0.006; test[49] = 0.0; test[50] = 0.0; test[51] = 0.0; test[52] = 0.0; test[53] = 0.010; test[54] = 0.019; test[55] = 0.035; test[56] = 0.019; test[57] = 0.010; test[58] = 0.0; test[59] = 0.0; test[60] = 0.0; test[61] = 0.0; test[62] = 0.0; test[63] = 0.0; test[64] = 0.006; test[65] = 0.006; test[66] = 0.006; test[67] = 0.0; test[68] = 0.0; test[69] = 0.0; test[70] = 0.0; test[71] = 0.0; test[72] = 0.0; test[73] = 0.0; test[74] = 0.0; test[75] = 0.003; test[76] = 0.0; test[77] = 0.0; test[78] = 0.0; test[79] = 0.0; // Fill an image with random data and set up the seed point in the middle for(i=0; i<2*8*10; i++){ input[i] = 0.0f; } //Let's insert some seed data in the middle input[35*2+1] = 1.0f; //Build the input stream out of the image and seed data streamRead(img, input); //Do the requested number of iterations for(i=0; i<4; i++){ //We can't use the input and output buffers without hosing things //so we'll need to "ping-pong" between them if(i%2==0) process_img_pack(img, diff_strength, inv_grad_cutoff, loww, highw, it0, it1, it2, it3, it4, o_img ); else process_img_pack(o_img, diff_strength, inv_grad_cutoff, loww, highw, it0, it1, it2, it3, it4, img ); } if(i%2==0) streamWrite(img, output); else streamWrite(o_img, output); //Let's check the results! for(i=0; i<80; i++){ if((output[2*i+1]>(test[i]+ep)) || (output[2*i+1]<(test[i]-ep))){ fprintf(stderr, "Mismatch @ %d; expected %1.4f, got %1.4f\n", i, test[i], output[2*i+1]); error = 1; } } if(error){ printf("\n"); } } //Run the actual test! { // Setup the iterators to do a north/south/east/west/local lookup into the image iter float2 it0<ysize,xsize> = iter( float2(0.0f, 0.0f), float2(xsizef, ysizef) ); iter float2 it1<ysize,xsize> = iter( float2(-1.0f, 0.0f), float2(xsizef-1.0f, ysizef) ); iter float2 it2<ysize,xsize> = iter( float2(1.0f, 0.0f), float2(xsizef+1.0f, ysizef) ); iter float2 it3<ysize,xsize> = iter( float2(0.0f, -1.0f), float2(xsizef, ysizef-1.0f) ); iter float2 it4<ysize,xsize> = iter( float2(0.0f, 1.0f), float2(xsizef, ysizef+1.0f) ); float4 img<ysize,xsize>; float4 o_img<ysize,xsize>; input = (float*)malloc(xsize*ysize*4*sizeof(float)); output = (float*)malloc(xsize*ysize*4*sizeof(float)); // Fill an image with random data and set up the seed point in the middle for(i=0; i<xsize*ysize*4; i++){ input[i] = 0.0f; } for(i=0; i<xsize*ysize*4; i+=2){ input[i] = rand()/(float)INT_MAX; } //Let's insert some seed data in the middle for(i=xsize-1; i<xsize+1; i++){ for(j=ysize/2-1; j<ysize/2+1; j++){ input[ (j*8)+(i*2)+1] = 1.0f; } } //Start timing start = GetTime(); //Build the input stream out of the image and seed data streamRead(img, input); //Do the requested number of iterations for(i=0; i<loop_count; i++){ //We can't use the input and output buffers without hosing things //so we'll need to "ping-pong" between them if(i%2==0) process_img_pack(img, diff_strength, inv_grad_cutoff, loww, highw, it0, it1, it2, it3, it4, o_img ); else process_img_pack(o_img, diff_strength, inv_grad_cutoff, loww, highw, it0, it1, it2, it3, it4, img ); } if(i%2==0) streamWrite(img, output); else streamWrite(o_img, output); //Stop time stop = GetTime(); time_in_ms = (float)(stop-start)/1000.; fprintf(stderr, "That took %.2fms total\n", time_in_ms); fprintf(stderr, "That's %.2fms per loop\n", time_in_ms/loop_count); fprintf(stderr, "That's %.2fMFlops\n", (64*xsize*ysize)/(time_in_ms*1000./loop_count)); fprintf(stderr, "Internal read bandwidth is %.2fMB/s\n", (4*5*4*xsize*ysize/(1024.*1024.))/((time_in_ms/loop_count)/1000.)); fprintf(stderr, "Internal write bandwidth is %.2fMB/s\n", (4*4*xsize*ysize/(1024.*1024.))/((time_in_ms/loop_count)/1000.)); } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -