📄 sdl_imagefilter.c
字号:
"movd %%eax, %%mm2 \n\t" // copy EAX into MM2 "punpckldq %%mm2, %%mm1 \n\t" // fill higher words of MM1 with C "xor %%ecx, %%ecx \n\t" // zero ECX "mov %3, %%cl \n\t" // load N into CL "movd %%ecx, %%mm7 \n\t" // copy N into MM7 "pxor %%mm0, %%mm0 \n\t" // zero MM0 register "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1026: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words "psrlw %%mm7, %%mm3 \n\t" // shift 4 WORDS of MM3 (N) bits to the right "psrlw %%mm7, %%mm4 \n\t" // shift 4 WORDS of MM4 (N) bits to the right "pmullw %%mm1, %%mm3 \n\t" // mul low bytes of SrcDest by MM1 "pmullw %%mm1, %%mm4 \n\t" // mul high bytes of SrcDest by MM1 "packuswb %%mm4, %%mm3 \n\t" // pack words back into bytes with saturation "movq %%mm3, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L1026 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(N), // %3 "m"(C) // %4 );#endif return (0);}// SDL_imageFilterShiftRightAndMultByByte: D = saturation255((S >> N) * C)int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N, unsigned char C){ unsigned int i, istart; int iC; unsigned char *cursrc1; unsigned char *curdest; int result; /* Check shift */ if ((N > 8) || (N < 1)) { return (-1); } if ((SDL_imageFilterMMXdetect()) && (length > 7)) { SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ iC = (int) C; for (i = istart; i < length; i++) { result = (int) (*cursrc1 >> N) * iC; if (result > 255) result = 255; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterShiftLeftByteMMX: D = (S << N)int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N, unsigned char *Mask){#ifdef USE_MMX asm volatile ("pusha \n\t" "movl %4, %%edx \n\t" // load Mask address into edx "movq (%%edx), %%mm0 \n\t" // load Mask into mm0 "xor %%ecx, %%ecx \n\t" // zero ECX "mov %3, %%cl \n\t" // load loop counter (N) into CL "movd %%ecx, %%mm3 \n\t" // copy (N) into MM3 "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1 ".L10270: \n\t" // ** Prepare proper bit-Mask in MM1 ** "psllw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the left // "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1 ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter "jnz .L10270 \n\t" // check loop termination, proceed if required // ** Shift all bytes of the image ** "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load SrcDest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10271: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0 "psllw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the left // "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0 ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L10271 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(N), // %3 "m"(Mask) // %4 );#endif return (0);}// SDL_imageFilterShiftLeftByte: D = (S << N)int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N){ static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE }; unsigned int i, istart; unsigned char *cursrc1, *curdest; int result; if ((N > 8) || (N < 1)) return (-1); // image size must be at least 8 bytes // and min. 1 bit and max. 8 bit shift is allowed if ((SDL_imageFilterMMXdetect()) && (length > 7)) { SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { result = ((int) *cursrc1 << N) & 0xff; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterShiftLeftMMX: D = saturation255(S << N)int SDL_imageFilterShiftLeftMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N){#ifdef USE_MMX asm volatile ("pusha \n\t" "xor %%eax, %%eax \n\t" // zero EAX "mov %3, %%al \n\t" // load N into AL "movd %%eax, %%mm7 \n\t" // copy N into MM7 "pxor %%mm0, %%mm0 \n\t" // zero MM0 register "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "cmp $7, %%al \n\t" // if (N <= 7) execute more efficient code "jg .L10281 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10280: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words "psllw %%mm7, %%mm3 \n\t" // shift 4 WORDS of MM3 (N) bits to the right "psllw %%mm7, %%mm4 \n\t" // shift 4 WORDS of MM4 (N) bits to the right "packuswb %%mm4, %%mm3 \n\t" // pack words back into bytes with saturation "movq %%mm3, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L10280 \n\t" // check loop termination, proceed if required "jmp .L10282 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10281: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words "psllw %%mm7, %%mm3 \n\t" // shift 4 WORDS of MM3 (N) bits to the right "psllw %%mm7, %%mm4 \n\t" // shift 4 WORDS of MM4 (N) bits to the right // ** Take abs value of the signed words ** "movq %%mm3, %%mm5 \n\t" // copy mm3 into mm5 "movq %%mm4, %%mm6 \n\t" // copy mm4 into mm6 "psraw $15, %%mm5 \n\t" // fill mm5 words with word sign bit "psraw $15, %%mm6 \n\t" // fill mm6 words with word sign bit "pxor %%mm5, %%mm3 \n\t" // take 1's compliment of only neg. words "pxor %%mm6, %%mm4 \n\t" // take 1's compliment of only neg. words "psubsw %%mm5, %%mm3 \n\t" // add 1 to only neg. words, W-(-1) or W-0 "psubsw %%mm6, %%mm4 \n\t" // add 1 to only neg. words, W-(-1) or W-0 "packuswb %%mm4, %%mm3 \n\t" // pack words back into bytes with saturation "movq %%mm3, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L10281 \n\t" // check loop termination, proceed if required ".L10282: \n\t" "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(N) // %3 );#endif return (0);}// SDL_imageFilterShiftLeft: D = saturation255(S << N)int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N){ unsigned int i, istart; unsigned char *cursrc1, *curdest; int result; if ((N > 8) || (N < 1)) return (-1); // image size must be at least 8 bytes // and min. 1 bit and max. 8 bit shift is allowed if ((SDL_imageFilterMMXdetect()) && (length > 7)) { SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { result = (int) *cursrc1 << N; if (result > 255) result = 255; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterBinarizeUsingThresholdMMX: D = (S >= T) ? 255:0int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char T){#ifdef USE_MMX asm volatile ("pusha \n\t" // ** Duplicate T in 8 bytes of MM3 ** "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1 "pcmpeqb %%mm2, %%mm2 \n\t" // generate all 1's in mm2 "mov %3, %%al \n\t" // load T into AL "mov %%al, %%ah \n\t" // copy AL into AH "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX "movd %%eax, %%mm3 \n\t" // copy EAX into MM3 "movd %%eax, %%mm4 \n\t" // copy EAX into MM4 "punpckldq %%mm4, %%mm3 \n\t" // fill higher bytes of MM3 with T "psubusb %%mm3, %%mm2 \n\t" // store 0xFF - T in MM2 "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte alignment of the loop entry ".L1029: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "paddusb %%mm2, %%mm0 \n\t" // MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation) "pcmpeqb %%mm1, %%mm0 \n\t" // binarize 255:0, comparing to 255 "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L1029 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(T) // %3 );#endif return (0);}// SDL_imageFilterBinarizeUsingThreshold: D = (S >= T) ? 255:0int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, int length, unsigned char T){ unsigned int i, istart; unsigned char *cursrc1; unsigned char *curdest; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { *curdest = ((unsigned char) *cursrc1 >= T) ? 255 : 0; /* Advance pointers */ cursrc1++; curd
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -