📄 sdl_imagefilter.c
字号:
"dec %%ecx \n\t" // decrease loop counter "jnz .L1022 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(C), // %3 "m"(Mask) // %4 );#endif return (0);}// SDL_imageFilterAddByteToHalf: D = saturation255(S/2 + C)int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){ static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }; unsigned int i, istart; int iC; unsigned char *cursrc1; unsigned char *curdest; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ iC = (int) C; for (i = istart; i < length; i++) { result = (int) (*cursrc1 / 2) + iC; if (result > 255) result = 255; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterSubByteMMX: D = saturation0(S - C)int SDL_imageFilterSubByteMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){#ifdef USE_MMX asm volatile ("pusha \n\t" // ** Duplicate C in 8 bytes of MM1 ** "mov %3, %%al \n\t" // load C into AL "mov %%al, %%ah \n\t" // copy AL into AH "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX "movd %%eax, %%mm1 \n\t" // copy EAX into MM1 "movd %%eax, %%mm2 \n\t" // copy EAX into MM2 "punpckldq %%mm2, %%mm1 \n\t" // fill higher bytes of MM1 with C "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L1023 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(C) // %3 );#endif return (0);}// SDL_imageFilterSubByte: D = saturation0(S - C)int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){ unsigned int i, istart; int iC; unsigned char *cursrc1; unsigned char *curdest; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterSubByteMMX(Src1, Dest, length, C); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ iC = (int) C; for (i = istart; i < length; i++) { result = (int) *cursrc1 - iC; if (result < 0) result = 0; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterShiftRightMMX: D = saturation0(S >> N)int SDL_imageFilterShiftRightMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N, unsigned char *Mask){#ifdef USE_MMX asm volatile ("pusha \n\t" "movl %4, %%edx \n\t" // load Mask address into edx "movq (%%edx), %%mm0 \n\t" // load Mask into mm0 "xor %%ecx, %%ecx \n\t" // zero ECX "mov %3, %%cl \n\t" // load loop counter (N) into CL "movd %%ecx, %%mm3 \n\t" // copy (N) into MM3 "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1 ".L10240: \n\t" // ** Prepare proper bit-Mask in MM1 ** "psrlw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the right // "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1 ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter "jnz .L10240 \n\t" // check loop termination, proceed if required // ** Shift all bytes of the image ** "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10241: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0 "psrlw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the right // "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0 ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" // store result in SrcDest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L10241 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(N), // %3 "m"(Mask) // %4 );#endif return (0);}// SDL_imageFilterShiftRight: D = saturation0(S >> N)int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N){ static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F }; unsigned int i, istart; unsigned char *cursrc1; unsigned char *curdest; /* Check shift */ if ((N > 8) || (N < 1)) { return (-1); } if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { *curdest = (unsigned char) *cursrc1 >> N; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterMultByByteMMX: D = saturation255(S * C)int SDL_imageFilterMultByByteMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){#ifdef USE_MMX asm volatile ("pusha \n\t" // ** Duplicate C in 4 words of MM1 ** "mov %3, %%al \n\t" // load C into AL "xor %%ah, %%ah \n\t" // zero AH "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX "movd %%eax, %%mm1 \n\t" // copy EAX into MM1 "movd %%eax, %%mm2 \n\t" // copy EAX into MM2 "punpckldq %%mm2, %%mm1 \n\t" // fill higher words of MM1 with C "pxor %%mm0, %%mm0 \n\t" // zero MM0 register "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "cmp $128, %%al \n\t" // if (C <= 128) execute more efficient code "jg .L10251 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10250: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words "pmullw %%mm1, %%mm3 \n\t" // mul low bytes of SrcDest and MM1 "pmullw %%mm1, %%mm4 \n\t" // mul high bytes of SrcDest and MM1 "packuswb %%mm4, %%mm3 \n\t" // pack words back into bytes with saturation "movq %%mm3, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L10250 \n\t" // check loop termination, proceed if required "jmp .L10252 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10251: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words "pmullw %%mm1, %%mm3 \n\t" // mul low bytes of SrcDest and MM1 "pmullw %%mm1, %%mm4 \n\t" // mul high bytes of SrcDest and MM1 // ** Take abs value of the results (signed words) ** "movq %%mm3, %%mm5 \n\t" // copy mm3 into mm5 "movq %%mm4, %%mm6 \n\t" // copy mm4 into mm6 "psraw $15, %%mm5 \n\t" // fill mm5 words with word sign bit "psraw $15, %%mm6 \n\t" // fill mm6 words with word sign bit "pxor %%mm5, %%mm3 \n\t" // take 1's compliment of only neg. words "pxor %%mm6, %%mm4 \n\t" // take 1's compliment of only neg. words "psubsw %%mm5, %%mm3 \n\t" // add 1 to only neg. words, W-(-1) or W-0 "psubsw %%mm6, %%mm4 \n\t" // add 1 to only neg. words, W-(-1) or W-0 "packuswb %%mm4, %%mm3 \n\t" // pack words back into bytes with saturation "movq %%mm3, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L10251 \n\t" // check loop termination, proceed if required ".L10252: \n\t" "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(C) // %3 );#endif return (0);}// SDL_imageFilterMultByByte: D = saturation255(S * C)int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){ unsigned int i, istart; int iC; unsigned char *cursrc1; unsigned char *curdest; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { SDL_imageFilterMultByByteMMX(Src1, Dest, length, C); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ iC = (int) C; for (i = istart; i < length; i++) { result = (int) *cursrc1 * iC; if (result > 255) result = 255; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterShiftRightAndMultByByteMMX: D = saturation255((S >> N) * C)int SDL_imageFilterShiftRightAndMultByByteMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N, unsigned char C){#ifdef USE_MMX asm volatile ("pusha \n\t" // ** Duplicate C in 4 words of MM1 ** "mov %4, %%al \n\t" // load C into AL "xor %%ah, %%ah \n\t" // zero AH "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX "movd %%eax, %%mm1 \n\t" // copy EAX into MM1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -