📄 sdl_imagefilter.c
字号:
/*

 SDL_imageFilter - bytes-image "filter" routines
                   (uses inline x86 MMX optimizations if available)

 LGPL (c) A. Schiffler

 Note: Most MMX code is based on published routines
 by Vladimir Kravtchenko at vk@cs.ubc.ca - credits to
 him for his work.

*/

#include <stdio.h>
#include <stdlib.h>

/*
 * SDL_imageFilter.h carries the public prototypes for this file.  Compilers
 * that provide __has_include skip it when it is not on the include path, so
 * this translation unit can also be built stand-alone (e.g. by a unit test).
 */
#if defined(__has_include)
#if __has_include("SDL_imageFilter.h")
#include "SDL_imageFilter.h"
#endif
#else
#include "SDL_imageFilter.h"
#endif

/* ------ Static variables ----- */

/* Toggle the use of the MMX routines - ON by default */
static int SDL_imageFilterUseMMX = 1;

/*
 * Return the CPU feature flags.
 *
 * With USE_MMX defined this executes CPUID with EAX = 1 and returns the
 * contents of EDX.  Without USE_MMX it always returns 0.
 */
unsigned int cpuFlags(void)
{
    unsigned int flags = 0;

#ifdef USE_MMX
    __asm__ __volatile__ (
        "pusha              \n\t"
        "mov %1, %%eax      \n\t"   /* request feature flag */
        "cpuid              \n\t"   /* get CPU ID flag */
        "mov %%edx, %0      \n\t"   /* move result to flags */
        "popa               \n\t"
        : "=m" (flags)              /* %0 */
        : "i" (0x00000001)          /* %1 */
    );
#endif

    return flags;
}

/*
 * MMX detection routine (with override flag).
 *
 * Returns 0 when MMX use has been switched off with SDL_imageFilterMMXoff(),
 * otherwise 1 if bit 0x00800000 of cpuFlags() is set and 0 if it is not.
 */
int SDL_imageFilterMMXdetect(void)
{
    /* Check override flag */
    if (SDL_imageFilterUseMMX == 0) {
        return 0;
    }

    /* Bitwise test of the feature bit, normalized to 0/1 */
    return (cpuFlags() & 0x00800000u) != 0;
}

/* Disable the MMX code paths; the filters then use the plain C loops. */
void SDL_imageFilterMMXoff(void)
{
    SDL_imageFilterUseMMX = 0;
}

/* Re-enable the MMX code paths (still subject to CPU detection). */
void SDL_imageFilterMMXon(void)
{
    SDL_imageFilterUseMMX = 1;
}

/*
 * ------------------------------------------------------------------------
 * MMX helper routines
 *
 * Each *MMX routine processes length/8 groups of 8 bytes and ignores the
 * remaining (length & 7) bytes; the public wrappers finish those in C.
 * Without USE_MMX the routines do nothing and return 0.
 *
 * The asm blocks only read the pointer variables, so they are passed as
 * inputs; the data written through Dest is covered by the "memory" clobber.
 * Numeric local labels ("1:" / "1b") are used so that the asm can be
 * duplicated or inlined by the compiler without symbol clashes.
 *
 * NOTE(review): the operands use "m" constraints and are read after
 * "pusha" has moved %esp.  This presumably relies on the compiler
 * addressing them relative to the frame pointer - confirm the build flags
 * (e.g. no -fomit-frame-pointer) before relying on these routines.
 * ------------------------------------------------------------------------
 */

/* SDL_imageFilterAddMMX: D = saturation255(S1 + S2), 8 bytes per iteration */
int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
#ifdef USE_MMX
    /* Fewer than 8 bytes: the loop counter would start at 0 and wrap around */
    if (length < 8) {
        return 0;
    }

    __asm__ __volatile__ (
        "pusha                      \n\t"
        "mov %2, %%eax              \n\t"   /* load Src1 address into eax */
        "mov %1, %%ebx              \n\t"   /* load Src2 address into ebx */
        "mov %0, %%edi              \n\t"   /* load Dest address into edi */
        "mov %3, %%ecx              \n\t"   /* load loop counter (SIZE) into ecx */
        "shr $3, %%ecx              \n\t"   /* counter/8 (MMX loads 8 bytes at a time) */
        ".align 16                  \n\t"   /* 16 byte alignment of the loop entry */
        "1:                         \n\t"
        "movq (%%eax), %%mm1        \n\t"   /* load 8 bytes from Src1 into mm1 */
        "paddusb (%%ebx), %%mm1     \n\t"   /* mm1=Src1+Src2 (add 8 bytes with saturation) */
        "movq %%mm1, (%%edi)        \n\t"   /* store result in Dest */
        "add $8, %%eax              \n\t"   /* increase Src1, Src2 and Dest */
        "add $8, %%ebx              \n\t"   /* register pointers by 8 */
        "add $8, %%edi              \n\t"
        "dec %%ecx                  \n\t"   /* decrease loop counter */
        "jnz 1b                     \n\t"   /* check loop termination, proceed if required */
        "emms                       \n\t"   /* exit MMX state */
        "popa                       \n\t"
        : /* no outputs */
        : "m" (Dest),                       /* %0 */
          "m" (Src2),                       /* %1 */
          "m" (Src1),                       /* %2 */
          "m" (length)                      /* %3 */
        : "memory"
    );
#endif
    return 0;
}

/*
 * SDL_imageFilterAdd: D = saturation255(S1 + S2)
 *
 * Src1, Src2 - input byte arrays, each at least `length` bytes long
 * Dest       - output byte array, at least `length` bytes long
 * length     - number of bytes to process
 *
 * Returns 0 on success (including length == 0) and -1 when length is
 * negative or any of the pointers is NULL.
 */
int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
    int i;
    int istart = 0;
    int result;

    if (length == 0) {
        return 0;
    }
    if ((length < 0) || (Src1 == NULL) || (Src2 == NULL) || (Dest == NULL)) {
        return -1;
    }

    if (SDL_imageFilterMMXdetect() && (length > 7)) {
        /* MMX routine handles all complete groups of 8 bytes */
        SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
        istart = length & ~7;
    }

    /* C routine: whole image, or the bytes left over after the MMX pass */
    for (i = istart; i < length; i++) {
        result = (int) Src1[i] + (int) Src2[i];
        if (result > 255) {
            result = 255;
        }
        Dest[i] = (unsigned char) result;
    }

    return 0;
}

/*
 * SDL_imageFilterMeanMMX: D = S1/2 + S2/2, 8 bytes per iteration
 *
 * Mask points to 8 bytes that are ANDed onto every shifted group; the public
 * wrapper passes 0x7F bytes to clear the bit shifted in from the neighbouring
 * byte by the word-wide shift.
 */
int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length,
                           unsigned char *Mask)
{
#ifdef USE_MMX
    /* Fewer than 8 bytes: the loop counter would start at 0 and wrap around */
    if (length < 8) {
        return 0;
    }

    __asm__ __volatile__ (
        "pusha                      \n\t"
        "movl %4, %%edx             \n\t"   /* load Mask address into edx */
        "movq (%%edx), %%mm0        \n\t"   /* load Mask into mm0 */
        "mov %2, %%eax              \n\t"   /* load Src1 address into eax */
        "mov %1, %%ebx              \n\t"   /* load Src2 address into ebx */
        "mov %0, %%edi              \n\t"   /* load Dest address into edi */
        "mov %3, %%ecx              \n\t"   /* load loop counter (SIZE) into ecx */
        "shr $3, %%ecx              \n\t"   /* counter/8 (MMX loads 8 bytes at a time) */
        ".align 16                  \n\t"   /* 16 byte alignment of the loop entry */
        "1:                         \n\t"
        "movq (%%eax), %%mm1        \n\t"   /* load 8 bytes from Src1 into mm1 */
        "movq (%%ebx), %%mm2        \n\t"   /* load 8 bytes from Src2 into mm2 */
        /* --- Byte shift via Word shift --- */
        "psrlw $1, %%mm1            \n\t"   /* shift 4 WORDS of mm1 1 bit to the right */
        "psrlw $1, %%mm2            \n\t"   /* shift 4 WORDS of mm2 1 bit to the right */
        /* "pand %%mm0, %%mm1": apply Mask to 8 BYTES of mm1 */
        ".byte 0x0f, 0xdb, 0xc8     \n\t"
        /* "pand %%mm0, %%mm2": apply Mask to 8 BYTES of mm2 */
        ".byte 0x0f, 0xdb, 0xd0     \n\t"
        "paddusb %%mm2, %%mm1       \n\t"   /* mm1=mm1+mm2 (add 8 bytes with saturation) */
        "movq %%mm1, (%%edi)        \n\t"   /* store result in Dest */
        "add $8, %%eax              \n\t"   /* increase Src1, Src2 and Dest */
        "add $8, %%ebx              \n\t"   /* register pointers by 8 */
        "add $8, %%edi              \n\t"
        "dec %%ecx                  \n\t"   /* decrease loop counter */
        "jnz 1b                     \n\t"   /* check loop termination, proceed if required */
        "emms                       \n\t"   /* exit MMX state */
        "popa                       \n\t"
        : /* no outputs */
        : "m" (Dest),                       /* %0 */
          "m" (Src2),                       /* %1 */
          "m" (Src1),                       /* %2 */
          "m" (length),                     /* %3 */
          "m" (Mask)                        /* %4 */
        : "memory"
    );
#endif
    return 0;
}

/*
 * SDL_imageFilterMean: D = S1/2 + S2/2
 *
 * Src1, Src2 - input byte arrays, each at least `length` bytes long
 * Dest       - output byte array, at least `length` bytes long
 * length     - number of bytes to process
 *
 * Returns 0 on success (including length == 0) and -1 when length is
 * negative or any of the pointers is NULL.
 */
int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
    static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
    int i;
    int istart = 0;
    int result;

    if (length == 0) {
        return 0;
    }
    if ((length < 0) || (Src1 == NULL) || (Src2 == NULL) || (Dest == NULL)) {
        return -1;
    }

    if (SDL_imageFilterMMXdetect() && (length > 7)) {
        /* MMX routine handles all complete groups of 8 bytes */
        SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask);
        istart = length & ~7;
    }

    /* C routine: whole image, or the bytes left over after the MMX pass */
    for (i = istart; i < length; i++) {
        result = (int) Src1[i] / 2 + (int) Src2[i] / 2;
        Dest[i] = (unsigned char) result;
    }

    return 0;
}

/* SDL_imageFilterSubMMX: D = saturation0(S1 - S2), 8 bytes per iteration */
int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
#ifdef USE_MMX
    /* Fewer than 8 bytes: the loop counter would start at 0 and wrap around */
    if (length < 8) {
        return 0;
    }

    __asm__ __volatile__ (
        "pusha                      \n\t"
        "mov %2, %%eax              \n\t"   /* load Src1 address into eax */
        "mov %1, %%ebx              \n\t"   /* load Src2 address into ebx */
        "mov %0, %%edi              \n\t"   /* load Dest address into edi */
        "mov %3, %%ecx              \n\t"   /* load loop counter (SIZE) into ecx */
        "shr $3, %%ecx              \n\t"   /* counter/8 (MMX loads 8 bytes at a time) */
        ".align 16                  \n\t"   /* 16 byte alignment of the loop entry */
        "1:                         \n\t"
        "movq (%%eax), %%mm1        \n\t"   /* load 8 bytes from Src1 into mm1 */
        "psubusb (%%ebx), %%mm1     \n\t"   /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
        "movq %%mm1, (%%edi)        \n\t"   /* store result in Dest */
        "add $8, %%eax              \n\t"   /* increase Src1, Src2 and Dest */
        "add $8, %%ebx              \n\t"   /* register pointers by 8 */
        "add $8, %%edi              \n\t"
        "dec %%ecx                  \n\t"   /* decrease loop counter */
        "jnz 1b                     \n\t"   /* check loop termination, proceed if required */
        "emms                       \n\t"   /* exit MMX state */
        "popa                       \n\t"
        : /* no outputs */
        : "m" (Dest),                       /* %0 */
          "m" (Src2),                       /* %1 */
          "m" (Src1),                       /* %2 */
          "m" (length)                      /* %3 */
        : "memory"
    );
#endif
    return 0;
}

/*
 * SDL_imageFilterSub: D = saturation0(S1 - S2)
 *
 * Src1, Src2 - input byte arrays, each at least `length` bytes long
 * Dest       - output byte array, at least `length` bytes long
 * length     - number of bytes to process
 *
 * Returns 0 on success (including length == 0) and -1 when length is
 * negative or any of the pointers is NULL.
 */
int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
    int i;
    int istart = 0;
    int result;

    if (length == 0) {
        return 0;
    }
    if ((length < 0) || (Src1 == NULL) || (Src2 == NULL) || (Dest == NULL)) {
        return -1;
    }

    if (SDL_imageFilterMMXdetect() && (length > 7)) {
        /* MMX routine handles all complete groups of 8 bytes */
        SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
        istart = length & ~7;
    }

    /* C routine: whole image, or the bytes left over after the MMX pass */
    for (i = istart; i < length; i++) {
        result = (int) Src1[i] - (int) Src2[i];
        if (result < 0) {
            result = 0;
        }
        Dest[i] = (unsigned char) result;
    }

    return 0;
}

/*
 * SDL_imageFilterAbsDiffMMX: D = | S1 - S2 |, 8 bytes per iteration
 *
 * Computes both saturated differences (S1-S2 and S2-S1); one of them is
 * zero for every byte, so OR-ing them yields the absolute difference.
 */
int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
#ifdef USE_MMX
    /* Fewer than 8 bytes: the loop counter would start at 0 and wrap around */
    if (length < 8) {
        return 0;
    }

    __asm__ __volatile__ (
        "pusha                      \n\t"
        "mov %2, %%eax              \n\t"   /* load Src1 address into eax */
        "mov %1, %%ebx              \n\t"   /* load Src2 address into ebx */
        "mov %0, %%edi              \n\t"   /* load Dest address into edi */
        "mov %3, %%ecx              \n\t"   /* load loop counter (SIZE) into ecx */
        "shr $3, %%ecx              \n\t"   /* counter/8 (MMX loads 8 bytes at a time) */
        ".align 16                  \n\t"   /* 16 byte alignment of the loop entry */
        "1:                         \n\t"
        "movq (%%eax), %%mm1        \n\t"   /* load 8 bytes from Src1 into mm1 */
        "movq (%%ebx), %%mm2        \n\t"   /* load 8 bytes from Src2 into mm2 */
        "psubusb (%%ebx), %%mm1     \n\t"   /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
        "psubusb (%%eax), %%mm2     \n\t"   /* mm2=Src2-Src1 (sub 8 bytes with saturation) */
        "por %%mm2, %%mm1           \n\t"   /* combine both mm2 and mm1 results */
        "movq %%mm1, (%%edi)        \n\t"   /* store result in Dest */
        "add $8, %%eax              \n\t"   /* increase Src1, Src2 and Dest */
        "add $8, %%ebx              \n\t"   /* register pointers by 8 */
        "add $8, %%edi              \n\t"
        "dec %%ecx                  \n\t"   /* decrease loop counter */
        "jnz 1b                     \n\t"   /* check loop termination, proceed if required */
        "emms                       \n\t"   /* exit MMX state */
        "popa                       \n\t"
        : /* no outputs */
        : "m" (Dest),                       /* %0 */
          "m" (Src2),                       /* %1 */
          "m" (Src1),                       /* %2 */
          "m" (length)                      /* %3 */
        : "memory"
    );
#endif
    return 0;
}

/*
 * TRUNCATED SOURCE: the text of SDL_imageFilterAbsDiff stops in the middle
 * of its body in the copy this file was taken from.  The fragment is kept
 * below exactly as far as it was available and is disabled so the rest of
 * the file stays compilable.  Restore the complete function from the
 * original SDL_gfx sources before enabling it.
 */
#if 0
// SDL_imageFilterAbsDiff: D = | S1 - S2 |
int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length)
{
    unsigned int i, istart;
    unsigned char *cursrc1, *cursrc2, *curdst;
    int result;

    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
        /* MMX routine */
        SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);

        /* Check for unaligned bytes */
        if ((length & 7) > 0) {
            /* Setup to process unaligned bytes */
            istart = length & 0xfffffff8;
            cursrc1 = &Src1[istart];
            cursrc2 = &Src2[istart];
            curdst = &Dest[istart];
        } else {
            /* No unaligned bytes - we are done */
            return (0);
        }
    } else {
        /* Setup to process whole image */
        istart = 0;
        cursrc1 = Src1;
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -