📄 sdl_imagefilter.c
字号:
cursrc2 = Src2; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { result = abs((int) *cursrc1 - (int) *cursrc2); *curdst = (unsigned char) result; /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterMult: D = saturation255(S1 * S2)int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%eax \n\t" // load Src1 address into eax "mov %1, %%ebx \n\t" // load Src2 address into ebx "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "pxor %%mm0, %%mm0 \n\t" // zero mm0 register ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1014: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4 "punpcklbw %%mm0, %%mm1 \n\t" // unpack low bytes of Src1 into words "punpckhbw %%mm0, %%mm2 \n\t" // unpack high bytes of Src1 into words "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of Src2 into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of Src2 into words "pmullw %%mm3, %%mm1 \n\t" // mul low bytes of Src1 and Src2 "pmullw %%mm4, %%mm2 \n\t" // mul high bytes of Src1 and Src2 // Take abs value of the results (signed words) "movq %%mm1, %%mm5 \n\t" // copy mm1 into mm5 "movq %%mm2, %%mm6 \n\t" // copy mm2 into mm6 "psraw $15, %%mm5 \n\t" // fill mm5 words with word sign bit "psraw $15, %%mm6 \n\t" // fill mm6 words with word sign bit "pxor %%mm5, %%mm1 \n\t" // take 1's compliment of only neg. words "pxor %%mm6, %%mm2 \n\t" // take 1's compliment of only neg. words "psubsw %%mm5, %%mm1 \n\t" // add 1 to only neg. words, W-(-1) or W-0 "psubsw %%mm6, %%mm2 \n\t" // add 1 to only neg. words, W-(-1) or W-0 "packuswb %%mm2, %%mm1 \n\t" // pack words back into bytes with saturation "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L1014 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterMult: D = saturation255(S1 * S2)int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *cursrc2, *curdst; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterMultMMX(Src1, Src2, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; cursrc2 = &Src2[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; cursrc2 = Src2; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { /* NOTE: this is probably wrong - dunno what the MMX code does */ result = (int) *cursrc1 * (int) *cursrc2; if (result > 255) result = 255; *curdst = (unsigned char) result; /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterMultNor: D = S1 * S2 (non-MMX)int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%edx \n\t" // load Src1 address into edx "mov %1, %%esi \n\t" // load Src2 address into esi "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10141: \n\t" "mov (%%edx), %%al \n\t" // load a byte from Src1 "mulb (%%esi) \n\t" // mul with a byte from Src2 ".L10142: \n\t" "mov %%al, (%%edi) \n\t" // move a byte result to Dest "inc %%edx \n\t" // increment Src1, Src2, Dest "inc %%esi \n\t" // pointer registers by one "inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L10141 \n\t" // check loop termination, proceed if required "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterMultNor: D = S1 * S2int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *cursrc2, *curdst; int result; if (length > 0) { /* ASM routine */ SDL_imageFilterMultNorASM(Src1, Src2, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; cursrc2 = &Src2[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* No bytes - we are done */ return (0); } /* C routine to process image */ for (i = istart; i < length; i++) { result = (int) *cursrc1 * (int) *cursrc2; *curdst = (unsigned char) result; /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterMultDivby2: D = saturation255(S1/2 * S2)int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%eax \n\t" // load Src1 address into eax "mov %1, %%ebx \n\t" // load Src2 address into ebx "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "pxor %%mm0, %%mm0 \n\t" // zero mm0 register ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1015: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4 "punpcklbw %%mm0, %%mm1 \n\t" // unpack low bytes of Src1 into words "punpckhbw %%mm0, %%mm2 \n\t" // unpack high bytes of Src1 into words "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of Src2 into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of Src2 into words "psrlw $1, %%mm1 \n\t" // divide mm1 words by 2, Src1 low bytes "psrlw $1, %%mm2 \n\t" // divide mm2 words by 2, Src1 high bytes "pmullw %%mm3, %%mm1 \n\t" // mul low bytes of Src1 and Src2 "pmullw %%mm4, %%mm2 \n\t" // mul high bytes of Src1 and Src2 "packuswb %%mm2, %%mm1 \n\t" // pack words back into bytes with saturation "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L1015 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterMultDivby2: D = saturation255(S1/2 * S2)int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *cursrc2, *curdst; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; cursrc2 = &Src2[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; cursrc2 = Src2; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { result = ((int) *cursrc1 / 2) * (int) *cursrc2; if (result > 255) result = 255; *curdst = (unsigned char) result; /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterMultDivby4: D = saturation255(S1/2 * S2/2)int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%eax \n\t" // load Src1 address into eax "mov %1, %%ebx \n\t" // load Src2 address into ebx "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) "pxor %%mm0, %%mm0 \n\t" // zero mm0 register ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1016: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4 "punpcklbw %%mm0, %%mm1 \n\t" // unpack low bytes of Src1 into words "punpckhbw %%mm0, %%mm2 \n\t" // unpack high bytes of Src1 into words "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of Src2 into words "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of Src2 into words "psrlw $1, %%mm1 \n\t" // divide mm1 words by 2, Src1 low bytes "psrlw $1, %%mm2 \n\t" // divide mm2 words by 2, Src1 high bytes "psrlw $1, %%mm3 \n\t" // divide mm3 words by 2, Src2 low bytes "psrlw $1, %%mm4 \n\t" // divide mm4 words by 2, Src2 high bytes "pmullw %%mm3, %%mm1 \n\t" // mul low bytes of Src1 and Src2 "pmullw %%mm4, %%mm2 \n\t" // mul high bytes of Src1 and Src2 "packuswb %%mm2, %%mm1 \n\t" // pack words back into bytes with saturation "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L1016 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterMultDivby4: D = saturation255(S1/2 * S2/2)int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *cursrc2, *curdst; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; cursrc2 = &Src2[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; cursrc2 = Src2; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { result = ((int) *cursrc1 / 2) * ((int) *cursrc2 / 2); if (result > 255) result = 255; *curdst = (unsigned char) result; /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterBitAnd: D = S1 & S2int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%eax \n\t" // load Src1 address into eax
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -