📄 sdl_imagefilter.c
字号:
"mov %1, %%ebx \n\t" // load Src2 address into ebx "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "pand (%%ebx), %%mm1 \n\t" // mm1=Src1&Src2 "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L1017 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterBitAnd: D = S1 & S2int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *cursrc2, *curdst;// if ((SDL_imageFilterMMXdetect()>0) && (length>7)) { if (length > 7) { /* Call MMX routine */ SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; cursrc2 = &Src2[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; cursrc2 = Src2; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { *curdst = (*cursrc1) & (*cursrc2); /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterBitOr: D = S1 | S2int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%eax \n\t" // load Src1 address into eax "mov %1, %%ebx \n\t" // load Src2 address into ebx "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L91017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1 "por (%%ebx), %%mm1 \n\t" // mm1=Src1|Src2 "movq %%mm1, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%ebx \n\t" // register pointers by 8 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L91017 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterBitOr: D = S1 | S2int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *cursrc2, *curdst; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; cursrc2 = &Src2[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; cursrc2 = Src2; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { *curdst = *cursrc1 | *cursrc2; /* Advance pointers */ cursrc1++; cursrc2++; curdst++; } return (0);}// SDL_imageFilterDiv: D = S1 / S2 (non-MMX)int SDL_imageFilterDivASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "mov %2, %%edx \n\t" // load Src1 address into edx "mov %1, %%esi \n\t" // load Src2 address into esi "mov %0, %%edi \n\t" // load Dest address into edi "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx ".align 16 \n\t" // 16 byte allignment of the loop entry ".L10191: \n\t" "mov (%%esi), %%bl \n\t" // load a byte from Src2 "cmp $0, %%bl \n\t" // check if it zero "jnz .L10192 \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!! "jmp .L10193 \n\t" ".L10192: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register "mov (%%edx), %%al \n\t" // load a byte from Src1 into AL "div %%bl \n\t" // divide AL by BL "mov %%al, (%%edi) \n\t" // move a byte result to Dest ".L10193: \n\t" "inc %%edx \n\t" // increment Src1, Src2, Dest "inc %%esi \n\t" // pointer registers by one "inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L10191 \n\t" // check loop termination, proceed if required "popa \n\t":"=m" (Dest) // %0 :"m"(Src2), // %1 "m"(Src1), // %2 "m"(length) // %3 );#endif return (0);}// SDL_imageFilterDiv: D = S1 / S2 (non-MMX!)int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, int length){ if (length > 0) { /* Call ASM routine */ SDL_imageFilterDivASM(Src1, Src2, Dest, length); /* Never unaligned bytes - we are done */ return (0); } else { return (-1); }}// ------------------------------------------------------------------------------------// SDL_imageFilterBitNegation: D = !Sint SDL_imageFilterBitNegationMMX(unsigned char *Src1, unsigned char *Dest, int length){#ifdef USE_MMX asm volatile ("pusha \n\t" "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1 "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L91117: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1 "pxor %%mm1, %%mm0 \n\t" // negate mm0 by xoring with mm1 "movq %%mm0, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1, Src2 and Dest "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter "jnz .L91117 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length) // %2 );#endif return (0);}// SDL_imageFilterBitNegation: D = !Sint SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, int length){ unsigned int i, istart; unsigned char *cursrc1, *curdst; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterBitNegationMMX(Src1, Dest, length); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdst = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdst = Dest; } /* C routine to process image */ for (i = istart; i < length; i++) { *curdst = ~(*cursrc1); /* Advance pointers */ cursrc1++; curdst++; } return (0);}// SDL_imageFilterAddByteMMX: D = saturation255(S + C)int SDL_imageFilterAddByteMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){#ifdef USE_MMX asm volatile ("pusha \n\t" // ** Duplicate C in 8 bytes of MM1 ** "mov %3, %%al \n\t" // load C into AL "mov %%al, %%ah \n\t" // copy AL into AH "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX "movd %%eax, %%mm1 \n\t" // copy EAX into MM1 "movd %%eax, %%mm2 \n\t" // copy EAX into MM2 "punpckldq %%mm2, %%mm1 \n\t" // fill higher bytes of MM1 with C "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1021: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation) "movq %%mm0, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Dest register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8 "dec %%ecx \n\t" // decrease loop counter "jnz .L1021 \n\t" // check loop termination, proceed if required "emms \n\t" // exit MMX state "popa \n\t":"=m" (Dest) // %0 :"m"(Src1), // %1 "m"(length), // %2 "m"(C) // %3 );#endif return (0);}// SDL_imageFilterAddByte: D = saturation255(S + C)int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C){ unsigned int i, istart; int iC; unsigned char *cursrc1, *curdest; int result; if ((SDL_imageFilterMMXdetect()) && (length > 7)) { /* MMX routine */ SDL_imageFilterAddByteMMX(Src1, Dest, length, C); /* Check for unaligned bytes */ if ((length & 7) > 0) { /* Setup to process unaligned bytes */ istart = length & 0xfffffff8; cursrc1 = &Src1[istart]; curdest = &Dest[istart]; } else { /* No unaligned bytes - we are done */ return (0); } } else { /* Setup to process whole image */ istart = 0; cursrc1 = Src1; curdest = Dest; } /* C routine to process image */ iC = (int) C; for (i = istart; i < length; i++) { result = (int) *cursrc1 + iC; if (result > 255) result = 255; *curdest = (unsigned char) result; /* Advance pointers */ cursrc1++; curdest++; } return (0);}// SDL_imageFilterAddByteToHalfMMX: D = saturation255(S/2 + C)int SDL_imageFilterAddByteToHalfMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char C, unsigned char *Mask){#ifdef USE_MMX asm volatile ("pusha \n\t" // ** Duplicate C in 8 bytes of MM1 ** "mov %3, %%al \n\t" // load C into AL "mov %%al, %%ah \n\t" // copy AL into AH "mov %%ax, %%bx \n\t" // copy AX into BX "shl $16, %%eax \n\t" // shift 2 bytes of EAX left "mov %%bx, %%ax \n\t" // copy BX into AX "movd %%eax, %%mm1 \n\t" // copy EAX into MM1 "movd %%eax, %%mm2 \n\t" // copy EAX into MM2 "punpckldq %%mm2, %%mm1 \n\t" // fill higher bytes of MM1 with C "movl %4, %%edx \n\t" // load Mask address into edx "movq (%%edx), %%mm0 \n\t" // load Mask into mm0 "mov %1, %%eax \n\t" // load Src1 address into eax "mov %0, %%edi \n\t" // load Dest address into edi "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time) ".align 16 \n\t" // 16 byte allignment of the loop entry ".L1022: \n\t" "movq (%%eax), %%mm2 \n\t" // load 8 bytes from Src1 into MM2 "psrlw $1, %%mm2 \n\t" // shift 4 WORDS of MM2 1 bit to the right // "pand %%mm0, %%mm2 \n\t" // apply Mask to 8 BYTES of MM2 ".byte 0x0f, 0xdb, 0xd0 \n\t" "paddusb %%mm1, %%mm2 \n\t" // MM2=SrcDest+C (add 8 bytes with saturation) "movq %%mm2, (%%edi) \n\t" // store result in Dest "add $8, %%eax \n\t" // increase Src1 register pointer by 8 "add $8, %%edi \n\t" // increase Dest register pointer by 8
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -