⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sdl_imagefilter.c

📁 sdl的gfx开发包
💻 C
📖 第 1 页 / 共 5 页
字号:
      "movd      %%eax, %%mm2 \n\t"	// copy EAX into MM2      "punpckldq %%mm2, %%mm1 \n\t"	// fill higher words of MM1 with C       "xor       %%ecx, %%ecx \n\t"	// zero ECX      "mov           %3, %%cl \n\t"	// load N into CL      "movd      %%ecx, %%mm7 \n\t"	// copy N into MM7       "pxor      %%mm0, %%mm0 \n\t"	// zero MM0 register       "mov          %1, %%eax \n\t"	// load Src1 address into eax      "mov          %0, %%edi \n\t"	// load Dest address into edi      "mov          %2, %%ecx \n\t"	// load loop counter (SIZE) into ecx      "shr          $3, %%ecx \n\t"	// counter/8 (MMX loads 8 bytes at a time)       ".align 16              \n\t"	// 16 byte allignment of the loop entry      ".L1026:                \n\t" "movq    (%%eax), %%mm3 \n\t"	// load 8 bytes from Src1 into MM3      "movq      %%mm3, %%mm4 \n\t"	// copy MM3 into MM4       "punpcklbw %%mm0, %%mm3 \n\t"	// unpack low  bytes of SrcDest into words      "punpckhbw %%mm0, %%mm4 \n\t"	// unpack high bytes of SrcDest into words      "psrlw     %%mm7, %%mm3 \n\t"	// shift 4 WORDS of MM3 (N) bits to the right      "psrlw     %%mm7, %%mm4 \n\t"	// shift 4 WORDS of MM4 (N) bits to the right      "pmullw    %%mm1, %%mm3 \n\t"	// mul low  bytes of SrcDest by MM1      "pmullw    %%mm1, %%mm4 \n\t"	// mul high bytes of SrcDest by MM1      "packuswb  %%mm4, %%mm3 \n\t"	// pack words back into bytes with saturation      "movq    %%mm3, (%%edi) \n\t"	// store result in Dest       "add          $8, %%eax \n\t"	// increase Src1 register pointer by 8      "add          $8, %%edi \n\t"	// increase Dest register pointer by 8      "dec              %%ecx \n\t"	// decrease loop counter      "jnz             .L1026 \n\t"	// check loop termination, proceed if required       "emms                   \n\t"	// exit MMX state      "popa                   \n\t":"=m" (Dest)	// %0      :"m"(Src1),		// %1      "m"(length),		// %2      "m"(N),			// %3      "m"(C)			// %4	);#endif    return (0);}//  
//  SDL_imageFilterShiftRightAndMultByByte: D = saturation255((S >> N) * C)
/*
 * Shift every source byte right by N bits, multiply it by C and clamp the
 * product at 255.
 *
 * Src1   - source bytes
 * Dest   - destination bytes
 * length - number of bytes to process
 * N      - shift count, must be in 1..8
 * C      - multiplier
 *
 * Returns -1 if N is outside 1..8, or if length is negative or a pointer is
 * null while length is non-zero; returns 0 otherwise.
 *
 * When SDL_imageFilterMMXdetect() reports MMX and there are at least 8 bytes,
 * the MMX routine handles the whole 8-byte groups and the C loop below only
 * finishes the remaining (length & 7) bytes.
 */
int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N,
					   unsigned char C)
{
    unsigned int i, istart, count;
    int iC;
    int result;

    /* Check shift */
    if ((N > 8) || (N < 1)) {
	return (-1);
    }

    /* Nothing to do for an empty image */
    if (length == 0) {
	return (0);
    }

    /* A negative length would turn into a huge unsigned loop bound below */
    if ((length < 0) || !Src1 || !Dest) {
	return (-1);
    }
    count = (unsigned int) length;

    istart = 0;
    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
	SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C);
	/* Only the bytes past the last whole 8-byte group are left */
	istart = count & 0xfffffff8;
    }

    /* C routine to process the image (or just its unaligned tail) */
    iC = (int) C;
    for (i = istart; i < count; i++) {
	result = (int) (Src1[i] >> N) * iC;
	if (result > 255) {
	    result = 255;
	}
	Dest[i] = (unsigned char) result;
    }

    return (0);
}

//  SDL_imageFilterShiftLeftByteMMX: D = (S << N)
/*
 * MMX kernel for the byte-wise left shift. Only compiled when USE_MMX is
 * defined; otherwise the function does nothing and returns 0.
 *
 * Processes length/8 groups of 8 bytes; any remaining bytes are left to the
 * caller. Mask points at 8 bytes that are ANDed into the per-byte bit mask on
 * every step of the mask-building loop (the C wrapper passes 0xFE in every
 * byte, which clears one more low bit per step).
 *
 * NOTE(review): both loops are "dec / jnz" loops with no zero check, so this
 * looks like it relies on the caller guaranteeing N >= 1 and length >= 8, as
 * SDL_imageFilterShiftLeftByte does - confirm before calling it directly.
 *
 * Always returns 0.
 */
int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N,
				    unsigned char *Mask)
{
#ifdef USE_MMX
    asm volatile
     ("pusha		     \n\t"
      "movl         %4, %%edx \n\t"	// load Mask address into edx
      "movq    (%%edx), %%mm0 \n\t"	// load Mask into mm0
      "xor       %%ecx, %%ecx \n\t"	// zero ECX
      "mov           %3, %%cl \n\t"	// load loop counter (N) into CL
      "movd      %%ecx, %%mm3 \n\t"	// copy (N) into MM3
      "pcmpeqb   %%mm1, %%mm1 \n\t"	// generate all 1's in mm1
      ".L10270:               \n\t"	// ** Prepare proper bit-Mask in MM1 **
      "psllw        $1, %%mm1 \n\t"	// shift 4 WORDS of MM1 1 bit to the left
      // The .byte sequence below is the hand-encoded form of:
      //    "pand      %%mm0, %%mm1 \n\t"    // apply Mask to 8 BYTES of MM1
      ".byte     0x0f, 0xdb, 0xc8 \n\t"
      "dec %%cl               \n\t"	// decrease loop counter
      "jnz            .L10270 \n\t"	// check loop termination, proceed if required
      // ** Shift all bytes of the image **
      "mov          %1, %%eax \n\t"	// load Src1 address into eax
      "mov          %0, %%edi \n\t"	// load Dest address into edi
      "mov          %2, %%ecx \n\t"	// load loop counter (SIZE) into ecx
      "shr          $3, %%ecx \n\t"	// counter/8 (MMX loads 8 bytes at a time)
      ".align 16              \n\t"	// 16 byte alignment of the loop entry
      ".L10271:               \n\t"
      "movq    (%%eax), %%mm0 \n\t"	// load 8 bytes from Src1 into MM0
      "psllw     %%mm3, %%mm0 \n\t"	// shift 4 WORDS of MM0 (N) bits to the left
      // The .byte sequence below is the hand-encoded form of:
      //    "pand      %%mm1, %%mm0 \n\t"    // apply proper bit-Mask to 8 BYTES of MM0
      ".byte     0x0f, 0xdb, 0xc1 \n\t"
      "movq    %%mm0, (%%edi) \n\t"	// store result in Dest
      "add          $8, %%eax \n\t"	// increase Src1 register pointer by 8
      "add          $8, %%edi \n\t"	// increase Dest register pointer by 8
      "dec              %%ecx \n\t"	// decrease loop counter
      "jnz            .L10271 \n\t"	// check loop termination, proceed if required
      "emms                   \n\t"	// exit MMX state
      "popa                   \n\t":"=m" (Dest)	// %0
      :"m"(Src1),		// %1
      "m"(length),		// %2
      "m"(N),			// %3
      "m"(Mask)			// %4
	);
#endif
    return (0);
}

//  SDL_imageFilterShiftLeftByte: D = (S << N)
/*
 * Shift every source byte left by N bits, keeping only the low 8 bits of the
 * result (no saturation).
 *
 * Src1   - source bytes
 * Dest   - destination bytes
 * length - number of bytes to process
 * N      - shift count, must be in 1..8
 *
 * Returns -1 if N is outside 1..8, or if length is negative or a pointer is
 * null while length is non-zero; returns 0 otherwise.
 *
 * The MMX routine is used only when MMX is detected and the image is at least
 * 8 bytes long; the C loop then finishes the remaining (length & 7) bytes.
 */
int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N)
{
    /* ANDed into the MMX bit mask once per shifted bit: clears the low bit of each byte */
    static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
    unsigned int i, istart, count;
    int result;

    /* min. 1 bit and max. 8 bit shift is allowed */
    if ((N > 8) || (N < 1)) {
	return (-1);
    }

    /* Nothing to do for an empty image */
    if (length == 0) {
	return (0);
    }

    /* A negative length would turn into a huge unsigned loop bound below */
    if ((length < 0) || !Src1 || !Dest) {
	return (-1);
    }
    count = (unsigned int) length;

    istart = 0;
    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
	SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask);
	/* Only the bytes past the last whole 8-byte group are left */
	istart = count & 0xfffffff8;
    }

    /* C routine to process the image (or just its unaligned tail) */
    for (i = istart; i < count; i++) {
	result = ((int) Src1[i] << N) & 0xff;
	Dest[i] = (unsigned char) result;
    }

    return (0);
}

//  SDL_imageFilterShiftLeftMMX: D = saturation255(S << N)
/*
 * MMX kernel for the saturating left shift. Only compiled when USE_MMX is
 * defined; otherwise the function does nothing and returns 0.
 *
 * Each group of 8 bytes is unpacked to 16-bit words, shifted left by N and
 * packed back to bytes with unsigned saturation. Two loops exist:
 *   .L10280 - used when N <= 7: the shifted words cannot have bit 15 set, so
 *             they can be packed directly.
 *   .L10281 - used when N > 7: words may have bit 15 set, which the signed
 *             pack would otherwise clamp to 0, so the absolute value of each
 *             word is taken first.
 *
 * Processes length/8 groups of 8 bytes; any remaining bytes are left to the
 * caller.
 *
 * NOTE(review): the loops are "dec / jnz" with no zero check, so this looks
 * like it relies on the caller guaranteeing length >= 8, as
 * SDL_imageFilterShiftLeft does - confirm before calling it directly.
 *
 * Always returns 0.
 */
int SDL_imageFilterShiftLeftMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N)
{
#ifdef USE_MMX
    asm volatile
     ("pusha		     \n\t"
      "xor       %%eax, %%eax \n\t"	// zero EAX
      "mov           %3, %%al \n\t"	// load N into AL
      "movd      %%eax, %%mm7 \n\t"	// copy N into MM7
      "pxor      %%mm0, %%mm0 \n\t"	// zero MM0 register
      "mov         %1, %%eax  \n\t"	// load Src1 address into eax
      "mov         %0, %%edi  \n\t"	// load Dest address into edi
      "mov         %2, %%ecx  \n\t"	// load loop counter (SIZE) into ecx
      "shr         $3, %%ecx  \n\t"	// counter/8 (MMX loads 8 bytes at a time)
      // Compare N in memory: AL no longer holds N here because EAX was
      // reloaded with the Src1 address above.
      "cmpb          $7, %3   \n\t"	// if (N <= 7) execute more efficient code
      "jg             .L10281 \n\t"
      ".align 16              \n\t"	// 16 byte alignment of the loop entry
      ".L10280:               \n\t"
      "movq    (%%eax), %%mm3 \n\t"	// load 8 bytes from Src1 into MM3
      "movq      %%mm3, %%mm4 \n\t"	// copy MM3 into MM4
      "punpcklbw %%mm0, %%mm3 \n\t"	// unpack low  bytes of SrcDest into words
      "punpckhbw %%mm0, %%mm4 \n\t"	// unpack high bytes of SrcDest into words
      "psllw     %%mm7, %%mm3 \n\t"	// shift 4 WORDS of MM3 (N) bits to the left
      "psllw     %%mm7, %%mm4 \n\t"	// shift 4 WORDS of MM4 (N) bits to the left
      "packuswb  %%mm4, %%mm3 \n\t"	// pack words back into bytes with saturation
      "movq    %%mm3, (%%edi) \n\t"	// store result in Dest
      "add          $8, %%eax \n\t"	// increase Src1 register pointer by 8
      "add          $8, %%edi \n\t"	// increase Dest register pointer by 8
      "dec              %%ecx \n\t"	// decrease loop counter
      "jnz            .L10280 \n\t"	// check loop termination, proceed if required
      "jmp            .L10282 \n\t"
      ".align 16              \n\t"	// 16 byte alignment of the loop entry
      ".L10281:               \n\t"
      "movq    (%%eax), %%mm3 \n\t"	// load 8 bytes from Src1 into MM3
      "movq      %%mm3, %%mm4 \n\t"	// copy MM3 into MM4
      "punpcklbw %%mm0, %%mm3 \n\t"	// unpack low  bytes of SrcDest into words
      "punpckhbw %%mm0, %%mm4 \n\t"	// unpack high bytes of SrcDest into words
      "psllw     %%mm7, %%mm3 \n\t"	// shift 4 WORDS of MM3 (N) bits to the left
      "psllw     %%mm7, %%mm4 \n\t"	// shift 4 WORDS of MM4 (N) bits to the left
      // ** Take abs value of the signed words **
      "movq      %%mm3, %%mm5 \n\t"	// copy mm3 into mm5
      "movq      %%mm4, %%mm6 \n\t"	// copy mm4 into mm6
      "psraw       $15, %%mm5 \n\t"	// fill mm5 words with word sign bit
      "psraw       $15, %%mm6 \n\t"	// fill mm6 words with word sign bit
      "pxor      %%mm5, %%mm3 \n\t"	// take 1's complement of only neg. words
      "pxor      %%mm6, %%mm4 \n\t"	// take 1's complement of only neg. words
      "psubsw    %%mm5, %%mm3 \n\t"	// add 1 to only neg. words, W-(-1) or W-0
      "psubsw    %%mm6, %%mm4 \n\t"	// add 1 to only neg. words, W-(-1) or W-0
      "packuswb  %%mm4, %%mm3 \n\t"	// pack words back into bytes with saturation
      "movq    %%mm3, (%%edi) \n\t"	// store result in Dest
      "add          $8, %%eax \n\t"	// increase Src1 register pointer by 8
      "add          $8, %%edi \n\t"	// increase Dest register pointer by 8
      "dec              %%ecx \n\t"	// decrease loop counter
      "jnz            .L10281 \n\t"	// check loop termination, proceed if required
      ".L10282:               \n\t"
      "emms                   \n\t"	// exit MMX state
      "popa                   \n\t":"=m" (Dest)	// %0
      :"m"(Src1),		// %1
      "m"(length),		// %2
      "m"(N)			// %3
	);
#endif
    return (0);
}

//  SDL_imageFilterShiftLeft: D = saturation255(S << N)
/*
 * Shift every source byte left by N bits and clamp the result at 255.
 *
 * Src1   - source bytes
 * Dest   - destination bytes
 * length - number of bytes to process
 * N      - shift count, must be in 1..8
 *
 * Returns -1 if N is outside 1..8, or if length is negative or a pointer is
 * null while length is non-zero; returns 0 otherwise.
 *
 * The MMX routine is used only when MMX is detected and the image is at least
 * 8 bytes long; the C loop then finishes the remaining (length & 7) bytes.
 */
int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, int length, unsigned char N)
{
    unsigned int i, istart, count;
    int result;

    /* min. 1 bit and max. 8 bit shift is allowed */
    if ((N > 8) || (N < 1)) {
	return (-1);
    }

    /* Nothing to do for an empty image */
    if (length == 0) {
	return (0);
    }

    /* A negative length would turn into a huge unsigned loop bound below */
    if ((length < 0) || !Src1 || !Dest) {
	return (-1);
    }
    count = (unsigned int) length;

    istart = 0;
    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
	SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N);
	/* Only the bytes past the last whole 8-byte group are left */
	istart = count & 0xfffffff8;
    }

    /* C routine to process the image (or just its unaligned tail) */
    for (i = istart; i < count; i++) {
	result = (int) Src1[i] << N;
	if (result > 255) {
	    result = 255;
	}
	Dest[i] = (unsigned char) result;
    }

    return (0);
}

//  SDL_imageFilterBinarizeUsingThresholdMMX: D = (S >= T) ? 
255:0int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, int length, unsigned char T){#ifdef USE_MMX    asm volatile     ("pusha		     \n\t"      // ** Duplicate T in 8 bytes of MM3 **      "pcmpeqb   %%mm1, %%mm1 \n\t"	// generate all 1's in mm1      "pcmpeqb   %%mm2, %%mm2 \n\t"	// generate all 1's in mm2       "mov           %3, %%al \n\t"	// load T into AL      "mov         %%al, %%ah \n\t"	// copy AL into AH      "mov         %%ax, %%bx \n\t"	// copy AX into BX      "shl         $16, %%eax \n\t"	// shift 2 bytes of EAX left      "mov         %%bx, %%ax \n\t"	// copy BX into AX      "movd      %%eax, %%mm3 \n\t"	// copy EAX into MM3      "movd      %%eax, %%mm4 \n\t"	// copy EAX into MM4      "punpckldq %%mm4, %%mm3 \n\t"	// fill higher bytes of MM3 with T       "psubusb   %%mm3, %%mm2 \n\t"	// store 0xFF - T in MM2       "mov          %1, %%eax \n\t"	// load Src1 address into eax      "mov          %0, %%edi \n\t"	// load Dest address into edi      "mov          %2, %%ecx \n\t"	// load loop counter (SIZE) into ecx      "shr          $3, %%ecx \n\t"	// counter/8 (MMX loads 8 bytes at a time)       ".align 16              \n\t"	// 16 byte alignment of the loop entry      ".L1029:                \n\t" "movq    (%%eax), %%mm0 \n\t"	// load 8 bytes from SrcDest into MM0      "paddusb   %%mm2, %%mm0 \n\t"	// MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation)      "pcmpeqb   %%mm1, %%mm0 \n\t"	// binarize 255:0, comparing to 255      "movq    %%mm0, (%%edi) \n\t"	// store result in SrcDest       "add          $8, %%eax \n\t"	// increase Src1 register pointer by 8      "add          $8, %%edi \n\t"	// increase Dest register pointer by 8      "dec              %%ecx \n\t"	// decrease loop counter      "jnz             .L1029 \n\t"	// check loop termination, proceed if required       "emms                   \n\t"	// exit MMX state      "popa                   \n\t":"=m" (Dest)	// %0      :"m"(Src1),		// %1      "m"(length),		// %2      
"m"(T)			// %3	);#endif    return (0);}//  SDL_imageFilterBinarizeUsingThreshold: D = (S >= T) ? 255:0int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, int length, unsigned char T){    unsigned int i, istart;    unsigned char *cursrc1;    unsigned char *curdest;    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {	SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);	/* Check for unaligned bytes */	if ((length & 7) > 0) {	    /* Setup to process unaligned bytes */	    istart = length & 0xfffffff8;	    cursrc1 = &Src1[istart];	    curdest = &Dest[istart];	} else {	    /* No unaligned bytes - we are done */	    return (0);	}    } else {	/* Setup to process whole image */	istart = 0;	cursrc1 = Src1;	curdest = Dest;    }    /* C routine to process image */    for (i = istart; i < length; i++) {	*curdest = ((unsigned char) *cursrc1 >= T) ? 255 : 0;	/* Advance pointers */	cursrc1++;	curd

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -