📄 pnggccrd.c
字号:
/* NOTE(review): the original line breaks appear to have been lost in this copy.
 * As written, the first "//" below comments out everything that follows it on
 * this physical line, including _const6, the #ifdef/#endif directives and the
 * whole of png_squelch_warnings().  Restore the line breaks before compiling.
 *
 * In order, this line contains:
 *   - file-scope 64-bit constants _mask48_0, _const4 and _const6 (_const5 is
 *     commented out in the source and marked NOT USED);
 *   - statics _FullLength, _MMXLength, _dif, _patemp, _pbtemp and _pctemp,
 *     declared only when PNG_THREAD_UNSAFE_OK is defined; the source's own
 *     comment says they serve the row-filter routines and probably defeat
 *     libpng's thread safety;
 *   - png_squelch_warnings(void): assigns each of those statics, and the
 *     _maskNN_M constants, to itself and does nothing else -- presumably to
 *     silence unused-variable warnings, as the name suggests (TODO confirm).
 *     _mask8_0 .. _mask48_1 are not declared in the visible text; presumably
 *     they are defined earlier in the file;
 *   - an #endif for PNG_ASSEMBLER_CODE_SUPPORTED whose opening directive is
 *     not visible here;
 *   - _mmx_supported, initialised to 2; png_combine_row() treats the value 2
 *     as "not yet initialised", issues a warning and calls png_mmx_support();
 *   - under PNG_HAVE_ASSEMBLER_COMBINE_ROW, the bytes-per-pixel macros
 *     BPP2, BPP3, BPP4, BPP6 and BPP8;
 *   - the opening of the comment describing png_combine_row(), which
 *     continues on the next line.
 */
static unsigned long long _mask48_0 = 0x4040808080808080LL;static unsigned long long _const4 = 0x0000000000FFFFFFLL;//static unsigned long long _const5 = 0x000000FFFFFF0000LL; // NOT USEDstatic unsigned long long _const6 = 0x00000000000000FFLL;// These are used in the row-filter routines and should/would be local// variables if not for gcc addressing limitations.// WARNING: Their presence probably defeats the thread safety of libpng.#ifdef PNG_THREAD_UNSAFE_OKstatic png_uint_32 _FullLength;static png_uint_32 _MMXLength;static int _dif;static int _patemp; // temp variables for Paeth routinestatic int _pbtemp;static int _pctemp;#endifvoid /* PRIVATE */png_squelch_warnings(void){#ifdef PNG_THREAD_UNSAFE_OK _dif = _dif; _patemp = _patemp; _pbtemp = _pbtemp; _pctemp = _pctemp; _MMXLength = _MMXLength;#endif _const4 = _const4; _const6 = _const6; _mask8_0 = _mask8_0; _mask16_1 = _mask16_1; _mask16_0 = _mask16_0; _mask24_2 = _mask24_2; _mask24_1 = _mask24_1; _mask24_0 = _mask24_0; _mask32_3 = _mask32_3; _mask32_2 = _mask32_2; _mask32_1 = _mask32_1; _mask32_0 = _mask32_0; _mask48_5 = _mask48_5; _mask48_4 = _mask48_4; _mask48_3 = _mask48_3; _mask48_2 = _mask48_2; _mask48_1 = _mask48_1; _mask48_0 = _mask48_0;}#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */static int _mmx_supported = 2;/*===========================================================================*//* *//* P N G _ C O M B I N E _ R O W *//* *//*===========================================================================*/#if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)#define BPP2 2#define BPP3 3 /* bytes per pixel (a.k.a. pixel_bytes) */#define BPP4 4#define BPP6 6 /* (defined only to help avoid cut-and-paste errors) */#define BPP8 8/* Combines the row recently read in with the previous row. This routine takes care of alpha and transparency if requested. This routine also handles the two methods of progressive display of interlaced images, depending on the mask value. 
The mask value describes which pixels are to be combined with the row. The pattern always repeats every 8 pixels, so just 8 bits are needed. A one indicates the pixel is to be combined; a zero indicates the pixel is to be skipped. This is in addition to any alpha or transparency value associated with the pixel. If you want all pixels to be combined, pass 0xff (255) in mask. *//* Use this routine for the x86 platform - it uses a faster MMX routine if the machine supports MMX. */void /* PRIVATE */png_combine_row(png_structp png_ptr, png_bytep row, int mask){ png_debug(1, "in png_combine_row (pnggccrd.c)\n");#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) if (_mmx_supported == 2) { /* this should have happened in png_init_mmx_flags() already */ png_warning(png_ptr, "asm_flags may not have been initialized"); png_mmx_support(); }#endif if (mask == 0xff) { png_debug(2,"mask == 0xff: doing single png_memcpy()\n"); png_memcpy(row, png_ptr->row_buf + 1, (png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3)); } else /* (png_combine_row() is never called with mask == 0) */ { switch (png_ptr->row_info.pixel_depth) { case 1: /* png_ptr->row_info.pixel_depth */ { png_bytep sp; png_bytep dp; int s_inc, s_start, s_end; int m; int shift; png_uint_32 i; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 7; s_inc = 1; } else#endif { s_start = 7; s_end = 0; s_inc = -1; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { int value; value = (*sp >> shift) & 0x1; *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 2: /* png_ptr->row_info.pixel_depth */ { png_bytep sp; png_bytep dp; int s_start, s_end, s_inc; int m; int shift; png_uint_32 i; int value; sp = png_ptr->row_buf + 1; dp = row; m = 
0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 6; s_inc = 2; } else#endif { s_start = 6; s_end = 0; s_inc = -2; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { value = (*sp >> shift) & 0x3; *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 4: /* png_ptr->row_info.pixel_depth */ { png_bytep sp; png_bytep dp; int s_start, s_end, s_inc; int m; int shift; png_uint_32 i; int value; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 4; s_inc = 4; } else#endif { s_start = 4; s_end = 0; s_inc = -4; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { value = (*sp >> shift) & 0xf; *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 8: /* png_ptr->row_info.pixel_depth */ { png_bytep srcptr; png_bytep dstptr;#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)#if !defined(PNG_1_0_X) if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW) /* && _mmx_supported */ )#else if (_mmx_supported)#endif { png_uint_32 len; int diff; int dummy_value_a; // fix 'forbidden register spilled' error int dummy_value_d; int dummy_value_c; int dummy_value_S; int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = (int) (png_ptr->width & 7); // amount lost __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 
\n\t" "punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks "movq _mask8_0, %%mm0 \n\t" "pand %%mm7, %%mm0 \n\t" // nonzero if keep byte "pcmpeqb %%mm6, %%mm0 \n\t" // zeros->1s, v versa// preload "movl len, %%ecx \n\t" // load length of line// preload "movl srcptr, %%esi \n\t" // load source// preload "movl dstptr, %%edi \n\t" // load dest "cmpl $0, %%ecx \n\t" // len == 0 ? "je mainloop8end \n\t" "mainloop8: \n\t" "movq (%%esi), %%mm4 \n\t" // *srcptr "pand %%mm0, %%mm4 \n\t" "movq %%mm0, %%mm6 \n\t" "pandn (%%edi), %%mm6 \n\t" // *dstptr "por %%mm6, %%mm4 \n\t" "movq %%mm4, (%%edi) \n\t" "addl $8, %%esi \n\t" // inc by 8 bytes processed "addl $8, %%edi \n\t" "subl $8, %%ecx \n\t" // dec by 8 pixels processed "ja mainloop8 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -