📄 pnggccrd.c
字号:
/* File-scope scratch integers (internal linkage).
 * NOTE(review): neither variable is referenced in the visible part of this
 * file.  Presumably they are temporaries used by inline-assembly code
 * elsewhere in the file and are addressed by symbol name, the way _unmask is
 * in png_combine_row() -- confirm before renaming or removing them. */
static int _pbtemp;static int _pctemp;//===========================================================================//// //// P N G _ C O M B I N E _ R O W //// ////===========================================================================//#if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)/* Combines the row recently read in with the previous row. This routine takes care of alpha and transparency if requested. This routine also handles the two methods of progressive display of interlaced images, depending on the mask value. The mask value describes which pixels are to be combined with the row. The pattern always repeats every 8 pixels, so just 8 bits are needed. A one indicates the pixel is to be combined; a zero indicates the pixel is to be skipped. This is in addition to any alpha or transparency value associated with the pixel. If you want all pixels to be combined, pass 0xff (255) in mask. *//* Use this routine for the x86 platform - it uses a faster MMX routine if the machine supports MMX. 
*/void /* PRIVATE */png_combine_row(png_structp png_ptr, png_bytep row, int mask){ png_debug(1,"in png_combine_row_asm\n"); if (_mmx_supported == 2) { png_mmx_support(); } if (mask == 0xff) { png_memcpy(row, png_ptr->row_buf + 1, (png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3)); } /* GRR: png_combine_row() never called with mask == 0 */ else { switch (png_ptr->row_info.pixel_depth) { case 1: // png_ptr->row_info.pixel_depth { png_bytep sp; png_bytep dp; int s_inc, s_start, s_end; int m; int shift; png_uint_32 i; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 7; s_inc = 1; } else#endif { s_start = 7; s_end = 0; s_inc = -1; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { int value; value = (*sp >> shift) & 0x1; *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 2: // png_ptr->row_info.pixel_depth { png_bytep sp; png_bytep dp; int s_start, s_end, s_inc; int m; int shift; png_uint_32 i; int value; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 6; s_inc = 2; } else#endif { s_start = 6; s_end = 0; s_inc = -2; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { value = (*sp >> shift) & 0x3; *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 4: // png_ptr->row_info.pixel_depth { png_bytep sp; png_bytep dp; int s_start, s_end, s_inc; int m; int shift; png_uint_32 i; int value; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if 
(png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 4; s_inc = 4; } else#endif { s_start = 4; s_end = 0; s_inc = -4; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { value = (*sp >> shift) & 0xf; *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 8: // png_ptr->row_info.pixel_depth { png_bytep srcptr; png_bytep dstptr; if ( _mmx_supported ) { png_uint_32 len; int diff; int dummy_value_a; // fix 'forbidden register spilled' error int dummy_value_d; int dummy_value_c; int dummy_value_S; int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = png_ptr->width & 7; // amount lost __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 \n\t" "punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks "movq _mask8_0, %%mm0 \n\t" "pand %%mm7, %%mm0 \n\t" // nonzero if keep byte "pcmpeqb %%mm6, %%mm0 \n\t" // zeros->1s, v versa// preload "movl len, %%ecx \n\t" // load length of line// preload "movl srcptr, %%esi \n\t" // load source// preload "movl dstptr, %%edi \n\t" // load dest "cmpl $0, %%ecx \n\t" // len == 0 ? 
"je mainloop8end \n\t" "mainloop8: \n\t" "movq (%%esi), %%mm4 \n\t" // *srcptr "pand %%mm0, %%mm4 \n\t" "movq %%mm0, %%mm6 \n\t" "pandn (%%edi), %%mm6 \n\t" // *dstptr "por %%mm6, %%mm4 \n\t" "movq %%mm4, (%%edi) \n\t" "addl $8, %%esi \n\t" // inc by 8 bytes processed "addl $8, %%edi \n\t" "subl $8, %%ecx \n\t" // dec by 8 pixels processed "ja mainloop8 \n\t" "mainloop8end: \n\t"// preload "movl diff, %%ecx \n\t" // (diff is in eax) "movl %%eax, %%ecx \n\t" "cmpl $0, %%ecx \n\t" "jz end8 \n\t"// preload "movl mask, %%edx \n\t" "sall $24, %%edx \n\t" // make low byte, high byte "secondloop8: \n\t" "sall %%edx \n\t" // move high bit to CF "jnc skip8 \n\t" // if CF = 0 "movb (%%esi), %%al \n\t" "movb %%al, (%%edi) \n\t" "skip8: \n\t" "incl %%esi \n\t" "incl %%edi \n\t" "decl %%ecx \n\t" "jnz secondloop8 \n\t" "end8: \n\t" "EMMS \n\t" // DONE : "=a" (dummy_value_a), // output regs (dummy) "=d" (dummy_value_d), "=c" (dummy_value_c), "=S" (dummy_value_S), "=D" (dummy_value_D) : "3" (srcptr), // esi // input regs "4" (dstptr), // edi "0" (diff), // eax// was (unmask) "b" RESERVED // ebx // Global Offset Table idx "2" (len), // ecx "1" (mask) // edx#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ : "%mm0", "%mm4", "%mm6", "%mm7" // clobber list#endif ); } else /* mmx _not supported - Use modified C routine */ { register png_uint_32 i; png_uint_32 initial_val = png_pass_start[png_ptr->pass]; // png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; register int stride = png_pass_inc[png_ptr->pass]; // png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; register int rep_bytes = png_pass_width[png_ptr->pass]; // png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; register png_uint_32 final_val = png_ptr->width; srcptr = png_ptr->row_buf + 1 + initial_val; dstptr = row + initial_val; for (i = initial_val; i < final_val; i += stride) { png_memcpy(dstptr, srcptr, rep_bytes); srcptr += stride; dstptr += stride; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -