📄 pnggccrd.c
字号:
#endif /* PNG_HAVE_ASSEMBLER_COMBINE_ROW *///===========================================================================//// //// P N G _ D O _ R E A D _ I N T E R L A C E //// ////===========================================================================//#if defined(PNG_READ_INTERLACING_SUPPORTED)#if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)/* png_do_read_interlace() is called after any 16-bit to 8-bit conversion * has taken place. [GRR: what other steps come before and/or after?] */void /* PRIVATE */png_do_read_interlace(png_structp png_ptr){ png_row_infop row_info = &(png_ptr->row_info); png_bytep row = png_ptr->row_buf + 1; int pass = png_ptr->pass; png_uint_32 transformations = png_ptr->transformations; png_debug(1,"in png_do_read_interlace\n"); if (_mmx_supported == 2) { png_mmx_support(); } if (row != NULL && row_info != NULL) { png_uint_32 final_width; final_width = row_info->width * png_pass_inc[pass]; switch (row_info->pixel_depth) { case 1: { png_bytep sp, dp; int sshift, dshift; int s_start, s_end, s_inc; png_byte v; png_uint_32 i; int j; sp = row + (png_size_t)((row_info->width - 1) >> 3); dp = row + (png_size_t)((final_width - 1) >> 3);#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (transformations & PNG_PACKSWAP) { sshift = (int)((row_info->width + 7) & 7); dshift = (int)((final_width + 7) & 7); s_start = 7; s_end = 0; s_inc = -1; } else#endif { sshift = 7 - (int)((row_info->width + 7) & 7); dshift = 7 - (int)((final_width + 7) & 7); s_start = 0; s_end = 7; s_inc = 1; } for (i = row_info->width; i; i--) { v = (png_byte)((*sp >> sshift) & 0x1); for (j = 0; j < png_pass_inc[pass]; j++) { *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff); *dp |= (png_byte)(v << dshift); if (dshift == s_end) { dshift = s_start; dp--; } else dshift += s_inc; } if (sshift == s_end) { sshift = s_start; sp--; } else sshift += s_inc; } break; } case 2: { png_bytep sp, dp; int sshift, dshift; int s_start, s_end, s_inc; png_uint_32 i; sp = row + (png_size_t)((row_info->width - 1) >> 2); dp = row + (png_size_t)((final_width - 1) >> 2);#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (transformations & PNG_PACKSWAP) { sshift = (png_size_t)(((row_info->width + 3) & 3) << 1); dshift = (png_size_t)(((final_width + 3) & 3) << 1); s_start = 6; s_end = 0; s_inc = -2; } else#endif { sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1); dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1); s_start = 0; s_end = 6; s_inc = 2; } for (i = row_info->width; i; i--) { png_byte v; int j; v = (png_byte)((*sp >> sshift) & 0x3); for (j = 0; j < png_pass_inc[pass]; j++) { *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff); *dp |= (png_byte)(v << dshift); if (dshift == s_end) { dshift = s_start; dp--; } else dshift += s_inc; } if (sshift == s_end) { sshift = s_start; sp--; } else sshift += s_inc; } break; } case 4: { png_bytep sp, dp; int sshift, dshift; int s_start, s_end, s_inc; png_uint_32 i; sp = row + (png_size_t)((row_info->width - 1) >> 1); dp = row + (png_size_t)((final_width - 1) >> 1);#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (transformations & PNG_PACKSWAP) { sshift = (png_size_t)(((row_info->width + 1) & 1) << 2); dshift = (png_size_t)(((final_width + 1) & 1) << 2); s_start = 4; s_end = 0; s_inc = -4; } else#endif { sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2); dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2); s_start = 0; s_end = 4; s_inc = 4; } for (i = row_info->width; i; i--) { png_byte v; int j; v = (png_byte)((*sp >> sshift) & 0xf); for (j = 0; j < png_pass_inc[pass]; j++) { *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff); *dp |= (png_byte)(v << dshift); if (dshift == s_end) { dshift = s_start; dp--; } else dshift += s_inc; } if (sshift == s_end) { sshift = s_start; sp--; } else sshift += s_inc; } break; } //==================================================================== default: // 8-bit or larger (this is where the routine is modified) {// static unsigned long long _const4 = 0x0000000000FFFFFFLL; no good// static unsigned long long const4 = 0x0000000000FFFFFFLL; no good// unsigned long long _const4 = 0x0000000000FFFFFFLL; no good// unsigned long long const4 = 0x0000000000FFFFFFLL; no good png_bytep sptr, dp; png_uint_32 i; png_size_t pixel_bytes; int width = row_info->width; pixel_bytes = (row_info->pixel_depth >> 3); // point sptr at the last pixel in the pre-expanded row: sptr = row + (width - 1) * pixel_bytes; // point dp at the last pixel position in the expanded row: dp = row + (final_width - 1) * pixel_bytes; // New code by Nirav Chhatrapati - Intel Corporation if ( _mmx_supported ) { //-------------------------------------------------------------- if (pixel_bytes == 3) { if (((pass == 0) || (pass == 1)) && width) { int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; __asm__ __volatile__ ( "subl $21, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes ".loop3_pass0: \n\t" "movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0 "pand _const4, %%mm0 \n\t" // z z z z z 2 1 0 "movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0 "psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z "movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z "psllq $24, %%mm0 \n\t" // 2 1 0 z z z z z "psrlq $8, %%mm1 \n\t" // z z z z z z 2 1 "por %%mm2, %%mm0 \n\t" // 2 1 0 2 1 0 z z "por %%mm1, %%mm0 \n\t" // 2 1 0 2 1 0 2 1 "movq %%mm0, %%mm3 \n\t" // 2 1 0 2 1 0 2 1 "psllq $16, %%mm0 \n\t" // 0 2 1 0 2 1 z z "movq %%mm3, %%mm4 \n\t" // 2 1 0 2 1 0 2 1 "punpckhdq %%mm0, %%mm3 \n\t" // 0 2 1 0 2 1 0 2 "movq %%mm4, 16(%%edi) \n\t" "psrlq $32, %%mm0 \n\t" // z z z z 0 2 1 0 "movq %%mm3, 8(%%edi) \n\t" "punpckldq %%mm4, %%mm0 \n\t" // 1 0 2 1 0 2 1 0 "subl $3, %%esi \n\t" "movq %%mm0, (%%edi) \n\t" "subl $24, %%edi \n\t" "decl %%ecx \n\t" "jnz .loop3_pass0 \n\t" "EMMS \n\t" // DONE : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) : "1" (sptr), // esi // input regs "2" (dp), // edi "0" (width) // ecx// doesn't work "i" (0x0000000000FFFFFFLL) // %1 (a.k.a. _const4)#if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */ : "%mm0", "%mm1", "%mm2" // clobber list , "%mm3", "%mm4"#endif ); } else if (((pass == 2) || (pass == 3)) && width) { int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; __asm__ __volatile__ ( "subl $9, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes ".loop3_pass2: \n\t" "movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0 "pand _const4, %%mm0 \n\t" // z z z z z 2 1 0 "movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0 "psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z "movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z "psllq $24, %%mm0 \n\t" // 2 1 0 z z z z z "psrlq $8, %%mm1 \n\t" // z z z z z z 2 1 "por %%mm2, %%mm0 \n\t" // 2 1 0 2 1 0 z z "por %%mm1, %%mm0 \n\t" // 2 1 0 2 1 0 2 1 "movq %%mm0, 4(%%edi) \n\t" "psrlq $16, %%mm0 \n\t" // z z 2 1 0 2 1 0 "subl $3, %%esi \n\t" "movd %%mm0, (%%edi) \n\t" "subl $12, %%edi \n\t" "decl %%ecx \n\t" "jnz .loop3_pass2 \n\t" "EMMS \n\t" // DONE : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_val
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -