📄 pnggccrd.c
字号:
png_uint_32 i; sp = row + (png_size_t)((row_info->width - 1) >> 2); dp = row + (png_size_t)((final_width - 1) >> 2);#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (transformations & PNG_PACKSWAP) { sshift = (png_size_t)(((row_info->width + 3) & 3) << 1); dshift = (png_size_t)(((final_width + 3) & 3) << 1); s_start = 6; s_end = 0; s_inc = -2; } else#endif { sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1); dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1); s_start = 0; s_end = 6; s_inc = 2; } for (i = row_info->width; i; i--) { png_byte v; int j; v = (png_byte)((*sp >> sshift) & 0x3); for (j = 0; j < png_pass_inc[pass]; j++) { *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff); *dp |= (png_byte)(v << dshift); if (dshift == s_end) { dshift = s_start; dp--; } else dshift += s_inc; } if (sshift == s_end) { sshift = s_start; sp--; } else sshift += s_inc; } break; } case 4: { png_bytep sp, dp; int sshift, dshift; int s_start, s_end, s_inc; png_uint_32 i; sp = row + (png_size_t)((row_info->width - 1) >> 1); dp = row + (png_size_t)((final_width - 1) >> 1);#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (transformations & PNG_PACKSWAP) { sshift = (png_size_t)(((row_info->width + 1) & 1) << 2); dshift = (png_size_t)(((final_width + 1) & 1) << 2); s_start = 4; s_end = 0; s_inc = -4; } else#endif { sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2); dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2); s_start = 0; s_end = 4; s_inc = 4; } for (i = row_info->width; i; i--) { png_byte v; int j; v = (png_byte)((*sp >> sshift) & 0xf); for (j = 0; j < png_pass_inc[pass]; j++) { *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff); *dp |= (png_byte)(v << dshift); if (dshift == s_end) { dshift = s_start; dp--; } else dshift += s_inc; } if (sshift == s_end) { sshift = s_start; sp--; } else sshift += s_inc; } break; } //==================================================================== default: // 8-bit or larger (this is where the routine is modified) {// static unsigned long long _const4 = 0x0000000000FFFFFFLL; no good// static unsigned long long const4 = 0x0000000000FFFFFFLL; no good// unsigned long long _const4 = 0x0000000000FFFFFFLL; no good// unsigned long long const4 = 0x0000000000FFFFFFLL; no good png_bytep sptr, dp; png_uint_32 i; png_size_t pixel_bytes; int width = row_info->width; pixel_bytes = (row_info->pixel_depth >> 3); // point sptr at the last pixel in the pre-expanded row: sptr = row + (width - 1) * pixel_bytes; // point dp at the last pixel position in the expanded row: dp = row + (final_width - 1) * pixel_bytes; // New code by Nirav Chhatrapati - Intel Corporation if (mmx_supported) // use MMX code if machine supports it { //-------------------------------------------------------------- if (pixel_bytes == 3) { if (((pass == 0) || (pass == 1)) && width) { __asm__ ( "subl $21, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes ".loop3_pass0: \n\t" "movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0 "pand _const4, %%mm0 \n\t" // z z z z z 2 1 0 "movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0 "psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z "movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z "psllq $24, %%mm0 \n\t" // 2 1 0 z z z z z "psrlq $8, %%mm1 \n\t" // z z z z z z 2 1 "por %%mm2, %%mm0 \n\t" // 2 1 0 2 1 0 z z "por %%mm1, %%mm0 \n\t" // 2 1 0 2 1 0 2 1 "movq %%mm0, %%mm3 \n\t" // 2 1 0 2 1 0 2 1 "psllq $16, %%mm0 \n\t" // 0 2 1 0 2 1 z z "movq %%mm3, %%mm4 \n\t" // 2 1 0 2 1 0 2 1 "punpckhdq %%mm0, %%mm3 \n\t" // 0 2 1 0 2 1 0 2 "movq %%mm4, 16(%%edi) \n\t" "psrlq $32, %%mm0 \n\t" // z z z z 0 2 1 0 "movq %%mm3, 8(%%edi) \n\t" "punpckldq %%mm4, %%mm0 \n\t" // 1 0 2 1 0 2 1 0 "subl $3, %%esi \n\t" "movq %%mm0, (%%edi) \n\t" "subl $24, %%edi \n\t" "decl %%ecx \n\t" "jnz .loop3_pass0 \n\t" "EMMS \n\t" // DONE : // output regs (none) : "S" (sptr), // esi // input regs "D" (dp), // edi "c" (width) // ecx// doesn't work "i" (0x0000000000FFFFFFLL) // %1 (a.k.a. _const4) : "%esi", "%edi", "%ecx" // clobber list#if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */ , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"#endif ); } else if (((pass == 2) || (pass == 3)) && width) { __asm__ ( "subl $9, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes ".loop3_pass2: \n\t" "movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0 "pand _const4, %%mm0 \n\t" // z z z z z 2 1 0 "movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0 "psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z "movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z "psllq $24, %%mm0 \n\t" // 2 1 0 z z z z z "psrlq $8, %%mm1 \n\t" // z z z z z z 2 1 "por %%mm2, %%mm0 \n\t" // 2 1 0 2 1 0 z z "por %%mm1, %%mm0 \n\t" // 2 1 0 2 1 0 2 1 "movq %%mm0, 4(%%edi) \n\t" "psrlq $16, %%mm0 \n\t" // z z 2 1 0 2 1 0 "subl $3, %%esi \n\t" "movd %%mm0, (%%edi) \n\t" "subl $12, %%edi \n\t" "decl %%ecx \n\t" "jnz .loop3_pass2 \n\t" "EMMS \n\t" // DONE : // output regs (none) : "S" (sptr), // esi // input regs "D" (dp), // edi "c" (width) // ecx : "%esi", "%edi", "%ecx" // clobber list#if 0 /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */ , "%mm0", "%mm1", "%mm2"#endif ); } else if (width) /* && ((pass == 4) || (pass == 5)) */ { int width_mmx = ((width >> 1) << 1) - 8; // GRR: huh? if (width_mmx < 0) width_mmx = 0; width -= width_mmx; // 8 or 9 pix, 24 or 27 bytes if (width_mmx) { // png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; // sptr points at last pixel in pre-expanded row // dp points at last pixel position in expanded row __asm__ ( "subl $3, %%esi \n\t" "subl $9, %%edi \n\t" // (png_pass_inc[pass] + 1)*pixel_bytes ".loop3_pass4: \n\t" "movq (%%esi), %%mm0 \n\t" // x x 5 4 3 2 1 0 "movq %%mm0, %%mm1 \n\t" // x x 5 4 3 2 1 0 "movq %%mm0, %%mm2 \n\t" // x x 5 4 3 2 1 0 "psllq $24, %%mm0 \n\t" // 4 3 2 1 0 z z z "pand _const4, %%mm1 \n\t" // z z z z z 2 1 0 "psrlq $24, %%mm2 \n\t" // z z z x x 5 4 3 "por %%mm1, %%mm0 \n\t" // 4 3 2 1 0 2 1 0 "movq %%mm2, %%mm3 \n\t" // z z z x x 5 4 3 "psllq $8, %%mm2 \n\t" // z z x x 5 4 3 z "movq %%mm0, (%%edi) \n\t" "psrlq $16, %%mm3 \n\t" // z z z z z x x 5 "pand _const6, %%mm3 \n\t" // z z z z z z z 5 "por %%mm3, %%mm2 \n\t" // z z x x 5 4 3 5 "subl $6, %%esi \n\t" "movd %%mm2, 8(%%edi) \n\t" "subl $12, %%edi \n\t" "subl $2, %%ecx \n\t" "jnz .loop3_pass4 \n\t" "EMMS \n\t" // DONE : // output regs (none) : "S" (sptr), // esi // input regs "D" (dp), // edi "c" (width_mmx) // ecx : "%esi", "%edi", "%ecx" // clobber list#if 0 /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */ , "%mm0", "%mm1", "%mm2", "%mm3"#endif ); } sptr -= width_mmx*3; dp -= width_mmx*6; for (i = width; i; i--) { png_byte v[8]; int j; png_memcpy(v, sptr, 3); for (j = 0; j < png_pass_inc[pass]; j++) { png_memcpy(dp, v, 3); dp -= 3; } sptr -= 3; } } } /* end of pixel_bytes == 3 */ //-------------------------------------------------------------- else if (pixel_bytes == 1) { if (((pass == 0) || (pass == 1)) && width) { int width_mmx = ((width >> 2) << 2); width -= width_mmx; // 0-3 pixels => 0-3 bytes if (width_mmx) { __asm__ ( "subl $3, %%esi \n\t" "subl $31, %%edi \n\t" ".loop1_pass0: \n\t" "movd (%%esi), %%mm0 \n\t" // x x x x 3 2 1 0 "movq %%mm0, %%mm1 \n\t" // x x x x 3 2 1 0 "punpcklbw %%mm0, %%mm0 \n\t" // 3 3 2 2 1 1 0 0 "movq %%mm0, %%mm2 \n\t" // 3 3 2 2 1 1 0 0 "punpcklwd %%mm0, %%mm0 \n\t" // 1 1 1 1 0 0 0 0 "movq %%mm0, %%mm3 \n\t" // 1 1 1 1 0 0 0 0 "punpckldq %%mm0, %%mm0 \n\t" // 0 0 0 0 0 0 0 0 "punpckhdq %%mm3, %%mm3 \n\t" // 1 1 1 1 1 1 1 1 "movq %%mm0, (%%edi) \n\t" "punpckhwd %%mm2, %%mm2 \n\t" // 3 3 3 3 2 2 2 2 "movq %%mm3, 8(%%edi) \n\t" "movq %%mm2, %%mm4 \n\t" // 3 3 3 3 2 2 2 2 "punpckldq %%mm2, %%mm2 \n\t" // 2 2 2 2 2 2 2 2 "punpckhdq %%mm4, %%mm4 \n\t" // 3 3 3 3 3 3 3 3 "movq %%mm2, 16(%%edi) \n\t" "subl $4, %%esi \n\t" "movq %%mm4, 24(%%edi) \n\t" "subl $32, %%edi \n\t" "subl $4, %%ecx \n\t" "jnz .loop1_pass0 \n\t" "EMMS \n\t" // DONE : // output regs (none) : "S" (sptr), // esi // input regs "D" (dp), // edi "c"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -