📄 pngvcrd.c
字号:
/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
 *
 * For Intel x86 CPU and Microsoft Visual C++ compiler
 *
 * libpng 1.0.12 - June 8, 2001
 * For conditions of distribution and use, see copyright notice in png.h
 * Copyright (c) 1998-2001 Glenn Randers-Pehrson
 * Copyright (c) 1998, Intel Corporation
 *
 * Contributed by Nirav Chhatrapati, Intel Corporation, 1998
 * Interface to libpng contributed by Gilles Vollant, 1999
 * Debugging and cleanup by Greg Roelofs, 2000, 2001
 *
 * In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
 * a sign error in the post-MMX cleanup code for each pixel_depth resulted
 * in bad pixels at the beginning of some rows of some images, and also
 * (due to out-of-range memory reads and writes) caused heap corruption
 * when compiled with MSVC 6.0.  The error was fixed in version 1.0.4e.
 *
 * [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
 *
 */

#define PNG_INTERNAL
#include "png.h"

#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)

/* Cached result of the MMX probe.  The initial value 2 means "not probed
 * yet": png_combine_row() calls png_mmx_support() while this is still 2,
 * and png_mmx_support() then overwrites it with 0 or 1.
 */
static int mmx_supported=2;

/* Probe the CPU for MMX support.
 *
 * The probe runs in three steps:
 *   1. Toggle the ID bit (bit 21) of EFLAGS and read it back; if the bit
 *      does not change, the CPUID instruction is not available.
 *   2. Execute CPUID with eax = 0 and require the returned eax to be at
 *      least 1, i.e. CPUID function 1 can be queried.
 *   3. Execute CPUID with eax = 1 and test the bit selected by the mask
 *      0x00800000 (bit 23) in edx.
 *
 * Returns 1 if every step succeeds, 0 otherwise.  The result is also
 * stored in the file-scope variable mmx_supported.
 */
int PNGAPI
png_mmx_support(void)
{
   int mmx_supported_local = 0;
   _asm {
      push ebx          //CPUID will trash these
      push ecx
      push edx

      pushfd            //Save Eflag to stack
      pop eax           //Get Eflag from stack into eax
      mov ecx, eax      //Make another copy of Eflag in ecx
      xor eax, 0x200000 //Toggle ID bit in Eflag [i.e. bit(21)]
      push eax          //Save modified Eflag back to stack

      popfd             //Restore modified value back to Eflag reg
      pushfd            //Save Eflag to stack
      pop eax           //Get Eflag from stack
      xor eax, ecx      //Compare the new Eflag with the original Eflag
      jz NOT_SUPPORTED  //If the same, CPUID instruction is not supported,
                        //skip following instructions and jump to
                        //NOT_SUPPORTED label

      xor eax, eax      //Set eax to zero

      _asm _emit 0x0f   //CPUID instruction (two-byte opcode 0F A2)
      _asm _emit 0xa2

      cmp eax, 1        //make sure eax returned a non-zero value
      jl NOT_SUPPORTED  //If eax is zero, mmx not supported

      xor eax, eax      //set eax to zero
      inc eax           //Now increment eax to 1.  This instruction is
                        //faster than the instruction "mov eax, 1"

      _asm _emit 0x0f   //CPUID instruction
      _asm _emit 0xa2

      and edx, 0x00800000 //mask out all bits but the mmx bit, bit(23)
      cmp edx, 0        // 0 = mmx not supported
      jz NOT_SUPPORTED  // non-zero = Yes, mmx IS supported

      mov mmx_supported_local, 1 //set return value to 1

NOT_SUPPORTED:
      mov eax, mmx_supported_local //move return value to eax
      pop edx           //CPUID trashed these
      pop ecx
      pop ebx
   }

   //mmx_supported_local=0; // test code for force don't support MMX
   //printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);

   mmx_supported = mmx_supported_local;
   return mmx_supported_local;
}

/* Combines the row recently read in with the previous row.
   This routine takes care of alpha and transparency if requested.
   This routine also handles the two methods of progressive display
   of interlaced images, depending on the mask value.
   The mask value describes which pixels are to be combined with
   the row.  The pattern always repeats every 8 pixels, so just 8
   bits are needed.  A one indicates the pixel is to be combined; a
   zero indicates the pixel is to be skipped.  This is in addition
   to any alpha or transparency value associated with the pixel.
   If you want all pixels to be combined, pass 0xff (255) in mask.  */

/* Use this routine for x86 platform - uses faster MMX routine if machine
   supports MMX */

void /* PRIVATE */
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
{
#ifdef PNG_USE_LOCAL_ARRAYS
   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
#endif

   png_debug(1,"in png_combine_row_asm\n");

   if (mmx_supported == 2) {
      png_mmx_support();
   }

   if (mask == 0xff)
   {
      png_memcpy(row, png_ptr->row_buf + 1,
       (png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
   }
   /* GRR:  add "else if (mask == 0)" case?
    *       or does png_combine_row() not even get called in that case?
*/ else { switch (png_ptr->row_info.pixel_depth) { case 1: { png_bytep sp; png_bytep dp; int s_inc, s_start, s_end; int m; int shift; png_uint_32 i; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 7; s_inc = 1; } else#endif { s_start = 7; s_end = 0; s_inc = -1; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { int value; value = (*sp >> shift) & 0x1; *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 2: { png_bytep sp; png_bytep dp; int s_start, s_end, s_inc; int m; int shift; png_uint_32 i; int value; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 6; s_inc = 2; } else#endif { s_start = 6; s_end = 0; s_inc = -2; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { value = (*sp >> shift) & 0x3; *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 4: { png_bytep sp; png_bytep dp; int s_start, s_end, s_inc; int m; int shift; png_uint_32 i; int value; sp = png_ptr->row_buf + 1; dp = row; m = 0x80;#if defined(PNG_READ_PACKSWAP_SUPPORTED) if (png_ptr->transformations & PNG_PACKSWAP) { s_start = 0; s_end = 4; s_inc = 4; } else#endif { s_start = 4; s_end = 0; s_inc = -4; } shift = s_start; for (i = 0; i < png_ptr->width; i++) { if (m & mask) { value = (*sp >> shift) & 0xf; *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); *dp |= (png_byte)(value << shift); } if (shift == s_end) { shift = s_start; sp++; dp++; } else shift += s_inc; if (m == 1) m = 0x80; else m >>= 1; } break; } case 8: { png_bytep 
srcptr; png_bytep dstptr; png_uint_32 len; int m; int diff, unmask; __int64 mask0=0x0102040810204080; if ( mmx_supported ) { srcptr = png_ptr->row_buf + 1; dstptr = row; m = 0x80; unmask = ~mask; len = png_ptr->width &~7; //reduce to multiple of 8 diff = png_ptr->width & 7; //amount lost _asm { movd mm7, unmask //load bit pattern psubb mm6,mm6 //zero mm6 punpcklbw mm7,mm7 punpcklwd mm7,mm7 punpckldq mm7,mm7 //fill register with 8 masks movq mm0,mask0 pand mm0,mm7 //nonzero if keep byte pcmpeqb mm0,mm6 //zeros->1s, v versa mov ecx,len //load length of line (pixels) mov esi,srcptr //load source mov ebx,dstptr //load dest cmp ecx,0 //lcr je mainloop8endmainloop8: movq mm4,[esi] pand mm4,mm0 movq mm6,mm0 pandn mm6,[ebx] por mm4,mm6 movq [ebx],mm4 add esi,8 //inc by 8 bytes processed add ebx,8 sub ecx,8 //dec by 8 pixels processed ja mainloop8mainloop8end: mov ecx,diff cmp ecx,0 jz end8 mov edx,mask sal edx,24 //make low byte the high bytesecondloop8: sal edx,1 //move high bit to CF jnc skip8 //if CF = 0 mov al,[esi] mov [ebx],alskip8: inc esi inc ebx dec ecx jnz secondloop8end8: emms } } else /* mmx not supported - use modified C routine */ { register unsigned int incr1, initial_val, final_val; png_size_t pixel_bytes; png_uint_32 i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -