📄 pngvcrd.c
字号:
/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
*
* For Intel x86 CPU and Microsoft Visual C++ compiler
*
* libpng version 1.2.1 - December 12, 2001
* For conditions of distribution and use, see copyright notice in png.h
* Copyright (c) 1998-2001 Glenn Randers-Pehrson
* Copyright (c) 1998, Intel Corporation
*
* Contributed by Nirav Chhatrapati, Intel Corporation, 1998
* Interface to libpng contributed by Gilles Vollant, 1999
*
*
* In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
* a sign error in the post-MMX cleanup code for each pixel_depth resulted
* in bad pixels at the beginning of some rows of some images, and also
* (due to out-of-range memory reads and writes) caused heap corruption
* when compiled with MSVC 6.0. The error was fixed in version 1.0.4e.
*
* [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
*
* [runtime MMX configuration, GRR 20010102]
*
*/
#define PNG_INTERNAL
#include "png.h"
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)
static int mmx_supported=2;
int PNGAPI
png_mmx_support(void)
{
int mmx_supported_local = 0;
_asm {
push ebx //CPUID will trash these
push ecx
push edx
pushfd //Save Eflag to stack
pop eax //Get Eflag from stack into eax
mov ecx, eax //Make another copy of Eflag in ecx
xor eax, 0x200000 //Toggle ID bit in Eflag [i.e. bit(21)]
push eax //Save modified Eflag back to stack
popfd //Restored modified value back to Eflag reg
pushfd //Save Eflag to stack
pop eax //Get Eflag from stack
push ecx // save original Eflag to stack
popfd // restore original Eflag
xor eax, ecx //Compare the new Eflag with the original Eflag
jz NOT_SUPPORTED //If the same, CPUID instruction is not supported,
//skip following instructions and jump to
//NOT_SUPPORTED label
xor eax, eax //Set eax to zero
_asm _emit 0x0f //CPUID instruction (two bytes opcode)
_asm _emit 0xa2
cmp eax, 1 //make sure eax return non-zero value
jl NOT_SUPPORTED //If eax is zero, mmx not supported
xor eax, eax //set eax to zero
inc eax //Now increment eax to 1. This instruction is
//faster than the instruction "mov eax, 1"
_asm _emit 0x0f //CPUID instruction
_asm _emit 0xa2
and edx, 0x00800000 //mask out all bits but mmx bit(24)
cmp edx, 0 // 0 = mmx not supported
jz NOT_SUPPORTED // non-zero = Yes, mmx IS supported
mov mmx_supported_local, 1 //set return value to 1
NOT_SUPPORTED:
mov eax, mmx_supported_local //move return value to eax
pop edx //CPUID trashed these
pop ecx
pop ebx
}
//mmx_supported_local=0; // test code for force don't support MMX
//printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);
mmx_supported = mmx_supported_local;
return mmx_supported_local;
}
/* Combines the row recently read in with the previous row.
This routine takes care of alpha and transparency if requested.
This routine also handles the two methods of progressive display
of interlaced images, depending on the mask value.
The mask value describes which pixels are to be combined with
the row. The pattern always repeats every 8 pixels, so just 8
bits are needed. A one indicates the pixel is to be combined; a
zero indicates the pixel is to be skipped. This is in addition
to any alpha or transparency value associated with the pixel. If
you want all pixels to be combined, pass 0xff (255) in mask. */
/* Use this routine for x86 platform - uses faster MMX routine if machine
supports MMX */
void /* PRIVATE */
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
{
#ifdef PNG_USE_LOCAL_ARRAYS
const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
#endif
png_debug(1,"in png_combine_row_asm\n");
if (mmx_supported == 2) {
/* this should have happened in png_init_mmx_flags() already */
png_warning(png_ptr, "asm_flags may not have been initialized");
png_mmx_support();
}
if (mask == 0xff)
{
png_memcpy(row, png_ptr->row_buf + 1,
(png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
}
/* GRR: add "else if (mask == 0)" case?
* or does png_combine_row() not even get called in that case? */
else
{
switch (png_ptr->row_info.pixel_depth)
{
case 1:
{
png_bytep sp;
png_bytep dp;
int s_inc, s_start, s_end;
int m;
int shift;
png_uint_32 i;
sp = png_ptr->row_buf + 1;
dp = row;
m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (png_ptr->transformations & PNG_PACKSWAP)
{
s_start = 0;
s_end = 7;
s_inc = 1;
}
else
#endif
{
s_start = 7;
s_end = 0;
s_inc = -1;
}
shift = s_start;
for (i = 0; i < png_ptr->width; i++)
{
if (m & mask)
{
int value;
value = (*sp >> shift) & 0x1;
*dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
*dp |= (png_byte)(value << shift);
}
if (shift == s_end)
{
shift = s_start;
sp++;
dp++;
}
else
shift += s_inc;
if (m == 1)
m = 0x80;
else
m >>= 1;
}
break;
}
case 2:
{
png_bytep sp;
png_bytep dp;
int s_start, s_end, s_inc;
int m;
int shift;
png_uint_32 i;
int value;
sp = png_ptr->row_buf + 1;
dp = row;
m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (png_ptr->transformations & PNG_PACKSWAP)
{
s_start = 0;
s_end = 6;
s_inc = 2;
}
else
#endif
{
s_start = 6;
s_end = 0;
s_inc = -2;
}
shift = s_start;
for (i = 0; i < png_ptr->width; i++)
{
if (m & mask)
{
value = (*sp >> shift) & 0x3;
*dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
*dp |= (png_byte)(value << shift);
}
if (shift == s_end)
{
shift = s_start;
sp++;
dp++;
}
else
shift += s_inc;
if (m == 1)
m = 0x80;
else
m >>= 1;
}
break;
}
case 4:
{
png_bytep sp;
png_bytep dp;
int s_start, s_end, s_inc;
int m;
int shift;
png_uint_32 i;
int value;
sp = png_ptr->row_buf + 1;
dp = row;
m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (png_ptr->transformations & PNG_PACKSWAP)
{
s_start = 0;
s_end = 4;
s_inc = 4;
}
else
#endif
{
s_start = 4;
s_end = 0;
s_inc = -4;
}
shift = s_start;
for (i = 0; i < png_ptr->width; i++)
{
if (m & mask)
{
value = (*sp >> shift) & 0xf;
*dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
*dp |= (png_byte)(value << shift);
}
if (shift == s_end)
{
shift = s_start;
sp++;
dp++;
}
else
shift += s_inc;
if (m == 1)
m = 0x80;
else
m >>= 1;
}
break;
}
case 8:
{
png_bytep srcptr;
png_bytep dstptr;
png_uint_32 len;
int m;
int diff, unmask;
__int64 mask0=0x0102040810204080;
if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
/* && mmx_supported */ )
{
srcptr = png_ptr->row_buf + 1;
dstptr = row;
m = 0x80;
unmask = ~mask;
len = png_ptr->width &~7; //reduce to multiple of 8
diff = png_ptr->width & 7; //amount lost
_asm
{
movd mm7, unmask //load bit pattern
psubb mm6,mm6 //zero mm6
punpcklbw mm7,mm7
punpcklwd mm7,mm7
punpckldq mm7,mm7 //fill register with 8 masks
movq mm0,mask0
pand mm0,mm7 //nonzero if keep byte
pcmpeqb mm0,mm6 //zeros->1s, v versa
mov ecx,len //load length of line (pixels)
mov esi,srcptr //load source
mov ebx,dstptr //load dest
cmp ecx,0 //lcr
je mainloop8end
mainloop8:
movq mm4,[esi]
pand mm4,mm0
movq mm6,mm0
pandn mm6,[ebx]
por mm4,mm6
movq [ebx],mm4
add esi,8 //inc by 8 bytes processed
add ebx,8
sub ecx,8 //dec by 8 pixels processed
ja mainloop8
mainloop8end:
mov ecx,diff
cmp ecx,0
jz end8
mov edx,mask
sal edx,24 //make low byte the high byte
secondloop8:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -