📄 pngvcrd.c

📁 一个国人自己实现图像库的程序（有参考价值）
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
 *
 * For Intel x86 CPU and Microsoft Visual C++ compiler
 *
 * libpng version 1.2.1 - December 12, 2001
 * For conditions of distribution and use, see copyright notice in png.h
 * Copyright (c) 1998-2001 Glenn Randers-Pehrson
 * Copyright (c) 1998, Intel Corporation
 *
 * Contributed by Nirav Chhatrapati, Intel Corporation, 1998
 * Interface to libpng contributed by Gilles Vollant, 1999
 *
 *
 * In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
 * a sign error in the post-MMX cleanup code for each pixel_depth resulted
 * in bad pixels at the beginning of some rows of some images, and also
 * (due to out-of-range memory reads and writes) caused heap corruption
 * when compiled with MSVC 6.0.  The error was fixed in version 1.0.4e.
 *
 * [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
 *
 * [runtime MMX configuration, GRR 20010102]
 *
 */

#define PNG_INTERNAL
#include "png.h"

#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)

static int mmx_supported=2;


int PNGAPI
png_mmx_support(void)
{
  int mmx_supported_local = 0;
  _asm {
    push ebx          //CPUID will trash these
    push ecx
    push edx

    pushfd            //Save Eflag to stack
    pop eax           //Get Eflag from stack into eax
    mov ecx, eax      //Make another copy of Eflag in ecx
    xor eax, 0x200000 //Toggle ID bit in Eflag [i.e. bit(21)]
    push eax          //Save modified Eflag back to stack

    popfd             //Restored modified value back to Eflag reg
    pushfd            //Save Eflag to stack
    pop eax           //Get Eflag from stack
    push ecx          // save original Eflag to stack
    popfd             // restore original Eflag
    xor eax, ecx      //Compare the new Eflag with the original Eflag
    jz NOT_SUPPORTED  //If the same, CPUID instruction is not supported,
                      //skip following instructions and jump to
                      //NOT_SUPPORTED label

    xor eax, eax      //Set eax to zero

    _asm _emit 0x0f   //CPUID instruction  (two bytes opcode)
    _asm _emit 0xa2

    cmp eax, 1        //make sure eax return non-zero value
    jl NOT_SUPPORTED  //If eax is zero, mmx not supported

    xor eax, eax      //set eax to zero
    inc eax           //Now increment eax to 1.  This instruction is
                      //faster than the instruction "mov eax, 1"

    _asm _emit 0x0f   //CPUID instruction
    _asm _emit 0xa2

    and edx, 0x00800000  //mask out all bits but mmx bit(24)
    cmp edx, 0        // 0 = mmx not supported
    jz  NOT_SUPPORTED // non-zero = Yes, mmx IS supported

    mov  mmx_supported_local, 1  //set return value to 1

NOT_SUPPORTED:
    mov  eax, mmx_supported_local  //move return value to eax
    pop edx          //CPUID trashed these
    pop ecx
    pop ebx
  }

  //mmx_supported_local=0; // test code for force don't support MMX
  //printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);

  mmx_supported = mmx_supported_local;
  return mmx_supported_local;
}

/* Combines the row recently read in with the previous row.
   This routine takes care of alpha and transparency if requested.
   This routine also handles the two methods of progressive display
   of interlaced images, depending on the mask value.
   The mask value describes which pixels are to be combined with
   the row.  The pattern always repeats every 8 pixels, so just 8
   bits are needed.  A one indicates the pixel is to be combined; a
   zero indicates the pixel is to be skipped.  This is in addition
   to any alpha or transparency value associated with the pixel.  If
   you want all pixels to be combined, pass 0xff (255) in mask.  */

/* Use this routine for x86 platform - uses faster MMX routine if machine
   supports MMX */

void /* PRIVATE */
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
{
#ifdef PNG_USE_LOCAL_ARRAYS
   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
#endif

   png_debug(1,"in png_combine_row_asm\n");

   if (mmx_supported == 2) {
       /* this should have happened in png_init_mmx_flags() already */
       png_warning(png_ptr, "asm_flags may not have been initialized");
       png_mmx_support();
   }

   if (mask == 0xff)
   {
      png_memcpy(row, png_ptr->row_buf + 1,
       (png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
   }
   /* GRR:  add "else if (mask == 0)" case?
    *       or does png_combine_row() not even get called in that case? */
   else
   {
      switch (png_ptr->row_info.pixel_depth)
      {
         case 1:
         {
            png_bytep sp;
            png_bytep dp;
            int s_inc, s_start, s_end;
            int m;
            int shift;
            png_uint_32 i;

            sp = png_ptr->row_buf + 1;
            dp = row;
            m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
            if (png_ptr->transformations & PNG_PACKSWAP)
            {
                s_start = 0;
                s_end = 7;
                s_inc = 1;
            }
            else
#endif
            {
                s_start = 7;
                s_end = 0;
                s_inc = -1;
            }

            shift = s_start;

            for (i = 0; i < png_ptr->width; i++)
            {
               if (m & mask)
               {
                  int value;

                  value = (*sp >> shift) & 0x1;
                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
                  *dp |= (png_byte)(value << shift);
               }

               if (shift == s_end)
               {
                  shift = s_start;
                  sp++;
                  dp++;
               }
               else
                  shift += s_inc;

               if (m == 1)
                  m = 0x80;
               else
                  m >>= 1;
            }
            break;
         }

         case 2:
         {
            png_bytep sp;
            png_bytep dp;
            int s_start, s_end, s_inc;
            int m;
            int shift;
            png_uint_32 i;
            int value;

            sp = png_ptr->row_buf + 1;
            dp = row;
            m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
            if (png_ptr->transformations & PNG_PACKSWAP)
            {
               s_start = 0;
               s_end = 6;
               s_inc = 2;
            }
            else
#endif
            {
               s_start = 6;
               s_end = 0;
               s_inc = -2;
            }

            shift = s_start;

            for (i = 0; i < png_ptr->width; i++)
            {
               if (m & mask)
               {
                  value = (*sp >> shift) & 0x3;
                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
                  *dp |= (png_byte)(value << shift);
               }

               if (shift == s_end)
               {
                  shift = s_start;
                  sp++;
                  dp++;
               }
               else
                  shift += s_inc;
               if (m == 1)
                  m = 0x80;
               else
                  m >>= 1;
            }
            break;
         }

         case 4:
         {
            png_bytep sp;
            png_bytep dp;
            int s_start, s_end, s_inc;
            int m;
            int shift;
            png_uint_32 i;
            int value;

            sp = png_ptr->row_buf + 1;
            dp = row;
            m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
            if (png_ptr->transformations & PNG_PACKSWAP)
            {
               s_start = 0;
               s_end = 4;
               s_inc = 4;
            }
            else
#endif
            {
               s_start = 4;
               s_end = 0;
               s_inc = -4;
            }
            shift = s_start;

            for (i = 0; i < png_ptr->width; i++)
            {
               if (m & mask)
               {
                  value = (*sp >> shift) & 0xf;
                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
                  *dp |= (png_byte)(value << shift);
               }

               if (shift == s_end)
               {
                  shift = s_start;
                  sp++;
                  dp++;
               }
               else
                  shift += s_inc;
               if (m == 1)
                  m = 0x80;
               else
                  m >>= 1;
            }
            break;
         }

         case 8:
         {
            png_bytep srcptr;
            png_bytep dstptr;
            png_uint_32 len;
            int m;
            int diff, unmask;

            __int64 mask0=0x0102040810204080;

            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
                /* && mmx_supported */ )
            {
               srcptr = png_ptr->row_buf + 1;
               dstptr = row;
               m = 0x80;
               unmask = ~mask;
               len  = png_ptr->width &~7;  //reduce to multiple of 8
               diff = png_ptr->width & 7;  //amount lost

               _asm
               {
                  movd       mm7, unmask   //load bit pattern
                  psubb      mm6,mm6       //zero mm6
                  punpcklbw  mm7,mm7
                  punpcklwd  mm7,mm7
                  punpckldq  mm7,mm7       //fill register with 8 masks

                  movq       mm0,mask0

                  pand       mm0,mm7       //nonzero if keep byte
                  pcmpeqb    mm0,mm6       //zeros->1s, v versa

                  mov        ecx,len       //load length of line (pixels)
                  mov        esi,srcptr    //load source
                  mov        ebx,dstptr    //load dest
                  cmp        ecx,0         //lcr
                  je         mainloop8end

mainloop8:
                  movq       mm4,[esi]
                  pand       mm4,mm0
                  movq       mm6,mm0
                  pandn      mm6,[ebx]
                  por        mm4,mm6
                  movq       [ebx],mm4

                  add        esi,8         //inc by 8 bytes processed
                  add        ebx,8
                  sub        ecx,8         //dec by 8 pixels processed

                  ja         mainloop8
mainloop8end:

                  mov        ecx,diff
                  cmp        ecx,0
                  jz         end8

                  mov        edx,mask
                  sal        edx,24        //make low byte the high byte

secondloop8:
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -