📄 pngvcrd.c

📁 A*算法 A*算法 A*算法 A*算法A*算法A*算法
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
 *
 * For Intel x86 CPU and Microsoft Visual C++ compiler
 *
 * libpng version 1.2.7 - September 12, 2004
 * For conditions of distribution and use, see copyright notice in png.h
 * Copyright (c) 1998-2004 Glenn Randers-Pehrson
 * Copyright (c) 1998, Intel Corporation
 *
 * Contributed by Nirav Chhatrapati, Intel Corporation, 1998
 * Interface to libpng contributed by Gilles Vollant, 1999
 *
 *
 * In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
 * a sign error in the post-MMX cleanup code for each pixel_depth resulted
 * in bad pixels at the beginning of some rows of some images, and also
 * (due to out-of-range memory reads and writes) caused heap corruption
 * when compiled with MSVC 6.0.  The error was fixed in version 1.0.4e.
 *
 * [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
 *
 * [runtime MMX configuration, GRR 20010102]
 *
 */

#define PNG_INTERNAL
#include "png.h"

#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)

static int mmx_supported=2;


int PNGAPI
png_mmx_support(void)
{
  int mmx_supported_local = 0;
  _asm {
    push ebx          /*CPUID will trash these */
    push ecx
    push edx

    pushfd            /*Save Eflag to stack */
    pop eax           /*Get Eflag from stack into eax */
    mov ecx, eax      /*Make another copy of Eflag in ecx */
    xor eax, 0x200000 /*Toggle ID bit in Eflag [i.e. bit(21)] */
    push eax          /*Save modified Eflag back to stack */

    popfd             /*Restored modified value back to Eflag reg */
    pushfd            /*Save Eflag to stack */
    pop eax           /*Get Eflag from stack */
    push ecx          /* save original Eflag to stack */
    popfd             /* restore original Eflag */
    xor eax, ecx      /*Compare the new Eflag with the original Eflag */
    jz NOT_SUPPORTED  /*If the same, CPUID instruction is not supported, */
                      /*skip following instructions and jump to */
                      /*NOT_SUPPORTED label */

    xor eax, eax      /*Set eax to zero */

    _asm _emit 0x0f   /*CPUID instruction  (two bytes opcode) */
    _asm _emit 0xa2

    cmp eax, 1        /*make sure eax return non-zero value */
    jl NOT_SUPPORTED  /*If eax is zero, mmx not supported */

    xor eax, eax      /*set eax to zero */
    inc eax           /*Now increment eax to 1.  This instruction is */
                      /*faster than the instruction "mov eax, 1" */

    _asm _emit 0x0f   /*CPUID instruction */
    _asm _emit 0xa2

    and edx, 0x00800000  /*mask out all bits but mmx bit(24) */
    cmp edx, 0        /* 0 = mmx not supported */
    jz  NOT_SUPPORTED /* non-zero = Yes, mmx IS supported */

    mov  mmx_supported_local, 1  /*set return value to 1 */

NOT_SUPPORTED:
    mov  eax, mmx_supported_local  /*move return value to eax */
    pop edx          /*CPUID trashed these */
    pop ecx
    pop ebx
  }

  /*mmx_supported_local=0; // test code for force don't support MMX */
  /*printf("MMX : %u (1=MMX supported)\n",mmx_supported_local); */

  mmx_supported = mmx_supported_local;
  return mmx_supported_local;
}

/* Combines the row recently read in with the previous row.
   This routine takes care of alpha and transparency if requested.
   This routine also handles the two methods of progressive display
   of interlaced images, depending on the mask value.
   The mask value describes which pixels are to be combined with
   the row.  The pattern always repeats every 8 pixels, so just 8
   bits are needed.  A one indicates the pixel is to be combined; a
   zero indicates the pixel is to be skipped.  This is in addition
   to any alpha or transparency value associated with the pixel.  If
   you want all pixels to be combined, pass 0xff (255) in mask.  */

/* Use this routine for x86 platform - uses faster MMX routine if machine
   supports MMX */

void /* PRIVATE */
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
{
#ifdef PNG_USE_LOCAL_ARRAYS
   const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
#endif

   png_debug(1,"in png_combine_row_asm\n");

   if (mmx_supported == 2) {
#if !defined(PNG_1_0_X)
       /* this should have happened in png_init_mmx_flags() already */
       png_warning(png_ptr, "asm_flags may not have been initialized");
#endif
       png_mmx_support();
   }

   if (mask == 0xff)
   {
      png_memcpy(row, png_ptr->row_buf + 1,
       (png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
       png_ptr->width));
   }
   /* GRR:  add "else if (mask == 0)" case?
    *       or does png_combine_row() not even get called in that case? */
   else
   {
      switch (png_ptr->row_info.pixel_depth)
      {
         case 1:
         {
            png_bytep sp;
            png_bytep dp;
            int s_inc, s_start, s_end;
            int m;
            int shift;
            png_uint_32 i;

            sp = png_ptr->row_buf + 1;
            dp = row;
            m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
            if (png_ptr->transformations & PNG_PACKSWAP)
            {
                s_start = 0;
                s_end = 7;
                s_inc = 1;
            }
            else
#endif
            {
                s_start = 7;
                s_end = 0;
                s_inc = -1;
            }

            shift = s_start;

            for (i = 0; i < png_ptr->width; i++)
            {
               if (m & mask)
               {
                  int value;

                  value = (*sp >> shift) & 0x1;
                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
                  *dp |= (png_byte)(value << shift);
               }

               if (shift == s_end)
               {
                  shift = s_start;
                  sp++;
                  dp++;
               }
               else
                  shift += s_inc;

               if (m == 1)
                  m = 0x80;
               else
                  m >>= 1;
            }
            break;
         }

         case 2:
         {
            png_bytep sp;
            png_bytep dp;
            int s_start, s_end, s_inc;
            int m;
            int shift;
            png_uint_32 i;
            int value;

            sp = png_ptr->row_buf + 1;
            dp = row;
            m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
            if (png_ptr->transformations & PNG_PACKSWAP)
            {
               s_start = 0;
               s_end = 6;
               s_inc = 2;
            }
            else
#endif
            {
               s_start = 6;
               s_end = 0;
               s_inc = -2;
            }

            shift = s_start;

            for (i = 0; i < png_ptr->width; i++)
            {
               if (m & mask)
               {
                  value = (*sp >> shift) & 0x3;
                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
                  *dp |= (png_byte)(value << shift);
               }

               if (shift == s_end)
               {
                  shift = s_start;
                  sp++;
                  dp++;
               }
               else
                  shift += s_inc;
               if (m == 1)
                  m = 0x80;
               else
                  m >>= 1;
            }
            break;
         }

         case 4:
         {
            png_bytep sp;
            png_bytep dp;
            int s_start, s_end, s_inc;
            int m;
            int shift;
            png_uint_32 i;
            int value;

            sp = png_ptr->row_buf + 1;
            dp = row;
            m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
            if (png_ptr->transformations & PNG_PACKSWAP)
            {
               s_start = 0;
               s_end = 4;
               s_inc = 4;
            }
            else
#endif
            {
               s_start = 4;
               s_end = 0;
               s_inc = -4;
            }
            shift = s_start;

            for (i = 0; i < png_ptr->width; i++)
            {
               if (m & mask)
               {
                  value = (*sp >> shift) & 0xf;
                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
                  *dp |= (png_byte)(value << shift);
               }

               if (shift == s_end)
               {
                  shift = s_start;
                  sp++;
                  dp++;
               }
               else
                  shift += s_inc;
               if (m == 1)
                  m = 0x80;
               else
                  m >>= 1;
            }
            break;
         }

         case 8:
         {
            png_bytep srcptr;
            png_bytep dstptr;
            png_uint_32 len;
            int m;
            int diff, unmask;

            __int64 mask0=0x0102040810204080;

#if !defined(PNG_1_0_X)
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
                /* && mmx_supported */ )
#else
            if (mmx_supported)
#endif
            {
               srcptr = png_ptr->row_buf + 1;
               dstptr = row;
               m = 0x80;
               unmask = ~mask;
               len  = png_ptr->width &~7;  /*reduce to multiple of 8 */
               diff = png_ptr->width & 7;  /*amount lost */

               _asm
               {
                  movd       mm7, unmask   /*load bit pattern */
                  psubb      mm6,mm6       /*zero mm6 */
                  punpcklbw  mm7,mm7
                  punpcklwd  mm7,mm7
                  punpckldq  mm7,mm7       /*fill register with 8 masks */

                  movq       mm0,mask0

                  pand       mm0,mm7       /*nonzero if keep byte */
                  pcmpeqb    mm0,mm6       /*zeros->1s, v versa */

                  mov        ecx,len       /*load length of line (pixels) */
                  mov        esi,srcptr    /*load source */
                  mov        ebx,dstptr    /*load dest */
                  cmp        ecx,0         /*lcr */
                  je         mainloop8end

mainloop8:
                  movq       mm4,[esi]
                  pand       mm4,mm0
                  movq       mm6,mm0
                  pandn      mm6,[ebx]
                  por        mm4,mm6
                  movq       [ebx],mm4

                  add        esi,8         /*inc by 8 bytes processed */
                  add        ebx,8
                  sub        ecx,8         /*dec by 8 pixels processed */

                  ja         mainloop8
mainloop8end:

                  mov        ecx,diff
                  cmp        ecx,0
                  jz         end8
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -