pnggccrd.c

来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C语言 代码 · 共 1,563 行 · 第 1/5 页

C
1,563
字号
               diff = (int) (png_ptr->width & 7);  /* amount lost */

               __asm__ __volatile__ (
                  "movd      _unmask, %%mm7  \n\t" /* load bit pattern */
                  "psubb     %%mm6, %%mm6    \n\t" /* zero mm6 */
                  "punpcklbw %%mm7, %%mm7    \n\t"
                  "punpcklwd %%mm7, %%mm7    \n\t"
                  "punpckldq %%mm7, %%mm7    \n\t" /* fill reg with 8 masks */

                  "movq      _mask8_0, %%mm0 \n\t"
                  "pand      %%mm7, %%mm0    \n\t" /* nonzero if keep byte */
                  "pcmpeqb   %%mm6, %%mm0    \n\t" /* zeros->1s, v versa */

/* preload        "movl      len, %%ecx      \n\t" // load length of line */
/* preload        "movl      srcptr, %%esi   \n\t" // load source */
/* preload        "movl      dstptr, %%edi   \n\t" // load dest */

                  "cmpl      $0, %%ecx       \n\t" /* len == 0 ? */
                  "je        mainloop8end    \n\t"

                "mainloop8:                  \n\t"
                  "movq      (%%esi), %%mm4  \n\t" /* *srcptr */
                  "pand      %%mm0, %%mm4    \n\t"
                  "movq      %%mm0, %%mm6    \n\t"
                  "pandn     (%%edi), %%mm6  \n\t" /* *dstptr */
                  "por       %%mm6, %%mm4    \n\t"
                  "movq      %%mm4, (%%edi)  \n\t"
                  "addl      $8, %%esi       \n\t" /* inc by 8 bytes processed */
                  "addl      $8, %%edi       \n\t"
                  "subl      $8, %%ecx       \n\t" /* dec by 8 pixels processed */
                  "ja        mainloop8       \n\t"

                "mainloop8end:               \n\t"
/* preload        "movl      diff, %%ecx     \n\t" // (diff is in eax) */
                  "movl      %%eax, %%ecx    \n\t"
                  "cmpl      $0, %%ecx       \n\t"
                  "jz        end8            \n\t"
/* preload        "movl      mask, %%edx     \n\t" */
                  "sall      $24, %%edx      \n\t" /* make low byte, high byte */

                "secondloop8:                \n\t"
                  "sall      %%edx           \n\t" /* move high bit to CF */
                  "jnc       skip8           \n\t" /* if CF = 0 */
                  "movb      (%%esi), %%al   \n\t"
                  "movb      %%al, (%%edi)   \n\t"

                "skip8:                      \n\t"
                  "incl      %%esi           \n\t"
                  "incl      %%edi           \n\t"
                  "decl      %%ecx           \n\t"
                  "jnz       secondloop8     \n\t"

                "end8:                       \n\t"
                  "EMMS                      \n\t"  /* DONE */

                  : "=a" (dummy_value_a),           /* output regs (dummy) */
                    "=d" (dummy_value_d),
                    "=c" (dummy_value_c),
                    "=S" (dummy_value_S),
                    "=D" (dummy_value_D)

                  : "3" (srcptr),      /* esi       // input regs */
                    "4" (dstptr),      /* edi */
                    "0" (diff),        /* eax */
/* was (unmask)     "b"    RESERVED    // ebx       // Global Offset Table idx */
                    "2" (len),         /* ecx */
                    "1" (mask)         /* edx */

#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
                  : "%mm0", "%mm4", "%mm6", "%mm7"  /* clobber list */
#endif
               );
            }
            else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
            {
               register png_uint_32 i;
               png_uint_32 initial_val = png_pass_start[png_ptr->pass];
                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
               register int stride = png_pass_inc[png_ptr->pass];
                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
               register int rep_bytes = png_pass_width[png_ptr->pass];
                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
               int diff = (int) (png_ptr->width & 7); /* amount lost */
               register png_uint_32 final_val = len;  /* GRR bugfix */

               srcptr = png_ptr->row_buf + 1 + initial_val;
               dstptr = row + initial_val;

               for (i = initial_val; i < final_val; i += stride)
               {
                  png_memcpy(dstptr, srcptr, rep_bytes);
                  srcptr += stride;
                  dstptr += stride;
               }
               if (diff)  /* number of leftover pixels:  3 for pngtest */
               {
                  final_val+=diff /* *BPP1 */ ;
                  for (; i < final_val; i += stride)
                  {
                     if (rep_bytes > (int)(final_val-i))
                        rep_bytes = (int)(final_val-i);
                     png_memcpy(dstptr, srcptr, rep_bytes);
                     srcptr += stride;
                     dstptr += stride;
                  }
               }

            } /* end of else (_mmx_supported) */

            break;
         }       /* end 8 bpp */

         case 16:       /* png_ptr->row_info.pixel_depth */
         {
            png_bytep srcptr;
            png_bytep dstptr;

#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
#if !defined(PNG_1_0_X)
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
                /* && _mmx_supported */ )
#else
            if (_mmx_supported)
#endif
            {
               png_uint_32 len;
               int diff;
               int dummy_value_a;   /* fix 'forbidden register spilled' error */
               int dummy_value_d;
               int dummy_value_c;
               int dummy_value_S;
               int dummy_value_D;
               _unmask = ~mask;            /* global variable for -fPIC version */
               srcptr = png_ptr->row_buf + 1;
               dstptr = row;
               len  = png_ptr->width &~7;  /* reduce to multiple of 8 */
               diff = (int) (png_ptr->width & 7); /* amount lost // */

               __asm__ __volatile__ (
                  "movd      _unmask, %%mm7   \n\t" /* load bit pattern */
                  "psubb     %%mm6, %%mm6     \n\t" /* zero mm6 */
                  "punpcklbw %%mm7, %%mm7     \n\t"
                  "punpcklwd %%mm7, %%mm7     \n\t"
                  "punpckldq %%mm7, %%mm7     \n\t" /* fill reg with 8 masks */

                  "movq      _mask16_0, %%mm0 \n\t"
                  "movq      _mask16_1, %%mm1 \n\t"

                  "pand      %%mm7, %%mm0     \n\t"
                  "pand      %%mm7, %%mm1     \n\t"

                  "pcmpeqb   %%mm6, %%mm0     \n\t"
                  "pcmpeqb   %%mm6, %%mm1     \n\t"

/* preload        "movl      len, %%ecx       \n\t" // load length of line */
/* preload        "movl      srcptr, %%esi    \n\t" // load source */
/* preload        "movl      dstptr, %%edi    \n\t" // load dest */

                  "cmpl      $0, %%ecx        \n\t"
                  "jz        mainloop16end    \n\t"

                "mainloop16:                  \n\t"
                  "movq      (%%esi), %%mm4   \n\t"
                  "pand      %%mm0, %%mm4     \n\t"
                  "movq      %%mm0, %%mm6     \n\t"
                  "movq      (%%edi), %%mm7   \n\t"
                  "pandn     %%mm7, %%mm6     \n\t"
                  "por       %%mm6, %%mm4     \n\t"
                  "movq      %%mm4, (%%edi)   \n\t"

                  "movq      8(%%esi), %%mm5  \n\t"
                  "pand      %%mm1, %%mm5     \n\t"
                  "movq      %%mm1, %%mm7     \n\t"
                  "movq      8(%%edi), %%mm6  \n\t"
                  "pandn     %%mm6, %%mm7     \n\t"
                  "por       %%mm7, %%mm5     \n\t"
                  "movq      %%mm5, 8(%%edi)  \n\t"

                  "addl      $16, %%esi       \n\t" /* inc by 16 bytes processed */
                  "addl      $16, %%edi       \n\t"
                  "subl      $8, %%ecx        \n\t" /* dec by 8 pixels processed */
                  "ja        mainloop16       \n\t"

                "mainloop16end:               \n\t"
/* preload        "movl      diff, %%ecx      \n\t" // (diff is in eax) */
                  "movl      %%eax, %%ecx     \n\t"
                  "cmpl      $0, %%ecx        \n\t"
                  "jz        end16            \n\t"
/* preload        "movl      mask, %%edx      \n\t" */
                  "sall      $24, %%edx       \n\t" /* make low byte, high byte */

                "secondloop16:                \n\t"
                  "sall      %%edx            \n\t" /* move high bit to CF */
                  "jnc       skip16           \n\t" /* if CF = 0 */
                  "movw      (%%esi), %%ax    \n\t"
                  "movw      %%ax, (%%edi)    \n\t"

                "skip16:                      \n\t"
                  "addl      $2, %%esi        \n\t"
                  "addl      $2, %%edi        \n\t"
                  "decl      %%ecx            \n\t"
                  "jnz       secondloop16     \n\t"

                "end16:                       \n\t"
                  "EMMS                       \n\t" /* DONE */

                  : "=a" (dummy_value_a),           /* output regs (dummy) */
                    "=c" (dummy_value_c),
                    "=d" (dummy_value_d),
                    "=S" (dummy_value_S),
                    "=D" (dummy_value_D)

                  : "0" (diff),        /* eax       // input regs */
/* was (unmask)     " "    RESERVED    // ebx       // Global Offset Table idx */
                    "1" (len),         /* ecx */
                    "2" (mask),        /* edx */
                    "3" (srcptr),      /* esi */
                    "4" (dstptr)       /* edi */

#if 0  /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
                  : "%mm0", "%mm1", "%mm4"          /* clobber list */
                  , "%mm5", "%mm6", "%mm7"
#endif
               );
            }
            else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
            {
               register png_uint_32 i;
               png_uint_32 initial_val = BPP2 * png_pass_start[png_ptr->pass];
                 /* png.c:  png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
               register int stride = BPP2 * png_pass_inc[png_ptr->pass];
                 /* png.c:  png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
               register int rep_bytes = BPP2 * png_pass_width[png_ptr->pass];
                 /* png.c:  png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
               png_uint_32 len = png_ptr->width &~7;  /* reduce to mult. of 8 */
               int diff = (int) (png_ptr->width & 7); /* amount lost */
               register png_uint_32 final_val = BPP2 * len;   /* GRR bugfix */

               srcptr = png_ptr->row_buf + 1 + initial_val;
               dstptr = row + initial_val;

               for (i = initial_val; i < final_val; i += stride)
               {
                  png_memcpy(dstptr, srcptr, rep_bytes);
                  srcptr += stride;
                  dstptr += stride;
               }
               if (diff)  /* number of leftover pixels:  3 for pngtest */
               {
                  final_val+=diff*BPP2;
                  for (; i < final_val; i += stride)
                  {
                     if (rep_bytes > (int)(final_val-i))
                        rep_bytes = (int)(final_val-i);
                     png_memcpy(dstptr, srcptr, rep_bytes);
                     srcptr += stride;
                     dstptr += stride;
                  }
               }
            } /* end of else (_mmx_supported) */

            break;
         }       /* end 16 bpp */

         case 24:       /* png_ptr->row_info.pixel_depth */
         {
            png_bytep srcptr;
            png_bytep dstptr;

#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
#if !defined(PNG_1_0_X)
            if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
                /* && _mmx_supported */ )
#else
            if (_mmx_supported)
#endif
            {
               png_uint_32 len;
               int diff;
               int dummy_value_a;   /* fix 'forbidden register spilled' error */
               int dummy_value_d;
               int dummy_value_c;
               int dummy_value_S;
               int dummy_value_D;
               _unmask = ~mask;            /* global variable for -fPIC version */
               srcptr = png_ptr->row_buf + 1;
               dstptr = row;
               len  = png_ptr->width &~7;  /* reduce to multiple of 8 */
               diff = (int) (png_ptr->width & 7); /* amount lost // */

               __asm__ __volatile__ (
                  "movd      _unmask, %%mm7   \n\t" /* load bit pattern */
                  "psubb     %%mm6, %%mm6     \n\t" /* zero mm6 */
                  "punpcklbw %%mm7, %%mm7     \n\t"
                  "punpcklwd %%mm7, %%mm7     \n\t"
                  "punpckldq %%mm7, %%mm7     \n\t" /* fill reg with 8 masks */

                  "movq      _mask24_0, %%mm0 \n\t"
                  "movq      _mask24_1, %%mm1 \n\t"
                  "movq      _mask24_2, %%mm2 \n\t"

                  "pand      %%mm7, %%mm0     \n\t"
                  "pand      %%mm7, %%mm1     \n\t"
                  "pand      %%mm7, %%mm2     \n\t"

                  "pcmpeqb   %%mm6, %%mm0     \n\t"
                  "pcmpeqb   %%mm6, %%mm1     \n\t"
                  "pcmpeqb   %%mm6, %%mm2     \n\t"

/* preload        "movl      len, %%ecx       \n\t" // load length of line */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?