📄 pnggccrd.c
字号:
"psubb %%mm6, %%mm6 \n\t" // zero mm6
"punpcklbw %%mm7, %%mm7 \n\t"
"punpcklwd %%mm7, %%mm7 \n\t"
"punpckldq %%mm7, %%mm7 \n\t" // fill reg with 8 masks
"movq _mask48_0, %%mm0 \n\t"
"movq _mask48_1, %%mm1 \n\t"
"movq _mask48_2, %%mm2 \n\t"
"movq _mask48_3, %%mm3 \n\t"
"movq _mask48_4, %%mm4 \n\t"
"movq _mask48_5, %%mm5 \n\t"
"pand %%mm7, %%mm0 \n\t"
"pand %%mm7, %%mm1 \n\t"
"pand %%mm7, %%mm2 \n\t"
"pand %%mm7, %%mm3 \n\t"
"pand %%mm7, %%mm4 \n\t"
"pand %%mm7, %%mm5 \n\t"
"pcmpeqb %%mm6, %%mm0 \n\t"
"pcmpeqb %%mm6, %%mm1 \n\t"
"pcmpeqb %%mm6, %%mm2 \n\t"
"pcmpeqb %%mm6, %%mm3 \n\t"
"pcmpeqb %%mm6, %%mm4 \n\t"
"pcmpeqb %%mm6, %%mm5 \n\t"
// preload "movl len, %%ecx \n\t" // load length of line
// preload "movl srcptr, %%esi \n\t" // load source
// preload "movl dstptr, %%edi \n\t" // load dest
"cmpl $0, %%ecx \n\t"
"jz mainloop48end \n\t"
"mainloop48: \n\t"
"movq (%%esi), %%mm7 \n\t"
"pand %%mm0, %%mm7 \n\t"
"movq %%mm0, %%mm6 \n\t"
"pandn (%%edi), %%mm6 \n\t"
"por %%mm6, %%mm7 \n\t"
"movq %%mm7, (%%edi) \n\t"
"movq 8(%%esi), %%mm6 \n\t"
"pand %%mm1, %%mm6 \n\t"
"movq %%mm1, %%mm7 \n\t"
"pandn 8(%%edi), %%mm7 \n\t"
"por %%mm7, %%mm6 \n\t"
"movq %%mm6, 8(%%edi) \n\t"
"movq 16(%%esi), %%mm6 \n\t"
"pand %%mm2, %%mm6 \n\t"
"movq %%mm2, %%mm7 \n\t"
"pandn 16(%%edi), %%mm7 \n\t"
"por %%mm7, %%mm6 \n\t"
"movq %%mm6, 16(%%edi) \n\t"
"movq 24(%%esi), %%mm7 \n\t"
"pand %%mm3, %%mm7 \n\t"
"movq %%mm3, %%mm6 \n\t"
"pandn 24(%%edi), %%mm6 \n\t"
"por %%mm6, %%mm7 \n\t"
"movq %%mm7, 24(%%edi) \n\t"
"movq 32(%%esi), %%mm6 \n\t"
"pand %%mm4, %%mm6 \n\t"
"movq %%mm4, %%mm7 \n\t"
"pandn 32(%%edi), %%mm7 \n\t"
"por %%mm7, %%mm6 \n\t"
"movq %%mm6, 32(%%edi) \n\t"
"movq 40(%%esi), %%mm7 \n\t"
"pand %%mm5, %%mm7 \n\t"
"movq %%mm5, %%mm6 \n\t"
"pandn 40(%%edi), %%mm6 \n\t"
"por %%mm6, %%mm7 \n\t"
"movq %%mm7, 40(%%edi) \n\t"
"addl $48, %%esi \n\t" // inc by 48 bytes processed
"addl $48, %%edi \n\t"
"subl $8, %%ecx \n\t" // dec by 8 pixels processed
"ja mainloop48 \n\t"
"mainloop48end: \n\t"
// preload "movl diff, %%ecx \n\t" // (diff is in eax)
"movl %%eax, %%ecx \n\t"
"cmpl $0, %%ecx \n\t"
"jz end48 \n\t"
// preload "movl mask, %%edx \n\t"
"sall $24, %%edx \n\t" // make low byte, high byte
"secondloop48: \n\t"
"sall %%edx \n\t" // move high bit to CF
"jnc skip48 \n\t" // if CF = 0
"movl (%%esi), %%eax \n\t"
"movl %%eax, (%%edi) \n\t"
"skip48: \n\t"
"addl $4, %%esi \n\t"
"addl $4, %%edi \n\t"
"decl %%ecx \n\t"
"jnz secondloop48 \n\t"
"end48: \n\t"
"EMMS \n\t" // DONE
: "=a" (dummy_value_a), // output regs (dummy)
"=d" (dummy_value_d),
"=c" (dummy_value_c),
"=S" (dummy_value_S),
"=D" (dummy_value_D)
: "3" (srcptr), // esi // input regs
"4" (dstptr), // edi
"0" (diff), // eax
// was (unmask) "b" RESERVED // ebx // Global Offset Table idx
"2" (len), // ecx
"1" (mask) // edx
#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1", "%mm2", "%mm3" // clobber list
, "%mm4", "%mm5", "%mm6", "%mm7"
#endif
);
}
else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
register png_uint_32 i;
png_uint_32 initial_val = BPP6 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP6 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP6 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP6 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
for (i = initial_val; i < final_val; i += stride)
{
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP6;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
} /* end of else (_mmx_supported) */
break;
} /* end 48 bpp */
case 64: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
register png_uint_32 i;
png_uint_32 initial_val = BPP8 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP8 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP8 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP8 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
for (i = initial_val; i < final_val; i += stride)
{
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP8;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
break;
} /* end 64 bpp */
default: /* png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64 */
{
/* this should never happen */
png_warning(png_ptr, "Invalid row_info.pixel_depth in pnggccrd");
break;
}
} /* end switch (png_ptr->row_info.pixel_depth) */
} /* end if (non-trivial mask) */
} /* end png_combine_row() */
#endif /* PNG_HAVE_ASSEMBLER_COMBINE_ROW */
/*===========================================================================*/
/* */
/* P N G _ D O _ R E A D _ I N T E R L A C E */
/* */
/*===========================================================================*/
#if defined(PNG_READ_INTERLACING_SUPPORTED)
#if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)
/* png_do_read_interlace() is called after any 16-bit to 8-bit conversion
* has taken place. [GRR: what other steps come before and/or after?]
*/
void /* PRIVATE */
png_do_read_interlace(png_structp png_ptr)
{
png_row_infop row_info = &(png_ptr->row_info);
png_bytep row = png_ptr->row_buf + 1;
int pass = png_ptr->pass;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
png_uint_32 transformations = png_ptr->transformations;
#endif
png_debug(1, "in png_do_read_interlace (pnggccrd.c)\n");
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if (_mmx_supported == 2) {
#if !defined(PNG_1_0_X)
/* this should have happened in png_init_mmx_flags() already */
png_warning(png_ptr, "asm_flags may not have been initialized");
#endif
png_mmx_support();
}
#endif
if (row != NULL && row_info != NULL)
{
png_uint_32 final_width;
final_width = row_info->width * png_pass_inc[pass];
switch (row_info->pixel_depth)
{
case 1:
{
png_bytep sp, dp;
int sshift, dshift;
int s_start, s_end, s_inc;
png_byte v;
png_uint_32 i;
int j;
sp = row + (png_size_t)((row_info->width - 1) >> 3);
dp = row + (png_size_t)((final_width - 1) >> 3);
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (transformations & PNG_PACKSWAP)
{
sshift = (int)((row_info->width + 7) & 7);
dshift = (int)((final_width + 7) & 7);
s_start = 7;
s_end = 0;
s_inc = -1;
}
else
#endif
{
sshift = 7 - (int)((row_info->width + 7) & 7);
dshift = 7 - (int)((final_width + 7) & 7);
s_start = 0;
s_end = 7;
s_inc = 1;
}
for (i = row_info->width; i; i--)
{
v = (png_byte)((*sp >> sshift) & 0x1);
for (j = 0; j < png_pass_inc[pass]; j++)
{
*dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
*dp |= (png_byte)(v << dshift);
if (dshift == s_end)
{
dshift = s_start;
dp--;
}
else
dshift += s_inc;
}
if (sshift == s_end)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -