/* reconstruct.c — motion-compensation prediction/reconstruction kernels (MMX / 3DNow!) */
#include "mpeg3video.h"
#include <stdio.h>
#ifdef HAVE_MMX
#ifdef HAVE_3Dnow
/*
 * Vertical half-pel prediction, 16 pixels wide, averaged into the destination:
 * d[0..15] = avg(avg(s, s + lx), d) for h rows, rows lx2 bytes apart.
 * pavgusb (3DNow!) is a rounded unsigned byte average.
 *
 * NOTE(review): the asm advances the pointer registers and `loop` decrements
 * %ecx, so those operands must be in-out ("+") — the original declared them
 * as pure inputs, which is undefined behavior in GCC extended asm.  lx2 is
 * declared "+r" only to preserve the %N numbering.  "memory" covers the
 * stores through d; explicit __volatile__ is required now that outputs exist.
 */
static inline void recva_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
unsigned char *sl = s + lx;	/* second source row */
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq (%4), %%mm2\n" /* 8 s +lx */
"movq 8(%4), %%mm3\n" /* 8 s +lx */
"pavgusb %%mm2, %%mm0\n"
"addl %3, %1\n"
"pavgusb %%mm3, %%mm1\n"
"movq (%2), %%mm2\n" /* 8 d */
"movq 8(%2), %%mm3\n" /* 8 d */
"pavgusb %%mm2, %%mm0\n"
"addl %3, %4\n"
"pavgusb %%mm3, %%mm1\n"
"movq %%mm0, (%2)\n"
"movq %%mm1, 8(%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (sl)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
);
}
/*
 * Vertical half-pel prediction, 8 pixels wide (chroma), averaged into d:
 * d[0..7] = avg(avg(s, s + lx), d) for h rows, rows lx2 bytes apart.
 *
 * NOTE(review): operands the asm modifies (pointers, %ecx row counter) are
 * now declared in-out ("+"); the original passed them as inputs (UB per GCC
 * extended-asm rules).  Added "memory"/"cc"/mm-register clobbers.
 */
static inline void recvac_mmx(unsigned char *s, unsigned char *d, int lx,int lx2, int h)
{
unsigned char *sl = s + lx;	/* second source row */
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq (%4), %%mm2\n" /* 8 s +lx */
"addl %3, %1\n"
"pavgusb %%mm2, %%mm0\n"
"movq (%2), %%mm3\n" /* 8 d */
"addl %3, %4\n"
"pavgusb %%mm3, %%mm0\n"
"movq %%mm0, (%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (sl)
:
: "memory", "cc", "mm0", "mm2", "mm3"
);
}
/*
 * Horizontal half-pel prediction, 16 pixels wide, overwriting d:
 * d[i] = avg(s[i], s[i+1]) for h rows, rows lx2 bytes apart.
 * The 1(%1)/9(%1) loads are deliberately unaligned (source + 1 pixel).
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rech_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1 */
"movq 9(%1), %%mm3\n" /* 8 s +1 */
"pavgusb %%mm2, %%mm0\n"
"addl %3, %1\n"
"pavgusb %%mm3, %%mm1\n"
"movq %%mm0, (%2)\n"
"movq %%mm1, 8(%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
);
}
/*
 * Horizontal half-pel prediction, 8 pixels wide (chroma), overwriting d:
 * d[i] = avg(s[i], s[i+1]) for h rows, rows lx2 bytes apart.
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rechc_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1 */
"addl %3, %1\n"
"pavgusb %%mm2, %%mm0\n"
"movq %%mm0, (%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm2"
);
}
/*
 * Horizontal half-pel prediction, 16 pixels wide, averaged into d:
 * d[i] = avg(avg(s[i], s[i+1]), d[i]) for h rows, rows lx2 bytes apart.
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void recha_mmx(unsigned char *s, unsigned char *d,int lx2, int h)
{
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1 */
"movq 9(%1), %%mm3\n" /* 8 s +1 */
"pavgusb %%mm2, %%mm0\n"
"addl %3, %1\n"
"pavgusb %%mm3, %%mm1\n"
"movq (%2), %%mm2\n" /* 8 d */
"movq 8(%2), %%mm3\n" /* 8 d */
"pavgusb %%mm2, %%mm0\n"
"pavgusb %%mm3, %%mm1\n"
"movq %%mm0, (%2)\n"
"movq %%mm1, 8(%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
);
}
/*
 * Horizontal half-pel prediction, 8 pixels wide (chroma), averaged into d:
 * d[i] = avg(avg(s[i], s[i+1]), d[i]) for h rows, rows lx2 bytes apart.
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rechac_mmx(unsigned char *s,unsigned char *d, int lx2, int h)
{
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1 */
"addl %3, %1\n"
"pavgusb %%mm2, %%mm0\n"
"movq (%2), %%mm1\n" /* 8 d */
"pavgusb %%mm1, %%mm0\n"
"movq %%mm0, (%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm1", "mm2"
);
}
/*
 * Diagonal (h+v) half-pel prediction, 16 pixels wide, overwriting d:
 * d[i] is the cascaded pavgusb of s[i], s[i+1], s[i+lx], s[i+lx+1] for h
 * rows, rows lx2 bytes apart.  The next row's loads (mm4..mm7) are reused
 * as the current row's values on the following iteration, so each source
 * row is loaded only once.
 *
 * NOTE(review): cascaded pavgusb only approximates the true 4-tap rounded
 * average (rounding accumulates) — presumably an accepted speed/accuracy
 * trade-off here; confirm against the C reference path.
 * Constraint fix: modified operands are now in-out ("+") rather than
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rec4_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
unsigned char *sl = s + lx;	/* next source row */
__asm__ __volatile__(
"movq (%1), %%mm0\n" /* 8 s */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1*/
"movq 9(%1), %%mm3\n" /* 8 s +1*/
".align 8\n"
"1:"
"movq (%4), %%mm4\n" /* 8 s+lx */
"pavgusb %%mm2, %%mm0\n"
"movq 8(%4), %%mm5\n" /* 8 s+lx */
"pavgusb %%mm3, %%mm1\n"
"movq 1(%4), %%mm6\n" /* 8 s+lx +1*/
"pavgusb %%mm4, %%mm0\n"
"movq 9(%4), %%mm7\n" /* 8 s+lx +1*/
"pavgusb %%mm5, %%mm1\n"
"pavgusb %%mm6, %%mm0\n"
"addl %3, %4\n"
"pavgusb %%mm7, %%mm1\n"
"movq %%mm0, (%2)\n"
"movq %%mm6, %%mm2\n"
"movq %%mm7, %%mm3\n"
"movq %%mm1, 8(%2)\n"
"movq %%mm4, %%mm0\n"
"movq %%mm5, %%mm1\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (sl)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
);
}
/*
 * Diagonal (h+v) half-pel prediction, 8 pixels wide (chroma), overwriting d.
 * Same row-reuse scheme as rec4_mmx, single 8-byte column.
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rec4c_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
unsigned char *sl = s + lx;	/* next source row */
__asm__ __volatile__(
"movq (%1), %%mm0\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1*/
".align 8\n"
"1:"
"movq (%4), %%mm4\n" /* 8 s+lx */
"pavgusb %%mm2, %%mm0\n"
"movq 1(%4), %%mm6\n" /* 8 s+lx +1*/
"pavgusb %%mm4, %%mm0\n"
"addl %3, %4\n"
"pavgusb %%mm6, %%mm0\n"
"movq %%mm0, (%2)\n"
"movq %%mm6, %%mm2\n"
"movq %%mm4, %%mm0\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (sl)
:
: "memory", "cc", "mm0", "mm2", "mm4", "mm6"
);
}
/*
 * Diagonal (h+v) half-pel prediction, 16 pixels wide, averaged into d:
 * like rec4_mmx but the result is further averaged with the existing d
 * contents (bidirectional / B-frame accumulation).
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rec4a_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
unsigned char *sl = s + lx;	/* next source row */
__asm__ __volatile__(
"movq (%1), %%mm0\n" /* 8 s */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1*/
"movq 9(%1), %%mm3\n" /* 8 s +1*/
".align 8\n"
"1:"
"movq (%4), %%mm4\n" /* 8 s+lx */
"pavgusb %%mm2, %%mm0\n"
"movq 8(%4), %%mm5\n" /* 8 s+lx */
"pavgusb %%mm3, %%mm1\n"
"movq 1(%4), %%mm6\n" /* 8 s+lx +1*/
"pavgusb %%mm4, %%mm0\n"
"movq 9(%4), %%mm7\n" /* 8 s+lx +1*/
"pavgusb %%mm5, %%mm1\n"
"movq (%2), %%mm2\n"
"pavgusb %%mm6, %%mm0\n"
"movq 8(%2), %%mm3\n"
"pavgusb %%mm2, %%mm0\n"
"addl %3, %4\n"
"pavgusb %%mm3, %%mm1\n"
"movq %%mm0, (%2)\n"
"pavgusb %%mm7, %%mm1\n"
"movq %%mm6, %%mm2\n"
"movq %%mm7, %%mm3\n"
"movq %%mm1, 8(%2)\n"
"movq %%mm4, %%mm0\n"
"movq %%mm5, %%mm1\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (sl)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
);
}
/*
 * Diagonal (h+v) half-pel prediction, 8 pixels wide (chroma), averaged
 * into d (B-frame accumulation), with rec4c_mmx's row-reuse scheme.
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void rec4ac_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
unsigned char *sl = s + lx;	/* next source row */
__asm__ __volatile__(
"movq (%1), %%mm0\n" /* 8 s */
"movq 1(%1), %%mm2\n" /* 8 s +1*/
".align 8\n"
"1:"
"movq (%4), %%mm4\n" /* 8 s+lx */
"pavgusb %%mm2, %%mm0\n"
"movq 1(%4), %%mm6\n" /* 8 s+lx +1*/
"pavgusb %%mm4, %%mm0\n"
"movq (%2), %%mm1\n" /* 8 d */
"pavgusb %%mm6, %%mm0\n"
"addl %3, %4\n"
"pavgusb %%mm1, %%mm0\n"
"movq %%mm6, %%mm2\n"
"movq %%mm0, (%2)\n"
"movq %%mm4, %%mm0\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2), "+r" (sl)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm4", "mm6"
);
}
#else // HAVE_3DNOW
/*
 * Constants for the plain-MMX averaging fallback (reca_mmx / recac_mmx):
 * the byte-wise average is approximated as (a>>1 & 0x7f) + (b>>1 & 0x7f) + 1.
 * NOTE(review): these statics are referenced *by symbol name* from inline
 * asm ("movq MASK_AND, %%mm5"), so the compiler must keep the symbols and
 * the code is not PIC-safe — verify, and consider __attribute__((used)).
 */
static long long ADD_1 = 0x0101010101010101LL;
static long long MASK_AND = 0x7f7f7f7f7f7f7f7fLL;
#endif
/*
 * Full-pel prediction, 16 pixels wide: copy 16 bytes per row from s to d
 * for h rows, rows lx2 bytes apart (MMX block copy).
 *
 * NOTE(review): modified operands (pointers, %ecx counter) now declared
 * in-out ("+") instead of inputs (UB otherwise); "memory"/"cc"/mm
 * clobbers added.
 */
static inline void rec_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
__asm__ __volatile__(
".align 8\n"
"1:\t"
"movq ( %1 ), %%mm0\n" /* 8 s */
"movq 8( %1 ), %%mm2\n" /* 16 s */
"movq %%mm0, ( %2 )\n"
"addl %3, %1\n"
"movq %%mm2, 8( %2 )\n"
"decl %0\n"
"leal (%2, %3), %2\n"
"jnz 1b"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm2"
);
}
/*
 * Full-pel prediction, 8 pixels wide (chroma): copy 8 bytes per row from
 * s to d for h rows, rows lx2 bytes apart.
 *
 * NOTE(review): modified operands now declared in-out ("+") instead of
 * inputs (UB otherwise); "memory"/"cc"/mm clobbers added.
 */
static inline void recc_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
__asm__ __volatile__(
".align 8\n"
"1:\t"
"movq ( %1 ), %%mm0\n"
"addl %3, %1\n"
"movq %%mm0, ( %2 )\n"
"decl %0\n"
"leal (%2, %3), %2\n"
"jnz 1b"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0"
);
}
/*
 * Full-pel prediction, 16 pixels wide, averaged into d:
 * d[i] = avg(s[i], d[i]) for h rows, rows lx2 bytes apart.
 * 3DNow! path uses pavgusb; the MMX fallback approximates the rounded
 * average as (a>>1 & 0x7f) + (b>>1 & 0x7f) + 1 via MASK_AND / ADD_1.
 *
 * NOTE(review): both asm paths modify their operand registers, so those
 * operands are now declared in-out ("+") rather than inputs (UB otherwise);
 * "memory"/"cc"/mm clobbers added and __volatile__ made explicit.
 * The "movq MASK_AND" absolute-symbol addressing in the MMX path is not
 * PIC-safe — presumably fine for this project's build; verify.
 */
static inline void reca_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
#ifdef HAVE_3Dnow
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq (%2), %%mm2\n" /* 8 d */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq 8(%2), %%mm3\n" /* 8 d */
"pavgusb %%mm2, %%mm0\n"
"addl %3, %1\n"
"pavgusb %%mm3, %%mm1\n"
"movq %%mm0, (%2)\n"
"movq %%mm1, 8(%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3"
);
#else /* No 3dnow */
__asm__ __volatile__(
"movq MASK_AND, %%mm5\n"
"movq ADD_1, %%mm6\n"
"1:\t"
"movq (%1),%%mm0\n" /* Load 16 pixels from each row */
"movq (%2),%%mm1\n"
"movq 8(%1),%%mm2\n"
"movq 8(%2),%%mm3\n"
"psrlw $1,%%mm0\n" /* Shift pixels down */
"psrlw $1,%%mm1\n"
"pand %%mm5,%%mm0\n" /* Zero out significant bit */
"psrlw $1,%%mm2\n"
"pand %%mm5,%%mm1\n"
"psrlw $1,%%mm3\n"
"pand %%mm5,%%mm2\n"
"paddusb %%mm1,%%mm0\n" /* Add pixels */
"pand %%mm5,%%mm3\n"
"paddusb %%mm3,%%mm2\n"
"paddusb %%mm6,%%mm0\n" /* Add 1 to results */
"paddusb %%mm6,%%mm2\n"
"movq %%mm0,(%2)\n"
"addl %3,%1\n"
"movq %%mm2, 8(%2)\n"
"decl %0\n"
"leal (%2, %3), %2\n"
"jnz 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm1", "mm2", "mm3", "mm5", "mm6"
);
#endif
}
/*
 * Full-pel prediction, 8 pixels wide (chroma), averaged into d:
 * d[i] = avg(s[i], d[i]) for h rows, rows lx2 bytes apart.
 * Same 3DNow!/MMX split and averaging scheme as reca_mmx.
 *
 * NOTE(review): both asm paths modify their operand registers, so those
 * operands are now declared in-out ("+") rather than inputs (UB otherwise);
 * "memory"/"cc"/mm clobbers added and __volatile__ made explicit.
 */
static inline void recac_mmx(unsigned char *s, unsigned char *d, int lx2, int h)
{
#ifdef HAVE_3Dnow
__asm__ __volatile__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq (%2), %%mm2\n" /* 8 d */
"pavgusb %%mm2, %%mm0\n"
"addl %3, %1\n"
"movq %%mm0, (%2)\n"
"addl %3, %2\n"
"loop 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm2"
);
#else /* No 3dnow */
__asm__ __volatile__(
"movq MASK_AND, %%mm5\n"
"movq ADD_1, %%mm6\n"
"1:\t"
"movq (%1),%%mm0\n"
"movq (%2),%%mm1\n"
"psrlw $1,%%mm0\n"
"psrlw $1,%%mm1\n"
"pand %%mm5,%%mm0\n"
"pand %%mm5,%%mm1\n"
"paddusb %%mm1,%%mm0\n"
"paddusb %%mm6,%%mm0\n"
"addl %3,%1\n"
"movq %%mm0,(%2)\n"
"decl %0\n"
"leal (%2, %3), %2\n"
"jnz 1b\n"
: "+c" (h), "+r" (s), "+r" (d), "+r" (lx2)
:
: "memory", "cc", "mm0", "mm1", "mm5", "mm6"
);
#endif
}
static inline void recv_mmx(unsigned char *s, unsigned char *d, int lx, int lx2, int h)
{
#ifdef HAVE_3Dnow
__asm__(
".align 8\n"
"1:"
"movq (%1), %%mm0\n" /* 8 s */
"movq (%4), %%mm2\n" /* 8 s +lx */
"movq 8(%1), %%mm1\n" /* 8 s */
"movq 8(%4), %%mm3\n" /* 8 s +lx **/
"pavgusb %%mm2, %%mm0\n"
"addl %3, %1\n"
"pavgusb %%mm3, %%mm1\n"
"movq %%mm0, (%2)\n"
"addl %3, %4\n"
"movq %%mm1, 8(%2)\n"
"addl %3, %2\n"
"loop 1b\n"
:
: "c" (h), "r" (s), "r" (d), "r" (lx2), "r" (s +lx)
/* NOTE(review): this copy of the file is truncated here — the closing of
 * recv_mmx's asm statement and function body, its non-3DNow! fallback
 * branch, and any remaining functions / trailing #endif are missing and
 * must be restored from the original libmpeg3 reconstruct.c. */