📄 fetch_mmx.h
字号:
/* fetch and fill empty pixels with mean value */ asm volatile ( /* 1st pass */ "movq (%3), %%mm0\n" /* load 1st mask line up left part */ "movq (%3, %2, 2), %%mm1\n" /* load 1st mask line down left part */ "por %%mm0, %%mm1\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 1st mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 1st mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm1\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%3), %%mm0\n" /* load 2nd mask line up left part */ "movq (%3, %2, 2), %%mm3\n" /* load 2nd mask line down left part */ "por %%mm0, %%mm3\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 2nd mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 2nd mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm3\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%0), %%mm0\n" /* load 1st texture line */ "movq (%0, %2), %%mm2\n" /* load 2nd texture line */ "pand %%mm1, %%mm0\n" /* mask 1st texture line */ "pand %%mm3, %%mm2\n" /* mask 2nd texture line */ "pcmpeqb %%mm7, %%mm1\n" /* invert mask */ "pcmpeqb %%mm7, %%mm3\n" /* invert mask */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm1, %%mm4\n" /* mask mean for 1st texture line */ "por %%mm4, %%mm0\n" /* join texture and mean */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm3, %%mm4\n" /* mask mean for 2nd texture line */ "por %%mm4, %%mm2\n" /* join texture and mean */ "movq %%mm0, 
%%mm1\n" /* copy 1st texture line */ "movq %%mm2, %%mm3\n" /* copy 2nd texture line */ "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */ "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */ "movq %%mm0, 0x00(%1)\n" /* store texture */ "movq %%mm1, 0x08(%1)\n" /* store texture */ "movq %%mm2, 0x10(%1)\n" /* store texture */ "movq %%mm3, 0x18(%1)\n" /* store texture */ "addl %2, %0\n" /* move one texture line down */ "addl %2, %0\n" /* move one texture line down */ /* 2nd pass */ "movq (%3), %%mm0\n" /* load 1st mask line up left part */ "movq (%3, %2, 2), %%mm1\n" /* load 1st mask line down left part */ "por %%mm0, %%mm1\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 1st mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 1st mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm1\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%3), %%mm0\n" /* load 2nd mask line up left part */ "movq (%3, %2, 2), %%mm3\n" /* load 2nd mask line down left part */ "por %%mm0, %%mm3\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 2nd mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 2nd mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm3\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%0), %%mm0\n" /* load 1st texture line */ "movq (%0, %2), %%mm2\n" /* load 2nd 
texture line */ "pand %%mm1, %%mm0\n" /* mask 1st texture line */ "pand %%mm3, %%mm2\n" /* mask 2nd texture line */ "pcmpeqb %%mm7, %%mm1\n" /* invert mask */ "pcmpeqb %%mm7, %%mm3\n" /* invert mask */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm1, %%mm4\n" /* mask mean for 1st texture line */ "por %%mm4, %%mm0\n" /* join texture and mean */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm3, %%mm4\n" /* mask mean for 2nd texture line */ "por %%mm4, %%mm2\n" /* join texture and mean */ "movq %%mm0, %%mm1\n" /* copy 1st texture line */ "movq %%mm2, %%mm3\n" /* copy 2nd texture line */ "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */ "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */ "movq %%mm0, 0x20(%1)\n" /* store texture */ "movq %%mm1, 0x28(%1)\n" /* store texture */ "movq %%mm2, 0x30(%1)\n" /* store texture */ "movq %%mm3, 0x38(%1)\n" /* store texture */ "addl %2, %0\n" /* move one texture line down */ "addl %2, %0\n" /* move one texture line down */ /* 3rd pass */ "movq (%3), %%mm0\n" /* load 1st mask line up left part */ "movq (%3, %2, 2), %%mm1\n" /* load 1st mask line down left part */ "por %%mm0, %%mm1\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 1st mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 1st mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm1\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%3), %%mm0\n" /* load 2nd mask line up left part */ "movq (%3, %2, 2), %%mm3\n" /* load 2nd mask line down left part */ "por %%mm0, %%mm3\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 2nd mask line 
up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 2nd mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm3\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%0), %%mm0\n" /* load 1st texture line */ "movq (%0, %2), %%mm2\n" /* load 2nd texture line */ "pand %%mm1, %%mm0\n" /* mask 1st texture line */ "pand %%mm3, %%mm2\n" /* mask 2nd texture line */ "pcmpeqb %%mm7, %%mm1\n" /* invert mask */ "pcmpeqb %%mm7, %%mm3\n" /* invert mask */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm1, %%mm4\n" /* mask mean for 1st texture line */ "por %%mm4, %%mm0\n" /* join texture and mean */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm3, %%mm4\n" /* mask mean for 2nd texture line */ "por %%mm4, %%mm2\n" /* join texture and mean */ "movq %%mm0, %%mm1\n" /* copy 1st texture line */ "movq %%mm2, %%mm3\n" /* copy 2nd texture line */ "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */ "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */ "movq %%mm0, 0x40(%1)\n" /* store texture */ "movq %%mm1, 0x48(%1)\n" /* store texture */ "movq %%mm2, 0x50(%1)\n" /* store texture */ "movq %%mm3, 0x58(%1)\n" /* store texture */ "addl %2, %0\n" /* move one texture line down */ "addl %2, %0\n" /* move one texture line down */ /* 4th pass */ "movq (%3), %%mm0\n" /* load 1st mask line up left part */ "movq (%3, %2, 2), %%mm1\n" /* load 1st mask line down left part */ "por %%mm0, %%mm1\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 1st mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 1st mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ 
"packsswb %%mm2, %%mm1\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm1\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%3), %%mm0\n" /* load 2nd mask line up left part */ "movq (%3, %2, 2), %%mm3\n" /* load 2nd mask line down left part */ "por %%mm0, %%mm3\n" /* conservative subsample */ "movq 8(%3), %%mm0\n" /* load 2nd mask line up right part */ "movq 8(%3, %2, 2), %%mm2\n" /* load 2nd mask line down right part */ "por %%mm0, %%mm2\n" /* conservative subsample */ "packsswb %%mm2, %%mm3\n" /* 0000->00 00FF->7F FF00->80 FFFF->FF */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->FF 00FF->00 FF00->00 FFFF->00 */ "pcmpeqb %%mm7, %%mm3\n" /* 0000->00 00FF->FF FF00->FF FFFF->FF */ "addl %2, %3\n" /* move one mask line down */ "addl %2, %3\n" "addl %2, %3\n" "addl %2, %3\n" "movq (%0), %%mm0\n" /* load 1st texture line */ "movq (%0, %2), %%mm2\n" /* load 2nd texture line */ "pand %%mm1, %%mm0\n" /* mask 1st texture line */ "pand %%mm3, %%mm2\n" /* mask 2nd texture line */ "pcmpeqb %%mm7, %%mm1\n" /* invert mask */ "pcmpeqb %%mm7, %%mm3\n" /* invert mask */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm1, %%mm4\n" /* mask mean for 1st texture line */ "por %%mm4, %%mm0\n" /* join texture and mean */ "movq %%mm6, %%mm4\n" /* load mean value */ "pand %%mm3, %%mm4\n" /* mask mean for 2nd texture line */ "por %%mm4, %%mm2\n" /* join texture and mean */ "movq %%mm0, %%mm1\n" /* copy 1st texture line */ "movq %%mm2, %%mm3\n" /* copy 2nd texture line */ "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */ "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */ "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */ "movq %%mm0, 0x60(%1)\n" /* store texture */ "movq %%mm1, 0x68(%1)\n" /* store texture */ "movq %%mm2, 0x70(%1)\n" /* store texture 
*/ "movq %%mm3, 0x78(%1)\n" /* store texture */ "addl %2, %0\n" /* move one texture line down */ "addl %2, %0\n" /* move one texture line down */ : "=r"(dummy1), "=r"(output), "=r"(pitch), "=r"(dummy2) : "0"(input), "1"(output), "2"(pitch), "3"(mask) : "memory"); /* TODO: bilinear filtering */}

/*
 * diff - per-pixel difference between an input block and a reference block.
 *
 * Runs eight DIFF_STEP rows.  Each row reads 8 bytes from `input` and 8 bytes
 * from `ref`, widens every byte to a 16-bit word, subtracts the reference
 * words from the input words with signed saturation (psubsw) and stores the
 * eight resulting words (16 bytes) to `output`.  Row x is written at byte
 * offsets 0x<x>0 and 0x<x>8, so 128 bytes of `output` are written in total.
 *
 *   input   first row of the source pixels
 *   ref     first row of the reference pixels
 *   output  destination for the word differences
 *           (presumably dct_t is a 16-bit type -- TODO confirm)
 *   ipitch  byte distance between consecutive rows of `input`
 *   rpitch  byte distance between consecutive rows of `ref`
 *
 * NOTE(review): mm0-mm3 and mm7 are overwritten but not listed as clobbers,
 * and no `emms` is issued here; callers presumably handle MMX/x87 state
 * themselves -- confirm.
 * NOTE(review): the row cursors are `int` and are advanced with `addl`, so
 * this looks 32-bit-x86 only -- confirm before building for x86-64.
 */
static inline void diff(unsigned char *input,
                        unsigned char *ref,
                        dct_t *output,
                        int ipitch,
                        int rpitch)
{
    /* Scratch outputs: receive the advanced input/ref addresses (operands 0 and 3). */
    int src_cursor;
    int ref_cursor;

/*
 * One row of the difference.  Operand map:
 *   %0 input cursor, %1 output, %2 ipitch, %3 ref cursor, %4 rpitch.
 * mm7 must be zero on entry (it is cleared once before the first row).
 */
#define DIFF_STEP(x) \
    "movq (%0), %%mm0\n"            /* 8 input bytes                    */ \
    "movq (%3), %%mm2\n"            /* 8 reference bytes                */ \
    "movq %%mm0, %%mm1\n"           /* duplicate for the high half      */ \
    "movq %%mm2, %%mm3\n" \
    "punpcklbw %%mm7, %%mm0\n"      /* input bytes 0..3 -> words        */ \
    "punpckhbw %%mm7, %%mm1\n"      /* input bytes 4..7 -> words        */ \
    "punpcklbw %%mm7, %%mm2\n"      /* reference bytes 0..3 -> words    */ \
    "punpckhbw %%mm7, %%mm3\n"      /* reference bytes 4..7 -> words    */ \
    "psubsw %%mm2, %%mm0\n"         /* input - reference, low words     */ \
    "psubsw %%mm3, %%mm1\n"         /* input - reference, high words    */ \
    "movq %%mm0, 0x" #x "0(%1)\n"   /* store words 0..3 of row x        */ \
    "movq %%mm1, 0x" #x "8(%1)\n"   /* store words 4..7 of row x        */ \
    "addl %2, %0\n"                 /* next input row                   */ \
    "addl %4, %3\n"                 /* next reference row               */

    __asm__ __volatile__ (
        "pxor %%mm7, %%mm7\n"       /* mm7 = 0, used as the unpack filler */
        DIFF_STEP(0)
        DIFF_STEP(1)
        DIFF_STEP(2)
        DIFF_STEP(3)
        DIFF_STEP(4)
        DIFF_STEP(5)
        DIFF_STEP(6)
        DIFF_STEP(7)
        : "=r"(src_cursor), "=r"(output), "=r"(ipitch), "=r"(ref_cursor), "=r"(rpitch)
        : "0"(input), "1"(output), "2"(ipitch), "3"(ref), "4"(rpitch)
        : "memory");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -