⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fetch_mmx.h

📁 一个很好用的MPEG1/4的开源编码器
💻 H
📖 第 1 页 / 共 3 页
字号:
    "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word */		\    "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word */		\    "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word */		\    "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word */		\    "movq %%mm0, 0x" #x "0(%1)\n"   /* store texture */			\    "movq %%mm1, 0x" #x "8(%1)\n"   /* store texture */			\    "movq %%mm2, 0x" #y "0(%1)\n"   /* store texture */			\    "movq %%mm3, 0x" #y "8(%1)\n"   /* store texture */			\    "addl %2, %0\n"            /* move one texture line down */		\    "addl %2, %0\n"            /* move one texture line down */		\    "addl %2, %3\n"            /* move one mask line down */		\    "addl %2, %3\n"            /* move one mask line down */    asm volatile (PREFETCH_Y_MASK_STEP(0, 1)		PREFETCH_Y_MASK_STEP(2, 3)		PREFETCH_Y_MASK_STEP(4, 5)		PREFETCH_Y_MASK_STEP(6, 7)		: "=r"(dummy1), "=r"(output), "=r"(pitch), "=r"(dummy2)		: "0"(input), "1"(output), "2"(pitch), "3"(mask)		: "memory");  /* TODO: bilinear filtering */}static void inline prefetch_C_withmask(unsigned char *input,				       dct_t *output,				       unsigned char *mask,				       int pitch){  int dummy1, dummy2;  int mean, count;  /* compute mean of visible pixels */  asm volatile (      "pxor %%mm7, %%mm7\n"      /* mm7 = zero        */      "pxor %%mm6, %%mm6\n"      /* mm6 = mask    accumulator */      "pxor %%mm5, %%mm5\n"      /* mm5 = texture accumulator */      /* 1st pass */      "movq (%2),     %%mm0\n"   /* load 1st mask line up left part */      "movq (%2, %1, 2), %%mm1\n"   /* load 1st mask line down left part */      "por %%mm0, %%mm1\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 1st mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 1st mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm1\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%2),     %%mm0\n"   /* load 2nd mask line up left part */      "movq (%2, %1, 2), %%mm3\n"   /* load 2nd mask line down left part */      "por %%mm0, %%mm3\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 2nd mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 2nd mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm3\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%0),     %%mm0\n"   /* load 1st texture line */      "movq (%0, %1), %%mm2\n"   /* load 2nd texture line */      "pand %%mm1, %%mm0\n"        /* mask 1st texture line */      "pand %%mm3, %%mm2\n"        /* mask 2nd texture line */      "psubsb %%mm1, %%mm6\n"      /* accumulate mask */      "psubsb %%mm3, %%mm6\n"      /* accumulate mask */      "movq %%mm0, %%mm1\n"      /* copy 1st texture line */      "movq %%mm2, %%mm3\n"      /* copy 2nd texture line */      "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word for addition */      "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word for addition */      "paddw %%mm0, %%mm5\n"       /* accumulate texture */      "paddw %%mm1, %%mm5\n"       /* accumulate texture */      "paddw %%mm2, %%mm5\n"       /* accumulate texture */      "paddw %%mm3, %%mm5\n"       /* accumulate texture */      "addl %1, %0\n"            /* move one texture line down */      "addl %1, %0\n"            /* move one texture line down */      /* 2nd pass */      "movq (%2),     %%mm0\n"   /* load 1st mask line up left part */      "movq (%2, %1, 2), %%mm1\n"   /* load 1st mask line down left part */      "por %%mm0, %%mm1\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 1st mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 1st mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm1\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%2),     %%mm0\n"   /* load 2nd mask line up left part */      "movq (%2, %1, 2), %%mm3\n"   /* load 2nd mask line down left part */      "por %%mm0, %%mm3\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 2nd mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 2nd mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm3\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%0),     %%mm0\n"   /* load 1st texture line */      "movq (%0, %1), %%mm2\n"   /* load 2nd texture line */      "pand %%mm1, %%mm0\n"        /* mask 1st texture line */      "pand %%mm3, %%mm2\n"        /* mask 2nd texture line */      "psubsb %%mm1, %%mm6\n"      /* accumulate mask */      "psubsb %%mm3, %%mm6\n"      /* accumulate mask */      "movq %%mm0, %%mm1\n"      /* copy 1st texture line */      "movq %%mm2, %%mm3\n"      /* copy 2nd texture line */      "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word for addition */      "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word for addition */      "paddw %%mm0, %%mm5\n"       /* accumulate texture */      "paddw %%mm1, %%mm5\n"       /* accumulate texture */      "paddw %%mm2, %%mm5\n"       /* accumulate texture */      "paddw %%mm3, %%mm5\n"       /* accumulate texture */      "addl %1, %0\n"            /* move one texture line down */      "addl %1, %0\n"            /* move one texture line down */      /* 3rd pass */      "movq (%2),     %%mm0\n"   /* load 1st mask line up left part */      "movq (%2, %1, 2), %%mm1\n"   /* load 1st mask line down left part */      "por %%mm0, %%mm1\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 1st mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 1st mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm1\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "movq %%mm2, %%mm1\n"      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%2),     %%mm0\n"   /* load 2nd mask line up left part */      "movq (%2, %1, 2), %%mm3\n"   /* load 2nd mask line down left part */      "por %%mm0, %%mm3\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 2nd mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 2nd mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm3\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%0),     %%mm0\n"   /* load 1st texture line */      "movq (%0, %1), %%mm2\n"   /* load 2nd texture line */      "pand %%mm1, %%mm0\n"        /* mask 1st texture line */      "pand %%mm3, %%mm2\n"        /* mask 2nd texture line */      "psubsb %%mm1, %%mm6\n"      /* accumulate mask */      "psubsb %%mm3, %%mm6\n"      /* accumulate mask */      "movq %%mm0, %%mm1\n"      /* copy 1st texture line */      "movq %%mm2, %%mm3\n"      /* copy 2nd texture line */      "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word for addition */      "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word for addition */      "paddw %%mm0, %%mm5\n"       /* accumulate texture */      "paddw %%mm1, %%mm5\n"       /* accumulate texture */      "paddw %%mm2, %%mm5\n"       /* accumulate texture */      "paddw %%mm3, %%mm5\n"       /* accumulate texture */      "addl %1, %0\n"            /* move one texture line down */      "addl %1, %0\n"            /* move one texture line down */      /* 4th pass */      "movq (%2),     %%mm0\n"   /* load 1st mask line up left part */      "movq (%2, %1, 2), %%mm1\n"   /* load 1st mask line down left part */      "por %%mm0, %%mm1\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 1st mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 1st mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm1\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm1\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%2),     %%mm0\n"   /* load 2nd mask line up left part */      "movq (%2, %1, 2), %%mm3\n"   /* load 2nd mask line down left part */      "por %%mm0, %%mm3\n"       /* conservative subsample */      "movq 8(%2),     %%mm0\n"  /* load 2nd mask line up right part */      "movq 8(%2, %1, 2), %%mm2\n"  /* load 2nd mask line down right part */      "por %%mm0, %%mm2\n"       /* conservative subsample */      "packsswb %%mm2, %%mm3\n"  /* 0000->00 00FF->7F FF00->80 FFFF->FF */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->FF 00FF->00 FF00->00 FFFF->00 */      "pcmpeqb %%mm7, %%mm3\n"   /* 0000->00 00FF->FF FF00->FF FFFF->FF */      "addl %1, %2\n"            /* move one mask line down */      "addl %1, %2\n"                  "addl %1, %2\n"                  "addl %1, %2\n"                  "movq (%0),     %%mm0\n"   /* load 1st texture line */      "movq (%0, %1), %%mm2\n"   /* load 2nd texture line */      "pand %%mm1, %%mm0\n"        /* mask 1st texture line */      "pand %%mm3, %%mm2\n"        /* mask 2nd texture line */      "psubsb %%mm1, %%mm6\n"      /* accumulate mask */      "psubsb %%mm3, %%mm6\n"      /* accumulate mask */      "movq %%mm0, %%mm1\n"      /* copy 1st texture line */      "movq %%mm2, %%mm3\n"      /* copy 2nd texture line */      "punpcklbw %%mm7, %%mm0\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm1\n" /* unpack texture to word for addition */      "punpcklbw %%mm7, %%mm2\n" /* unpack texture to word for addition */      "punpckhbw %%mm7, %%mm3\n" /* unpack texture to word for addition */      "paddw %%mm0, %%mm5\n"       /* accumulate texture */      "paddw %%mm1, %%mm5\n"       /* accumulate texture */      "paddw %%mm2, %%mm5\n"       /* accumulate texture */      "paddw %%mm3, %%mm5\n"       /* accumulate texture */      "addl %1, %0\n"            /* move one texture line down */      "addl %1, %0\n"            /* move one texture line down */      : "=r"(dummy1), "=r"(pitch), "=r"(dummy2)      : "0"(input), "1"(pitch), "2"(mask)      : "memory");  asm volatile (/* line accumulation */      "movq %%mm6, %%mm0\n"       /* copy column pixel counts */      "psrlq $32, %%mm6\n"        /* move high dword low */      "paddb %%mm6, %%mm0\n"      /* sum column counts */      "movd %%mm0, %%eax\n"       /* get 4 packed counts to register eax */      "movl %%eax, %%ecx\n"       /* copy register eax to register ecx */      "bswap %%ecx\n"             /* swap temporary register ecx */      "addw %%cx, %%ax\n"         /* sum low words */      "addb %%ah, %%al\n"         /* sum low bytes */      "andl $0x000000ff, %%eax\n" /* mask final value */      "movq %%mm5, %%mm1\n"       /* copy column pixel partial sums */      "psrlq $32, %%mm5\n"        /* move high dword low */      "paddw %%mm5, %%mm1\n"      /* sum partial sums */      "movq %%mm1, %%mm0\n"       /* copy column pixel partial sums */      "psrlq $16, %%mm0\n"         /* move high word low */      "paddw %%mm0, %%mm1\n"      /* sum partial sums */      "movd %%mm1, %%ecx\n"       /* get result in ecx */      "andl $0x0000ffff, %%ecx\n" /* mask final value */      : "=a" (count), "=c" (mean)      );  if(count) mean /= count;  /* replicate mean for padding */  asm volatile (      "movd %%ecx, %%mm6\n"       /* mm6 will hold mean value */      "punpcklbw %%mm6, %%mm6\n"  /* replicate mean to dword */      "punpcklwd %%mm6, %%mm6\n"  /* replicate mean to dword */      "punpckldq %%mm6, %%mm6\n"  /* replicate mean to qword */      :      : "c" (mean));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -