⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 generic_mmx.h

📁 DirectFB-1.0.1可用于linux的嵌入式GUI
💻 H
📖 第 1 页 / 共 2 页
字号:
     static const u32 einser[] = { 0x01000100, 0x01000100 };     static const u32 zeros[]  = { 0, 0 };     __asm__ __volatile__ (	       "movq     %3, %%mm7\n\t"               "cmp      $0, %2\n\t"               "jne      3f\n\t"               "movq     %4, %%mm6\n\t"               "movd     %5, %%mm0\n\t"               "punpcklbw %%mm6, %%mm0\n\t" /* mm0 = 00aa 00rr 00gg 00bb */               "punpcklwd %%mm0, %%mm0\n\t" /* mm0 = 00aa 00aa xxxx xxxx */               "movq      %%mm7, %%mm1\n\t"               "punpckldq %%mm0, %%mm0\n\t" /* mm0 = 00aa 00aa 00aa 00aa */               "psubw     %%mm0, %%mm1\n\t"               ".align   16\n"               "4:\n\t"                 /* blend from color */               "testw    $0xF000, 6(%0)\n\t"               "jnz      1f\n\t"               "movq     (%0), %%mm0\n\t"               "pmullw   %%mm1, %%mm0\n\t"               "psrlw    $8, %%mm0\n\t"               "movq     %%mm0, (%0)\n"               "1:\n\t"	       "add      $8, %0\n\t"               "dec      %1\n\t"               "jnz      4b\n\t"               "jmp      2f\n\t"               ".align   16\n"               "3:\n\t"                      /* blend from Sacc */               "testw    $0xF000, 6(%0)\n\t"               "jnz      1f\n\t"               "movq     (%2), %%mm2\n\t"               "movq     (%0), %%mm0\n\t"	       "punpckhwd %%mm2, %%mm2\n\t" /* mm2 = 00aa 00aa xxxx xxxx */               "movq	  %%mm7, %%mm1\n\t"               "punpckhdq %%mm2, %%mm2\n\t" /* mm2 = 00aa 00aa 00aa 00aa */               "psubw    %%mm2, %%mm1\n\t"               "pmullw   %%mm1, %%mm0\n\t"               "psrlw    $8, %%mm0\n\t"               "movq     %%mm0, (%0)\n"               "1:\n\t"	       "add      $8, %2\n\t"	       "add      $8, %0\n\t"               "dec      %1\n\t"               "jnz      3b\n\t"               "2:\n\t"               "emms"               : /* no outputs */               : "D" (gfxs->Xacc), "c" (gfxs->length), "S" (gfxs->Sacc),                 "m" (*einser), "m" (*zeros), "m" (gfxs->color)               : "%st", "memory");}static void Xacc_blend_srcalpha_MMX( GenefxState *gfxs ){     static const u32 ones[]  = { 0x00010001, 0x00010001 };     static const u32 zeros[] = { 0, 0 };     __asm__ __volatile__ (	       "movq     %3, %%mm7\n\t"               "cmp      $0, %2\n\t"               "jne      3f\n\t"               "movq     %4, %%mm6\n\t"               "movd     %5, %%mm0\n\t"               "punpcklbw %%mm6, %%mm0\n\t" /* mm0 = 00aa 00rr 00gg 00bb */               "punpcklwd %%mm0, %%mm0\n\t" /* mm0 = 00aa 00aa xxxx xxxx */               "punpckldq %%mm0, %%mm0\n\t" /* mm0 = 00aa 00aa 00aa 00aa */               "paddw     %%mm7, %%mm0\n\t"               ".align   16\n"               "4:\n\t"                 /* blend from color */               "testw    $0xF000, 6(%0)\n\t"               "jnz      1f\n\t"               "movq     (%0), %%mm1\n\t"               "pmullw   %%mm0, %%mm1\n\t"               "psrlw    $8, %%mm1\n\t"               "movq     %%mm1, (%0)\n"               "1:\n\t"	       "add      $8, %0\n\t"               "dec      %1\n\t"               "jnz      4b\n\t"               "jmp      2f\n\t"               ".align   16\n"               "3:\n\t"                      /* blend from Sacc */               "testw    $0xF000, 6(%0)\n\t"               "jnz      1f\n\t"               "movq     (%2), %%mm0\n\t"               "movq     (%0), %%mm1\n\t"	       "punpckhwd %%mm0, %%mm0\n\t" /* mm2 = 00aa 00aa xxxx xxxx */               "punpckhdq %%mm0, %%mm0\n\t" /* mm2 = 00aa 00aa 00aa 00aa */               "paddw    %%mm7, %%mm0\n\t"               "pmullw   %%mm0, %%mm1\n\t"               "psrlw    $8, %%mm1\n\t"               "movq     %%mm1, (%0)\n"               "1:\n\t"	       "add      $8, %2\n\t"	       "add      $8, %0\n\t"               "dec      %1\n\t"               "jnz      3b\n\t"               "2:\n\t"               "emms"               : /* no outputs */               : "D" (gfxs->Xacc), "c" (gfxs->length), "S" (gfxs->Sacc),                 "m" (*ones), "m" (*zeros), "m" (gfxs->color)               : "%st", "memory");}static void Dacc_YCbCr_to_RGB_MMX( GenefxState *gfxs ){       static const u16 __aligned(8) sub0[4] = {  16,  16,  16,  16 };      static const u16 __aligned(8) sub1[4] = { 128, 128, 128, 128 };     static const s16 __aligned(8) mul[20] = {                     0x253F,  0x253F,  0x253F,  0x253F, // Y       Coeff.                     0x3312,  0x3312,  0x3312,  0x3312, // V Red   Coeff.                     0x4093,  0x4093,  0x4093,  0x4093, // U Blue  Coeff.                    -0x1A04, -0x1A04, -0x1A04, -0x1A04, // V Green Coeff.                    -0x0C83, -0x0C83, -0x0C83, -0x0C83  // U Green Coeff.     };     int                w = gfxs->length & 3;     GenefxAccumulator *D = gfxs->Dacc;          __asm__ __volatile__ (          "shrl          $2,    %1\n\t"          "jz            2f\n\t"          "pxor       %%mm7, %%mm7\n\t"               ".align 16\n"               "1:\n\t"               "movq        (%0), %%mm0\n\t" // 00 a0 00 y0 00 v0 00 u0               "movq       8(%0), %%mm1\n\t" // 00 a1 00 y1 00 v1 00 u1               "movq      16(%0), %%mm2\n\t" // 00 a2 00 y2 00 v2 00 u2               "movq      24(%0), %%mm3\n\t" // 00 a3 00 y3 00 v3 00 u3               "movq       %%mm0, %%mm4\n\t" // 00 a0 00 y0 00 v0 00 u0               "movq       %%mm2, %%mm5\n\t" // 00 a2 00 y2 00 v2 00 u2               "punpcklwd  %%mm1, %%mm0\n\t" // 00 v1 00 v0 00 u1 00 u0               "punpcklwd  %%mm3, %%mm2\n\t" // 00 v3 00 v2 00 u3 00 u2               "punpckhwd  %%mm1, %%mm4\n\t" // 00 a1 00 a0 00 y1 00 y0               "punpckhwd  %%mm3, %%mm5\n\t" // 00 a3 00 a2 00 y3 00 y2               "movq       %%mm0, %%mm1\n\t" // 00 v1 00 v1 00 u1 00 u0               "movq       %%mm4, %%mm3\n\t" // 00 a1 00 a0 00 y1 00 y0               "punpckldq  %%mm2, %%mm0\n\t" // 00 u3 00 u2 00 u1 00 u0               "punpckldq  %%mm5, %%mm3\n\t" // 00 y3 00 y2 00 y1 00 y0               "punpckhdq  %%mm2, %%mm1\n\t" // 00 v3 00 v2 00 v1 00 v0               "punpckhdq  %%mm5, %%mm4\n\t" // 00 a3 00 a2 00 a1 00 a0               /* mm0 = u, mm1 = v, mm3 = y, mm4 = a */               "psubw         %2, %%mm3\n\t" // y -= 16               "psllw         $3, %%mm3\n\t" // precision               "pmulhw      (%4), %%mm3\n\t"               "psubw         %3, %%mm1\n\t" // v -= 128                "psllw         $3, %%mm1\n\t" // precision               "movq       %%mm1, %%mm2\n\t" // 00 v3 00 v2 00 v1 00 v0               "pmulhw     8(%4), %%mm2\n\t" // vr               "psubw         %3, %%mm0\n\t" // u -= 128                "psllw         $3, %%mm0\n\t" // precision               "movq       %%mm0, %%mm5\n\t" // 00 u3 00 u2 00 u1 00 u0               "pmulhw    16(%4), %%mm5\n\t" // ub               "paddw      %%mm3, %%mm2\n\t" // 00 r3 00 r2 00 r1 00 r0               "paddw      %%mm3, %%mm5\n\t" // 00 b3 00 b2 00 b1 00 b0               "pmulhw    24(%4), %%mm1\n\t" // vg               "packuswb   %%mm2, %%mm2\n\t" // r3 r2 r1 r0 r3 r2 r1 r0               "packuswb   %%mm5, %%mm5\n\t" // b3 b2 b1 b0 b3 b2 b1 b0               "pmulhw    32(%4), %%mm0\n\t" // ug               "punpcklbw  %%mm7, %%mm2\n\t" // 00 r3 00 r2 00 r1 00 r0               "punpcklbw  %%mm7, %%mm5\n\t" // 00 b3 00 b2 00 b1 00 b0               "paddw      %%mm1, %%mm3\n\t" // y + vg               "paddw      %%mm0, %%mm3\n\t" // 00 g3 00 g2 00 g1 00 g0               "packuswb   %%mm3, %%mm3\n\t" // g3 g2 g1 g0 g3 g2 g1 g0               "punpcklbw  %%mm7, %%mm3\n\t" // 00 g3 00 g2 00 g1 00 g0               /* mm5 = b, mm3 = g, mm2 = r, mm4 = a */               "movq       %%mm5, %%mm0\n\t" // 00 b3 00 b2 00 b1 00 b0               "movq       %%mm3, %%mm1\n\t" // 00 g3 00 g2 00 g1 00 g0               "punpcklwd  %%mm2, %%mm0\n\t" // 00 r1 00 b1 00 r0 00 b0               "punpcklwd  %%mm4, %%mm1\n\t" // 00 a1 00 g1 00 a0 00 g0               "punpckhwd  %%mm2, %%mm5\n\t" // 00 r3 00 b3 00 r2 00 b2               "punpckhwd  %%mm4, %%mm3\n\t" // 00 a3 00 g3 00 a2 00 g2               "movq       %%mm0, %%mm2\n\t" // 00 r1 00 b1 00 r0 00 b0               "movq       %%mm5, %%mm4\n\t" // 00 r3 00 b3 00 r2 00 b2               "punpcklwd  %%mm1, %%mm0\n\t" // 00 a0 00 r0 00 g0 00 b0               "punpcklwd  %%mm3, %%mm5\n\t" // 00 a2 00 r2 00 g2 00 b2               "punpckhwd  %%mm1, %%mm2\n\t" // 00 a1 00 r1 00 g1 00 b1               "punpckhwd  %%mm3, %%mm4\n\t" // 00 a3 00 r3 00 g3 00 b3               "movq       %%mm0,  (%0)\n\t"               "movq       %%mm2, 8(%0)\n\t"               "movq       %%mm5,16(%0)\n\t"               "movq       %%mm4,24(%0)\n\t"               "add          $32,    %0\n\t"               "decl          %1\n\t"               "jnz           1b\n\t"          "emms\n\t"          "2:"               : "=&D" (D)          : "c" (gfxs->length), "m" (*sub0), "m" (*sub1), "r" (mul), "0" (D)          : "memory" );     while (w) {          if (!(D->YUV.a & 0xF000)) {               u16 y, cb, cr;               s16 r, g, b;               y  = y_for_rgb[D->YUV.y];               cb = D->YUV.u;               cr = D->YUV.v;               r  = y + cr_for_r[cr];               g  = y + cr_for_g[cr] + cb_for_g[cb];               b  = y                + cb_for_b[cb];                              D->RGB.r = (r < 0) ? 0 : r;               D->RGB.g = (g < 0) ? 0 : g;               D->RGB.b = (b < 0) ? 0 : b;          }          D++;          w--;     }}static void Dacc_RGB_to_YCbCr_MMX( GenefxState *gfxs ){     static const u16 __aligned(8) add0[4] = { 128, 128, 128, 128 };     static const u16 __aligned(8) add1[4] = {  16,  16,  16,  16 };     static const u16 __aligned(8) mul[24] = {                    0x03A5, 0x03A5, 0x03A5, 0x03A5, // Eb                    0x12C8, 0x12C8, 0x12C8, 0x12C8, // Eg                    0x0991, 0x0991, 0x0991, 0x0991, // Er                    0x0FE1, 0x0FE1, 0x0FE1, 0x0FE1, // Cb                    0x140A, 0x140A, 0x140A, 0x140A, // Cr                    0x1B7B, 0x1B7B, 0x1B7B, 0x1B7B  // Y     };           int                w = gfxs->length & 3;     GenefxAccumulator *D = gfxs->Dacc;          __asm__ __volatile__(          "shrl          $2,    %1\n\t"          "jz            2f\n\t"           "pxor       %%mm7, %%mm7\n\t"                    ".align 16\n"               "1:\n\t"               "movq        (%0), %%mm0\n\t" // 00 a0 00 r0 00 g0 00 b0               "movq       8(%0), %%mm1\n\t" // 00 a1 00 r1 00 g1 00 b1               "movq      16(%0), %%mm2\n\t" // 00 a2 00 r2 00 g2 00 b2               "movq      24(%0), %%mm3\n\t" // 00 a3 00 r3 00 g3 00 b3               "movq       %%mm0, %%mm4\n\t" // 00 a0 00 r0 00 g0 00 b0               "movq       %%mm2, %%mm6\n\t" // 00 a2 00 r2 00 g2 00 b2               "punpcklwd  %%mm1, %%mm0\n\t" // 00 g1 00 g0 00 b1 00 b0               "punpcklwd  %%mm3, %%mm2\n\t" // 00 g3 00 g2 00 b3 00 b2               "movq       %%mm0, %%mm5\n\t" // 00 g1 00 g0 00 b1 00 b0               "punpckldq  %%mm2, %%mm0\n\t" // 00 b3 00 b2 00 b1 00 b0               "punpckhdq  %%mm2, %%mm5\n\t" // 00 g3 00 g2 00 g1 00 g0               "punpckhwd  %%mm1, %%mm4\n\t" // 00 a1 00 a0 00 r1 00 r0               "punpckhwd  %%mm3, %%mm6\n\t" // 00 a3 00 a2 00 r3 00 r2               "movq       %%mm4, %%mm3\n\t" // 00 a1 00 a0 00 r1 00 r0               "punpckldq  %%mm6, %%mm4\n\t" // 00 r3 00 r2 00 r1 00 r0               "punpckhdq  %%mm6, %%mm3\n\t" // 00 a3 00 a2 00 a1 00 a0               /* mm0 = b, mm5 = g, mm4 = r, mm3 = a */               "movq       %%mm0, %%mm1\n\t" // save b               "psllw         $3, %%mm0\n\t"               "pmulhw      (%2), %%mm0\n\t"               "movq       %%mm4, %%mm2\n\t" // save r               "psllw         $3, %%mm5\n\t"               "pmulhw     8(%2), %%mm5\n\t"               "psllw         $3, %%mm4\n\t"               "pmulhw    16(%2), %%mm4\n\t"               "paddw      %%mm5, %%mm0\n\t"               "paddw      %%mm4, %%mm0\n\t" // ey               "psubw      %%mm0, %%mm1\n\t" // b - ey               "psllw         $3, %%mm1\n\t"               "pmulhw    24(%2), %%mm1\n\t" // 00 u3 00 u2 00 u1 00 u0               "psubw      %%mm0, %%mm2\n\t" // r - ey               "psllw         $3, %%mm2\n\t"               "pmulhw    32(%2), %%mm2\n\t" // 00 v3 00 v2 00 v1 00 v0               "paddw         %3, %%mm1\n\t" // Cb + 128               "packuswb   %%mm1, %%mm1\n\t" // u3 u2 u1 u0 u3 u2 u1 u0               "psllw         $3, %%mm0\n\t"               "pmulhw    40(%2), %%mm0\n\t" // 00 y3 00 y2 00 y1 00 y0               "paddw         %3, %%mm2\n\t" // Cr + 128               "packuswb   %%mm2, %%mm2\n\t" // v3 v2 v1 v0 v3 v2 v1 v0                 "paddw         %4, %%mm0\n\t" // Y + 16               "packuswb   %%mm0, %%mm0\n\t" // y3 y2 y1 y0 y3 y2 y1 y0               "punpcklbw  %%mm7, %%mm2\n\t" // 00 v3 00 v2 00 v1 00 v0               "punpcklbw  %%mm7, %%mm1\n\t" // 00 u3 00 y2 00 u1 00 u0               "punpcklbw  %%mm7, %%mm0\n\t" // 00 y3 00 y2 00 y1 00 y0                /* mm1 = u, mm2 = v, mm0 = y, mm3 = a */               "movq       %%mm2, %%mm4\n\t" // 00 v3 00 v2 00 v1 00 v0               "movq       %%mm1, %%mm5\n\t" // 00 u3 00 y2 00 u1 00 u0               "punpcklwd  %%mm3, %%mm2\n\t" // 00 a1 00 v1 00 a0 00 v0               "punpcklwd  %%mm0, %%mm1\n\t" // 00 y1 00 u1 00 y0 00 u0               "punpckhwd  %%mm3, %%mm4\n\t" // 00 a3 00 v3 00 a2 00 v2               "punpckhwd  %%mm0, %%mm5\n\t" // 00 y3 00 u3 00 y2 00 u2               "movq       %%mm1, %%mm3\n\t" // 00 y1 00 u1 00 y0 00 u0               "movq       %%mm5, %%mm6\n\t" // 00 y3 00 u3 00 y2 00 u2               "punpcklwd  %%mm2, %%mm1\n\t" // 00 a0 00 y0 00 v0 00 u0               "punpcklwd  %%mm4, %%mm5\n\t" // 00 a2 00 y2 00 v2 00 u2               "punpckhwd  %%mm2, %%mm3\n\t" // 00 a1 00 y1 00 v1 00 u1               "punpckhwd  %%mm4, %%mm6\n\t" // 00 a3 00 y3 00 v3 00 u3               "movq       %%mm1,  (%0)\n\t"               "movq       %%mm3, 8(%0)\n\t"               "movq       %%mm5,16(%0)\n\t"               "movq       %%mm6,24(%0)\n\t"               "add          $32, %0\n\t"               "decl          %1\n\t"               "jnz           1b\n\t"          "emms\n\t"          "2:"          : "=&D" (D)          : "c" (gfxs->length), "r" (mul), "m" (*add0), "m" (*add1), "0" (D)          : "memory" );     while (w) {          if (!(D->RGB.a & 0xF000)) {               u32 r, g, b, ey;               r = D->RGB.r; g = D->RGB.g; b = D->RGB.b;               ey = (19595 * r + 38469 * g + 7471 * b) >> 16;               D->YUV.y = y_from_ey[ey];               D->YUV.u = cb_from_bey[b-ey];               D->YUV.v = cr_from_rey[r-ey];          }          D++;          w--;     }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -