📄 fdct_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
  "  paddw       %%mm2, %%mm1       \n\t" /* Trucated */                      \                                                                              \  "  pmulhw      %%mm7, %%mm3       \n\t" /* mm3 = xC7S1 * irot_input_x */    \  "  paddw       %%mm2, %%mm3       \n\t" /* Truncated */                     \                                                                              \  "  movq        %%mm0, %%mm5       \n\t"                                     \  "  movq        %%mm0, %%mm2       \n\t"                                     \                                                                              \  "  movq        %[xC1S7], %%mm7    \n\t"                                     \  "  pmulhw      %%mm7, %%mm0       \n\t" /* mm0 = xC1S7 * irot_input_y - irot_input_y */ \                                                                              \  "  movq        %[xC7S1], %%mm7    \n\t"                                     \  "  psrlw       $15, %%mm2         \n\t"                                     \                                                                              \  "  paddw       %%mm5, %%mm0       \n\t" /* mm0 = xC1S7 * irot_input_y */    \  "  paddw       %%mm2, %%mm0       \n\t" /* Truncated */                     \                                                                              \  "  pmulhw      %%mm7, %%mm5       \n\t" /* mm5 = xC7S1 * irot_input_y */    \  "  paddw       %%mm2, %%mm5       \n\t" /* Truncated */                     \                                                                              \  "  psubsw      %%mm5, %%mm1       \n\t" /* mm1 = xC1S7 * irot_input_x - xC7S1 * irot_input_y = ip1 */ \  "  paddsw      %%mm0, %%mm3       \n\t" /* mm3 = xC7S1 * irot_input_x - xC1S7 * irot_input_y = ip7 */ \                                                                              \  "  movq        %%mm1," #ip1 "     \n\t"                                     \  "  movq        %%mm3," #ip7 "     \n\t"                                     \  /* ------------------------------------------------------------------- */   \  "  movq        %[xC3S5], %%mm0    \n\t"                                     \  "  movq        %[xC5S3], %%mm1    \n\t"                                     \                                                                              \  "  movq        %%mm6, %%mm5       \n\t"                                     \  "  movq        %%mm6, %%mm7       \n\t"                                     \                                                                              \  "  movq        %%mm4, %%mm2       \n\t"                                     \  "  movq        %%mm4, %%mm3       \n\t"                                     \                                                                              \  "  pmulhw      %%mm0, %%mm4       \n\t" /* mm4 = xC3S5 * irot_input_x - irot_input_x */ \  "  pmulhw      %%mm1, %%mm6       \n\t" /* mm6 = xC5S3 * irot_input_y - irot_input_y */ \                                                                              \  "  psrlw       $15, %%mm2         \n\t"                                     \  "  psrlw       $15, %%mm5         \n\t"                                     \                                                                              \  "  paddw       %%mm3, %%mm4       \n\t" /* mm4 = xC3S5 * irot_input_x */    \  "  paddw       %%mm7, %%mm6       \n\t" /* mm6 = xC5S3 * irot_input_y */    \                                                                              \  "  paddw       %%mm2, %%mm4       \n\t" /* Truncated */                     \  "  paddw       %%mm5, %%mm6       \n\t" /* Truncated */                     \                                                                              \  "  psubsw      %%mm6, %%mm4       \n\t" /* ip3 */                           \  "  movq        %%mm4," #ip3 "     \n\t"                                     \                                                                              \  "  movq        %%mm3, %%mm4       \n\t"                                     \  "  movq        %%mm7, %%mm6       \n\t"                                     \                                                                              \  "  pmulhw      %%mm1, %%mm3       \n\t" /* mm3 = xC5S3 * irot_input_x - irot_input_x */ \  "  pmulhw      %%mm0, %%mm7       \n\t" /* mm7 = xC3S5 * irot_input_y - irot_input_y */ \                                                                              \  "  paddw       %%mm2, %%mm4       \n\t"                                     \  "  paddw       %%mm5, %%mm6       \n\t"                                     \                                                                              \  "  paddw       %%mm4, %%mm3       \n\t" /* mm3 = xC5S3 * irot_input_x */    \  "  paddw       %%mm6, %%mm7       \n\t" /* mm7 = xC3S5 * irot_input_y */    \                                                                              \  "  paddw       %%mm7, %%mm3       \n\t" /* ip5 */                           \  "  movq        %%mm3," #ip5 "     \n\t" #define Transpose_mmx(ip0,ip1,ip2,ip3,ip4,ip5,ip6,ip7,                  \		      op0,op1,op2,op3,op4,op5,op6,op7)                  \  "  movq      " #ip0 ", %%mm0      \n\t" /* mm0 = a0 a1 a2 a3 */       \  "  movq      " #ip4 ", %%mm4      \n\t" /* mm4 = e4 e5 e6 e7 */       \  "  movq      " #ip1 ", %%mm1      \n\t" /* mm1 = b0 b1 b2 b3 */       \  "  movq      " #ip5 ", %%mm5      \n\t" /* mm5 = f4 f5 f6 f7 */       \  "  movq      " #ip2 ", %%mm2      \n\t" /* mm2 = c0 c1 c2 c3 */       \  "  movq      " #ip6 ", %%mm6      \n\t" /* mm6 = g4 g5 g6 g7 */       \  "  movq      " #ip3 ", %%mm3      \n\t" /* mm3 = d0 d1 d2 d3 */       \  "  movq        %%mm1," #op1 "     \n\t" /* save  b0 b1 b2 b3 */       \  "  movq      " #ip7 ", %%mm7      \n\t" /* mm7 = h0 h1 h2 h3 */       \   /* Transpose 2x8 block */                                            \  "  movq        %%mm4, %%mm1       \n\t" /* mm1 = e3 e2 e1 e0 */       \  "  punpcklwd   %%mm5, %%mm4       \n\t" /* mm4 = f1 e1 f0 e0 */       \  "  movq        %%mm0," #op0 "     \n\t" /* save a3 a2 a1 a0  */       \  "  punpckhwd	 %%mm5, %%mm1       \n\t" /* mm1 = f3 e3 f2 e2 */       \  "  movq        %%mm6, %%mm0       \n\t" /* mm0 = g3 g2 g1 g0 */       \  "  punpcklwd	 %%mm7, %%mm6       \n\t" /* mm6 = h1 g1 h0 g0 */       \  "  movq        %%mm4, %%mm5       \n\t" /* mm5 = f1 e1 f0 e0 */       \  "  punpckldq   %%mm6, %%mm4       \n\t" /* mm4 = h0 g0 f0 e0 = MM4 */ \  "  punpckhdq   %%mm6, %%mm5       \n\t" /* mm5 = h1 g1 f1 e1 = MM5 */ \  "  movq        %%mm1, %%mm6       \n\t" /* mm6 = f3 e3 f2 e2 */       \  "  movq        %%mm4," #op4 "     \n\t"                               \  "  punpckhwd   %%mm7, %%mm0       \n\t" /* mm0 = h3 g3 h2 g2 */       \  "  movq        %%mm5," #op5 "     \n\t"                               \  "  punpckhdq   %%mm0, %%mm6       \n\t" /* mm6 = h3 g3 f3 e3 = MM7 */ \  "  movq      " #op0 ", %%mm4      \n\t" /* mm4 = a3 a2 a1 a0 */       \  "  punpckldq   %%mm0, %%mm1       \n\t" /* mm1 = h2 g2 f2 e2 = MM6 */ \  "  movq      " #op1 ", %%mm5      \n\t" /* mm5 = b3 b2 b1 b0 */       \  "  movq        %%mm4, %%mm0       \n\t" /* mm0 = a3 a2 a1 a0 */       \  "  movq        %%mm6," #op7 "     \n\t"                               \  "  punpcklwd   %%mm5, %%mm0       \n\t" /* mm0 = b1 a1 b0 a0 */       \  "  movq        %%mm1," #op6 "     \n\t"                               \  "  punpckhwd   %%mm5, %%mm4       \n\t" /* mm4 = b3 a3 b2 a2 */       \  "  movq        %%mm2, %%mm5       \n\t" /* mm5 = c3 c2 c1 c0 */       \  "  punpcklwd   %%mm3, %%mm2       \n\t" /* mm2 = d1 c1 d0 c0 */       \  "  movq        %%mm0, %%mm1       \n\t" /* mm1 = b1 a1 b0 a0 */       \  "  punpckldq   %%mm2, %%mm0       \n\t" /* mm0 = d0 c0 b0 a0 = MM0 */ \  "  punpckhdq   %%mm2, %%mm1       \n\t" /* mm1 = d1 c1 b1 a1 = MM1 */ \  "  movq        %%mm4, %%mm2       \n\t" /* mm2 = b3 a3 b2 a2 */       \  "  movq        %%mm0," #op0 "     \n\t"                               \  "  punpckhwd   %%mm3, %%mm5       \n\t" /* mm5 = d3 c3 d2 c2 */       \  "  movq        %%mm1," #op1 "     \n\t"                               \  "  punpckhdq   %%mm5, %%mm4       \n\t" /* mm4 = d3 c3 b3 a3 = MM3 */ \  "  punpckldq   %%mm5, %%mm2       \n\t" /* mm2 = d2 c2 b2 a2 = MM2 */ \  "  movq        %%mm4," #op3 "     \n\t"                               \  "  movq        %%mm2," #op2 "     \n\t"/* This performs a 2D Forward DCT on an 8x8 block with short    coefficients. We try to do the truncation to match the C   version. */static void fdct_short__mmx ( ogg_int16_t *InputData, ogg_int16_t *OutputData){  ogg_int64_t __attribute__((aligned(8))) align_tmp[16];  ogg_int16_t *const temp= (ogg_int16_t*)align_tmp;  __asm__ __volatile__ (    "  .balign 16                   \n\t"    /*     * Input data is an 8x8 block.  To make processing of the data more efficent     * we will transpose the block of data to two 4x8 blocks???     */    Transpose_mmx (  (%0), 16(%0), 32(%0), 48(%0),  8(%0), 24(%0), 40(%0), 56(%0),		     (%1), 16(%1), 32(%1), 48(%1),  8(%1), 24(%1), 40(%1), 56(%1))    Fdct_mmx      (  (%1), 16(%1), 32(%1), 48(%1),  8(%1), 24(%1), 40(%1), 56(%1), (%2))    Transpose_mmx (64(%0), 80(%0), 96(%0),112(%0), 72(%0), 88(%0),104(%0),120(%0),		   64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1))    Fdct_mmx      (64(%1), 80(%1), 96(%1),112(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))    Transpose_mmx ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1),		    0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1))    Fdct_mmx      ( 0(%1), 16(%1), 32(%1), 48(%1), 64(%1), 80(%1), 96(%1),112(%1), (%2))    Transpose_mmx ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1),		    8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1))    Fdct_mmx      ( 8(%1), 24(%1), 40(%1), 56(%1), 72(%1), 88(%1),104(%1),120(%1), (%2))    "  emms                         \n\t"        : "+r" (InputData),      "+r" (OutputData)    : "r" (temp),      [xC1S7] "m" (xC1S7),      /* gcc 3.1+ allows named asm parameters */      [xC2S6] "m" (xC2S6),      [xC3S5] "m" (xC3S5),      [xC4S4] "m" (xC4S4),      [xC5S3] "m" (xC5S3),      [xC6S2] "m" (xC6S2),      [xC7S1] "m" (xC7S1)    : "memory"  );}/* install our implementation in the function table */void dsp_mmx_fdct_init(DspFunctions *funcs){  TH_DEBUG("enabling accelerated x86_32 mmx fdct function.\n");  funcs->fdct_short = fdct_short__mmx;}
上一页 12
💿 文件大小 44653 K
👤 上传用户 leeixndong
📂 所属分类网络
🏷️ 相关标签

#mediastreamer2 #开源 #媒体 #网络传输
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -