⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dct_mmx.h

📁 一个很好用的MPEG1/4的开源编码器
💻 H
字号:
/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*************************** MMX accelerated DCT *****************************/

/* Warning: Didn't check the DCT was IEEE compliant. It is probably not.     */
/* TODO: Write an IEEE compliant DCT/iDCT                                    */

/* When defined, the "#ifdef precision" branches below are compiled in: the  */
/* operands are shifted left by extra bits before each pmulhw, and the       */
/* results are shifted back right (psraw) before being stored.               */
#define precision

/*
 * dct_aan_pass - one in-place 8-point transform pass over four 16-bit lanes.
 *
 * Reads eight quadwords ("lines" 0..7) from `cache` at byte offsets
 * 0x00, 0x10, ..., 0x70, runs the six butterfly/multiply steps below on the
 * packed signed 16-bit words (saturating adds/subtracts, pmulhw for the
 * constant multiplications), and writes the eight result quadwords back to
 * the same offsets.  Offset 0x70 is also used as scratch for v07 between
 * STEP 2 and STEP 5.
 *
 * The multiplication constants and the rounding bias are fetched from the
 * external symbols `_mmx_cos` (byte offsets 0, 8, 16, 24) and `_mmx_1`,
 * spelled through the ASMSYM macro; none of these is defined in this file.
 *
 * Presumably an AAN (Arai-Agui-Nakajima) style DCT pass, going by the name
 * and the COSn comments -- TODO confirm.  Assumes dct_t is a 16-bit type so
 * that a row of 8 coefficients spans 16 bytes -- TODO confirm against the
 * dct_t definition.
 *
 * NOTE(review): the asm overwrites %mm0-%mm7 but the clobber list only names
 * "memory", and no emms is issued here; presumably callers are responsible
 * for clearing MMX state before any x87 floating-point use -- confirm.
 *
 * NOTE(review): `cache` is declared as an output ("=r") tied to itself as an
 * input ("0"), yet no instruction below writes %0.
 */
static void inline dct_aan_pass(dct_t *cache)
{
  //  register unsigned short const *mmx_cos = _mmx_cos;
  asm volatile (
      ""                                     /* STEP 1 */
      "movq 0x00(%0),     %%mm0\n"           /* load line 0 */
      "movq 0x10(%0),     %%mm1\n"           /* load line 1 */
      "movq 0x20(%0),     %%mm2\n"           /* load line 2 */
      "movq 0x30(%0),     %%mm3\n"           /* load line 3 */
      "movq 0x40(%0),     %%mm4\n"           /* load line 4 */
      "movq 0x50(%0),     %%mm5\n"           /* load line 5 */
      "movq 0x60(%0),     %%mm6\n"           /* load line 6 */
      "movq 0x70(%0),     %%mm7\n"           /* load line 7 */
      "paddsw %%mm0, %%mm7\n"                /* line0 + line7 -> mm7 (v00) */
      "paddsw %%mm1, %%mm6\n"                /* line1 + line6 -> mm6 (v01) */
      "paddsw %%mm2, %%mm5\n"                /* line2 + line5 -> mm5 (v02) */
      "paddsw %%mm4, %%mm3\n"                /* line4 + line3 -> mm3 (v03) */
      "psubsw 0x70(%0), %%mm0\n"             /* line0 - line7 -> mm0 (v07) */
      "psubsw 0x60(%0), %%mm1\n"             /* line1 - line6 -> mm1 (v06) */
      "psubsw 0x50(%0), %%mm2\n"             /* line2 - line5 -> mm2 (v05) */
      "psubsw 0x30(%0), %%mm4\n"             /* line4 - line3 -> mm4 (-v04) */
      ""                                     /* STEP 2 */
      "psubsw %%mm2, %%mm4\n"                /* -v04 - v05 -> mm4 (v14) */
      "paddsw %%mm1, %%mm2\n"                /*  v05 + v06 -> mm2 (v15) */
      "paddsw %%mm0, %%mm1\n"                /*  v06 + v07 -> mm1 (v16) */
      "movq %%mm0, 0x70(%0)\n"               /* store v07 for later */
      "movq %%mm3, %%mm0\n"                  /*  v03 -> mm0 */
      "paddsw %%mm7, %%mm3\n"                /*  v00 + v03 -> mm3 (v10) */
      "psubsw %%mm0, %%mm7\n"                /*  v00 - v03 -> mm7 (v13) */
      "movq %%mm5, %%mm0\n"                  /*  v02 -> mm0 */
      "paddsw %%mm6, %%mm5\n"                /*  v01 + v02 -> mm5 (v11) */
      "psubsw %%mm0, %%mm6\n"                /*  v01 - v02 -> mm6 (v12) */
      ""                                     /* STEP 3 */
      "movq %%mm5, %%mm0\n"                  /*  v11 -> mm0 */
      "paddsw %%mm3, %%mm5\n"                /*  v10 + v11 -> mm5 (v20) */
      "psubsw %%mm0, %%mm3\n"                /*  v10 - v11 -> mm3 (v21) */
      "paddsw %%mm7, %%mm6\n"                /*  v12 + v13 -> mm6 (v22) */
      "movq %%mm5, 0x00(%0)\n"               /* store line 0 */
      "movq %%mm3, 0x40(%0)\n"               /* store line 4 */
      "movq %%mm4, %%mm5\n"                  /*  v14 -> mm5 */
      "paddsw %%mm1, %%mm5\n"                /*  v14 + v16 -> mm5 */
#ifdef precision
      "psllw $0x01, %%mm5\n"                 /* precision(va0) += 1 bit */
#endif
      /* NOTE(review): this and the four later "+ 1" adds all target %mm4,  */
      /* whichever register is multiplied next (mm5, mm6, mm2, mm4, mm1).   */
      /* It looks like a rounding bias that may have been meant for the     */
      /* register about to go through pmulhw -- confirm before changing.    */
    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
//      "pmulhw 16(%1), %%mm5\n"               /* (v14+v16)*COS6 -> mm5 (va0) */
      "pmulhw " ASMSYM "_mmx_cos+16, %%mm5\n" /* (v14+v16)*COS6 -> mm5 (va0) */
      ""                                     /* STEP 4 */
#ifdef precision
      "psllw $0x02, %%mm6\n"                 /* precision(v22) += 1 bit */
#else
      "psllw $0x01, %%mm6\n"                 /* */
#endif
    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
      //      "pmulhw  8(%1), %%mm6\n"               /* 2*v22*COS4/2 -> mm6 (v32)*/
      "pmulhw " ASMSYM "_mmx_cos+8, %%mm6\n" /* 2*v22*COS4/2 -> mm6 (v32)*/
#ifdef precision
      "psllw $0x02, %%mm2\n"                 /* precision(v15) += 1 bit */
#else
      "psllw $0x01, %%mm2\n"                 /* */
#endif
    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
      //      "pmulhw  8(%1), %%mm2\n"               /* 2*v15*COS4/2 -> mm2 (v35) */
      "pmulhw " ASMSYM "_mmx_cos+8, %%mm2\n" /* 2*v15*COS4/2 -> mm2 (v35) */
#ifdef precision
      "psllw $0x02, %%mm4\n"                 /* precision(v14) += 1 bit */
#else
      "psllw $0x01, %%mm4\n"                 /* */
#endif
    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
      //      "pmulhw  0(%1), %%mm4\n"               /* 2 * v14 * -COS2/2 -> mm4 */
      "pmulhw " ASMSYM "_mmx_cos, %%mm4\n"   /* 2 * v14 * -COS2/2 -> mm4 */
      "psubsw %%mm5, %%mm4\n"                /* v14*-COS2 - va0 -> mm4 (v34) */
#ifdef precision
      "psllw $0x01, %%mm1\n"                 /* precision(v16) += 1 bit */
#endif
      "psubsw %%mm1, %%mm5\n"                /* va0 - v16 -> mm5 */
    "paddw " ASMSYM "_mmx_1, %%mm4\n"   /* + 1 */
      //      "pmulhw 24(%1), %%mm1\n"               /* v16 * (COS8 - 1) -> mm1 */
      "pmulhw " ASMSYM "_mmx_cos+24, %%mm1\n" /* v16 * (COS8 - 1) -> mm1 */
      "psubsw %%mm5, %%mm1\n"                /* v16 * COS8 - va0 -> mm1 (v36)*/
      ""                                     /* STEP 5 */
      "movq 0x70(%0), %%mm0\n"               /* retrieve v07 -> mm0 */
#ifdef precision
      "psllw $0x01, %%mm7\n"                 /* precision(v13) += 1 bit */
      "psllw $0x01, %%mm0\n"                 /* precision(v07) += 1 bit */
#endif
      "movq %%mm6, %%mm3\n"                  /* v32 -> mm3 */
      "paddsw %%mm7, %%mm6\n"                /* v13 + v32 -> mm6 (v42) */
      "psubsw %%mm3, %%mm7\n"                /* v13 - v32 -> mm7 (v43) */
#ifdef precision
      "psraw $0x01, %%mm6\n"                 /* precision(v42) -= 1 bit */
      "psraw $0x01, %%mm7\n"                 /* precision(v43) -= 1 bit */
#endif
      "movq %%mm6, 0x20(%0)\n"               /* store line 2 */
      "movq %%mm7, 0x60(%0)\n"               /* store line 6 */
      "movq %%mm2, %%mm5\n"                  /* v35 -> mm5 */
      "paddsw %%mm0, %%mm2\n"                /* v07 + v35 -> mm2 (v45) */
      "psubsw %%mm5, %%mm0\n"                /* v07 - v35 -> mm0 (v47) */
      ""                                     /* STEP 6 */
      "movq %%mm4, %%mm3\n"                  /* v34 -> mm3 */
      "paddsw %%mm0, %%mm4\n"                /* v47 + v34 -> mm4 (v54) */
      "psubsw %%mm3, %%mm0\n"                /* v47 - v34 -> mm0 (v57) */
      "movq %%mm1, %%mm5\n"                  /* v36 -> mm5 */
      "paddsw %%mm2, %%mm1\n"                /* v45 + v36 -> mm1 (v55) */
      "psubsw %%mm5, %%mm2\n"                /* v45 - v36 -> mm2 (v56) */
#ifdef precision
      "psraw $0x01, %%mm4\n"                 /* precision(v54) -= 1 bit */
      "psraw $0x01, %%mm0\n"                 /* precision(v57) -= 1 bit */
      "psraw $0x01, %%mm1\n"                 /* precision(v55) -= 1 bit */
      "psraw $0x01, %%mm2\n"                 /* precision(v56) -= 1 bit */
#endif
      "movq %%mm1, 0x10(%0)\n"               /* store line 1 */
      "movq %%mm0, 0x30(%0)\n"               /* store line 3 */
      "movq %%mm4, 0x50(%0)\n"               /* store line 5 */
      "movq %%mm2, 0x70(%0)\n"               /* store line 7 */
      : "=r"(cache)/*, "=r"(mmx_cos)*/
      : "0"(cache)/*, "1"(mmx_cos)*/
      : "memory");
}

/*
 * dct - transform `block` in place using two rounds of dct_aan_pass.
 *
 * Each round calls dct_aan_pass on `block` and on `block+4`; transpose()
 * (defined elsewhere) is applied to `block` between the two rounds so that
 * the second round runs along the other dimension.
 *
 * Assumes `block` is an 8x8 array of 16-bit dct_t, so that `block+4` selects
 * the four lanes not covered by the first call -- TODO confirm against the
 * dct_t definition.
 */
static void inline dct(dct_t *block)
{
  dct_aan_pass(block);    /* first four lanes */
  dct_aan_pass(block+4);  /* next four lanes (4 elements further on) */
  transpose(block);
  dct_aan_pass(block);
  dct_aan_pass(block+4);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -