/* ac_mmx.h */
/* libfame - Fast Assembly MPEG Encoder Library
   Copyright (C) 2000-2001 Vivien Chappelier

   This library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

/* Warning: blocks are transposed with MMX */
/* V MMX functions are functionally equivalent to H integer functions */
/* H MMX functions are functionally equivalent to V integer functions */

#define FASTQS 8 /* predictor 8 is used to store qscale */

/* prediction macros */

#define FASTCLEAR16(p) memset((p), 0, 32);
#define FASTCOPY16(d, s) memcpy((d), (s), 32);

/* TODO: AC rescaling according to qscale */
#define FASTSCALE8V(d, s, q) \
{ \
  int _qp = (s)[FASTQS]; \
  int _qs = (q); \
 \
  if(!_qp || _qp == _qs) memcpy((d), (s), 32); \
  else { \
    d[0] = s[0]; \
    d[1] = (s[1] * _qp + ((s[1] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[2] = (s[2] * _qp + ((s[2] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[3] = (s[3] * _qp + ((s[3] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[4] = (s[4] * _qp + ((s[4] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[5] = (s[5] * _qp + ((s[5] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[6] = (s[6] * _qp + ((s[6] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[7] = (s[7] * _qp + ((s[7] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
  } \
}

#define FASTSCALE8H(d, s, q) \
{ \
  int _qp = (s)[FASTQS]; \
  int _qs = (q); \
 \
  if(!_qp || _qp == _qs) memcpy((d), (s), 32); \
  else { \
    d[0] = s[0]; \
    d[9] = (s[9] * _qp + ((s[9] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[10] = (s[10] * _qp + ((s[10] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[11] = (s[11] * _qp + ((s[11] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[12] = (s[12] * _qp + ((s[12] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[13] = (s[13] * _qp + ((s[13] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[14] = (s[14] * _qp + ((s[14] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
    d[15] = (s[15] * _qp + ((s[15] > 0)?(_qs>>1):(-(_qs>>1)))) / _qs; \
  } \
}

#define FASTAC8V(d, b) { \
  asm("movq (%0), %%mm0\n" \
      "movq 8(%0), %%mm1\n" \
      "movq %%mm0, (%1)\n" \
      "movq %%mm1, 8(%1)\n" \
      : "=r"(b), "=r"(d) \
      : "0"(b), "1"(d) \
      : "memory"); \
  ((short int *)(d))[9] = ((short int *)(b))[8]; \
  ((short int *)(d))[10] = ((short int *)(b))[16]; \
  ((short int *)(d))[11] = ((short int *)(b))[24]; \
  ((short int *)(d))[12] = ((short int *)(b))[32]; \
  ((short int *)(d))[13] = ((short int *)(b))[40]; \
  ((short int *)(d))[14] = ((short int *)(b))[48]; \
  ((short int *)(d))[15] = ((short int *)(b))[56]; \
}

#define FASTAC8H(d, b) { \
  asm("movq (%0), %%mm0\n" \
      "movq 8(%0), %%mm1\n" \
      "movq %%mm0, (%1)\n" \
      "movq %%mm1, 8(%1)\n" \
      : "=r"(b), "=r"(d) \
      : "0"(b), "1"(d) \
      : "memory"); \
  ((short int *)(d))[9] = ((short int *)(b))[8]; \
  ((short int *)(d))[10] = ((short int *)(b))[16]; \
  ((short int *)(d))[11] = ((short int *)(b))[24]; \
  ((short int *)(d))[12] = ((short int *)(b))[32]; \
  ((short int *)(d))[13] = ((short int *)(b))[40]; \
  ((short int *)(d))[14] = ((short int *)(b))[48]; \
  ((short int *)(d))[15] = ((short int *)(b))[56]; \
}
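/*
 * Reference sketch (added for clarity, not part of the original source):
 * a plain-C equivalent of the FASTAC8V/FASTAC8H copies above, assuming b
 * points to a transposed 8x8 block of 64 shorts and d to a 16-short
 * predictor array (elements 0..7 hold the first row, element 8 the
 * qscale, elements 9..15 the first column).  The interpretation of the
 * d layout is an assumption based on FASTQS and the offsets used.
 */
#if 0
static void fastac8_reference(short *d, const short *b)
{
  int i;
  for (i = 0; i < 8; i++)   /* first row of the transposed block      */
    d[i] = b[i];
  for (i = 1; i < 8; i++)   /* first column, after the qscale slot    */
    d[8 + i] = b[8 * i];
}
#endif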
/* blocks are transposed! */
#define FASTSAD8H(v, p, b) { \
  long retval; \
 \
  asm("movq 16(%0), %%mm0\n" \
      "movq 16(%1), %%mm1\n" \
      "movq 24(%0), %%mm2\n" \
      "movq 24(%1), %%mm3\n" \
      "pxor %%mm4, %%mm4\n" \
      "pxor %%mm5, %%mm5\n" \
      "pxor %%mm6, %%mm6\n" \
      "pxor %%mm7, %%mm7\n" \
      "psubw %%mm0, %%mm1\n" \
      "psubw %%mm2, %%mm3\n" \
      "pcmpgtw %%mm0, %%mm4\n" \
      "pcmpgtw %%mm1, %%mm5\n" \
      "pcmpgtw %%mm2, %%mm6\n" \
      "pcmpgtw %%mm3, %%mm7\n" \
      "pxor %%mm4, %%mm0\n" \
      "pxor %%mm5, %%mm1\n" \
      "pxor %%mm6, %%mm2\n" \
      "pxor %%mm7, %%mm3\n" \
      "psubw %%mm4, %%mm0\n" \
      "psubw %%mm5, %%mm1\n" \
      "psubw %%mm6, %%mm2\n" \
      "psubw %%mm7, %%mm3\n" \
      "psubw %%mm1, %%mm0\n" \
      "psubw %%mm3, %%mm2\n" \
      "paddw %%mm2, %%mm0\n" \
      "movq %%mm0, %%mm1\n" \
      "psrlq $0x20, %%mm1\n" \
      "paddw %%mm1, %%mm0\n" \
      "movq %%mm0, %%mm1\n" \
      "psrlq $0x10, %%mm1\n" \
      "paddw %%mm1, %%mm0\n" \
      "movd %%mm0, %2\n" \
      : "=r"(b), "=r"(p), "=r"(retval) \
      : "0"(b), "1"(p), "2"(0) \
      : "memory"); \
  v += (signed short) retval; \
}

#define FASTSAD8V(v, p, b) { \
  long retval; \
 \
  asm("movq 2(%0), %%mm0\n" \
      "movq 2(%1), %%mm1\n" \
      "movq 10(%0), %%mm2\n" \
      "movq 10(%1), %%mm3\n" \
      "pxor %%mm4, %%mm4\n" \
      "pxor %%mm5, %%mm5\n" \
      "pxor %%mm6, %%mm6\n" \
      "pxor %%mm7, %%mm7\n" \
      "psubw %%mm0, %%mm1\n" \
      "psubw %%mm2, %%mm3\n" \
      "pcmpgtw %%mm0, %%mm4\n" \
      "pcmpgtw %%mm1, %%mm5\n" \
      "pcmpgtw %%mm2, %%mm6\n" \
      "pcmpgtw %%mm3, %%mm7\n" \
      "pxor %%mm4, %%mm0\n" \
      "pxor %%mm5, %%mm1\n" \
      "pxor %%mm6, %%mm2\n" \
      "pxor %%mm7, %%mm3\n" \
      "psubw %%mm4, %%mm0\n" \
      "psubw %%mm5, %%mm1\n" \
      "psubw %%mm6, %%mm2\n" \
      "psubw %%mm7, %%mm3\n" \
      "psubw %%mm1, %%mm0\n" \
      "psubw %%mm3, %%mm2\n" \
      "paddw %%mm2, %%mm0\n" \
      "movq %%mm0, %%mm1\n" \
      "psrlq $0x20, %%mm1\n" \
      "paddw %%mm1, %%mm0\n" \
      "movq %%mm0, %%mm1\n" \
      "psrlq $0x10, %%mm1\n" \
      "paddw %%mm1, %%mm0\n" \
      "movd %%mm0, %2\n" \
      : "=r"(b), "=r"(p), "=r"(retval) \
      : "0"(b), "1"(p), "2"(0) \
      : "memory"); \
  v += (signed short) retval; \
}

#define COPY8H(b, p) \
{ \
  b[8] = p[9]; \
  b[16] = p[10]; \
  b[24] = p[11]; \
  b[32] = p[12]; \
  b[40] = p[13]; \
  b[48] = p[14]; \
  b[56] = p[15]; \
}

#define COPY8V(b, p) \
{ \
  b[1] = p[1]; \
  b[2] = p[2]; \
  b[3] = p[3]; \
  b[4] = p[4]; \
  b[5] = p[5]; \
  b[6] = p[6]; \
  b[7] = p[7]; \
}
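/*
 * Reference sketch (added for clarity, not part of the original source):
 * a plain-C equivalent of the FASTSAD8H/FASTSAD8V accumulations above.
 * Each macro adds up |b[i]| - |p[i] - b[i]| over eight shorts (elements
 * 8..15 for FASTSAD8H, elements 1..8 for FASTSAD8V), i.e. a measure of
 * how much magnitude the prediction p removes from b.  Note that the MMX
 * version accumulates in 16-bit lanes and can wrap where this reference
 * does not.
 */
#if 0
static void fastsad8_reference(long *v, const short *p, const short *b,
                               int first)
{
  int i;
  for (i = first; i < first + 8; i++) {
    int ab = b[i] < 0 ? -b[i] : b[i];   /* |b[i]|        */
    int ad = p[i] - b[i];               /* residual      */
    if (ad < 0) ad = -ad;               /* |p[i] - b[i]| */
    *v += ab - ad;
  }
}
#endif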
#define FASTDIFF8V(b, p) { \
  asm("movq (%0), %%mm0\n" \
      "movq 8(%0), %%mm1\n" \
      "movq 16(%0), %%mm2\n" \
      "movq 24(%0), %%mm3\n" \
      "psubw (%1), %%mm0\n" \
      "psubw 8(%1), %%mm1\n" \
      "psubw 16(%1), %%mm2\n" \
      "psubw 24(%1), %%mm3\n" \
      "movq %%mm0, (%0)\n" \
      "movq %%mm1, 8(%0)\n" \
      "movq %%mm2, 16(%0)\n" \
      "movq %%mm3, 24(%0)\n" \
      : "=r"(b), "=r"(p) \
      : "0"(b), "1"(p) \
      : "memory"); \
}

#define FASTSUM8V(b, p) { \
  asm("movq (%0), %%mm0\n" \
      "movq 8(%0), %%mm1\n" \
      "movq 16(%0), %%mm2\n" \
      "movq 24(%0), %%mm3\n" \
      "paddw (%1), %%mm0\n" \
      "paddw 8(%1), %%mm1\n" \
      "paddw 16(%1), %%mm2\n" \
      "paddw 24(%1), %%mm3\n" \
      "movq %%mm0, (%0)\n" \
      "movq %%mm1, 8(%0)\n" \
      "movq %%mm2, 16(%0)\n" \
      "movq %%mm3, 24(%0)\n" \
      : "=r"(b), "=r"(p) \
      : "0"(b), "1"(p) \
      : "memory"); \
}

#define FASTDIFF8H(b, p) { \
  asm("movq (%0), %%mm0\n" \
      "movq 8(%0), %%mm1\n" \
      "movq 16(%0), %%mm2\n" \
      "movq 24(%0), %%mm3\n" \
      "psubw (%1), %%mm0\n" \
      "psubw 8(%1), %%mm1\n" \
      "psubw 16(%1), %%mm2\n" \
      "psubw 24(%1), %%mm3\n" \
      "movq %%mm0, (%0)\n" \
      "movq %%mm1, 8(%0)\n" \
      "movq %%mm2, 16(%0)\n" \
      "movq %%mm3, 24(%0)\n" \
      : "=r"(b), "=r"(p) \
      : "0"(b), "1"(p) \
      : "memory"); \
}

#define FASTSUM8H(b, p) { \
  asm("movq (%0), %%mm0\n" \
      "movq 8(%0), %%mm1\n" \
      "movq 16(%0), %%mm2\n" \
      "movq 24(%0), %%mm3\n" \
      "paddw (%1), %%mm0\n" \
      "paddw 8(%1), %%mm1\n" \
      "paddw 16(%1), %%mm2\n" \
      "paddw 24(%1), %%mm3\n" \
      "movq %%mm0, (%0)\n" \
      "movq %%mm1, 8(%0)\n" \
      "movq %%mm2, 16(%0)\n" \
      "movq %%mm3, 24(%0)\n" \
      : "=r"(b), "=r"(p) \
      : "0"(b), "1"(p) \
      : "memory"); \
}
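/*
 * Note (added, not in the original header): all of the macros above leave
 * MMX state live in mm0..mm7, and the MMX registers alias the x87
 * floating-point stack.  Callers must therefore execute an emms
 * instruction before any floating-point code runs, e.g. via a helper
 * such as:
 */
#if 0
static inline void mmx_empty(void)
{
  asm volatile("emms"); /* clear the x87/MMX tag word after MMX use */
}
#endif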