📄 reconstruct_mmx.h
字号:
/* libfame - Fast Assembly MPEG Encoder Library
 * Copyright (C) 2000-2001 Vivien Chappelier
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*************************** reconstruct reference frame *********************/

/* NOTE(review): everything in this file uses `addl` on registers that hold
 * pointers, and passes pointers through plain "r" constraints; this is
 * i386-only code and will not assemble/behave correctly for an x86-64
 * target.  Presumably the build system only selects this file for 32-bit
 * x86 builds -- confirm before reusing elsewhere.
 *
 * NOTE(review): none of the asm statements declares the MMX registers as
 * clobbered, and no `emms` is issued here; this relies on the caller not
 * interleaving FPU code and on `emms` being executed elsewhere -- TODO
 * confirm against the call sites.
 */

/* Store one 8x8 block of DCT coefficients into the reconstructed frame.
 *
 * plane : top-left byte of the destination 8x8 area in the frame plane
 * block : 64 dct_t values, laid out as 8 rows of 8 words (16 bytes/row);
 *         the `psraw $4` below shows they carry 4 fractional fixed-point
 *         bits, i.e. values are scaled by 16
 * pitch : byte distance between two frame lines
 *
 * Each word is shifted right by 4 (dropping the fixed-point part, no
 * rounding) and packed to bytes with unsigned saturation (clamp to 0..255),
 * then written 8 bytes per line.
 */
static void inline reconstruct(unsigned char *plane, dct_t *block, int pitch)
{
  int dummy;
  asm volatile (/* rows 0-3: load 32 words from the first half of block */
                "movq 0x00(%1), %%mm0\n"
                "movq 0x08(%1), %%mm1\n"
                "movq 0x10(%1), %%mm2\n"
                "movq 0x18(%1), %%mm3\n"
                "movq 0x20(%1), %%mm4\n"
                "movq 0x28(%1), %%mm5\n"
                "movq 0x30(%1), %%mm6\n"
                "movq 0x38(%1), %%mm7\n"
                /* drop the 4 fractional bits: value >>= 4 (truncating) */
                "psraw $0x04, %%mm0\n"
                "psraw $0x04, %%mm1\n"
                "psraw $0x04, %%mm2\n"
                "psraw $0x04, %%mm3\n"
                "psraw $0x04, %%mm4\n"
                "psraw $0x04, %%mm5\n"
                "psraw $0x04, %%mm6\n"
                "psraw $0x04, %%mm7\n"
                /* pack word pairs to bytes, saturating to 0..255 */
                "packuswb %%mm1, %%mm0\n"
                "packuswb %%mm3, %%mm2\n"
                "packuswb %%mm5, %%mm4\n"
                "packuswb %%mm7, %%mm6\n"
                /* store rows 0-3, advancing one frame line per store */
                "movq %%mm0, (%0)\n"
                "addl %2, %0\n"
                "movq %%mm2, (%0)\n"
                "addl %2, %0\n"
                "movq %%mm4, (%0)\n"
                "addl %2, %0\n"
                "movq %%mm6, (%0)\n"
                "addl %2, %0\n"
                /* rows 4-7: same treatment for the second half of block */
                "movq 0x40(%1), %%mm0\n"
                "movq 0x48(%1), %%mm1\n"
                "movq 0x50(%1), %%mm2\n"
                "movq 0x58(%1), %%mm3\n"
                "movq 0x60(%1), %%mm4\n"
                "movq 0x68(%1), %%mm5\n"
                "movq 0x70(%1), %%mm6\n"
                "movq 0x78(%1), %%mm7\n"
                "psraw $0x04, %%mm0\n"
                "psraw $0x04, %%mm1\n"
                "psraw $0x04, %%mm2\n"
                "psraw $0x04, %%mm3\n"
                "psraw $0x04, %%mm4\n"
                "psraw $0x04, %%mm5\n"
                "psraw $0x04, %%mm6\n"
                "psraw $0x04, %%mm7\n"
                "packuswb %%mm1, %%mm0\n"
                "packuswb %%mm3, %%mm2\n"
                "packuswb %%mm5, %%mm4\n"
                "packuswb %%mm7, %%mm6\n"
                "movq %%mm0, (%0)\n"
                "addl %2, %0\n"
                "movq %%mm2, (%0)\n"
                "addl %2, %0\n"
                "movq %%mm4, (%0)\n"
                "addl %2, %0\n"
                "movq %%mm6, (%0)\n" /* last row: no pointer advance needed */
                : "=r"(dummy), "=r"(block), "=r"(pitch)
                : "0"(plane), "1"(block), "2"(pitch)
                : "memory");
}

/* Add one 8x8 block of prediction-error coefficients to the motion-
 * compensated prediction and store the result into the frame plane.
 *
 * plane : top-left byte of the destination 8x8 area
 * ref   : top-left byte of the 8x8 prediction (read with the same pitch)
 * sum   : out-parameter, see NOTE below on what it actually holds
 * block : 64 dct_t words with 4 fractional fixed-point bits (see the
 *         rounding sequence below)
 * pitch : byte distance between two lines in both plane and ref
 *
 * Each coefficient is divided by 16 with round-to-nearest (the sign
 * adjustment + fractional re-add sequence in SUM_STEP implements symmetric
 * rounding for negative values), added to the unpacked reference bytes,
 * and the result is packed back with unsigned saturation.
 *
 * NOTE(review): despite the name, %%mm6 is accumulated with POR, not PADD;
 * the value left in *sum is a bitwise OR of all rounded coefficients (and
 * finally of its own two dwords).  It is therefore a "some coefficient is
 * nonzero" detector rather than an arithmetic sum -- confirm against the
 * callers before relying on its numeric value.
 *
 * NOTE(review): the final value is read back by a SECOND asm statement that
 * assumes %%mm5/%%mm6 survive between the two asm blocks; this works only
 * because the compiler emits no MMX code of its own in between.
 */
static void inline sum(unsigned char *plane, unsigned char *ref, unsigned int *sum, dct_t *block, int pitch)
{
  int dummy;
  /* One row of the 8x8 block: x is the row number (0-7), used to build the
   * 0x?0 / 0x?8 byte offsets into block via token pasting. */
#define SUM_STEP(x) \
  "movq (%3), %%mm2\n"           /* mm2 = [ref]b */                     \
  "movq 0x" #x "0(%1), %%mm0\n"  /* mm0 = [0-3]w */                     \
  "movq 0x" #x "8(%1), %%mm1\n"  /* mm1 = [4-7]w */                     \
  "movq %%mm2, %%mm5\n"          /* mm5 = [ref]b */                     \
  "punpcklbw %%mm7, %%mm2\n"     /* mm2 = [ref0-3]w (zero-extend) */    \
  "punpckhbw %%mm7, %%mm5\n"     /* mm5 = [ref4-7]w (zero-extend) */    \
  "movq %%mm0, %%mm3\n"          /* copy [0-3] for rounding */          \
  "movq %%mm1, %%mm4\n"          /* copy [4-7] for rounding */          \
  "psraw $0x0f, %%mm3\n"         /* mm3 = (sign(mm0) - 1) / 2 */        \
  "psraw $0x0f, %%mm4\n"         /* mm4 = (sign(mm1) - 1) / 2 */        \
  "paddsw %%mm3, %%mm0\n"        /* adjust sign before rounding */      \
  "paddsw %%mm4, %%mm1\n"        /* adjust sign before rounding */      \
  "movq %%mm0, %%mm3\n"          /* copy [0-3] for rounding */          \
  "movq %%mm1, %%mm4\n"          /* copy [4-7] for rounding */          \
  "psllw $0x0c, %%mm3\n"         /* keep 4 bits right aligned */        \
  "psllw $0x0c, %%mm4\n"         /* keep 4 bits right aligned */        \
  "psrlw $0x0c, %%mm3\n"         /* keep only 'fixed point' part */     \
  "psrlw $0x0c, %%mm4\n"         /* keep only 'fixed point' part */     \
  "paddsw %%mm3, %%mm0\n"        /* add fixed point to number */        \
  "paddsw %%mm4, %%mm1\n"        /* add fixed point to number */        \
  "psraw $0x04, %%mm0\n"         /* mm0 /= 16 (rounded) */              \
  "psraw $0x04, %%mm1\n"         /* mm1 /= 16 (rounded) */              \
  "por %%mm0, %%mm6\n"           /* OR into nonzero accumulator */      \
  "por %%mm1, %%mm6\n"           /* OR into nonzero accumulator */      \
  "paddw %%mm0, %%mm2\n"         /* add to ref */                       \
  "paddw %%mm1, %%mm5\n"         /* add to ref */                       \
  "packuswb %%mm5, %%mm2\n"      /* pack to byte and saturate */        \
  "addl %2, %3\n"                /* increment ref line */               \
  "movq %%mm2, (%0)\n"           /* store in frame */                   \
  "addl %2, %0\n"                /* increment frame line */
  asm volatile ("pxor %%mm7, %%mm7\n" /* mm7 = zero (for unpacking) */
                "pxor %%mm6, %%mm6\n" /* mm6 = OR accumulator */
                SUM_STEP(0)
                SUM_STEP(1)
                SUM_STEP(2)
                SUM_STEP(3)
                SUM_STEP(4)
                SUM_STEP(5)
                SUM_STEP(6)
                SUM_STEP(7)
                : "=r"(dummy), "=r"(block), "=r"(pitch), "=r"(ref)
                : "0"(plane), "1"(block), "2"(pitch), "3"(ref)
                : "memory");
  /* Fold the two dwords of the accumulator together and write it out;
   * relies on mm6 surviving from the asm statement above. */
  asm volatile ("movq %%mm6, %%mm5\n"
                "psrlq $32, %%mm6\n"
                "por %%mm6, %%mm5\n"
                "movd %%mm5, %0\n"
                : "=r"(*sum)
                : "0"(*sum) );
}

/* Copy an 8x8 byte area from ref to plane, line by line.
 *
 * plane : top-left byte of the destination area
 * ref   : top-left byte of the source area
 * pitch : byte distance between two lines in both planes
 *
 * Presumably the path for not-coded / skipped blocks, where the prediction
 * is copied unchanged -- confirm against the callers.
 */
static void inline move(unsigned char *plane, unsigned char *ref, int pitch)
{
  int dummy1, dummy2;
  /* Copy one 8-byte line and advance both pointers by pitch. */
#define MOVE_STEP() \
  "movq (%2), %%mm0\n"  /* mm0 = [ref] */             \
  "addl %1, %2\n"       /* increment ref line */      \
  "movq %%mm0, (%0)\n"  /* store in frame */          \
  "addl %1, %0\n"       /* increment frame line */
  asm volatile (MOVE_STEP()
                MOVE_STEP()
                MOVE_STEP()
                MOVE_STEP()
                MOVE_STEP()
                MOVE_STEP()
                MOVE_STEP()
                MOVE_STEP()
                : "=r"(dummy1), "=r"(pitch), "=r"(dummy2)
                : "0"(plane), "1"(pitch), "2"(ref)
                : "memory");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -