📄 half_mmx.h
字号:
/* libfame - Fast Assembly MPEG Encoder Library Copyright (C) 2000-2001 Vivien Chappelier This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*//**************************** half-pixel interpolation ***********************/static short const _mmx_one[] = { 1, 1, 1, 1 };static void inline mmx_interpolate(unsigned char **ref, int pitch, int rounding){ int dummy = 0; short _mmx_rc[4]; /* rounding control */ register short *rc = _mmx_rc; _mmx_rc[0] = 1 - rounding; _mmx_rc[1] = 1 - rounding; _mmx_rc[2] = 1 - rounding; _mmx_rc[3] = 1 - rounding; asm volatile ("movl (%0), %3\n" /* %3 = ref[0] */ "pxor %%mm7, %%mm7\n" /* mm7 = 0 */ "movq (%3), %%mm0\n" /* mm0 = [ref] */ "movq 1(%3), %%mm1\n" /* mm1 = [ref+1] */ "movq %%mm0, %%mm2\n" /* mm2 = mm0 */ "movq %%mm1, %%mm3\n" /* mm3 = mm1 */ "punpcklbw %%mm7, %%mm0\n" /* mm0 = ref[0-3]w */ "punpcklbw %%mm7, %%mm1\n" /* mm1 = ref[1-4]w */ "punpckhbw %%mm7, %%mm2\n" /* mm2 = ref[4-7]w */ "punpckhbw %%mm7, %%mm3\n" /* mm3 = ref[5-8]w */ "movq %%mm0, %%mm4\n" /* save ref[0-3] */ "movq %%mm2, %%mm5\n" /* save ref[4-7] */ "paddw %%mm1, %%mm0\n" /* mm0 = ref[0-3] + ref[1-4] */ "paddw %%mm3, %%mm2\n" /* mm2 = ref[4-7] + ref[5-8] */ "paddw (%2), %%mm0\n" /* add 1 - residual */ "paddw (%2), %%mm2\n" /* add 1 - residual */ "psrlw $1, %%mm2\n" /* divide by 2 */ "psrlw $1, %%mm0\n" /* divide by 2 */ "packuswb %%mm2, %%mm0\n" /* pack to byte and saturate */ "movq (%3, %1), %%mm1\n" /* mm1 = [ref+1 line] */ "movq 1(%3, %1), %%mm3\n" /* mm3 = [ref+1 line+1] */ "movq %%mm1, %%mm2\n" /* mm2 = mm1 */ "movq %%mm3, %%mm6\n" /* mm6 = mm3 */ "punpcklbw %%mm7, %%mm1\n" /* mm1 = ref[0-3+1line] */ "punpcklbw %%mm7, %%mm3\n" /* mm3 = ref[1-4+1line] */ "punpckhbw %%mm7, %%mm2\n" /* mm2 = ref[4-7+1line] */ "punpckhbw %%mm7, %%mm6\n" /* mm6 = ref[5-8+1line] */ "paddw %%mm1, %%mm4\n" /* mm4 = ref[0-3]+ref[0-3+1l] */ "paddw %%mm2, %%mm5\n" /* mm5 = ref[4-7]+ref[4-7+1l] */ "paddw (%2), %%mm4\n" /* add 1 - residual */ "paddw (%2), %%mm5\n" /* add 1 - residual */ "paddw %%mm4, %%mm3\n" /* mm3 = ref00+ref10+ref11+1-r 0-3*/ "paddw %%mm5, %%mm6\n" /* mm6 = ref00+ref10+ref11+1-r 4-7*/ "psrlw $1, %%mm4\n" /* divide by 2 */ "psrlw $1, %%mm5\n" /* divide by 2 */ "paddw " ASMSYM "_mmx_one, %%mm3\n" /* add 1 */ "paddw " ASMSYM "_mmx_one, %%mm6\n" /* add 1 */ "packuswb %%mm5, %%mm4\n" /* pack to byte and saturate */ "movq 1(%3), %%mm1\n" /* mm1 = [ref+1] */ "movq %%mm1, %%mm2\n" /* mm2 = mm1 */ "punpcklbw %%mm7, %%mm1\n" /* mm0 = ref[1-4] */ "punpckhbw %%mm7, %%mm2\n" /* mm2 = ref[5-8] */ "paddw %%mm1, %%mm3\n" /* mm3 = all ref +2-r 0-3*/ "paddw %%mm2, %%mm6\n" /* mm6 = all ref +2-r 4-7*/ "psrlw $2, %%mm3\n" /* divide by 4 */ "psrlw $2, %%mm6\n" /* divide by 4 */ "packuswb %%mm6, %%mm3\n" /* pack to byte and saturate */ "movl 4(%0), %3\n" /* %3 = ref[1] */ "movq %%mm0, (%3)\n" /* store in frame */ "movl 8(%0), %3\n" /* %3 = ref[2] */ "movq %%mm4, (%3)\n" /* store in frame */ "movl 12(%0), %3\n" /* %3 = ref[3] */ "movq %%mm3, (%3)\n" /* store in frame */ : "=r"(ref), "=r"(pitch), "=r"(rc), "=r"(dummy) : "0"(ref), "1"(pitch), "2"(rc), "3"(dummy) : "memory");}/* half_interpolate *//* *//* Description: *//* Compute half-pel resolution frames from reference frame. *//* *//* Arguments: *//* fame_encoder_t *encoder: the encoder *//* *//* Return value: *//* None. */static void inline half_interpolate(int width, int height, fame_yuv_t **ref, int rounding){ int x, y, w, h; int pitch; unsigned char *planes[4]; /* Note: reference frame is allocated a little larger */ /* to allow overflow when doing interpolation */ /* and thus avoid a special case for borders. */ /* Note: interpolation is done for Cr and Cb components */ /* too, as it is used when computing the difference */ /* in motion compensation, even if it's not used */ /* in motion estimation. */ /* Y component */ w = width >> 3; h = height; planes[0] = ref[0]->y; planes[1] = ref[1]->y; planes[2] = ref[2]->y; planes[3] = ref[3]->y; pitch = ref[0]->p; for(y = 0; y < h; y++) { for(x = 0; x < w; x++) { mmx_interpolate(planes, pitch, rounding); planes[0]+=8; planes[1]+=8; planes[2]+=8; planes[3]+=8; } /* correct borders */ planes[1][-1] = planes[0][-1]; planes[3][-1] = planes[2][-1]; planes[0] += pitch - (w<<3); planes[1] += pitch - (w<<3); planes[2] += pitch - (w<<3); planes[3] += pitch - (w<<3); } /* correct borders */ memcpy(planes[2] - pitch, planes[0]-pitch, w<<3); memcpy(planes[3] - pitch, planes[1]-pitch, w<<3); /* U component */ w = width >> 4; h = height >> 1; planes[0] = ref[0]->u; planes[1] = ref[1]->u; planes[2] = ref[2]->u; planes[3] = ref[3]->u; pitch = ref[0]->p >> 1; for(y = 0; y < h; y++) { for(x = 0; x < w; x++) { mmx_interpolate(planes, pitch, rounding); planes[0]+=8; planes[1]+=8; planes[2]+=8; planes[3]+=8; } /* correct borders */ planes[1][-1] = planes[0][-1]; planes[3][-1] = planes[2][-1]; planes[0] += pitch - (w<<3); planes[1] += pitch - (w<<3); planes[2] += pitch - (w<<3); planes[3] += pitch - (w<<3); } /* correct borders */ memcpy(planes[2] - pitch, planes[0]-pitch, w<<3); memcpy(planes[3] - pitch, planes[1]-pitch, w<<3); /* V component */ w = width >> 4; h = height >> 1; planes[0] = ref[0]->v; planes[1] = ref[1]->v; planes[2] = ref[2]->v; planes[3] = ref[3]->v; pitch = ref[0]->p >> 1; for(y = 0; y < h; y++) { for(x = 0; x < w; x++) { mmx_interpolate(planes, pitch, rounding); planes[0]+=8; planes[1]+=8; planes[2]+=8; planes[3]+=8; } /* correct borders */ planes[1][-1] = planes[0][-1]; planes[3][-1] = planes[2][-1]; planes[0] += pitch - (w<<3); planes[1] += pitch - (w<<3); planes[2] += pitch - (w<<3); planes[3] += pitch - (w<<3); } /* correct borders */ memcpy(planes[2] - pitch, planes[0]-pitch, w<<3); memcpy(planes[3] - pitch, planes[1]-pitch, w<<3); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -