dsputil_mmx.c

来自「Trolltech公司发布的图形界面操作系统。可在qt-embedded-2.3.10平台上编译为嵌入式图形界面操作系统。」· C语言代码 · 共 1,736 行 · 第 1/5 页
1,736 行
/*
 * MMX optimized DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 */

#include "../dsputil.h"
#include "../simple_idct.h"

int mm_flags; /* multimedia extension flags */

/* pixel operations */

/* Packed constants, 8-byte aligned so they can be used as movq memory operands. */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;

static const uint64_t ff_pw_20 __attribute__ ((aligned(8))) = 0x0014001400140014ULL;
static const uint64_t ff_pw_3  __attribute__ ((aligned(8))) = 0x0003000300030003ULL;
static const uint64_t ff_pw_16 __attribute__ ((aligned(8))) = 0x0010001000100010ULL;
static const uint64_t ff_pw_15 __attribute__ ((aligned(8))) = 0x000F000F000F000FULL;

/* Emit an 8-byte alignment directive into the instruction stream. */
#define JUMPALIGN() __asm __volatile (".balign 8"::)

/* regd = 0 */
#define MOVQ_ZERO(regd)  __asm __volatile ("pxor %%" #regd ", %%" #regd ::)

/* regd = 0x0001 in every 16-bit word (all ones, then logical shift right by 15) */
#define MOVQ_WONE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

/* regd = 0xfe in every byte (all ones, then each byte added to itself) */
#define MOVQ_BFE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
    "paddb %%" #regd ", %%" #regd " \n\t" ::)

#ifndef PIC
/* Load the constants straight from memory. */
#define MOVQ_BONE(regd)  __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone))
#define MOVQ_WTWO(regd)  __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
#else
/* for shared library it's better to use this way for accessing constants */
/* pcmpeqd -> -1 */

/* regd = 0x01 in every byte: words of 1, packed down to bytes */
#define MOVQ_BONE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd " \n\t" \
    "packuswb %%" #regd ", %%" #regd " \n\t" ::)

/* regd = 0x0002 in every 16-bit word: words of 1, shifted left by one */
#define MOVQ_WTWO(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd " \n\t" \
    "psllw $1, %%" #regd " \n\t"::)

#endif

/*
 * Byte-wise average without rounding up:
 *   regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1)
 *
 * using regr as temporary and for the output result
 * first argument is unmodified and second is trashed
 * regfe is supposed to contain 0xfefefefefefefefe
 */
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr "	\n\t"\
    "pand " #regb ", " #regr "	\n\t"\
    "pxor " #rega ", " #regb "	\n\t"\
    "pand " #regfe "," #regb "	\n\t"\
    "psrlq $1, " #regb " 	\n\t"\
    "paddb " #regb ", " #regr "	\n\t"

/*
 * Byte-wise average with rounding up:
 *   regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1)
 *
 * Same register conventions as PAVGB_MMX_NO_RND.
 */
#define PAVGB_MMX(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr "	\n\t"\
    "por  " #regb ", " #regr "	\n\t"\
    "pxor " #rega ", " #regb "	\n\t"\
    "pand " #regfe "," #regb "	\n\t"\
    "psrlq $1, " #regb "	\n\t"\
    "psubb " #regb ", " #regr "	\n\t"

/*
 * Paired variant of PAVGB_MMX_NO_RND: averages (rega, regb) into regr and
 * (regc, regd) into regp with the two instruction streams interleaved.
 * regb and regd are trashed.
 *
 * mm6 is supposed to contain 0xfefefefefefefefe
 */
#define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp) \
    "movq " #rega ", " #regr "	\n\t"\
    "movq " #regc ", " #regp "	\n\t"\
    "pand " #regb ", " #regr "	\n\t"\
    "pand " #regd ", " #regp "	\n\t"\
    "pxor " #rega ", " #regb "	\n\t"\
    "pxor " #regc ", " #regd "	\n\t"\
    "pand %%mm6, " #regb "	\n\t"\
    "pand %%mm6, " #regd "	\n\t"\
    "psrlq $1, " #regb " 	\n\t"\
    "psrlq $1, " #regd " 	\n\t"\
    "paddb " #regb ", " #regr "	\n\t"\
    "paddb " #regd ", " #regp "	\n\t"

/*
 * Paired variant of PAVGB_MMX (rounding up); same conventions as
 * PAVGBP_MMX_NO_RND, including the 0xfe mask expected in mm6.
 */
#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
    "movq " #rega ", " #regr "	\n\t"\
    "movq " #regc ", " #regp "	\n\t"\
    "por  " #regb ", " #regr "	\n\t"\
    "por  " #regd ", " #regp "	\n\t"\
    "pxor " #rega ", " #regb "	\n\t"\
    "pxor " #regc ", " #regd "	\n\t"\
    "pand %%mm6, " #regb "     	\n\t"\
    "pand %%mm6, " #regd "     	\n\t"\
    "psrlq $1, " #regd "	\n\t"\
    "psrlq $1, " #regb "	\n\t"\
    "psubb " #regb ", " #regr "	\n\t"\
    "psubb " #regd ", " #regp "	\n\t"

/***********************************/
/* MMX no rounding */
#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
#define SET_RND  MOVQ_WONE
#define PAVGBP(a, b, c, d, e, f)	PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
#define PAVGB(a, b, c, e)		PAVGB_MMX_NO_RND(a, b, c, e)

#include "dsputil_mmx_rnd.h"

#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB

/***********************************/
/* MMX rounding */
#define DEF(x, y) x ## _ ## y ##_mmx
#define SET_RND  MOVQ_WTWO
#define PAVGBP(a, b, c, d, e, f)	PAVGBP_MMX(a, b, c, d, e, f)
#define PAVGB(a, b, c, e)		PAVGB_MMX(a, b, c, e)

#include "dsputil_mmx_rnd.h"

#undef DEF
#undef SET_RND
#undef PAVGBP
#undef PAVGB

/***********************************/
/* 3Dnow specific */
#define DEF(x) x ## _3dnow
/* for Athlons PAVGUSB is preferred */
#define PAVGB "pavgusb"

#include "dsputil_mmx_avg.h"

#undef DEF
#undef PAVGB

/***********************************/
/* MMX2 specific */
#define DEF(x) x ## _mmx2
/* Introduced only in MMX2 set */
#define PAVGB "pavgb"

#include "dsputil_mmx_avg.h"

#undef DEF
#undef PAVGB

/***********************************/
/* standard MMX */

#ifdef CONFIG_ENCODERS
/*
 * Read 8 rows of 8 bytes from `pixels` (rows `line_size` bytes apart),
 * zero-extend every byte to 16 bits and store the results contiguously so
 * that they end at block+64.
 *
 * %eax runs from -128 to 0 in steps of 32 bytes, i.e. four iterations of
 * two rows each.
 * NOTE(review): the -128 byte bias relative to block+64 lands on `block`
 * only if DCTELEM is a 16-bit type -- confirm against dsputil.h.
 */
static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
{
    asm volatile(
        "movl $-128, %%eax	\n\t"
        "pxor %%mm7, %%mm7	\n\t"
        ".balign 16		\n\t"
        "1:			\n\t"
        "movq (%0), %%mm0	\n\t"
        "movq (%0, %2), %%mm2	\n\t"
        "movq %%mm0, %%mm1	\n\t"
        "movq %%mm2, %%mm3	\n\t"
        "punpcklbw %%mm7, %%mm0	\n\t"
        "punpckhbw %%mm7, %%mm1	\n\t"
        "punpcklbw %%mm7, %%mm2	\n\t"
        "punpckhbw %%mm7, %%mm3	\n\t"
        "movq %%mm0, (%1, %%eax)\n\t"
        "movq %%mm1, 8(%1, %%eax)\n\t"
        "movq %%mm2, 16(%1, %%eax)\n\t"
        "movq %%mm3, 24(%1, %%eax)\n\t"
        "addl %3, %0		\n\t"
        "addl $32, %%eax	\n\t"
        "js 1b			\n\t"
        : "+r" (pixels)
        : "r" (block+64), "r" (line_size), "r" (line_size*2)
        /* "memory": the asm reads the pixel rows and writes the block
         * through pointers that are not declared as memory operands. */
        : "%eax", "memory"
    );
}

/*
 * Store the element-wise difference s1 - s2 of two 8x8 byte areas as 16-bit
 * values ending at block+64. Both sources advance by `stride` bytes per row.
 *
 * %eax runs from -128 to 0 in steps of 16 bytes, i.e. eight iterations of
 * one row each.
 * NOTE(review): as in get_pixels_mmx, the output starts at `block` only if
 * DCTELEM is a 16-bit type -- confirm against dsputil.h.
 */
static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
{
    asm volatile(
        "pxor %%mm7, %%mm7	\n\t"
        "movl $-128, %%eax	\n\t"
        ".balign 16		\n\t"
        "1:			\n\t"
        "movq (%0), %%mm0	\n\t"
        "movq (%1), %%mm2	\n\t"
        "movq %%mm0, %%mm1	\n\t"
        "movq %%mm2, %%mm3	\n\t"
        "punpcklbw %%mm7, %%mm0	\n\t"
        "punpckhbw %%mm7, %%mm1	\n\t"
        "punpcklbw %%mm7, %%mm2	\n\t"
        "punpckhbw %%mm7, %%mm3	\n\t"
        "psubw %%mm2, %%mm0	\n\t"
        "psubw %%mm3, %%mm1	\n\t"
        "movq %%mm0, (%2, %%eax)\n\t"
        "movq %%mm1, 8(%2, %%eax)\n\t"
        "addl %3, %0		\n\t"
        "addl %3, %1		\n\t"
        "addl $16, %%eax	\n\t"
        "jnz 1b			\n\t"
        : "+r" (s1), "+r" (s2)
        : "r" (block+64), "r" (stride)
        /* "memory": the asm reads both sources and writes the block
         * through pointers that are not declared as memory operands. */
        : "%eax", "memory"
    );
}
#endif //CONFIG_ENCODERS

/*
 * Pack the coefficients in `block` down to bytes with unsigned saturation
 * (packuswb) and store them as 8 rows of 8 bytes at `pixels`, rows
 * `line_size` bytes apart. The work is done in two halves of four rows.
 *
 * NOTE(review): each half reads 64 bytes while `p` advances by 32 elements,
 * which matches only if DCTELEM is a 16-bit type -- confirm.
 */
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
    const DCTELEM *p;
    uint8_t *pix;

    /* read the pixels */
    p = block;
    pix = pixels;

    /* unrolled loop */
    __asm __volatile(
        "movq	%3, %%mm0\n\t"
        "movq	8%3, %%mm1\n\t"
        "movq	16%3, %%mm2\n\t"
        "movq	24%3, %%mm3\n\t"
        "movq	32%3, %%mm4\n\t"
        "movq	40%3, %%mm5\n\t"
        "movq	48%3, %%mm6\n\t"
        "movq	56%3, %%mm7\n\t"
        "packuswb %%mm1, %%mm0\n\t"
        "packuswb %%mm3, %%mm2\n\t"
        "packuswb %%mm5, %%mm4\n\t"
        "packuswb %%mm7, %%mm6\n\t"
        "movq	%%mm0, (%0)\n\t"
        "movq	%%mm2, (%0, %1)\n\t"
        "movq	%%mm4, (%0, %1, 2)\n\t"
        "movq	%%mm6, (%0, %2)\n\t"
        ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p)
        :"memory");

    pix += line_size*4;
    p += 32;

    /* if here would be an exact copy of the code above
     * compiler would generate some very strange code
     * thus using "r" */
    __asm __volatile(
        "movq	(%3), %%mm0\n\t"
        "movq	8(%3), %%mm1\n\t"
        "movq	16(%3), %%mm2\n\t"
        "movq	24(%3), %%mm3\n\t"
        "movq	32(%3), %%mm4\n\t"
        "movq	40(%3), %%mm5\n\t"
        "movq	48(%3), %%mm6\n\t"
        "movq	56(%3), %%mm7\n\t"
        "packuswb %%mm1, %%mm0\n\t"
        "packuswb %%mm3, %%mm2\n\t"
        "packuswb %%mm5, %%mm4\n\t"
        "packuswb %%mm7, %%mm6\n\t"
        "movq	%%mm0, (%0)\n\t"
        "movq	%%mm2, (%0, %1)\n\t"
        "movq	%%mm4, (%0, %1, 2)\n\t"
        "movq	%%mm6, (%0, %2)\n\t"
        ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p)
        :"memory");
}

/*
 * Add the coefficients in `block` to the 8 rows of 8 bytes at `pixels`
 * (rows `line_size` bytes apart) and write the result back in place.
 * The pixels are zero-extended to 16 bits, added with signed saturation
 * (paddsw) and packed back to bytes with unsigned saturation (packuswb).
 *
 * Four iterations, two rows per iteration.
 * NOTE(review): each iteration reads 32 bytes while `p` advances by 16
 * elements, which matches only if DCTELEM is a 16-bit type -- confirm.
 */
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
    const DCTELEM *p;
    uint8_t *pix;
    int i;

    /* read the pixels */
    p = block;
    pix = pixels;
    /* mm7 stays zero across the loop; it is the unpack source below */
    MOVQ_ZERO(mm7);
    i = 4;
    do {
        __asm __volatile(
            "movq	(%2), %%mm0\n\t"
            "movq	8(%2), %%mm1\n\t"
            "movq	16(%2), %%mm2\n\t"
            "movq	24(%2), %%mm3\n\t"
            "movq	%0, %%mm4\n\t"
            "movq	%1, %%mm6\n\t"
            "movq	%%mm4, %%mm5\n\t"
            "punpcklbw %%mm7, %%mm4\n\t"
            "punpckhbw %%mm7, %%mm5\n\t"
            "paddsw	%%mm4, %%mm0\n\t"
            "paddsw	%%mm5, %%mm1\n\t"
            "movq	%%mm6, %%mm5\n\t"
            "punpcklbw %%mm7, %%mm6\n\t"
            "punpckhbw %%mm7, %%mm5\n\t"
            "paddsw	%%mm6, %%mm2\n\t"
            "paddsw	%%mm5, %%mm3\n\t"
            "packuswb %%mm1, %%mm0\n\t"
            "packuswb %%mm3, %%mm2\n\t"
            "movq	%%mm0, %0\n\t"
            "movq	%%mm2, %1\n\t"
            :"+m"(*pix), "+m"(*(pix+line_size))
            :"r"(p)
            :"memory");
        pix += line_size*2;
        p += 16;
    } while (--i);
}

/*
 * NOTE(review): this definition continues past the end of this excerpt; the
 * operand list is not visible here. The visible part copies 8 bytes per row
 * from (%1) to (%2) with row pitch %3, four rows per loop iteration, and
 * decrements %0 by 4 each time -- presumably %0 = h, %1 = pixels,
 * %2 = block, %3 = line_size; confirm against the operand list.
 */
static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm __volatile(
	 "lea (%3, %3), %%eax		\n\t"
	 ".balign 8			\n\t"
	 "1:				\n\t"
	 "movq (%1), %%mm0		\n\t"
	 "movq (%1, %3), %%mm1		\n\t"
	 "movq %%mm0, (%2)		\n\t"
	 "movq %%mm1, (%2, %3)		\n\t"
	 "addl %%eax, %1		\n\t"
         "addl %%eax, %2       		\n\t"
	 "movq (%1), %%mm0		\n\t"
	 "movq (%1, %3), %%mm1		\n\t"
	 "movq %%mm0, (%2)		\n\t"
	 "movq %%mm1, (%2, %3)		\n\t"
	 "addl %%eax, %1		\n\t"
	 "addl %%eax, %2       		\n\t"
	 "subl $4, %0			\n\t"
	 "jnz 1b			\n\t"
dsputil_mmx.c - 源码说明

本页面展示了「Trolltech公司发布的图形界面操作系统。可在qt-embedded-2.3.10平台上编译为嵌入式图形界面操作系统。」中的 dsputil_mmx.c 源码文件，采用 C 语言编写，共 1,736 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与qt-embedded相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?