📄 generic_mmx.h
字号:
/*
   (c) Copyright 2001-2007  The DirectFB Organization (directfb.org)
   (c) Copyright 2000-2004  Convergence (integrated media) GmbH

   All rights reserved.

   Written by Denis Oliver Kropp <dok@directfb.org>,
              Andreas Hundt <andi@fischlustig.de>,
              Sven Neumann <neo@directfb.org>,
              Ville Syrjälä <syrjala@sci.fi> and
              Claudio Ciccani <klan@users.sf.net>.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.
*/

/* Shorthand for GCC's "aligned" variable/type attribute. */
#define __aligned( n )  __attribute__ ((aligned((n))))

/*
 * General notes for every routine in this file:
 *
 *  - Accumulator entries are handled as 8 byte units (four 16 bit words).
 *  - Pointers and the pixel counter are advanced inside the asm, so they are
 *    copied into locals and passed as read-write ("+") operands.  GCC forbids
 *    modifying input-only operands: it may assume they are unchanged after
 *    the asm statement.
 *  - The loops test the counter only after the first iteration, so a
 *    non-positive length is rejected up front instead of wrapping around.
 *  - Each routine ends with "emms" to leave MMX state.
 */

/*
 * Adds the single accumulator value gfxs->SCacc (word-wise, wrapping paddw)
 * to each of the gfxs->length entries of gfxs->Dacc.
 */
static void SCacc_add_to_Dacc_MMX( GenefxState *gfxs )
{
     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "    movq     %2, %%mm0\n"           /* mm0 = constant to add */
          ".align 16\n"
          "1:\n"
          "    movq     (%0), %%mm1\n"
          "    paddw    %%mm0, %%mm1\n"
          "    movq     %%mm1, (%0)\n"
          "    add      $8, %0\n"
          "    dec      %1\n"
          "    jnz      1b\n"
          "    emms"
          : "+D" (D), "+c" (w)
          : "m" (gfxs->SCacc)
          : "%st", "memory");
}

/*
 * Multiplies each 16 bit word of every gfxs->Dacc entry by the matching word
 * of gfxs->Cacc and shifts the (low 16 bit) product right by 8.
 *
 * Entries whose word at byte offset 6 has any of the bits 0xF000 set are
 * left untouched (presumably a "skip this pixel" marker -- confirm against
 * the accumulator definition).
 */
static void Dacc_modulate_argb_MMX( GenefxState *gfxs )
{
     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq     %2, %%mm0\n\t"             /* mm0 = modulation factors */
          ".align 16\n"
          "1:\n\t"
          "testw    $0xF000, 6(%0)\n\t"        /* flagged entry? -> skip */
          "jnz      2f\n\t"
          "movq     (%0), %%mm1\n\t"
          "pmullw   %%mm0, %%mm1\n\t"
          "psrlw    $8, %%mm1\n\t"
          "movq     %%mm1, (%0)\n"
          ".align 16\n"
          "2:\n\t"
          "add      $8, %0\n\t"
          "dec      %1\n\t"
          "jnz      1b\n\t"
          "emms"
          : "+D" (D), "+c" (w)
          : "m" (gfxs->Cacc)
          : "%st", "memory");
}

/*
 * Adds each entry of gfxs->Sacc (word-wise, wrapping paddw) to the entry at
 * the same index of gfxs->Dacc, for gfxs->length entries.
 */
static void Sacc_add_to_Dacc_MMX( GenefxState *gfxs )
{
     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sacc)   S = gfxs->Sacc;

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          ".align 16\n"
          "1:\n\t"
          "movq     (%2), %%mm0\n\t"
          "movq     (%0), %%mm1\n\t"
          "paddw    %%mm1, %%mm0\n\t"
          "movq     %%mm0, (%0)\n\t"
          "add      $8, %0\n\t"
          "add      $8, %2\n\t"
          "dec      %1\n\t"
          "jnz      1b\n\t"
          "emms"
          : "+D" (D), "+c" (w), "+S" (S)
          : /* no inputs */
          : "%st", "memory");
}

/*
 * Converts gfxs->length entries of gfxs->Sacc to 16 bit pixels (5-6-5 bit
 * packing of words 2/1/0) written to gfxs->Aop[0].
 *
 * Per entry: the three low words are saturated to 8 bits (paddusw with
 * 0xFF00 followed by the mask), reduced to their top 5/6/5 bits and packed
 * with pmaddwd plus shifts.  Entries with bits 0xF000 set in the word at
 * byte offset 6 are skipped; the destination pixel is then left unchanged.
 */
static void Sacc_to_Aop_rgb16_MMX( GenefxState *gfxs )
{
     static const u32 preload[] = { 0xFF00FF00, 0x0000FF00 };
     static const u32 mask[]    = { 0x00FC00F8, 0x000000F8 };
     static const u32 pm[]      = { 0x01000004, 0x00000004 };

     __typeof__(gfxs->Aop[0]) D = gfxs->Aop[0];
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sacc)   S = gfxs->Sacc;

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq     %3, %%mm7\n\t"             /* mm7 = saturation preload */
          "movq     %4, %%mm5\n\t"             /* mm5 = 5/6/5 bit mask */
          "movq     %5, %%mm4\n\t"             /* mm4 = pack multipliers */
          ".align 16\n"
          "1:\n\t"
          "testw    $0xF000, 6(%2)\n\t"        /* flagged entry? -> skip */
          "jnz      2f\n\t"
          "movq     (%2), %%mm0\n\t"
          "paddusw  %%mm7, %%mm0\n\t"
          "pand     %%mm5, %%mm0\n\t"
          "pmaddwd  %%mm4, %%mm0\n\t"
          "psrlq    $5, %%mm0\n\t"
          "movq     %%mm0, %%mm1\n\t"
          "psrlq    $21, %%mm0\n\t"
          "por      %%mm1, %%mm0\n\t"
          "movd     %%mm0, %%eax\n\t"
          "movw     %%ax, (%0)\n\t"
          ".align 16\n"
          "2:\n\t"
          "add      $8, %2\n\t"
          "add      $2, %0\n\t"
          "dec      %1\n\t"
          "jnz      1b\n\t"
          "emms"
          : "+D" (D), "+c" (w), "+S" (S)
          : "m" (*preload), "m" (*mask), "m" (*pm)
          : "%eax", "%st", "memory");
}

/*
 * Converts gfxs->length entries of gfxs->Sacc to 32 bit pixels written to
 * gfxs->Aop[0].
 *
 * Per entry: the three low words are saturated to 8 bits and packed as
 * (word2 << 16) | (word1 << 8) | word0; the top byte of the pixel is zero.
 * Entries with bits 0xF000 set in the word at byte offset 6 are skipped;
 * the destination pixel is then left unchanged.
 */
static void Sacc_to_Aop_rgb32_MMX( GenefxState *gfxs )
{
     static const u32 preload[]  = { 0xFF00FF00, 0x0000FF00 };
     static const u32 postload[] = { 0x00FF00FF, 0x000000FF };
     static const u32 pm[]       = { 0x01000001, 0x00000001 };

     __typeof__(gfxs->Aop[0]) D = gfxs->Aop[0];
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sacc)   S = gfxs->Sacc;

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq     %3, %%mm1\n\t"             /* mm1 = saturation preload */
          "movq     %4, %%mm2\n\t"             /* mm2 = 8 bit mask */
          "movq     %5, %%mm3\n\t"             /* mm3 = pack multipliers */
          ".align 16\n"
          "1:\n\t"
          "testw    $0xF000, 6(%2)\n\t"        /* flagged entry? -> skip */
          "jnz      2f\n\t"
          "movq     (%2), %%mm0\n\t"
          "paddusw  %%mm1, %%mm0\n\t"
          "pand     %%mm2, %%mm0\n\t"
          "pmaddwd  %%mm3, %%mm0\n\t"
          "movq     %%mm0, %%mm4\n\t"
          "psrlq    $16, %%mm0\n\t"
          "por      %%mm0, %%mm4\n\t"
          "movd     %%mm4, (%0)\n\t"
          ".align 16\n"
          "2:\n\t"
          "add      $8, %2\n\t"
          "add      $4, %0\n\t"
          "dec      %1\n\t"
          "jnz      1b\n\t"
          "emms"
          : "+D" (D), "+c" (w), "+S" (S)
          : "m" (*preload), "m" (*postload), "m" (*pm)
          : "%st", "memory");
}

/*
 * Scaled read of 32 bit source pixels from gfxs->Sop[0] into gfxs->Dacc.
 *
 * Each source pixel is expanded byte -> word and stored.  A 16.16 fixed
 * point position 'i' is advanced by gfxs->SperD per destination entry;
 * whenever its integer part becomes non-zero the source pointer moves on
 * by that many 4 byte pixels and only the fraction is kept.
 */
__attribute__((no_instrument_function))
static void Sop_argb_Sto_Dacc_MMX( GenefxState *gfxs )
{
     static const u32 zeros[] = { 0, 0 };

     int                      i = 0;
     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sop[0]) S = gfxs->Sop[0];

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq       %5, %%mm0\n\t"           /* mm0 = 0 for unpacking */
          ".align 16\n"
          "1:\n\t"
          "movd       (%3), %%mm1\n\t"         /* load + expand one pixel */
          "punpcklbw  %%mm0, %%mm1\n\t"
          ".align 16\n"
          "2:\n\t"
          "movq       %%mm1, (%1)\n\t"
          "dec        %2\n\t"
          "jz         3f\n\t"
          "add        $8, %1\n\t"
          "add        %4, %0\n\t"              /* i += SperD */
          "testl      $0xFFFF0000, %0\n\t"     /* still on same pixel? */
          "jz         2b\n\t"
          "movl       %0, %%edx\n\t"
          "andl       $0xFFFF0000, %%edx\n\t"
          "shrl       $14, %%edx\n\t"          /* (i >> 16) * 4 bytes */
#ifdef ARCH_X86_64
          "addq       %%rdx, %3\n\t"
#else
          "addl       %%edx, %3\n\t"
#endif
          "andl       $0xFFFF, %0\n\t"         /* keep the fraction */
          "jmp        1b\n"
          "3:\n\t"
          "emms"
          : "+r" (i), "+D" (D), "+c" (w), "+S" (S)
          : "a" (gfxs->SperD), "m" (*zeros)
          : "%edx", "%st", "memory");
}

/*
 * Expands gfxs->length 32 bit source pixels from gfxs->Sop[0] into
 * gfxs->Dacc, turning every byte into a zero-extended 16 bit word.
 */
static void Sop_argb_to_Dacc_MMX( GenefxState *gfxs )
{
     static const u32 zeros[] = { 0, 0 };

     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sop[0]) S = gfxs->Sop[0];

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq       %3, %%mm0\n\t"           /* mm0 = 0 for unpacking */
          ".align 16\n"
          "1:\n\t"
          "movd       (%2), %%mm1\n\t"
          "punpcklbw  %%mm0, %%mm1\n\t"
          "movq       %%mm1, (%0)\n\t"
          "add        $4, %2\n\t"
          "add        $8, %0\n\t"
          "dec        %1\n\t"
          "jnz        1b\n\t"
          "emms"
          : "+D" (D), "+c" (w), "+S" (S)
          : "m" (*zeros)
          : "%st", "memory");
}

/*
 * Expands gfxs->length 16 bit (5-6-5) source pixels from gfxs->Sop[0] into
 * gfxs->Dacc.  Each component is scaled to 8 bits (low bits zero) and the
 * fourth word of every entry is set to 0xFF.
 *
 * The loop is unrolled four times: one 8 byte load supplies four pixels
 * which are consumed by shifting mm0 right by 16 bits after each one.
 *
 * NOTE(review): the 8 byte load happens even when fewer than four pixels
 * remain, so up to 6 bytes beyond the last source pixel may be read.
 */
static void Sop_rgb16_to_Dacc_MMX( GenefxState *gfxs )
{
     static const u32 mask[]  = { 0x07E0001F, 0x0000F800 };
     static const u32 smul[]  = { 0x00200800, 0x00000001 };
     static const u32 alpha[] = { 0x00000000, 0x00FF0000 };

     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sop[0]) S = gfxs->Sop[0];

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq       %3, %%mm4\n\t"           /* mm4 = component masks */
          "movq       %4, %%mm5\n\t"           /* mm5 = scale multipliers */
          "movq       %5, %%mm7\n\t"           /* mm7 = constant alpha */
          ".align 16\n"
          "1:\n\t"
          "movq       (%2), %%mm0\n\t"
          /* 1st conversion to 24 bit interleaved */
          "movq       %%mm0, %%mm3\n\t"
          "punpcklwd  %%mm3, %%mm3\n\t"
          "punpckldq  %%mm3, %%mm3\n\t"
          "pand       %%mm4, %%mm3\n\t"
          "pmullw     %%mm5, %%mm3\n\t"
          "psrlw      $8, %%mm3\n\t"
          /* mm3 now holds: 0000 00rr 00gg 00bb of the old pixel */
          "por        %%mm7, %%mm3\n\t"
          "movq       %%mm3, (%0)\n\t"
          "dec        %1\n\t"
          "jz         2f\n\t"
          "psrlq      $16, %%mm0\n\t"
          "add        $8, %0\n\t"
          /* 2nd conversion to 24 bit interleaved */
          "movq       %%mm0, %%mm3\n\t"
          "punpcklwd  %%mm3, %%mm3\n\t"
          "punpckldq  %%mm3, %%mm3\n\t"
          "pand       %%mm4, %%mm3\n\t"
          "pmullw     %%mm5, %%mm3\n\t"
          "psrlw      $8, %%mm3\n\t"
          /* mm3 now holds: 0000 00rr 00gg 00bb of the old pixel */
          "por        %%mm7, %%mm3\n\t"
          "movq       %%mm3, (%0)\n\t"
          "dec        %1\n\t"
          "jz         2f\n\t"
          "psrlq      $16, %%mm0\n\t"
          "add        $8, %0\n\t"
          /* 3rd conversion to 24 bit interleaved */
          "movq       %%mm0, %%mm3\n\t"
          "punpcklwd  %%mm3, %%mm3\n\t"
          "punpckldq  %%mm3, %%mm3\n\t"
          "pand       %%mm4, %%mm3\n\t"
          "pmullw     %%mm5, %%mm3\n\t"
          "psrlw      $8, %%mm3\n\t"
          /* mm3 now holds: 0000 00rr 00gg 00bb of the old pixel */
          "por        %%mm7, %%mm3\n\t"
          "movq       %%mm3, (%0)\n\t"
          "dec        %1\n\t"
          "jz         2f\n\t"
          "psrlq      $16, %%mm0\n\t"
          "add        $8, %0\n\t"
          /* 4th conversion to 24 bit interleaved */
          "movq       %%mm0, %%mm3\n\t"
          "punpcklwd  %%mm3, %%mm3\n\t"
          "punpckldq  %%mm3, %%mm3\n\t"
          "pand       %%mm4, %%mm3\n\t"
          "pmullw     %%mm5, %%mm3\n\t"
          "psrlw      $8, %%mm3\n\t"
          /* mm3 now holds: 0000 00rr 00gg 00bb of the old pixel */
          "por        %%mm7, %%mm3\n\t"
          "movq       %%mm3, (%0)\n\t"
          "dec        %1\n\t"
          "jz         2f\n\t"
          "add        $8, %0\n\t"
          "add        $8, %2\n\t"              /* next group of 4 pixels */
          "jmp        1b\n"
          "2:\n\t"
          "emms"
          : "+D" (D), "+c" (w), "+S" (S)
          : "m" (*mask), "m" (*smul), "m" (*alpha)
          : "%st", "memory");
}

/*
 * Expands gfxs->length 32 bit source pixels from gfxs->Sop[0] into
 * gfxs->Dacc: every byte becomes a zero-extended 16 bit word and 0x00FF is
 * OR-ed into the fourth word of each entry.
 */
static void Sop_rgb32_to_Dacc_MMX( GenefxState *gfxs )
{
     static const u32 alpha[] = { 0, 0x00FF0000 };
     static const u32 zeros[] = { 0, 0 };

     __typeof__(gfxs->Dacc)   D = gfxs->Dacc;
     __typeof__(gfxs->length) w = gfxs->length;
     __typeof__(gfxs->Sop[0]) S = gfxs->Sop[0];

     if (w <= 0)
          return;

     __asm__ __volatile__ (
          "movq       %3, %%mm7\n\t"           /* mm7 = constant alpha */
          "movq       %4, %%mm6\n\t"           /* mm6 = 0 for unpacking */
          ".align 16\n"
          "1:\n\t"
          "movd       (%2), %%mm0\n\t"
          "punpcklbw  %%mm6, %%mm0\n\t"
          "por        %%mm7, %%mm0\n\t"
          "movq       %%mm0, (%0)\n\t"
          "add        $4, %2\n\t"
          "add        $8, %0\n\t"
          "dec        %1\n\t"
          "jnz        1b\n\t"
          "emms"
          : "+D" (D), "+c" (w), "+S" (S)
          : "m" (*alpha), "m" (*zeros)
          : "%st", "memory");
}

static void Xacc_blend_invsrcalpha_MMX( GenefxState *gfxs ){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -