📄 mcomp_sgl_mmx.cc

📁 PIXIL is a small footprint operating environment, complete with PDA PIM applications, a browser and
💻 CC
📖 第 1 页 / 共 2 页
字号:
12 下一页
/********************************************************************************
    Copyright (C) 1999  Dirk Farin

    This program is distributed under GNU Public License (GPL) as
    outlined in the COPYING file that comes with the source distribution.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 ********************************************************************************/

#include "video12/modules/mcomp_sgl_mmx.hh"

#include <iostream.h>
#include <iomanip.h>
#include <stdint.h>


// Load the two per-byte masks used by the MMX averaging code below:
//   mm0 = 0x01 in every byte (selects the least significant bit of each byte)
//   mm7 = 0xFE in every byte (clears the least significant bit of each byte)
// NOTE(review): the later asm statements expect mm0/mm7 to still hold these
// values; the compiler is not told that the registers stay live between the
// separate asm statements.
static void InitMMXRegs()
{
  uint64 mask1 = 0x0101010101010101LL;
  uint64 mask2 = 0xFEFEFEFEFEFEFEFELL;

  __asm__
    (
     "movq %0,%%mm0\n\t"
     "movq %1,%%mm7\n\t"
     : : "m" (mask1), "m" (mask2)
     );
}


// -------------------------- SINGLE PREDICTION ----------------------------------

// Copy a 16-byte wide luma block of mc->blkheight rows from mc->lastimg.y to
// mc->currimg.y without any interpolation. Both pointers advance by
// mc->bytesperline_lum per row.
// NOTE(review): the rows are accessed through uint64 pointers, so this
// presumably relies on Pixel being a one-byte type and on the target
// tolerating unaligned 64-bit accesses -- confirm.
static void LumaFF(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp;
        Pixel* dp;

  sp = mc->lastimg.y;
  dp = mc->currimg.y;

  for (int y=mc->blkheight;y>0;y--)
    {
      const uint64* spd = (const uint64*)sp;
            uint64* dpd = (uint64*)dp;

      dpd[ 0]= spd[ 0];
      dpd[ 1]= spd[ 1];

      dp += mc->bytesperline_lum;
      sp += mc->bytesperline_lum;
    }
}


// Horizontal interpolation of a 16-pixel wide luma block: every output byte is
// the rounded average of a source byte and its right neighbour, computed as
// (a>>1) + (b>>1) + ((a|b)&1), which equals (a+b+1)>>1.
//
// The source pointer is rounded down to an 8-byte boundary and 24 bytes are
// read per row; the wanted 16 (and 16 shifted by one pixel) are then assembled
// with 64-bit shifts.
static void LumaHF(MotionCompensation_SglMB::MCData* mc)
{
  InitMMXRegs();

  const Pixel* sp;
        Pixel* dp;

  sp = mc->lastimg.y;
  dp = mc->currimg.y;

  // Use a pointer-sized integer for the alignment test; casting the pointer
  // to a 32-bit integer loses bits (and is rejected by g++) on 64-bit targets.
  uint64 xshift = (((uintptr_t)sp) & 7);
  sp -= xshift;
  xshift <<= 3;               // left bits to ignore
  uint64 xshift2 = 64-xshift; // right bits to ignore (64 when already aligned)
  uint64 xshift3 = 64-8-xshift;

  for (int y=mc->blkheight;y>0;y--)
    {
      __asm__
        (
         // read 24 pixels from the aligned address (mm1 mm2 mm3)
         "movq   (%0),%%mm1\n\t"
         "movq  8(%0),%%mm2\n\t"
         "movq 16(%0),%%mm3\n\t"
         "movq %%mm2,%%mm4\n\t"
         "movq %%mm3,%%mm6\n\t"  // kept for later
         "psrlq %2,%%mm1\n\t"
         "psrlq %2,%%mm2\n\t"
         "psllq %3,%%mm4\n\t"
         "psllq %3,%%mm3\n\t"
         "por  %%mm4,%%mm1\n\t"
         "por  %%mm3,%%mm2\n\t"

         // the 16 pixels are now in (mm1,mm2)

         "movq %%mm1,%%mm3\n\t"
         "movq %%mm2,%%mm4\n\t"
         "movq %%mm2,%%mm5\n\t"
         "psrlq $8,%%mm3\n\t"
         "psrlq $8,%%mm4\n\t"
         "psllq $56,%%mm5\n\t"
         "psllq %4,%%mm6\n\t"
         "por  %%mm5,%%mm3\n\t"
         "por  %%mm6,%%mm4\n\t"

         // another 16 pixels (starting one pixel further right) are now in (mm3,mm4)

         // compute rounding (mm5,mm6): LSB of (left | right) for every byte
         "movq %%mm1,%%mm5\n\t"
         "por  %%mm3,%%mm5\n\t"
         "pand %%mm0,%%mm5\n\t"
         "movq %%mm2,%%mm6\n\t"
         "por  %%mm4,%%mm6\n\t"
         "pand %%mm0,%%mm6\n\t"

         // add pixel values: clear each byte's LSB first so that the 64-bit
         // shift by one does not move bits across byte boundaries
         "pand    %%mm7,%%mm1\n\t"
         "pand    %%mm7,%%mm2\n\t"
         "pand    %%mm7,%%mm3\n\t"
         "pand    %%mm7,%%mm4\n\t"
         "psrlq   $1,%%mm1\n\t"
         "psrlq   $1,%%mm2\n\t"
         "psrlq   $1,%%mm3\n\t"
         "psrlq   $1,%%mm4\n\t"
         "paddusb %%mm3,%%mm1\n\t"
         "paddusb %%mm4,%%mm2\n\t"

         // add rounding
         "paddusb %%mm5,%%mm1\n\t"
         "paddusb %%mm6,%%mm2\n\t"

         // store
         "movq %%mm1, (%1)\n\t"
         "movq %%mm2,8(%1)\n\t"
         : : "r"(sp),"r"(dp),"m"(xshift),"m"(xshift2),"m"(xshift3)
         // The asm reads *sp and writes *dp through register operands, so the
         // compiler must be told that memory is accessed.
         : "memory"
         );

      sp += mc->bytesperline_lum;
      dp += mc->bytesperline_lum;
    }
}


// Vertical interpolation of a 16-pixel wide luma block: every output pixel is
// (p + pixel_one_line_below + 1) >> 1, for mc->blkheight rows.
static void LumaFH(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp;
        Pixel* dp;

  sp = mc->lastimg.y;
  dp = mc->currimg.y;

  for (int y=0;y<mc->blkheight;y++)
    {
      for (int x=0;x<16;x++)
        {
          *dp++ = (*sp + sp[mc->bytesperline_lum] + 1)>>1;
          sp++;
        }

      // both pointers moved 16 pixels to the right; step to the next row start
      sp += mc->bytesperline_lum-16;
      dp += mc->bytesperline_lum-16;
    }
}


// Horizontal + vertical interpolation of a 16-pixel wide luma block: every
// output pixel is the sum of a 2x2 source neighbourhood, plus 2, shifted right
// by 2. The column sums v1/v2 are reused alternately, so each vertical pair is
// added only once; the inner loop produces two output pixels per iteration.
static void LumaHH(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp;
        Pixel* dp;

  sp = mc->lastimg.y;
  dp = mc->currimg.y;

  for (int y=0;y<mc->blkheight;y++)
    {
      int v1 = *sp + sp[mc->bytesperline_lum];

      for (int x=0;x<8;x++)
        {
          sp++;
          int v2 = *sp + sp[mc->bytesperline_lum];
          *dp++ = (v1 + v2 +2)>>2;

          sp++;
          v1 = *sp + sp[mc->bytesperline_lum];
          *dp++ = (v1 + v2 +2)>>2;
        }

      sp += mc->bytesperline_lum-16;
      dp += mc->bytesperline_lum-16;
    }
}


// Copy an 8-pixel wide block of mc->blkheight_chr rows without interpolation,
// first for the cr plane and then for the cb plane (lastimg -> currimg).
static void ChromaFF(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp = mc->lastimg.cr;
        Pixel* dp = mc->currimg.cr;

  int h = mc->blkheight_chr;

  for (int y=0;y<h;y++)
    {
      for (int x=0;x<8;x++)
        {
          *dp++ = *sp++;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }

  sp = mc->lastimg.cb;
  dp = mc->currimg.cb;

  for (int y=0;y<h;y++)
    {
      for (int x=0;x<8;x++)
        {
          *dp++ = *sp++;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }
}


// Vertical interpolation of the 8-pixel wide cr and cb blocks: every output
// pixel is (p + pixel_one_line_below + 1) >> 1.
static void ChromaFH(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp = mc->lastimg.cr;
        Pixel* dp = mc->currimg.cr;

  int h = mc->blkheight_chr;

  for (int y=0;y<h;y++)
    {
      for (int x=0;x<8;x++)
        {
          *dp++ = (*sp + sp[mc->bytesperline_chr]+1)>>1; sp++;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }

  sp = mc->lastimg.cb;
  dp = mc->currimg.cb;

  for (int y=0;y<h;y++)
    {
      for (int x=0;x<8;x++)
        {
          *dp++ = (*sp + sp[mc->bytesperline_chr]+1)>>1; sp++;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }
}


// Horizontal interpolation of the 8-pixel wide cr and cb blocks: every output
// pixel is (p + right_neighbour + 1) >> 1.
static void ChromaHF(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp = mc->lastimg.cr;
        Pixel* dp = mc->currimg.cr;

  int h = mc->blkheight_chr;

  for (int y=0;y<h;y++)
    {
      for (int x=0;x<8;x++)
        {
          *dp++ = (*sp + sp[1]+1)>>1; sp++;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }

  sp = mc->lastimg.cb;
  dp = mc->currimg.cb;

  for (int y=0;y<h;y++)
    {
      for (int x=0;x<8;x++)
        {
          *dp++ = (*sp + sp[1]+1)>>1; sp++;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }
}


// Horizontal + vertical interpolation of the 8-pixel wide cr and cb blocks:
// every output pixel is (sum of the 2x2 source neighbourhood + 2) >> 2.
// v1 carries the previous column's vertical sum into the next iteration.
static void ChromaHH(MotionCompensation_SglMB::MCData* mc)
{
  const Pixel* sp = mc->lastimg.cr;
        Pixel* dp = mc->currimg.cr;

  int h = mc->blkheight_chr;

  for (int y=0;y<h;y++)
    {
      int v1 = *sp + sp[mc->bytesperline_chr];

      for (int x=0;x<8;x++)
        {
          sp++;
          int v2 = *sp + sp[mc->bytesperline_chr];
          *dp++ = (v1 + v2+2)>>2;
          v1 = v2;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }

  sp = mc->lastimg.cb;
  dp = mc->currimg.cb;

  for (int y=0;y<h;y++)
    {
      int v1 = *sp + sp[mc->bytesperline_chr];

      for (int x=0;x<8;x++)
        {
          sp++;
          int v2 = *sp + sp[mc->bytesperline_chr];
          *dp++ = (v1 + v2+2)>>2;
          v1 = v2;
        }

      sp += mc->bytesperline_chr-8;
      dp += mc->bytesperline_chr-8;
    }
}


// Average a 16-pixel wide luma block from mc->nextimg.y into the block already
// present in mc->currimg.y. In the 8-byte aligned case shown here, every
// destination byte becomes (dp>>1) + (rp>>1) + ((rp|dp)&1).
static void LumaFFadd(MotionCompensation_SglMB::MCData* mc)
{
  InitMMXRegs();

  const Pixel* sp;
        Pixel* dp;

  sp = mc->nextimg.y;
  dp = mc->currimg.y;

#if 1
  // Pointer-sized integer for the alignment test (a 32-bit cast would
  // truncate the pointer on 64-bit targets).
  uint64 xshift = ((uintptr_t)sp) & 7;

  if (xshift==0)
    {
      for (int y=mc->blkheight;y>0;y--)
        {
          // rp: mm1 mm4
          // dp: mm2 mm5

          __asm__
            (
             "movq (%0),%%mm1\n\t"
             "movq (%1),%%mm2\n\t"

             " movq %%mm1,%%mm3\n\t"
             "movq 8(%0),%%mm4\n\t"
             " por  %%mm2,%%mm3\n\t" // mm3 = rp[0] | dp[0]
             "movq 8(%1),%%mm5\n\t"
             " pand %%mm0,%%mm3\n\t" // mm3 = LSB of (rp[0] | dp[0]) = rounding[0]

             "movq %%mm4,%%mm6\n\t"
             "por  %%mm5,%%mm6\n\t"
             "pand %%mm0,%%mm6\n\t"  // mm6 = LSB of (rp[1] | dp[1]) = rounding[1]

             "pand %%mm7,%%mm1\n\t"     // mask out LSB
             "pand %%mm7,%%mm4\n\t"     // mask out LSB
             "psrlq $1,%%mm1\n\t"
             "pand %%mm7,%%mm2\n\t"     // mask out LSB
             "psrlq $1,%%mm2\n\t"
             "pand %%mm7,%%mm5\n\t"     // mask out LSB
             "psrlq $1,%%mm4\n\t"
             "paddusb  %%mm2, %%mm1\n\t"
             "psrlq $1,%%mm5\n\t"
             "paddusb  %%mm3, %%mm1\n\t"
             "movq   %%mm1,(%1)\n\t"      // dp[0] = (dp[0]>>1 + rp[0]>>1) + rounding[0]

             "paddusb  %%mm5, %%mm4\n\t"
             "paddusb  %%mm6, %%mm4\n\t"
             "movq   %%mm4,8(%1)\n\t"     // dp[1] = (dp[1]>>1 + rp[1]>>1) + rounding[1]

             : : "r"(sp),"r"(dp)
             // memory is read and written through the register operands
             : "memory"
             );
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -