📄 dsp_mmx.c
字号:
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mcomp.c,v 1.8 2003/12/03 08:59:41 arc Exp $
********************************************************************/
#include <stdlib.h>
#include "codec_internal.h"
#include "dsp.h"
#if 0
//These are to let me selectively enable the C versions, these are needed
#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
#endif
static const ogg_int64_t V128 = 0x0080008000800080LL;
static void sub8x8__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr,
ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
ogg_uint32_t ReconPixelsPerLine)
{
//Make non-zero to use the C-version
#if 0
int i; /* For each block row */ for (i=8; i; i--) { DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]); DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]); DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]); DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]); DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]); DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]); DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]); DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]); /* Start next row */ FiltPtr += PixelsPerLine; ReconPtr += ReconPixelsPerLine; DctInputPtr += 8; }
#else
__asm {
align 16
pxor mm7, mm7
mov eax, FiltPtr
mov ebx, ReconPtr
mov edx, DctInputPtr
/* You can't use rept in inline masm and macro parsing seems screwed with inline asm*/
/* ITERATION 1 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 2 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 3 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 4 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 5 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 6 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 7 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
/* ITERATION 8 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm1, [ebx] /* mm1 = ReconPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
movq mm3, mm1 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 /* mm1 = INT16(ReconPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 /* mm3 = INT16(ReconPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - ReconPtr */
psubw mm2, mm3 /* mm2 = FiltPtr - ReconPtr */
movq [edx], mm0 /* write answer out */
movq [8 + edx], mm2 /* write answer out */
/* Increment pointers */
add edx, 16
add eax, PixelsPerLine
add ebx, ReconPixelsPerLine
};
#endif
}
static void sub8x8_128__mmx (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
ogg_uint32_t PixelsPerLine)
{
#if 0
int i; /* For each block row */ for (i=8; i; i--) { /* INTRA mode so code raw image data */ /* We convert the data to 8 bit signed (by subtracting 128) as this reduces the internal precision requirments in the DCT transform. */ DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128); DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128); DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128); DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128); DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128); DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128); DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128); DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128); /* Start next row */ FiltPtr += PixelsPerLine; DctInputPtr += 8; }
#else
__asm {
align 16
pxor mm7, mm7
mov eax, FiltPtr
mov ebx, DctInputPtr
movq mm1, V128
/* ITERATION 1 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 2 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 3 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 4 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 5 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 6 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 7 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
/* ITERATION 8 */
movq mm0, [eax] /* mm0 = FiltPtr */
movq mm2, mm0 /* dup to prepare for up conversion */
/* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 /* mm0 = INT16(FiltPtr) */
punpckhbw mm2, mm7 /* mm2 = INT16(FiltPtr) */
/* start calculation */
psubw mm0, mm1 /* mm0 = FiltPtr - 128 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -