/* altivec_mlib.c */
/* Ogle - A video player * Copyright (C) 2001, Charles M. Hannum <root@ihack.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *//* * AltiVec support written by Charles M. Hannum <root@ihack.net>, except * for the core IDCT routine published by Motorola. * * Notes: * 1) All AltiVec loads and stores are aligned. Conveniently, the output * area for the IDCT and motion comp functions is always aligned. * However, the reference area is not; therefore, we check its * alignment and use lvsl/vperm as necessary to align the reference * data as it's loaded. * 2) Unfortunately, AltiVec doesn't do 8-byte loads and stores. This * means that the fastest paths are only applicable to the Y channel. * For 8-byte operations on the U and V channels, there are two cases. * When the alignment of the input and output are the same, we can do * 16-byte loads and just do two 4-byte stores. For the unmatched * alignment case, we have to do a rotation of the loaded data first. * 3) The `i[0-7]' variables look silly, but they prevent GCC from * generating gratuitous multiplies, and allow the loaded constants * to be recycled several times in the IDCT routine. * 4) The use of "b" constraints is *very* important. Using r0 in any * of the AltiVec load/store instructions is equivalent to a constant * 0. 
*/#include <inttypes.h>#if 0#define ASSERT(x) if (!(x)) abort()#else#define ASSERT(x)#endifvoidmlib_Init(void){ asm("mtspr 0x100,%0" : : "b" (-1));}static inline voidmlib_VideoInterpAveXY_U8_U8(uint8_t *curr_block, const uint8_t *ref_block, const int width, const int height, int32_t frame_stride, int32_t field_stride) { int x, y; const uint8_t *ref_block_next = ref_block + field_stride; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) curr_block[x] = (curr_block[x] + ((ref_block[x] + ref_block_next[x] + ref_block[x+1] + ref_block_next[x+1] + 2) >> 2) + 1) >> 1; curr_block += frame_stride; ref_block += frame_stride; ref_block_next += frame_stride; }}static inline voidmlib_VideoInterpXY_U8_U8(uint8_t *curr_block, const uint8_t *ref_block, const int width, const int height, int32_t frame_stride, int32_t field_stride) { int x, y; const uint8_t *ref_block_next = ref_block + field_stride; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) curr_block[x] = (ref_block[x] + ref_block_next[x] + ref_block[x+1] + ref_block_next[x+1] + 2) >> 2; curr_block += frame_stride; ref_block += frame_stride; ref_block_next += frame_stride; }}voidmlib_VideoCopyRefAve_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block, int32_t stride){ ASSERT(((int)curr_block & 15) == 0); if (((int)ref_block & 15) != 0) { int i0 = 0, i1 = 16; asm("" "lvsl 3,%0,%1\n" "" : : "b" (ref_block), "b" (i0)); asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; 
asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2 \n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 
0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); } else { int i0 = 0; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" 
"lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); }}void mlib_VideoCopyRefAve_U8_U8_16x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t stride){ ASSERT(((int)curr_block & 15) == 0); if (((int)ref_block & 15) != 0) { int i0 = 0, i1 = 16; asm("" "lvsl 3,%0,%1\n" "" : : "b" (ref_block), "b" (i0)); asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), 
"b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); } else { int i0 = 0; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); }}void mlib_VideoCopyRefAve_U8_U8_8x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t stride){ ASSERT(((int)curr_block & 7) == 0); if ((((int)ref_block ^ (int)curr_block) & 15) != 0) { const int i0 = 0, i1 = 16, i2 = 4; asm("" "lvsl 3,%1,%2\n" "lvsl 
4,%1,%3\n" "lvsr 5,%0,%2\n" "lvsr 6,%0,%3\n" "vperm 3,3,3,5\n" "vperm 4,4,4,6\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + stride)); asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n"
/*
 * NOTE: the original file continues beyond this point (the remaining
 * motion-compensation variants and the Motorola IDCT routine); the rest
 * was lost when this copy was scraped.
 */