📄 deblock.c
字号:
/*****************************************************************************
 * deblock.c: Altivec-accelerated deblocking for h264 encoder
 *****************************************************************************
 * Copyright (C) 2007-2008 Guillaume Poirier <gpoirier@mplayerhq.hu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

#include <string.h>

#include "common/common.h"
#include "ppccommon.h"

/**
 * In-place interleave of four byte vectors.
 *
 * Two rounds of vec_mergeh/vec_mergel are applied: first r0 with r2 and
 * r1 with r3, then the intermediate results with each other.  Afterwards
 * each group of four consecutive bytes contains one byte taken from each
 * of the original r0, r1, r2 and r3 (in that order); r0 receives the
 * first quarter of those groups ("set 0"), r3 the last ("set 3").
 *
 * All four arguments are both read and overwritten, so they must be
 * modifiable vec_u8_t lvalues.  The macro expands to a braced block with
 * its own temporaries r4..r7.
 */
#define transpose4x16(r0, r1, r2, r3) {      \
    register vec_u8_t r4;                    \
    register vec_u8_t r5;                    \
    register vec_u8_t r6;                    \
    register vec_u8_t r7;                    \
                                             \
    r4 = vec_mergeh(r0, r2);  /*0, 2 set 0*/ \
    r5 = vec_mergel(r0, r2);  /*0, 2 set 1*/ \
    r6 = vec_mergeh(r1, r3);  /*1, 3 set 0*/ \
    r7 = vec_mergel(r1, r3);  /*1, 3 set 1*/ \
                                             \
    r0 = vec_mergeh(r4, r6);  /*all set 0*/  \
    r1 = vec_mergel(r4, r6);  /*all set 1*/  \
    r2 = vec_mergeh(r5, r7);  /*all set 2*/  \
    r3 = vec_mergel(r5, r7);  /*all set 3*/  \
}

/**
 * Scatter four vectors to memory as 16 rows of 4 bytes each.
 *
 * The vectors are first spilled to a 64-byte aligned scratch buffer
 * (r0 at offset 0, r1 at 16, r2 at 32, r3 at 48).  Row k (0..15) of the
 * output then receives scratch bytes [4*k, 4*k + 3], written at
 * dst + k * dst_stride.
 *
 * The rows are copied with memcpy rather than through uint32_t pointers:
 * this avoids type-punning the byte buffers, places no alignment
 * requirement on dst, and keeps the row addresses exact even when
 * dst_stride is not a multiple of 4.
 *
 * @param dst         address of the first 4-byte row to write
 * @param dst_stride  distance in bytes between consecutive rows
 * @param r0          bytes for rows 0..3
 * @param r1          bytes for rows 4..7
 * @param r2          bytes for rows 8..11
 * @param r3          bytes for rows 12..15
 */
static inline void write16x4(uint8_t *dst, int dst_stride,
                             register vec_u8_t r0, register vec_u8_t r1,
                             register vec_u8_t r2, register vec_u8_t r3)
{
    DECLARE_ALIGNED_16(unsigned char result[64]);
    int row;

    vec_st(r0, 0,  result);
    vec_st(r1, 16, result);
    vec_st(r2, 32, result);
    vec_st(r3, 48, result);

    /* FIXME: there has to be a better way than a scalar scatter!!!! */
    for (row = 0; row < 16; row++) {
        memcpy(dst + row * dst_stride, result + 4 * row, 4);
    }
}

/**
 * \brief loads 16 rows of 16 bytes and transposes their first 6 columns.
 *
 * Row k (0..15) is fetched with VEC_LOAD from (src) + k*(src_stride).
 * Four rounds of merges then interleave the rows so that, on exit,
 * r8..r13 hold "all set 0" .. "all set 5": r8 gathers byte 0 of every
 * row in row order, r9 byte 1, and so on up to r13 with byte 5.
 * Only the high half of each loaded row takes part (the first round uses
 * vec_mergeh exclusively), and the remaining transposed columns are
 * deliberately not computed.
 *
 * r8..r13 must be modifiable vec_u8_t lvalues; they are used as scratch
 * before receiving the result.  The macro declares its own temporaries
 * r0..r7, r14 and r15 inside a braced block.
 *
 * NOTE(review): every VEC_LOAD is passed `pix` as its last argument, so
 * the expansion presumably relies on load state named after `pix` being
 * set up in the caller's scope -- confirm against ppccommon.h.
 */
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {\
    register vec_u8_t r0, r1, r2, r3, r4, r5, r6, r7, r14, r15;            \
    VEC_LOAD((src),                      r0,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +    (src_stride),    r1,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  2*(src_stride),    r2,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  3*(src_stride),    r3,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  4*(src_stride),    r4,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  5*(src_stride),    r5,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  6*(src_stride),    r6,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  7*(src_stride),    r7,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) + 14*(src_stride),    r14, 16, vec_u8_t, pix );         \
    VEC_LOAD((src) + 15*(src_stride),    r15, 16, vec_u8_t, pix );         \
                                                                           \
    VEC_LOAD((src) +  8*(src_stride),    r8,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) +  9*(src_stride),    r9,  16, vec_u8_t, pix );         \
    VEC_LOAD((src) + 10*(src_stride),    r10, 16, vec_u8_t, pix );         \
    VEC_LOAD((src) + 11*(src_stride),    r11, 16, vec_u8_t, pix );         \
    VEC_LOAD((src) + 12*(src_stride),    r12, 16, vec_u8_t, pix );         \
    VEC_LOAD((src) + 13*(src_stride),    r13, 16, vec_u8_t, pix );         \
                                                                           \
    /*Merge first pairs*/                                                  \
    r0 = vec_mergeh(r0, r8);    /*0, 8*/                                   \
    r1 = vec_mergeh(r1, r9);    /*1, 9*/                                   \
    r2 = vec_mergeh(r2, r10);   /*2,10*/                                   \
    r3 = vec_mergeh(r3, r11);   /*3,11*/                                   \
    r4 = vec_mergeh(r4, r12);   /*4,12*/                                   \
    r5 = vec_mergeh(r5, r13);   /*5,13*/                                   \
    r6 = vec_mergeh(r6, r14);   /*6,14*/                                   \
    r7 = vec_mergeh(r7, r15);   /*7,15*/                                   \
                                                                           \
    /*Merge second pairs*/                                                 \
    r8  = vec_mergeh(r0, r4);   /*0,4, 8,12 set 0*/                        \
    r9  = vec_mergel(r0, r4);   /*0,4, 8,12 set 1*/                        \
    r10 = vec_mergeh(r1, r5);   /*1,5, 9,13 set 0*/                        \
    r11 = vec_mergel(r1, r5);   /*1,5, 9,13 set 1*/                        \
    r12 = vec_mergeh(r2, r6);   /*2,6,10,14 set 0*/                        \
    r13 = vec_mergel(r2, r6);   /*2,6,10,14 set 1*/                        \
    r14 = vec_mergeh(r3, r7);   /*3,7,11,15 set 0*/                        \
    r15 = vec_mergel(r3, r7);   /*3,7,11,15 set 1*/                        \
                                                                           \
    /*Third merge*/                                                        \
    r0 = vec_mergeh(r8,  r12);  /*0,2,4,6,8,10,12,14 set 0*/               \
    r1 = vec_mergel(r8,  r12);  /*0,2,4,6,8,10,12,14 set 1*/               \
    r2 = vec_mergeh(r9,  r13);  /*0,2,4,6,8,10,12,14 set 2*/               \
    r4 = vec_mergeh(r10, r14);  /*1,3,5,7,9,11,13,15 set 0*/               \
    r5 = vec_mergel(r10, r14);  /*1,3,5,7,9,11,13,15 set 1*/               \
    r6 = vec_mergeh(r11, r15);  /*1,3,5,7,9,11,13,15 set 2*/               \
    /* Don't need to compute 3 and 7*/                                     \
                                                                           \
    /*Final merge*/                                                        \
    r8  = vec_mergeh(r0, r4);   /*all set 0*/                              \
    r9  = vec_mergel(r0, r4);   /*all set 1*/                              \
    r10 = vec_mergeh(r1, r5);   /*all set 2*/                              \
    r11 = vec_mergel(r1, r5);   /*all set 3*/                              \
    r12 = vec_mergeh(r2, r6);   /*all set 4*/                              \
    r13 = vec_mergel(r2, r6);   /*all set 5*/                              \
    /* Don't need to compute 14 and 15*/                                   \
                                                                           \
}

/**
 * Per-byte test of |x - y| < a.
 *
 * The absolute difference is built from the two vec_subs results
 * (x - y and y - x) combined with vec_or, then compared against a with
 * vec_cmplt.
 *
 * @param x  first operand
 * @param y  second operand
 * @param a  per-byte threshold
 * @return   the vec_cmplt comparison result, cast to vec_u8_t
 */
// out: o = |x-y| < a
static inline vec_u8_t diff_lt_altivec ( register vec_u8_t x,
                                         register vec_u8_t y,
                                         register vec_u8_t a) {

    register vec_u8_t diff = vec_subs(x, y);
    register vec_u8_t diffneg = vec_subs(y, x);
    register vec_u8_t o = vec_or(diff, diffneg); /* |x-y| */
    o = (vec_u8_t)vec_cmplt(o, a);
    return o;
}

static inline vec_u8_t h264_deblock_mask ( register vec_u8_t p0, register vec_u8_t p1, register vec_u8_t q0, register vec_u8_t q1, register vec_u8_t alpha, register vec_u8_t beta) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -