📄 dsputil.c
字号:
/*
* DSP utils
* Copyright (c) 2000, 2001 Fabrice Bellard.
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
*/
/**
* @file dsputil.c
* DSP utils
*/
#ifndef USE_ASM_VERSION /* [ */
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "decinit.h"
#include "debug.h"
/*
 * Byte lookup table with 256 + 2*MAX_NEG_CROP entries, all zero at load time.
 * NOTE(review): presumably a clamp-to-[0,255] table with MAX_NEG_CROP guard
 * entries on each side, filled in by initialisation code outside this view --
 * confirm against the init routine.
 */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = { 0 };
/**
 * Fixed-point reciprocal table used to replace an integer division by a
 * multiplication and a shift:
 *
 *     a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 *
 * The product must be formed in a type wider than 32 bits for the >>32 to be
 * meaningful. Entry 0 is 0 and entry 1 is 4294967295U (2^32 - 1); neither is
 * covered by the identity above.
 * NOTE(review): the entries for b >= 2 look like ceil(2^32 / b)
 * (e.g. inverse[3] == 1431655766); spot-checked only, not verified exhaustively.
 */
const uint32_t inverse[256]={
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
};
/**
 * Add an 8x8 block of coefficients to an 8x8 area of pixels, in place.
 *
 * Every sum is stored back into a uint8_t, so it wraps modulo 256; no
 * clamping is done here.
 *
 * @param pixels    top-left byte of the area to update
 * @param block     64 coefficients, consumed 8 per pixel row
 * @param line_size distance in bytes between consecutive pixel rows
 */
STATIC_FUNC void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        block  += 8;
        pixels += line_size;
    }
}
/**
 * Add a 4x4 block of coefficients to a 4x4 area of pixels, in place.
 *
 * Every sum is stored back into a uint8_t, so it wraps modulo 256; no
 * clamping is done here.
 *
 * @param pixels    top-left byte of the area to update
 * @param block     16 coefficients, consumed 4 per pixel row
 * @param line_size distance in bytes between consecutive pixel rows
 */
STATIC_FUNC void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] += block[col];
        block  += 4;
        pixels += line_size;
    }
}
/**
 * Copy a 2-byte-wide column of h rows from pixels to block.
 *
 * Each row is read with LD16 (defined outside this file) and written with a
 * single 16-bit store, so block is accessed through a uint16_t pointer.
 *
 * @param block     destination, advanced by line_size per row
 * @param pixels    source, advanced by line_size per row
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void put_pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int row = 0;

    while (row < h) {
        uint16_t *out = (uint16_t *)block;

        *out = LD16(pixels);
        pixels += line_size;
        block  += line_size;
        row++;
    }
}
/**
 * Copy a 4-byte-wide column of h rows from pixels to block.
 *
 * Each row is read with LD32 (defined outside this file) and written with a
 * single 32-bit store, so block is accessed through a uint32_t pointer.
 *
 * @param block     destination, advanced by line_size per row
 * @param pixels    source, advanced by line_size per row
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void put_pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int row = 0;

    while (row < h) {
        uint32_t *out = (uint32_t *)block;

        *out = LD32(pixels);
        pixels += line_size;
        block  += line_size;
        row++;
    }
}
/**
 * Copy an 8-byte-wide column of h rows from pixels to block.
 *
 * Each row is moved as two 32-bit words: bytes 0..3 first, then bytes 4..7.
 * The source words are read with LD32 (defined outside this file).
 *
 * @param block     destination, advanced by line_size per row
 * @param pixels    source, advanced by line_size per row
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void put_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        uint32_t *out = (uint32_t *)block;

        out[0] = LD32(pixels);
        out[1] = LD32(pixels + 4);
        pixels += line_size;
        block  += line_size;
    }
}
/**
 * Write the rnd_avg32() combination of two 8-byte-wide sources to dst.
 *
 * Every row is handled as two 32-bit words (bytes 0..3, then bytes 4..7);
 * each word of src1 and src2 is read with LD32, combined with rnd_avg32 and
 * stored to dst. LD32 and rnd_avg32 are defined outside this file.
 *
 * @param dst         destination rows, dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void put_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                       int src_stride1, int src_stride2, int h)
{
    int row, off;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        uint8_t *out = &dst[row * dst_stride];

        /* low word first, then high word: same load/store order per row */
        for (off = 0; off < 8; off += 4) {
            const uint32_t first  = LD32(in1 + off);
            const uint32_t second = LD32(in2 + off);

            *((uint32_t *)(out + off)) = rnd_avg32(first, second);
        }
    }
}
/**
 * Write the rnd_avg32() combination of two 4-byte-wide sources to dst.
 *
 * One 32-bit word per row is read from each source with LD32, combined with
 * rnd_avg32 and stored to dst. LD32 and rnd_avg32 are defined outside this
 * file.
 *
 * @param dst         destination rows, dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void put_pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                       int src_stride1, int src_stride2, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        const uint32_t first  = LD32(in1);
        const uint32_t second = LD32(in2);
        uint32_t *out = (uint32_t *)&dst[row * dst_stride];

        *out = rnd_avg32(first, second);
    }
}
/**
 * Write the rnd_avg32() combination of two 2-byte-wide sources to dst.
 *
 * One 16-bit value per row is read from each source with LD16 and held in a
 * uint32_t; the rnd_avg32 result is then stored through a uint16_t pointer,
 * which keeps only its low 16 bits. LD16 and rnd_avg32 are defined outside
 * this file.
 *
 * @param dst         destination rows, dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void put_pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                       int src_stride1, int src_stride2, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        const uint32_t first  = LD16(in1);
        const uint32_t second = LD16(in2);
        uint16_t *out = (uint16_t *)&dst[row * dst_stride];

        *out = rnd_avg32(first, second);
    }
}
/**
 * 16-byte-wide variant of put_pixels8_l2(): processes the left 8 columns for
 * all h rows, then the right 8 columns, with unchanged strides.
 *
 * @param dst         destination rows, dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows
 */
STATIC_FUNC inline void put_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                        int src_stride1, int src_stride2, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        put_pixels8_l2(dst + col, src1 + col, src2 + col, dst_stride, src_stride1, src_stride2, h);
}
/**
 * Copy a 16-byte-wide column of h rows by running put_pixels8_c() on the
 * left 8 columns and then on the right 8 columns.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows
 */
STATIC_FUNC void put_pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        put_pixels8_c(block + col, pixels + col, line_size, h);
}
/**
 * Combine a 4-byte-wide column of h rows from pixels into block, in place.
 *
 * For each row the current 32-bit word of block and the word read from
 * pixels with LD32 are passed to rnd_avg32, and the result is stored back to
 * block. This is the same read-modify-write that avg_pixels8_c() applies to
 * each of its two words. LD32 and rnd_avg32 are defined outside this file.
 *
 * @param block     destination, read and written; advanced by line_size per row
 * @param pixels    source, advanced by line_size per row
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void avg_pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        uint32_t *out = (uint32_t *)block;

        /*
         * The store is essential: a bare "(dst, src);" comma expression
         * evaluates both operands and discards them, leaving block untouched.
         */
        *out = rnd_avg32(*out, LD32(pixels));
        pixels += line_size;
        block  += line_size;
    }
}
/**
 * Combine an 8-byte-wide column of h rows from pixels into block, in place.
 *
 * Each row is handled as two 32-bit words (bytes 0..3, then bytes 4..7).
 * For every word, the current value in block and the word read from pixels
 * with LD32 are passed to rnd_avg32 and the result is stored back to block.
 * LD32 and rnd_avg32 are defined outside this file.
 *
 * @param block     destination, read and written; advanced by line_size per row
 * @param pixels    source, advanced by line_size per row
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void avg_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        uint32_t *out = (uint32_t *)block;

        out[0] = rnd_avg32(out[0], LD32(pixels));
        out[1] = rnd_avg32(out[1], LD32(pixels + 4));
        pixels += line_size;
        block  += line_size;
    }
}
/**
 * Blend the rnd_avg32() combination of two 8-byte-wide sources into dst.
 *
 * Every row is handled as two 32-bit words (bytes 0..3, then bytes 4..7).
 * For each word: src1 and src2 are read with LD32 and combined with
 * rnd_avg32; that result is combined with the current dst word by a second
 * rnd_avg32 and stored back. LD32 and rnd_avg32 are defined outside this file.
 *
 * @param dst         destination rows (read and written), dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void avg_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                       int src_stride1, int src_stride2, int h)
{
    int row, off;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        uint8_t *out = &dst[row * dst_stride];

        /* low word first, then high word: same load/store order per row */
        for (off = 0; off < 8; off += 4) {
            const uint32_t first  = LD32(in1 + off);
            const uint32_t second = LD32(in2 + off);
            uint32_t *word = (uint32_t *)(out + off);

            *word = rnd_avg32(*word, rnd_avg32(first, second));
        }
    }
}
/**
 * Blend the rnd_avg32() combination of two 4-byte-wide sources into dst.
 *
 * For each row one 32-bit word is read from src1 and src2 with LD32 and
 * combined with rnd_avg32; that result is combined with the current dst word
 * by a second rnd_avg32 and stored back. LD32 and rnd_avg32 are defined
 * outside this file.
 *
 * @param dst         destination rows (read and written), dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void avg_pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                       int src_stride1, int src_stride2, int h)
{
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        const uint32_t first  = LD32(in1);
        const uint32_t second = LD32(in2);
        uint32_t *word = (uint32_t *)&dst[row * dst_stride];

        *word = rnd_avg32(*word, rnd_avg32(first, second));
    }
}
/**
 * 16-byte-wide variant of avg_pixels8_l2(): processes the left 8 columns for
 * all h rows, then the right 8 columns, with unchanged strides.
 *
 * @param dst         destination rows (read and written), dst_stride bytes apart
 * @param src1        first source, rows src_stride1 bytes apart
 * @param src2        second source, rows src_stride2 bytes apart
 * @param dst_stride  destination row pitch in bytes
 * @param src_stride1 row pitch of src1 in bytes
 * @param src_stride2 row pitch of src2 in bytes
 * @param h           number of rows
 */
STATIC_FUNC inline void avg_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride,
                                        int src_stride1, int src_stride2, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        avg_pixels8_l2(dst + col, src1 + col, src2 + col, dst_stride, src_stride1, src_stride2, h);
}
/**
 * Combine a 16-byte-wide column of h rows from pixels into block by running
 * avg_pixels8_c() on the left 8 columns and then on the right 8 columns.
 *
 * @param block     destination, read and written
 * @param pixels    source
 * @param line_size row pitch in bytes, shared by source and destination
 * @param h         number of rows
 */
STATIC_FUNC void avg_pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        avg_pixels8_c(block + col, pixels + col, line_size, h);
}
#ifdef USE_ASM_VERSION
/*
 * Prototypes for the put/avg H.264 chroma motion-compensation routines
 * (widths 2, 4 and 8) for builds where their bodies come from assembly
 * rather than from the C definitions further down in this file.
 * The inline notes record the alignment each pointer is annotated with:
 * dst "align 8", src "align 1".
 * NOTE(review): in the visible source the enclosing #ifdef USE_ASM_VERSION
 * sits inside an #ifndef USE_ASM_VERSION opened near the top of the file,
 * with no #endif in between, so these declarations look unreachable --
 * confirm against the complete file.
 */
//using ASM functions from asm_dsputil.s
extern void put_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
extern void put_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
extern void put_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
extern void avg_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
extern void avg_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
extern void avg_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
#else /* [ */
#ifdef _MIPS_LINUX_ /* [ */
/**
 * Bilinear interpolation of a 2-pixel-wide block, h rows high.
 *
 * Each output pixel is a weighted sum of the 2x2 source neighbourhood at the
 * same position (current row and the row `stride` bytes below), rounded by
 * adding 32 and shifting right by 6. The four weights are built from x and y
 * and always add up to 64.
 *
 * @param dst    destination; 2 bytes are written per row
 * @param src    source; 3 bytes are read from each of rows r and r+1
 * @param stride row pitch in bytes, shared by source and destination
 * @param h      number of rows; nothing is written when h <= 0
 * @param x      horizontal weight numerator (weight of the right neighbour is x/8)
 * @param y      vertical weight numerator (weight of the lower neighbour is y/8)
 */
void put_h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    const int w_top_left     = (8 - x) * (8 - y);
    const int w_top_right    = x * (8 - y);
    const int w_bottom_left  = (8 - x) * y;
    const int w_bottom_right = x * y;
    int row, col;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 2; col++) {
            const int acc = w_top_left * src[col] + w_top_right * src[col + 1]
                          + w_bottom_left * below[col] + w_bottom_right * below[col + 1];

            dst[col] = (acc + 32) >> 6;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Bilinear interpolation of a 4-pixel-wide block, hh rows high, using the
 * MIPS multiply-accumulate instructions.
 *
 * Each output pixel is A*p(r,c) + B*p(r,c+1) + C*p(r+1,c) + D*p(r+1,c+1),
 * rounded by adding 32 and shifting right by 6, where p(r,c) is the source
 * byte at row r, column c and row r+1 lies `stride` bytes further on.
 * The weights A..D are built from x and y and always add up to 64.
 *
 * @param dst    destination; 4 bytes are written per row
 * @param src    source; 5 bytes are read from each of rows r and r+1
 * @param stride row pitch in bytes, shared by source and destination
 * @param hh     number of rows; nothing is written when hh <= 0
 * @param x      horizontal weight numerator (weight of the right neighbour is x/8)
 * @param y      vertical weight numerator (weight of the lower neighbour is y/8)
 *
 * NOTE(review): every sum is carried in the HI/LO accumulator across five
 * separate asm statements (mult, madd x3, mflo) and none of them lists HI/LO
 * as clobbered or as an operand. The compiler is therefore not told about the
 * dependency; merging each group into one asm statement with a "hi","lo"
 * clobber would make it explicit -- confirm with the toolchain in use.
 */
void put_h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int hh, int x, int y)
{
const int A = (8 - x) *(8 - y);
const int B = (x) *(8 - y);
const int C = (8 - x) *(y);
const int D = (x) *(y);
int ii;
for (ii = 0; ii < hh; ii++)
{
int t1;
/* The five bytes of the current row; the row below is read directly from src[stride + n]. */
const int a = src[0];
const int b = src[1];
const int c = src[2];
const int d = src[3];
const int e = src[4];
/* Pixel 0: mult starts a new product in HI/LO, each madd adds one more term, mflo fetches the low word. */
asm volatile ("mult %0,%1"::"r"(A), "r"(a));
asm volatile ("madd %0,%1"::"r"(B), "r"(b));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 0]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 1]));
asm volatile ("mflo %0":"=r"(t1));
dst[0] = (((t1) + 32) >> 6);
/* Pixel 1: same pattern, shifted one column to the right. */
asm volatile ("mult %0,%1"::"r"(A), "r"(b));
asm volatile ("madd %0,%1"::"r"(B), "r"(c));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 1]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 2]));
asm volatile ("mflo %0":"=r"(t1));
dst[1] = (((t1) + 32) >> 6);
/* Pixel 2. */
asm volatile ("mult %0,%1"::"r"(A), "r"(c));
asm volatile ("madd %0,%1"::"r"(B), "r"(d));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 2]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 3]));
asm volatile ("mflo %0":"=r"(t1));
dst[2] = (((t1) + 32) >> 6);
/* Pixel 3. */
asm volatile ("mult %0,%1"::"r"(A), "r"(d));
asm volatile ("madd %0,%1"::"r"(B), "r"(e));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 3]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 4]));
asm volatile ("mflo %0":"=r"(t1));
dst[3] = (((t1) + 32) >> 6);
/* Step both pointers to the next row. */
dst += stride;
src += stride;
}
}
#if 0
// This version has the inner loop re-rolled. It is slower.
void put_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int hh, int x, int y)
{
const int A = (8 - x) *(8 - y);
const int B = (x) *(8 - y);
const int C = (8 - x) *(y);
const int D = (x) *(y);
int ii, jj, kk;
for (ii = 0; ii < hh; ii++)
{
int t1;
for (jj = 0, kk = 1; jj < 8; jj++, kk++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -