📄 dsputil.c
字号:
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 */

/**
 * @file dsputil.c
 * DSP utils
 */

#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "simple_idct.h"
#include "faandct.h"

/* Lookup table indexed through (cropTbl + MAX_NEG_CROP); filled elsewhere. */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];

/* Lookup table indexed through (squareTbl + 256) so that indices in
 * -256..255 are valid; filled elsewhere. */
uint32_t squareTbl[512];

const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

/* Specific zigzag scan for 248 idct.
 * NOTE that unlike the specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

/* not permuted inverse zigzag_direct + 1 for MMX quantizer */
uint16_t __align8 inv_zigzag_direct16[64];

const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
const uint32_t inverse[256] = {
             0, 4294967295U, 2147483648U,  1431655766,  1073741824,   858993460,   715827883,   613566757,
     536870912,   477218589,   429496730,   390451573,   357913942,   330382100,   306783379,   286331154,
     268435456,   252645136,   238609295,   226050911,   214748365,   204522253,   195225787,   186737709,
     178956971,   171798692,   165191050,   159072863,   153391690,   148102321,   143165577,   138547333,
     134217728,   130150525,   126322568,   122713352,   119304648,   116080198,   113025456,   110127367,
     107374183,   104755300,   102261127,    99882961,    97612894,    95443718,    93368855,    91382283,
      89478486,    87652394,    85899346,    84215046,    82595525,    81037119,    79536432,    78090315,
      76695845,    75350304,    74051161,    72796056,    71582789,    70409300,    69273667,    68174085,
      67108864,    66076420,    65075263,    64103990,    63161284,    62245903,    61356676,    60492498,
      59652324,    58835169,    58040099,    57266231,    56512728,    55778797,    55063684,    54366675,
      53687092,    53024288,    52377650,    51746594,    51130564,    50529028,    49941481,    49367441,
      48806447,    48258060,    47721859,    47197443,    46684428,    46182445,    45691142,    45210183,
      44739243,    44278014,    43826197,    43383509,    42949673,    42524429,    42107523,    41698712,
      41297763,    40904451,    40518560,    40139882,    39768216,    39403370,    39045158,    38693400,
      38347923,    38008561,    37675152,    37347542,    37025581,    36709123,    36398028,    36092163,
      35791395,    35495598,    35204650,    34918434,    34636834,    34359739,    34087043,    33818641,
      33554432,    33294321,    33038210,    32786010,    32537632,    32292988,    32051995,    31814573,
      31580642,    31350127,    31122952,    30899046,    30678338,    30460761,    30246249,    30034737,
      29826162,    29620465,    29417585,    29217465,    29020050,    28825284,    28633116,    28443493,
      28256364,    28071682,    27889399,    27709467,    27531842,    27356480,    27183338,    27012373,
      26843546,    26676816,    26512144,    26349493,    26188825,    26030105,    25873297,    25718368,
      25565282,    25414008,    25264514,    25116768,    24970741,    24826401,    24683721,    24542671,
      24403224,    24265352,    24129030,    23994231,    23860930,    23729102,    23598722,    23469767,
      23342214,    23216040,    23091223,    22967740,    22845571,    22724695,    22605092,    22486740,
      22369622,    22253717,    22139007,    22025474,    21913099,    21801865,    21691755,    21582751,
      21474837,    21367997,    21262215,    21157475,    21053762,    20951060,    20849356,    20748635,
      20648882,    20550083,    20452226,    20355296,    20259280,    20164166,    20069941,    19976593,
      19884108,    19792477,    19701685,    19611723,    19522579,    19434242,    19346700,    19259944,
      19173962,    19088744,    19004281,    18920561,    18837576,    18755316,    18673771,    18592933,
      18512791,    18433337,    18354562,    18276457,    18199014,    18122225,    18046082,    17970575,
      17895698,    17821442,    17747799,    17674763,    17602325,    17530479,    17459217,    17388532,
      17318417,    17248865,    17179870,    17111424,    17043522,    16976156,    16909321,    16843010,
};

/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/**
 * Sum all pixel values of a 16x16 block.
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of two rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        /* each row is handled as two runs of 8 pixels */
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        /* 16 bytes were consumed above; step to the start of the next row */
        pix += line_size - 16;
    }
    return s;
}

/**
 * Sum squareTbl[256 + p] over every pixel p of a 16x16 block
 * (presumably the sum of squared pixel values; squareTbl is filled
 * elsewhere -- confirm against its initialisation).
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of two rows
 * @return accumulated table values for the 256 pixels
 */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            /* Pixels are read one byte at a time.  The former variant loaded
             * them through uint32_t/uint64_t pointer casts, which violates
             * strict aliasing and may perform misaligned loads; since every
             * byte of the word was summed, the byte-wise form yields the
             * same result on any endianness. */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
            pix += 8;
        }
        /* 16 bytes were consumed above; step to the start of the next row */
        pix += line_size - 16;
    }
    return s;
}

/**
 * Apply bswap_32() to each 32-bit word of a buffer.
 * @param dst destination, receives w words
 * @param src source, w words are read
 * @param w   number of 32-bit words to process
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w)
{
    int i;

    /* main part: 8 words per iteration */
    for (i = 0; i + 8 <= w; i += 8) {
        dst[i + 0] = bswap_32(src[i + 0]);
        dst[i + 1] = bswap_32(src[i + 1]);
        dst[i + 2] = bswap_32(src[i + 2]);
        dst[i + 3] = bswap_32(src[i + 3]);
        dst[i + 4] = bswap_32(src[i + 4]);
        dst[i + 5] = bswap_32(src[i + 5]);
        dst[i + 6] = bswap_32(src[i + 6]);
        dst[i + 7] = bswap_32(src[i + 7]);
    }
    /* remaining (w % 8) words */
    for (; i < w; i++) {
        dst[i + 0] = bswap_32(src[i + 0]);
    }
}

/**
 * Accumulate squareTbl[256 + (a - b)] over an 8x8 block, where a and b are
 * co-located pixels of the two inputs (presumably the sum of squared
 * errors; squareTbl is filled elsewhere).
 * @param v         unused
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between rows, used for both blocks
 * @return accumulated table values for the 64 pixel pairs
 */
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size)
{
    int s, i;
    /* offset by 256 so the signed difference (-255..255) is a valid index */
    uint32_t *sq = squareTbl + 256;

    s = 0;
    for (i = 0; i < 8; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/**
 * Accumulate squareTbl[256 + (a - b)] over a 16x16 block, where a and b are
 * co-located pixels of the two inputs (presumably the sum of squared
 * errors; squareTbl is filled elsewhere).
 * @param v         unused
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between rows, used for both blocks
 * @return accumulated table values for the 256 pixel pairs
 */
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    /* offset by 256 so the signed difference (-255..255) is a valid index */
    uint32_t *sq = squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/**
 * Copy an 8x8 block of pixels into a contiguous 64-entry coefficient block.
 * @param block     destination, 8 rows of 8 entries stored back to back
 * @param pixels    top-left pixel of the source block
 * @param line_size distance in bytes between the starts of two source rows
 */
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        block[0] = pixels[0];
        block[1] = pixels[1];
        block[2] = pixels[2];
        block[3] = pixels[3];
        block[4] = pixels[4];
        block[5] = pixels[5];
        block[6] = pixels[6];
        block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
}

/**
 * Store the per-pixel difference s1 - s2 of two 8x8 blocks into a
 * contiguous 64-entry coefficient block.
 * @param block  destination, 8 rows of 8 entries stored back to back
 * @param s1     top-left pixel of the minuend block
 * @param s2     top-left pixel of the subtrahend block
 * @param stride distance in bytes between rows, used for both s1 and s2
 */
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int i;

    /* compute the differences row by row */
    for (i = 0; i < 8; i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}

/**
 * Write an 8x8 coefficient block to pixels, mapping every value through
 * cropTbl + MAX_NEG_CROP (presumably a clamp to the 0..255 pixel range;
 * cropTbl is filled elsewhere).
 * @param block     source, 8 rows of 8 entries stored back to back
 * @param pixels    top-left pixel of the destination block
 * @param line_size distance in bytes between the starts of two destination rows
 */
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = cropTbl + MAX_NEG_CROP;

    /* write the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}

static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -