📄 dsputil.c

📁 This is the source release kit for the following system configuration(s): - AMD Alchemy(TM) DBAu1200(
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 */

/**
 * @file dsputil.c
 * DSP utils
 */

#ifndef USE_ASM_VERSION /* [ */

#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "decinit.h"

#include "debug.h"

/*
 * Byte lookup table with 256 entries plus MAX_NEG_CROP extra entries on each
 * side (256 + 2 * MAX_NEG_CROP in total). It is zero-initialised here and is
 * not const.
 * NOTE(review): presumably it is filled in at start-up elsewhere and used to
 * clamp values into 0..255 -- confirm against the initialisation code.
 */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };

/*
 * Reciprocal table for replacing a division by a multiply and a shift:
 *
 *     a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 *
 * The product has to be formed in a type wider than 32 bits, because the
 * table entries themselves use up to 32 bits.
 *
 * Entry 0 is 0 and entry 1 is 4294967295U (the largest 32-bit value); both lie
 * outside the range covered by the guarantee above.
 * NOTE(review): spot checks (b = 2, 3, 5, 7, 255) match ceil(2^32 / b); the
 * remaining entries presumably follow the same rule -- not verified one by one.
 */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};

/**
 * Add an 8x8 block of coefficients onto an 8x8 area of pixels, in place.
 *
 * No explicit clamping is applied: each sum is simply stored back into the
 * uint8_t pixel.
 *
 * @param pixels    top-left pixel of the area to update
 * @param block     64 coefficients, stored row after row (8 per row)
 * @param line_size distance in bytes between two pixel rows
 */
STATIC_FUNC void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++, pixels += line_size, block += 8) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
    }
}

/**
 * Add a 4x4 block of coefficients onto a 4x4 area of pixels, in place.
 *
 * No explicit clamping is applied: each sum is simply stored back into the
 * uint8_t pixel.
 *
 * @param pixels    top-left pixel of the area to update
 * @param block     16 coefficients, stored row after row (4 per row)
 * @param line_size distance in bytes between two pixel rows
 */
STATIC_FUNC void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++, pixels += line_size, block += 4) {
        for (col = 0; col < 4; col++)
            pixels[col] += block[col];
    }
}

/**
 * Copy a block that is 2 bytes wide and h rows high from pixels to block.
 *
 * Each row is fetched with LD16() and written with a single 16-bit store, so
 * block must be suitably aligned for a uint16_t access.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows; nothing is copied when h <= 0
 */
STATIC_FUNC void put_pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        uint16_t *out = (uint16_t *)block;

        *out = LD16(pixels);
        block  += line_size;
        pixels += line_size;
    }
}
/**
 * Copy a block that is 4 bytes wide and h rows high from pixels to block.
 *
 * Each row is fetched with LD32() and written with a single 32-bit store, so
 * block must be suitably aligned for a uint32_t access.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows; nothing is copied when h <= 0
 */
STATIC_FUNC void put_pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        uint32_t *out = (uint32_t *)block;

        *out = LD32(pixels);
        block  += line_size;
        pixels += line_size;
    }
}
/**
 * Copy a block that is 8 bytes wide and h rows high from pixels to block.
 *
 * Each row is moved as two 32-bit words (left half first), fetched with LD32()
 * and written through a uint32_t pointer, so block must be suitably aligned
 * for uint32_t accesses.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows; nothing is copied when h <= 0
 */
STATIC_FUNC void put_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        uint32_t *out = (uint32_t *)block;

        out[0] = LD32(pixels);
        out[1] = LD32(pixels + 4);
        block  += line_size;
        pixels += line_size;
    }
}

/**
 * Combine two 8-byte-wide sources with rnd_avg32() and store the result in dst.
 *
 * Every row is processed as two 32-bit words, left half first. The sources are
 * read with LD32(); dst is written through a uint32_t pointer and therefore
 * has to be suitably aligned for that.
 *
 * @param dst         destination
 * @param src1        first source
 * @param src2        second source
 * @param dst_stride  bytes between rows of dst
 * @param src_stride1 bytes between rows of src1
 * @param src_stride2 bytes between rows of src2
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void put_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        uint8_t *out = &dst[row * dst_stride];
        int off;

        for (off = 0; off < 8; off += 4) {
            const uint32_t a = LD32(in1 + off);
            const uint32_t b = LD32(in2 + off);

            *((uint32_t *)(out + off)) = rnd_avg32(a, b);
        }
    }
}

/**
 * Combine two 4-byte-wide sources with rnd_avg32() and store the result in dst.
 *
 * One 32-bit word is handled per row. The sources are read with LD32(); dst is
 * written through a uint32_t pointer and therefore has to be suitably aligned.
 *
 * @param dst         destination
 * @param src1        first source
 * @param src2        second source
 * @param dst_stride  bytes between rows of dst
 * @param src_stride1 bytes between rows of src1
 * @param src_stride2 bytes between rows of src2
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void put_pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int row;

    for (row = 0; row < h; row++) {
        const uint32_t a = LD32(&src1[row * src_stride1]);
        const uint32_t b = LD32(&src2[row * src_stride2]);
        uint32_t *out = (uint32_t *)&dst[row * dst_stride];

        *out = rnd_avg32(a, b);
    }
}

/**
 * Combine two 2-byte-wide sources with rnd_avg32() and store the result in dst.
 *
 * Each row is read with LD16() into 32-bit temporaries, combined, and the
 * result is stored as a 16-bit value. dst is written through a uint16_t
 * pointer and therefore has to be suitably aligned.
 *
 * @param dst         destination
 * @param src1        first source
 * @param src2        second source
 * @param dst_stride  bytes between rows of dst
 * @param src_stride1 bytes between rows of src1
 * @param src_stride2 bytes between rows of src2
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void put_pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int row;

    for (row = 0; row < h; row++) {
        const uint32_t a = LD16(&src1[row * src_stride1]);
        const uint32_t b = LD16(&src2[row * src_stride2]);
        uint16_t *out = (uint16_t *)&dst[row * dst_stride];

        *out = rnd_avg32(a, b);
    }
}

/**
 * 16-byte-wide version of put_pixels8_l2(): the left 8 columns are processed
 * first, then the right 8 columns, each over all h rows.
 *
 * Parameters have the same meaning as for put_pixels8_l2().
 */
STATIC_FUNC inline void put_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int half;

    for (half = 0; half < 16; half += 8) {
        put_pixels8_l2(dst + half, src1 + half, src2 + half,
                       dst_stride, src_stride1, src_stride2, h);
    }
}

/**
 * Copy a block that is 16 bytes wide and h rows high from pixels to block.
 *
 * Implemented as two put_pixels8_c() calls: left 8 columns first, then the
 * right 8 columns.
 *
 * @param block     destination
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows
 */
STATIC_FUNC void put_pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int half;

    for (half = 0; half < 16; half += 8)
        put_pixels8_c(block + half, pixels + half, line_size, h);
}

/**
 * Blend a block that is 4 bytes wide and h rows high into block: every
 * destination word is replaced by rnd_avg32(destination, source).
 *
 * The row statement used to be a bare comma expression with no assignment and
 * no rnd_avg32() call, so the destination was never modified. It now mirrors
 * the per-word operation used by avg_pixels8_c().
 *
 * The source is read with LD32(); block is accessed through a uint32_t pointer
 * and therefore has to be suitably aligned for that.
 *
 * @param block     destination, updated in place
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void avg_pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int i;
    for(i=0; i<h; i++){
        *((uint32_t*)(block  )) = rnd_avg32(*((uint32_t*)(block  )), LD32(pixels  ));
        pixels+=line_size;
        block +=line_size;
    }
}
/**
 * Blend a block that is 8 bytes wide and h rows high into block: every
 * destination word is replaced by rnd_avg32(destination, source).
 *
 * Each row is handled as two 32-bit words, left half first. The source is read
 * with LD32(); block is accessed through a uint32_t pointer and therefore has
 * to be suitably aligned for that.
 *
 * @param block     destination, updated in place
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows; nothing is written when h <= 0
 */
STATIC_FUNC void avg_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        uint32_t *out = (uint32_t *)block;

        out[0] = rnd_avg32(out[0], LD32(pixels));
        out[1] = rnd_avg32(out[1], LD32(pixels + 4));
        block  += line_size;
        pixels += line_size;
    }
}

/**
 * Combine two 8-byte-wide sources with rnd_avg32() and blend the result into
 * dst: dst = rnd_avg32(dst, rnd_avg32(src1, src2)), one 32-bit word at a time.
 *
 * Every row is processed as two words, left half first. The sources are read
 * with LD32(); dst is accessed through a uint32_t pointer and therefore has to
 * be suitably aligned for that.
 *
 * @param dst         destination, updated in place
 * @param src1        first source
 * @param src2        second source
 * @param dst_stride  bytes between rows of dst
 * @param src_stride1 bytes between rows of src1
 * @param src_stride2 bytes between rows of src2
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void avg_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *in1 = &src1[row * src_stride1];
        const uint8_t *in2 = &src2[row * src_stride2];
        uint8_t *out_row = &dst[row * dst_stride];
        int off;

        for (off = 0; off < 8; off += 4) {
            const uint32_t a = LD32(in1 + off);
            const uint32_t b = LD32(in2 + off);
            uint32_t *out = (uint32_t *)(out_row + off);

            *out = rnd_avg32(*out, rnd_avg32(a, b));
        }
    }
}

/**
 * Combine two 4-byte-wide sources with rnd_avg32() and blend the result into
 * dst: dst = rnd_avg32(dst, rnd_avg32(src1, src2)), one 32-bit word per row.
 *
 * The sources are read with LD32(); dst is accessed through a uint32_t pointer
 * and therefore has to be suitably aligned for that.
 *
 * @param dst         destination, updated in place
 * @param src1        first source
 * @param src2        second source
 * @param dst_stride  bytes between rows of dst
 * @param src_stride1 bytes between rows of src1
 * @param src_stride2 bytes between rows of src2
 * @param h           number of rows; nothing is written when h <= 0
 */
STATIC_FUNC inline void avg_pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int row;

    for (row = 0; row < h; row++) {
        const uint32_t a = LD32(&src1[row * src_stride1]);
        const uint32_t b = LD32(&src2[row * src_stride2]);
        uint32_t *out = (uint32_t *)&dst[row * dst_stride];

        *out = rnd_avg32(*out, rnd_avg32(a, b));
    }
}

/**
 * 16-byte-wide version of avg_pixels8_l2(): the left 8 columns are processed
 * first, then the right 8 columns, each over all h rows.
 *
 * Parameters have the same meaning as for avg_pixels8_l2().
 */
STATIC_FUNC inline void avg_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, 
                                                int src_stride1, int src_stride2, int h){
    int half;

    for (half = 0; half < 16; half += 8) {
        avg_pixels8_l2(dst + half, src1 + half, src2 + half,
                       dst_stride, src_stride1, src_stride2, h);
    }
}

/**
 * Blend a block that is 16 bytes wide and h rows high into block.
 *
 * Implemented as two avg_pixels8_c() calls: left 8 columns first, then the
 * right 8 columns.
 *
 * @param block     destination, updated in place
 * @param pixels    source
 * @param line_size distance in bytes between rows (same for both buffers)
 * @param h         number of rows
 */
STATIC_FUNC void avg_pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){
    int half;

    for (half = 0; half < 16; half += 8)
        avg_pixels8_c(block + half, pixels + half, line_size, h);
}

#ifdef USE_ASM_VERSION 
  // Prototypes for the H.264 chroma motion-compensation routines when the
  // assembly implementations from asm_dsputil.s are used instead of the C ones.
  // The inline comments on the parameters give the alignment of each pointer:
  // dst is marked "align 8", src "align 1".
  // NOTE(review): no #endif matching the "#ifndef USE_ASM_VERSION" near the top
  // of this file is visible before this point, so this branch looks unreachable
  // unless one of the headers included in between defines USE_ASM_VERSION --
  // confirm.
  extern void put_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
  extern void put_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
  extern void put_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
  extern void avg_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
  extern void avg_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
  extern void avg_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
#else /* [ */

#ifdef _MIPS_LINUX_  /* [ */

/**
 * Write a block that is 2 pixels wide and h rows high to dst, where every
 * output pixel is a weighted sum of the 2x2 source neighbourhood starting at
 * the same position:
 *
 *   dst[k] = (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 *
 * with A = (8-x)(8-y), B = x(8-y), C = (8-x)y and D = xy. The four weights
 * always add up to 64, so "+ 32, >> 6" is a rounded division by their sum.
 *
 * @param dst    destination
 * @param src    source; each row reads 3 bytes from the current source row
 *               and 3 bytes from the row below it
 * @param stride distance in bytes between rows, used for both dst and src
 * @param h      number of rows to produce; nothing is written when h <= 0
 * @param x      horizontal weight (presumably the 1/8-pel fraction, 0..7 --
 *               confirm against the callers)
 * @param y      vertical weight (same assumption as for x)
 */
void put_h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    const int w_top_left     = (8 - x) * (8 - y);
    const int w_top_right    = x * (8 - y);
    const int w_bottom_left  = (8 - x) * y;
    const int w_bottom_right = x * y;
    int row, col;

    for (row = 0; row < h; row++, dst += stride, src += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 2; col++) {
            const int sum = w_top_left     * src[col]
                          + w_top_right    * src[col + 1]
                          + w_bottom_left  * below[col]
                          + w_bottom_right * below[col + 1];

            dst[col] = (sum + 32) >> 6;
        }
    }
}

/**
 * Write a block that is 4 pixels wide and hh rows high to dst, where every
 * output pixel is a weighted sum of the 2x2 source neighbourhood starting at
 * the same position (variant using MIPS inline assembly).
 *
 * Assuming the usual MIPS32 semantics of mult/madd/mflo, each column k in 0..3
 * is computed as
 *
 *   dst[k] = (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 *
 * with A = (8-x)(8-y), B = x(8-y), C = (8-x)y and D = xy (the four weights
 * always add up to 64).
 *
 * @param dst    destination; advanced by stride after every row
 * @param src    source; each row reads src[0..4] and src[stride..stride+4]
 * @param stride distance in bytes between rows, used for both dst and src
 * @param hh     number of rows to produce
 * @param x      horizontal weight (presumably the 1/8-pel fraction, 0..7 --
 *               confirm against the callers)
 * @param y      vertical weight (same assumption as for x)
 *
 * NOTE(review): every sum is built by a mult, three madds and an mflo issued
 * as five separate asm statements, and none of them declares the HI/LO
 * accumulator as an output or clobber. The result therefore depends on the
 * compiler not placing any instruction that touches HI/LO between them --
 * confirm this is guaranteed by the toolchain in use, or fold each group into
 * a single asm statement with "hi"/"lo" clobbers.
 */
void put_h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int hh, int x, int y)
{       
  const int A = (8 - x) *(8 - y);
  const int B = (x) *(8 - y);
  const int C = (8 - x) *(y);
  const int D = (x) *(y);
  int ii;
  
  for (ii = 0; ii < hh; ii++)
  {
    int t1;
    /* Top-row samples are loaded once and shared by neighbouring columns. */
    const int a = src[0];
    const int b = src[1];
    const int c = src[2];
    const int d = src[3];
    const int e = src[4];
    /* column 0: mult starts the sum, each madd adds a product, mflo reads it */
    asm volatile ("mult %0,%1"::"r"(A), "r"(a));
    asm volatile ("madd %0,%1"::"r"(B), "r"(b));
    asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 0]));
    asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 1]));
    asm volatile ("mflo %0":"=r"(t1));
    dst[0] = (((t1) + 32) >> 6);
    /* column 1 */
    asm volatile ("mult %0,%1"::"r"(A), "r"(b));
    asm volatile ("madd %0,%1"::"r"(B), "r"(c));
    asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 1]));
    asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 2]));
    asm volatile ("mflo %0":"=r"(t1));
    dst[1] = (((t1) + 32) >> 6);
    /* column 2 */
    asm volatile ("mult %0,%1"::"r"(A), "r"(c));
    asm volatile ("madd %0,%1"::"r"(B), "r"(d));
    asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 2]));
    asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 3]));
    asm volatile ("mflo %0":"=r"(t1));
    dst[2] = (((t1) + 32) >> 6);
    /* column 3 */
    asm volatile ("mult %0,%1"::"r"(A), "r"(d));
    asm volatile ("madd %0,%1"::"r"(B), "r"(e));
    asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 3]));
    asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 4]));
    asm volatile ("mflo %0":"=r"(t1));
    dst[3] = (((t1) + 32) >> 6);
    /* move both pointers down to the next row */
    dst += stride;
    src += stride;
  }
}


#if 0
// This version has the inner loop re-rolled. It is slower.
void put_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int hh, int x, int y)
{       
  const int A = (8 - x) *(8 - y);
  const int B = (x) *(8 - y);
  const int C = (8 - x) *(y);
  const int D = (x) *(y);
  int ii, jj, kk;
  
  for (ii = 0; ii < hh; ii++)
  {
    int t1;
    for (jj = 0, kk = 1; jj < 8; jj++, kk++)
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -