📄 skl_dsp.h

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 H
字号:
/********************************************************
 * Some code. Copyright (C) 2003 by Pascal Massimino.   *
 * All Rights Reserved.      (http://skal.planet-d.net) *
 * For Educational/Academic use ONLY. See 'LICENSE.TXT'.*
 ********************************************************/
/*
 * skl_dsp.h
 *
 * Low-level data processing
 ********************************************************/

#ifndef _SKL_DSP_H_
#define _SKL_DSP_H_

#include "skl_syst/skl_cpu_specs.h"

extern "C" {

//////////////////////////////////////////////////////////
// SKL_YUV_DSP : YUV conversion operations
//////////////////////////////////////////////////////////

typedef void (*SKL_TO_YUV_FUNC)(SKL_BYTE *Y, SKL_BYTE *U, SKL_BYTE *V,
                                const SKL_INT32 Dst_BpS,
                                const SKL_BYTE *Src,
                                const SKL_INT32 Src_BpS,
                                const SKL_INT32 Width, const SKL_INT32 Height);
typedef void (*SKL_TO_RGB_FUNC)(SKL_BYTE *RGB,
                                const SKL_INT32 Dst_BpS,
                                const SKL_BYTE *Y, const SKL_BYTE *U, const SKL_BYTE *V,
                                const SKL_INT32 Src_BpS,
                                const SKL_INT32 Width, const SKL_INT32 Height);
struct SKL_YUV_DSP
{
  const char *Name;
  SKL_DSP_SWITCH Switch_Off;

  int (*Init)(int Transfer_Type); // type=[1..7]. default=1 (ITU-R Rec. 709 (1990))
  SKL_TO_YUV_FUNC RGB565_TO_YUV;
  SKL_TO_RGB_FUNC YUV_TO_RGB565;
  SKL_TO_YUV_FUNC RGB24_TO_YUV;
  SKL_TO_RGB_FUNC YUV_TO_RGB24;
  SKL_TO_YUV_FUNC RGB32_TO_YUV;
  SKL_TO_RGB_FUNC YUV_TO_RGB32;
};

extern SKL_EXPORT int Skl_Init_YUV_DSP(SKL_YUV_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT );

//////////////////////////////////////////////////////////
// SKL_IMG_DSP : Image processing
//////////////////////////////////////////////////////////

struct SKL_IMG_DSP
{
  const char *Name;
  SKL_DSP_SWITCH Switch_Off;

    // Note: Src1 is supposed to be 16b-aligned in SAD_*!! (for SSE2)
  SKL_UINT32 (*SAD_4x4  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_8x8  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_16x8_Field)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_16x16)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_16x7_Self)(const SKL_BYTE *Src, SKL_INT32 BpS);

  SKL_UINT32 (*SSD_4x4  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SSD_8x8  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SSD_16x8_Field)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SSD_16x16)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);

  SKL_UINT32 (*Hadamard_SAD_4x4  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*Hadamard_SAD_8x8  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*Hadamard_SAD_16x8_Field)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*Hadamard_SAD_16x16)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);

  SKL_UINT32 (*Mean_4x4)(const SKL_BYTE *Src, SKL_INT32 BpS);
  SKL_UINT32 (*Mean_8x8)(const SKL_BYTE *Src, SKL_INT32 BpS);
  SKL_UINT32 (*Mean_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);

  SKL_UINT32 (*Sqr_4x4)(const SKL_BYTE *Src, SKL_INT32 BpS);
  SKL_UINT32 (*Sqr_8x8)(const SKL_BYTE *Src, SKL_INT32 BpS);
  SKL_UINT32 (*Sqr_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);

  SKL_UINT32 (*Sqr_Dev_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);
  SKL_UINT32 (*Abs_Dev_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);
  SKL_UINT32 (*Hadamard_Dev_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);

  SKL_UINT32 (*SAD)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);
  SKL_UINT32 (*Mean)(const SKL_BYTE *Src, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);
  SKL_UINT32 (*Square_Dev)(const SKL_BYTE *Src, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);
  SKL_UINT32 (*Abs_Dev)(const SKL_BYTE *Src, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);

  SKL_UINT32 (*SAD_Avrg_4x4  )(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_Avrg_8x8  )(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_Avrg_16x8 )(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);
  SKL_UINT32 (*SAD_Avrg_16x16)(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);

  void (*Smooth_18x18_To_8x8)(SKL_BYTE *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);
  void (*Gradx_18x18_To_8x8) (SKL_INT8 *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);
  void (*Grady_18x18_To_8x8) (SKL_INT8 *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);
  void (*Grad2_18x18_To_8x8) (SKL_BYTE *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);
};

extern int Skl_Init_Img_DSP(SKL_IMG_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT);

//////////////////////////////////////////////////////////
// SKL_QUANT_DSP : various quantization/dequantization
//////////////////////////////////////////////////////////

// M[2][31][64] is the common workspace for all implementation. Hope
// it's enough so we won't go OO.
//   Indexes: M[ 0=Quant/1=Dequant ][ Q=0..31 ][ 0=Mult/1=Bias ][ i=0..64 ]

typedef struct SKL_QUANT_DSP SKL_QUANT_DSP;

typedef SKL_INT16 (*SKL_QUANTIZER)[31][2][64];

    // for comfort, let's define some common signatures
typedef void (*SKL_QUANT_INTRA_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,
                                     const SKL_QUANTIZER M,
                                     SKL_INT32 Scale, SKL_INT32 DC_Scale);
typedef SKL_INT32 (*SKL_QUANT_INTER_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,
                                          const SKL_QUANTIZER M,
                                          SKL_INT32 Scale);
typedef void (*SKL_DEQUANT_INTRA_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,
                                       const SKL_QUANTIZER M,
                                       SKL_INT32 Scale, SKL_INT32 DC_Scale);
typedef void (*SKL_DEQUANT_INTER_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,
                                       const SKL_QUANTIZER M,
                                       SKL_INT32 Scale, int Rows);

struct SKL_QUANT_DSP
{
  const char *Name;
  SKL_DSP_SWITCH Switch_Off;

  void (*Init_Quantizer)(SKL_QUANTIZER Q, const SKL_BYTE In[64],
                         const SKL_BYTE *Scale_Map, int For_Intra);

  SKL_QUANT_INTRA_FUNC   Quant_Intra;
  SKL_QUANT_INTER_FUNC   Quant_Inter;

  SKL_DEQUANT_INTRA_FUNC Dequant_Intra;
  SKL_DEQUANT_INTER_FUNC Dequant_Inter;

  void (*Zero)(SKL_INT16 C[64]); // resets coeffs to zero
  void (*Zero16)(SKL_INT16 C[16]); // resets coeffs to zero

  void (*Dct)(SKL_INT16 *);
  void (*IDct)(SKL_INT16 *);

  void (*IDct_Sparse)(SKL_INT16 *);
  void (*IDct_Put)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);
  void (*IDct_Add)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);

  void (*IDct_Sparse_8x4)(SKL_INT16 *);
  void (*IDct_Put_8x4)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);
  void (*IDct_Add_8x4)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);
};

extern int Skl_Init_Quant_DSP(SKL_QUANT_DSP *Dsp,
                              SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT,
                              int Quant_Type=1  /* 0=H263, 1=MPEG4, 2=MPEG2 */ );

//////////////////////////////////////////////////////////
// SKL_MB_DSP : Macro-block operations
//////////////////////////////////////////////////////////

typedef void (*SKL_MB_FUNC)(SKL_BYTE *Dst,
                            const SKL_BYTE *Src,
                            const int BpS);

struct SKL_HV_FILTER      // horizontal/vertical filter of a 16x16 block
{
  void (*H_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS);
  void (*V_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS);
  void (*HV_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS);
};
  // some pre-defined averaging filters (2Taps).
extern const SKL_HV_FILTER Skl_Filter_2_C, Skl_Filter_2_MMX, Skl_Filter_2_SSE;


#define SKL_MB_QP_PASS_SIGNATURE(NAME)  \
  void (NAME)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS)

    //   Half-pixel motion of 16x8 and 8x8 blocks
    // Index [0..3] is computed with: i=(dx&1) | ((dy&1)<<1)
    // where dx/dy is the motion vector in 1/2 pel unit.
    // [0..3] <=> [Full/Full]  [Full/Half]  [Half/Full]  [Half/Half]
    // We put everything in a single struct so it can easily be passed
    // to prediction functions as a whole...

struct SKL_MB_FUNCS
{
  SKL_MB_FUNC    HP_16x8 [4]; // 16x8  hp-prediction
  SKL_MB_FUNC    HP_8x8  [4]; // 8x8   hp-prediction
  SKL_MB_FUNC    HP_8x4  [4]; // 8x4   hp-prediction (for fields)

    // filter for QPel 16x? MPEG4 prediction

  void (*H_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_Pass_Avrg)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_Pass_Avrg_Up)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_LowPass)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_LowPass_Avrg)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_LowPass_Avrg_Up)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*V_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, int W, int BpS);
  void (*V_Pass_Avrg)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*V_Pass_Avrg_Up)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);

    // filter for QPel 8x? MPEG4 prediction

  void (*H_Pass_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_Pass_Avrg_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_Pass_Avrg_Up_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_LowPass_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_LowPass_Avrg_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*H_LowPass_Avrg_Up_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*V_Pass_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int W, int BpS);
  void (*V_Pass_Avrg_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);
  void (*V_Pass_Avrg_Up_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);

  void (*SAD_HP_16x16)(const SKL_BYTE *Cur, const SKL_BYTE *Src, int BpS, SKL_UINT32 Sad[3]);
  void (*SAD_HP_8x8)(const SKL_BYTE *Cur, const SKL_BYTE *Src, int BpS, SKL_UINT32 Sad[3]);
};

//////////////////////////////////////////////////////////

struct SKL_MB_DSP
{
  const char *Name;
  SKL_DSP_SWITCH Switch_Off;

  void (*Init)();

    // 16b->8b transfer    (*Src is actually Src[][8][8])
  void (*Copy_Upsampled_8x8_16To8)(SKL_BYTE *Dst, const SKL_INT16 *Src, const int BpS);
  void (*Add_Upsampled_8x8_16To8)(SKL_BYTE *Dst, const SKL_INT16 *Src, const int BpS);

    // 8b->16b transfer
  void (*Copy_16x8_8To16)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);
  void (*Copy_8x8_8To16) (SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);
  void (*Diff_16x8_8To16)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);
  void (*Diff_8x8_8To16) (SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);

  void (*Diff_16x8_88To16)(SKL_INT16 *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, const int BpS);
  void (*Diff_8x8_88To16) (SKL_INT16 *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, const int BpS);

    // 8b->8b transfer

  const SKL_MB_FUNCS *Copy[2]; // index = rounding (0/1)
  const SKL_MB_FUNCS  *Add;    // Rounding is always 0 for addition (B-frames)

  SKL_UINT32 (*SAD_16x7_Frame)(const SKL_INT16 *Src); // self SAD on DCT coeffs
  SKL_UINT32 (*SAD_16x7_Field)(const SKL_INT16 *Src); // Src[4*64]
  void (*Reorder_Frame_16x16)(SKL_INT16 *Src);        // frame to field

    // 8b utilitary functions
        /* replicate 16/8 edge pixels of YUV2 picture */
  void (*Make_Edges)(SKL_BYTE * const * const YUV, const int Width, const int Height, const int BpS);
        /* horizontal/vertical (3,1)-Filter of 8-pixels blocks */
  void (*HFilter_31)(SKL_BYTE *Src1, SKL_BYTE *Src2, int Nb_Blks);
  void (*VFilter_31)(SKL_BYTE *Src1, SKL_BYTE *Src2, const int BpS, int Nb_Blks);

    // 8b->16b 18x18 -> 8x8 downsampling
  void (*Filter_18x18_To_8x8)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);
  void (*Filter_Diff_18x18_To_8x8)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);

  const SKL_HV_FILTER *Filter;
};

extern int Skl_Init_Mb_DSP(SKL_MB_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT);

//////////////////////////////////////////////////////////
// Global Motion Compensation stuff

struct SKL_GMC_DSP
{
  const char *Name;
  SKL_DSP_SWITCH Switch_Off;
  void (*Predict_16x16)(const SKL_GMC_DSP * const This,
                        SKL_BYTE *Dst, const SKL_BYTE *Src, int BpS,
                        int x, int y, int Rounder);
  void (*Predict_8x8)(const SKL_GMC_DSP * const This,
                      SKL_BYTE *uDst, const SKL_BYTE *uSrc, SKL_SAFE_INT uv_Coloc,
                      int BpS, int x, int y, int Rounder);

  int Width, Height;
  int Nb_Pts;
  int Accuracy;

  int dU[2];    // gradient-converted warp points
  int dV[2];
  int dw[2];
  int Uo, Vo, Uco, Vco;

  void Setup(int Width, int Height, const int MVs[][2], int Nb, int Accuracy);
  void Get_Average_MV(int MV[2], int x, int y, int QPel) const;
};

extern int Skl_Init_GMC_DSP(SKL_GMC_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT);

//////////////////////////////////////////////////////////
// useful table for clipping pixels.
// MPEG4's 6-tap FIR output values in [-112,367]
// H264's  6-tap FIR output values in [-80,335]
// B-VOP like mixing requires range   [-128,384]
// H264's hadamard transform ranges   [-512,768]

#define SKL_CLIP_MIN  (-512)
#define SKL_CLIP_MAX  ( 768)
extern "C" SKL_BYTE Skl_DSP_Clip[SKL_CLIP_MAX-SKL_CLIP_MIN];
extern "C" void Skl_Init_DSP_Clip();

#ifndef NDEBUG
#define SKL_DSP_CLIP(x) (SKL_ASSERT((x)>=SKL_CLIP_MIN && (x)<SKL_CLIP_MAX), Skl_DSP_Clip[(x)-SKL_CLIP_MIN])
#else
#define SKL_DSP_CLIP(x) (Skl_DSP_Clip[(x)-SKL_CLIP_MIN])
#endif

//////////////////////////////////////////////////////////
// General-purpose other function

  // In[Size], Out[2*Size]
extern void Skl_Generic_IDct_Ref(int Size, const float *In, float *Out);

//////////////////////////////////////////////////////////

} /* extern "C" */

#endif  /* _SKL_DSP_H_ */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -