📄 skl_dsp.h

📁 mpeg4编解码器
💻 H
字号:
/******************************************************** * Some code. Copyright (C) 2003 by Pascal Massimino.   * * All Rights Reserved.      (http://skal.planet-d.net) * * For Educational/Academic use ONLY. See 'LICENSE.TXT'.* ********************************************************//* * skl_dsp.h * * Low-level data processing ********************************************************/#ifndef _SKL_DSP_H_#define _SKL_DSP_H_#include "skl_syst/skl_cpu_specs.h"extern "C" {//////////////////////////////////////////////////////////// SKL_YUV_DSP : YUV conversion operations//////////////////////////////////////////////////////////typedef void (*SKL_TO_YUV_FUNC)(SKL_BYTE *Y, SKL_BYTE *U, SKL_BYTE *V,                                const SKL_INT32 Dst_BpS,                                const SKL_BYTE *Src,                                const SKL_INT32 Src_BpS,                                const SKL_INT32 Width, const SKL_INT32 Height);typedef void (*SKL_TO_RGB_FUNC)(SKL_BYTE *RGB,                                const SKL_INT32 Dst_BpS,                                const SKL_BYTE *Y, const SKL_BYTE *U, const SKL_BYTE *V,                                const SKL_INT32 Src_BpS,                                const SKL_INT32 Width, const SKL_INT32 Height);struct SKL_YUV_DSP{  const char *Name;  SKL_DSP_SWITCH Switch_Off;  int (*Init)(int Transfer_Type); // type=[1..7]. default=1 (ITU-R Rec. 709 (1990))  SKL_TO_YUV_FUNC RGB565_TO_YUV;  SKL_TO_RGB_FUNC YUV_TO_RGB565;  SKL_TO_YUV_FUNC RGB24_TO_YUV;  SKL_TO_RGB_FUNC YUV_TO_RGB24;  SKL_TO_YUV_FUNC RGB32_TO_YUV;  SKL_TO_RGB_FUNC YUV_TO_RGB32;  };extern SKL_EXPORT int Skl_Init_YUV_DSP(SKL_YUV_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT );//////////////////////////////////////////////////////////// SKL_IMG_DSP : Image processing//////////////////////////////////////////////////////////struct SKL_IMG_DSP{  const char *Name;  SKL_DSP_SWITCH Switch_Off;    // Note: Src1 is supposed to be 16b-aligned in SAD_*!! (for SSE2)  SKL_UINT32 (*SAD_4x4  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_8x8  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_16x8_Field)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_16x16)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_16x7_Self)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*SSD_4x4  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SSD_8x8  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SSD_16x8_Field)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SSD_16x16)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*Hadamard_SAD_4x4  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*Hadamard_SAD_8x8  )(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*Hadamard_SAD_16x8_Field)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*Hadamard_SAD_16x16)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*Mean_4x4)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Mean_8x8)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Mean_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Sqr_4x4)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Sqr_8x8)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Sqr_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Sqr_Dev_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Abs_Dev_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*Hadamard_Dev_16x16)(const SKL_BYTE *Src, SKL_INT32 BpS);  SKL_UINT32 (*SAD)(const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);  SKL_UINT32 (*Mean)(const SKL_BYTE *Src, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);  SKL_UINT32 (*Square_Dev)(const SKL_BYTE *Src, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);  SKL_UINT32 (*Abs_Dev)(const SKL_BYTE *Src, SKL_INT32 W, SKL_INT32 H, SKL_INT32 BpS);  SKL_UINT32 (*SAD_Avrg_4x4  )(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_Avrg_8x8  )(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_Avrg_16x8 )(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  SKL_UINT32 (*SAD_Avrg_16x16)(const SKL_BYTE *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, SKL_INT32 BpS);  void (*Smooth_18x18_To_8x8)(SKL_BYTE *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);  void (*Gradx_18x18_To_8x8) (SKL_INT8 *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);  void (*Grady_18x18_To_8x8) (SKL_INT8 *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);  void (*Grad2_18x18_To_8x8) (SKL_BYTE *Dst, int Dst_BpS, const SKL_BYTE *Src, int Src_BpS);};extern int Skl_Init_Img_DSP(SKL_IMG_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT);//////////////////////////////////////////////////////////// SKL_QUANT_DSP : various quantization/dequantization//////////////////////////////////////////////////////////// M[2][31][64] is the common workspace for all implementation. Hope// it's enough so we won't go OO.//   Indexes: M[ 0=Quant/1=Dequant ][ Q=0..31 ][ 0=Mult/1=Bias ][ i=0..64 ]typedef struct SKL_QUANT_DSP SKL_QUANT_DSP;typedef SKL_INT16 (*SKL_QUANTIZER)[31][2][64];    // for comfort, let's define some common signaturestypedef void (*SKL_QUANT_INTRA_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,                                     const SKL_QUANTIZER M,                                     SKL_INT32 Scale, SKL_INT32 DC_Scale);typedef SKL_INT32 (*SKL_QUANT_INTER_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,                                          const SKL_QUANTIZER M,                                          SKL_INT32 Scale);typedef void (*SKL_DEQUANT_INTRA_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,                                       const SKL_QUANTIZER M,                                       SKL_INT32 Scale, SKL_INT32 DC_Scale);typedef void (*SKL_DEQUANT_INTER_FUNC)(SKL_INT16 *Out, const SKL_INT16 *In,                                       const SKL_QUANTIZER M,                                       SKL_INT32 Scale, int Rows);struct SKL_QUANT_DSP{  const char *Name;  SKL_DSP_SWITCH Switch_Off;  void (*Init_Quantizer)(SKL_QUANTIZER Q, const SKL_BYTE In[64],                         const SKL_BYTE *Scale_Map, int For_Intra);  SKL_QUANT_INTRA_FUNC   Quant_Intra;  SKL_QUANT_INTER_FUNC   Quant_Inter;  SKL_DEQUANT_INTRA_FUNC Dequant_Intra;  SKL_DEQUANT_INTER_FUNC Dequant_Inter;  void (*Zero)(SKL_INT16 C[64]); // resets coeffs to zero  void (*Zero16)(SKL_INT16 C[16]); // resets coeffs to zero  void (*Dct)(SKL_INT16 *);  void (*IDct)(SKL_INT16 *);  void (*IDct_Sparse)(SKL_INT16 *);  void (*IDct_Put)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);  void (*IDct_Add)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);  void (*IDct_Sparse_8x4)(SKL_INT16 *);  void (*IDct_Put_8x4)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);  void (*IDct_Add_8x4)(SKL_INT16 *, SKL_BYTE *Dst, int BpS);};extern int Skl_Init_Quant_DSP(SKL_QUANT_DSP *Dsp,                              SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT,                               int Quant_Type=1  /* 0=H263, 1=MPEG4, 2=MPEG2 */ );//////////////////////////////////////////////////////////// SKL_MB_DSP : Macro-block operations//////////////////////////////////////////////////////////typedef void (*SKL_MB_FUNC)(SKL_BYTE *Dst,                            const SKL_BYTE *Src,                            const int BpS);struct SKL_HV_FILTER      // horizontal/vertical filter of a 16x16 block{  void (*H_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS);  void (*V_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS);  void (*HV_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS);};  // some pre-defined averaging filters (2Taps).extern const SKL_HV_FILTER Skl_Filter_2_C, Skl_Filter_2_MMX, Skl_Filter_2_SSE;#define SKL_MB_QP_PASS_SIGNATURE(NAME)  \  void (NAME)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS)    //   Half-pixel motion of 16x8 and 8x8 blocks    // Index [0..3] is computed with: i=(dx&1) | ((dy&1)<<1)    // where dx/dy is the motion vector in 1/2 pel unit.    // [0..3] <=> [Full/Full]  [Full/Half]  [Half/Full]  [Half/Half]    // We put everything in a single struct so it can easily be passed    // to prediction functions as a whole...struct SKL_MB_FUNCS{  SKL_MB_FUNC    HP_16x8 [4]; // 16x8  hp-prediction  SKL_MB_FUNC    HP_8x8  [4]; // 8x8   hp-prediction  SKL_MB_FUNC    HP_8x4  [4]; // 8x4   hp-prediction (for fields)    // filter for QPel 16x? MPEG4 prediction  void (*H_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_Pass_Avrg)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_Pass_Avrg_Up)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_LowPass)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_LowPass_Avrg)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_LowPass_Avrg_Up)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*V_Pass)(SKL_BYTE *Dst, const SKL_BYTE *Src, int W, int BpS);  void (*V_Pass_Avrg)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*V_Pass_Avrg_Up)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);    // filter for QPel 8x? MPEG4 prediction  void (*H_Pass_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_Pass_Avrg_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_Pass_Avrg_Up_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_LowPass_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_LowPass_Avrg_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*H_LowPass_Avrg_Up_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*V_Pass_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int W, int BpS);  void (*V_Pass_Avrg_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*V_Pass_Avrg_Up_8)(SKL_BYTE *Dst, const SKL_BYTE *Src, int H, int BpS);  void (*SAD_HP_16x16)(const SKL_BYTE *Cur, const SKL_BYTE *Src, int BpS, SKL_UINT32 Sad[3]);  void (*SAD_HP_8x8)(const SKL_BYTE *Cur, const SKL_BYTE *Src, int BpS, SKL_UINT32 Sad[3]);};//////////////////////////////////////////////////////////struct SKL_MB_DSP{  const char *Name;  SKL_DSP_SWITCH Switch_Off;  void (*Init)();    // 16b->8b transfer    (*Src is actually Src[][8][8])  void (*Copy_Upsampled_8x8_16To8)(SKL_BYTE *Dst, const SKL_INT16 *Src, const int BpS);  void (*Add_Upsampled_8x8_16To8)(SKL_BYTE *Dst, const SKL_INT16 *Src, const int BpS);    // 8b->16b transfer  void (*Copy_16x8_8To16)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);  void (*Copy_8x8_8To16) (SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);  void (*Diff_16x8_8To16)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);  void (*Diff_8x8_8To16) (SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);  void (*Diff_16x8_88To16)(SKL_INT16 *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, const int BpS);  void (*Diff_8x8_88To16) (SKL_INT16 *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, const int BpS);    // 8b->8b transfer  const SKL_MB_FUNCS *Copy[2]; // index = rounding (0/1)  const SKL_MB_FUNCS  *Add;    // Rounding is always 0 for addition (B-frames)  SKL_UINT32 (*SAD_16x7_Frame)(const SKL_INT16 *Src); // self SAD on DCT coeffs      SKL_UINT32 (*SAD_16x7_Field)(const SKL_INT16 *Src); // Src[4*64]  void (*Reorder_Frame_16x16)(SKL_INT16 *Src);        // frame to field    // 8b utilitary functions        /* replicate 16/8 edge pixels of YUV2 picture */  void (*Make_Edges)(SKL_BYTE * const * const YUV, const int Width, const int Height, const int BpS);        /* horizontal/vertical (3,1)-Filter of 8-pixels blocks */  void (*HFilter_31)(SKL_BYTE *Src1, SKL_BYTE *Src2, int Nb_Blks);  void (*VFilter_31)(SKL_BYTE *Src1, SKL_BYTE *Src2, const int BpS, int Nb_Blks);    // 8b->16b 18x18 -> 8x8 downsampling  void (*Filter_18x18_To_8x8)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);  void (*Filter_Diff_18x18_To_8x8)(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS);  const SKL_HV_FILTER *Filter;};extern int Skl_Init_Mb_DSP(SKL_MB_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT);//////////////////////////////////////////////////////////// Global Motion Compensation stuffstruct SKL_GMC_DSP{  const char *Name;  SKL_DSP_SWITCH Switch_Off;  void (*Predict_16x16)(const SKL_GMC_DSP * const This,                        SKL_BYTE *Dst, const SKL_BYTE *Src, int BpS,                         int x, int y, int Rounder);  void (*Predict_8x8)(const SKL_GMC_DSP * const This,                      SKL_BYTE *uDst, const SKL_BYTE *uSrc, SKL_SAFE_INT uv_Coloc,                      int BpS, int x, int y, int Rounder);  int Width, Height;  int Nb_Pts;  int Accuracy;  int dU[2];    // gradient-converted warp points  int dV[2];  int dw[2];  int Uo, Vo, Uco, Vco;  void Setup(int Width, int Height, const int MVs[][2], int Nb, int Accuracy);  void Get_Average_MV(int MV[2], int x, int y, int QPel) const;  };extern int Skl_Init_GMC_DSP(SKL_GMC_DSP *Dsp, SKL_CPU_FEATURE Cpu = SKL_CPU_DETECT);//////////////////////////////////////////////////////////// useful table for clipping pixels.// MPEG4's 6-tap FIR output values in [-112,367]// H264's  6-tap FIR output values in [-80,335]// B-VOP like mixing requires range   [-128,384]// H264's hadamard transform ranges   [-512,768]#define SKL_CLIP_MIN  (-512)#define SKL_CLIP_MAX  ( 768)extern "C" SKL_BYTE Skl_DSP_Clip[SKL_CLIP_MAX-SKL_CLIP_MIN];extern "C" void Skl_Init_DSP_Clip();#ifndef NDEBUG#define SKL_DSP_CLIP(x) (SKL_ASSERT((x)>=SKL_CLIP_MIN && (x)<SKL_CLIP_MAX), Skl_DSP_Clip[(x)-SKL_CLIP_MIN])#else#define SKL_DSP_CLIP(x) (Skl_DSP_Clip[(x)-SKL_CLIP_MIN])#endif//////////////////////////////////////////////////////////// General-purpose other function  // In[Size], Out[2*Size]extern void Skl_Generic_IDct_Ref(int Size, const float *In, float *Out);//////////////////////////////////////////////////////////} /* extern "C" */#endif  /* _SKL_DSP_H_ */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -