vf_fspp.c
来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页
C
2,126 行
p->src[index - x - 1]= p->src[index + x ]; p->src[index + width + x ]= p->src[index + width - x - 1]; } } for(y=0; y<8; y++){ fast_memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride); fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride); } //FIXME (try edge emu) for(y=8; y<24; y++) memset(p->temp+ 8 +y*stride, 0,width*sizeof(int16_t)); for(y=step; y<height+8; y+=step){ //step= 1,2 qy=y-4; if (qy>height-1) qy=height-1; if (qy<0) qy=0; qy=(qy>>qps)*qp_stride; row_fdct_s(block, p->src + y*stride +2-(y&1), stride, 2); for(x0=0; x0<width+8-8*(BLOCKSZ-1); x0+=8*(BLOCKSZ-1)){ row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, 2*(BLOCKSZ-1)); if(p->qp) column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+0*8, block3+0*8, 8*(BLOCKSZ-1)); //yes, this is a HOTSPOT else for (x=0; x<8*(BLOCKSZ-1); x+=8) { t=x+x0-2; //correct t=x+x0-2-(y&1), but its the same if (t<0) t=0;//t always < width-2 t=qp_store[qy+(t>>qps)]; if(p->mpeg2) t>>=1; //copy p->mpeg2,prev_q to locals? if (t!=p->prev_q) p->prev_q=t, mul_thrmat_s(p, t); column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT } row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1)); memcpy(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling memcpy(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM)); } // es=width+8-x0; // 8, ... if (es>8) row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, (es-4)>>2); column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block, block3, es&(~1)); row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, es>>2); {const int y1=y-8+step;//l5-7 l4-6 if (!(y1&7) && y1) { if (y1&8) store_slice_s(dst + (y1-8)*dst_stride, p->temp+ 8 +8*stride, dst_stride, stride, width, 8, 5-p->log2_count); else store_slice2_s(dst + (y1-8)*dst_stride, p->temp+ 8 +0*stride, dst_stride, stride, width, 8, 5-p->log2_count); } } } if (y&7) { // == height & 7 if (y&8) store_slice_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +8*stride, dst_stride, stride, width, y&7, 5-p->log2_count); else store_slice2_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +0*stride, dst_stride, stride, width, y&7, 5-p->log2_count); }}static int config(struct vf_instance_s* vf, int width, int height, int d_width, int d_height, unsigned int flags, unsigned int outfmt){ int h= (height+16+15)&(~15); vf->priv->temp_stride= (width+16+15)&(~15); vf->priv->temp= (int16_t*)av_mallocz(vf->priv->temp_stride*3*8*sizeof(int16_t)); //this can also be avoided, see above vf->priv->src = (uint8_t*)av_malloc(vf->priv->temp_stride*h*sizeof(uint8_t)); return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);}static void get_image(struct vf_instance_s* vf, mp_image_t *mpi){ if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change // ok, we can do pp in-place (or pp disabled): vf->dmpi=vf_get_image(vf->next,mpi->imgfmt, mpi->type, mpi->flags, mpi->width, mpi->height); mpi->planes[0]=vf->dmpi->planes[0]; mpi->stride[0]=vf->dmpi->stride[0]; mpi->width=vf->dmpi->width; if(mpi->flags&MP_IMGFLAG_PLANAR){ mpi->planes[1]=vf->dmpi->planes[1]; mpi->planes[2]=vf->dmpi->planes[2]; mpi->stride[1]=vf->dmpi->stride[1]; mpi->stride[2]=vf->dmpi->stride[2]; } mpi->flags|=MP_IMGFLAG_DIRECT;}static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){ mp_image_t *dmpi; if(!(mpi->flags&MP_IMGFLAG_DIRECT)){ // no DR, so get a new image! hope we'll get DR buffer: dmpi=vf_get_image(vf->next,mpi->imgfmt, MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE, mpi->width,mpi->height); vf_clone_mpi_attributes(dmpi, mpi); }else{ dmpi=vf->dmpi; } vf->priv->mpeg2= mpi->qscale_type; if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){ if(!vf->priv->non_b_qp) vf->priv->non_b_qp= malloc(mpi->qstride * ((mpi->h + 15) >> 4)); fast_memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4)); } if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){ char *qp_tab= vf->priv->non_b_qp; if(vf->priv->bframes || !qp_tab) qp_tab= mpi->qscale; if(qp_tab || vf->priv->qp){ filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0], mpi->w, mpi->h, qp_tab, mpi->qstride, 1); filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0); filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0); }else{ memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]); memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]); memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]); } }#ifdef HAVE_MMX if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");#endif#ifdef HAVE_MMX2 if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");#endif return vf_next_put_image(vf,dmpi, pts);}static void uninit(struct vf_instance_s* vf){ if(!vf->priv) return; if(vf->priv->temp) av_free(vf->priv->temp); vf->priv->temp= NULL; if(vf->priv->src) av_free(vf->priv->src); vf->priv->src= NULL; //if(vf->priv->avctx) free(vf->priv->avctx); //vf->priv->avctx= NULL; if(vf->priv->non_b_qp) free(vf->priv->non_b_qp); vf->priv->non_b_qp= NULL; av_free(vf->priv); vf->priv=NULL;}//===========================================================================//static int query_format(struct vf_instance_s* vf, unsigned int fmt){ switch(fmt){ case IMGFMT_YVU9: case IMGFMT_IF09: case IMGFMT_YV12: case IMGFMT_I420: case IMGFMT_IYUV: case IMGFMT_CLPL: case IMGFMT_Y800: case IMGFMT_Y8: case IMGFMT_444P: case IMGFMT_422P: case IMGFMT_411P: return vf_next_query_format(vf,fmt); } return 0;}/* static unsigned int fmt_list[]={ IMGFMT_YVU9, IMGFMT_IF09, IMGFMT_YV12, IMGFMT_I420, IMGFMT_IYUV, IMGFMT_CLPL, IMGFMT_Y800, IMGFMT_Y8, IMGFMT_444P, IMGFMT_422P, IMGFMT_411P, 0 };*/static int control(struct vf_instance_s* vf, int request, void* data){ switch(request){ case VFCTRL_QUERY_MAX_PP_LEVEL: return 5; case VFCTRL_SET_PP_LEVEL: vf->priv->log2_count= *((unsigned int*)data); if (vf->priv->log2_count < 4) vf->priv->log2_count=4; return CONTROL_TRUE; } return vf_next_control(vf,request,data);}static int open(vf_instance_t *vf, char* args){ int i=0, bias; int custom_threshold_m[64]; int log2c=-1; vf->config=config; vf->put_image=put_image; vf->get_image=get_image; vf->query_format=query_format; vf->uninit=uninit; vf->control= control; vf->priv=av_mallocz(sizeof(struct vf_priv_s));//assumes align 16 ! avcodec_init(); //vf->priv->avctx= avcodec_alloc_context(); //dsputil_init(&vf->priv->dsp, vf->priv->avctx); vf->priv->log2_count= 4; vf->priv->bframes = 0; if (args) sscanf(args, "%d:%d:%d:%d", &log2c, &vf->priv->qp, &i, &vf->priv->bframes); if( log2c >=4 && log2c <=5 ) vf->priv->log2_count = log2c; else if( log2c >= 6 ) vf->priv->log2_count = 5; if(vf->priv->qp < 0) vf->priv->qp = 0; if (i < -15) i = -15; if (i > 32) i = 32; bias= (1<<4)+i; //regulable vf->priv->prev_q=0; // for(i=0;i<64;i++) //FIXME: tune custom_threshold[] and remove this ! custom_threshold_m[i]=(int)(custom_threshold[i]*(bias/71.)+ 0.5); for(i=0;i<8;i++){ vf->priv->threshold_mtx_noq[2*i]=(uint64_t)custom_threshold_m[i*8+2] |(((uint64_t)custom_threshold_m[i*8+6])<<16) |(((uint64_t)custom_threshold_m[i*8+0])<<32) |(((uint64_t)custom_threshold_m[i*8+4])<<48); vf->priv->threshold_mtx_noq[2*i+1]=(uint64_t)custom_threshold_m[i*8+5] |(((uint64_t)custom_threshold_m[i*8+3])<<16) |(((uint64_t)custom_threshold_m[i*8+1])<<32) |(((uint64_t)custom_threshold_m[i*8+7])<<48); } if (vf->priv->qp) vf->priv->prev_q=vf->priv->qp, mul_thrmat_s(vf->priv, vf->priv->qp); return 1;}vf_info_t vf_info_fspp = { "fast simple postprocess", "fspp", "Michael Niedermayer, Nikolaj Poroshin", "", open, NULL};//====================================================================//Specific spp's dct, idct and threshold functions//I'd prefer to have them in the separate file.#include "mangle.h"//#define MANGLE(a) #a//typedef int16_t DCTELEM; //! only int16_t#define DCTSIZE 8#define DCTSIZE_S "8"#define FIX(x,s) ((int) ((x) * (1<<s) + 0.5)&0xffff)#define C64(x) ((uint64_t)((x)|(x)<<16))<<32 | (uint64_t)(x) | (uint64_t)(x)<<16#define FIX64(x,s) C64(FIX(x,s))#define MULTIPLY16H(x,k) (((x)*(k))>>16)#define THRESHOLD(r,x,t) if(((unsigned)((x)+t))>t*2) r=(x);else r=0;#define DESCALE(x,n) (((x) + (1 << ((n)-1))) >> n)#ifdef HAVE_MMXstatic uint64_t attribute_used __attribute__((aligned(8))) temps[4];//!!static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_382683433=FIX64(0.382683433, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_541196100=FIX64(0.541196100, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_707106781=FIX64(0.707106781, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_306562965=FIX64(1.306562965, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_414213562_A=FIX64(1.414213562, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_847759065=FIX64(1.847759065, 13); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_2_613125930=FIX64(-2.613125930, 13); //-static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_414213562=FIX64(1.414213562, 13); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_082392200=FIX64(1.082392200, 13);//for t3,t5,t7 == 0 shortcutstatic uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_847759065=FIX64(0.847759065, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_566454497=FIX64(0.566454497, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_198912367=FIX64(0.198912367, 14); static uint64_t attribute_used __attribute__((aligned(8))) MM_DESCALE_RND=C64(4);static uint64_t attribute_used __attribute__((aligned(8))) MM_2=C64(2);#else /* !HAVE_MMX */typedef int32_t int_simd16_t;static int16_t FIX_0_382683433=FIX(0.382683433, 14); static int16_t FIX_0_541196100=FIX(0.541196100, 14); static int16_t FIX_0_707106781=FIX(0.707106781, 14); static int16_t FIX_1_306562965=FIX(1.306562965, 14); static int16_t FIX_1_414213562_A=FIX(1.414213562, 14); static int16_t FIX_1_847759065=FIX(1.847759065, 13); static int16_t FIX_2_613125930=FIX(-2.613125930, 13); //-static int16_t FIX_1_414213562=FIX(1.414213562, 13); static int16_t FIX_1_082392200=FIX(1.082392200, 13);#endif#ifndef HAVE_MMXstatic void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt){ int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_simd16_t tmp10, tmp11, tmp12, tmp13; int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13; int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7; DCTELEM* dataptr; DCTELEM* wsptr; int16_t *threshold; int ctr; dataptr = data; wsptr = output; for (; cnt > 0; cnt-=2) { //start positions threshold=(int16_t*)thr_adr;//threshold_mtx for (ctr = DCTSIZE; ctr > 0; ctr--) { // Process columns from input, add to output. tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; // Even part of FDCT tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; d0 = tmp10 + tmp11; d4 = tmp10 - tmp11; z1 = MULTIPLY16H((tmp12 + tmp13) <<2, FIX_0_707106781); d2 = tmp13 + z1; d6 = tmp13 - z1; // Even part of IDCT THRESHOLD(tmp0, d0, threshold[0*8]); THRESHOLD(tmp1, d2, threshold[2*8]); THRESHOLD(tmp2, d4, threshold[4*8]); THRESHOLD(tmp3, d6, threshold[6*8]); tmp0+=2; tmp10 = (tmp0 + tmp2)>>2; tmp11 = (tmp0 - tmp2)>>2; tmp13 = (tmp1 + tmp3)>>2; //+2 ! (psnr decides) tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2 tmp0 = tmp10 + tmp13; //->temps tmp3 = tmp10 - tmp13; //->temps tmp1 = tmp11 + tmp12; //->temps tmp2 = tmp11 - tmp12; //->temps // Odd part of FDCT tmp10 = tmp4 + tmp5; tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; z5 = MULTIPLY16H((tmp10 - tmp12)<<2, FIX_0_382683433); z2 = MULTIPLY16H(tmp10 <<2, FIX_0_541196100) + z5; z4 = MULTIPLY16H(tmp12 <<2, FIX_1_306562965) + z5; z3 = MULTIPLY16H(tmp11 <<2, FIX_0_707106781); z11 = tmp7 + z3; z13 = tmp7 - z3; d5 = z13 + z2; d3 = z13 - z2; d1 = z11 + z4; d7 = z11 - z4; // Odd part of IDCT THRESHOLD(tmp4, d1, threshold[1*8]); THRESHOLD(tmp5, d3, threshold[3*8]); THRESHOLD(tmp6, d5, threshold[5*8]); THRESHOLD(tmp7, d7, threshold[7*8]); //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0 z13 = tmp6 + tmp5; z10 = (tmp6 - tmp5)<<1; z11 = tmp4 + tmp7; z12 = (tmp4 - tmp7)<<1;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?