vf_fspp.c

来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页

C
2,126
字号
            p->src[index         - x - 1]= p->src[index +         x    ];            p->src[index + width + x    ]= p->src[index + width - x - 1];        }    }    for(y=0; y<8; y++){        fast_memcpy(p->src + (      7-y)*stride, p->src + (      y+8)*stride, stride);        fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);    }    //FIXME (try edge emu)    for(y=8; y<24; y++)	memset(p->temp+ 8 +y*stride, 0,width*sizeof(int16_t));    for(y=step; y<height+8; y+=step){    //step= 1,2	qy=y-4;	if (qy>height-1) qy=height-1;	if (qy<0) qy=0;	qy=(qy>>qps)*qp_stride;	row_fdct_s(block, p->src + y*stride +2-(y&1), stride, 2);	for(x0=0; x0<width+8-8*(BLOCKSZ-1); x0+=8*(BLOCKSZ-1)){	    row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, 2*(BLOCKSZ-1));	    if(p->qp)        		column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+0*8, block3+0*8, 8*(BLOCKSZ-1)); //yes, this is a HOTSPOT	    else		for (x=0; x<8*(BLOCKSZ-1); x+=8) {		    t=x+x0-2; //correct t=x+x0-2-(y&1), but its the same 		    if (t<0) t=0;//t always < width-2		    t=qp_store[qy+(t>>qps)];		    if(p->mpeg2) t>>=1; //copy p->mpeg2,prev_q to locals?		    if (t!=p->prev_q) p->prev_q=t, mul_thrmat_s(p, t);		    column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT		}	    row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1));	    memcpy(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling	    memcpy(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));  	}	//	es=width+8-x0; //  8, ...      	
if (es>8)	    row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, (es-4)>>2);	column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block, block3, es&(~1));	row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, es>>2);	{const int y1=y-8+step;//l5-7  l4-6	    if (!(y1&7) && y1) {		if (y1&8) store_slice_s(dst + (y1-8)*dst_stride, p->temp+ 8 +8*stride, 					dst_stride, stride, width, 8, 5-p->log2_count);		else store_slice2_s(dst + (y1-8)*dst_stride, p->temp+ 8 +0*stride, 				    dst_stride, stride, width, 8, 5-p->log2_count);    	    } }    }    if (y&7) {  // == height & 7	if (y&8) store_slice_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +8*stride, 			       dst_stride, stride, width, y&7, 5-p->log2_count);	else store_slice2_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +0*stride, 			    dst_stride, stride, width, y&7, 5-p->log2_count);    }}static int config(struct vf_instance_s* vf,		  int width, int height, int d_width, int d_height,		  unsigned int flags, unsigned int outfmt){    int h= (height+16+15)&(~15);    vf->priv->temp_stride= (width+16+15)&(~15);    vf->priv->temp= (int16_t*)av_mallocz(vf->priv->temp_stride*3*8*sizeof(int16_t));    //this can also be avoided, see above    vf->priv->src = (uint8_t*)av_malloc(vf->priv->temp_stride*h*sizeof(uint8_t));    return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);}static void get_image(struct vf_instance_s* vf, mp_image_t *mpi){    if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change    // ok, we can do pp in-place (or pp disabled):    vf->dmpi=vf_get_image(vf->next,mpi->imgfmt,			  mpi->type, mpi->flags, mpi->width, mpi->height);    mpi->planes[0]=vf->dmpi->planes[0];    mpi->stride[0]=vf->dmpi->stride[0];    mpi->width=vf->dmpi->width;    if(mpi->flags&MP_IMGFLAG_PLANAR){        mpi->planes[1]=vf->dmpi->planes[1];        mpi->planes[2]=vf->dmpi->planes[2];        mpi->stride[1]=vf->dmpi->stride[1];        mpi->stride[2]=vf->dmpi->stride[2];    }    
mpi->flags|=MP_IMGFLAG_DIRECT;}static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts){    mp_image_t *dmpi;    if(!(mpi->flags&MP_IMGFLAG_DIRECT)){	// no DR, so get a new image! hope we'll get DR buffer:	dmpi=vf_get_image(vf->next,mpi->imgfmt,			  MP_IMGTYPE_TEMP,			  MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,			  mpi->width,mpi->height);	vf_clone_mpi_attributes(dmpi, mpi);    }else{	dmpi=vf->dmpi;    }    vf->priv->mpeg2= mpi->qscale_type;    if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){	if(!vf->priv->non_b_qp)	    vf->priv->non_b_qp= malloc(mpi->qstride * ((mpi->h + 15) >> 4));	fast_memcpy(vf->priv->non_b_qp, mpi->qscale, mpi->qstride * ((mpi->h + 15) >> 4));    }    if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){	char *qp_tab= vf->priv->non_b_qp;	if(vf->priv->bframes || !qp_tab)	    qp_tab= mpi->qscale;	if(qp_tab || vf->priv->qp){	    filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0],		   mpi->w, mpi->h, qp_tab, mpi->qstride, 1);	    filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1],		   mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);	    filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2],		   mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);	}else{	    memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]);	    memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]);	    memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]);	}    }#ifdef HAVE_MMX    if(gCpuCaps.hasMMX) asm volatile ("emms\n\t");#endif#ifdef HAVE_MMX2    if(gCpuCaps.hasMMX2) asm volatile ("sfence\n\t");#endif    return vf_next_put_image(vf,dmpi, pts);}static void uninit(struct 
vf_instance_s* vf){    if(!vf->priv) return;    if(vf->priv->temp) av_free(vf->priv->temp);    vf->priv->temp= NULL;    if(vf->priv->src)  av_free(vf->priv->src);    vf->priv->src= NULL;    //if(vf->priv->avctx) free(vf->priv->avctx);    //vf->priv->avctx= NULL;    if(vf->priv->non_b_qp) free(vf->priv->non_b_qp);    vf->priv->non_b_qp= NULL;            av_free(vf->priv);    vf->priv=NULL;}//===========================================================================//static int query_format(struct vf_instance_s* vf, unsigned int fmt){    switch(fmt){    case IMGFMT_YVU9:    case IMGFMT_IF09:    case IMGFMT_YV12:    case IMGFMT_I420:    case IMGFMT_IYUV:    case IMGFMT_CLPL:    case IMGFMT_Y800:    case IMGFMT_Y8:    case IMGFMT_444P:    case IMGFMT_422P:    case IMGFMT_411P:	return vf_next_query_format(vf,fmt);    }    return 0;}/*  static unsigned int fmt_list[]={  IMGFMT_YVU9,  IMGFMT_IF09,  IMGFMT_YV12,  IMGFMT_I420,  IMGFMT_IYUV,  IMGFMT_CLPL,  IMGFMT_Y800,  IMGFMT_Y8,  IMGFMT_444P,  IMGFMT_422P,  IMGFMT_411P,  0  };*/static int control(struct vf_instance_s* vf, int request, void* data){    switch(request){    case VFCTRL_QUERY_MAX_PP_LEVEL:        return 5;    case VFCTRL_SET_PP_LEVEL:        vf->priv->log2_count= *((unsigned int*)data);        if (vf->priv->log2_count < 4) vf->priv->log2_count=4;        return CONTROL_TRUE;    }    return vf_next_control(vf,request,data);}static int open(vf_instance_t *vf, char* args){    int i=0, bias;    int custom_threshold_m[64];    int log2c=-1;        vf->config=config;    vf->put_image=put_image;    vf->get_image=get_image;    vf->query_format=query_format;    vf->uninit=uninit;    vf->control= control;    vf->priv=av_mallocz(sizeof(struct vf_priv_s));//assumes align 16 !         
avcodec_init();    //vf->priv->avctx= avcodec_alloc_context();    //dsputil_init(&vf->priv->dsp, vf->priv->avctx);        vf->priv->log2_count= 4;    vf->priv->bframes = 0;        if (args) sscanf(args, "%d:%d:%d:%d", &log2c, &vf->priv->qp, &i, &vf->priv->bframes);    if( log2c >=4 && log2c <=5 )        vf->priv->log2_count = log2c;    else if( log2c >= 6 )	vf->priv->log2_count = 5;    if(vf->priv->qp < 0)        vf->priv->qp = 0;    if (i < -15) i = -15;    if (i > 32) i = 32;        bias= (1<<4)+i; //regulable    vf->priv->prev_q=0;    //    for(i=0;i<64;i++) //FIXME: tune custom_threshold[] and remove this !	custom_threshold_m[i]=(int)(custom_threshold[i]*(bias/71.)+ 0.5);    for(i=0;i<8;i++){	vf->priv->threshold_mtx_noq[2*i]=(uint64_t)custom_threshold_m[i*8+2]	    |(((uint64_t)custom_threshold_m[i*8+6])<<16)	    |(((uint64_t)custom_threshold_m[i*8+0])<<32)	    |(((uint64_t)custom_threshold_m[i*8+4])<<48);	vf->priv->threshold_mtx_noq[2*i+1]=(uint64_t)custom_threshold_m[i*8+5]	    |(((uint64_t)custom_threshold_m[i*8+3])<<16)	    |(((uint64_t)custom_threshold_m[i*8+1])<<32)	    |(((uint64_t)custom_threshold_m[i*8+7])<<48);    }    if (vf->priv->qp) vf->priv->prev_q=vf->priv->qp, mul_thrmat_s(vf->priv, vf->priv->qp);    return 1;}vf_info_t vf_info_fspp = {    "fast simple postprocess",    "fspp",    "Michael Niedermayer, Nikolaj Poroshin",    "",    open,    NULL};//====================================================================//Specific spp's dct, idct and threshold functions//I'd prefer to have them in the separate file.#include "mangle.h"//#define MANGLE(a) #a//typedef int16_t DCTELEM; //! 
// only int16_t

/* Edge length of a DCT block, as a number and as a string literal. */
#define DCTSIZE 8
#define DCTSIZE_S "8"

/*
 * Fixed-point helpers.
 * FIX(x,s)   - x scaled by 2^s, plus 0.5, truncated to int and masked to the
 *              low 16 bits.
 * C64(x)     - x placed in each of the four 16-bit lanes of a 64-bit value.
 *              NOTE(review): the expansion has no outer parentheses, so it is
 *              only safe as a complete initializer/expression.
 * FIX64(x,s) - FIX(x,s) replicated with C64().
 */
#define FIX(x,s)  ((int) ((x) * (1<<s) + 0.5)&0xffff)
#define C64(x)    ((uint64_t)((x)|(x)<<16))<<32 | (uint64_t)(x) | (uint64_t)(x)<<16
#define FIX64(x,s)  C64(FIX(x,s))

/*
 * MULTIPLY16H(x,k) - product of x and k shifted right by 16 bits.
 * THRESHOLD(r,x,t) - r = x when x lies outside [-t, t], otherwise r = 0; the
 *                    single unsigned compare covers both bounds.
 *                    NOTE(review): expands to a bare if/else statement that
 *                    already ends in ';', so it must be used as a statement
 *                    of its own and never as the body of an unbraced if.
 * DESCALE(x,n)     - right shift of x by n with rounding (adds 2^(n-1) first).
 */
#define MULTIPLY16H(x,k)   (((x)*(k))>>16)
#define THRESHOLD(r,x,t) if(((unsigned)((x)+t))>t*2) r=(x);else r=0;
#define DESCALE(x,n)  (((x) + (1 << ((n)-1))) >> n)

#ifdef HAVE_MMX

/* 8-byte aligned scratch storage (four 64-bit words). */
static uint64_t attribute_used __attribute__((aligned(8))) temps[4];//!!

/*
 * 8-byte aligned 64-bit constants, each a FIX64() value, i.e. one 16-bit
 * fixed-point factor repeated in all four lanes. The second FIX64 argument
 * gives the number of fraction bits (14 or 13).
 * NOTE(review): attribute_used is defined elsewhere; presumably it keeps
 * these objects alive for references the compiler cannot see -- confirm.
 */
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_382683433=FIX64(0.382683433, 14);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_541196100=FIX64(0.541196100, 14);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_707106781=FIX64(0.707106781, 14);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_306562965=FIX64(1.306562965, 14);

static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_414213562_A=FIX64(1.414213562, 14);

static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_847759065=FIX64(1.847759065, 13);
/* Despite its name this constant stores the negated factor (note the minus). */
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_2_613125930=FIX64(-2.613125930, 13); //-
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_414213562=FIX64(1.414213562, 13);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_1_082392200=FIX64(1.082392200, 13);
//for t3,t5,t7 == 0 shortcut
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_847759065=FIX64(0.847759065, 14);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_566454497=FIX64(0.566454497, 14);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_198912367=FIX64(0.198912367, 14);

/* The plain integers 4 and 2 replicated into all four 16-bit lanes. */
static uint64_t attribute_used __attribute__((aligned(8))) MM_DESCALE_RND=C64(4);
static uint64_t attribute_used __attribute__((aligned(8))) MM_2=C64(2);

#else /* !HAVE_MMX */

/* Working type of the scalar code path: 32-bit intermediates. */
typedef int32_t int_simd16_t;
/*
 * Scalar counterparts of the MM_FIX_* constants above: the same FIX() values
 * stored as single int16_t objects.
 */
static int16_t FIX_0_382683433=FIX(0.382683433, 14);
static int16_t FIX_0_541196100=FIX(0.541196100, 14);
static int16_t FIX_0_707106781=FIX(0.707106781, 14);
static int16_t FIX_1_306562965=FIX(1.306562965, 14);
static int16_t FIX_1_414213562_A=FIX(1.414213562, 14);
static int16_t FIX_1_847759065=FIX(1.847759065, 13);
/* Stores the negated factor, like MM_FIX_2_613125930. */
static int16_t FIX_2_613125930=FIX(-2.613125930, 13); //-
static int16_t FIX_1_414213562=FIX(1.414213562, 13);
static int16_t FIX_1_082392200=FIX(1.082392200, 13);

#endif

#ifndef HAVE_MMX

static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt){
    int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    int_simd16_t tmp10, tmp11, tmp12, tmp13;
    int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
    int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;

    DCTELEM* dataptr;
    DCTELEM* wsptr;
    int16_t *threshold;
    int ctr;

    dataptr = data;
    wsptr = output;

    for (; cnt > 0; cnt-=2) { //start positions
	threshold=(int16_t*)thr_adr;//threshold_mtx
	for (ctr = DCTSIZE; ctr > 0; ctr--) {
	    // Process columns from input, add to output.
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];	    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];    	    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];	    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];    	    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];	    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];    	    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];	    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];	    // Even part of FDCT    	    tmp10 = tmp0 + tmp3;	    tmp13 = tmp0 - tmp3;	    tmp11 = tmp1 + tmp2;	    tmp12 = tmp1 - tmp2;	    d0 = tmp10 + tmp11; 	    d4 = tmp10 - tmp11;    	    z1 = MULTIPLY16H((tmp12 + tmp13) <<2, FIX_0_707106781); 	    d2 = tmp13 + z1; 	    d6 = tmp13 - z1;    	    // Even part of IDCT	    THRESHOLD(tmp0, d0, threshold[0*8]);	    THRESHOLD(tmp1, d2, threshold[2*8]);	    THRESHOLD(tmp2, d4, threshold[4*8]);	    THRESHOLD(tmp3, d6, threshold[6*8]);     	    tmp0+=2;	    tmp10 = (tmp0 + tmp2)>>2;	    tmp11 = (tmp0 - tmp2)>>2;	    tmp13 = (tmp1 + tmp3)>>2; //+2 !  
(psnr decides)	    tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2	    tmp0 = tmp10 + tmp13; //->temps	    tmp3 = tmp10 - tmp13; //->temps	    tmp1 = tmp11 + tmp12; //->temps	    tmp2 = tmp11 - tmp12; //->temps	    // Odd part of FDCT	    tmp10 = tmp4 + tmp5;  	    tmp11 = tmp5 + tmp6;	    tmp12 = tmp6 + tmp7;        	    z5 = MULTIPLY16H((tmp10 - tmp12)<<2, FIX_0_382683433); 	    z2 = MULTIPLY16H(tmp10 <<2, FIX_0_541196100) + z5; 	    z4 = MULTIPLY16H(tmp12 <<2, FIX_1_306562965) + z5; 	    z3 = MULTIPLY16H(tmp11 <<2, FIX_0_707106781); 	    z11 = tmp7 + z3;        	    z13 = tmp7 - z3;	    d5 = z13 + z2; 	    d3 = z13 - z2;	    d1 = z11 + z4;	    d7 = z11 - z4;    	    // Odd part of IDCT	    THRESHOLD(tmp4, d1, threshold[1*8]);	    THRESHOLD(tmp5, d3, threshold[3*8]);	    THRESHOLD(tmp6, d5, threshold[5*8]);	    THRESHOLD(tmp7, d7, threshold[7*8]);	    //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0	    z13 = tmp6 + tmp5;	    z10 = (tmp6 - tmp5)<<1;	    z11 = tmp4 + tmp7;	    z12 = (tmp4 - tmp7)<<1;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?