vf_filmdint.c

来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 1,447 行 · 第 1/3 页

C
1,447
字号
	"movq %3, %%mm0\n\t"	"movq %%mm7, %%mm1\n\t"	"psubusw %%mm0, %%mm1\n\t"	"movq %%mm1, %%mm2\n\t"	"paddusw %%mm0, %%mm2\n\t"	"paddusw %%mm7, %%mm2\n\t"	"pshufw $0xb1, %%mm2, %%mm3\n\t"	"pavgw %%mm7, %%mm2\n\t"	"pshufw $0xb1, %%mm2, %%mm2\n\t"	"psubusw %%mm7, %%mm2\n\t"	"pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */	"psubusw %%mm7, %%mm3\n\t"	"pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */	"movq %1, %%mm4\n\t"	"movq %2, %%mm5\n\t"	"psubw %%mm2, %%mm4\n\t"	"psubw %%mm3, %%mm5\n\t"	"movq %%mm4, %1\n\t"	"movq %%mm5, %2\n\t"	"pxor %%mm4, %%mm4\n\t"	"pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */	"psubusw %%mm0, %%mm1\n\t"	"pxor %%mm5, %%mm5\n\t"	"pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */	"psubusw %%mm0, %%mm1\n\t"	"psubusw %%mm0, %%mm1\n\t"	"pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */	"pshufw $0xb1, %%mm2, %%mm0\n\t"	"por %%mm2, %%mm0\n\t"     /* 1 if not close */	"punpckhdq %%mm0, %%mm0\n\t"	"movq %%mm4, %%mm2\n\t"      /* tttt */	"punpckhdq %%mm5, %%mm2\n\t" /* ttll */	"por %%mm2, %%mm0\n\t"	"pcmpeqd %%mm6, %%mm0\n\t" /* close && big */	"psrlq $16, %%mm0\n\t"	"psrlw $15, %%mm0\n\t"	"movd %%mm0, %0\n\t"	: "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)	: "m" (p->thres)	);    if (interlaced) {	s->interlaced_high += interlaced >> 16;	s->interlaced_low += interlaced;    } else {	asm volatile(	    "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */	    "psubw 	%%mm0, %%mm4\n\t"	    "psubw 	%%mm0, %%mm5\n\t"	    "psubw 	%%mm0, %%mm1\n\t"	    "paddw %0, %%mm4\n\t"	    "paddw %1, %%mm5\n\t"	    "paddw %2, %%mm1\n\t"	    "movq %%mm4, %0\n\t"	    "movq %%mm5, %1\n\t"	    "movq %%mm1, %2\n\t"	    : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)	    );	asm volatile(	    "pshufw $0, %2, %%mm0\n\t"	    "psubusw %%mm7, %%mm0\n\t"	    "pcmpeqw %%mm6, %%mm0\n\t"   /* 0 if below sad_thres */	    "pand %%mm7, %%mm0\n\t"	    "movq %%mm0, %%mm1\n\t"	    "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */	    "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */	    "paddd %0, %%mm0\n\t"	    "paddd %1, %%mm1\n\t"	    
"movq %%mm0, %0\n\t"
            "movq %%mm1, %1\n\t"
            : "=m" (s->sad.even), "=m" (s->sad.noise)
            : "m" (p->sad_thres)
            );
    }
    /*
     * Write the metrics held in mm7 to tm, combine them with s->max through
     * PMAXUW (macro defined elsewhere; presumably an unsigned 16-bit maximum
     * -- TODO confirm), store the result back to s->max, then run emms.
     */
    asm volatile(
        "movq %%mm7, (%1)\n\t"
        PMAXUW((%0), %%mm7)
        "movq %%mm7, (%0)\n\t"
        "emms"
        : : "r" (&s->max), "r" (&tm), "X" (s->max)
        : "memory"
        );
#ifdef DEBUG
    /*
     * Debug builds rewind a and b by seven rows, recompute the metrics with
     * block_metrics_c into cm / ts and warn whenever MEQ reports a mismatch
     * with the values produced above.
     */
    if (1) {
        struct metrics cm;
        a -= 7*as;
        b -= 7*bs;
        cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
        if (!MEQ(tm, cm))
            mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
        if (s) {
#           define CHECK(X) if (!MEQ(s->X, ts.X)) \
                mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
            CHECK(tiny);
            CHECK(low);
            CHECK(high);
            CHECK(sad);
            CHECK(max);
        }
    }
#endif /* DEBUG */
#endif
    return tm;
}

/*
 * MMX2 line copy used when de-interlacing.  Without HAVE_MMX the function
 * only logs an internal error and returns 0.
 *
 * Each loop iteration handles 8 bytes: the bytes at a are stored unchanged
 * to dst, and a computed group is stored to dst+ds.  The computation reads
 * a, a+2*ss, a+bos and a+cos; see the per-instruction comments for the
 * selection between the average and the candidate lines.  dint_copy_line in
 * this file appears to be the portable counterpart -- verify before relying
 * on exact equivalence.
 *
 * dst, a   destination and source pointers
 * bos, cos byte offsets added to a for the two candidate lines
 * ds       byte offset added to dst for the second output line
 * ss       byte offset; a+2*ss is the line averaged with a
 * w        width in bytes, processed in groups of 8 ((w+7) >> 3 iterations)
 * t        threshold; its low byte is replicated into all 8 bytes of mm7
 *
 * Returns the sum of the byte counters kept in mm6 (computed with psadbw
 * after the loop).
 *
 * NOTE(review): for w <= 0 len starts at 0 and the do/while decrements past
 * zero; callers must pass a positive width.
 */
static inline int
dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
                    long cos, int ds, int ss, int w, int t)
{
#ifndef HAVE_MMX
    mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
    return 0;
#else
    unsigned long len = (w+7) >> 3;
    int ret;
    asm volatile (
        "pxor %%mm6, %%mm6 \n\t"       /* deinterlaced pixel counter */
        "movd %0, %%mm7 \n\t"
        "punpcklbw %%mm7, %%mm7 \n\t"
        "punpcklwd %%mm7, %%mm7 \n\t"
        "punpckldq %%mm7, %%mm7 \n\t"  /* mm7 = threshold */
        : /* no output */
        : "rm" (t)
        );
    do {
        asm volatile (
            "movq (%0), %%mm0\n\t"
            "movq (%0,%3,2), %%mm1\n\t"
            "movq %%mm0, (%2)\n\t"
            "pmaxub %%mm1, %%mm0\n\t"
            "pavgb (%0), %%mm1\n\t"
            "psubusb %%mm1, %%mm0\n\t"
            "paddusb %%mm7, %%mm0\n\t"  /* mm0 = max-avg+thr */
            "movq (%0,%1), %%mm2\n\t"
            "movq (%0,%5), %%mm3\n\t"
            "movq %%mm2, %%mm4\n\t"
            PDIFFUBT(%%mm1, %%mm2, %%mm5)
            PDIFFUBT(%%mm1, %%mm3, %%mm5)
            "pminub %%mm2, %%mm3\n\t"
            "pcmpeqb %%mm3, %%mm2\n\t"  /* b = min */
            "pand %%mm2, %%mm4\n\t"
            "pandn (%0,%5), %%mm2\n\t"
            "por %%mm4, %%mm2\n\t"
            "pminub %%mm0, %%mm3\n\t"
            "pcmpeqb %%mm0, %%mm3\n\t"  /* set to 1s if >= threshold */
            "psubb %%mm3, %%mm6\n\t"    /* count pixels above thr. 
*/	    "pand %%mm3, %%mm1 \n\t"	    "pandn %%mm2, %%mm3 \n\t"	    "por %%mm3, %%mm1 \n\t"     /* avg if >= threshold */	    "movq %%mm1, (%2,%4) \n\t"	    : /* no output */	    : "r" (a), "r" (bos), "r" (dst), "r" ((long)ss), "r" ((long)ds), "r" (cos)	    );	a += 8;	dst += 8;    } while (--len);    asm volatile ("pxor %%mm7, %%mm7 \n\t"		  "psadbw %%mm6, %%mm7 \n\t"		  "movd %%mm7, %0 \n\t"		  "emms \n\t"		  : "=r" (ret)	);    return ret;#endif}static inline intdint_copy_line(unsigned char *dst, unsigned char *a, long bos,	       long cos, int ds, int ss, int w, int t){    unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);    cmmx_t dint_count = 0;    cmmx_t thr;    t |= t <<  8;    thr = t | (t << 16);    if (sizeof(cmmx_t) > 4)	thr |= thr << (sizeof(cmmx_t)*4);    do {	cmmx_t e = *(cmmx_t*)a;	cmmx_t ne = *(cmmx_t*)(a+2*ss);	cmmx_t o = *(cmmx_t*)(a+bos);	cmmx_t oo = *(cmmx_t*)(a+cos);	cmmx_t maxe = pmaxub(e, ne);	cmmx_t avge = pavgb(e, ne);	cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */	cmmx_t diffo  = pdiffub(avge, o);	cmmx_t diffoo = pdiffub(avge, oo);	cmmx_t diffcmp = pcmpgtub(diffo, diffoo);	cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;	cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;	cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);	cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;	dint_count += above_thr & ONE_BYTES;	*(cmmx_t*)(dst) = e;	*(cmmx_t*)(dst+ds) = bo_or_avg;	a += sizeof(cmmx_t);	dst += sizeof(cmmx_t);    } while (--len);    return psumbw(dint_count);}static intdint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,		unsigned char *c, unsigned long w, unsigned long h,		unsigned long ds, unsigned long ss, unsigned long threshold,		long field, long mmx2){    unsigned long ret = 0;    long bos = b - a;    long cos = c - a;    if (field) {	fast_memcpy(d, b, w);	h--;	d += ds;	a += ss;    }    bos += ss;    cos += ss;    while (h > 2) {	if (threshold >= 128) {	    fast_memcpy(d, a, w);	    
fast_memcpy(d+ds, a+bos, w);	} else if (mmx2 == 1) {	    ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);	} else	    ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);	h -= 2;	d += 2*ds;	a += 2*ss;    }    fast_memcpy(d, a, w);    if (h == 2)	fast_memcpy(d+ds, a+bos, w);    return ret;}static voidcopy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,		  unsigned char **old, unsigned char **new, unsigned long show){    unsigned long threshold = 256;    unsigned long field = p->swapped;    unsigned long dint_pixels = 0;    unsigned char **other = old;    if (show >= 12 || !(show & 3))	show >>= 2, other = new, new = old;    if (show <= 2) {  /* Single field: de-interlace */	threshold = p->dint_thres;	field ^= show & 1;	old = new;    } else if (show == 3)	old = new;    else	field ^= 1;    dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],				  other[0], p->w, p->h, dmpi->stride[0],				  p->stride, threshold, field, p->mmx2);    if (dmpi->flags & MP_IMGFLAG_PLANAR) {	if (p->luma_only)	    old = new, other = new;	else	    threshold = threshold/2 + 1;	field ^= p->chroma_swapped;	dint_copy_plane(dmpi->planes[1], old[1], new[1],			other[1], p->cw, p->ch,	dmpi->stride[1],			p->chroma_stride, threshold, field, p->mmx2);	dint_copy_plane(dmpi->planes[2], old[2], new[2],			other[2], p->cw, p->ch, dmpi->stride[2],			p->chroma_stride, threshold, field, p->mmx2);    }    if (dint_pixels > 0 && p->verbose)	mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);}static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,			unsigned char *of, unsigned char *nf,			int w, int h, int os, int ns, int swapped){    int i, y;    int align = -(long)nf & 7;    of += align;    nf += align;    w -= align;    if (swapped)	of -= os, nf -= ns;    i = (h*3 >> 7) & ~1;    of += i*os + 8;    nf += i*ns + 8;    h -= i;    w -= 16;    memset(s, 0, sizeof(*s));    for (y = (h-8) >> 3; y; y--) {	if (p->mmx2 == 1) {	    for (i = 0; i < w; i 
+= 8)		block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);	} else if (p->mmx2 == 2) {	    for (i = 0; i < w; i += 8)		block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);	} else if (p->fast > 3) {	    for (i = 0; i < w; i += 8)		block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);	} else if (p->fast > 1) {	    for (i = 0; i < w; i += 8)		block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);	} else {	    for (i = 0; i < w; i += 8)		block_metrics_c(of+i, nf+i, os, ns, 4, p, s);	}	of += 8*os;	nf += 8*ns;    }}#define METRICS(X) (X).even, (X).odd, (X).noise, (X).tempstatic void diff_fields(struct vf_priv_s *p, struct frame_stats *s,			unsigned char **old, unsigned char **new){    diff_planes(p, s, old[0], new[0], p->w, p->h,		p->stride, p->stride, p->swapped);    s->sad.even  = (s->sad.even  * 16ul) / s->num_blocks;    s->sad.odd   = (s->sad.odd   * 16ul) / s->num_blocks;    s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks;    s->sad.temp  = (s->sad.temp  * 16ul) / s->num_blocks;    if (p->verbose)	mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, "	       "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "	       "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",	       p->inframes, p->chflag, METRICS(s->max), s->num_blocks,	       METRICS(s->tiny), METRICS(s->low), METRICS(s->high),	       METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad),	       s->interlaced_low, s->interlaced_high,	       p->iosync / (double) p->in_inc);}static const char *parse_args(struct vf_priv_s *p, const char *args){    args--;    while (args && *++args &&	   (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 ||	    sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 ||	    sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 ||	    sscanf(args, "sad_thres=%lu",  &p->sad_thres  ) == 1 ||	    sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 ||	    sscanf(args, "fast=%u",        &p->fast       ) == 1 ||	    sscanf(args, "mmx2=%lu",       &p->mmx2     
  ) == 1 ||
            sscanf(args, "luma_only=%u",   &p->luma_only  ) == 1 ||
            sscanf(args, "verbose=%u",     &p->verbose    ) == 1 ||
            sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w,
                   &p->h, &p->crop_x, &p->crop_y) == 4))
        args = strchr(args, '/');
    return args;
}

/*
 * Greatest common divisor by Euclid's algorithm.
 * gcd(0, n) and gcd(n, 0) return n, so gcd(0, 0) returns 0.
 */
static unsigned long gcd(unsigned long x, unsigned long y)
{
    unsigned long lo = x;
    unsigned long hi = y;

    /* Keep the smaller operand in lo. */
    if (lo > hi) {
        lo = y;
        hi = x;
    }

    while (lo != 0) {
        unsigned long rem = hi % lo;
        hi = lo;
        lo = rem;
    }
    return hi;
}

/*
 * One-time setup of the stored frame buffers for images shaped like mpi:
 * derives the chroma crop offsets and the strides, resets the plane indices
 * and computes the plane sizes used for the allocation that follows.
 */
static void init(struct vf_priv_s *p, mp_image_t *mpi)
{
    unsigned long i;
    unsigned long plane_size, chroma_plane_size;
    unsigned char *plane;
    unsigned long cos, los;
    p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
    p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
    if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
        /* Round the luma stride up to a multiple of 16. */
        p->stride = (mpi->w + 15) & ~15;
        p->chroma_stride = p->stride >> mpi->chroma_x_shift;
    } else {
        p->stride = mpi->width;
        p->chroma_stride = mpi->chroma_width;
    }
    p->cw = p->w >> mpi->chroma_x_shift;
    p->ch = p->h >> mpi->chroma_y_shift;
    p->nplanes = 1;
    p->static_idx = 0;
    p->temp_idx = 0;
    p->old_planes = p->planes[0];
    plane_size = mpi->h * p->stride;
    chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
mpi->chroma_height * p->chroma_stride : 0;    p->memory_allocated =	malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +	       8*p->chroma_stride + 4096);    /* align to page boundary */    plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);    memset(plane, 0, NUM_STORED * plane_size);    los = p->crop_x  + p->crop_y  * p->stride;    cos = p->crop_cx + p->crop_cy * p->chroma_stride;    for (i = 0; i != NUM_STORED; i++, plane += plane_size) {	p->planes[i][0] = plane;	p->planes[NUM_STORED + i][0] = plane + los;    }    if (mpi->flags & MP_IMGFLAG_PLANAR) {	p->nplanes = 3;	memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);	for (i = 0; i != NUM_STORED; i++) {	    p->planes[i][1] = plane;	    p->planes[NUM_STORED + i][1] = plane + cos;	    plane += chroma_plane_size;	    p->planes[i][2] = plane;	    p->planes[NUM_STORED + i][2] = plane + cos;	    plane += chroma_plane_size;	}    }    p->out_dec <<= 2;    i = gcd(p->in_inc, p->out_dec);    p->in_inc /= i;    p->out_dec /= i;    p->iosync = 0;    p->num_fields = 3;}static inline double get_time(void){    struct timeval tv;    gettimeofday(&tv, 0);    return tv.tv_sec + tv.tv_usec * 1e-6;}static void get_image(struct vf_instance_s* vf, mp_image_t *mpi){    struct vf_priv_s *p = vf->priv;    static unsigned char **planes, planes_idx;    if (mpi->type == MP_IMGTYPE_STATIC) return;    if (!p->planes[0][0]) init(p, mpi);    if (mpi->type == MP_IMGTYPE_TEMP ||	(mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))	planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));    else	planes_idx = ++p->static_idx % (NUM_STORED/2);    planes = p->planes[planes_idx];    mpi->priv = p->planes[NUM_STORED + planes_idx];    if (mpi->priv == p->old_planes) {	unsigned char **old_planes =	    p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];	my_memcpy_pic(old_planes[0], p->old_planes[0],		      p->w, p->h, p->stride, p->stride);	if (mpi->flags & MP_IMGFLAG_PLANAR) {	    my_memcpy_pic(old_planes[1], 
p->old_planes[1],			  p->cw, p->ch, p->chroma_stride, p->chroma_stride);	    my_memcpy_pic(old_planes[2], p->old_planes[2],			  p->cw, p->ch, p->chroma_stride, p->chroma_stride);	}	p->old_planes = old_planes;	p->num_copies++;    }    mpi->planes[0] = planes[0];    mpi->stride[0] = p->stride;    if (mpi->flags & MP_IMGFLAG_PLANAR) {	mpi->planes[1] = planes[1];	mpi->planes[2] = planes[2];	mpi->stride[1] = mpi->stride[2] = p->chroma_stride;    }    mpi->width = p->stride;    mpi->flags |= MP_IMGFLAG_DIRECT;    mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;}static inline longcmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e){    long diff = x-y;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?