📄 postprocess_template.c

📁 这是著名的TCPMP播放器在WINDOWS和WINCE下编译通过的源程序。笔者对其中的LIBMAD库做了针对ARM MPU的优化，并增加了字幕功能。
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
		"lea (%%"REG_d", %1, 4), %%"REG_c"		\n\t"
		"add %1, %%"REG_c"				\n\t"
		"pxor %%mm7, %%mm7				\n\t"
//	0	1	2	3	4	5	6	7	8	9	10
//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx

#define REAL_DEINT_CUBIC(a,b,c,d,e)\
		"movq " #a ", %%mm0				\n\t"\
		"movq " #b ", %%mm1				\n\t"\
		"movq " #d ", %%mm2				\n\t"\
		"movq " #e ", %%mm3				\n\t"\
		PAVGB(%%mm2, %%mm1)					/* (b+d) /2 */\
		PAVGB(%%mm3, %%mm0)					/* a(a+e) /2 */\
		"movq %%mm0, %%mm2				\n\t"\
		"punpcklbw %%mm7, %%mm0				\n\t"\
		"punpckhbw %%mm7, %%mm2				\n\t"\
		"movq %%mm1, %%mm3				\n\t"\
		"punpcklbw %%mm7, %%mm1				\n\t"\
		"punpckhbw %%mm7, %%mm3				\n\t"\
		"psubw %%mm1, %%mm0				\n\t"	/* L(a+e - (b+d))/2 */\
		"psubw %%mm3, %%mm2				\n\t"	/* H(a+e - (b+d))/2 */\
		"psraw $3, %%mm0				\n\t"	/* L(a+e - (b+d))/16 */\
		"psraw $3, %%mm2				\n\t"	/* H(a+e - (b+d))/16 */\
		"psubw %%mm0, %%mm1				\n\t"	/* L(9b + 9d - a - e)/16 */\
		"psubw %%mm2, %%mm3				\n\t"	/* H(9b + 9d - a - e)/16 */\
		"packuswb %%mm3, %%mm1				\n\t"\
		"movq %%mm1, " #c "				\n\t"
#define DEINT_CUBIC(a,b,c,d,e)  REAL_DEINT_CUBIC(a,b,c,d,e)

DEINT_CUBIC((%0), (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd, %1))
DEINT_CUBIC((%%REGa, %1), (%0, %1, 4), (%%REGd), (%%REGd, %1), (%0, %1, 8))
DEINT_CUBIC((%0, %1, 4), (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGc))
DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2))

		: : "r" (src), "r" ((long)stride)
		: "%"REG_a, "%"REG_d, "%"REG_c
	);
#else
	int x;
	src+= stride*3;
	for(x=0; x<8; x++)
	{
		src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
		src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
		src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
		src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
		src++;
	}
#endif
}

/**
 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter.
 * Will be called for every 8x8 block and can read & write from line 4-15.
 * Lines 0-3 have been passed through the deblock / dering filters already, but can be read too.
 * Lines 4-12 will be read into the deblocking filter and should be deinterlaced.
 * This filter reads lines 4-13 and writes lines 5, 7, 9 and 11.
 *
 * @param src    top-left pixel of the block; the function itself skips down 4 lines
 * @param stride distance in bytes between two vertically adjacent pixels
 * @param tmp    8 bytes of state carried between calls: on entry the unfiltered
 *               line used as the outer tap above the first filtered line, on exit
 *               the unfiltered copy of the last filtered line (line 11)
 *
 * Both code paths now use the same taps: for a filtered line n the result is
 * built from the unfiltered line n-2 (outer tap, weight -1), lines n-1 and n+1
 * (weight 4), line n itself (weight 2) and line n+2 (weight -1).
 * The scalar path adds a rounding term of 4 before the shift by 3, so results can
 * still differ from the asm path by rounding.
 */
static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	src+= stride*4;
	asm volatile(
		"lea (%0, %1), %%"REG_a"			\n\t"
		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
		"pxor %%mm7, %%mm7				\n\t"
		"movq (%2), %%mm0				\n\t"
//	0	1	2	3	4	5	6	7	8	9	10
//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx

/*
 * DEINT_FF(a,b,c,d) filters line b in place.
 * On entry mm0 holds the unfiltered line two above b; on exit mm0 holds the
 * unfiltered b, ready to serve as the outer tap of the next invocation.
 * mm7 must be zero (used for byte -> word unpacking).
 * NOTE(review): PAVGB is defined elsewhere; presumably a byte-wise average - confirm.
 */
#define REAL_DEINT_FF(a,b,c,d)\
		"movq " #a ", %%mm1				\n\t"\
		"movq " #b ", %%mm2				\n\t"\
		"movq " #c ", %%mm3				\n\t"\
		"movq " #d ", %%mm4				\n\t"\
		PAVGB(%%mm3, %%mm1)					\
		PAVGB(%%mm4, %%mm0)					\
		"movq %%mm0, %%mm3				\n\t"\
		"punpcklbw %%mm7, %%mm0				\n\t"\
		"punpckhbw %%mm7, %%mm3				\n\t"\
		"movq %%mm1, %%mm4				\n\t"\
		"punpcklbw %%mm7, %%mm1				\n\t"\
		"punpckhbw %%mm7, %%mm4				\n\t"\
		"psllw $2, %%mm1				\n\t"\
		"psllw $2, %%mm4				\n\t"\
		"psubw %%mm0, %%mm1				\n\t"\
		"psubw %%mm3, %%mm4				\n\t"\
		"movq %%mm2, %%mm5				\n\t"\
		"movq %%mm2, %%mm0				\n\t"\
		"punpcklbw %%mm7, %%mm2				\n\t"\
		"punpckhbw %%mm7, %%mm5				\n\t"\
		"paddw %%mm2, %%mm1				\n\t"\
		"paddw %%mm5, %%mm4				\n\t"\
		"psraw $2, %%mm1				\n\t"\
		"psraw $2, %%mm4				\n\t"\
		"packuswb %%mm4, %%mm1				\n\t"\
		"movq %%mm1, " #b "				\n\t"\

#define DEINT_FF(a,b,c,d)  REAL_DEINT_FF(a,b,c,d)

DEINT_FF((%0)       ,  (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4),  (%%REGd)       )
DEINT_FF((%0, %1, 4),  (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8),  (%%REGd, %1, 4))

		/* hand the unfiltered last filtered line over to the next call */
		"movq %%mm0, (%2)				\n\t"
		: : "r" (src), "r" ((long)stride), "r"(tmp)
		: "%"REG_a, "%"REG_d
	);
#else
	int x;
	src+= stride*4;
	for(x=0; x<8; x++)
	{
		/* unfiltered line two above the line being filtered (outer tap) */
		int above= tmp[x];
		int y;

		/* filter the odd lines 1, 3, 5, 7 of this column */
		for(y=1; y<8; y+=2)
		{
			/* read the centre tap before it is overwritten below */
			const int cur= src[stride*y];

			src[stride*y]= CLIP((-above + 4*src[stride*(y-1)] + 2*cur
			                     + 4*src[stride*(y+1)] - src[stride*(y+2)] + 4)>>3);
			/* the next filtered line needs the unfiltered value of this one */
			above= cur;
		}
		/* unfiltered line 7: outer tap for the first filtered line of the next call */
		tmp[x]= above;

		src++;
	}
#endif
}

/**
 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
 * will be called for every 8x8 block and can read & write from line 4-15
 * lines 0-3 have been passed through the deblock / dering filters allready, but can be read too
 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
 * this filter will read lines 4-13 and write 4-11
 */
static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	src+= stride*4;
	asm volatile(
		"lea (%0, %1), %%"REG_a"			\n\t"
		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
		"pxor %%mm7, %%mm7				\n\t"
		"movq (%2), %%mm0				\n\t"
		"movq (%3), %%mm1				\n\t"
//	0	1	2	3	4	5	6	7	8	9	10
//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx

#define REAL_DEINT_L5(t1,t2,a,b,c)\
		"movq " #a ", %%mm2				\n\t"\
		"movq " #b ", %%mm3				\n\t"\
		"movq " #c ", %%mm4				\n\t"\
		PAVGB(t2, %%mm3)					\
		PAVGB(t1, %%mm4)					\
		"movq %%mm2, %%mm5				\n\t"\
		"movq %%mm2, " #t1 "				\n\t"\
		"punpcklbw %%mm7, %%mm2				\n\t"\
		"punpckhbw %%mm7, %%mm5				\n\t"\
		"movq %%mm2, %%mm6				\n\t"\
		"paddw %%mm2, %%mm2				\n\t"\
		"paddw %%mm6, %%mm2				\n\t"\
		"movq %%mm5, %%mm6				\n\t"\
		"paddw %%mm5, %%mm5				\n\t"\
		"paddw %%mm6, %%mm5				\n\t"\
		"movq %%mm3, %%mm6				\n\t"\
		"punpcklbw %%mm7, %%mm3				\n\t"\
		"punpckhbw %%mm7, %%mm6				\n\t"\
		"paddw %%mm3, %%mm3				\n\t"\
		"paddw %%mm6, %%mm6				\n\t"\
		"paddw %%mm3, %%mm2				\n\t"\
		"paddw %%mm6, %%mm5				\n\t"\
		"movq %%mm4, %%mm6				\n\t"\
		"punpcklbw %%mm7, %%mm4				\n\t"\
		"punpckhbw %%mm7, %%mm6				\n\t"\
		"psubw %%mm4, %%mm2				\n\t"\
		"psubw %%mm6, %%mm5				\n\t"\
		"psraw $2, %%mm2				\n\t"\
		"psraw $2, %%mm5				\n\t"\
		"packuswb %%mm5, %%mm2				\n\t"\
		"movq %%mm2, " #a "				\n\t"\

#define DEINT_L5(t1,t2,a,b,c)  REAL_DEINT_L5(t1,t2,a,b,c)

DEINT_L5(%%mm0, %%mm1, (%0)           , (%%REGa)       , (%%REGa, %1)   )
DEINT_L5(%%mm1, %%mm0, (%%REGa)       , (%%REGa, %1)   , (%%REGa, %1, 2))
DEINT_L5(%%mm0, %%mm1, (%%REGa, %1)   , (%%REGa, %1, 2), (%0, %1, 4)   )
DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4)    , (%%REGd)       )
DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )  
DEINT_L5(%%mm1, %%mm0, (%%REGd)       , (%%REGd, %1)   , (%%REGd, %1, 2))
DEINT_L5(%%mm0, %%mm1, (%%REGd, %1)   , (%%REGd, %1, 2), (%0, %1, 8)   )
DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))

		"movq %%mm0, (%2)				\n\t"
		"movq %%mm1, (%3)				\n\t"
		: : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
		: "%"REG_a, "%"REG_d
	);
#else
	int x;
	src+= stride*4;
	for(x=0; x<8; x++)
	{
		int t1= tmp[x];
		int t2= tmp2[x];
		int t3= src[0];

		src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
		t1= src[stride*1];
		src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
		t2= src[stride*2];
		src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
		t3= src[stride*3];
		src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
		t1= src[stride*4];
		src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
		t2= src[stride*5];
		src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
		t3= src[stride*6];
		src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
		t1= src[stride*7];
		src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);

		tmp[x]= t3;
		tmp2[x]= t1;

		src++;
	}
#endif
}

/**
 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
 * Will be called for every 8x8 block and can read & write from line 4-15.
 * Lines 0-3 have been passed through the deblock / dering filters already, but can be read too.
 * Lines 4-12 will be read into the deblocking filter and should be deinterlaced.
 * This filter reads lines 4-12 and writes lines 4-11.
 *
 * @param src    top-left pixel of the block; the function itself skips down 4 lines
 * @param stride distance in bytes between two vertically adjacent pixels
 * @param tmp    8 bytes of carried state: on entry the value used as the line above
 *               the first filtered line, on exit the unfiltered line 7 (counted
 *               from the first filtered line)
 *
 * Each output line is avg(avg(line above, line below), line itself), where the
 * neighbours are always taken unfiltered.
 */
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	/* The asm path is kept exactly as it was: it depends on instruction order. */
	src+= 4*stride;
	asm volatile(
		"lea (%0, %1), %%"REG_a"			\n\t"
		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1

		"movq (%2), %%mm0				\n\t" // L0
		"movq (%%"REG_a"), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				      // L0+L2
		"movq (%0), %%mm2				\n\t" // L1
		PAVGB(%%mm2, %%mm0)
		"movq %%mm0, (%0)				\n\t"
		"movq (%%"REG_a", %1), %%mm0			\n\t" // L3
		PAVGB(%%mm0, %%mm2)				      // L1+L3
		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
		"movq %%mm2, (%%"REG_a")			\n\t"
		"movq (%%"REG_a", %1, 2), %%mm2			\n\t" // L4
		PAVGB(%%mm2, %%mm1)				      // L2+L4
		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
		"movq %%mm1, (%%"REG_a", %1)			\n\t"
		"movq (%0, %1, 4), %%mm1			\n\t" // L5
		PAVGB(%%mm1, %%mm0)				      // L3+L5
		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
		"movq (%%"REG_d"), %%mm0			\n\t" // L6
		PAVGB(%%mm0, %%mm2)				      // L4+L6
		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4)			\n\t"
		"movq (%%"REG_d", %1), %%mm2			\n\t" // L7
		PAVGB(%%mm2, %%mm1)				      // L5+L7
		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
		"movq %%mm1, (%%"REG_d")			\n\t"
		"movq (%%"REG_d", %1, 2), %%mm1			\n\t" // L8
		PAVGB(%%mm1, %%mm0)				      // L6+L8
		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
		"movq %%mm0, (%%"REG_d", %1)			\n\t"
		"movq (%0, %1, 8), %%mm0			\n\t" // L9
		PAVGB(%%mm0, %%mm2)				      // L7+L9
		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
		"movq %%mm2, (%%"REG_d", %1, 2)			\n\t"
		"movq %%mm1, (%2)				\n\t"

		: : "r" (src), "r" ((long)stride), "r" (tmp)
		: "%"REG_a, "%"REG_d
	);
#else
	/*
	 * Scalar path: processes 4 horizontally adjacent pixels at once, packed
	 * into one 32 bit word, in two passes (left and right half of the block).
	 * The packed words are kept in uint32_t so that no value ever has to be
	 * squeezed into a signed int.
	 * NOTE(review): the uint32_t accesses assume src, tmp and stride are
	 * suitably aligned for 32 bit loads/stores - confirm against the callers.
	 */
	int x, y;
	src+= 4*stride;

	for(x=0; x<2; x++){
		/* rolling window: unfiltered line above and unfiltered current line */
		uint32_t above= *(uint32_t*)&tmp[stride*0];
		uint32_t cur  = *(uint32_t*)&src[stride*0];

		for(y=0; y<8; y++){
			const uint32_t below= *(uint32_t*)&src[stride*(y+1)];
			/* per-byte average of the two neighbours, rounded down */
			const uint32_t outer= (above&below) + (((above^below)&0xFEFEFEFEUL)>>1);

			/* per-byte average with the centre line, rounded up */
			*(uint32_t*)&src[stride*y]= (outer|cur) - (((outer^cur)&0xFEFEFEFEUL)>>1);

			above= cur;
			cur= below;
		}

		/* unfiltered line 7 becomes the "line above" of the next call */
		*(uint32_t*)&tmp[stride*0]= above;
		src += 4;
		tmp += 4;
	}
#endif
}

/**
 * Deinterlaces the given block by applying a median filter to every second line.
 * will be called for every 8x8 block and can read & write from line 4-15,
 * lines 0-3 have been passed through the deblock / dering filters already, but can be read too
 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
 */
static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
{
#ifdef HAVE_MMX
	src+= 4*stride;
#ifdef HAVE_MMX2
	asm volatile(
		"lea (%0, %1), %%"REG_a"			\n\t"
		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1

		"movq (%0), %%mm0				\n\t" //
		"movq (%%"REG_a", %1), %%mm2			\n\t" //
		"movq (%%"REG_a"), %%mm1			\n\t" //
		"movq %%mm0, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm0				\n\t" //
		"pminub %%mm3, %%mm1				\n\t" //
		"pmaxub %%mm2, %%mm1				\n\t" //
		"pminub %%mm1, %%mm0				\n\t"
		"movq %%mm0, (%%"REG_a")			\n\t"

		"movq (%0, %1, 4), %%mm0			\n\t" //
		"movq (%%"REG_a", %1, 2), %%mm1			\n\t" //
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm2				\n\t" //
		"pminub %%mm3, %%mm1				\n\t" //
		"pmaxub %%mm0, %%mm1				\n\t" //
		"pminub %%mm1, %%mm2				\n\t"
		"movq %%mm2, (%%"REG_a", %1, 2)			\n\t"

		"movq (%%"REG_d"), %%mm2			\n\t" //
		"movq (%%"REG_d", %1), %%mm1			\n\t" //
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" //
		"pminub %%mm3, %%mm0				\n\t" //
		"pmaxub %%mm1, %%mm0				\n\t" //
		"pminub %%mm0, %%mm2				\n\t"
		"movq %%mm2, (%%"REG_d")			\n\t"

		"movq (%%"REG_d", %1, 2), %%mm2			\n\t" //
		"movq (%0, %1, 8), %%mm0			\n\t" //
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" //
		"pminub %%mm3, %%mm0				\n\t" //
		"pmaxub %%mm1, %%mm0				\n\t" //
		"pminub %%mm0, %%mm2				\n\t"
		"movq %%mm2, (%%"REG_d", %1, 2)			\n\t"


		: : "r" (src), "r" ((long)stride)
		: "%"REG_a, "%"REG_d
	);

#else // MMX without MMX2
	asm volatile(
		"lea (%0, %1), %%"REG_a"			\n\t"
		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
		"pxor %%mm7
上一页 1 2 3 45
💿 文件大小 8959 K
👤 上传用户 stone825
📂 所属分类 Windows CE
🏷️ 相关标签

#WINDWOWS #LIBMAD #TCPMP #WINCE
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -