⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 interpolate8x8_altivec.c

📁 这是一个压缩解压包,用C语言进行编程的,里面有详细的源代码.
💻 C
📖 第 1 页 / 共 2 页
字号:
    /*
     * NOTE(review): this is the tail of interpolate8x8_avg4_altivec_c();
     * the function header, its local declarations and the matching
     * "#ifdef DEBUG" are above this chunk and not visible here.
     */
    /* if debug is set, print alignment errors */
    if(((unsigned)dst) & 0x7)
        fprintf(stderr, "interpolate8x8_avg4_altivec_c:incorrect align, dst: %lx\n", (long)dst);
    if(stride & 0x7)
        fprintf(stderr, "interpolate8x8_avg4_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

    /* Initialization */
    zerovec = vec_splat_u8(0);
    /* Replicate the rounding bias (2 - rounding) into every 16-bit lane */
    *((short*)&r) = 2 - rounding;
    r = vec_splat(r, 0);
    shift = vec_splat_u16(2);
    /* Byte-select stencil: packs to 0x00 in bytes 0-7 and 0xFF in bytes
     * 8-15; permuted by the dst alignment it picks out which 8 bytes of
     * the 16-byte vector store belong to the current 8-byte dst row. */
    mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

    /* interpolate: one macro expansion per row of the 8x8 block */
    INTERPOLATE8X8_AVG4();
    INTERPOLATE8X8_AVG4();
    INTERPOLATE8X8_AVG4();
    INTERPOLATE8X8_AVG4();

    INTERPOLATE8X8_AVG4();
    INTERPOLATE8X8_AVG4();
    INTERPOLATE8X8_AVG4();
    INTERPOLATE8X8_AVG4();
}

/*
 * Add-interpolation for the no-shift (full-pel) case: averages the 8x8
 * src block into dst by delegating to interpolate8x8_avg2_altivec_c
 * (defined elsewhere in this file) with dst as both destination and
 * first source operand.
 *
 * This function assumes:
 *  dst is 8 byte aligned
 *  src is unaligned
 *  stride is a multiple of 8
 *  rounding is ignored (a constant 0 is passed on below)
 *
 * NOTE(review): the last parameter is spelled "rouding" (sic) — harmless
 * since it is unused, but worth fixing against the prototype.
 */
void
interpolate8x8_halfpel_add_altivec_c(uint8_t *dst, const uint8_t *src, const uint32_t stride, const uint32_t rouding)
{
	interpolate8x8_avg2_altivec_c(dst, dst, src, stride, 0, 8);
}

/*
 * One row of horizontal halfpel interpolation, MPEG-4 "rounding" mode,
 * averaged into dst.  vec_avg rounds up; the xor/and/sub sequence below
 * converts its result into the truncating average ((a+b)>>1) required
 * when rounding==1, before the rounding-up average with the old dst.
 * Expects s1, s2, d, tmp, mask, mask_dst, mask_stencil, one, rot1,
 * dst, src, stride to be in scope at the expansion site.
 */
#define INTERPOLATE8X8_HALFPEL_H_ADD_ROUND() \
	mask_dst = vec_lvsl(0,dst);										\
	s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));	\
	d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);		\
	\
	s2 = vec_perm(s1,s1,rot1);\
	tmp = vec_avg(s1,s2);\
	s1 = vec_xor(s1,s2);\
	s1 = vec_sub(tmp,vec_and(s1,one));\
	\
	d = vec_avg(s1,d);\
	\
	mask = vec_perm(mask_stencil, mask_stencil, mask_dst); \
	d = vec_perm(d,d,mask_dst);	\
	d = vec_sel(d,vec_ld(0,dst),mask);	\
	vec_st(d,0,dst);					\
	\
	dst += stride; \
	src += stride

/*
 * Same as above but without the rounding correction: both averages use
 * vec_avg's native rounding-up behaviour.
 */
#define INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND() \
	mask_dst = vec_lvsl(0,dst);										\
	s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));	\
	d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);		\
	\
	s1 = vec_avg(s1, vec_perm(s1,s1,rot1));\
	d = vec_avg(s1,d);\
	\
	mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\
	d = vec_perm(d,d,mask_dst);\
	d = vec_sel(d,vec_ld(0,dst),mask);\
	vec_st(d,0,dst);\
	\
	dst += stride;\
	src += stride

/*
 * Horizontal halfpel interpolation of src, averaged into dst, for an
 * 8x8 block.
 *
 * This function assumes:
 *	dst is 8 byte aligned
 *	src is unaligned
 *	stride is a multiple of 8
 */
void
interpolate8x8_halfpel_h_add_altivec_c(uint8_t *dst, uint8_t *src, const uint32_t stride, const uint32_t rounding)
{
	register vector unsigned char s1,s2;
	register vector unsigned char d;
	register vector unsigned char tmp;

	register vector unsigned char mask_dst;
	register vector unsigned char one;
	register vector unsigned char rot1;

	register vector unsigned char mask_stencil;
	register vector unsigned char mask;

#ifdef DEBUG
	if(((unsigned)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_h_add_altivec_c:incorrect align, dst: %lx\n", (long)dst);
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_h_add_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	/* initialization */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));
	one = vec_splat_u8(1);
	/* rot1: permute vector that shifts every byte left by one position,
	 * giving the "pixel to the right" operand for the halfpel average */
	rot1 = vec_lvsl(1,(unsigned char*)0);

	if(rounding) {
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();

		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_ROUND();
	}
	else {

		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();

		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND();
	}
}

/*
 * One row of vertical halfpel interpolation, rounding mode: averages the
 * previous row (s1, carried across expansions) with the next row (s2),
 * corrected to a truncating average as in the horizontal case, then
 * averaged into dst.
 */
#define INTERPOLATE8X8_HALFPEL_V_ADD_ROUND()\
	src += stride;\
	mask_dst = vec_lvsl(0,dst);\
	s2 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\
	d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\
	\
	tmp = vec_avg(s1,s2);\
	s1 = vec_xor(s1,s2);\
	s1 = vec_sub(tmp,vec_and(s1,vec_splat_u8(1)));\
	d = vec_avg(s1,d);\
	\
	mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\
	d = vec_perm(d,d,mask_dst);\
	d = vec_sel(d,vec_ld(0,dst),mask);\
	vec_st(d,0,dst);\
	\
	s1 = s2;\
	\
	dst += stride

/*
 * One row of vertical halfpel interpolation without the rounding
 * correction; s1 again carries the previous row between expansions.
 */
#define INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND()\
	src += stride;\
	mask_dst = vec_lvsl(0,dst);\
	s2 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\
	d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\
	\
	s1 = vec_avg(s1,s2);\
	d = vec_avg(s1,d);\
	\
	mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\
	d = vec_perm(d,d,mask_dst);\
	d = vec_sel(d,vec_ld(0,dst),mask);\
	vec_st(d,0,dst);\
	\
	s1 = s2;\
	dst += stride

/*
 * Vertical halfpel interpolation of src, averaged into dst, for an 8x8
 * block.
 *
 * This function assumes:
 *	dst: 8 byte aligned
 *	src: unaligned
 *	stride is a multiple of 8
 */
void
interpolate8x8_halfpel_v_add_altivec_c(uint8_t *dst, uint8_t *src, const uint32_t stride, const uint32_t rounding)
{
	register vector unsigned char s1,s2;
	register vector unsigned char tmp;
	register vector unsigned char d;

	register vector unsigned char mask;
	register vector unsigned char mask_dst;
	register vector unsigned char mask_stencil;

#ifdef DEBUG
	if(((unsigned)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_v_add_altivec_c:incorrect align, dst: %lx\n", (long)dst);
	/* NOTE(review): this message is a copy-paste slip — it reports
	 * "incorrect align, dst" although the check (and the value printed)
	 * is the stride; compare the sibling functions' messages. */
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_v_add_altivec_c:incorrect align, dst: %u\n", stride);
#endif
	/* initialization */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	if(rounding) {

		/* Interpolate vertical with rounding */
		/* Prime s1 with the first source row; each macro then loads the
		 * following row into s2 and rolls it into s1 */
		s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));

		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();

		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_ROUND();
	}
	else {
		/* Interpolate vertical without rounding */
		s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));

		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();

		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND();
	}
}

/*
 * One row of 2D (horizontal+vertical) halfpel interpolation, rounding
 * mode: widens the four neighbouring pixels (c00/c01 from the previous
 * row, c10/c11 from the new row) to 16 bits, sums them with bias `one`,
 * shifts by `two`, then folds the result into dst with a truncating
 * average done in 16-bit arithmetic.  c00/c01 carry the previous row
 * between expansions.
 *
 * NOTE(review): unlike the other store sequences here, the result is not
 * re-permuted by mask_dst before vec_sel — presumably because vec_pack
 * already duplicates the 8 result bytes into both halves of d; confirm
 * against a known-good reference before restructuring.
 */
#define INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND()\
	src += stride;\
	mask_dst = vec_lvsl(0,dst);\
	c10 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\
	d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\
	c11 = vec_perm(c10,c10,rot1);\
	\
	s00 = (vector unsigned short)vec_mergeh(zero,c00);\
	s01 = (vector unsigned short)vec_mergeh(zero,c01);\
	s10 = (vector unsigned short)vec_mergeh(zero,c10);\
	s11 = (vector unsigned short)vec_mergeh(zero,c11);\
	\
	s00 = vec_add(s00,s10);\
	s01 = vec_add(s01,s11);\
	s00 = vec_add(s00,s01);\
	s00 = vec_add(s00,one);\
	\
	s00 = vec_sr(s00,two);\
	s00 = vec_add(s00, (vector unsigned short)vec_mergeh(zero,d));\
	s00 = vec_sr(s00,one);\
	\
	d = vec_pack(s00,s00);\
	mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\
	d = vec_sel(d,vec_ld(0,dst),mask);\
	vec_st(d,0,dst);\
	\
	c00 = c10;\
	c01 = c11;\
	dst += stride

/*
 * One row of 2D halfpel interpolation without rounding: bias `two`
 * before the shift, then a rounding-up vec_avg against dst.
 */
#define INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND()\
	src += stride;\
	mask_dst = vec_lvsl(0,dst);\
	c10 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\
	d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\
	c11 = vec_perm(c10,c10,rot1);\
	\
	s00 = (vector unsigned short)vec_mergeh(zero,c00);\
	s01 = (vector unsigned short)vec_mergeh(zero,c01);\
	s10 = (vector unsigned short)vec_mergeh(zero,c10);\
	s11 = (vector unsigned short)vec_mergeh(zero,c11);\
	\
	s00 = vec_add(s00,s10);\
	s01 = vec_add(s01,s11);\
	s00 = vec_add(s00,s01);\
	s00 = vec_add(s00,two);\
	s00 = vec_sr(s00,two);\
	\
	c00 = vec_pack(s00,s00);\
	d = vec_avg(d,c00);\
	\
	mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\
	d = vec_perm(d,d,mask_dst);\
	d = vec_sel(d,vec_ld(0,dst),mask);\
	vec_st(d,0,dst);\
	\
	c00 = c10;\
	c01 = c11;\
	dst += stride

/*
 * 2D (horizontal+vertical) halfpel interpolation of src, averaged into
 * dst, for an 8x8 block.
 *
 * This function assumes:
 *	dst: 8 byte aligned
 *	src: unaligned
 *	stride: multiple of 8
 */
void
interpolate8x8_halfpel_hv_add_altivec_c(uint8_t *dst, uint8_t *src, const uint32_t stride, const uint32_t rounding)
{
	register vector unsigned char  c00,c10,c01,c11;
	register vector unsigned short s00,s10,s01,s11;
	register vector unsigned char  d;

	register vector unsigned char mask;
	register vector unsigned char mask_stencil;

	register vector unsigned char rot1;
	register vector unsigned char mask_dst;
	register vector unsigned char zero;
	register vector unsigned short one,two;

#ifdef DEBUG
	if(((unsigned)dst) & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_hv_add_altivec_c:incorrect align, dst: %lx\n", (long)dst);
	if(stride & 0x7)
		fprintf(stderr, "interpolate8x8_halfpel_hv_add_altivec_c:incorrect stride, stride: %u\n", stride);
#endif

	/* initialization */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));
	rot1 = vec_lvsl(1,(unsigned char*)0);
	zero = vec_splat_u8(0);
	one = vec_splat_u16(1);
	two = vec_splat_u16(2);

	if(rounding) {

		/* Load the first row 'manually' */
		c00 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));
		c01 = vec_perm(c00,c00,rot1);

		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();

		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND();
	}
	else {

		/* Load the first row 'manually' */
		c00 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));
		c01 = vec_perm(c00,c00,rot1);

		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();

		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
		INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND();
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -