📄 interpolate8x8_altivec.c
字号:
/* if debug is set, print alignment errors */ if(((unsigned)dst) & 0x7) fprintf(stderr, "interpolate8x8_avg4_altivec_c:incorrect align, dst: %lx\n", (long)dst); if(stride & 0x7) fprintf(stderr, "interpolate8x8_avg4_altivec_c:incorrect stride, stride: %u\n", stride);#endif /* Initialization */ zerovec = vec_splat_u8(0); *((short*)&r) = 2 - rounding; r = vec_splat(r, 0); shift = vec_splat_u16(2); mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); /* interpolate */ INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4(); INTERPOLATE8X8_AVG4();}/* * This function assumes: * dst is 8 byte aligned * src is unaligned * stirde is a multiple of 8 * rounding is ignored */voidinterpolate8x8_halfpel_add_altivec_c(uint8_t *dst, const uint8_t *src, const uint32_t stride, const uint32_t rouding){ interpolate8x8_avg2_altivec_c(dst, dst, src, stride, 0, 8);}#define INTERPOLATE8X8_HALFPEL_H_ADD_ROUND() \ mask_dst = vec_lvsl(0,dst); \ s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src)); \ d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst); \ \ s2 = vec_perm(s1,s1,rot1);\ tmp = vec_avg(s1,s2);\ s1 = vec_xor(s1,s2);\ s1 = vec_sub(tmp,vec_and(s1,one));\ \ d = vec_avg(s1,d);\ \ mask = vec_perm(mask_stencil, mask_stencil, mask_dst); \ d = vec_perm(d,d,mask_dst); \ d = vec_sel(d,vec_ld(0,dst),mask); \ vec_st(d,0,dst); \ \ dst += stride; \ src += stride#define INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND() \ mask_dst = vec_lvsl(0,dst); \ s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src)); \ d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst); \ \ s1 = vec_avg(s1, vec_perm(s1,s1,rot1));\ d = vec_avg(s1,d);\ \ mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\ d = vec_perm(d,d,mask_dst);\ d = vec_sel(d,vec_ld(0,dst),mask);\ vec_st(d,0,dst);\ \ dst += stride;\ src += stride/* * This function assumes: * dst is 8 byte aligned * src is unaligned * stride is a multiple of 8 */voidinterpolate8x8_halfpel_h_add_altivec_c(uint8_t *dst, uint8_t *src, const uint32_t stride, const uint32_t rounding){ register vector unsigned char s1,s2; register vector unsigned char d; register vector unsigned char tmp; register vector unsigned char mask_dst; register vector unsigned char one; register vector unsigned char rot1; register vector unsigned char mask_stencil; register vector unsigned char mask; #ifdef DEBUG if(((unsigned)dst) & 0x7) fprintf(stderr, "interpolate8x8_halfpel_h_add_altivec_c:incorrect align, dst: %lx\n", (long)dst); if(stride & 0x7) fprintf(stderr, "interpolate8x8_halfpel_h_add_altivec_c:incorrect stride, stride: %u\n", stride);#endif /* initialization */ mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); one = vec_splat_u8(1); rot1 = vec_lvsl(1,(unsigned char*)0); if(rounding) { INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_ROUND(); } else { INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_H_ADD_NOROUND(); }}#define INTERPOLATE8X8_HALFPEL_V_ADD_ROUND()\ src += stride;\ mask_dst = vec_lvsl(0,dst);\ s2 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\ d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\ \ tmp = vec_avg(s1,s2);\ s1 = vec_xor(s1,s2);\ s1 = vec_sub(tmp,vec_and(s1,vec_splat_u8(1)));\ d = vec_avg(s1,d);\ \ mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\ d = vec_perm(d,d,mask_dst);\ d = vec_sel(d,vec_ld(0,dst),mask);\ vec_st(d,0,dst);\ \ s1 = s2;\ \ dst += stride#define INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND()\ src += stride;\ mask_dst = vec_lvsl(0,dst);\ s2 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\ d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\ \ s1 = vec_avg(s1,s2);\ d = vec_avg(s1,d);\ \ mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\ d = vec_perm(d,d,mask_dst);\ d = vec_sel(d,vec_ld(0,dst),mask);\ vec_st(d,0,dst);\ \ s1 = s2;\ dst += stride/* * This function assumes: * dst: 8 byte aligned * src: unaligned * stride is a multiple of 8 */ voidinterpolate8x8_halfpel_v_add_altivec_c(uint8_t *dst, uint8_t *src, const uint32_t stride, const uint32_t rounding){ register vector unsigned char s1,s2; register vector unsigned char tmp; register vector unsigned char d; register vector unsigned char mask; register vector unsigned char mask_dst; register vector unsigned char mask_stencil; #ifdef DEBUG if(((unsigned)dst) & 0x7) fprintf(stderr, "interpolate8x8_halfpel_v_add_altivec_c:incorrect align, dst: %lx\n", (long)dst); if(stride & 0x7) fprintf(stderr, "interpolate8x8_halfpel_v_add_altivec_c:incorrect align, dst: %u\n", stride);#endif /* initialization */ mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); if(rounding) { /* Interpolate vertical with rounding */ s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src)); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_ROUND(); } else { /* Interpolate vertical without rounding */ s1 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src)); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_V_ADD_NOROUND(); }}#define INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND()\ src += stride;\ mask_dst = vec_lvsl(0,dst);\ c10 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\ d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\ c11 = vec_perm(c10,c10,rot1);\ \ s00 = (vector unsigned short)vec_mergeh(zero,c00);\ s01 = (vector unsigned short)vec_mergeh(zero,c01);\ s10 = (vector unsigned short)vec_mergeh(zero,c10);\ s11 = (vector unsigned short)vec_mergeh(zero,c11);\ \ s00 = vec_add(s00,s10);\ s01 = vec_add(s01,s11);\ s00 = vec_add(s00,s01);\ s00 = vec_add(s00,one);\ \ s00 = vec_sr(s00,two);\ s00 = vec_add(s00, (vector unsigned short)vec_mergeh(zero,d));\ s00 = vec_sr(s00,one);\ \ d = vec_pack(s00,s00);\ mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\ d = vec_sel(d,vec_ld(0,dst),mask);\ vec_st(d,0,dst);\ \ c00 = c10;\ c01 = c11;\ dst += stride #define INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND()\ src += stride;\ mask_dst = vec_lvsl(0,dst);\ c10 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src));\ d = vec_perm(vec_ld(0,dst),vec_ld(16,dst),mask_dst);\ c11 = vec_perm(c10,c10,rot1);\ \ s00 = (vector unsigned short)vec_mergeh(zero,c00);\ s01 = (vector unsigned short)vec_mergeh(zero,c01);\ s10 = (vector unsigned short)vec_mergeh(zero,c10);\ s11 = (vector unsigned short)vec_mergeh(zero,c11);\ \ s00 = vec_add(s00,s10);\ s01 = vec_add(s01,s11);\ s00 = vec_add(s00,s01);\ s00 = vec_add(s00,two);\ s00 = vec_sr(s00,two);\ \ c00 = vec_pack(s00,s00);\ d = vec_avg(d,c00);\ \ mask = vec_perm(mask_stencil,mask_stencil,mask_dst);\ d = vec_perm(d,d,mask_dst);\ d = vec_sel(d,vec_ld(0,dst),mask);\ vec_st(d,0,dst);\ \ c00 = c10;\ c01 = c11;\ dst += stride/* * This function assumes: * dst: 8 byte aligned * src: unaligned * stride: multiple of 8 */voidinterpolate8x8_halfpel_hv_add_altivec_c(uint8_t *dst, uint8_t *src, const uint32_t stride, const uint32_t rounding){ register vector unsigned char c00,c10,c01,c11; register vector unsigned short s00,s10,s01,s11; register vector unsigned char d; register vector unsigned char mask; register vector unsigned char mask_stencil; register vector unsigned char rot1; register vector unsigned char mask_dst; register vector unsigned char zero; register vector unsigned short one,two; #ifdef DEBUG if(((unsigned)dst) & 0x7) fprintf(stderr, "interpolate8x8_halfpel_hv_add_altivec_c:incorrect align, dst: %lx\n", (long)dst); if(stride & 0x7) fprintf(stderr, "interpolate8x8_halfpel_hv_add_altivec_c:incorrect stride, stride: %u\n", stride);#endif /* initialization */ mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); rot1 = vec_lvsl(1,(unsigned char*)0); zero = vec_splat_u8(0); one = vec_splat_u16(1); two = vec_splat_u16(2); if(rounding) { /* Load the first row 'manually' */ c00 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src)); c01 = vec_perm(c00,c00,rot1); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_ROUND(); } else { /* Load the first row 'manually' */ c00 = vec_perm(vec_ld(0,src),vec_ld(16,src),vec_lvsl(0,src)); c01 = vec_perm(c00,c00,rot1); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); INTERPOLATE8X8_HALFPEL_HV_ADD_NOROUND(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -