📄 colorspace_altivec.c
字号:
};

/* RGB Input */

/*
 * READ_RGB_Y_ALTIVEC(SIZE, ROW, UVID, C1, C2, C3, C4)
 *
 * Handles one row of packed 32-bit pixels: loads two vectors (32 bytes)
 * from x_ptr + ROW * x_stride, extracts three components with the per-word
 * right shifts shift_consts[C1], shift_consts[C2] and shift_consts[C3],
 * packs each component into 16-bit lanes (r, g, b) and adds them to the
 * running sums r##UVID, g##UVID and b##UVID that READ_RGB_UV_ALTIVEC
 * consumes later.  It then computes
 *     lum = (r * k1 + g * k2 + b * k3) >> k0
 * where k0..k3 are elements 0..3 of vec_fix_ins[0], packs the result to
 * bytes, adds y_add and merges it into the vector at y_ptr + ROW * y_stride.
 *
 * SIZE and C4 are accepted but not referenced by this macro.
 *
 * shift_consts[3] doubles as the "zero" operand of every vec_sel/vec_pack
 * below; presumably it is an all-zero vector (its definition is outside
 * this excerpt -- TODO confirm).
 *
 * Expects p0, p1, t0, t1, r, g, b, lum, mask and y_vec to be declared (see
 * RGB_TO_YV12_ALTIVEC) and x_ptr, x_stride, y_ptr, y_stride, y_add,
 * shift_consts and vec_fix_ins to be in scope.  The expansion has no
 * trailing semicolon.
 */
#define READ_RGB_Y_ALTIVEC(SIZE,ROW,UVID,C1,C2,C3,C4) \
	p0 = vec_ld(0, (unsigned int*)(x_ptr + (ROW) * x_stride)); \
	p1 = vec_ld(16, (unsigned int*)(x_ptr + (ROW) * x_stride)); \
	/* build the per-word select mask used to isolate one shifted component */ \
	mask = vec_mergeh((vector unsigned char)shift_consts[3], vec_splat_u8(-1)); \
	mask = (vector unsigned char)vec_mergeh((vector unsigned short)shift_consts[3], (vector unsigned short)mask); \
	/* component C1 -> r */ \
	t0 = vec_sr(p0, shift_consts[C1]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C1]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	r = vec_pack(t0, t1); \
	r##UVID = vec_add(r##UVID, r); \
	/* component C2 -> g */ \
	t0 = vec_sr(p0, shift_consts[C2]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C2]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	g = vec_pack(t0, t1); \
	g##UVID = vec_add(g##UVID, g); \
	/* component C3 -> b */ \
	t0 = vec_sr(p0, shift_consts[C3]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C3]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	b = vec_pack(t0, t1); \
	b##UVID = vec_add(b##UVID, b); \
	/* weighted sum of r, g, b, then shift, pack to bytes and add the luma offset */ \
	lum = vec_mladd(r, vec_splat(vec_fix_ins[0], 1), (vector unsigned short)shift_consts[3]); \
	lum = vec_mladd(g, vec_splat(vec_fix_ins[0], 2), lum); \
	lum = vec_mladd(b, vec_splat(vec_fix_ins[0], 3), lum); \
	lum = vec_sr(lum, vec_splat(vec_fix_ins[0], 0)); \
	y_vec = vec_pack(lum, (vector unsigned short)shift_consts[3]); \
	y_vec = vec_add(y_vec, y_add); \
	/* read-modify-write: rotate result and mask by the vec_lvsl permute vector, keep the other bytes from memory */ \
	mask = vec_pack((vector unsigned short)shift_consts[3], vec_splat_u16(-1)); \
	mask = vec_perm(mask, mask, vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	y_vec = vec_perm(y_vec, y_vec, vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	y_vec = vec_sel(y_vec, vec_ld(0, y_ptr + (ROW)*y_stride), mask); \
	vec_st(y_vec, 0, y_ptr + (ROW)*y_stride)

/*
 * READ_RGB_UV_ALTIVEC(UV_ROW, UVID)
 *
 * Turns the component sums accumulated in r##UVID, g##UVID and b##UVID
 * (by READ_RGB_Y_ALTIVEC) into one row of U and one row of V samples.
 * vec_sum4s first adds neighbouring 16-bit lanes into 32-bit lanes; each
 * chroma plane is then
 *     (r * k1 + g * k2 + b * k3) >> k0
 * with k0..k3 taken from elements 0..3 of vec_fix_ins[1] for U and
 * vec_fix_ins[2] for V.  The result is packed down to bytes, offset by
 * u_add / v_add, rotated with the vec_lvsr permute vector for the
 * destination address and merged into the vector already in memory at
 * u_ptr / v_ptr + UV_ROW * uv_stride.
 *
 * Expects t3, mask, u_vec and v_vec to be declared (see
 * RGB_TO_YV12_ALTIVEC) and u_ptr, v_ptr, uv_stride, u_add, v_add,
 * shift_consts and vec_fix_ins to be in scope.  The expansion has no
 * trailing semicolon.
 */
#define READ_RGB_UV_ALTIVEC(UV_ROW,UVID) \
	r##UVID = (vector unsigned short)vec_sum4s((vector signed short)r##UVID, (vector signed int)shift_consts[3]); \
	g##UVID = (vector unsigned short)vec_sum4s((vector signed short)g##UVID, (vector signed int)shift_consts[3]); \
	b##UVID = (vector unsigned short)vec_sum4s((vector signed short)b##UVID, (vector signed int)shift_consts[3]); \
	/* U plane: coefficients and shift come from vec_fix_ins[1] */ \
	t3 = vec_mulo((vector signed short)r##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 1)); \
	t3 = vec_add(t3, vec_mulo((vector signed short)g##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 2))); \
	t3 = vec_add(t3, vec_mulo((vector signed short)b##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 3))); \
	t3 = vec_sr(t3, (vector unsigned int)vec_mergeh((vector unsigned short)shift_consts[3], vec_splat(vec_fix_ins[1], 0))); \
	\
	u_vec = vec_pack(vec_pack((vector unsigned int)t3, shift_consts[3]), (vector unsigned short)shift_consts[3]); \
	u_vec = vec_add(u_vec, u_add); \
	/* merge the new U bytes into the vector already stored at the destination */ \
	mask = vec_pack(vec_splat_u16(-1), (vector unsigned short)shift_consts[3]); \
	mask = (vector unsigned char)vec_pack((vector unsigned int)mask, shift_consts[3]); \
	mask = vec_perm(mask, mask, vec_lvsr(0, u_ptr + (UV_ROW)*uv_stride)); \
	u_vec = vec_perm(u_vec, u_vec, vec_lvsr(0, u_ptr + (UV_ROW)*uv_stride)); \
	u_vec = vec_sel(vec_ld(0, u_ptr + (UV_ROW)*uv_stride), u_vec, mask); \
	vec_st(u_vec, 0, u_ptr + (UV_ROW)*uv_stride); \
	/* V plane: same sequence with vec_fix_ins[2] */ \
	t3 = vec_mulo((vector signed short)r##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 1)); \
	t3 = vec_add(t3, vec_mulo((vector signed short)g##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 2))); \
	t3 = vec_add(t3, vec_mulo((vector signed short)b##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 3))); \
	t3 = vec_sr(t3, (vector unsigned int)vec_mergeh((vector unsigned short)shift_consts[3], vec_splat(vec_fix_ins[2], 0))); \
	\
	v_vec = vec_pack(vec_pack((vector unsigned int)t3, shift_consts[3]), (vector unsigned short)shift_consts[3]); \
	v_vec = vec_add(v_vec, v_add); \
	/* merge the new V bytes into the vector already stored at the destination */ \
	mask = vec_pack(vec_splat_u16(-1), (vector unsigned short)shift_consts[3]); \
	mask = (vector unsigned char)vec_pack((vector unsigned int)mask, shift_consts[3]); \
	mask = vec_perm(mask, mask, vec_lvsr(0, v_ptr + (UV_ROW) * uv_stride)); \
	v_vec = vec_perm(v_vec, v_vec, vec_lvsr(0, v_ptr + (UV_ROW) * uv_stride)); \
	v_vec = vec_sel(vec_ld(0, v_ptr + (UV_ROW) * uv_stride), v_vec, mask); \
	vec_st(v_vec, 0, v_ptr + (UV_ROW) * uv_stride)

/*
 * Per-row hook; expands to nothing.  Presumably required by the
 * MAKE_COLORSPACE_ALTIVEC_FROM_RGB generator, which is defined outside
 * this excerpt -- TODO confirm.
 */
#define RGB_TO_YV12_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/* nothing */

/*
 * RGB_TO_YV12_ALTIVEC(SIZE, C1, C2, C3, C4)
 *
 * Body for one step of the packed-RGB -> planar conversion.  Declares the
 * vector temporaries used by the READ_RGB_* macros, issues two vec_dstt
 * data-stream touches (at x_ptr and two source rows further on), clears
 * the component sums r0/g0/b0, converts source rows 0 and 1 to luma and
 * finally emits chroma row 0 from the sums of both rows.
 *
 * C1..C3 index shift_consts and select which byte of each 32-bit pixel
 * feeds r, g and b respectively.  Because the expansion starts with
 * declarations it must be placed at the start of a block (or compiled
 * with a compiler that allows mixed declarations and code).
 */
#define RGB_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vector unsigned int p0, p1; \
	vector unsigned int t0, t1; \
	vector unsigned short r, g, b, r0, g0, b0; \
	vector unsigned short lum; \
	vector unsigned char mask; \
	vector unsigned char y_vec; \
	vector unsigned char u_vec; \
	vector unsigned char v_vec; \
	vector signed int t3; \
	\
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
	\
	r0 = g0 = b0 = (vector unsigned short)shift_consts[3]; \
	\
	READ_RGB_Y_ALTIVEC(SIZE, 0, 0, C1, C2, C3, C4); \
	READ_RGB_Y_ALTIVEC(SIZE, 1, 0, C1, C2, C3, C4); \
	READ_RGB_UV_ALTIVEC(0, 0)

/* YUV input */

/*
 * READ_YUYV_Y_ALTIVEC(ROW, C1, C2, C3, C4)
 *
 * Handles one row of packed 4:2:2 input: loads two vectors (32 bytes) from
 * x_ptr + ROW * x_stride and de-interleaves them.  C1..C4 are the byte
 * positions inside each 4-byte group: C1 and C3 are the two luma bytes,
 * C2 the U byte and C4 the V byte.
 *
 * Each gather builds the permute vector {0,4,8,...} + Cn from
 * vec_lvsl(0, NULL) shifted left by 2.  The 16 luma bytes are stored
 * directly with vec_st at y_ptr + ROW * y_stride; the gathered chroma is
 * left in u##ROW and v##ROW for READ_YUYV_UV_ALTIVEC.
 *
 * Expects p0, p1, t, lum0, lum1, u##ROW and v##ROW to be declared by the
 * enclosing code (not visible in this excerpt).  Unlike the RGB macros,
 * this expansion ends with a semicolon.
 */
#define READ_YUYV_Y_ALTIVEC(ROW,C1,C2,C3,C4) \
	p0 = vec_ld(0, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	p1 = vec_ld(16, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	/* gather byte C1 of every 4-byte group (first luma sample of each pair) */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C1)); \
	\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	/* gather byte C3 (second luma sample) and interleave it with the first */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C3)); \
	\
	lum0 = vec_mergeh(lum0, (vector unsigned char)vec_perm(p0, p0, t)); \
	lum1 = vec_mergeh(lum1, (vector unsigned char)vec_perm(p1, p1, t)); \
	/* first 8 bytes from p0's luma, last 8 bytes from p1's luma */ \
	lum0 = vec_sel(lum0, lum1, vec_pack(vec_splat_u16(0), vec_splat_u16(-1))); \
	vec_st(lum0, 0, y_ptr + (ROW)*y_stride); \
	/* gather byte C2 (U) from both vectors and join them into u##ROW */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C2)); \
	\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	lum1 = vec_perm(lum1, lum1, vec_lvsr(4, (unsigned char*)0)); \
	t = vec_pack(vec_pack(vec_splat_u32(0), vec_splat_u32(-1)), vec_splat_u16(-1)); \
	u##ROW = vec_sel(lum0, lum1, t); \
	/* gather byte C4 (V) from both vectors and join them into v##ROW */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C4)); \
	\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	lum1 = vec_perm(lum1, lum1, vec_lvsr(4, (unsigned char*)0)); \
	t = vec_pack(vec_pack(vec_splat_u32(0), vec_splat_u32(-1)), vec_splat_u16(-1)); \
	v##ROW = vec_sel(lum0, lum1, t);

/*
 * READ_YUYV_UV_ALTIVEC(UV_ROW, ROW1, ROW2, C1, C2, C3, C4)
 *
 * Averages (vec_avg) the chroma gathered for source rows ROW1 and ROW2,
 * leaving the result in u##ROW1 / v##ROW1, then rotates it with the
 * vec_lvsl permute vector for the destination address and merges it into
 * the vector already in memory at u_ptr / v_ptr + UV_ROW * uv_stride.
 *
 * C1..C4 are accepted but not referenced.  This expansion ends with a
 * semicolon.
 */
#define READ_YUYV_UV_ALTIVEC(UV_ROW,ROW1,ROW2,C1,C2,C3,C4) \
	u##ROW1 = vec_avg(u##ROW1, u##ROW2); \
	t = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	t = vec_perm(t, t, vec_lvsl(0, u_ptr + (UV_ROW)*uv_stride)); \
	u##ROW1 = vec_perm(u##ROW1, u##ROW1, vec_lvsl(0, u_ptr + (UV_ROW)*uv_stride)); \
	u##ROW1 = vec_sel(u##ROW1, vec_ld(0, u_ptr + (UV_ROW)*uv_stride), t); \
	vec_st(u##ROW1, 0, u_ptr + (UV_ROW)*uv_stride); \
	/* same sequence for the V plane */ \
	v##ROW1 = vec_avg(v##ROW1, v##ROW2); \
	t = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	t = vec_perm(t, t, vec_lvsl(0, v_ptr + (UV_ROW)*uv_stride)); \
	v##ROW1 = vec_perm(v##ROW1, v##ROW1, vec_lvsl(0, v_ptr + (UV_ROW)*uv_stride)); \
	v##ROW1 = vec_sel(v##ROW1, vec_ld(0, v_ptr + (UV_ROW)*uv_stride), t); \
	vec_st(v##ROW1, 0, v_ptr + (UV_ROW)*uv_stride);

/*
 * Per-row hook; expands to nothing.  Presumably required by the
 * MAKE_COLORSPACE_ALTIVEC_FROM_YUV generator, which is defined outside
 * this excerpt -- TODO confirm.
 */
#define YUYV_TO_YV12_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/*nothing*/

/*
 * YUYV_TO_YV12_ALTIVEC(SIZE, C1, C2, C3, C4)
 *
 * Body for one step of the packed 4:2:2 -> planar conversion: two vec_dstt
 * data-stream touches on the source, luma/chroma extraction for source
 * rows 0 and 1, then one averaged chroma row.  The READ_YUYV_* invocations
 * carry no semicolons here because those macros already end with one.
 */
#define YUYV_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
	\
	READ_YUYV_Y_ALTIVEC (0, C1,C2,C3,C4) \
	READ_YUYV_Y_ALTIVEC (1, C1,C2,C3,C4) \
	READ_YUYV_UV_ALTIVEC(0, 0, 1, C1,C2,C3,C4)

/*
 * Instantiate the packed-RGB -> YV12 converters.  The last four arguments
 * are C1..C4 as consumed by RGB_TO_YV12_ALTIVEC.  The meaning of the
 * numeric arguments 4, 8, 2 is fixed by MAKE_COLORSPACE_ALTIVEC_FROM_RGB,
 * which is outside this excerpt (presumably bytes per pixel, pixels per
 * step and rows per step -- TODO confirm).
 */
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(bgra_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 2, 1, 0, 0)
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(abgr_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 3, 2, 1, 0)
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(rgba_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 0, 1, 2, 0)
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(argb_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 1, 2, 3, 0)

/*
 * Instantiate the packed 4:2:2 -> YV12 converters.  The last four
 * arguments are the byte positions C1..C4 used by READ_YUYV_Y_ALTIVEC.
 */
MAKE_COLORSPACE_ALTIVEC_FROM_YUV(yuyv_to_yv12_altivec_c, 2, 16, 2, YUYV_TO_YV12_ALTIVEC, 0, 1, 2, 3)
MAKE_COLORSPACE_ALTIVEC_FROM_YUV(uyvy_to_yv12_altivec_c, 2, 16, 2, YUYV_TO_YV12_ALTIVEC, 1, 0, 3, 2)

/*
 * WRITE_YUYV_ALTIVEC(ROW, UV_ROW, C1, C2, C3, C4)
 *
 * Builds 32 bytes of packed 4:2:2 output for row ROW.  Luma is fetched
 * from y_ptr + ROW * y_stride with the two-load + vec_lvsl idiom, so y_ptr
 * need not be 16-byte aligned; chroma is fetched the same way from u_ptr
 * and v_ptr.  Each of the four sources (even luma, odd luma, U, V) is
 * spread with the m4 / m4 + vec4 permute vectors and selected into p0 and
 * p1 through mask_stencil rotated by C1, C3, C2 and C4 respectively.  The
 * result is written with two vec_st stores at x_ptr + ROW * x_stride.
 *
 * UV_ROW is accepted but not referenced: u_ptr and v_ptr are read without
 * any row offset, so every invocation uses the same chroma vectors.
 *
 * mask_stencil, m4, vec4, p0, p1, t, ptmp, mask, y_vec, u_vec and v_vec
 * are defined outside this excerpt.  The expansion has no trailing
 * semicolon.
 */
#define WRITE_YUYV_ALTIVEC(ROW, UV_ROW, C1,C2,C3,C4) \
	p0 = vec_splat_u8(0); \
	p1 = vec_splat_u8(0); \
	\
	y_vec = vec_perm(vec_ld(0, y_ptr + (ROW)*y_stride), vec_ld(16, y_ptr + (ROW)*y_stride), vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	/* C1 */ \
	t = vec_perm(y_vec, y_vec, vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1))); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \
	ptmp = vec_perm(t,t, vec_add(m4, vec4));\
	p1 = vec_sel(p1, ptmp, mask); \
	\
	/* C3 */ \
	ptmp = vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1)); \
	t = vec_perm(y_vec, y_vec, ptmp); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \
	ptmp = vec_perm(t, t, vec_add(m4, vec4)); \
	p1 = vec_sel(p1, ptmp, mask); \
	\
	/* C2 */ \
	u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \
	ptmp = vec_perm(u_vec, u_vec, vec_add(m4, vec4)); \
	p1 = vec_sel(p1, ptmp, mask); \
	\
	/* C4 */ \
	v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \
	ptmp = vec_perm(v_vec, v_vec, vec_add(m4, vec4)); \
	p1 = vec_sel(p1, ptmp, mask); \
	\
	vec_st(p0, 0, x_ptr + (ROW)*x_stride); \
	vec_st(p1, 16, x_ptr + (ROW)*x_stride)

/*
 * Per-row hook; expands to nothing.  Presumably required by the
 * MAKE_COLORSPACE_ALTIVEC_TO_YUV generator, which is defined outside this
 * excerpt -- TODO confirm.
 */
#define YV12_TO_YUYV_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/*nothing*/

/*
 * YV12_TO_YUYV_ALTIVEC(SIZE, C1, C2, C3, C4)
 *
 * Body for one step of the planar -> packed 4:2:2 conversion: three
 * vec_dstt data-stream touches (Y, U, V planes) followed by two output
 * rows.  Both rows pass UV_ROW = 0 and therefore share one chroma row.
 */
#define YV12_TO_YUYV_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vec_dstt(y_ptr, prefetch_constant_y, 0); \
	vec_dstt(u_ptr, prefetch_constant_uv, 1); \
	vec_dstt(v_ptr, prefetch_constant_uv, 2); \
	\
	WRITE_YUYV_ALTIVEC(0, 0, C1,C2,C3,C4); \
	WRITE_YUYV_ALTIVEC(1, 0, C1,C2,C3,C4)

/*
 * Instantiate the AltiVec YV12 -> packed 4:2:2 converters.  They store to
 * x_ptr with vec_st and are therefore only called through the alignment
 * checking wrappers generated below.
 */
MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_unaligned_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 0, 1, 2, 3)
MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_unaligned_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 1, 0, 3, 2)

/* These intermediate functions are used because gcc v3.3 seems to produce
   invalid register usage when the fallback is integrated directly into the
   AltiVec routine (!!!) */

/*
 * CHECK_COLORSPACE_ALTIVEC_TO_YUV(NAME, FAST, FALLBACK)
 *
 * Defines the function NAME, which forwards all of its arguments to FAST
 * when both x_ptr and x_stride are multiples of 16, and to FALLBACK
 * otherwise.
 *
 * NOTE(review): (uint32_t)x_ptr converts a pointer to a 32-bit integer.
 * Only the low four bits are tested, but on a target with 64-bit pointers
 * the cast is to an integer of a different size; a pointer-sized integer
 * type would be cleaner.  Left unchanged here because the headers that
 * provide these typedefs are not visible in this excerpt.
 */
#define CHECK_COLORSPACE_ALTIVEC_TO_YUV(NAME,FAST,FALLBACK) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	int y_stride, int uv_stride, \
	int width, int height, int vflip) \
{\
	if( ((uint32_t)x_ptr & 15) | (x_stride & 15) )\
		FALLBACK(x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width, height, vflip);\
	else\
		FAST(x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width, height, vflip);\
}

/* Public entry points: AltiVec path when the packed buffer is 16-byte aligned, plain C otherwise. */
CHECK_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_c, yv12_to_yuyv_altivec_unaligned_c, yv12_to_yuyv_c)
CHECK_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_c, yv12_to_uyvy_altivec_unaligned_c, yv12_to_uyvy_c)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -