📄 yuv2rgb_altivec.c
字号:
instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ \
instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ \
\
\
for (i=0;i<h/2;i++) { \
vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); \
vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); \
\
for (j=0;j<w/16;j++) { \
\
y1ivP = (vector unsigned char *)y1i; \
y2ivP = (vector unsigned char *)y2i; \
uivP = (vector unsigned char *)ui; \
vivP = (vector unsigned char *)vi; \
\
align_perm = vec_lvsl (0, y1i); \
y0 = (vector unsigned char) \
vec_perm (y1ivP[0], y1ivP[1], align_perm); \
\
align_perm = vec_lvsl (0, y2i); \
y1 = (vector unsigned char) \
vec_perm (y2ivP[0], y2ivP[1], align_perm); \
\
align_perm = vec_lvsl (0, ui); \
u = (vector signed char) \
vec_perm (uivP[0], uivP[1], align_perm); \
\
align_perm = vec_lvsl (0, vi); \
v = (vector signed char) \
vec_perm (vivP[0], vivP[1], align_perm); \
\
u = (vector signed char) \
vec_sub (u,(vector signed char) \
vec_splat((vector signed char)AVV(128),0)); \
v = (vector signed char) \
vec_sub (v,(vector signed char) \
vec_splat((vector signed char)AVV(128),0)); \
\
U = vec_unpackh (u); \
V = vec_unpackh (v); \
\
\
Y0 = vec_unh (y0); \
Y1 = vec_unl (y0); \
Y2 = vec_unh (y1); \
Y3 = vec_unl (y1); \
\
Y0 = vec_mradds (Y0, lCY, lOY); \
Y1 = vec_mradds (Y1, lCY, lOY); \
Y2 = vec_mradds (Y2, lCY, lOY); \
Y3 = vec_mradds (Y3, lCY, lOY); \
\
/* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */ \
ux = vec_sl (U, lCSHIFT); \
ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); \
ux0 = vec_mergeh (ux,ux); \
ux1 = vec_mergel (ux,ux); \
\
/* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */ \
vx = vec_sl (V, lCSHIFT); \
vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); \
vx0 = vec_mergeh (vx,vx); \
vx1 = vec_mergel (vx,vx); \
\
/* uvx = ((CGU*u) + (CGV*v))>>15 */ \
uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0)); \
uvx = vec_mradds (V, lCGV, uvx); \
uvx0 = vec_mergeh (uvx,uvx); \
uvx1 = vec_mergel (uvx,uvx); \
\
R0 = vec_add (Y0,vx0); \
G0 = vec_add (Y0,uvx0); \
B0 = vec_add (Y0,ux0); \
R1 = vec_add (Y1,vx1); \
G1 = vec_add (Y1,uvx1); \
B1 = vec_add (Y1,ux1); \
\
R = vec_packclp (R0,R1); \
G = vec_packclp (G0,G1); \
B = vec_packclp (B0,B1); \
\
out_pixels(R,G,B,oute); \
\
R0 = vec_add (Y2,vx0); \
G0 = vec_add (Y2,uvx0); \
B0 = vec_add (Y2,ux0); \
R1 = vec_add (Y3,vx1); \
G1 = vec_add (Y3,uvx1); \
B1 = vec_add (Y3,ux1); \
R = vec_packclp (R0,R1); \
G = vec_packclp (G0,G1); \
B = vec_packclp (B0,B1); \
\
\
out_pixels(R,G,B,outo); \
\
y1i += 16; \
y2i += 16; \
ui += 8; \
vi += 8; \
\
} \
\
outo += (outstrides[0])>>4; \
oute += (outstrides[0])>>4; \
\
ui += instrides_scl[1]; \
vi += instrides_scl[2]; \
y1i += instrides_scl[0]; \
y2i += instrides_scl[0]; \
} \
return srcSliceH; \
}
/*
 * Pixel-store adapters handed to DEFCSP420_CVT as its out_pixels argument.
 * Each one takes the three packed channel vectors in (r, g, b) order plus a
 * destination pointer and forwards them to a store helper in the channel
 * order named by the adapter.
 *
 * The 32-bit variants fill the fourth channel slot with a zero vector cast to
 * the type of the first argument.
 * NOTE(review): vec_mstrgb32 / vec_mstrgb24 / vec_mstbgr24 are defined outside
 * this section; their exact store behaviour should be confirmed there.
 */
#define out_abgr(r,g,b,dst)  vec_mstrgb32(typeof(r),((typeof (r))AVV(0)),b,g,r,dst)
#define out_bgra(r,g,b,dst)  vec_mstrgb32(typeof(r),b,g,r,((typeof (r))AVV(0)),dst)
#define out_rgba(r,g,b,dst)  vec_mstrgb32(typeof(r),r,g,b,((typeof (r))AVV(0)),dst)
#define out_argb(r,g,b,dst)  vec_mstrgb32(typeof(r),((typeof (r))AVV(0)),r,g,b,dst)
#define out_rgb24(r,g,b,dst) vec_mstrgb24(r,g,b,dst)
#define out_bgr24(r,g,b,dst) vec_mstbgr24(r,g,b,dst)
/* Instantiate the 4:2:0 converter template with out_abgr as its pixel store. */
DEFCSP420_CVT (yuv2_abgr, out_abgr)
/*
 * The BGRA converter normally comes from the template as well; the #else
 * branch holds a hand-expanded variant that is currently compiled out.
 */
#if 1
DEFCSP420_CVT (yuv2_bgra, out_bgra)
#else
/**
 * Hand-expanded AltiVec converter from planar YUV 4:2:0 to packed 32-bit BGRA.
 *
 * This is the alternative to the DEFCSP420_CVT (yuv2_bgra, out_bgra)
 * instantiation and is only compiled when the enclosing "#if 1" is disabled.
 *
 * Every outer iteration converts a pair of luma rows that share one chroma
 * row; every inner iteration converts 16 pixels of both rows.
 *
 * Differences from the previous revision of this function:
 *  - pixels are stored with out_bgra (it used out_argb, which does not match
 *    the function's name or the template instantiation it stands in for);
 *  - the second luma row and the per-row-pair luma advance are derived from
 *    instrides[0] instead of assuming that the luma stride equals the width;
 *  - luma is fetched with the same unaligned load sequence the template uses,
 *    so rows no longer have to start on a 16-byte boundary.
 *
 * @param c          scaler context; srcW and the coefficient vectors
 *                   CY, OY, CRV, CBU, CGU, CGV, CSHIFT are read from it
 * @param in         source planes: in[0] luma, in[1] U, in[2] V
 * @param instrides  byte stride of each source plane
 * @param srcSliceY  first destination row of this slice
 * @param srcSliceH  number of rows in this slice
 * @param oplanes    destination planes; only oplanes[0] is written
 * @param outstrides destination strides; only outstrides[0] is used
 * @return srcSliceH
 */
static int altivec_yuv2_bgra32 (SwsContext *c,
                                unsigned char **in, int *instrides,
                                int srcSliceY, int srcSliceH,
                                unsigned char **oplanes, int *outstrides)
{
    int w = c->srcW;
    int h = srcSliceH;
    int i,j;
    int instrides_scl[3];
    vector unsigned char y0,y1;
    vector signed char u,v;
    vector signed short Y0,Y1,Y2,Y3;
    vector signed short U,V;
    vector signed short vx,ux,uvx;
    vector signed short vx0,ux0,uvx0;
    vector signed short vx1,ux1,uvx1;
    vector signed short R0,G0,B0;
    vector signed short R1,G1,B1;
    vector unsigned char R,G,B;
    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;
    vector unsigned char align_perm;
    vector signed short
        lCY  = c->CY,
        lOY  = c->OY,
        lCRV = c->CRV,
        lCBU = c->CBU,
        lCGU = c->CGU,
        lCGV = c->CGV;
    vector unsigned short lCSHIFT = c->CSHIFT;

    /* y1i walks the even luma row, y2i the odd row one stride below it. */
    ubyte *y1i = in[0];
    ubyte *y2i = in[0]+instrides[0];
    ubyte *ui  = in[1];
    ubyte *vi  = in[2];

    /* oute receives the even destination row, outo the odd one. */
    vector unsigned char *oute
        = (vector unsigned char *)
          (oplanes[0]+srcSliceY*outstrides[0]);
    vector unsigned char *outo
        = (vector unsigned char *)
          (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);

    /*
     * End-of-row corrections: each is the distance to the next row (pair)
     * minus what the inner loop has already walked.
     */
    instrides_scl[0] = instrides[0]*2-w; /* the loop moves y{1,2}i by w */
    instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */
    instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */

    for (i=0;i<h/2;i++) {
        /* Data-stream store hints for the two destination rows. */
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);

        for (j=0;j<w/16;j++) {
            y1ivP = (vector unsigned char *)y1i;
            y2ivP = (vector unsigned char *)y2i;
            uivP  = (vector unsigned char *)ui;
            vivP  = (vector unsigned char *)vi;

            /*
             * Unaligned loads: fetch the two aligned vectors that straddle
             * the address and shift the wanted 16 bytes into place.
             */
            align_perm = vec_lvsl (0, y1i);
            y0 = (vector unsigned char)
                 vec_perm (y1ivP[0], y1ivP[1], align_perm);

            align_perm = vec_lvsl (0, y2i);
            y1 = (vector unsigned char)
                 vec_perm (y2ivP[0], y2ivP[1], align_perm);

            align_perm = vec_lvsl (0, ui);
            u = (vector signed char)
                vec_perm (uivP[0], uivP[1], align_perm);

            align_perm = vec_lvsl (0, vi);
            v = (vector signed char)
                vec_perm (vivP[0], vivP[1], align_perm);

            /* Remove the 128 bias from the chroma samples. */
            u = (vector signed char)
                vec_sub (u,(vector signed char)
                           vec_splat((vector signed char)AVV(128),0));
            v = (vector signed char)
                vec_sub (v,(vector signed char)
                           vec_splat((vector signed char)AVV(128),0));

            /* Only the first 8 chroma samples are needed for 16 pixels. */
            U = vec_unpackh (u);
            V = vec_unpackh (v);

            /* Widen the two luma rows to 16-bit: high and low halves. */
            Y0 = vec_unh (y0);
            Y1 = vec_unl (y0);
            Y2 = vec_unh (y1);
            Y3 = vec_unl (y1);

            Y0 = vec_mradds (Y0, lCY, lOY);
            Y1 = vec_mradds (Y1, lCY, lOY);
            Y2 = vec_mradds (Y2, lCY, lOY);
            Y3 = vec_mradds (Y3, lCY, lOY);

            /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */
            ux = vec_sl (U, lCSHIFT);
            ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0));
            /* Duplicate every chroma term so it covers two adjacent pixels. */
            ux0 = vec_mergeh (ux,ux);
            ux1 = vec_mergel (ux,ux);

            /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */
            vx = vec_sl (V, lCSHIFT);
            vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0));
            vx0 = vec_mergeh (vx,vx);
            vx1 = vec_mergel (vx,vx);

            /* uvx = ((CGU*u) + (CGV*v))>>15 */
            uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0));
            uvx = vec_mradds (V, lCGV, uvx);
            uvx0 = vec_mergeh (uvx,uvx);
            uvx1 = vec_mergel (uvx,uvx);

            /* Even row: combine luma with the chroma terms, pack, store. */
            R0 = vec_add (Y0,vx0);
            G0 = vec_add (Y0,uvx0);
            B0 = vec_add (Y0,ux0);
            R1 = vec_add (Y1,vx1);
            G1 = vec_add (Y1,uvx1);
            B1 = vec_add (Y1,ux1);

            R = vec_packclp (R0,R1);
            G = vec_packclp (G0,G1);
            B = vec_packclp (B0,B1);

            out_bgra(R,G,B,oute);

            /* Odd row: same chroma terms, second luma row. */
            R0 = vec_add (Y2,vx0);
            G0 = vec_add (Y2,uvx0);
            B0 = vec_add (Y2,ux0);
            R1 = vec_add (Y3,vx1);
            G1 = vec_add (Y3,uvx1);
            B1 = vec_add (Y3,ux1);

            R = vec_packclp (R0,R1);
            G = vec_packclp (G0,G1);
            B = vec_packclp (B0,B1);

            out_bgra(R,G,B,outo);

            y1i += 16;
            y2i += 16;
            ui  += 8;
            vi  += 8;
        }

        /*
         * NOTE(review): the destination pointers are advanced by one stride
         * (in 16-byte vectors) here, exactly as the template does; this
         * presumably relies on the store macro having already advanced them
         * across the row just written -- confirm against vec_mstrgb32.
         */
        outo += (outstrides[0])>>4;
        oute += (outstrides[0])>>4;

        ui  += instrides_scl[1];
        vi  += instrides_scl[2];
        y1i += instrides_scl[0];
        y2i += instrides_scl[0];
    }
    return srcSliceH;
}
#endif
/*
 * Remaining instantiations of the 4:2:0 converter template, one per output
 * pixel layout; the second argument is the pixel-store adapter the generated
 * code invokes for each group of converted pixels.
 */
DEFCSP420_CVT (yuv2_rgba, out_rgba)
DEFCSP420_CVT (yuv2_argb, out_argb)
DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
/*
 * Byte-selection tables for packed UYVY input.
 *
 * Byte positions inside one 16-byte source vector:
 *
 *     uyvy|uyvy|uyvy|uyvy
 *     0123 4567 89ab cdef
 *
 * Every table is laid out as eight byte pairs; the first byte of each pair is
 * index 0x10 and the second is the position of the wanted sample. U and V
 * positions are listed twice in a row, Y positions once each.
 * NOTE(review): presumably these are vec_perm control vectors with a zero
 * vector as the second operand, so that 0x10 yields a zero high byte --
 * confirm at the point of use.
 */
static const vector unsigned char demux_u =
    (const vector unsigned char)AVV(0x10,0x00,0x10,0x00,
                                    0x10,0x04,0x10,0x04,
                                    0x10,0x08,0x10,0x08,
                                    0x10,0x0c,0x10,0x0c);

static const vector unsigned char demux_v =
    (const vector unsigned char)AVV(0x10,0x02,0x10,0x02,
                                    0x10,0x06,0x10,0x06,
                                    0x10,0x0A,0x10,0x0A,
                                    0x10,0x0E,0x10,0x0E);

static const vector unsigned char demux_y =
    (const vector unsigned char)AVV(0x10,0x01,0x10,0x03,
                                    0x10,0x05,0x10,0x07,
                                    0x10,0x09,0x10,0x0B,
                                    0x10,0x0D,0x10,0x0F);
/*
this is so I can play live CCIR raw video
*/
static int altivec_uyvy_rgb32 (SwsContext *c,
unsigned char **in, int *instrides,
int srcSliceY, int srcSliceH,
unsigned char **oplanes, int *outstrides)
{
int w = c->srcW;
int h = srcSliceH;
int i,j;
vector unsigned char uyvy;
vector signed short Y,U,V;
vector signed short R0,G0,B0,R1,G1,B1;
vector unsigned char R,G,B;
vector unsigned char *out;
ubyte *img;
img = in[0];
out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
for (i=0;i<h;i++) {
for (j=0;j<w/16;j++) {
uyvy = vec_ld (0, img);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -