📄 yuv2rgb_altivec.c
字号:
U = (vector signed short)
vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u);
V = (vector signed short)
vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v);
Y = (vector signed short)
vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y);
cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
uyvy = vec_ld (16, img);
U = (vector signed short)
vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u);
V = (vector signed short)
vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v);
Y = (vector signed short)
vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y);
cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
R = vec_packclp (R0,R1);
G = vec_packclp (G0,G1);
B = vec_packclp (B0,B1);
// vec_mstbgr24 (R,G,B, out);
out_rgba (R,G,B,out);
img += 32;
}
}
return srcSliceH;
}
/*
  Select the AltiVec conversion routine matching the context's source and
  destination pixel formats.

  The accelerated routines currently only support source widths that are a
  multiple of 16 and, for the planar / gray / NV sources, source heights
  that are a multiple of 2.  In every other case NULL is returned, so we
  just fall back to the C code.

  Returns the converter to use, or NULL when AltiVec is not enabled in
  c->flags or the format/size combination is not handled here.
*/
SwsFunc yuv2rgb_init_altivec (SwsContext *c)
{
    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
        return NULL;

    /*
      The width restriction does not seem to matter much in practice: a
      bunch of videos with abnormal widths were tried and mplayer crashes
      elsewhere, e.g.
          mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
      goes boom with an X11 bad match.
    */
    if (c->srcW & 0xf)
        return NULL;

    /* Packed UYVY source: a single destination format is supported. */
    if (c->srcFormat == PIX_FMT_UYVY422) {
        if (c->dstFormat != PIX_FMT_BGR32)
            return NULL;
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
        return altivec_uyvy_rgb32;
    }

    /* Everything that is not one of the sources below is unsupported. */
    switch (c->srcFormat) {
    case PIX_FMT_YUV410P:
    case PIX_FMT_YUV420P:
    /*case IMGFMT_CLPL: ??? */
    case PIX_FMT_GRAY8:
    case PIX_FMT_NV12:
    case PIX_FMT_NV21:
        break;
    default:
        return NULL;
    }

    /* These sources additionally need an even height. */
    if (c->srcH & 0x1)
        return NULL;

    if (c->dstFormat == PIX_FMT_RGB24) {
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
        return altivec_yuv2_rgb24;
    }
    if (c->dstFormat == PIX_FMT_BGR24) {
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
        return altivec_yuv2_bgr24;
    }
    if (c->dstFormat == PIX_FMT_ARGB) {
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
        return altivec_yuv2_argb;
    }
    if (c->dstFormat == PIX_FMT_ABGR) {
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
        return altivec_yuv2_abgr;
    }
    if (c->dstFormat == PIX_FMT_RGBA) {
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
        return altivec_yuv2_rgba;
    }
    if (c->dstFormat == PIX_FMT_BGRA) {
        av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
        return altivec_yuv2_bgra;
    }

    return NULL;
}
/*
  Round a value with 16 fractional bits to the nearest integer (halves
  round towards +infinity) and saturate it to the signed 16-bit range.

  The result is returned as the raw 16-bit pattern in a uint16_t:
    - results below -0x7FFF yield 0x8000,
    - results above  0x7FFF yield 0x7FFF,
    - anything else is the two's-complement pattern of the rounded value.

  The intermediate is kept in 64 bits so that large inputs saturate instead
  of wrapping when narrowed, and the rounding is done without adding to f
  so that inputs close to INT64_MAX cannot overflow.
*/
static uint16_t roundToInt16(int64_t f){
    /* floor(f / 2^16) plus the rounding bit (bit 15 of f);
       equivalent to (f + (1<<15)) >> 16 without the overflow-prone add. */
    int64_t r = (f >> 16) + ((f >> 15) & 1);

    if (r < -0x7FFF) return 0x8000;
    if (r >  0x7FFF) return 0x7FFF;
    return (uint16_t)r;
}
/*
  Compute the six conversion constants (cy, oy, crv, cbu, cgu, cgv) from the
  inverse table and the brightness / contrast / saturation settings, and
  store each of them, replicated across a whole vector, in the context
  fields CY, OY, CRV, CBU, CGU and CGV.  CSHIFT is set to a vector of 2s.

  c           context receiving the vectors
  inv_table   four coefficients; [0] feeds crv, [1] cbu, [2] cgu, [3] cgv
  brightness  scaled by -256 to form oy
  contrast,
  saturation  only their bits above bit 15 take part in the chroma
              constants (both are shifted right by 16 first)
*/
void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
{
    /* Scalars are staged in an aligned array that shares storage with one
       vector, so each lane can then be broadcast with vec_splat.
       Lanes 6 and 7 are never written and never splatted. */
    union {
        signed short lane[8] __attribute__ ((aligned(16)));
        vector signed short all;
    } coef;

    const int con = contrast   >> 16;
    const int sat = saturation >> 16;

    coef.lane[0] = ((0xffffLL * contrast) >> 8) >> 9;    //cy
    coef.lane[1] = -256*brightness;                      //oy
    coef.lane[2] =   (inv_table[0]>>3) * con * sat;      //crv
    coef.lane[3] =   (inv_table[1]>>3) * con * sat;      //cbu
    coef.lane[4] = -((inv_table[2]>>1) * con * sat);     //cgu
    coef.lane[5] = -((inv_table[3]>>1) * con * sat);     //cgv

    c->CY  = vec_splat (coef.all, 0);
    c->OY  = vec_splat (coef.all, 1);
    c->CRV = vec_splat (coef.all, 2);
    c->CBU = vec_splat (coef.all, 3);
    c->CGU = vec_splat (coef.all, 4);
    c->CGV = vec_splat (coef.all, 5);

    c->CSHIFT = (vector unsigned short)vec_splat_u16(2);

#if 0
    /* Disabled debug dump of the six scalar constants. */
    {
        int i;
        char *v[6]={"cy","oy","crv","cbu","cgu","cgv"};
        for (i=0; i<6; i++)
            printf("%s %d ", v[i],coef.lane[i] );
        printf("\n");
    }
#endif
}
/*
  Vertically filter one output line of luma and chroma, convert it to RGB
  and store it in packed form, 16 pixels at a time.

  c              context; supplies the per-line coefficient banks
                 (vYCoeffsBank / vCCoeffsBank), the conversion constants used
                 by cvtyuvtoRGB, and dstFormat
  lumFilter,
  chrFilter      not used here; the coefficients are taken from the banks in
                 the context, indexed by dstY
  lumSrc         lumFilterSize rows of luma samples
  chrSrc         chrFilterSize rows of chroma samples; the V samples are read
                 2048 elements after the U samples of the same row
  dest           output line; written with vector stores
  dstW           number of pixels to produce
  dstY           output line number, used to pick the coefficient set

  Supported dstFormat values: ABGR, BGRA, RGBA, ARGB, RGB24, BGR24.  For any
  other format an error is logged and nothing more is written.

  Full groups of 16 pixels are stored straight into dest.  A trailing
  partial group (dstW not a multiple of 16) is converted into an aligned
  scratch buffer and only the pixels that belong to the line are copied out,
  so nothing is written beyond dstW pixels.

  NOTE(review): the out_* helpers are defined outside this view; nothing
  here advances `out`, so they presumably step it past the pixels they
  store - confirm.
  NOTE(review): the vector loads always fetch a full group, also for the
  partial tail, so the source rows are assumed to be readable (padded) up
  to the next multiple of 16 samples - confirm against the caller.
*/
void
altivec_yuv2packedX (SwsContext *c,
                     int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                     uint8_t *dest, int dstW, int dstY)
{
    int i,j;
    int bytesPerPixel;
    vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
    vector signed short R0,G0,B0,R1,G1,B1;
    vector unsigned char R,G,B;
    vector unsigned char *out,*nout;
    /* rounding term and shift applied to the filter accumulators */
    vector signed short RND = vec_splat_s16(1<<3);
    vector unsigned short SCL = vec_splat_u16(4);
    /* holds one converted group for the partial tail (4 vectors at most) */
    unsigned long scratch[16] __attribute__ ((aligned (16)));
    vector signed short *YCoeffs, *CCoeffs;

    YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
    CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
    out = (vector unsigned char *)dest;

    /* Full groups only: stop before a group that would cross dstW. */
    for (i=0; i<=dstW-16; i+=16){
        Y0 = RND;
        Y1 = RND;
        /* extract 16 coeffs from lumSrc */
        for (j=0; j<lumFilterSize; j++) {
            X0 = vec_ld (0, &lumSrc[j][i]);
            X1 = vec_ld (16, &lumSrc[j][i]);
            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
        }
        U = RND;
        V = RND;
        /* extract 8 coeffs from U,V */
        for (j=0; j<chrFilterSize; j++) {
            X = vec_ld (0, &chrSrc[j][i/2]);
            U = vec_mradds (X, CCoeffs[j], U);
            X = vec_ld (0, &chrSrc[j][i/2+2048]);
            V = vec_mradds (X, CCoeffs[j], V);
        }
        /* scale and clip signals */
        Y0 = vec_sra (Y0, SCL);
        Y1 = vec_sra (Y1, SCL);
        U = vec_sra (U, SCL);
        V = vec_sra (V, SCL);
        Y0 = vec_clip_s16 (Y0);
        Y1 = vec_clip_s16 (Y1);
        U = vec_clip_s16 (U);
        V = vec_clip_s16 (V);
        /* now we have
           Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
           U = u0 u1 u2 u3 u4 u5 u6 u7     V = v0 v1 v2 v3 v4 v5 v6 v7
           and duplicate every chroma sample so that it lines up with luma:
           U0= u0 u0 u1 u1 u2 u2 u3 u3     U1= u4 u4 u5 u5 u6 u6 u7 u7
           V0= v0 v0 v1 v1 v2 v2 v3 v3     V1= v4 v4 v5 v5 v6 v6 v7 v7
        */
        U0 = vec_mergeh (U,U);
        V0 = vec_mergeh (V,V);
        U1 = vec_mergel (U,U);
        V1 = vec_mergel (V,V);
        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
        R = vec_packclp (R0,R1);
        G = vec_packclp (G0,G1);
        B = vec_packclp (B0,B1);
        switch(c->dstFormat) {
        case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
        case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
        case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
        case PIX_FMT_ARGB: out_argb (R,G,B,out); break;
        case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
        case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
        default:
            {
                /* If this is reached, the caller should have called yuv2packedXinC
                   instead. */
                static int printed_error_message;
                if (!printed_error_message) {
                    av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
                           sws_format_name(c->dstFormat));
                    printed_error_message=1;
                }
                return;
            }
        }
    }

    /* Partial last group: i is the index of its first pixel. */
    if (i < dstW) {
        Y0 = RND;
        Y1 = RND;
        /* extract 16 coeffs from lumSrc */
        for (j=0; j<lumFilterSize; j++) {
            X0 = vec_ld (0, &lumSrc[j][i]);
            X1 = vec_ld (16, &lumSrc[j][i]);
            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
        }
        U = RND;
        V = RND;
        /* extract 8 coeffs from U,V */
        for (j=0; j<chrFilterSize; j++) {
            X = vec_ld (0, &chrSrc[j][i/2]);
            U = vec_mradds (X, CCoeffs[j], U);
            X = vec_ld (0, &chrSrc[j][i/2+2048]);
            V = vec_mradds (X, CCoeffs[j], V);
        }
        /* scale and clip signals */
        Y0 = vec_sra (Y0, SCL);
        Y1 = vec_sra (Y1, SCL);
        U = vec_sra (U, SCL);
        V = vec_sra (V, SCL);
        Y0 = vec_clip_s16 (Y0);
        Y1 = vec_clip_s16 (Y1);
        U = vec_clip_s16 (U);
        V = vec_clip_s16 (V);
        /* same layout as in the main loop:
           Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
           U0= u0 u0 u1 u1 u2 u2 u3 u3     U1= u4 u4 u5 u5 u6 u6 u7 u7
           V0= v0 v0 v1 v1 v2 v2 v3 v3     V1= v4 v4 v5 v5 v6 v6 v7 v7
        */
        U0 = vec_mergeh (U,U);
        V0 = vec_mergeh (V,V);
        U1 = vec_mergel (U,U);
        V1 = vec_mergel (V,V);
        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
        R = vec_packclp (R0,R1);
        G = vec_packclp (G0,G1);
        B = vec_packclp (B0,B1);
        /* Convert into the scratch buffer, remembering the pixel size so
           that only the valid part is copied to dest afterwards. */
        nout = (vector unsigned char *)scratch;
        switch(c->dstFormat) {
        case PIX_FMT_ABGR: out_abgr (R,G,B,nout); bytesPerPixel = 4; break;
        case PIX_FMT_BGRA: out_bgra (R,G,B,nout); bytesPerPixel = 4; break;
        case PIX_FMT_RGBA: out_rgba (R,G,B,nout); bytesPerPixel = 4; break;
        case PIX_FMT_ARGB: out_argb (R,G,B,nout); bytesPerPixel = 4; break;
        case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); bytesPerPixel = 3; break;
        case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); bytesPerPixel = 3; break;
        default:
            /* Only reachable when no full group was processed above
               (dstW < 16); otherwise the main loop has already returned. */
            av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
                   sws_format_name(c->dstFormat));
            return;
        }
        /* memcpy takes a byte count: remaining pixels times pixel size. */
        memcpy (dest + i*bytesPerPixel, scratch, (dstW-i)*bytesPerPixel);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -