📄 yuv2rgb_altivec.c
字号:
} \ return srcSliceH; \}#define out_abgr(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),c,b,a,ptr)#define out_bgra(a,b,c,ptr) vec_mstrgb32(typeof(a),c,b,a,((typeof (a))(0)),ptr)#define out_rgba(a,b,c,ptr) vec_mstrgb32(typeof(a),a,b,c,((typeof (a))(0)),ptr)#define out_argb(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))(0)),a,b,c,ptr)#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)#define out_bgr24(a,b,c,ptr) vec_mstrgb24(c,b,a,ptr)DEFCSP420_CVT (yuv2_abgr32, out_abgr)DEFCSP420_CVT (yuv2_bgra32, out_argb)DEFCSP420_CVT (yuv2_rgba32, out_rgba)DEFCSP420_CVT (yuv2_argb32, out_argb)DEFCSP420_CVT (yuv2_rgb24, out_rgb24)DEFCSP420_CVT (yuv2_bgr24, out_bgr24)// uyvy|uyvy|uyvy|uyvy// 0123 4567 89ab cdefstaticconst vector unsigned char demux_u = (vector unsigned char)(0x10,0x00,0x10,0x00, 0x10,0x04,0x10,0x04, 0x10,0x08,0x10,0x08, 0x10,0x0c,0x10,0x0c), demux_v = (vector unsigned char)(0x10,0x02,0x10,0x02, 0x10,0x06,0x10,0x06, 0x10,0x0A,0x10,0x0A, 0x10,0x0E,0x10,0x0E), demux_y = (vector unsigned char)(0x10,0x01,0x10,0x03, 0x10,0x05,0x10,0x07, 0x10,0x09,0x10,0x0B, 0x10,0x0D,0x10,0x0F);/* this is so I can play live CCIR raw video*/static int altivec_uyvy_rgb32 (SwsContext *c, unsigned char **in, int *instrides, int srcSliceY, int srcSliceH, unsigned char **oplanes, int *outstrides){ int w = c->srcW; int h = srcSliceH; int i,j; vector unsigned char uyvy; vector signed short Y,U,V; vector signed short vx,ux,uvx; vector signed short R0,G0,B0,R1,G1,B1; vector unsigned char R,G,B; vector unsigned char *out; ubyte *img; img = in[0]; out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]); for (i=0;i<h;i++) { for (j=0;j<w/16;j++) { uyvy = vec_ld (0, img); U = (vector signed short) vec_perm (uyvy, (vector unsigned char)(0), demux_u); V = (vector signed short) vec_perm (uyvy, (vector unsigned char)(0), demux_v); Y = (vector signed short) vec_perm (uyvy, (vector unsigned char)(0), demux_y); cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0); uyvy = vec_ld (16, img); U = (vector signed short) vec_perm (uyvy, (vector unsigned char)(0), demux_u); V = (vector signed short) vec_perm (uyvy, (vector unsigned char)(0), demux_v); Y = (vector signed short) vec_perm (uyvy, (vector unsigned char)(0), demux_y); cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1); R = vec_packclp (R0,R1); G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); // vec_mstbgr24 (R,G,B, out); out_rgba (R,G,B,out); img += 32; } } return srcSliceH;}/* Ok currently the acceleration routine only supports inputs of widths a multiple of 16 and heights a multiple 2 So we just fall back to the C codes for this.*/SwsFunc yuv2rgb_init_altivec (SwsContext *c){ if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) return NULL; /* and this seems not to matter too much I tried a bunch of videos with abnormal widths and mplayer crashes else where. mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv boom with X11 bad match. */ if ((c->srcW & 0xf) != 0) return NULL; switch (c->srcFormat) { case IMGFMT_YVU9: case IMGFMT_IF09: case IMGFMT_YV12: case IMGFMT_I420: case IMGFMT_IYUV: case IMGFMT_CLPL: case IMGFMT_Y800: case IMGFMT_Y8: case IMGFMT_NV12: case IMGFMT_NV21: if ((c->srcH & 0x1) != 0) return NULL; switch(c->dstFormat){ case IMGFMT_RGB24: MSG_WARN("ALTIVEC: Color Space RGB24\n"); return altivec_yuv2_rgb24; case IMGFMT_BGR24: MSG_WARN("ALTIVEC: Color Space BGR24\n"); return altivec_yuv2_bgr24; case IMGFMT_RGB32: MSG_WARN("ALTIVEC: Color Space ARGB32\n"); return altivec_yuv2_argb32; case IMGFMT_BGR32: MSG_WARN("ALTIVEC: Color Space BGRA32\n"); // return profile_altivec_bgra32; return altivec_yuv2_bgra32; default: return NULL; } break; case IMGFMT_UYVY: switch(c->dstFormat){ case IMGFMT_RGB32: MSG_WARN("ALTIVEC: Color Space UYVY -> RGB32\n"); return altivec_uyvy_rgb32; default: return NULL; } break; } return NULL;}void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4]){ vector signed short CY, CRV, CBU, CGU, CGV, OY, Y0; int64_t crv __attribute__ ((aligned(16))) = inv_table[0]; int64_t cbu __attribute__ ((aligned(16))) = inv_table[1]; int64_t cgu __attribute__ ((aligned(16))) = inv_table[2]; int64_t cgv __attribute__ ((aligned(16))) = inv_table[3]; int64_t cy = (1<<16)-1; int64_t oy = 0; short tmp __attribute__ ((aligned(16))); if ((c->flags & SWS_CPU_CAPS_ALTIVEC) == 0) return; cy = (cy *c->contrast )>>17; crv= (crv*c->contrast * c->saturation)>>32; cbu= (cbu*c->contrast * c->saturation)>>32; cgu= (cgu*c->contrast * c->saturation)>>32; cgv= (cgv*c->contrast * c->saturation)>>32; oy -= 256*c->brightness; tmp = cy; CY = vec_lde (0, &tmp); CY = vec_splat (CY, 0); tmp = oy; OY = vec_lde (0, &tmp); OY = vec_splat (OY, 0); tmp = crv>>3; CRV = vec_lde (0, &tmp); CRV = vec_splat (CRV, 0); tmp = cbu>>3; CBU = vec_lde (0, &tmp); CBU = vec_splat (CBU, 0); tmp = -(cgu>>1); CGU = vec_lde (0, &tmp); CGU = vec_splat (CGU, 0); tmp = -(cgv>>1); CGV = vec_lde (0, &tmp); CGV = vec_splat (CGV, 0); c->CSHIFT = (vector unsigned short)(2); c->CY = CY; c->OY = OY; c->CRV = CRV; c->CBU = CBU; c->CGU = CGU; c->CGV = CGV;#if 0 printf ("cy: %hvx\n", CY); printf ("oy: %hvx\n", OY); printf ("crv: %hvx\n", CRV); printf ("cbu: %hvx\n", CBU); printf ("cgv: %hvx\n", CGV); printf ("cgu: %hvx\n", CGU);#endif return;}voidaltivec_yuv2packedX (SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, uint8_t *dest, int dstW, int dstY){ int i,j; short tmp __attribute__((aligned (16))); short *p; short *f; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; vector signed short R0,G0,B0,R1,G1,B1; vector unsigned char R,G,B,pels[3]; vector unsigned char *out,*nout; vector signed short RND = (vector signed short)(1<<3); vector unsigned short SCL = (vector unsigned short)(4); unsigned long scratch[16] __attribute__ ((aligned (16))); vector signed short *vYCoeffsBank, *vCCoeffsBank; vector signed short *YCoeffs, *CCoeffs; vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*dstW); vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*dstW); for (i=0;i<lumFilterSize*dstW;i++) { tmp = c->vLumFilter[i]; p = &vYCoeffsBank[i]; for (j=0;j<8;j++) p[j] = tmp; } for (i=0;i<chrFilterSize*dstW;i++) { tmp = c->vChrFilter[i]; p = &vCCoeffsBank[i]; for (j=0;j<8;j++) p[j] = tmp; } YCoeffs = vYCoeffsBank+dstY*lumFilterSize; CCoeffs = vCCoeffsBank+dstY*chrFilterSize; out = (vector unsigned char *)dest; for(i=0; i<dstW; i+=16){ Y0 = RND; Y1 = RND; /* extract 16 coeffs from lumSrc */ for(j=0; j<lumFilterSize; j++) { X0 = vec_ld (0, &lumSrc[j][i]); X1 = vec_ld (16, &lumSrc[j][i]); Y0 = vec_mradds (X0, YCoeffs[j], Y0); Y1 = vec_mradds (X1, YCoeffs[j], Y1); } U = RND; V = RND; /* extract 8 coeffs from U,V */ for(j=0; j<chrFilterSize; j++) { X = vec_ld (0, &chrSrc[j][i/2]); U = vec_mradds (X, CCoeffs[j], U); X = vec_ld (0, &chrSrc[j][i/2+2048]); V = vec_mradds (X, CCoeffs[j], V); } /* scale and clip signals */ Y0 = vec_sra (Y0, SCL); Y1 = vec_sra (Y1, SCL); U = vec_sra (U, SCL); V = vec_sra (V, SCL); Y0 = vec_clip (Y0); Y1 = vec_clip (Y1); U = vec_clip (U); V = vec_clip (V); /* now we have Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U= u0 u1 u2 u3 u4 u5 u6 u7 V= v0 v1 v2 v3 v4 v5 v6 v7 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U0= u0 u0 u1 u1 u2 u2 u3 u3 U1= u4 u4 u5 u5 u6 u6 u7 u7 V0= v0 v0 v1 v1 v2 v2 v3 v3 V1= v4 v4 v5 v5 v6 v6 v7 v7 */ U0 = vec_mergeh (U,U); V0 = vec_mergeh (V,V); U1 = vec_mergel (U,U); V1 = vec_mergel (V,V); cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0); cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1); R = vec_packclp (R0,R1); G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); out_rgba (R,G,B,out); } if (i < dstW) { i -= 16; Y0 = RND; Y1 = RND; /* extract 16 coeffs from lumSrc */ for(j=0; j<lumFilterSize; j++) { X0 = vec_ld (0, &lumSrc[j][i]); X1 = vec_ld (16, &lumSrc[j][i]); Y0 = vec_mradds (X0, YCoeffs[j], Y0); Y1 = vec_mradds (X1, YCoeffs[j], Y1); } U = RND; V = RND; /* extract 8 coeffs from U,V */ for(j=0; j<chrFilterSize; j++) { X = vec_ld (0, &chrSrc[j][i/2]); U = vec_mradds (X, CCoeffs[j], U); X = vec_ld (0, &chrSrc[j][i/2+2048]); V = vec_mradds (X, CCoeffs[j], V); } /* scale and clip signals */ Y0 = vec_sra (Y0, SCL); Y1 = vec_sra (Y1, SCL); U = vec_sra (U, SCL); V = vec_sra (V, SCL); Y0 = vec_clip (Y0); Y1 = vec_clip (Y1); U = vec_clip (U); V = vec_clip (V); /* now we have Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U= u0 u1 u2 u3 u4 u5 u6 u7 V= v0 v1 v2 v3 v4 v5 v6 v7 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U0= u0 u0 u1 u1 u2 u2 u3 u3 U1= u4 u4 u5 u5 u6 u6 u7 u7 V0= v0 v0 v1 v1 v2 v2 v3 v3 V1= v4 v4 v5 v5 v6 v6 v7 v7 */ U0 = vec_mergeh (U,U); V0 = vec_mergeh (V,V); U1 = vec_mergel (U,U); V1 = vec_mergel (V,V); cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0); cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1); R = vec_packclp (R0,R1); G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); nout = (vector unsigned char *)scratch; out_rgba (R,G,B,nout); memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); } if (vYCoeffsBank) free (vYCoeffsBank); if (vCCoeffsBank) free (vCCoeffsBank);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -