📄 swscale_template.c
字号:
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm2, %%mm0 \n\t" "movd 6(%0, %%ebx), %%mm4 \n\t" "movd 6(%1, %%ebx), %%mm1 \n\t" "movd 9(%0, %%ebx), %%mm2 \n\t" "movd 9(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm4 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm4, %%mm2 \n\t" "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm2 \n\t"#endif "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" "pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm2 \n\t"#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t"#endif "packssdw %%mm2, %%mm0 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" "pmaddwd %%mm5, %%mm1 \n\t" "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 "psraw $7, %%mm0 \n\t"#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) "movq 12(%0, %%ebx), %%mm4 \n\t" "movq 12(%1, %%ebx), %%mm1 \n\t" "movq 18(%0, %%ebx), %%mm2 \n\t" "movq 18(%1, %%ebx), %%mm3 \n\t" PAVGB(%%mm1, %%mm4) PAVGB(%%mm3, %%mm2) "movq %%mm4, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $24, %%mm4 \n\t" "psrlq $24, %%mm2 \n\t" PAVGB(%%mm1, %%mm4) PAVGB(%%mm3, %%mm2) "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"#else "movd 12(%0, %%ebx), %%mm4 \n\t" "movd 12(%1, %%ebx), %%mm1 \n\t" "movd 15(%0, %%ebx), %%mm2 \n\t" "movd 15(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm4 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm2, %%mm4 \n\t" "movd 18(%0, %%ebx), %%mm5 \n\t" "movd 18(%1, %%ebx), %%mm1 \n\t" "movd 21(%0, %%ebx), %%mm2 \n\t" "movd 21(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm5 \n\t" "punpcklbw %%mm7, 
%%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm5 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm5, %%mm2 \n\t" "movq "MANGLE(w1111)", %%mm5 \n\t" "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm2 \n\t"#endif "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" "pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm2 \n\t"#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t"#endif "packssdw %%mm2, %%mm4 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" "pmaddwd %%mm5, %%mm1 \n\t" "addl $24, %%ebx \n\t" "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 "psraw $7, %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" "punpckldq %%mm4, %%mm0 \n\t" "punpckhdq %%mm4, %%mm1 \n\t" "packsswb %%mm1, %%mm0 \n\t" "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" "movd %%mm0, (%2, %%eax) \n\t" "punpckhdq %%mm0, %%mm0 \n\t" "movd %%mm0, (%3, %%eax) \n\t" "addl $4, %%eax \n\t" " js 1b \n\t" : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) : "%eax", "%ebx" );#else int i; for(i=0; i<width; i++) { int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; }#endif}

/**
 * Converts one line of 16-bit packed pixels to 8-bit luma.
 *
 * Every 16-bit word of src is split into a 5-bit field at bits 0-4 (b),
 * a 6-bit field at bits 5-10 (g) and a 5-bit field at bits 11-15 (r).
 * The two 5-bit channels are weighted twice so that all three terms are on
 * the 6-bit scale; the shift is then 2 smaller than RGB2YUV_SHIFT to bring
 * the 6-bit range up to 8 bits. 16 is added to the result.
 * RY, GY, BY and RGB2YUV_SHIFT are defined elsewhere in this file.
 *
 * @param dst   output line, width bytes are written
 * @param src   input line, read as width uint16_t words
 *              (NOTE(review): accessed through a uint16_t*, so presumably
 *              expected to be 2-byte aligned - confirm with callers)
 * @param width number of pixels to convert
 */
static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
{
	int i;
	for(i=0; i<width; i++)
	{
		int d= ((uint16_t*)src)[i];
		int b= d&0x1F;
		int g= (d>>5)&0x3F;
		int r= (d>>11)&0x1F;

		dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
	}
}

/**
 * Converts two lines of 16-bit packed pixels (5/6/5 bit fields) to one line
 * of 8-bit U and V samples, each computed from a 2x2 block of pixels.
 *
 * Every iteration loads one 32-bit word (two neighbouring pixels) from each
 * source line and sums the four pixels channel by channel inside a single
 * 32-bit word:
 *  - dl keeps the fields that already sit where they are wanted: b and r of
 *    the pixel in the low half, g of the pixel in the high half;
 *  - dh holds the remaining three fields after a shift by 5;
 *  - rotating dh right by 11 ((dh>>11) + (dh<<21)) lines those fields up
 *    with dl, so d = dh2 + dl carries the b sum in bits 0-6, the r sum in
 *    bits 11-17 and the g sum from bit 21 upwards.
 *
 * The packed words are kept in uint32_t: dh<<21 intentionally pushes the
 * upper fields out of the word, which is only well defined for unsigned
 * operands. The extracted channels stay int so that the multiplications with
 * the coefficients and the final right shift are done in signed arithmetic.
 *
 * In the final shift "+2" divides by the four summed pixels and "-2" scales
 * the 6-bit range up to 8 bits; 128 is added to both outputs.
 * RU, GU, BU, RV, GV, BV and RGB2YUV_SHIFT are defined elsewhere in this file.
 *
 * @param dstU  output line for U, width bytes are written
 * @param dstV  output line for V, width bytes are written
 * @param src1  first input line, read as width uint32_t words
 * @param src2  second input line, read as width uint32_t words
 * @param width number of U/V samples to produce
 */
static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
	int i;
	for(i=0; i<width; i++)
	{
		uint32_t d0= ((uint32_t*)src1)[i];
		uint32_t d1= ((uint32_t*)src2)[i];

		uint32_t dl= (d0&0x07E0F81F) + (d1&0x07E0F81F);
		uint32_t dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F);

		uint32_t dh2= (dh>>11) + (dh<<21);
		uint32_t d= dh2 + dl;

		int b= d&0x7F;
		int r= (d>>11)&0x7F;
		int g= d>>21;

		dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
		dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128;
	}
}

/**
 * Converts one line of 15-bit packed pixels to 8-bit luma.
 *
 * Every 16-bit word of src is split into three 5-bit fields: bits 0-4 (b),
 * bits 5-9 (g) and bits 10-14 (r). The shift is 3 smaller than
 * RGB2YUV_SHIFT to bring the 5-bit range up to 8 bits; 16 is added to the
 * result.
 *
 * @param dst   output line, width bytes are written
 * @param src   input line, read as width uint16_t words
 * @param width number of pixels to convert
 */
static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width)
{
	int i;
	for(i=0; i<width; i++)
	{
		int d= ((uint16_t*)src)[i];
		int b= d&0x1F;
		int g= (d>>5)&0x1F;
		int r= (d>>10)&0x1F;

		dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
	}
}

/**
 * Converts two lines of 15-bit packed pixels (three 5-bit fields) to one
 * line of 8-bit U and V samples, each computed from a 2x2 block of pixels.
 *
 * Same packed-sum scheme as the 16-bit variant, with masks for 5/5/5 fields:
 * after the rotate, d carries the b sum in bits 0-6, the r sum in bits 10-16
 * and the g sum from bit 21 upwards.
 *
 * The packed words are kept in uint32_t because dh<<21 intentionally shifts
 * bits out of the word, which is only well defined for unsigned operands;
 * the extracted channels stay int so the coefficient arithmetic is signed.
 *
 * In the final shift "+2" divides by the four summed pixels and "-3" scales
 * the 5-bit range up to 8 bits; 128 is added to both outputs.
 *
 * @param dstU  output line for U, width bytes are written
 * @param dstV  output line for V, width bytes are written
 * @param src1  first input line, read as width uint32_t words
 * @param src2  second input line, read as width uint32_t words
 * @param width number of U/V samples to produce
 */
static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
	int i;
	for(i=0; i<width; i++)
	{
		uint32_t d0= ((uint32_t*)src1)[i];
		uint32_t d1= ((uint32_t*)src2)[i];

		uint32_t dl= (d0&0x03E07C1F) + (d1&0x03E07C1F);
		uint32_t dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F);

		uint32_t dh2= (dh>>11) + (dh<<21);
		uint32_t d= dh2 + dl;

		int b= d&0x7F;
		int r= (d>>10)&0x7F;
		int g= d>>21;

		dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
		dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128;
	}
}

/**
 * Converts one line of 32-bit packed pixels to 8-bit luma.
 *
 * Every pixel is read as one uint32_t; r is taken from bits 0-7, g from
 * bits 8-15 and b from bits 16-23 (bits 24-31 are ignored).
 * The constant 33<<(RGB2YUV_SHIFT-1) equals 16.5 in the fixed-point scale,
 * i.e. it adds 16 plus one half for rounding before the shift.
 *
 * @param dst   output line, width bytes are written
 * @param src   input line, read as width uint32_t words
 * @param width number of pixels to convert
 */
static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
{
	int i;
	for(i=0; i<width; i++)
	{
		uint32_t px= ((uint32_t*)src)[i];	// load the pixel once
		int r= px&0xFF;
		int g= (px>>8)&0xFF;
		int b= (px>>16)&0xFF;

		dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
	}
}

/**
 * Converts two lines of 32-bit packed pixels to one line of 8-bit U and V
 * samples, each computed from a 2x2 block of pixels.
 *
 * Two neighbouring pixels are loaded from each source line. The bytes at
 * bits 0-7 and 16-23 of the four pixels are summed together in l (two
 * 10-bit sums at bits 0-9 and from bit 16 upwards), the bytes at bits 8-15
 * are summed in h. "+2" in the final shift divides by the four summed
 * pixels; 128 is added to both outputs.
 *
 * @param dstU  output line for U, width bytes are written
 * @param dstV  output line for V, width bytes are written
 * @param src1  first input line, read as 2*width uint32_t words
 * @param src2  second input line, read as 2*width uint32_t words
 * @param width number of U/V samples to produce
 */
static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
	int i;
	for(i=0; i<width; i++)
	{
		const int a= ((uint32_t*)src1)[2*i+0];
		const int e= ((uint32_t*)src1)[2*i+1];
		const int c= ((uint32_t*)src2)[2*i+0];
		const int d= ((uint32_t*)src2)[2*i+1];
		const int l= (a&0xFF00FF) + (e&0xFF00FF) + (c&0xFF00FF) + (d&0xFF00FF);
		const int h= (a&0x00FF00) + (e&0x00FF00) + (c&0x00FF00) + (d&0x00FF00);
		const int r=  l&0x3FF;
		const int g=  h>>8;
		const int b=  l>>16;

		dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
		dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
	}
}

static inline void 
RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width){ int i; for(i=0; i<width; i++) { int r= src[i*3+0]; int g= src[i*3+1]; int b= src[i*3+2]; dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); }}static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width){ int i; for(i=0; i<width; i++) { int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; }}// Bilinear / Bicubic scalingstatic inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, int16_t *filter, int16_t *filterPos, int filterSize){#ifdef HAVE_MMX assert(filterSize % 4 == 0 && filterSize>0); if(filterSize==4) // allways true for upscaling, sometimes for down too { int counter= -2*dstW; filter-= counter*2; filterPos-= counter/2; dst-= counter/2; asm volatile( "pxor %%mm7, %%mm7 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t" "pushl %%ebp \n\t" // we use 7 regs here ... 
"movl %%eax, %%ebp \n\t" ".balign 16 \n\t" "1: \n\t" "movzwl (%2, %%ebp), %%eax \n\t" "movzwl 2(%2, %%ebp), %%ebx \n\t" "movq (%1, %%ebp, 4), %%mm1 \n\t" "movq 8(%1, %%ebp, 4), %%mm3 \n\t" "movd (%3, %%eax), %%mm0 \n\t" "movd (%3, %%ebx), %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "pmaddwd %%mm1, %%mm0 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "psrad $8, %%mm0 \n\t" "psrad $8, %%mm3 \n\t" "packssdw %%mm3, %%mm0 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "packssdw %%mm0, %%mm0 \n\t" "movd %%mm0, (%4, %%ebp) \n\t" "addl $4, %%ebp \n\t" " jnc 1b \n\t" "popl %%ebp \n\t" : "+a" (counter) : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) : "%ebx" ); } else if(filterSize==8) { int counter= -2*dstW; filter-= counter*4; filterPos-= counter/2; dst-= counter/2; asm volatile( "pxor %%mm7, %%mm7 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t" "pushl %%ebp \n\t" // we use 7 regs here ... "movl %%eax, %%ebp \n\t" ".balign 16 \n\t" "1: \n\t" "movzwl (%2, %%ebp), %%eax \n\t" "movzwl 2(%2, %%ebp), %%ebx \n\t" "movq (%1, %%ebp, 8), %%mm1 \n\t" "movq 16(%1, %%ebp, 8), %%mm3 \n\t" "movd (%3, %%eax), %%mm0 \n\t" "movd (%3, %%ebx), %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "pmaddwd %%mm1, %%mm0 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "movq 8(%1, %%ebp, 8), %%mm1 \n\t" "movq 24(%1, %%ebp, 8), %%mm5 \n\t" "movd 4(%3, %%eax), %%mm4 \n\t" "movd 4(%3, %%ebx), %%mm2 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "pmaddwd %%mm1, %%mm4 \n\t" "pmaddwd %%mm2, %%mm5 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm5, %%mm3 \n\t" "psrad $8, %%mm0 \n\t" "psrad $8, %%mm3 \n\t" "packssdw %%mm3, %%mm0 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "packssdw %%mm0, %%mm0 \n\t" "movd %%mm0, (%4, %%ebp) \n\t" "addl $4, %%ebp \n\t" " jnc 1b \n\t" "popl %%ebp \n\t" : "+a" (counter) : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) : "%ebx" ); } else { int counter= -2*dstW;// filter-= counter*filterSize/2; filterPos-= counter/2; dst-= counter/2; asm volatile( "pxor %%mm7, 
%%mm7 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t" ".balign 16 \n\t" "1: \n\t" "movl %2, %%ecx \n\t" "movzwl (%%ecx, %0), %%eax \n\t" "movzwl 2(%%ecx, %0), %%ebx \n\t" "movl %5, %%ecx \n\t" "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" "2: \n\t" "movq (%1), %%mm1 \n\t" "movq (%1, %6), %%mm3 \n\t" "movd (%%ecx, %%eax), %%mm0 \n\t" "movd (%%ecx, %%ebx), %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "pmaddwd %%mm1, %%mm0 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "paddd %%mm3, %%mm5 \n\t" "paddd %%mm0, %%mm4 \n\t" "addl $8, %1 \n\t" "addl $4, %%ecx \n\t" "cmpl %4, %%ecx \n\t" " jb 2b \n\t" "addl %6, %1 \n\t" "psrad $8, %%mm4 \n\t" "psrad $8, %%mm5 \n\t" "packssdw %%mm5, %%mm4 \n\t" "pmaddwd %%mm6, %%mm4 \n\t" "packssdw %%mm4, %%mm4 \n\t" "movl %3, %%eax \n\t" "movd %%mm4, (%%eax, %0) \n\t" "addl $4, %0 \n\t" " jnc 1b \n\t" : "+r" (counter), "+r" (filter) : "m" (filterPos), "m" (dst), "m"(src+filterSize), "m" (src), "r" (filterSize*2) : "%ebx", "%eax", "%ecx" ); }#else#ifdef HAVE_ALTIVEC hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);#else int i; for(i=0; i<dstW; i++) { int j; int srcPos= filterPos[i]; int val=0;// printf("filterPos: %d\n", filterPos[i]); for(j=0; j<filterSize; j++) {// printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); val += ((int)src[srcPos + j])*filter[filterSize*i + j]; }// filter += hFilterSize; dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ...// dst[i] = val>>7; }#endif#endif} // *** horizontal scale Y line to temp bufferstatic inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, int flags, int canMMX2BeUsed,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -