📄 dsputil.c
字号:
{
// dst[jj] = ((A*src[jj] + B*src[kk] + C*src[stride + jj] + D*src[stride + kk])+32)>>6;
int t1;
asm volatile ("mult %0,%1"::"r"(A), "r"(src[jj]));
asm volatile ("madd %0,%1"::"r"(B), "r"(src[kk]));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + jj]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + kk]));
asm volatile ("mflo %0":"=r"(t1));
dst[jj] = (t1 + 32) >> 6;
}
dst += stride;
src += stride;
}
}
#else
/*
 * put_h264_chroma_mc8_c - store an 8-pixel-wide, hh-row block of weighted
 * 2x2-neighbourhood sums into dst (MIPS multiply-accumulate variant).
 *
 * For every row, output pixel k (k = 0..7) is intended to be
 *     (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * where A..D are derived from x and y below; algebraically they add up to 64,
 * which is what the "+32, >>6" rounding divides by.
 *
 * dst    - destination; 8 bytes are written per row, rows are stride apart
 * src    - source; src[0..8] and src[stride+0..stride+8] are read per row
 * stride - distance in bytes between consecutive rows of both dst and src
 * hh     - number of rows to produce
 * x, y   - weight selectors; presumably expected in 0..7 (not checked in this
 *          variant) -- TODO confirm against callers
 *
 * The sum of products is built with the MIPS mult/madd instructions and read
 * back with mflo, i.e. it lives in the HI/LO accumulator between statements.
 *
 * NOTE(review): every instruction is its own asm statement and none of them
 * declares HI/LO as an output or clobber, so the compiler is not told about
 * the dependency between them. Confirm that the toolchain in use never places
 * a HI/LO-modifying instruction inside a mult..mflo group.
 */
void put_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int hh, int x, int y)
{
/* Weights of the top-left, top-right, bottom-left and bottom-right samples. */
const int A = (8 - x) *(8 - y);
const int B = (x) *(8 - y);
const int C = (8 - x) *(y);
const int D = (x) *(y);
int ii;
for (ii = 0; ii < hh; ii++)
{
/* t1 receives the low word of the accumulated sum for the current pixel. */
int t1;
/* The nine current-row samples are loaded once; next-row samples are read
 * directly in the asm operands below. */
const int a = src[0];
const int b = src[1];
const int c = src[2];
const int d = src[3];
const int e = src[4];
const int f = src[5];
const int g = src[6];
const int h = src[7];
const int j = src[8];
/* pixel 0: mult starts the sum, three madd add the remaining products */
asm volatile ("mult %0,%1"::"r"(A), "r"(a));
asm volatile ("madd %0,%1"::"r"(B), "r"(b));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 0]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 1]));
asm volatile ("mflo %0":"=r"(t1));
dst[0] = (((t1) + 32) >> 6);
/* pixel 1 */
asm volatile ("mult %0,%1"::"r"(A), "r"(b));
asm volatile ("madd %0,%1"::"r"(B), "r"(c));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 1]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 2]));
asm volatile ("mflo %0":"=r"(t1));
dst[1] = (((t1) + 32) >> 6);
/* pixel 2 */
asm volatile ("mult %0,%1"::"r"(A), "r"(c));
asm volatile ("madd %0,%1"::"r"(B), "r"(d));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 2]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 3]));
asm volatile ("mflo %0":"=r"(t1));
dst[2] = (((t1) + 32) >> 6);
/* pixel 3 */
asm volatile ("mult %0,%1"::"r"(A), "r"(d));
asm volatile ("madd %0,%1"::"r"(B), "r"(e));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 3]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 4]));
asm volatile ("mflo %0":"=r"(t1));
dst[3] = (((t1) + 32) >> 6);
/* pixel 4 */
asm volatile ("mult %0,%1"::"r"(A), "r"(e));
asm volatile ("madd %0,%1"::"r"(B), "r"(f));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 4]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 5]));
asm volatile ("mflo %0":"=r"(t1));
dst[4] = (((t1) + 32) >> 6);
/* pixel 5 */
asm volatile ("mult %0,%1"::"r"(A), "r"(f));
asm volatile ("madd %0,%1"::"r"(B), "r"(g));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 5]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 6]));
asm volatile ("mflo %0":"=r"(t1));
dst[5] = (((t1) + 32) >> 6);
/* pixel 6 */
asm volatile ("mult %0,%1"::"r"(A), "r"(g));
asm volatile ("madd %0,%1"::"r"(B), "r"(h));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 6]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 7]));
asm volatile ("mflo %0":"=r"(t1));
dst[6] = (((t1) + 32) >> 6);
/* pixel 7 */
asm volatile ("mult %0,%1"::"r"(A), "r"(h));
asm volatile ("madd %0,%1"::"r"(B), "r"(j));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 7]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 8]));
asm volatile ("mflo %0":"=r"(t1));
dst[7] = (((t1) + 32) >> 6);
/* advance both pointers to the next row */
dst += stride;
src += stride;
}
}
#endif
/*
 * avg_h264_chroma_mc2_c - blend a 2-pixel-wide weighted prediction into dst.
 *
 * For each of the h rows and each column k (0..1) the prediction is
 *     (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy, and the stored value is the
 * rounded-up mean of that prediction and the previous dst[k].
 *
 * dst    - destination, read and rewritten; rows are stride bytes apart
 * src    - source; src[0..2] and src[stride+0..stride+2] are read per row
 * stride - row pitch in bytes for both buffers
 * h      - number of rows
 * x, y   - weight selectors (not range-checked here)
 */
void avg_h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    /* Weights for the top-left, top-right, bottom-left, bottom-right taps. */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = x * (8 - y);
    const int w_bl = (8 - x) * y;
    const int w_br = x * y;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;
        int pred;

        pred = (w_tl * top[0] + w_tr * top[1] + w_bl * bot[0] + w_br * bot[1] + 32) >> 6;
        dst[0] = (dst[0] + pred + 1) >> 1;

        pred = (w_tl * top[1] + w_tr * top[2] + w_bl * bot[1] + w_br * bot[2] + 32) >> 6;
        dst[1] = (dst[1] + pred + 1) >> 1;

        dst += stride;
        src += stride;
    }
}
/*
 * avg_h264_chroma_mc4_c - blend a 4-pixel-wide weighted prediction into dst.
 *
 * For each of the h rows and each column k (0..3) the prediction is
 *     (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy, and the stored value is the
 * rounded-up mean of that prediction and the previous dst[k].
 *
 * dst    - destination, read and rewritten; rows are stride bytes apart
 * src    - source; src[0..4] and src[stride+0..stride+4] are read per row
 * stride - row pitch in bytes for both buffers
 * h      - number of rows
 * x, y   - weight selectors (not range-checked here)
 */
void avg_h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    /* Weights for the top-left, top-right, bottom-left, bottom-right taps. */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = x * (8 - y);
    const int w_bl = (8 - x) * y;
    const int w_br = x * y;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;
        int pred;

        pred = (w_tl * top[0] + w_tr * top[1] + w_bl * bot[0] + w_br * bot[1] + 32) >> 6;
        dst[0] = (dst[0] + pred + 1) >> 1;

        pred = (w_tl * top[1] + w_tr * top[2] + w_bl * bot[1] + w_br * bot[2] + 32) >> 6;
        dst[1] = (dst[1] + pred + 1) >> 1;

        pred = (w_tl * top[2] + w_tr * top[3] + w_bl * bot[2] + w_br * bot[3] + 32) >> 6;
        dst[2] = (dst[2] + pred + 1) >> 1;

        pred = (w_tl * top[3] + w_tr * top[4] + w_bl * bot[3] + w_br * bot[4] + 32) >> 6;
        dst[3] = (dst[3] + pred + 1) >> 1;

        dst += stride;
        src += stride;
    }
}
/*
 * avg_h264_chroma_mc8_c - blend an 8-pixel-wide, h-row block of weighted
 * 2x2-neighbourhood sums into dst (MIPS multiply-accumulate variant).
 *
 * For every row and every column k (k = 0..7) the prediction is intended to be
 *     (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * and the stored value is (dst[k] + prediction + 1) >> 1, i.e. the rounded-up
 * mean of the prediction and the value already in dst. A..D are derived from
 * x and y below; algebraically they add up to 64.
 *
 * dst    - destination, read and rewritten; 8 bytes per row, rows stride apart
 * src    - source; src[0..8] and src[stride+0..stride+8] are read per row
 * stride - distance in bytes between consecutive rows of both dst and src
 * h      - number of rows to produce
 * x, y   - weight selectors; presumably expected in 0..7 (not checked in this
 *          variant) -- TODO confirm against callers
 *
 * The sum of products is built with the MIPS mult/madd instructions and read
 * back with mflo, i.e. it lives in the HI/LO accumulator between statements.
 *
 * NOTE(review): every instruction is its own asm statement and none of them
 * declares HI/LO as an output or clobber, so the compiler is not told about
 * the dependency between them. Confirm that the toolchain in use never places
 * a HI/LO-modifying instruction inside a mult..mflo group.
 */
void avg_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
/* Weights of the top-left, top-right, bottom-left and bottom-right samples. */
const int A = (8 - x) *(8 - y);
const int B = (x) *(8 - y);
const int C = (8 - x) *(y);
const int D = (x) *(y);
int i;
/* The loop bound h here is the row-count parameter. */
for (i = 0; i < h; i++)
{
/* t1 receives the low word of the accumulated sum for the current pixel. */
int t1;
/* The nine current-row samples are loaded once; next-row samples are read
 * directly in the asm operands below. */
const int a = src[0];
const int b = src[1];
const int c = src[2];
const int d = src[3];
const int e = src[4];
const int f = src[5];
const int g = src[6];
/* NOTE(review): this local h (sample 7) shadows the row-count parameter h
 * inside the loop body only; the loop condition above is outside this block
 * and still sees the parameter. */
const int h = src[7];
const int j = src[8];
/* pixel 0: mult starts the sum, three madd add the remaining products */
asm volatile ("mult %0,%1"::"r"(A), "r"(a));
asm volatile ("madd %0,%1"::"r"(B), "r"(b));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 0]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 1]));
asm volatile ("mflo %0":"=r"(t1));
dst[0] = (((dst[0]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 1 */
asm volatile ("mult %0,%1"::"r"(A), "r"(b));
asm volatile ("madd %0,%1"::"r"(B), "r"(c));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 1]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 2]));
asm volatile ("mflo %0":"=r"(t1));
dst[1] = (((dst[1]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 2 */
asm volatile ("mult %0,%1"::"r"(A), "r"(c));
asm volatile ("madd %0,%1"::"r"(B), "r"(d));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 2]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 3]));
asm volatile ("mflo %0":"=r"(t1));
dst[2] = (((dst[2]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 3 */
asm volatile ("mult %0,%1"::"r"(A), "r"(d));
asm volatile ("madd %0,%1"::"r"(B), "r"(e));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 3]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 4]));
asm volatile ("mflo %0":"=r"(t1));
dst[3] = (((dst[3]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 4 */
asm volatile ("mult %0,%1"::"r"(A), "r"(e));
asm volatile ("madd %0,%1"::"r"(B), "r"(f));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 4]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 5]));
asm volatile ("mflo %0":"=r"(t1));
dst[4] = (((dst[4]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 5 */
asm volatile ("mult %0,%1"::"r"(A), "r"(f));
asm volatile ("madd %0,%1"::"r"(B), "r"(g));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 5]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 6]));
asm volatile ("mflo %0":"=r"(t1));
dst[5] = (((dst[5]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 6 */
asm volatile ("mult %0,%1"::"r"(A), "r"(g));
asm volatile ("madd %0,%1"::"r"(B), "r"(h));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 6]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 7]));
asm volatile ("mflo %0":"=r"(t1));
dst[6] = (((dst[6]) + (((t1) + 32) >> 6) + 1) >> 1);
/* pixel 7 */
asm volatile ("mult %0,%1"::"r"(A), "r"(h));
asm volatile ("madd %0,%1"::"r"(B), "r"(j));
asm volatile ("madd %0,%1"::"r"(C), "r"(src[stride + 7]));
asm volatile ("madd %0,%1"::"r"(D), "r"(src[stride + 8]));
asm volatile ("mflo %0":"=r"(t1));
dst[7] = (((dst[7]) + (((t1) + 32) >> 6) + 1) >> 1);
/* advance both pointers to the next row */
dst += stride;
src += stride;
}
}
#else /* ] [ */
/*
 * put_h264_chroma_mc2_c - store a 2-pixel-wide weighted prediction into dst.
 *
 * For each of the h rows and each column k (0..1):
 *     dst[k] = (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy (the four weights sum to 64).
 *
 * dst    - destination; 2 bytes written per row, rows are stride bytes apart
 * src    - source; src[0..2] and src[stride+0..stride+2] are read per row
 * stride - row pitch in bytes for both buffers
 * h      - number of rows
 * x, y   - weight selectors; asserted to lie in 0..7
 */
STATIC_FUNC void put_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    /* Weights for the top-left, top-right, bottom-left, bottom-right taps. */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = x * (8 - y);
    const int w_bl = (8 - x) * y;
    const int w_br = x * y;
    int rows_left;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (rows_left = h; rows_left > 0; rows_left--) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        dst[0] = (w_tl * top[0] + w_tr * top[1] + w_bl * bot[0] + w_br * bot[1] + 32) >> 6;
        dst[1] = (w_tl * top[1] + w_tr * top[2] + w_bl * bot[1] + w_br * bot[2] + 32) >> 6;

        dst += stride;
        src += stride;
    }
}
/*
 * put_h264_chroma_mc4_c - store a 4-pixel-wide weighted prediction into dst.
 *
 * For each of the h rows and each column k (0..3):
 *     dst[k] = (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy (the four weights sum to 64).
 *
 * dst    - destination; 4 bytes written per row, rows are stride bytes apart
 * src    - source; src[0..4] and src[stride+0..stride+4] are read per row
 * stride - row pitch in bytes for both buffers
 * h      - number of rows
 * x, y   - weight selectors; asserted to lie in 0..7
 */
STATIC_FUNC void put_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    /* Weights for the top-left, top-right, bottom-left, bottom-right taps. */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = x * (8 - y);
    const int w_bl = (8 - x) * y;
    const int w_br = x * y;
    int rows_left;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (rows_left = h; rows_left > 0; rows_left--) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        dst[0] = (w_tl * top[0] + w_tr * top[1] + w_bl * bot[0] + w_br * bot[1] + 32) >> 6;
        dst[1] = (w_tl * top[1] + w_tr * top[2] + w_bl * bot[1] + w_br * bot[2] + 32) >> 6;
        dst[2] = (w_tl * top[2] + w_tr * top[3] + w_bl * bot[2] + w_br * bot[3] + 32) >> 6;
        dst[3] = (w_tl * top[3] + w_tr * top[4] + w_bl * bot[3] + w_br * bot[4] + 32) >> 6;

        dst += stride;
        src += stride;
    }
}
/*
 * put_h264_chroma_mc8_c - store an 8-pixel-wide weighted prediction into dst.
 *
 * For each of the hh rows and each column k (0..7):
 *     dst[k] = (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy (the four weights sum to 64).
 * The eight columns are written out explicitly; the formula above is the
 * equivalent compact (inner-loop) form.
 *
 * dst    - destination; 8 bytes written per row, rows are stride bytes apart
 * src    - source; src[0..8] and src[stride+0..stride+8] are read per row
 * stride - row pitch in bytes for both buffers
 * hh     - number of rows
 * x, y   - weight selectors; asserted to lie in 0..7
 */
STATIC_FUNC void put_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int hh, int x, int y)
{
    /* Weights for the top-left, top-right, bottom-left, bottom-right taps. */
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int ii;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (ii = 0; ii < hh; ii++) {
        dst[0] = ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]) + 32) >> 6;
        dst[1] = ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]) + 32) >> 6;
        dst[2] = ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]) + 32) >> 6;
        dst[3] = ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]) + 32) >> 6;
        dst[4] = ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]) + 32) >> 6;
        dst[5] = ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]) + 32) >> 6;
        dst[6] = ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]) + 32) >> 6;
        dst[7] = ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]) + 32) >> 6;

        /* advance both pointers to the next row */
        dst += stride;
        src += stride;
    }
}
/*
 * avg_h264_chroma_mc2_c - blend a 2-pixel-wide weighted prediction into dst.
 *
 * For each of the h rows and each column k (0..1) the prediction is
 *     (A*src[k] + B*src[k+1] + C*src[stride+k] + D*src[stride+k+1] + 32) >> 6
 * with A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy, and the stored value is
 * (dst[k] + prediction + 1) >> 1, the rounded-up mean of old and new.
 *
 * dst    - destination, read and rewritten; rows are stride bytes apart
 * src    - source; src[0..2] and src[stride+0..stride+2] are read per row
 * stride - row pitch in bytes for both buffers
 * h      - number of rows
 * x, y   - weight selectors; asserted to lie in 0..7
 */
STATIC_FUNC void avg_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    /* Weights for the top-left, top-right, bottom-left, bottom-right taps. */
    const int w_tl = (8 - x) * (8 - y);
    const int w_tr = x * (8 - y);
    const int w_bl = (8 - x) * y;
    const int w_br = x * y;
    int rows_left;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (rows_left = h; rows_left > 0; rows_left--) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;
        int pred;

        pred = (w_tl * top[0] + w_tr * top[1] + w_bl * bot[0] + w_br * bot[1] + 32) >> 6;
        dst[0] = (dst[0] + pred + 1) >> 1;

        pred = (w_tl * top[1] + w_tr * top[2] + w_bl * bot[1] + w_br * bot[2] + 32) >> 6;
        dst[1] = (dst[1] + pred + 1) >> 1;

        dst += stride;
        src += stride;
    }
}
STATIC_FUNC void avg_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int hh, int x, int y)
{
const int A = (8-x)*(8-y);
const int B = ( x)*(8-y);
const int C = (8-x)*( y);
const int D = ( x)*( y);
int ii, jj;
#if 0 // re-rolled version is slower
for (ii = 0; ii < hh; ii++)
{
for (jj = 0; jj < 4; jj++)
dst[jj] = (dst[jj] + (((A*src[jj] + B*src[jj+1] + C*src[stride+jj] + D*src[stride+jj+1])+32)>>6) + 1)>>1;
dst += stride;
src += stride;
}
#else
assert(x<8 && y<8 && x>=0 && y>=0);
for (ii = 0; ii < hh; ii++)
{
dst[0] = (dst[0] + (((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1])+32)>>6) + 1)>>1;
dst[1] = (dst[1] + (((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2])+32)>>6) + 1)>>1;
dst[2] = (dst[2] + (((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3])+32)>>6) + 1)>>1;
dst[3] = (dst[3] + (((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4])+32)>>6) + 1)>>1;
dst+= stride;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -