📄 dsputil.c
字号:
src+=srcStride;
}
}
/*
 * Vertical 6-tap low-pass filter over an 8x8 block.
 *
 * For each of the 8 columns, 13 vertically adjacent samples are read
 * (rows -2 .. +10 relative to src) and 8 output samples are produced with
 * the taps (1, -5, 20, 20, -5, 1).  Each sum is rounded (+16), scaled (>>5)
 * and mapped through cropTbl + MAX_NEG_CROP before being stored.
 *
 * dst        top-left of the 8x8 destination block
 * src        top-left of the source block; rows -2 .. +10 must be readable
 * dstStride  distance between destination rows
 * srcStride  distance between source rows
 *
 * NOTE(review): cropTbl is defined elsewhere; presumably it clamps the
 * filtered value to the 0..255 range -- confirm against its definition.
 */
STATIC_FUNC void put_h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)
{
    const int columns = 8;
    const int rows = 8;
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    int col;

    for (col = 0; col < columns; col++)
    {
        int window[13];   /* window[k] holds the sample at row (k - 2) */
        int k;

        /* Fetch the whole column before any store, as the unrolled form did. */
        for (k = 0; k < 13; k++)
        {
            window[k] = src[(k - 2) * srcStride];
        }

        for (k = 0; k < rows; k++)
        {
            const int centre = window[k + 2] + window[k + 3];
            const int near   = window[k + 1] + window[k + 4];
            const int far    = window[k]     + window[k + 5];

            dst[k * dstStride] = clip[((centre * 20 - near * 5 + far) + 16) >> 5];
        }

        dst++;
        src++;
    }
}
#ifdef _MIPS_LINUX_ /* [ */
void put_h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride)
{
const int hh = 8;
const int ww = 8;
uint8_t *cm = cropTbl + 1024;
int ii;
src -= 2 * srcStride;
for (ii = 0; ii < hh + 5; ii++)
{
const int srcB = src[ - 2];
const int srcA = src[ - 1];
const int src0 = src[0];
const int src1 = src[1];
const int src2 = src[2];
const int src3 = src[3];
const int src4 = src[4];
const int src5 = src[5];
const int src6 = src[6];
const int src7 = src[7];
const int src8 = src[8];
const int src9 = src[9];
const int src10 = src[10];
const int twenty = 20;
const int five = 5;
asm volatile ("mtlo %0": /* no outputs */ : "r"(srcB + src3));
asm volatile ("madd %0,%1"::"r"(src0 + src1), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(srcA + src2), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[0]));
//tmp[0] = (src0 + src1) * 20 - (srcA + src2) * 5 + (srcB + src3);
asm volatile ("mtlo %0": /* no outputs */ : "r"(srcA + src4));
asm volatile ("madd %0,%1"::"r"(src1 + src2), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src0 + src3), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[1]));
//tmp[1] = (src1 + src2) * 20 - (src0 + src3) * 5 + (srcA + src4);
asm volatile ("mtlo %0": /* no outputs */ : "r"(src0 + src5));
asm volatile ("madd %0,%1"::"r"(src2 + src3), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src1 + src4), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[2]));
//tmp[2] = (src2 + src3) * 20 - (src1 + src4) * 5 + (src0 + src5);
asm volatile ("mtlo %0": /* no outputs */ : "r"(src1 + src6));
asm volatile ("madd %0,%1"::"r"(src3 + src4), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src2 + src5), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[3]));
//tmp[3] = (src3 + src4) * 20 - (src2 + src5) * 5 + (src1 + src6);
asm volatile ("mtlo %0": /* no outputs */ : "r"(src2 + src7));
asm volatile ("madd %0,%1"::"r"(src4 + src5), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src3 + src6), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[4]));
//tmp[4] = (src4 + src5) * 20 - (src3 + src6) * 5 + (src2 + src7);
asm volatile ("mtlo %0": /* no outputs */ : "r"(src3 + src8));
asm volatile ("madd %0,%1"::"r"(src5 + src6), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src4 + src7), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[5]));
//tmp[5] = (src5 + src6) * 20 - (src4 + src7) * 5 + (src3 + src8);
asm volatile ("mtlo %0": /* no outputs */ : "r"(src4 + src9));
asm volatile ("madd %0,%1"::"r"(src6 + src7), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src5 + src8), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[6]));
//tmp[6] = (src6 + src7) * 20 - (src5 + src8) * 5 + (src4 + src9);
asm volatile ("mtlo %0": /* no outputs */ : "r"(src5 + src10));
asm volatile ("madd %0,%1"::"r"(src7 + src8), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(src6 + src9), "r"(five));
asm volatile ("mflo %0":"=r"(tmp[7]));
//tmp[7] = (src7 + src8) * 20 - (src6 + src9) * 5 + (src5 + src10);
tmp += tmpStride;
src += srcStride;
}
tmp -= tmpStride * (hh + 5 - 2);
for (ii = 0; ii < ww; ii++)
{
const int tmpB = tmp[ - 2 *tmpStride];
const int tmpA = tmp[ - 1 *tmpStride];
const int tmp0 = tmp[0 *tmpStride];
const int tmp1 = tmp[1 *tmpStride];
const int tmp2 = tmp[2 *tmpStride];
const int tmp3 = tmp[3 *tmpStride];
const int tmp4 = tmp[4 *tmpStride];
const int tmp5 = tmp[5 *tmpStride];
const int tmp6 = tmp[6 *tmpStride];
const int tmp7 = tmp[7 *tmpStride];
const int tmp8 = tmp[8 *tmpStride];
const int tmp9 = tmp[9 *tmpStride];
const int tmp10 = tmp[10 *tmpStride];
const int twenty = 20;
const int five = 5;
int t1;
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmpB + tmp3) + 512));
asm volatile ("madd %0,%1"::"r"(tmp0 + tmp1), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmpA + tmp2), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[0 *dstStride] = cm[t1 >> 10];
// dst[0 * dstStride] = cm[(((tmp0 + tmp1)*20 - (tmpA + tmp2) * 5 + (tmpB + tmp3))+512)>>10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmpA + tmp4) + 512));
asm volatile ("madd %0,%1"::"r"(tmp1 + tmp2), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp0 + tmp3), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[1 * dstStride] = cm[t1 >> 10];
// dst[1 * dstStride] = cm[(((tmp1 + tmp2) * 20 - (tmp0 + tmp3) * 5 + (tmpA + tmp4)) + 512) >> 10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmp0 + tmp5) + 512));
asm volatile ("madd %0,%1"::"r"(tmp2 + tmp3), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp1 + tmp4), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[2 * dstStride] = cm[t1 >> 10];
// dst[2 * dstStride] = cm[(((tmp2 + tmp3) * 20 - (tmp1 + tmp4) * 5 + (tmp0 + tmp5)) + 512) >> 10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmp1 + tmp6) + 512));
asm volatile ("madd %0,%1"::"r"(tmp3 + tmp4), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp2 + tmp5), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[3 * dstStride] = cm[t1 >> 10];
// dst[3 * dstStride] = cm[(((tmp3 + tmp4) * 20 - (tmp2 + tmp5) * 5 + (tmp1 + tmp6)) + 512) >> 10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmp2 + tmp7) + 512));
asm volatile ("madd %0,%1"::"r"(tmp4 + tmp5), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp3 + tmp6), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[4 * dstStride] = cm[t1 >> 10];
// dst[4 * dstStride] = cm[(((tmp4 + tmp5) * 20 - (tmp3 + tmp6) * 5 + (tmp2 + tmp7)) + 512) >> 10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmp3 + tmp8) + 512));
asm volatile ("madd %0,%1"::"r"(tmp5 + tmp6), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp4 + tmp7), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[5 *dstStride] = cm[t1 >> 10];
// dst[5 * dstStride] = cm[(((tmp5 + tmp6) * 20 - (tmp4 + tmp7) * 5 + (tmp3 + tmp8)) + 512) >> 10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmp4 + tmp9) + 512));
asm volatile ("madd %0,%1"::"r"(tmp6 + tmp7), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp5 + tmp8), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[6 *dstStride] = cm[t1 >> 10];
// dst[6 * dstStride] = cm[(((tmp6 + tmp7) * 20 - (tmp5 + tmp8) * 5 + (tmp4 + tmp9)) + 512) >> 10];
asm volatile ("mtlo %0": /* no outputs */ : "r"((tmp5 + tmp10) + 512));
asm volatile ("madd %0,%1"::"r"(tmp7 + tmp8), "r"(twenty));
asm volatile ("msub %0,%1"::"r"(tmp6 + tmp9), "r"(five));
asm volatile ("mflo %0":"=r"(t1));
dst[7 *dstStride] = cm[t1 >> 10];
// dst[7 * dstStride] = cm[(((tmp7 + tmp8) * 20 - (tmp6 + tmp9) * 5 + (tmp5 + tmp10)) + 512) >> 10];
dst++;
tmp++;
}
}
#else /* ] [ */
/*
 * Portable C build of the 8x8 two-pass (horizontal then vertical) 6-tap
 * filter.
 *
 * Pass 1 filters 13 source rows (2 above the block through 10 below its
 * first row) horizontally with taps (1, -5, 20, 20, -5, 1) and stores the
 * unscaled sums in tmp.  Pass 2 filters tmp vertically with the same taps,
 * rounds (+512), scales (>>10) and maps the result through
 * cropTbl + MAX_NEG_CROP into dst.
 *
 * dst        top-left of the 8x8 destination block
 * tmp        scratch area; 13 rows of 8 int16_t, tmpStride elements apart
 * src        top-left of the source block; rows -2..+10 and columns
 *            -2..+10 must be readable
 * dstStride  distance between destination rows
 * tmpStride  distance between scratch rows (in int16_t elements)
 * srcStride  distance between source rows
 */
void put_h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride)
{
    const int hh = 8;
    const int ww = 8;
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
    int ii;

    /* Pass 1: horizontal filter, starting two rows above the block. */
    src -= 2*srcStride;
    for (ii = 0; ii < hh + 5; ii++)
    {
        /* Explicit 'int': implicit-int declarations are not valid C99/C11. */
        const int srcB = src[-2];
        const int srcA = src[-1];
        const int src0 = src[0];
        const int src1 = src[1];
        const int src2 = src[2];
        const int src3 = src[3];
        const int src4 = src[4];
        const int src5 = src[5];
        const int src6 = src[6];
        const int src7 = src[7];
        const int src8 = src[8];
        const int src9 = src[9];
        const int src10 = src[10];

        tmp[0] = (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3);
        tmp[1] = (src1+src2)*20 - (src0+src3)*5 + (srcA+src4);
        tmp[2] = (src2+src3)*20 - (src1+src4)*5 + (src0+src5);
        tmp[3] = (src3+src4)*20 - (src2+src5)*5 + (src1+src6);
        tmp[4] = (src4+src5)*20 - (src3+src6)*5 + (src2+src7);
        tmp[5] = (src5+src6)*20 - (src4+src7)*5 + (src3+src8);
        tmp[6] = (src6+src7)*20 - (src5+src8)*5 + (src4+src9);
        tmp[7] = (src7+src8)*20 - (src6+src9)*5 + (src5+src10);

        tmp += tmpStride;
        src += srcStride;
    }

    /* Rewind so that tmp[-2*tmpStride] is the first row written above. */
    tmp -= tmpStride * (hh + 5 - 2);

    /* Pass 2: vertical filter over the intermediate values, one column per iteration. */
    for (ii = 0; ii < ww; ii++)
    {
        const int tmpB = tmp[-2*tmpStride];
        const int tmpA = tmp[-1*tmpStride];
        const int tmp0 = tmp[0 *tmpStride];
        const int tmp1 = tmp[1 *tmpStride];
        const int tmp2 = tmp[2 *tmpStride];
        const int tmp3 = tmp[3 *tmpStride];
        const int tmp4 = tmp[4 *tmpStride];
        const int tmp5 = tmp[5 *tmpStride];
        const int tmp6 = tmp[6 *tmpStride];
        const int tmp7 = tmp[7 *tmpStride];
        const int tmp8 = tmp[8 *tmpStride];
        const int tmp9 = tmp[9 *tmpStride];
        const int tmp10 = tmp[10*tmpStride];

        dst[0*dstStride] = cm[(((tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3))+512)>>10];
        dst[1*dstStride] = cm[(((tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4))+512)>>10];
        dst[2*dstStride] = cm[(((tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5))+512)>>10];
        dst[3*dstStride] = cm[(((tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6))+512)>>10];
        dst[4*dstStride] = cm[(((tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7))+512)>>10];
        dst[5*dstStride] = cm[(((tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8))+512)>>10];
        dst[6*dstStride] = cm[(((tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9))+512)>>10];
        dst[7*dstStride] = cm[(((tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10))+512)>>10];

        dst++;
        tmp++;
    }
}
#endif /* ] */
#endif /* ] */
/*
 * Vertical low-pass filter over a 16x16 block, built from four calls to
 * put_h264_qpel8_v_lowpass: upper-left, upper-right, lower-left, lower-right
 * (in that order).  The right quadrants start 8 columns in; the lower
 * quadrants start 8 rows down in both dst and src.
 */
STATIC_FUNC void put_h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)
{
    int half;

    for (half = 0; half < 2; half++)
    {
        uint8_t *dstRow = dst + half * 8 * dstStride;
        uint8_t *srcRow = src + half * 8 * srcStride;

        put_h264_qpel8_v_lowpass(dstRow,     srcRow,     dstStride, srcStride);
        put_h264_qpel8_v_lowpass(dstRow + 8, srcRow + 8, dstStride, srcStride);
    }
}
/*
 * Horizontal low-pass filter over a 16x16 block, built from four calls to
 * put_h264_qpel8_h_lowpass (defined elsewhere in this file): upper-left,
 * upper-right, lower-left, lower-right (in that order).  The right quadrants
 * start 8 columns in; the lower quadrants start 8 rows down in both dst and
 * src.
 */
STATIC_FUNC void put_h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)
{
    int half;

    for (half = 0; half < 2; half++)
    {
        uint8_t *dstRow = dst + half * 8 * dstStride;
        uint8_t *srcRow = src + half * 8 * srcStride;

        put_h264_qpel8_h_lowpass(dstRow,     srcRow,     dstStride, srcStride);
        put_h264_qpel8_h_lowpass(dstRow + 8, srcRow + 8, dstStride, srcStride);
    }
}
/*
 * Two-pass (horizontal + vertical) low-pass filter over a 16x16 block, built
 * from four calls to put_h264_qpel8_hv_lowpass: upper-left, upper-right,
 * lower-left, lower-right (in that order).
 *
 * The right quadrants use dst + 8, tmp + 8 and src + 8.  The lower quadrants
 * advance dst and src by 8 rows but pass the same tmp pointers as the upper
 * ones, so the scratch rows are reused.
 */
STATIC_FUNC void put_h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride)
{
    int half;

    for (half = 0; half < 2; half++)
    {
        uint8_t *dstRow = dst + half * 8 * dstStride;
        uint8_t *srcRow = src + half * 8 * srcStride;

        put_h264_qpel8_hv_lowpass(dstRow,     tmp,     srcRow,     dstStride, tmpStride, srcStride);
        put_h264_qpel8_hv_lowpass(dstRow + 8, tmp + 8, srcRow + 8, dstStride, tmpStride, srcStride);
    }
}
/*
 * Horizontal 6-tap low-pass filter over a 2x2 block, averaged into dst.
 *
 * For each of the 2 rows, 7 horizontally adjacent samples (columns -2 .. +4)
 * are filtered with taps (1, -5, 20, 20, -5, 1), rounded (+16), scaled (>>5)
 * and mapped through cropTbl + MAX_NEG_CROP.  The result is then combined
 * with the value already in dst as (old + new + 1) >> 1.
 *
 * dst        top-left of the 2x2 destination block (read and written)
 * src        top-left of the source block; columns -2 .. +4 must be readable
 * dstStride  distance between destination rows
 * srcStride  distance between source rows
 */
STATIC_FUNC void avg_h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)
{
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    int rowsLeft = 2;

    while (rowsLeft-- > 0)
    {
        /* Read the full 7-sample window before touching dst. */
        const int m2 = src[-2];
        const int m1 = src[-1];
        const int p0 = src[0];
        const int p1 = src[1];
        const int p2 = src[2];
        const int p3 = src[3];
        const int p4 = src[4];

        const int sum0 = (p0 + p1) * 20 - (m1 + p2) * 5 + (m2 + p3);
        const int sum1 = (p1 + p2) * 20 - (p0 + p3) * 5 + (m1 + p4);

        dst[0] = (dst[0] + clip[(sum0 + 16) >> 5] + 1) >> 1;
        dst[1] = (dst[1] + clip[(sum1 + 16) >> 5] + 1) >> 1;

        dst += dstStride;
        src += srcStride;
    }
}
STATIC_FUNC void avg_h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)
{
const int w=2;
uint8_t *cm = cropTbl + MAX_NEG_CROP;
int i;
for(i=0; i<w; i++)
{
const int srcB= src[-2*srcStride];
const int srcA= src[-1*srcStride];
const int src0= src[0 *srcStride];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -