📄 refframe.c
字号:
*yBufLine++ = clip8Buf[tmp+256]; a = *reco++; *yBufLine++ = (u_int8) d; tmp = (b + 5*(-c + 4*(d + e) - f) + a + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; b = *reco++; *yBufLine++ = (u_int8) e; tmp = (c + 5*(-d + 4*(e + f) - a) + b + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; c = *reco++; *yBufLine++ = (u_int8) f; tmp = (d + 5*(-e + 4*(f + a) - b) + c + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; d = *reco++; *yBufLine++ = (u_int8) a; tmp = (e + 5*(-f + 4*(a + b) - c) + d + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; e = *reco++; *yBufLine++ = (u_int8) b; tmp = (f + 5*(-a + 4*(b + c) - d) + e + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; } // Interpolate remaining (2 + picWidth - 3)%6 values for (j = j + 6; j > 0; j--) { f = *reco++; *yBufLine++ = (u_int8) c; tmp = (a + 5*(-b + 4*(c + d) - e) + f + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; a = b; b = c; c = d; d = e; e = f; } // Interpolate 4 last values that are interpolated // across the picture border f = e; for (j = 4; j > 0; j--) { *yBufLine++ = (u_int8) c; tmp = (a + 5*(-b + 4*(c + d) - e) + f + 16) >> 5; *yBufLine++ = clip8Buf[tmp+256]; a = b; b = c; c = d; d = e; } // right extension, these pixels are indentical for (j = xExt-1; j > 0; j--) { // right boundary *yBufLine++ = (u_int8) e; *yBufLine++ = (u_int8) e; } yBufLine += extBufWidth; } // perform vertical boundary extension // the first line in the active area yBufLine = yBuf + 2 * yExt * extBufWidth; for (i = 0; i < (yExt * 2); i ++) { memcpy(yBuf + i * extBufWidth, yBufLine, extBufWidth * sizeof(u_int8)); } // the last line in the active area yBufLine += (picHeight * 2 - 2) * extBufWidth; for (i = 2 * (yExt + picHeight); i < (2 * (2 * yExt + picHeight)); i ++) { memcpy(yBuf + i * extBufWidth, yBufLine, extBufWidth * sizeof(u_int8)); }}#else// this is a SIMD version of the horizontal filtering functionvoid refFrmUpsampleLumaH(u_int8 *reco, u_int8 *yBuf, int picWidth, int picHeight, int xExt, int yExt){ int i, j; int32 tmp; u_int8 *yBufLine; int extBufWidth; // pRefFrm->yBufWidth/height has extended boundary extBufWidth = 2*(picWidth + 2*xExt); yBufLine = yBuf + 2*yExt*extBufWidth; // upsample by 2 and filter horizontally for (i = 0; i < picHeight; i ++) { int32 a, b, c, d, e, f; a = *reco++; // left pixel // left extension, these pixels are indentical for (j = xExt-2; j > 0; j--) { // left boundary *yBufLine++ = (u_int8) a; *yBufLine++ = (u_int8) a; } // Horizontal filtering to generate odd locations // Interpolate 6 values per iteration. We'll use variable // a,b,c,d,e and f in rotating manner to avoid unnecessary mem loads // this is doing filtering in SIMD style, two 16 bits values in one int32 a = (a << 16) | a; b = c = d = a; j = (2 + picWidth - 3) - 6; do { e = *reco++; f = *reco++; f = (e << 16) | f; e = (d << 16) | e; tmp = a + 5*(-b + 4*(c + d) - e) + f + 0x20102010; *yBufLine++ = (u_int8) b; *yBufLine++ = clip8Buf[tmp>>(16+5)]; *yBufLine++ = (u_int8) c; *yBufLine++ = clip8Buf[(tmp >> 5) & 0x000007ff]; a = *reco++; b = *reco++; b = (a << 16) | b; a = (f << 16) | a; tmp = c + 5*(-d + 4*(e + f) - a) + b + 0x20102010; *yBufLine++ = (u_int8) d; *yBufLine++ = clip8Buf[tmp>>(16+5)]; *yBufLine++ = (u_int8) e; *yBufLine++ = clip8Buf[(tmp >> 5) & 0x000007ff]; c = *reco++; d = *reco++; d = (c << 16) | d; c = (b << 16) | c; tmp = e + 5*(-f + 4*(a + b) - c) + d + 0x20102010; *yBufLine++ = (u_int8) f; *yBufLine++ = clip8Buf[tmp>>(16+5)]; *yBufLine++ = (u_int8) a; *yBufLine++ = clip8Buf[(tmp >> 5) & 0x000007ff]; j -= 6; } while (j >= 0); // Interpolate remaining (2 + picWidth - 3)%6 values for (j = j + 6; j > 0; j--) { e = *reco++; tmp = f + 5*(-a + 4*(b + c) - d) + e + 0x20102010; *yBufLine++ = (u_int8) b; *yBufLine++ = clip8Buf[(tmp >> 5) & 0x000007ff]; f = a; a = b; b = c; c = d; d = e; } // Interpolate 4 last values that are interpolated // across the picture border e = d; for (j = 4; j > 0; j--) { tmp = f + 5*(-a + 4*(b + c) - d) + e + 0x20102010; *yBufLine++ = (u_int8) b; *yBufLine++ = clip8Buf[(tmp >> 5) & 0x000007ff]; f = a; a = b; b = c; c = d; } // right extension, these pixels are identical for (j = xExt-1; j > 0; j--) { // right boundary *yBufLine++ = (u_int8) e; *yBufLine++ = (u_int8) e; } yBufLine += extBufWidth; } // perform vertical boundary extension // the first line in the active area yBufLine = yBuf + 2 * yExt * extBufWidth; for (i = 0; i < (yExt * 2); i ++) { memcpy(yBuf + i * extBufWidth, yBufLine, extBufWidth * sizeof(u_int8)); } // the last line in the active area yBufLine += (picHeight * 2 - 2) * extBufWidth; for (i = 2 * (yExt + picHeight); i < (2 * (2 * yExt + picHeight)); i ++) { memcpy(yBuf + i * extBufWidth, yBufLine, extBufWidth * sizeof(u_int8)); }}#endif/* * * refFrmUpsampleLumaV * * Parameters: * yBuf Luma reference pixels * picWidth Picture width in pixels * picHeight Picture height in pixels * xExt Reference frame x-extension in pixels * yExt Reference frame y-extension in pixels * * Function: * * Vertical upsampling and filtering of luma. * * Returns: * - */void refFrmUpsampleLumaV(u_int8 *yBuf, int picWidth, int picHeight, int xExt, int yExt){ int extBufWidth; u_int8 *lnPtr, *tmpLnPtr; int i, j, i2; int32 a, b, c, d, e, f; int32 tmp; // yBuf has extended boundary extBufWidth = 2*(picWidth + 2*xExt); // first line affected by vertical filtering lnPtr = yBuf + (2*yExt - 3) * extBufWidth; // interpolate 4 lines at a time for (i = picHeight + 4; i > 0; i -= 4) { tmpLnPtr = lnPtr + 2*xExt - 5*extBufWidth;#if 1 for (j = picWidth; j > 0; j--) { a = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; b = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; c = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; d = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; e = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; f = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; // 0x2010 is combination of rounding constant (16) and clip table offset (256)<<5 *((int16 *)(tmpLnPtr-7*extBufWidth)) = (int16) (a + 5*(-b + 4*(c + d) - e) + f + 0x2010); a = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; *((int16 *)(tmpLnPtr-7*extBufWidth)) = (int16) (b + 5*(-c + 4*(d + e) - f) + a + 0x2010); b = *tmpLnPtr, tmpLnPtr += 2*extBufWidth; *((int16 *)(tmpLnPtr-7*extBufWidth)) = (int16) (c + 5*(-d + 4*(e + f) - a) + b + 0x2010); c = *tmpLnPtr; *((int16 *)(tmpLnPtr-5*extBufWidth)) = (int16) (d + 5*(-e + 4*(f + a) - b) + c + 0x2010); tmpLnPtr += -16*extBufWidth + 2; }#else // this is a SIMD version of vertical filtering. // works on little endian CPUs only for (j = picWidth; j > 0; j -= 2) { a = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; b = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; c = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; d = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; e = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; f = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; // 0x2010 is combination of rounding constant (16) and clip table offset (256<<5) *((int32 *)(tmpLnPtr-7*extBufWidth)) = (a + 5*(-b + 4*(c + d) - e) + f + 0x20102010); a = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; *((int32 *)(tmpLnPtr-7*extBufWidth)) = (b + 5*(-c + 4*(d + e) - f) + a + 0x20102010); b = *((int32 *)tmpLnPtr) & 0x00ff00ff, tmpLnPtr += 2*extBufWidth; *((int32 *)(tmpLnPtr-7*extBufWidth)) = (c + 5*(-d + 4*(e + f) - a) + b + 0x20102010); c = *((int32 *)tmpLnPtr) & 0x00ff00ff; *((int32 *)(tmpLnPtr-5*extBufWidth)) = (d + 5*(-e + 4*(f + a) - b) + c + 0x20102010); tmpLnPtr += -16*extBufWidth + 4; }#endif // horizontal filtering of 4 lines for (i2 = 4; i2 > 0; i2--) { a = *((int16 *) (lnPtr + 2*xExt)); // left pixel // left extension, these pixels are indentical tmp = clip8Buf[a>>5]; for (j = xExt-2; j > 0; j--) { // left boundary *lnPtr++ = (u_int8) tmp; *lnPtr++ = (u_int8) tmp; } // Horizontal filtering to generate odd locations // Interpolate 6 values per iteration. We'll use variable // a,b,c,d,e and f in rotating manner to avoid unnecessary mem loads. // rounding constant and clip table offset were added during vertical filtering b = c = d = e = a; for (j = (2 + picWidth - 3) - 6; j >= 0; j -= 6) { f = *((int16 *) (lnPtr + 6)); *lnPtr++ = clip8Buf[c>>5]; tmp = (a + 5*(-b + 4*(c + d) - e) + f) >> 10; *lnPtr++ = clip8Buf[tmp]; a = *((int16 *) (lnPtr + 6)); *lnPtr++ = clip8Buf[d>>5]; tmp = (b + 5*(-c + 4*(d + e) - f) + a) >> 10; *lnPtr++ = clip8Buf[tmp]; b = *((int16 *) (lnPtr + 6)); *lnPtr++ = clip8Buf[e>>5]; tmp = (c + 5*(-d + 4*(e + f) - a) + b) >> 10; *lnPtr++ = clip8Buf[tmp]; c = *((int16 *) (lnPtr + 6)); *lnPtr++ = clip8Buf[f>>5]; tmp = (d + 5*(-e + 4*(f + a) - b) + c) >> 10; *lnPtr++ = clip8Buf[tmp]; d = *((int16 *) (lnPtr + 6)); *lnPtr++ = clip8Buf[a>>5]; tmp = (e + 5*(-f + 4*(a + b) - c) + d) >> 10;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -