📄 nic_postprocess.cpp
字号:
movq (mm1, mm5); /* mm1 = psum right 0 1w2 3 4 5r6 7 */
paddsw (mm1, 24+ebx); /* mm1 += vv[1] right 0 1 2 3 4 5 6 7 */
psllw (mm0, 1); /* mm0 <<= 1 0m1 2 3 4 5 6 7 */
psubsw (mm0, 64+ebx); /* mm0 -= vv[4] left 0m1 2 3 4 5 6 7 */
psllw (mm1, 1); /* mm1 <<= 1 0 1 2 3 4 5 6 7 */
psubsw (mm1, 72+ebx); /* mm1 -= vv[4] right 0 1m2 3 4 5 6 7 */
paddsw (mm0, 80+ebx); /* mm0 += vv[5] left 0m1 2 3 4 5 6 7 */
paddsw (mm1, 88+ebx); /* mm1 += vv[5] right 0 1m2 3 4 5 6 7 */
psrlw (mm0, 4); /* mm0 >>= 4 0m1 2 3 4 5 6 7 */
/* psum += vv[5] - p1 */
paddsw (mm4, 80+ebx); /* mm4 += vv[5] left 0 1 2 3 4m5 6 7 */
psrlw (mm1, 4); /* mm1 >>= 4 0 1m2 3 4 5 6 7 */
paddsw (mm5, 88+ebx); /* mm5 += vv[5] right 0 1 2 3 4 5 6 7 */
psubsw (mm4, eax); /* mm4 -= p1 left 0 1 2 3 4 5 6 7 */
packuswb (mm0, mm1); /* pack mm1, mm0 to mm0 0m1 2 3 4 5 6 7 */
psubsw (mm5, 8+eax); /* mm5 -= p1 right 0 1 2 3 4 5 6 7 */
/* v[2] = (((psum + vv[2]) << 1) - (vv[5] - vv[6])) >> 4 */
/* compute this in mm6 (left) and mm7 (right) */
__m64 mm6,mm7;
movq (mm6, mm4); /* mm6 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm6, 32+ebx); /* mm6 += vv[2] left 0 1 2 3 4 5 6 7 */
movq (mm7, mm5); /* mm7 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm7, 40+ebx); /* mm7 += vv[2] right 0 1 2 3 4 5 6 7 */
psllw (mm6, 1); /* mm6 <<= 1 0 1 2 3 4 5 6 7 */
psubsw (mm6, 80+ebx); /* mm6 -= vv[5] left 0 1 2 3 4 5 6 7 */
psllw (mm7, 1); /* mm7 <<= 1 0 1 2 3 4 5 6 7 */
psubsw (mm7, 88+ebx); /* mm7 -= vv[5] right 0 1 2 3 4 5 6 7 */
movq (ecx, mm0); /* v[1*stride] = mm0 0 1 2 3 4 5 6 7 */
paddsw (mm6, 96+ebx); /* mm6 += vv[6] left 0 1 2 3 4 5 6 7 */
ecx+=stride; /* ecx points at v[2*stride] 0 1 2 3 4 5 6 7 */
paddsw (mm7, 104+ebx); /* mm7 += vv[6] right 0 1 2 3 4 5 6 7 */
/* psum += vv[6] - p1 */
paddsw (mm4, 96+ebx); /* mm4 += vv[6] left 0 1 2 3 4 5 6 7 */
psrlw (mm6, 4); /* mm6 >>= 4 0 1 2 3 4 5 6 7 */
paddsw (mm5, 104+ebx); /* mm5 += vv[6] right 0 1 2 3 4 5 6 7 */
psrlw (mm7, 4); /* mm7 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm4, eax); /* mm4 -= p1 left 0 1 2 3 4 5 6 7 */
packuswb (mm6, mm7); /* pack mm7, mm6 to mm6 0 1 2 3 4 5 6 7 */
psubsw (mm5, 8+eax); /* mm5 -= p1 right 0 1 2 3 4 5 6 7 */
/* v[3] = (((psum + vv[3]) << 1) - (vv[6] - vv[7])) >> 4 */
/* compute this in mm0 (left) and mm1 (right) */
movq (mm0, mm4); /* mm0 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm0, 48+ebx); /* mm0 += vv[3] left 0 1 2 3 4 5 6 7 */
movq (mm1, mm5); /* mm1 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm1, 56+ebx); /* mm1 += vv[3] right 0 1 2 3 4 5 6 7 */
psllw (mm0, 1); /* mm0 <<= 1 0 1 2 3 4 5 6 7 */
psubsw (mm0, 96+ebx); /* mm0 -= vv[6] left 0 1 2 3 4 5 6 7 */
psllw (mm1, 1); /* mm1 <<= 1 0 1 2 3 4 5 6 7 */
psubsw (mm1, 104+ebx); /* mm1 -= vv[6] right 0 1 2 3 4 5 6 7 */
movq (ecx, mm6); /* v[2*stride] = mm6 0 1 2 3 4 5 6 7 */
paddsw (mm0, 112+ebx); /* mm0 += vv[7] left 0 1 2 3 4 5 6 7 */
paddsw (mm1, 120+ebx); /* mm1 += vv[7] right 0 1 2 3 4 5 6 7 */
ecx+=stride; /* ecx points at v[3*stride] 0 1 2 3 4 5 6 7 */
/* psum += vv[7] - p1 (running sum kept in mm4 = left half, mm5 = right half) */
/* the psrlw/packuswb lines interleaved below finish the v[3] result in mm0/mm1 */
paddsw (mm4, 112+ebx); /* mm4 += vv[7] left 0 1 2 3 4 5 6 7 */
psrlw (mm0, 4); /* mm0 >>= 4 0 1 2 3 4 5 6 7 */
paddsw (mm5, 120+ebx); /* mm5 += vv[7] right 0 1 2 3 4 5 6 7 */
psrlw (mm1, 4); /* mm1 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm4, eax); /* mm4 -= p1 left 0 1 2 3 4 5 6 7 */
packuswb( mm0, mm1); /* pack mm1, mm0 to mm0 0 1 2 3 4 5 6 7 */
psubsw (mm5, 8+eax); /* mm5 -= p1 right 0 1 2 3 4 5 6 7 */
/* v[4] = (((psum + vv[4]) << 1) + p1 - vv[1] - (vv[7] - vv[8])) >> 4 */
/* compute this in mm6 (left) and mm7 (right) */
movq (ecx, mm0); /* v[3*stride] = mm0 0 1 2 3 4 5 6 7 */
movq (mm6, mm4); /* mm6 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm6, 64+ebx); /* mm6 += vv[4] left 0 1 2 3 4 5 6 7 */
movq (mm7, mm5); /* mm7 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm7, 72+ebx); /* mm7 += vv[4] right 0 1 2 3 4 5 6 7 */
psllw (mm6, 1); /* mm6 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm6, eax); /* mm6 += p1 left 0 1 2 3 4 5 6 7 */
psllw (mm7, 1); /* mm7 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm7, 8+eax); /* mm7 += p1 right 0 1 2 3 4 5 6 7 */
psubsw (mm6, 16+ebx); /* mm6 -= vv[1] left 0 1 2 3 4 5 6 7 */
psubsw (mm7, 24+ebx); /* mm7 -= vv[1] right 0 1 2 3 4 5 6 7 */
psubsw (mm6, 112+ebx); /* mm6 -= vv[7] left 0 1 2 3 4 5 6 7 */
psubsw (mm7, 120+ebx); /* mm7 -= vv[7] right 0 1 2 3 4 5 6 7 */
paddsw (mm6, 128+ebx); /* mm6 += vv[8] left 0 1 2 3 4 5 6 7 */
ecx+=stride; /* ecx points at v[4*stride] 0 1 2 3 4 5 6 7 */
paddsw (mm7, 136+ebx); /* mm7 += vv[8] right 0 1 2 3 4 5 6 7 */
/* psum += vv[8] - vv[1] (running sum kept in mm4 = left half, mm5 = right half) */
/* the psrlw/packuswb lines interleaved below finish the v[4] result in mm6/mm7 */
paddsw (mm4, 128+ebx); /* mm4 += vv[8] left 0 1 2 3 4 5 6 7 */
psrlw (mm6, 4); /* mm6 >>= 4 0 1 2 3 4 5 6 7 */
paddsw (mm5, 136+ebx); /* mm5 += vv[8] right 0 1 2 3 4 5 6 7 */
psrlw (mm7, 4); /* mm7 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm4, 16+ebx); /* mm4 -= vv[1] left 0 1 2 3 4 5 6 7 */
packuswb (mm6, mm7); /* pack mm7, mm6 to mm6 0 1 2 3 4 5 6 7 */
psubsw (mm5, 24+ebx); /* mm5 -= vv[1] right 0 1 2 3 4 5 6 7 */
/* v[5] = (((psum + vv[5]) << 1) + (vv[1] - vv[2]) - vv[8] + p2) >> 4 */
/* compute this in mm0 (left) and mm1 (right) */
movq (mm0, mm4); /* mm0 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm0, 80+ebx); /* mm0 += vv[5] left 0 1 2 3 4 5 6 7 */
movq (mm1, mm5); /* mm1 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm1, 88+ebx); /* mm1 += vv[5] right 0 1 2 3 4 5 6 7 */
psllw (mm0, 1); /* mm0 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm0, 16+eax); /* mm0 += p2 left 0 1 2 3 4 5 6 7 */
psllw (mm1, 1); /* mm1 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm1, 24+eax); /* mm1 += p2 right 0 1 2 3 4 5 6 7 */
paddsw (mm0, 16+ebx); /* mm0 += vv[1] left 0 1 2 3 4 5 6 7 */
movq (ecx, mm6); /* v[4*stride] = mm6 0 1 2 3 4 5 6 7 */
paddsw (mm1, 24+ebx); /* mm1 += vv[1] right 0 1 2 3 4 5 6 7 */
psubsw (mm0, 32+ebx); /* mm0 -= vv[2] left 0 1 2 3 4 5 6 7 */
psubsw (mm1, 40+ebx); /* mm1 -= vv[2] right 0 1 2 3 4 5 6 7 */
psubsw (mm0, 128+ebx); /* mm0 -= vv[8] left 0 1 2 3 4 5 6 7 */
psubsw (mm1, 136+ebx); /* mm1 -= vv[8] right 0 1 2 3 4 5 6 7 */
/* psum += p2 - vv[2] */
paddsw (mm4, 16+eax); /* mm4 += p2 left 0 1 2 3 4 5 6 7 */
ecx+=stride; /* ecx points at v[5*stride] 0 1 2 3 4 5 6 7 */
paddsw (mm5, 24+eax); /* mm5 += p2 right 0 1 2 3 4 5 6 7 */
psubsw (mm4, 32+ebx); /* mm4 -= vv[2] left 0 1 2 3 4 5 6 7 */
psubsw (mm5, 40+ebx); /* mm5 -= vv[2] right 0 1 2 3 4 5 6 7 */
/* v[6] = (((psum + vv[6]) << 1) + (vv[2] - vv[3])) >> 4 */
/* compute this in mm6 (left) and mm7 (right) */
movq (mm6, mm4); /* mm6 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm6, 96+ebx); /* mm6 += vv[6] left 0 1 2 3 4 5 6 7 */
movq (mm7, mm5); /* mm7 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm7, 104+ebx); /* mm7 += vv[6] right 0 1 2 3 4 5 6 7 */
psllw (mm6, 1); /* mm6 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm6, 32+ebx); /* mm6 += vv[2] left 0 1 2 3 4 5 6 7 */
psllw (mm7, 1); /* mm7 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm7, 40+ebx); /* mm7 += vv[2] right 0 1 2 3 4 5 6 7 */
psrlw (mm0, 4); /* mm0 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm6, 48+ebx); /* mm6 -= vv[3] left 0 1 2 3 4 5 6 7 */
psrlw (mm1, 4); /* mm1 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm7, 56+ebx); /* mm7 -= vv[3] right 0 1 2 3 4 5 6 7 */
packuswb (mm0, mm1); /* pack mm1, mm0 to mm0 0 1 2 3 4 5 6 7 */
movq (ecx, mm0); /* v[5*stride] = mm0 0 1 2 3 4 5 6 7 */
/* psum += p2 - vv[3] */
paddsw (mm4, 16+eax); /* mm4 += p2 left 0 1 2 3 4 5 6 7 */
psrlw (mm6, 4); /* mm6 >>= 4 0 1 2 3 4 5 6 7 */
paddsw (mm5, 24+eax); /* mm5 += p2 right 0 1 2 3 4 5 6 7 */
psrlw (mm7, 4); /* mm7 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm4, 48+ebx); /* mm4 -= vv[3] left 0 1 2 3 4 5 6 7 */
ecx+=stride; /* ecx points at v[6*stride] 0 1 2 3 4 5 6 7 */
psubsw (mm5, 56+ebx); /* mm5 -= vv[3] right 0 1 2 3 4 5 6 7 */
/* v[7] = (((psum + vv[7]) << 1) + (vv[3] - vv[4])) >> 4 */
/* compute this in mm0 (left) and mm1 (right) */
movq (mm0, mm4); /* mm0 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm0, 112+ebx); /* mm0 += vv[7] left 0 1 2 3 4 5 6 7 */
movq (mm1, mm5); /* mm1 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm1, 120+ebx); /* mm1 += vv[7] right 0 1 2 3 4 5 6 7 */
psllw (mm0, 1); /* mm0 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm0, 48+ebx); /* mm0 += vv[3] left 0 1 2 3 4 5 6 7 */
psllw (mm1, 1); /* mm1 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm1, 56+ebx); /* mm1 += vv[3] right 0 1 2 3 4 5 6 7 */
packuswb( mm6, mm7); /* pack mm7, mm6 to mm6 0 1 2 3 4 5 6 7 */
psubsw (mm0, 64+ebx); /* mm0 -= vv[4] left 0 1 2 3 4 5 6 7 */
psubsw (mm1, 72+ebx); /* mm1 -= vv[4] right 0 1 2 3 4 5 6 7 */
psrlw (mm0, 4); /* mm0 >>= 4 0 1 2 3 4 5 6 7 */
movq (ecx, mm6); /* v[6*stride] = mm6 0 1 2 3 4 5 6 7 */
/* psum += p2 - vv[4] */
paddsw (mm4, 16+eax); /* mm4 += p2 left 0 1 2 3 4 5 6 7 */
paddsw (mm5, 24+eax); /* mm5 += p2 right 0 1 2 3 4 5 6 7 */
ecx+=stride; /* ecx points at v[7*stride] 0 1 2 3 4 5 6 7 */
psubsw (mm4, 64+ebx); /* mm4 -= vv[4] left 0 1 2 3 4 5 6 7 */
psrlw (mm1, 4); /* mm1 >>= 4 0 1 2 3 4 5 6 7 */
psubsw (mm5, 72+ebx); /* mm5 -= vv[4] right 0 1 2 3 4 5 6 7 */
/* v[8] = (((psum + vv[8]) << 1) + (vv[4] - vv[5])) >> 4 */
/* compute this in mm6 (left) and mm7 (right) */
movq (mm6, mm4); /* mm6 = psum left 0 1 2 3 4 5 6 7 */
paddsw (mm6, 128+ebx); /* mm6 += vv[8] left 0 1 2 3 4 5 6 7 */
movq (mm7, mm5); /* mm7 = psum right 0 1 2 3 4 5 6 7 */
paddsw (mm7, 136+ebx); /* mm7 += vv[8] right 0 1 2 3 4 5 6 7 */
psllw (mm6, 1); /* mm6 <<= 1 0 1 2 3 4 5 6 7 */
paddsw (mm6, 64+ebx); /* mm6 += v
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -