📄 idct_llm.cpp
字号:
; ------------------------------------------------------------------
; Tail of the output stage for the current group. The beginning of
; this group lies above this excerpt, so only the steps visible here
; are described. "slot k" below means the quadword at [esi + 8*k].
;
; Visible work:
;   * p2(34,*) finishes: shift right by 4, then packuswb mm4, mm6
;     (signed words -> unsigned bytes with saturation).
;   * p2(35,*): slot 1 is added to mm3 and subtracted from mm5, both
;     get const_0x0808 added, are arithmetically shifted right by 4
;     and packed the same way into mm3.
;   * p2(36..41): punpck* byte/word/dword interleaves rearrange the
;     packed bytes in mm1, mm7, mm4, mm3 into four 8-byte results.
;   * p2(42,*)/p2(43,*): four 32-bit addresses are read from [edi],
;     [edi+4], [edi+8], [edi+12]; output_col is added to each and one
;     8-byte result is stored through each of them.
; NOTE(review): const_0x0808 and output_col are defined outside this
; view; const_0x0808 presumably carries rounding/level-shift bias and
; the [edi] entries presumably are output row pointers -- confirm.
; The scalar (ebx/ecx/edx) instructions are interleaved with the MMX
; ones; the trailing ;p2(a,b) tags are the original step labels.
; ------------------------------------------------------------------
psraw mm6, 4 ;p2(34,6)
paddw mm3, [esi + 8*1] ;p2(35,1)
packuswb mm4, mm6 ;p2(34,7)
psubw mm5, [esi + 8*1] ;p2(35,2)
movq mm0, mm1 ;p2(36,0)
paddw mm3, const_0x0808 ;p2(35,3)
paddw mm5, const_0x0808 ;p2(35,4)
punpcklbw mm0, mm7 ;p2(36,1)
psraw mm3, 4 ;p2(35,5)
movq mm2, mm4 ;p2(37,0)
psraw mm5, 4 ;p2(35,6)
movq mm6, mm0 ;p2(38,0)
packuswb mm3, mm5 ;p2(35,7)
; From here the first three destination addresses are fetched while
; the byte interleaves proceed.
mov ebx, [edi] ;p2(42,0)
punpckhbw mm7, mm1 ;p2(36,2)
mov ecx, [edi+4] ;p2(42,1)
punpcklbw mm2, mm3 ;p2(37,1)
mov edx, [edi+8] ;p2(42,2)
punpckhbw mm3, mm4 ;p2(37,2)
add ebx, output_col ;p2(42,3)
punpcklwd mm0, mm2 ;p2(38,1)
movq mm5, mm3 ;p2(39,0)
punpckhwd mm6, mm2 ;p2(38,2)
movq mm1, mm0 ;p2(40,0)
punpcklwd mm3, mm7 ;p2(39,1)
add ecx, output_col ;p2(42,4)
add edx, output_col ;p2(42,5)
punpckldq mm0, mm3 ;p2(40,1)
punpckhdq mm1, mm3 ;p2(40,2)
; Stores: mm0 -> [ebx], mm1 -> [ecx], mm4 -> [edx]; ebx is then
; reloaded from [edi+12] for the fourth store (mm6).
movq [ebx], mm0 ;p2(43,0)
punpckhwd mm5, mm7 ;p2(39,2)
movq [ecx], mm1 ;p2(43,1)
movq mm4, mm6 ;p2(41,0)
mov ebx, [edi+12] ;p2(43,3)
punpckldq mm4, mm5 ;p2(41,1)
add ebx, output_col ;p2(43,4)
punpckhdq mm6, mm5 ;p2(41,2)
movq [edx], mm4 ;p2(43,2)
movq [ebx], mm6 ;p2(43,5)
//************************************************************
// Process next 4 rows
; Step both cursors to the next group: esi advances 64 bytes, i.e.
; past eight 8-byte slots (addressed below as [esi + 8*k], k = 0..7),
; and edi advances 16 bytes, i.e. past the four 4-byte entries that
; the store code reads as [edi] .. [edi+12].
add esi, 64
add edi, 16
//transpose next 4 rows of wsptr
; First half -- slots 0, 2, 4, 6. Treating each slot as four 16-bit
; words, punpcklwd/punpckhwd followed by punpckldq/punpckhdq form a
; 4x4 word transpose. The four results are written back in place to
; slots 0, 2, 4 and 6 (tran(16)..tran(19)).
movq mm0, [esi+8*0] ;tran(0)
movq mm1, mm0 ;tran(1)
movq mm2, [esi+8*2] ;tran(2)
punpcklwd mm0, mm2 ;tran(3)
movq mm3, [esi+8*4] ;tran(5)
punpckhwd mm1, mm2 ;tran(4)
movq mm5, [esi+8*6] ;tran(7)
movq mm4, mm3 ;tran(6)
movq mm6, mm0 ;tran(10)
punpcklwd mm3, mm5 ;tran(8)
movq mm7, mm1 ;tran(11)
punpckldq mm0, mm3 ;tran(12)
punpckhwd mm4, mm5 ;tran(9)
movq [esi+8*0], mm0 ;tran(16)
punpckhdq mm6, mm3 ;tran(13)
; Second half -- slots 1, 3, 5, 7 get the same 4x4 word transpose,
; but the results stay in registers instead of being stored:
; mm0 (tran(32)), mm6 (tran(33)), mm1 (tran(34)), mm7 (tran(35)).
; The remaining first-half stores are interleaved with these loads.
movq mm0, [esi+8*1] ;tran(20)
punpckldq mm1, mm4 ;tran(14)
movq [esi+8*2], mm6 ;tran(17)
punpckhdq mm7, mm4 ;tran(15)
movq [esi+8*4], mm1 ;tran(18)
movq mm1, mm0 ;tran(21)
movq mm3, [esi+8*5] ;tran(25)
movq mm2, [esi+8*3] ;tran(22)
movq mm4, mm3 ;tran(26)
punpcklwd mm0, mm2 ;tran(23)
movq [esi+8*6], mm7 ;tran(19)
punpckhwd mm1, mm2 ;tran(24)
movq mm5, [esi+8*7] ;tran(27)
punpcklwd mm3, mm5 ;tran(28)
movq mm6, mm0 ;tran(30)
movq mm7, mm1 ;tran(31)
punpckhdq mm6, mm3 ;tran(33)
punpckhwd mm4, mm5 ;tran(29)
; The p2(1,*) step starts while the transpose is still finishing:
; mm2 accumulates mm6 + mm7 + slot 2 + slot 6 (slots 2 and 6 now hold
; the transposed data written above). mm5 is free for this because
; its last transpose use was tran(29).
movq mm2, mm6 ;p2(1,0)
punpckhdq mm7, mm4 ;tran(35)
movq mm5, [esi + 8*2] ;p2(1,2)
paddw mm2, mm7 ;p2(1,1)
paddw mm5, [esi + 8*6] ;p2(1,3)
punpckldq mm0, mm3 ;tran(32)
paddw mm2, mm5 ;p2(1,4)
punpckldq mm1, mm4 ;tran(34)
; ------------------------------------------------------------------
; Multiply-accumulate stage. "slot k" means the quadword at
; [esi + 8*k]. On entry mm2 holds the four-term word sum
; mm6 + mm7 + slot 2 + slot 6 built by the preceding p2(1,0..4)
; instructions; mm6 and mm7 are transposed vectors still held in
; registers. Every multiply is pmulhw, which keeps the high 16 bits
; of each signed 16x16 product.
;
;   shared = pmulhw(sum, fix_117_117), parked at [eax + 8*24]
;   slot 1 <- pmulhw(mm7, fixn089n196p029) + shared
;             + pmulhw(slot 2, fix_n089) + pmulhw(slot 6, fix_n196)
;   slot 3 <- pmulhw(mm6, fix_n256n039p205) + shared
;             + pmulhw(slot 2, fix_n039) + pmulhw(slot 6, fix_n256)
;   slot 5 <- pmulhw(mm6, fix_n039) + pmulhw(mm7, fix_n089) + shared
;             + pmulhw(slot 2, fix_n089n039p150)
;   slot 7 <- pmulhw(mm6, fix_n256) + pmulhw(mm7, fix_n196) + shared
;             + pmulhw(slot 6, fix_n196p307n256)
;
; Slots 1, 3, 5 and 7 are overwritten here; their previous contents
; were already loaded into registers by the transpose above.
; NOTE(review): the fix_* operands and the value of eax are set up
; outside this view; the names suggest fixed-point IDCT multipliers
; and [eax + 8*24] is used purely as a one-quadword temporary --
; confirm against their definitions.
; ------------------------------------------------------------------
movq mm5, [esi + 8*2] ;p2(3,0)
pmulhw mm2, fix_117_117 ;p2(1,5)
movq mm4, mm7 ;p2(2,0)
pmulhw mm4, fixn089n196p029 ;p2(2,1)
movq mm3, mm6 ;p2(6,0)
pmulhw mm3, fix_n256n039p205 ;p2(6,1)
pmulhw mm5, fix_n089 ;p2(3,1)
movq [eax + 8*24], mm2 ;p2(1,6)
movq mm2, [esi + 8*6] ;p2(4,0)
pmulhw mm2, fix_n196 ;p2(4,1)
paddw mm4, [eax + 8*24] ;p2(5,0)
paddw mm3, [eax + 8*24] ;p2(9,0)
paddw mm5, mm2 ;p2(5,1)
movq mm2, [esi + 8*2] ;p2(7,0)
paddw mm5, mm4 ;p2(5,2)
pmulhw mm2, fix_n039 ;p2(7,1)
movq [esi + 8*1], mm5 ;p2(5,3)
movq mm4, [esi + 8*6] ;p2(8,0)
; mm6 and mm7 are copied (into mm5 / mm4) before they are multiplied
; in place, so both products of each register remain available.
movq mm5, mm6 ;p2(10,0)
pmulhw mm4, fix_n256 ;p2(8,1)
pmulhw mm5, fix_n039 ;p2(10,1)
pmulhw mm6, fix_n256 ;p2(15,0)
paddw mm2, mm4 ;p2(9,1)
movq mm4, mm7 ;p2(11,0)
pmulhw mm4, fix_n089 ;p2(11,1)
paddw mm2, mm3 ;p2(9,2)
movq [esi + 8*3], mm2 ;p2(9,3)
movq mm3, [esi + 8*2] ;p2(13,0)
pmulhw mm7, fix_n196 ;p2(16,0)
pmulhw mm3, fix_n089n039p150 ;p2(13,1)
paddw mm5, mm4 ;p2(12,0)
paddw mm5, [eax + 8*24] ;p2(14,0)
movq mm2, [esi + 8*6] ;p2(18,0)
pmulhw mm2, fix_n196p307n256 ;p2(18,1)
paddw mm5, mm3 ;p2(14,1)
movq [esi + 8*5], mm5 ;p2(14,2)
paddw mm6, mm7 ;p2(17,0)
paddw mm6, [eax + 8*24] ;p2(19,0)
; The next two instructions belong to the following stage (copy of
; mm1 and load of slot 4); they are issued early, interleaved with
; the last accumulation for slot 7.
movq mm3, mm1 ;p2(21,0)
movq mm4, [esi + 8*4] ;p2(20,0)
paddw mm6, mm2 ;p2(19,1)
movq [esi + 8*7], mm6 ;p2(19,2)
; ------------------------------------------------------------------
; Remaining products, butterflies and the start of the output stage.
; "slot k" means the quadword at [esi + 8*k]. On entry mm4 = slot 4
; and mm3 = mm1 (copies made by the two preceding p2(20,0)/p2(21,0)
; instructions); mm0 and mm1 still hold transposed vectors.
;
;   mm4 = pmulhw(slot 4, fix_054p076) + pmulhw(mm1, fix_054)
;   mm5 = pmulhw(slot 4, fix_054)     + pmulhw(mm1, fix_054n184)
;   mm2 = (slot 0 + mm0) >> 3         (arithmetic shift, per word)
;   mm7 = (slot 0 - mm0) >> 3
; then
;   mm2 = mm2 + mm4,  mm3 = (previous mm2) - mm4
;   mm7 = mm7 + mm5,  mm6 = (previous mm7) - mm5
; NOTE(review): the fix_* operands are defined outside this view.
; ------------------------------------------------------------------
movq mm5, mm4 ;p2(20,1)
movq mm7, [esi + 8*0] ;p2(26,0)
pmulhw mm4, fix_054p076 ;p2(20,2)
psubw mm7, mm0 ;p2(27,0)
pmulhw mm3, fix_054 ;p2(21,1)
movq mm2, mm0 ;p2(26,1)
pmulhw mm5, fix_054 ;p2(23,0)
psraw mm7, 3 ;p2(27,1)
paddw mm2, [esi + 8*0] ;p2(26,2)
movq mm6, mm7 ;p2(28,0)
pmulhw mm1, fix_054n184 ;p2(24,0)
psraw mm2, 3 ;p2(26,3)
paddw mm4, mm3 ;p2(22,0)
paddw mm5, mm1 ;p2(25,0)
psubw mm6, mm5 ;p2(29,0)
movq mm3, mm2 ;p2(30,0)
paddw mm2, mm4 ;p2(30,1)
paddw mm7, mm5 ;p2(28,1)
; Output stage. Each of the four vectors above is combined with one
; of the slots written by the multiply-accumulate stage, as a sum
; and a difference:
;   p2(32): mm1 = mm2 + slot 5, mm2 = mm2 - slot 5
;   p2(33): mm7 = mm7 + slot 7, mm0 = (copy of mm7) - slot 7
;   p2(34): mm4 = (copy of mm6) + slot 3, mm6 = mm6 - slot 3
;   p2(35): mm3 = mm3 + slot 1, mm5 = (copy of mm3) - slot 1
; Every sum/difference then gets const_0x0808 added, is arithmetically
; shifted right by 4, and each pair is packed with packuswb (signed
; words -> unsigned bytes with saturation; the sum lands in the low
; four bytes, the difference in the high four).
; NOTE(review): const_0x0808 is defined outside this view; presumably
; it carries the rounding/level-shift bias -- confirm.
movq mm1, mm2 ;p2(32,0)
psubw mm3, mm4 ;p2(31,0)
paddw mm1, [esi + 8*5] ;p2(32,1)
movq mm0, mm7 ;p2(33,0)
psubw mm2, [esi + 8*5] ;p2(32,2)
movq mm4, mm6 ;p2(34,0)
paddw mm1, const_0x0808 ;p2(32,3)
paddw mm2, const_0x0808 ;p2(32,4)
psraw mm1, 4 ;p2(32,5)
psraw mm2, 4 ;p2(32,6)
paddw mm7, [esi + 8*7] ;p2(33,1)
packuswb mm1, mm2 ;p2(32,7)
psubw mm0, [esi + 8*7] ;p2(33,2)
paddw mm7, const_0x0808 ;p2(33,3)
paddw mm0, const_0x0808 ;p2(33,4)
psraw mm7, 4 ;p2(33,5)
psraw mm0, 4 ;p2(33,6)
paddw mm4, [esi + 8*3] ;p2(34,1)
packuswb mm7, mm0 ;p2(33,7)
psubw mm6, [esi + 8*3] ;p2(34,2)
paddw mm4, const_0x0808 ;p2(34,3)
movq mm5, mm3 ;p2(35,0)
paddw mm6, const_0x0808 ;p2(34,4)
psraw mm4, 4 ;p2(34,5)
psraw mm6, 4 ;p2(34,6)
paddw mm3, [esi + 8*1] ;p2(35,1)
packuswb mm4, mm6 ;p2(34,7)
psubw mm5, [esi + 8*1] ;p2(35,2)
; p2(36..) begins re-interleaving the packed bytes while p2(35,*) is
; still finishing; p2(42,*) fetches destination addresses from the
; table at edi and adds output_col. The sequence continues past the
; end of this block.
movq mm0, mm1 ;p2(36,0)
paddw mm3, const_0x0808 ;p2(35,3)
paddw mm5, const_0x0808 ;p2(35,4)
punpcklbw mm0, mm7 ;p2(36,1)
psraw mm3, 4 ;p2(35,5)
movq mm2, mm4 ;p2(37,0)
psraw mm5, 4 ;p2(35,6)
movq mm6, mm0 ;p2(38,0)
packuswb mm3, mm5 ;p2(35,7)
mov ebx, [edi] ;p2(42,0)
punpckhbw mm7, mm1 ;p2(36,2)
mov ecx, [edi+4] ;p2(42,1)
punpcklbw mm2, mm3 ;p2(37,1)
mov edx, [edi+8] ;p2(42,2)
punpckhbw mm3, mm4 ;p2(37,2)
add ebx, output_col ;p2(42,3)
punpckl
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -