📄 idct_llm.cpp
字号:
// Tail of the pass-1 step group tagged p1(18,*), with the group tagged
// p1(19,*) interleaved into it. Group 18 starts above this excerpt, so
// mm4, mm5, mm6 and mm7 arrive here already holding partial results.
// All arithmetic in this section is on packed 32-bit lanes
// (pmaddwd / paddd / psubd / psrad).
//
// Shape of each group, as far as the visible code shows:
//   S    = pmaddwd products + the saved dwords at [esi+8*24] / [esi+8*25]
//   sum  = ([esi+8*k] + S + const_0x2xx8) >> 9
//   diff = ([esi+8*k] - S + const_0x2xx8) >> 9
// with k = 20,21 for group 18 and k = 22,23 for group 19. Each pair of
// dword results is packed to four saturated signed words and stored.
// NOTE(review): const_0x2xx8 is presumably the rounding bias for the >>9
// shift - confirm at its definition.
pmaddwd mm5, fix_205_n256n039 ;p1(18,17)
paddd mm4, [esi+8*25] ;p1(18,18)
paddd mm4, mm5 ;p1(18,19)
// Keep a copy of S (upper pair); p1(18,31) subtracts it from [esi + 8*21].
movq mm5, mm4 ;p1(18,20)
paddd mm4, [esi + 8*21] ;p1(18,21)
paddd mm4, const_0x2xx8 ;p1(18,22)
psrad mm4, 9 ;p1(18,23)
// mm6 (computed before this excerpt) supplies the low two words, mm4 the
// high two words of the packed result.
packssdw mm6, mm4 ;p1(18,24)
movq [esi + 8*4], mm6 ;p1(18,25)
// Difference half of group 18: [esi+8*20] - mm7 and [esi+8*21] - mm5.
movq mm4, [esi + 8*20] ;p1(18,26)
psubd mm4, mm7 ;p1(18,27)
paddd mm4, const_0x2xx8 ;p1(18,28)
// Group 19 starts here, interleaved: mm7 = mm0 with each of its two low
// words duplicated (copy, then punpcklwd with itself at p1(19,4)).
movq mm7, mm0 ;p1(19,3)
psrad mm4, 9 ;p1(18,29)
movq mm6, [esi + 8*21] ;p1(18,30)
psubd mm6, mm5 ;p1(18,31)
punpcklwd mm7, mm0 ;p1(19,4)
paddd mm6, const_0x2xx8 ;p1(18,32)
psrad mm6, 9 ;p1(18,33)
pmaddwd mm7, fix_029_n089n196 ;p1(19,5)
packssdw mm4, mm6 ;p1(18,34)
movq [esi + 8*10], mm4 ;p1(18,35)
// Group 19, low pair: interleave the low words of mm3 and mm1, multiply-add
// by fix_n196_n089, then add the saved dwords at [esi+8*24] and the mm0 term.
movq mm6, mm3 ;p1(19,0)
punpcklwd mm6, mm1 ;p1(19,1)
movq mm5, mm0 ;p1(19,15)
pmaddwd mm6, fix_n196_n089 ;p1(19,2)
// mm5 = mm0 with each of its two high words duplicated.
punpckhwd mm5, mm0 ;p1(19,16)
paddd mm6, [esi+8*24] ;p1(19,6)
movq mm4, mm3 ;p1(19,12)
paddd mm6, mm7 ;p1(19,7)
// Group 19, high pair: same computation on the high words of mm3 / mm1.
punpckhwd mm4, mm1 ;p1(19,13)
// Keep a copy of S (low pair); p1(19,27) subtracts it from [esi + 8*22].
movq mm7, mm6 ;p1(19,8)
pmaddwd mm4, fix_n196_n089 ;p1(19,14)
paddd mm6, [esi + 8*22] ;p1(19,9)
pmaddwd mm5, fix_029_n089n196 ;p1(19,17)
paddd mm6, const_0x2xx8 ;p1(19,10)
psrad mm6, 9 ;p1(19,11)
paddd mm4, [esi+8*25] ;p1(19,18)
paddd mm4, mm5 ;p1(19,19)
// Keep a copy of S (high pair); p1(19,31) subtracts it from [esi + 8*23].
movq mm5, mm4 ;p1(19,20)
paddd mm4, [esi + 8*23] ;p1(19,21)
paddd mm4, const_0x2xx8 ;p1(19,22)
psrad mm4, 9 ;p1(19,23)
// Sum results of group 19 -> four signed words at [esi + 8*6].
packssdw mm6, mm4 ;p1(19,24)
movq [esi + 8*6], mm6 ;p1(19,25)
// Difference half of group 19: [esi+8*22] - mm7 and [esi+8*23] - mm5.
movq mm4, [esi + 8*22] ;p1(19,26)
psubd mm4, mm7 ;p1(19,27)
movq mm6, [esi + 8*23] ;p1(19,30)
paddd mm4, const_0x2xx8 ;p1(19,28)
psubd mm6, mm5 ;p1(19,31)
psrad mm4, 9 ;p1(19,29)
paddd mm6, const_0x2xx8 ;p1(19,32)
psrad mm6, 9 ;p1(19,33)
// Difference results of group 19 -> four signed words at [esi + 8*8].
packssdw mm4, mm6 ;p1(19,34)
movq [esi + 8*8], mm4 ;p1(19,35)
//************************************************************
// Reload the pointers for the next stage: esi = eax, edi = outptr.
// edi is not referenced again in the visible part of this routine.
// NOTE(review): eax presumably still holds the workspace base set up
// before this excerpt - confirm.
mov esi, eax
mov edi, outptr
//transpose 4 rows of wsptr
// First 4x4 block of 16-bit words: the qwords at [esi+8*0], [esi+8*2],
// [esi+8*4] and [esi+8*6] are read as rows A, B, C, D and transposed in
// place with a word interleave (punpcklwd/punpckhwd) followed by a dword
// interleave (punpckldq/punpckhdq):
//   [esi+8*0] <- a0 b0 c0 d0      [esi+8*2] <- a1 b1 c1 d1
//   [esi+8*4] <- a2 b2 c2 d2      [esi+8*6] <- a3 b3 c3 d3
movq mm0, [esi+8*0] ;tran(0)
movq mm1, mm0 ;tran(1)
movq mm2, [esi+8*2] ;tran(2)
punpcklwd mm0, mm2 ;tran(3)
movq mm3, [esi+8*4] ;tran(5)
punpckhwd mm1, mm2 ;tran(4)
movq mm5, [esi+8*6] ;tran(7)
movq mm4, mm3 ;tran(6)
movq mm6, mm0 ;tran(10)
punpcklwd mm3, mm5 ;tran(8)
movq mm7, mm1 ;tran(11)
punpckldq mm0, mm3 ;tran(12)
punpckhwd mm4, mm5 ;tran(9)
movq [esi+8*0], mm0 ;tran(16)
punpckhdq mm6, mm3 ;tran(13)
// Second 4x4 block: rows come from [esi+8*1], [esi+8*3], [esi+8*5] and
// [esi+8*7]. The loads are interleaved with the remaining stores of the
// first block. The transposed result is NOT written back; it stays in
// registers: mm0 (tran 32), mm6 (tran 33), mm1 (tran 34), mm7 (tran 35).
movq mm0, [esi+8*1] ;tran(20)
punpckldq mm1, mm4 ;tran(14)
movq [esi+8*2], mm6 ;tran(17)
punpckhdq mm7, mm4 ;tran(15)
movq [esi+8*4], mm1 ;tran(18)
movq mm1, mm0 ;tran(21)
movq mm3, [esi+8*5] ;tran(25)
movq mm2, [esi+8*3] ;tran(22)
movq mm4, mm3 ;tran(26)
punpcklwd mm0, mm2 ;tran(23)
movq [esi+8*6], mm7 ;tran(19)
punpckhwd mm1, mm2 ;tran(24)
movq mm5, [esi+8*7] ;tran(27)
punpcklwd mm3, mm5 ;tran(28)
movq mm6, mm0 ;tran(30)
movq mm7, mm1 ;tran(31)
punpckhdq mm6, mm3 ;tran(33)
punpckhwd mm4, mm5 ;tran(29)
// The first pass-2 steps (p2(1,*)) are scheduled between the last transpose
// steps. They build, in 16-bit lanes,
//   mm2 = mm6 + mm7 + [esi+8*2] + [esi+8*6]
// using the two register-resident columns and the two already-stored
// transposed rows of the first block.
movq mm2, mm6 ;p2(1,0)
punpckhdq mm7, mm4 ;tran(35)
movq mm5, [esi + 8*2] ;p2(1,2)
paddw mm2, mm7 ;p2(1,1)
paddw mm5, [esi + 8*6] ;p2(1,3)
punpckldq mm0, mm3 ;tran(32)
paddw mm2, mm5 ;p2(1,4)
punpckldq mm1, mm4 ;tran(34)
// Pass-2 arithmetic on packed 16-bit lanes. Every multiply is pmulhw, which
// keeps the high 16 bits of each signed 16x16 product; "*" below means that.
//
// State on entry (set up by the tran(*) and p2(1,*) steps above):
//   [esi+8*0], [esi+8*2], [esi+8*4], [esi+8*6]  first transposed block (memory)
//   mm0, mm6, mm1, mm7                          second transposed block (registers)
//   mm2 = mm6 + mm7 + [esi+8*2] + [esi+8*6]
//
// T = mm2 * fix_117_117 is spilled to [eax + 8*24] and added into four sums.
// esi was loaded from eax earlier, so this is the same buffer esi indexes.
// The sums are parked in [esi+8*1], [esi+8*3], [esi+8*5], [esi+8*7]; the
// previous contents of those slots were already loaded into registers by
// tran(20)/(22)/(25)/(27):
//   [esi+8*1] = [2]*fix_n089 + [6]*fix_n196 + mm7*fixn089n196p029   + T
//   [esi+8*3] = [2]*fix_n039 + [6]*fix_n256 + mm6*fix_n256n039p205  + T
//   [esi+8*5] = mm6*fix_n039 + mm7*fix_n089 + [2]*fix_n089n039p150  + T
//   [esi+8*7] = mm6*fix_n256 + mm7*fix_n196 + [6]*fix_n196p307n256  + T
// ([2] and [6] abbreviate [esi+8*2] and [esi+8*6].)
// NOTE(review): the fix_* names look like the fixed-point constants of an
// LLM-style integer IDCT odd part - confirm against the constant table.
movq mm5, [esi + 8*2] ;p2(3,0)
pmulhw mm2, fix_117_117 ;p2(1,5)
movq mm4, mm7 ;p2(2,0)
pmulhw mm4, fixn089n196p029 ;p2(2,1)
movq mm3, mm6 ;p2(6,0)
pmulhw mm3, fix_n256n039p205 ;p2(6,1)
pmulhw mm5, fix_n089 ;p2(3,1)
// Spill T so mm2 can be reused; it is re-read four times below.
movq [eax + 8*24], mm2 ;p2(1,6)
movq mm2, [esi + 8*6] ;p2(4,0)
pmulhw mm2, fix_n196 ;p2(4,1)
paddw mm4, [eax + 8*24] ;p2(5,0)
paddw mm3, [eax + 8*24] ;p2(9,0)
paddw mm5, mm2 ;p2(5,1)
movq mm2, [esi + 8*2] ;p2(7,0)
paddw mm5, mm4 ;p2(5,2)
pmulhw mm2, fix_n039 ;p2(7,1)
movq [esi + 8*1], mm5 ;p2(5,3)
movq mm4, [esi + 8*6] ;p2(8,0)
// mm6 and mm7 are copied before being multiplied in place (p2(15,0),
// p2(16,0)), because each feeds two different products.
movq mm5, mm6 ;p2(10,0)
pmulhw mm4, fix_n256 ;p2(8,1)
pmulhw mm5, fix_n039 ;p2(10,1)
pmulhw mm6, fix_n256 ;p2(15,0)
paddw mm2, mm4 ;p2(9,1)
movq mm4, mm7 ;p2(11,0)
pmulhw mm4, fix_n089 ;p2(11,1)
paddw mm2, mm3 ;p2(9,2)
movq [esi + 8*3], mm2 ;p2(9,3)
movq mm3, [esi + 8*2] ;p2(13,0)
pmulhw mm7, fix_n196 ;p2(16,0)
pmulhw mm3, fix_n089n039p150 ;p2(13,1)
paddw mm5, mm4 ;p2(12,0)
paddw mm5, [eax + 8*24] ;p2(14,0)
movq mm2, [esi + 8*6] ;p2(18,0)
pmulhw mm2, fix_n196p307n256 ;p2(18,1)
paddw mm5, mm3 ;p2(14,1)
movq [esi + 8*5], mm5 ;p2(14,2)
paddw mm6, mm7 ;p2(17,0)
paddw mm6, [eax + 8*24] ;p2(19,0)
// Remaining inputs: [esi+8*0], [esi+8*4], mm0 and mm1. Computed here:
//   A = [esi+8*4]*fix_054p076 + mm1*fix_054        (mm4)
//   B = [esi+8*4]*fix_054     + mm1*fix_054n184    (mm5)
//   P = ([esi+8*0] + mm0) >> 3                     (mm2)
//   Q = ([esi+8*0] - mm0) >> 3                     (mm7)
// followed by the butterflies P+A (mm2), P-A (mm3), Q+B (mm7), Q-B (mm6).
movq mm3, mm1 ;p2(21,0)
movq mm4, [esi + 8*4] ;p2(20,0)
paddw mm6, mm2 ;p2(19,1)
movq [esi + 8*7], mm6 ;p2(19,2)
movq mm5, mm4 ;p2(20,1)
movq mm7, [esi + 8*0] ;p2(26,0)
pmulhw mm4, fix_054p076 ;p2(20,2)
psubw mm7, mm0 ;p2(27,0)
pmulhw mm3, fix_054 ;p2(21,1)
movq mm2, mm0 ;p2(26,1)
pmulhw mm5, fix_054 ;p2(23,0)
psraw mm7, 3 ;p2(27,1)
paddw mm2, [esi + 8*0] ;p2(26,2)
movq mm6, mm7 ;p2(28,0)
pmulhw mm1, fix_054n184 ;p2(24,0)
psraw mm2, 3 ;p2(26,3)
paddw mm4, mm3 ;p2(22,0)
paddw mm5, mm1 ;p2(25,0)
psubw mm6, mm5 ;p2(29,0)
movq mm3, mm2 ;p2(30,0)
paddw mm2, mm4 ;p2(30,1)
paddw mm7, mm5 ;p2(28,1)
// Output stage: each butterfly value is combined with one parked sum as
// (x + s) and (x - s), biased by const_0x0808, shifted right by 4, and the
// two word vectors are packed to eight unsigned-saturated bytes (packuswb).
// NOTE(review): const_0x0808 presumably folds the rounding term and a +128
// level shift into one bias - confirm at its definition.
//   (P+A) +/- [esi+8*5]  -> mm1 (bytes)
movq mm1, mm2 ;p2(32,0)
psubw mm3, mm4 ;p2(31,0)
paddw mm1, [esi + 8*5] ;p2(32,1)
movq mm0, mm7 ;p2(33,0)
psubw mm2, [esi + 8*5] ;p2(32,2)
movq mm4, mm6 ;p2(34,0)
paddw mm1, const_0x0808 ;p2(32,3)
paddw mm2, const_0x0808 ;p2(32,4)
psraw mm1, 4 ;p2(32,5)
psraw mm2, 4 ;p2(32,6)
//   (Q+B) +/- [esi+8*7]  -> mm7 (bytes)
paddw mm7, [esi + 8*7] ;p2(33,1)
packuswb mm1, mm2 ;p2(32,7)
psubw mm0, [esi + 8*7] ;p2(33,2)
paddw mm7, const_0x0808 ;p2(33,3)
paddw mm0, const_0x0808 ;p2(33,4)
psraw mm7, 4 ;p2(33,5)
psraw mm0, 4 ;p2(33,6)
//   (Q-B) +/- [esi+8*3]  -> started here in mm4 / mm6; the rest of this
//   group and the (P-A) group (copied to mm5) continue past this excerpt.
paddw mm4, [esi + 8*3] ;p2(34,1)
packuswb mm7, mm0 ;p2(33,7)
psubw mm6, [esi + 8*3] ;p2(34,2)
paddw mm4, const_0x0808 ;p2(34,3)
movq mm5, mm3 ;p2(35,0)
paddw mm6, const_0x0808 ;p2(34,4)
psraw mm4, 4 ;p2(34,5)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -