⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_llm.cpp

📁 这是一组DCT和iDCT的代码
💻 CPP
📖 第 1 页 / 共 5 页
字号:
        psraw           mm6, 4                                  ;p2(34,6)
        paddw           mm3, [esi + 8*1]                ;p2(35,1)

        packuswb        mm4, mm6                                ;p2(34,7)
        psubw           mm5, [esi + 8*1]                ;p2(35,2)

        movq            mm0, mm1                                ;p2(36,0)
        paddw           mm3, const_0x0808               ;p2(35,3)
        
        paddw           mm5, const_0x0808               ;p2(35,4)
        punpcklbw       mm0, mm7                                ;p2(36,1)

        psraw           mm3, 4                                  ;p2(35,5)
        movq            mm2, mm4                                ;p2(37,0)

        psraw           mm5, 4                                  ;p2(35,6)
        movq            mm6, mm0                                ;p2(38,0)
        
        packuswb        mm3, mm5                                ;p2(35,7)
        mov                     ebx, [edi]                              ;p2(42,0)

        punpckhbw       mm7, mm1                                ;p2(36,2)
        mov                     ecx, [edi+4]                    ;p2(42,1)

        punpcklbw       mm2, mm3                                ;p2(37,1)       
        mov                     edx, [edi+8]                    ;p2(42,2)

        punpckhbw       mm3, mm4                                ;p2(37,2)       
        add                     ebx, output_col                 ;p2(42,3)

        punpcklwd       mm0, mm2                                ;p2(38,1)       
        movq            mm5, mm3                                ;p2(39,0)
        
        punpckhwd       mm6, mm2                                ;p2(38,2)       
        movq            mm1, mm0                                ;p2(40,0)

        punpcklwd       mm3, mm7                                ;p2(39,1)       
        add                     ecx, output_col                 ;p2(42,4)

        add                     edx, output_col                 ;p2(42,5)
        punpckldq       mm0, mm3                                ;p2(40,1)       

        punpckhdq       mm1, mm3                                ;p2(40,2)       
        movq            [ebx], mm0                              ;p2(43,0)       

        punpckhwd       mm5, mm7                                ;p2(39,2)       
        movq            [ecx], mm1                              ;p2(43,1)       

        movq            mm4, mm6                                ;p2(41,0)
        mov                     ebx, [edi+12]                   ;p2(43,3)

        punpckldq       mm4, mm5                                ;p2(41,1)       
        add                     ebx, output_col                 ;p2(43,4)

        punpckhdq       mm6, mm5                                ;p2(41,2)       
        movq            [edx], mm4                              ;p2(43,2)               

        movq            [ebx], mm6                              ;p2(43,5)

//************************************************************
//      Process next 4 rows
//
// esi and edi are advanced, then two 4x4 blocks of 16-bit words addressed
// through esi are transposed with the MMX unpack instructions.

        // Step esi forward by 64 bytes, i.e. by the eight quadwords
        // [esi+8*0]..[esi+8*7] that this section addresses.
        add                     esi, 64
        // Step edi forward by four dwords; later code in this file reads
        // [edi], [edi+4], [edi+8] as pointers and adds output_col to them.
        add                     edi, 16

//transpose next 4 rows of wsptr
//
// Two independent 4x4 transposes of 16-bit words, each done with
// punpck{l,h}wd followed by punpck{l,h}dq:
//   * quadwords 8*0, 8*2, 8*4, 8*6 (A, B, C, D below) are transposed and
//     written back to the same four addresses;
//   * quadwords 8*1, 8*3, 8*5, 8*7 (E, F, G, H below) are transposed and left
//     in mm0, mm6, mm1, mm7 -- they are not written back here.
// Word order in the bracketed lists is low -> high.
// NOTE(review): the offsets suggest 16-byte rows (8 words), with A..D being
// the left halves and E..H the right halves of four rows -- confirm against
// the workspace layout, which is not visible in this part of the file.
// The instructions tagged ;p2(1,*) belong to the arithmetic that follows and
// are interleaved here.

        // mm0 = A
        movq            mm0, [esi+8*0]          ;tran(0)
        
        // mm1 = copy of A, mm2 = B
        movq            mm1, mm0                        ;tran(1)
        movq            mm2, [esi+8*2]          ;tran(2)                

        // mm0 = [a0 b0 a1 b1], mm3 = C
        punpcklwd       mm0, mm2                        ;tran(3)
        movq            mm3, [esi+8*4]          ;tran(5)

        // mm1 = [a2 b2 a3 b3], mm5 = D
        punpckhwd       mm1, mm2                        ;tran(4)
        movq            mm5, [esi+8*6]          ;tran(7)

        // mm4 = copy of C, mm6 = copy of [a0 b0 a1 b1]
        movq            mm4, mm3                        ;tran(6)
        movq            mm6, mm0                        ;tran(10)

        // mm3 = [c0 d0 c1 d1], mm7 = copy of [a2 b2 a3 b3]
        punpcklwd       mm3, mm5                        ;tran(8)
        movq            mm7, mm1                        ;tran(11)

        // mm0 = [a0 b0 c0 d0]
        punpckldq       mm0, mm3                        ;tran(12)

        // mm4 = [c2 d2 c3 d3]; write [a0 b0 c0 d0] back to 8*0
        punpckhwd       mm4, mm5                        ;tran(9)
        movq            [esi+8*0], mm0          ;tran(16)
        
        // mm6 = [a1 b1 c1 d1]; mm0 is free again and is reloaded with E
        punpckhdq       mm6, mm3                        ;tran(13)
        movq            mm0, [esi+8*1]          ;tran(20)

        // mm1 = [a2 b2 c2 d2]; write [a1 b1 c1 d1] back to 8*2
        punpckldq       mm1, mm4                        ;tran(14)
        movq            [esi+8*2], mm6          ;tran(17)
        
        // mm7 = [a3 b3 c3 d3]; write [a2 b2 c2 d2] back to 8*4
        punpckhdq       mm7, mm4                        ;tran(15)
        movq            [esi+8*4], mm1          ;tran(18)

        // Second block: mm1 = copy of E, mm3 = G
        movq            mm1, mm0                        ;tran(21)
        movq            mm3, [esi+8*5]          ;tran(25)

        // mm2 = F, mm4 = copy of G
        movq            mm2, [esi+8*3]          ;tran(22)
        movq            mm4, mm3                        ;tran(26)

        // mm0 = [e0 f0 e1 f1]; write [a3 b3 c3 d3] back to 8*6 (frees mm7)
        punpcklwd       mm0, mm2                        ;tran(23)
        movq            [esi+8*6], mm7          ;tran(19)

        // mm1 = [e2 f2 e3 f3], mm5 = H
        punpckhwd       mm1, mm2                        ;tran(24)
        movq            mm5, [esi+8*7]          ;tran(27)

        // mm3 = [g0 h0 g1 h1], mm6 = copy of [e0 f0 e1 f1]
        punpcklwd       mm3, mm5                        ;tran(28)
        movq            mm6, mm0                        ;tran(30)

        // mm7 = copy of [e2 f2 e3 f3]; mm6 = [e1 f1 g1 h1]
        movq            mm7, mm1                        ;tran(31)
        punpckhdq       mm6, mm3                        ;tran(33)

        // mm4 = [g2 h2 g3 h3]; p2(1,*): mm2 starts a running sum with mm6
        punpckhwd       mm4, mm5                        ;tran(29)
        movq            mm2, mm6                        ;p2(1,0)

        // mm7 = [e3 f3 g3 h3]; mm5 = the vector stored to 8*2 by tran(17)
        punpckhdq       mm7, mm4                        ;tran(35)
        movq            mm5, [esi + 8*2]        ;p2(1,2)

        // mm2 += mm7; mm5 += the vector stored to 8*6 by tran(19)
        paddw           mm2, mm7                        ;p2(1,1)
        paddw           mm5, [esi + 8*6]        ;p2(1,3)

        // mm0 = [e0 f0 g0 h0]; mm2 now holds the sum of the four vectors built
        // from word 1 and word 3 of every source quadword
        punpckldq       mm0, mm3                        ;tran(32)
        paddw           mm2, mm5                        ;p2(1,4)

        // mm1 = [e2 f2 g2 h2]
        punpckldq       mm1, mm4                        ;tran(34)
        // ---- p2: arithmetic on the transposed vectors ------------------------
        // Labels used in the comments below (labels only):
        //   c0..c3 = vectors stored to [esi+8*0], [esi+8*2], [esi+8*4],
        //            [esi+8*6] by tran(16)..tran(19)
        //   c4..c7 = register results of tran(32)..tran(35): mm0, mm6, mm1, mm7
        // On entry mm2 = c1 + c3 + c5 + c7, built by p2(1,0)..p2(1,4).
        // [esi+8*1], [esi+8*3], [esi+8*5], [esi+8*7] are overwritten with
        // intermediate sums, and [eax+8*24] is used as an 8-byte scratch slot
        // (eax is set outside this view).
        // pmulhw keeps the high 16 bits of each signed 16x16 product; "x*K"
        // below means that high half.
        // NOTE(review): the fix_* names look like fixed-point forms of the
        // Loeffler-Ligtenberg-Moschytz rotation constants (1.175, -0.899,
        // -1.961, -2.562, -0.390, 0.541, ...) and sums of them -- their
        // definitions are not in view; confirm.
        //
        // Four sums are produced first (S = (c1+c3+c5+c7)*fix_117_117):
        //   [esi+8*1] = c7*fixn089n196p029  + S + c1*fix_n089 + c3*fix_n196
        //   [esi+8*3] = c5*fix_n256n039p205 + S + c1*fix_n039 + c3*fix_n256
        //   [esi+8*5] = c5*fix_n039 + c7*fix_n089 + S + c1*fix_n089n039p150
        //   [esi+8*7] = c5*fix_n256 + c7*fix_n196 + S + c3*fix_n196p307n256
        movq            mm5, [esi + 8*2]                ;p2(3,0)

        // mm2 = S; mm4 = copy of c7
        pmulhw          mm2, fix_117_117        ;p2(1,5)
        movq            mm4, mm7                                ;p2(2,0)

        // mm4 = c7*fixn089n196p029; mm3 = copy of c5
        pmulhw          mm4, fixn089n196p029    ;p2(2,1)
        movq            mm3, mm6                                        ;p2(6,0)

        // mm3 = c5*fix_n256n039p205
        pmulhw          mm3, fix_n256n039p205           ;p2(6,1)

        // mm5 = c1*fix_n089
        pmulhw          mm5, fix_n089                   ;p2(3,1)

        // Spill S so mm2 can be reused
        movq            [eax + 8*24], mm2       ;p2(1,6)

        movq            mm2, [esi + 8*6]        ;p2(4,0)

        // mm2 = c3*fix_n196
        pmulhw          mm2, fix_n196           ;p2(4,1)

        // Add S to both partial terms
        paddw           mm4, [eax + 8*24]                       ;p2(5,0)

        paddw           mm3, [eax + 8*24]               ;p2(9,0)

        paddw           mm5, mm2                                        ;p2(5,1)

        // mm5 completes the first sum; mm2 = c1*fix_n039 starts the second
        movq            mm2, [esi + 8*2]                ;p2(7,0)
        paddw           mm5, mm4                                        ;p2(5,2)
        pmulhw          mm2, fix_n039                   ;p2(7,1)

        // First sum -> [esi+8*1]
        movq            [esi + 8*1], mm5                        ;p2(5,3)

        // mm4 = c3, mm5 = copy of c5
        movq            mm4, [esi + 8*6]        ;p2(8,0)
        movq            mm5, mm6                        ;p2(10,0)

        // mm4 = c3*fix_n256
        pmulhw          mm4, fix_n256           ;p2(8,1)

        // mm5 = c5*fix_n039
        pmulhw          mm5, fix_n039           ;p2(10,1)

        // mm6 = c5*fix_n256 (c5 itself is no longer needed after this)
        pmulhw          mm6, fix_n256                   ;p2(15,0)

        paddw           mm2, mm4                                ;p2(9,1)

        movq            mm4, mm7                        ;p2(11,0)
        
        // mm4 = c7*fix_n089; mm2 completes the second sum
        pmulhw          mm4, fix_n089           ;p2(11,1)
        paddw           mm2, mm3                                ;p2(9,2)
        
        // Second sum -> [esi+8*3]
        movq            [esi + 8*3], mm2                ;p2(9,3)

        movq            mm3, [esi + 8*2]                ;p2(13,0)

        // mm7 = c7*fix_n196 (c7 itself is no longer needed after this)
        pmulhw          mm7, fix_n196                   ;p2(16,0)

        // mm3 = c1*fix_n089n039p150; mm5 = c5*fix_n039 + c7*fix_n089
        pmulhw          mm3, fix_n089n039p150   ;p2(13,1)
        paddw           mm5, mm4                        ;p2(12,0)


        // mm5 += S
        paddw           mm5, [eax + 8*24]               ;p2(14,0)

        movq            mm2, [esi + 8*6]                ;p2(18,0)

        // mm2 = c3*fix_n196p307n256; mm5 completes the third sum
        pmulhw          mm2, fix_n196p307n256   ;p2(18,1)
        paddw           mm5, mm3                                ;p2(14,1)

        // Third sum -> [esi+8*5]; mm6 = c5*fix_n256 + c7*fix_n196
        movq            [esi + 8*5], mm5                ;p2(14,2)
        paddw           mm6, mm7                                ;p2(17,0)

        // mm6 += S; mm3 = copy of c6 (start of the c0/c2/c4/c6 part)
        paddw           mm6, [eax + 8*24]               ;p2(19,0)
        movq            mm3, mm1                                ;p2(21,0)

        // mm4 = c2; mm6 completes the fourth sum
        movq            mm4, [esi + 8*4]                ;p2(20,0)
        paddw           mm6, mm2                                ;p2(19,1)

        // Fourth sum -> [esi+8*7]; mm5 = copy of c2
        movq            [esi + 8*7], mm6                ;p2(19,2)       
        movq            mm5, mm4                                ;p2(20,1)

        // Remaining terms, from c0, c2, c4, c6:
        //   e3 = c2*fix_054p076 + c6*fix_054      (mm4)
        //   e2 = c2*fix_054     + c6*fix_054n184  (mm5)
        //   s  = (c0 + c4) >> 3                   (mm2)
        //   d  = (c0 - c4) >> 3                   (mm7)
        // giving s+e3 (mm2), s-e3 (mm3), d+e2 (mm7), d-e2 (mm6).
        movq            mm7, [esi + 8*0]                ;p2(26,0)

        pmulhw          mm4, fix_054p076                ;p2(20,2)       
        psubw           mm7, mm0                                ;p2(27,0)

        pmulhw          mm3, fix_054                    ;p2(21,1)
        movq            mm2, mm0                                ;p2(26,1)

        pmulhw          mm5, fix_054                    ;p2(23,0)
        psraw           mm7, 3                                  ;p2(27,1)

        paddw           mm2, [esi + 8*0]                ;p2(26,2)
        movq            mm6, mm7                                ;p2(28,0)

        pmulhw          mm1, fix_054n184                ;p2(24,0)
        psraw           mm2, 3                                  ;p2(26,3)                       

        // mm4 = e3, mm5 = e2
        paddw           mm4, mm3                                ;p2(22,0)       
        paddw           mm5, mm1                                ;p2(25,0)

        // mm6 = d - e2; mm3 = copy of s
        psubw           mm6, mm5                                ;p2(29,0)       
        movq            mm3, mm2                                ;p2(30,0)

        // mm2 = s + e3; mm7 = d + e2
        paddw           mm2, mm4                                ;p2(30,1)               
        paddw           mm7, mm5                                ;p2(28,1)                       

        // mm3 = s - e3
        movq            mm1, mm2                                ;p2(32,0)
        psubw           mm3, mm4                                ;p2(31,0)               

        // Final combine, repeated four times: one vector above is added to
        // and subtracted from one of the stored sums, const_0x0808 is added to
        // both results, both are shifted right arithmetically by 4, and
        // packuswb saturates them to unsigned bytes (sum results in the low
        // four bytes, difference results in the high four bytes).
        //   mm1 <- (s+e3) +/- [esi+8*5]      mm7 <- (d+e2) +/- [esi+8*7]
        //   mm4 <- (d-e2) +/- [esi+8*3]      mm3 <- (s-e3) +/- [esi+8*1]
        // NOTE(review): the name const_0x0808 suggests 0x0808 per word, which
        // would be +8 for rounding plus 0x800 (= +128 after the >>4) -- its
        // definition is not in view; confirm.
        paddw           mm1, [esi + 8*5]                ;p2(32,1)
        movq            mm0, mm7                                ;p2(33,0)

        psubw           mm2, [esi + 8*5]                ;p2(32,2)
        movq            mm4, mm6                                ;p2(34,0)
        
        paddw           mm1, const_0x0808               ;p2(32,3)
        
        paddw           mm2, const_0x0808               ;p2(32,4)
        psraw           mm1, 4                                  ;p2(32,5)

        psraw           mm2, 4                                  ;p2(32,6)
        paddw           mm7, [esi + 8*7]                ;p2(33,1)

        packuswb        mm1, mm2                                ;p2(32,7)
        psubw           mm0, [esi + 8*7]                ;p2(33,2)

        paddw           mm7, const_0x0808               ;p2(33,3)

        paddw           mm0, const_0x0808               ;p2(33,4)
        psraw           mm7, 4                                  ;p2(33,5)
        
        psraw           mm0, 4                                  ;p2(33,6)
        paddw           mm4, [esi + 8*3]                ;p2(34,1)

        packuswb        mm7, mm0                                ;p2(33,7)
        psubw           mm6, [esi + 8*3]                ;p2(34,2)

        paddw           mm4, const_0x0808               ;p2(34,3)
        movq            mm5, mm3                                ;p2(35,0)
        
        paddw           mm6, const_0x0808               ;p2(34,4)
        psraw           mm4, 4                                  ;p2(34,5)
        
        psraw           mm6, 4                                  ;p2(34,6)
        paddw           mm3, [esi + 8*1]                ;p2(35,1)

        packuswb        mm4, mm6                                ;p2(34,7)
        psubw           mm5, [esi + 8*1]                ;p2(35,2)

        // From here the packed bytes in mm1, mm7, mm4, mm3 are interleaved
        // (punpck{l,h}bw first); p2(36,*)..p2(37,*) overlap the last combine.
        // The identical sequence earlier in this file continues with
        // punpck{l,h}wd / punpck{l,h}dq and ends in four 8-byte stores.
        movq            mm0, mm1                                ;p2(36,0)
        paddw           mm3, const_0x0808               ;p2(35,3)
        
        paddw           mm5, const_0x0808               ;p2(35,4)
        punpcklbw       mm0, mm7                                ;p2(36,1)

        psraw           mm3, 4                                  ;p2(35,5)
        movq            mm2, mm4                                ;p2(37,0)

        psraw           mm5, 4                                  ;p2(35,6)
        movq            mm6, mm0                                ;p2(38,0)
        
        // ebx, ecx, edx = first three dword entries of the table at edi
        packuswb        mm3, mm5                                ;p2(35,7)
        mov                     ebx, [edi]                              ;p2(42,0)

        punpckhbw       mm7, mm1                                ;p2(36,2)
        mov                     ecx, [edi+4]                    ;p2(42,1)

        punpcklbw       mm2, mm3                                ;p2(37,1)       
        mov                     edx, [edi+8]                    ;p2(42,2)

        // ebx += output_col (output_col is defined outside this view)
        punpckhbw       mm3, mm4                                ;p2(37,2)       
        add                     ebx, output_col                 ;p2(42,3)

        punpckl

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -