⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_ap922_mmx.asm

📁 由bmp生成mpeg2 的I_frame 数据
💻 ASM
📖 第 1 页 / 共 3 页
字号:
                ; Tail of a row pass whose loads/unpacks lie before this view.
                ; Results are written to [esi+16] and [esi+24] below.
                ; Lane notation in comments is high-to-low, separated by '_'.
                pmaddwd    mm7, mm3       ; x7*w31+x3*w29_x7*w27+x3*w25
                pmaddwd    mm0, [eax]     ; x4*w06+x0*w04_x4*w02+x0*w00
                pmaddwd    mm1, [eax+8]   ; x6*w07+x2*w05_x6*w03+x2*w01
                pmaddwd    mm2, [eax+32]  ; x5*w22+x1*w20_x5*w18+x1*w16
                pmaddwd    mm3, [eax+40]  ; x7*w23+x3*w21_x7*w19+x3*w17
                paddd      mm4, mm5       ; a3_a2 (sums over even inputs x0,x2,x4,x6)
                paddd      mm6, mm7       ; b3_b2 (sums over odd inputs x1,x3,x5,x7)
                paddd      mm0, mm1       ; a1_a0
                paddd      mm2, mm3       ; b1_b0
                paddd      mm4, half11    ; bias a3_a2 before the >>11 (half11 is defined outside this view; presumably the rounding constant)
                paddd      mm0, half11    ; bias a1_a0 likewise; the bias reaches both a+b and a-b
                movq       mm5, mm6       ; keep b3_b2 for the subtraction
                movq       mm1, mm2       ; keep b1_b0 for the subtraction
                paddd      mm2, mm0       ; a1+b1_a0+b0
                paddd      mm6, mm4       ; a3+b3_a2+b2
                psubd      mm0, mm1       ; a1-b1_a0-b0
                psubd      mm4, mm5       ; a3-b3_a2-b2
                psrad      mm2, 11        ; y1_y0
                psrad      mm6, 11        ; y3_y2
                psrad      mm0, 11        ; y6_y7
                psrad      mm4, 11        ; y4_y5
                packssdw   mm2, mm6       ; y3_y2_y1_y0 (signed saturation to 16 bits)
                packssdw   mm4, mm0       ; y6_y7_y4_y5
                movq       [esi+16], mm2  ; store y3..y0
                movq       mm7, mm4       ; copy of y6_y7_y4_y5 for the word swap
                psrld      mm4, 16        ; 00_y6_00_y4
                pslld      mm7, 16        ; y7_00_y5_00
                por        mm4, mm7       ; y7_y6_y5_y4
                movq       [esi+24], mm4  ; store y7..y4
            ; row 7
            ; In-place row pass over the eight 16-bit words at [esi+112 .. esi+127].
            ; eax is not loaded in this block; it must already address the 64-byte
            ; coefficient table (words w00..w31) set up before this point.
            ; Lane notation in comments is high-to-low, separated by '_'.
                movd       mm0, [esi+112] ; 00_00_x1_x0
                movd       mm1, [esi+116] ; 00_00_x3_x2
                movd       mm2, [esi+120] ; 00_00_x5_x4
                movd       mm3, [esi+124] ; 00_00_x7_x6
                movq       mm4, [eax+16]  ; w14_w12_w10_w08
                movq       mm5, [eax+24]  ; w15_w13_w11_w09
                movq       mm6, [eax+48]  ; w30_w28_w26_w24
                movq       mm7, [eax+56]  ; w31_w29_w27_w25
                punpcklwd  mm0, mm2       ; x5_x1_x4_x0
                punpcklwd  mm1, mm3       ; x7_x3_x6_x2
                movq       mm2, mm0       ; copy of x5_x1_x4_x0
                movq       mm3, mm1       ; copy of x7_x3_x6_x2
                punpckldq  mm0, mm0       ; x4_x0_x4_x0
                punpckhdq  mm2, mm2       ; x5_x1_x5_x1
                punpckldq  mm1, mm1       ; x6_x2_x6_x2
                punpckhdq  mm3, mm3       ; x7_x3_x7_x3
                pmaddwd    mm4, mm0       ; x4*w14+x0*w12_x4*w10+x0*w08
                pmaddwd    mm5, mm1       ; x6*w15+x2*w13_x6*w11+x2*w09
                pmaddwd    mm6, mm2       ; x5*w30+x1*w28_x5*w26+x1*w24
                pmaddwd    mm7, mm3       ; x7*w31+x3*w29_x7*w27+x3*w25
                pmaddwd    mm0, [eax]     ; x4*w06+x0*w04_x4*w02+x0*w00
                pmaddwd    mm1, [eax+8]   ; x6*w07+x2*w05_x6*w03+x2*w01
                pmaddwd    mm2, [eax+32]  ; x5*w22+x1*w20_x5*w18+x1*w16
                pmaddwd    mm3, [eax+40]  ; x7*w23+x3*w21_x7*w19+x3*w17
                paddd      mm4, mm5       ; a3_a2 (sums over even inputs x0,x2,x4,x6)
                paddd      mm6, mm7       ; b3_b2 (sums over odd inputs x1,x3,x5,x7)
                paddd      mm0, mm1       ; a1_a0
                paddd      mm2, mm3       ; b1_b0
                paddd      mm4, half11    ; bias a3_a2 before the >>11 (half11 is defined outside this view; presumably the rounding constant)
                paddd      mm0, half11    ; bias a1_a0 likewise; the bias reaches both a+b and a-b
                movq       mm5, mm6       ; keep b3_b2 for the subtraction
                movq       mm1, mm2       ; keep b1_b0 for the subtraction
                paddd      mm2, mm0       ; a1+b1_a0+b0
                paddd      mm6, mm4       ; a3+b3_a2+b2
                psubd      mm0, mm1       ; a1-b1_a0-b0
                psubd      mm4, mm5       ; a3-b3_a2-b2
                psrad      mm2, 11        ; y1_y0
                psrad      mm6, 11        ; y3_y2
                psrad      mm0, 11        ; y6_y7
                psrad      mm4, 11        ; y4_y5
                packssdw   mm2, mm6       ; y3_y2_y1_y0 (signed saturation to 16 bits)
                packssdw   mm4, mm0       ; y6_y7_y4_y5
                movq       [esi+112], mm2 ; store y3..y0 over x3..x0
                movq       mm7, mm4       ; copy of y6_y7_y4_y5 for the word swap
                psrld      mm4, 16        ; 00_y6_00_y4
                pslld      mm7, 16        ; y7_00_y5_00
                por        mm4, mm7       ; y7_y6_y5_y4
                movq       [esi+120], mm4 ; store y7..y4 over x7..x4
            ; row 2
            ; In-place row pass over the eight 16-bit words at [esi+32 .. esi+47],
            ; using the 64-byte coefficient table table_26 (words w00..w31).
            ; Lane notation in comments is high-to-low, separated by '_'.
                lea        eax, table_26  ; eax -> coefficient table; the row 6 pass that follows reuses it
                movd       mm0, [esi+32]  ; 00_00_x1_x0
                movd       mm1, [esi+36]  ; 00_00_x3_x2
                movd       mm2, [esi+40]  ; 00_00_x5_x4
                movd       mm3, [esi+44]  ; 00_00_x7_x6
                movq       mm4, [eax+16]  ; w14_w12_w10_w08
                movq       mm5, [eax+24]  ; w15_w13_w11_w09
                movq       mm6, [eax+48]  ; w30_w28_w26_w24
                movq       mm7, [eax+56]  ; w31_w29_w27_w25
                punpcklwd  mm0, mm2       ; x5_x1_x4_x0
                punpcklwd  mm1, mm3       ; x7_x3_x6_x2
                movq       mm2, mm0       ; copy of x5_x1_x4_x0
                movq       mm3, mm1       ; copy of x7_x3_x6_x2
                punpckldq  mm0, mm0       ; x4_x0_x4_x0
                punpckhdq  mm2, mm2       ; x5_x1_x5_x1
                punpckldq  mm1, mm1       ; x6_x2_x6_x2
                punpckhdq  mm3, mm3       ; x7_x3_x7_x3
                pmaddwd    mm4, mm0       ; x4*w14+x0*w12_x4*w10+x0*w08
                pmaddwd    mm5, mm1       ; x6*w15+x2*w13_x6*w11+x2*w09
                pmaddwd    mm6, mm2       ; x5*w30+x1*w28_x5*w26+x1*w24
                pmaddwd    mm7, mm3       ; x7*w31+x3*w29_x7*w27+x3*w25
                pmaddwd    mm0, [eax]     ; x4*w06+x0*w04_x4*w02+x0*w00
                pmaddwd    mm1, [eax+8]   ; x6*w07+x2*w05_x6*w03+x2*w01
                pmaddwd    mm2, [eax+32]  ; x5*w22+x1*w20_x5*w18+x1*w16
                pmaddwd    mm3, [eax+40]  ; x7*w23+x3*w21_x7*w19+x3*w17
                paddd      mm4, mm5       ; a3_a2 (sums over even inputs x0,x2,x4,x6)
                paddd      mm6, mm7       ; b3_b2 (sums over odd inputs x1,x3,x5,x7)
                paddd      mm0, mm1       ; a1_a0
                paddd      mm2, mm3       ; b1_b0
                paddd      mm4, half11    ; bias a3_a2 before the >>11 (half11 is defined outside this view; presumably the rounding constant)
                paddd      mm0, half11    ; bias a1_a0 likewise; the bias reaches both a+b and a-b
                movq       mm5, mm6       ; keep b3_b2 for the subtraction
                movq       mm1, mm2       ; keep b1_b0 for the subtraction
                paddd      mm2, mm0       ; a1+b1_a0+b0
                paddd      mm6, mm4       ; a3+b3_a2+b2
                psubd      mm0, mm1       ; a1-b1_a0-b0
                psubd      mm4, mm5       ; a3-b3_a2-b2
                psrad      mm2, 11        ; y1_y0
                psrad      mm6, 11        ; y3_y2
                psrad      mm0, 11        ; y6_y7
                psrad      mm4, 11        ; y4_y5
                packssdw   mm2, mm6       ; y3_y2_y1_y0 (signed saturation to 16 bits)
                packssdw   mm4, mm0       ; y6_y7_y4_y5
                movq       [esi+32], mm2  ; store y3..y0 over x3..x0
                movq       mm7, mm4       ; copy of y6_y7_y4_y5 for the word swap
                psrld      mm4, 16        ; 00_y6_00_y4
                pslld      mm7, 16        ; y7_00_y5_00
                por        mm4, mm7       ; y7_y6_y5_y4
                movq       [esi+40], mm4  ; store y7..y4 over x7..x4
            ; row 6
            ; In-place row pass over the eight 16-bit words at [esi+96 .. esi+111].
            ; eax is not reloaded here: it still holds the table_26 address loaded
            ; at the start of the row 2 pass.
            ; Lane notation in comments is high-to-low, separated by '_'.
                movd       mm0, [esi+96]  ; 00_00_x1_x0
                movd       mm1, [esi+100] ; 00_00_x3_x2
                movd       mm2, [esi+104] ; 00_00_x5_x4
                movd       mm3, [esi+108] ; 00_00_x7_x6
                movq       mm4, [eax+16]  ; w14_w12_w10_w08
                movq       mm5, [eax+24]  ; w15_w13_w11_w09
                movq       mm6, [eax+48]  ; w30_w28_w26_w24
                movq       mm7, [eax+56]  ; w31_w29_w27_w25
                punpcklwd  mm0, mm2       ; x5_x1_x4_x0
                punpcklwd  mm1, mm3       ; x7_x3_x6_x2
                movq       mm2, mm0       ; copy of x5_x1_x4_x0
                movq       mm3, mm1       ; copy of x7_x3_x6_x2
                punpckldq  mm0, mm0       ; x4_x0_x4_x0
                punpckhdq  mm2, mm2       ; x5_x1_x5_x1
                punpckldq  mm1, mm1       ; x6_x2_x6_x2
                punpckhdq  mm3, mm3       ; x7_x3_x7_x3
                pmaddwd    mm4, mm0       ; x4*w14+x0*w12_x4*w10+x0*w08
                pmaddwd    mm5, mm1       ; x6*w15+x2*w13_x6*w11+x2*w09
                pmaddwd    mm6, mm2       ; x5*w30+x1*w28_x5*w26+x1*w24
                pmaddwd    mm7, mm3       ; x7*w31+x3*w29_x7*w27+x3*w25
                pmaddwd    mm0, [eax]     ; x4*w06+x0*w04_x4*w02+x0*w00
                pmaddwd    mm1, [eax+8]   ; x6*w07+x2*w05_x6*w03+x2*w01
                pmaddwd    mm2, [eax+32]  ; x5*w22+x1*w20_x5*w18+x1*w16
                pmaddwd    mm3, [eax+40]  ; x7*w23+x3*w21_x7*w19+x3*w17
                paddd      mm4, mm5       ; a3_a2 (sums over even inputs x0,x2,x4,x6)
                paddd      mm6, mm7       ; b3_b2 (sums over odd inputs x1,x3,x5,x7)
                paddd      mm0, mm1       ; a1_a0
                paddd      mm2, mm3       ; b1_b0
                paddd      mm4, half11    ; bias a3_a2 before the >>11 (half11 is defined outside this view; presumably the rounding constant)
                paddd      mm0, half11    ; bias a1_a0 likewise; the bias reaches both a+b and a-b
                movq       mm5, mm6       ; keep b3_b2 for the subtraction
                movq       mm1, mm2       ; keep b1_b0 for the subtraction
                paddd      mm2, mm0       ; a1+b1_a0+b0
                paddd      mm6, mm4       ; a3+b3_a2+b2
                psubd      mm0, mm1       ; a1-b1_a0-b0
                psubd      mm4, mm5       ; a3-b3_a2-b2
                psrad      mm2, 11        ; y1_y0
                psrad      mm6, 11        ; y3_y2
                psrad      mm0, 11        ; y6_y7
                psrad      mm4, 11        ; y4_y5
                packssdw   mm2, mm6       ; y3_y2_y1_y0 (signed saturation to 16 bits)
                packssdw   mm4, mm0       ; y6_y7_y4_y5
                movq       [esi+96], mm2  ; store y3..y0 over x3..x0
                movq       mm7, mm4       ; copy of y6_y7_y4_y5 for the word swap
                psrld      mm4, 16        ; 00_y6_00_y4
                pslld      mm7, 16        ; y7_00_y5_00
                por        mm4, mm7       ; y7_y6_y5_y4
                movq       [esi+104], mm4 ; store y7..y4 over x7..x4
            ; row 3
            ; In-place row pass over the eight 16-bit words at [esi+48 .. esi+63],
            ; using the 64-byte coefficient table table_35 (words w00..w31).
            ; Lane notation in comments is high-to-low, separated by '_'.
                lea        eax, table_35  ; eax -> coefficient table; the row 5 pass that follows reuses it
                movd       mm0, [esi+48]  ; 00_00_x1_x0
                movd       mm1, [esi+52]  ; 00_00_x3_x2
                movd       mm2, [esi+56]  ; 00_00_x5_x4
                movd       mm3, [esi+60]  ; 00_00_x7_x6
                movq       mm4, [eax+16]  ; w14_w12_w10_w08
                movq       mm5, [eax+24]  ; w15_w13_w11_w09
                movq       mm6, [eax+48]  ; w30_w28_w26_w24
                movq       mm7, [eax+56]  ; w31_w29_w27_w25
                punpcklwd  mm0, mm2       ; x5_x1_x4_x0
                punpcklwd  mm1, mm3       ; x7_x3_x6_x2
                movq       mm2, mm0       ; copy of x5_x1_x4_x0
                movq       mm3, mm1       ; copy of x7_x3_x6_x2
                punpckldq  mm0, mm0       ; x4_x0_x4_x0
                punpckhdq  mm2, mm2       ; x5_x1_x5_x1
                punpckldq  mm1, mm1       ; x6_x2_x6_x2
                punpckhdq  mm3, mm3       ; x7_x3_x7_x3
                pmaddwd    mm4, mm0       ; x4*w14+x0*w12_x4*w10+x0*w08
                pmaddwd    mm5, mm1       ; x6*w15+x2*w13_x6*w11+x2*w09
                pmaddwd    mm6, mm2       ; x5*w30+x1*w28_x5*w26+x1*w24
                pmaddwd    mm7, mm3       ; x7*w31+x3*w29_x7*w27+x3*w25
                pmaddwd    mm0, [eax]     ; x4*w06+x0*w04_x4*w02+x0*w00
                pmaddwd    mm1, [eax+8]   ; x6*w07+x2*w05_x6*w03+x2*w01
                pmaddwd    mm2, [eax+32]  ; x5*w22+x1*w20_x5*w18+x1*w16
                pmaddwd    mm3, [eax+40]  ; x7*w23+x3*w21_x7*w19+x3*w17
                paddd      mm4, mm5       ; a3_a2 (sums over even inputs x0,x2,x4,x6)
                paddd      mm6, mm7       ; b3_b2 (sums over odd inputs x1,x3,x5,x7)
                paddd      mm0, mm1       ; a1_a0
                paddd      mm2, mm3       ; b1_b0
                paddd      mm4, half11    ; bias a3_a2 before the >>11 (half11 is defined outside this view; presumably the rounding constant)
                paddd      mm0, half11    ; bias a1_a0 likewise; the bias reaches both a+b and a-b
                movq       mm5, mm6       ; keep b3_b2 for the subtraction
                movq       mm1, mm2       ; keep b1_b0 for the subtraction
                paddd      mm2, mm0       ; a1+b1_a0+b0
                paddd      mm6, mm4       ; a3+b3_a2+b2
                psubd      mm0, mm1       ; a1-b1_a0-b0
                psubd      mm4, mm5       ; a3-b3_a2-b2
                psrad      mm2, 11        ; y1_y0
                psrad      mm6, 11        ; y3_y2
                psrad      mm0, 11        ; y6_y7
                psrad      mm4, 11        ; y4_y5
                packssdw   mm2, mm6       ; y3_y2_y1_y0 (signed saturation to 16 bits)
                packssdw   mm4, mm0       ; y6_y7_y4_y5
                movq       [esi+48], mm2  ; store y3..y0 over x3..x0
                movq       mm7, mm4       ; copy of y6_y7_y4_y5 for the word swap
                psrld      mm4, 16        ; 00_y6_00_y4
                pslld      mm7, 16        ; y7_00_y5_00
                por        mm4, mm7       ; y7_y6_y5_y4
                movq       [esi+56], mm4  ; store y7..y4 over x7..x4
            ; row 5
            ; Start of the row pass over the words at [esi+80 .. esi+95]; the rest of
            ; this pass lies beyond this view.  eax is not reloaded here: it still
            ; holds the table_35 address loaded at the start of the row 3 pass.
                movd       mm0, [esi+80]  ; 00_00_x1_x0
                movd       mm1, [esi+84]  ; 00_00_x3_x2
                movd       mm2, [esi+88]  ; 00_00_x5_x4
                movd       mm3, [esi+92]  ; 00_00_x7_x6
                movq       mm4, [eax+16]  ; w14_w12_w10_w08
                movq       mm5, [eax+24]  ; w15_w13_w11_w09

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -