⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_3dne.asm

📁 xvid的视频编解码库文件
💻 ASM
📖 第 1 页 / 共 3 页
字号:
  pmaddwd mm5, [tab_i_26_xmm+56]; x7*w31+x5*w30 x7*w27+x5*w26  paddd mm3, mm4                ; 4 free    ; a1=sum(even1) a0=sum(even0)  paddd mm0, mm1                ; 1 free    ; a3=sum(even3) a2=sum(even2)  pshufw mm1, [eax+48+8],10001000b      ; x6 x4 x6 x4  movq mm4, mm3                 ; 4     ; a1 a0  paddd mm6, mm7                ; 7 free    ; b1=sum(odd1) b0=sum(odd0)  paddd mm2, mm5                ; 5 free    ; b3=sum(odd3) b2=sum(odd2)  pshufw mm5, [eax+48],10001000b; x2 x0 x2 x0   mm5 & mm0 exchanged for next cycle  movq mm7, mm0                 ; 7     ; a3 a2  psubd mm4, mm6                ; 6 free    ; a1-b1 a0-b0  paddd mm6, mm3                ; mm6 = mm3+mm6+mm5+mm4; a1+b1 a0+b0  movq mm3, [tab_i_35_xmm]      ; 3     ; w05 w04 w01 w00  psubd mm7, mm2                ; ; a3-b3 a2-b2  paddd mm0, mm2                ; 0 free a3+b3 a2+b2  pshufw mm2, [eax+48],11011101b; x3 x1 x3 x1  pmaddwd mm3, mm5              ; x2*w05+x0*w04 x2*w01+x0*w00  pmaddwd mm5, [tab_i_35_xmm+16]; x2*w13+x0*w12 x2*w09+x0*w08  psrad mm4, SHIFT_INV_ROW      ; y6=a1-b1 y7=a0-b0  psrad mm7, SHIFT_INV_ROW      ; y4=a3-b3 y5=a2-b2  psrad mm6, SHIFT_INV_ROW      ; y1=a1+b1 y0=a0+b0  psrad mm0, SHIFT_INV_ROW      ; y3=a3+b3 y2=a2+b2  packssdw mm7, mm4             ; 4     ; y6 y7 y4 y5  packssdw mm6, mm0             ; 0 free    ; y3 y2 y1 y0  pshufw mm0, [eax+48+8],11011101b  ; x7 x5 x7 x5  movq [eax+32], mm6            ; 3     ; save y3 y2 y1 y0 stall2;   DCT_8_INV_ROW_1_s [eax+48], [eax+48], tab_i_35_xmm, rounder_3  movq mm4, [tab_i_35_xmm+8]    ; 4     ; w07 w06 w03 w02  movq mm6, [tab_i_35_xmm+32]   ; 6     ; w21 w20 w17 w16  pshufw mm7, mm7, 10110001b    ; y7 y6 y5 y4  paddd mm3, [rounder_3]        ; +rounder stall 6  paddd mm5, [rounder_3]        ; +rounder  movq [eax+32+8], mm7          ; 7     ; save y7 y6 y5 y4  movq mm7, [tab_i_35_xmm+40]   ; 7     ; w23 w22 w19 w18  pmaddwd mm4, mm1              ; x6*w07+x4*w06 x6*w03+x4*w02  pmaddwd mm1, [tab_i_35_xmm+24]; x6*w15+x4*w14 x6*w11+x4*w10  pmaddwd mm6, mm2              ; x3*w21+x1*w20 x3*w17+x1*w16  pmaddwd mm2, [tab_i_35_xmm+48]; x3*w29+x1*w28 x3*w25+x1*w24  pmaddwd mm7, mm0              ; 7     ; x7*w23+x5*w22 x7*w19+x5*w18 ; w23 w22 w19 w18  pmaddwd mm0, [tab_i_35_xmm+56]; x7*w31+x5*w30 x7*w27+x5*w26  paddd mm3, mm4                ; 4 free    ; a1=sum(even1) a0=sum(even0)  paddd mm5, mm1                ; mm1 free  ; a3=sum(even3) a2=sum(even2)  movq mm1, [tg_3_16]  movq mm4, mm3                 ; 4     ; a1 a0  paddd mm6, mm7                ; 7 free    ; b1=sum(odd1) b0=sum(odd0)  paddd mm2, mm0                ; 5 free    ; b3=sum(odd3) b2=sum(odd2)  movq mm0, [tg_3_16]  movq mm7, mm5                 ; 7     ; a3 a2  psubd mm4, mm6                ; 6 free    ; a1-b1 a0-b0  paddd mm3, mm6                ; mm3 = mm3+mm6+mm5+mm4; a1+b1 a0+b0  psubd mm7, mm2                ; ; a3-b3 a2-b2  paddd mm2, mm5                ; 0 free a3+b3 a2+b2  movq mm5, [eax+16*5]  psrad mm4, SHIFT_INV_ROW      ; y6=a1-b1 y7=a0-b0  psrad mm7, SHIFT_INV_ROW      ; y4=a3-b3 y5=a2-b2  psrad mm3, SHIFT_INV_ROW      ; y1=a1+b1 y0=a0+b0  psrad mm2, SHIFT_INV_ROW      ; y3=a3+b3 y2=a2+b2  movq mm6, [eax+16*1]  packssdw mm7, mm4             ; 4     ; y6 y7 y4 y5  movq mm4, [tg_1_16]  packssdw mm3, mm2             ; 0 free    ; y3 y2 y1 y0  pshufw mm2, mm7, 10110001b    ; y7 y6 y5 y4;   DCT_8_INV_COL_4 [eax+0],[eax+0];   movq    mm3,mmword ptr [eax+16*3]  movq mm7, [eax+16*7]  pmulhw mm0, mm3           ; x3*(tg_3_16-1)  pmulhw mm1, mm5           ; x5*(tg_3_16-1)  movq [eax+48+8], mm2      ; 7     ; save y7 y6 y5 y4  movq mm2, mm4             ; tg_1_16  pmulhw mm4, mm7           ; x7*tg_1_16  paddsw mm0, mm3           ; x3*tg_3_16  pmulhw mm2, mm6           ; x1*tg_1_16  paddsw mm1, mm3           ; x3+x5*(tg_3_16-1)  psubsw mm0, mm5           ; x3*tg_3_16-x5 = tm35  movq [eax+48], mm3        ; 3     ; save y3 y2 y1 y0  movq mm3, [ocos_4_16]  paddsw mm1, mm5           ; x3+x5*tg_3_16 = tp35  paddsw mm4, mm6           ; x1+tg_1_16*x7 = tp17  psubsw mm2, mm7           ; x1*tg_1_16-x7 = tm17  movq mm5, mm4             ; tp17  movq mm6, mm2             ; tm17  paddsw mm5, mm1           ; tp17+tp35 = b0  psubsw mm6, mm0           ; tm17-tm35 = b3  psubsw mm4, mm1           ; tp17-tp35 = t1  paddsw mm2, mm0           ; tm17+tm35 = t2  movq mm7, [tg_2_16]  movq mm1, mm4             ; t1  movq [eax+3*16], mm5      ; save b0  paddsw mm1, mm2           ; t1+t2  movq [eax+5*16], mm6      ; save b3  psubsw mm4, mm2           ; t1-t2  movq mm5, [eax+2*16]  movq mm0, mm7             ; tg_2_16  movq mm6, [eax+6*16]  pmulhw mm0, mm5           ; x2*tg_2_16  pmulhw mm7, mm6           ; x6*tg_2_16; slot  pmulhw mm1, mm3           ; ocos_4_16*(t1+t2) = b1/2; slot  movq mm2, [eax+0*16]  pmulhw mm4, mm3           ; ocos_4_16*(t1-t2) = b2/2  psubsw mm0, mm6           ; t2*tg_2_16-x6 = tm26  movq mm3, [eax+0*16]      ; x0  movq mm6, [eax+4*16]  paddsw mm7, mm5           ; x2+x6*tg_2_16 = tp26  paddsw mm2, mm6           ; x0+x4 = tp04  psubsw mm3, mm6           ; x0-x4 = tm04  movq mm5, mm2             ; tp04  movq mm6, mm3             ; tm04  psubsw mm2, mm7           ; tp04-tp26 = a3  paddsw mm3, mm0           ; tm04+tm26 = a1  paddsw mm1, mm1           ; b1  paddsw mm4, mm4           ; b2  paddsw mm5, mm7           ; tp04+tp26 = a0  psubsw mm6, mm0           ; tm04-tm26 = a2  movq mm7, mm3             ; a1  movq mm0, mm6             ; a2  paddsw mm3, mm1           ; a1+b1  paddsw mm6, mm4           ; a2+b2  psraw mm3, SHIFT_INV_COL  ; dst1  psubsw mm7, mm1           ; a1-b1  psraw mm6, SHIFT_INV_COL  ; dst2  psubsw mm0, mm4           ; a2-b2  movq mm1, [eax+3*16]      ; load b0  psraw mm7, SHIFT_INV_COL  ; dst6  movq mm4, mm5             ; a0  psraw mm0, SHIFT_INV_COL  ; dst5  movq [eax+1*16], mm3  paddsw mm5, mm1           ; a0+b0  movq [eax+2*16], mm6  psubsw mm4, mm1           ; a0-b0  movq mm3, [eax+5*16]      ; load b3  psraw mm5, SHIFT_INV_COL  ; dst0  movq mm6, mm2             ; a3  psraw mm4, SHIFT_INV_COL  ; dst7  movq [eax+5*16], mm0  movq mm0, [tg_3_16]  paddsw mm2, mm3           ; a3+b3  movq [eax+6*16], mm7  psubsw mm6, mm3           ; a3-b3  movq mm3, [eax+8+16*3]  movq [eax+0*16], mm5  psraw mm2, SHIFT_INV_COL  ; dst3  movq [eax+7*16], mm4 ;  DCT_8_INV_COL_4 [eax+8],[eax+8]  movq mm1, mm0             ; tg_3_16  movq mm5, [eax+8+16*5]  psraw mm6, SHIFT_INV_COL  ; dst4  pmulhw mm0, mm3           ; x3*(tg_3_16-1)  movq mm4, [tg_1_16]  pmulhw mm1, mm5           ; x5*(tg_3_16-1)  movq mm7, [eax+8+16*7]  movq [eax+3*16], mm2  movq mm2, mm4             ; tg_1_16  movq [eax+4*16], mm6  movq mm6, [eax+8+16*1]  pmulhw mm4, mm7           ; x7*tg_1_16  paddsw mm0, mm3           ; x3*tg_3_16  pmulhw mm2, mm6           ; x1*tg_1_16  paddsw mm1, mm3           ; x3+x5*(tg_3_16-1)  psubsw mm0, mm5           ; x3*tg_3_16-x5 = tm35  movq mm3, [ocos_4_16]  paddsw mm1, mm5           ; x3+x5*tg_3_16 = tp35  paddsw mm4, mm6           ; x1+tg_1_16*x7 = tp17  psubsw mm2, mm7           ; x1*tg_1_16-x7 = tm17  movq mm5, mm4             ; tp17  movq mm6, mm2             ; tm17  paddsw mm5, mm1           ; tp17+tp35 = b0  psubsw mm4, mm1           ; tp17-tp35 = t1  paddsw mm2, mm0           ; tm17+tm35 = t2  movq mm7, [tg_2_16]  movq mm1, mm4             ; t1  psubsw mm6, mm0           ; tm17-tm35 = b3  movq [eax+8+3*16], mm5    ; save b0  movq [eax+8+5*16], mm6    ; save b3  psubsw mm4, mm2           ; t1-t2  movq mm5, [eax+8+2*16]  movq mm0, mm7             ; tg_2_16  movq mm6, [eax+8+6*16]  paddsw mm1, mm2           ; t1+t2  pmulhw mm0, mm5           ; x2*tg_2_16  pmulhw mm7, mm6           ; x6*tg_2_16  movq mm2, [eax+8+0*16]  pmulhw mm4, mm3           ; ocos_4_16*(t1-t2) = b2/2  psubsw mm0, mm6           ; t2*tg_2_16-x6 = tm26 ; slot  pmulhw mm1, mm3           ; ocos_4_16*(t1+t2) = b1/2 ; slot  movq mm3, [eax+8+0*16]    ; x0  movq mm6, [eax+8+4*16]  paddsw mm7, mm5           ; x2+x6*tg_2_16 = tp26  paddsw mm2, mm6           ; x0+x4 = tp04  psubsw mm3, mm6           ; x0-x4 = tm04  movq mm5, mm2             ; tp04  movq mm6, mm3             ; tm04  psubsw mm2, mm7           ; tp04-tp26 = a3  paddsw mm3, mm0           ; tm04+tm26 = a1  paddsw mm1, mm1           ; b1  paddsw mm4, mm4           ; b2  paddsw mm5, mm7           ; tp04+tp26 = a0  psubsw mm6, mm0           ; tm04-tm26 = a2  movq mm7, mm3             ; a1  movq mm0, mm6             ; a2  paddsw mm3, mm1           ; a1+b1  paddsw mm6, mm4           ; a2+b2  psraw mm3, SHIFT_INV_COL  ; dst1  psubsw mm7, mm1           ; a1-b1  psraw mm6, SHIFT_INV_COL  ; dst2  psubsw mm0, mm4           ; a2-b2  movq mm1, [eax+8+3*16]    ; load b0  psraw mm7, SHIFT_INV_COL  ; dst6  movq mm4, mm5             ; a0  psraw mm0, SHIFT_INV_COL  ; dst5  movq [eax+8+1*16], mm3  paddsw mm5, mm1           ; a0+b0  movq [eax+8+2*16], mm6  psubsw mm4, mm1           ; a0-b0  movq mm3, [eax+8+5*16]    ; load b3  psraw mm5, SHIFT_INV_COL  ; dst0  movq mm6, mm2         ; a3  psraw mm4, SHIFT_INV_COL  ; dst7  movq [eax+8+5*16], mm0  paddsw mm2, mm3           ; a3+b3  movq [eax+8+6*16], mm7  psubsw mm6, mm3           ; a3-b3  movq [eax+8+0*16], mm5  psraw mm2, SHIFT_INV_COL  ; dst3  movq [eax+8+7*16], mm4  psraw mm6, SHIFT_INV_COL  ; dst4  movq [eax+8+3*16], mm2  movq [eax+8+4*16], mm6  ret

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -