⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 quantize_h263_3dne.asm

📁 这是一个压缩解压包,用C语言进行编程的,里面有详细的源代码.
💻 ASM
📖 第 1 页 / 共 2 页
字号:
  ; ---------------------------------------------------------------------------
  ; Tail of a routine whose beginning lies before this part of the file.
  ; It is reproduced unchanged; only the line structure has been restored.
  ; ---------------------------------------------------------------------------
  mov ebp, [byte esp]
  quant_intra1 3
  psubw mm5, mm4                    ;C8
  mov esi, [dword esp + 12]         ; pop back the register value
  mov edi, [esp + 4]                ; pop back the register value
  sar eax, 16
  lea ebx, [byte eax + 1]           ; workaround for eax < 0
  cmovs eax, ebx                    ; conditionally move the corrected value
  mov [edx], ax                     ; coeff[0] = ax
  mov ebx, [esp + 8]                ; pop back the register value
  add esp, byte 16                  ; "quant_intra 0" pushed ebp, but we don't restore that one, just correct the stack offset by 16
  psubw mm7, mm6                    ;D8
  movq [edx + 3 * 32 + 16], mm5     ;C9
  movq [edx + 3 * 32 + 24], mm7     ;D9
  xor eax, eax
  ret
.endfunc:

;-----------------------------------------------------------------------------
;
; uint32_t quant_h263_inter_3dne(int16_t * coeff,
;                                const int16_t const * data,
;                                const uint32_t quant,
;                                const uint16_t *mpeg_matrices);
;
; Arguments are read from the stack ([esp+4] coeff, [esp+8] data,
; [esp+12] quant); mpeg_matrices is never read by this routine.
;
; Register roles:
;   ecx = data (source words)        edx = coeff (destination words)
;   ebx = &mmx_sub[quant-1]          mm7 = mmx_div[quant-1]
;   eax = &mmzero (zero source; table defined outside this part of the file)
;   mm5 = running sum, folded into eax on return
; ebx is the only general register saved and restored.  No EMMS is issued
; here.
;
; Each coefficient goes through: |c| -> saturating subtract of 'sub' ->
; scale (pmulhw by the divider, or a 1-bit shift when quant == 1) ->
; accumulate into the sum -> sign restored with the xor/sub mask idiom.
;
;-----------------------------------------------------------------------------
;This is Athlon-optimized code (ca 90 clk per call)
;Optimized by Jaan, 30 Nov 2002

; quantinter N -- one software-pipelined step covering 24 bytes (3 qwords).
; Three dependency chains (A, B, C) are interleaved; the tag on each line is
; <chain><step>.  Chain A handles [N*24], chain C handles [N*24+16]; chain B
; is split across invocations: steps B6..B11 finish the qword loaded by the
; previous invocation (or by the function prologue when N == 0, which is why
; that result is stored at offset 120), steps B1..B5 start [N*24+8].
; The instruction order is the schedule -- do not reorder.
%macro quantinter 1
  movq mm1, [eax]               ;A2  mm1 = 0
  psraw mm3, 15                 ;B6  mm3 = sign mask of B
%if (%1)
  psubw mm2, mm6                ;C10 finish sign restore of previous C
%endif
  psubw mm1, mm0                ;A3  mm1 = -c
  pmulhw mm4, mm7               ;B7  high word of (B * divider)
  movq mm6, [ecx + %1*24+16]    ;C1  load c
  pmaxsw mm1, mm0               ;A4  mm1 = |c|
  paddw mm5, mm4                ;B8  sum += B
%if (%1)
  movq [edx + %1*24+16-24], mm2 ;C11 store previous C
%endif
  psubusw mm1, [ebx]            ;A5  mm1 -= sub (unsigned saturating, never < 0)
  pxor mm4, mm3                 ;B9  apply sign mask ...
  movq mm2, [eax]               ;C2  mm2 = 0
  psraw mm0, 15                 ;A6  mm0 = sign mask of A
  psubw mm4, mm3                ;B10 ... and subtract it (conditional negate)
  psubw mm2, mm6                ;C3  mm2 = -c
  pmulhw mm1, mm7               ;A7  high word of (A * divider)
  movq mm3, [ecx + %1*24+8]     ;B1  load c
  pmaxsw mm2, mm6               ;C4  mm2 = |c|
  paddw mm5, mm1                ;A8  sum += A
%if (%1)
  movq [edx + %1*24+8-24], mm4  ;B11 store previous B
%else
  movq [edx + 120], mm4         ;B11 store the B started in the prologue
%endif
  psubusw mm2, [ebx]            ;C5  mm2 -= sub (unsigned saturating)
  pxor mm1, mm0                 ;A9  apply sign mask ...
  movq mm4, [eax]               ;B2  mm4 = 0
  psraw mm6, 15                 ;C6  mm6 = sign mask of C
  psubw mm1, mm0                ;A10 ... and subtract it (conditional negate)
  psubw mm4, mm3                ;B3  mm4 = -c
  pmulhw mm2, mm7               ;C7  high word of (C * divider)
  movq mm0, [ecx + %1*24+24]    ;A1  preload A for the next invocation
  pmaxsw mm4, mm3               ;B4  mm4 = |c|
  paddw mm5, mm2                ;C8  sum += C
  movq [byte edx + %1*24], mm1  ;A11 store A
  psubusw mm4, [ebx]            ;B5  mm4 -= sub (unsigned saturating)
  pxor mm2, mm6                 ;C9  apply sign mask (completed by C10)
%endmacro

; quantinter1 N -- quant == 1 variant: 16 bytes (2 qwords) per invocation,
; with the divider multiply replaced by a logical right shift by one.
; Expects mm6 = 'sub' value and eax = &mmzero.
%macro quantinter1 1
  movq mm0, [byte ecx + %1*16]  ; mm0 = c (first qword)
  movq mm3, [ecx + %1*16+8]     ; mm3 = c (second qword)
  movq mm1, [eax]               ; mm1 = 0
  movq mm4, [eax]               ; mm4 = 0
  psubw mm1, mm0                ; -c
  psubw mm4, mm3
  pmaxsw mm1, mm0               ; |c|
  pmaxsw mm4, mm3
  psubusw mm1, mm6              ; |c| -= sub (unsigned saturating, never < 0)
  psubusw mm4, mm6
  psraw mm0, 15                 ; sign masks
  psraw mm3, 15
  psrlw mm1, 1                  ; scale: >> 1 instead of pmulhw
  psrlw mm4, 1
  paddw mm5, mm1                ; sum += magnitude
  pxor mm1, mm0                 ; apply sign mask ...
  paddw mm5, mm4
  pxor mm4, mm3
  psubw mm1, mm0                ; ... and subtract it (conditional negate)
  psubw mm4, mm3
  cmp esp, esp                  ; filler; nothing reads the flags afterwards
  movq [byte edx + %1*16], mm1
  movq [edx + %1*16+8], mm4
%endmacro

ALIGN 16
cglobal quant_h263_inter_3dne
quant_h263_inter_3dne:
  mov edx, [esp  + 4]               ; coeff
  mov ecx, [esp  + 8]               ; data
  mov eax, [esp  + 12]              ; quant
  push ebx
  pxor mm5, mm5                     ; sum = 0
  nop
  lea ebx,[mmx_sub + eax * 8 - 8]   ; ebx = &mmx_sub[quant-1]
  movq mm7, [mmx_div + eax * 8 - 8] ; mm7 = divider for this quant
  cmp al, 1                         ; quant == 1 ? (only the low byte is tested)
  lea eax, [mmzero]                 ; lea leaves the flags from the cmp intact
  jz near .q1loop
  cmp esp, esp                      ; filler
ALIGN 8
  ; prime chain B with the last qword (offset 120) and chain A with the first
  movq mm3, [ecx + 120]     ;B1
  pxor mm4, mm4             ;B2
  psubw mm4, mm3            ;B3
  movq mm0, [ecx]           ;A1 mm0 = first qword
  pmaxsw mm4, mm3           ;B4
  psubusw mm4, [ebx]        ;B5
  quantinter 0
  quantinter 1
  quantinter 2
  quantinter 3
  quantinter 4
  ; drain the pipeline: finish the B and C chains started by "quantinter 4"
  psraw mm3, 15             ;B6
  psubw mm2, mm6            ;C10
  pmulhw mm4, mm7           ;B7
  paddw mm5, mm4            ;B8
  pxor mm4, mm3             ;B9
  psubw mm4, mm3            ;B10
  movq [edx + 4*24+16], mm2 ;C11
  pop ebx
  movq [edx + 4*24+8], mm4  ;B11
  pmaddwd mm5, [plus_one]   ; fold the four word sums into two dwords
  movq mm0, mm5
  punpckhdq mm5, mm5        ; bring the high dword down
  paddd mm0, mm5
  movd eax, mm0             ; return sum
  ret

ALIGN 16
.q1loop:
  movq mm6, [byte ebx]      ; mm6 = 'sub' value for quant == 1
  quantinter1 0
  quantinter1 1
  quantinter1 2
  quantinter1 3
  quantinter1 4
  quantinter1 5
  quantinter1 6
  quantinter1 7
  pmaddwd mm5, [plus_one]   ; fold the four word sums into two dwords
  movq mm0, mm5
  psrlq mm5, 32             ; bring the high dword down
  paddd mm0, mm5
  movd eax, mm0             ; return sum
  pop ebx
  ret
.endfunc:

;-----------------------------------------------------------------------------
;
; uint32_t dequant_h263_intra_3dne(int16_t *data,
;                                  const int16_t const *coeff,
;                                  const uint32_t quant,
;                                  const uint32_t dcscalar,
;                                  const uint16_t *mpeg_matrices);
;
; Arguments are read from the stack; mpeg_matrices is never read here.
; Always returns 0 in eax.  edi, ebx, ebp and esi are saved and restored.
;
;-----------------------------------------------------------------------------

  ; The AC path is shared with dequant_h263_inter_3dne (both expand the
  ; 'dequant' macro); saturation is done with 'pminsw'
  ; (saves 2 cycles/loop => ~5% faster)
;This is Athlon-optimized code (ca 106 clk per call)

; dequant N -- one software-pipelined step covering 24 bytes (3 qwords).
; Tags are <chain><step>, as in the quantizer.  Expected register setup:
;   ecx = coeff (source)            edx = data (destination)
;   edi -> multiplier words (2*quant)
;   eax -> add words (quant or quant-1)
;   ebx -> clamp words (mmx_2047)   esi -> zero words (mmzero)
; Per coefficient: v = |c| * [edi] + (c == 0 ? 0 : [eax]) + sign_mask,
; v = min(v, [ebx]), result = v xor sign_mask.  Adding the sign mask (-1 for
; negative c) before the xor is what turns the complement into a negation.
; Chain B is split across invocations exactly as in 'quantinter'.
; 'mov esp, esp', 'mov ebp, ebp' and 'lea ebp, [byte ebp]' are flag-preserving
; fillers; they must stay flag-neutral because dequant_h263_intra_3dne keeps
; a pending cmp result across an expansion of this macro.
; The instruction order is the schedule -- do not reorder.
%macro dequant 1
  movq mm1, [ecx+%1*24]         ;A2  c = coeff[i]
  psubw mm0, mm1                ;A3  -c           (1st dep)
%if (%1)
  paddw mm4, mm6                ;C11 mm6 free     (4th+)
%endif
  pmaxsw mm0, mm1               ;A4  |c|          (2nd)
%if (%1)
  mov ebp, ebp                  ;    filler
  pminsw mm4, [ebx]             ;C12 saturates to +2047 (5th+) later
%endif
  movq mm6, [esi]               ;A5  0; mm6 in use
  pandn mm7, [eax]              ;B9  offset = isZero ? 0 : quant_add (2nd)
%if (%1)
  pxor mm5, mm4                 ;C13 (6th+) 1 later
%endif
  movq mm4, [esi]               ;C1  0
  mov esp, esp                  ;    filler
  pcmpeqw mm6, [ecx+%1*24]      ;A6  (c == 0) ? -1 : 0 (1st)
ALIGN 4
  psraw mm1, 15                 ;A7  sign(c)      (2nd)
%if (%1)
  movq [edx+%1*24+16-24], mm5   ;C14 (7th) 2 later
%endif
  paddw mm7, mm3                ;B10 offset + negate back (3rd)
  pmullw mm0, [edi]             ;A8  *= 2Q        (3rd+)
  paddw mm2, mm7                ;B11 mm7 free     (4th+)
  lea ebp, [byte ebp]           ;    filler
  movq mm5, [ecx+%1*24+16]      ;C2  c = coeff[i]
  psubw mm4, mm5                ;C3  -c           (1st dep)
  pandn mm6, [eax]              ;A9  offset = isZero ? 0 : quant_add (2nd)
  pminsw mm2, [ebx]             ;B12 saturates to +2047 (5th+)
  pxor mm3, mm2                 ;B13 (6th+)
  movq mm2, [byte esi]          ;B1  0
%if (%1)
  movq [edx+%1*24+8-24], mm3    ;B14 (7th)
%else
  movq [edx+120], mm3           ;B14 the B started in the function prologue
%endif
  pmaxsw mm4, mm5               ;C4  |c|          (2nd)
  paddw mm6, mm1                ;A10 offset + negate back (3rd)
  movq mm3, [ecx+%1*24 + 8]     ;B2  c = coeff[i]
  psubw mm2, mm3                ;B3  -c           (1st dep)
  paddw mm0, mm6                ;A11 mm6 free     (4th+)
  movq mm6, [byte esi]          ;C5  0; mm6 in use
  pcmpeqw mm6, [ecx+%1*24+16]   ;C6  (c == 0) ? -1 : 0 (1st)
  pminsw mm0, [ebx]             ;A12 saturates to +2047 (5th+)
  pmaxsw mm2, mm3               ;B4  |c|          (2nd)
  pxor mm1, mm0                 ;A13 (6th+)
  pmullw mm4, [edi]             ;C8  *= 2Q        (3rd+)
  psraw mm5, 15                 ;C7  sign(c)      (2nd)
  movq mm7, [byte esi]          ;B5  0; mm7 in use
  pcmpeqw mm7, [ecx+%1*24 + 8]  ;B6  (c == 0) ? -1 : 0 (1st)
%if (%1 < 4)
  movq mm0, [byte esi]          ;A1  0 for the next invocation
%endif
  pandn mm6, [byte eax]         ;C9  offset = isZero ? 0 : quant_add (2nd)
  psraw mm3, 15                 ;B7  sign(c)      (2nd)
  movq [byte edx+%1*24], mm1    ;A14 (7th)
  paddw mm6, mm5                ;C10 offset + negate back (3rd)
  pmullw mm2, [edi]             ;B8  *= 2Q        (3rd+)
  mov esp, esp                  ;    filler
%endmacro

ALIGN 16
cglobal dequant_h263_intra_3dne
dequant_h263_intra_3dne:
  mov ecx, [esp+ 8]                 ; coeff
  mov eax, [esp+12]                 ; quant
  pxor mm0, mm0                     ;A1 0
  pxor mm2, mm2                     ;B1 0
  push edi
  push ebx
  lea edi, [mmx_mul + eax*8 - 8]    ; 2*quant
  push ebp
  mov ebx, mmx_2047
  movsx ebp, word [ecx]             ; ebp = coeff[0] (DC), sign-extended
  lea eax, [mmx_add + eax*8 - 8]    ; quant or quant-1
  push esi
  mov esi, mmzero
  pxor mm7, mm7
  movq mm3, [ecx+120]               ;B2 c = coeff[i]
  pcmpeqw mm7, [ecx+120]            ;B6 (c == 0) ? -1 : 0 (1st)
  imul ebp, [esp+16+16]             ; DC *= dcscalar (4 pushes => args moved by 16)
  psubw mm2, mm3                    ;B3 -c           (1st dep)
  pmaxsw mm2, mm3                   ;B4 |c|          (2nd)
  pmullw mm2, [edi]                 ;B8 *= 2Q        (3rd+)
  psraw mm3, 15                     ;B7 sign(c)      (2nd)
  mov edx, [esp+ 4+16]              ; data
ALIGN 8
  ; The DC clamp is interleaved with the AC pipeline.  Each cmp result has to
  ; survive one 'dequant' expansion until its cmov, so only flag-preserving
  ; instructions may sit in between.
  dequant 0
  cmp ebp, -2048
  mov esp, esp                      ; filler (flag-preserving)
  dequant 1
  cmovl ebp, [int_2048]             ; DC < -2048: take the value stored at int_2048
  nop
  dequant 2
  cmp ebp, 2047
  mov esp, esp                      ; filler (flag-preserving)
  dequant 3
  cmovg ebp, [int2047]              ; DC > 2047: take the value stored at int2047
  nop
  dequant 4
  ; drain the pipeline: finish the B and C chains started by "dequant 4"
  paddw mm4, mm6            ;C11 mm6 free (4th+)
  pminsw mm4, [ebx]         ;C12 saturates to +2047 (5th+)
  pandn mm7, [eax]          ;B9  offset = isZero ? 0 : quant_add (2nd)
  mov eax, ebp              ; eax = clamped DC (eax no longer needed as pointer)
  mov esi, [esp]            ; restore saved registers without pop
  mov ebp, [esp+4]
  pxor mm5, mm4             ;C13 (6th+)
  paddw mm7, mm3            ;B10 offset + negate back (3rd)
  movq [edx+4*24+16], mm5   ;C14 (7th)
  paddw mm2, mm7            ;B11 mm7 free (4th+)
  pminsw mm2, [ebx]         ;B12 saturates to +2047 (5th+)
  mov ebx, [esp+8]
  mov edi, [esp+12]
  add esp, byte 16          ; drop the four saved registers
  pxor mm3, mm2             ;B13 (6th+)
  movq [edx+4*24+8], mm3    ;B14 (7th)
  mov [edx], ax             ; data[0] = DC, overwriting the AC-path result
  xor eax, eax              ; return 0
  ret
.endfunc:

;-----------------------------------------------------------------------------
;
; uint32_t dequant_h263_inter_3dne(int16_t * data,
;                                  const int16_t * const coeff,
;                                  const uint32_t quant,
;                                  const uint16_t *mpeg_matrices);
;
; Arguments are read from the stack; mpeg_matrices is never read here.
; Always returns 0 in eax.  edi, ebx and esi are saved and restored.
;
;-----------------------------------------------------------------------------

; Same AC path as dequant_h263_intra_3dne (the 'dequant' macro), without the
; DC handling; saturation is done with 'pminsw' (saves 2 cycles/loop)
; This is Athlon-optimized code (ca 100 clk per call)

ALIGN 16
cglobal dequant_h263_inter_3dne
dequant_h263_inter_3dne:
  mov ecx, [esp+ 8]                 ; coeff
  mov eax, [esp+12]                 ; quant
  pxor mm0, mm0                     ;A1 0
  pxor mm2, mm2                     ;B1 0
  push edi
  push ebx
  push esi
  lea edi, [mmx_mul + eax*8 - 8]    ; 2*quant
  mov ebx, mmx_2047
  pxor mm7, mm7
  movq mm3, [ecx+120]               ;B2 c = coeff[i]
  pcmpeqw mm7, [ecx+120]            ;B6 (c == 0) ? -1 : 0 (1st)
  lea eax, [mmx_add + eax*8 - 8]    ; quant or quant-1
  psubw mm2, mm3                    ;B3 -c           (1st dep)
  mov esi, mmzero
  pmaxsw mm2, mm3                   ;B4 |c|          (2nd)
  pmullw mm2, [edi]                 ;B8 *= 2Q        (3rd+)
  psraw mm3, 15                     ;B7 sign(c)      (2nd)
  mov edx, [dword esp+ 4+12]        ; data (3 pushes => args moved by 12)
ALIGN 8
  dequant 0
  dequant 1
  dequant 2
  dequant 3
  dequant 4
  ; drain the pipeline: finish the B and C chains started by "dequant 4"
  paddw mm4, mm6            ;C11 mm6 free (4th+)
  pminsw mm4, [ebx]         ;C12 saturates to +2047 (5th+)
  pandn mm7, [eax]          ;B9  offset = isZero ? 0 : quant_add (2nd)
  mov esi, [esp]            ; restore saved registers without pop
  pxor mm5, mm4             ;C13 (6th+)
  paddw mm7, mm3            ;B10 offset + negate back (3rd)
  movq [edx+4*24+16], mm5   ;C14 (7th)
  paddw mm2, mm7            ;B11 mm7 free (4th+)
  pminsw mm2, [ebx]         ;B12 saturates to +2047 (5th+)
  mov ebx, [esp+4]
  mov edi, [esp+8]
  add esp, byte 12          ; drop the three saved registers
  pxor mm3, mm2             ;B13 (6th+)
  movq [edx+4*24+8], mm3    ;B14 (7th)
  xor eax, eax              ; return 0
  ret
.endfunc:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -