📄 reconmmx.s
字号:
push ecx
push ebx
push edx
mov esi, [esp+source]
mov edi, [esp+dest]
mov ecx, [esp+h]
mov ebx, [esp+lx2]
mov edx, [esp+lx]
movq mm5, [MASK_AND]
movq mm6, [ADD_1]
.recvc1:
movq mm0,[esi]
movq mm1,[esi+edx]
psrlw mm0,1
psrlw mm1,1
pand mm0,mm5
pand mm1,mm5
paddusb mm0,mm1
paddusb mm0,mm6
movq [edi],mm0
add edi,ebx
add esi,ebx
dec ecx
jnz .recvc1
emms
pop edx
pop ebx
pop ecx
pop edi
pop esi
ret
align 16
;-----------------------------------------------------------------------
; recva
;
; For each of `h` rows, reads 16 bytes at [esi] and 16 bytes at
; [esi+lx], combines them, then blends the result into the 16 bytes
; at [edi] in place.  Both pointers advance by `lx2` after every row.
;
; Per byte, with half(x) = (x word-shifted right by 1) AND MASK_AND:
;     t   = sat(half(src0) + half(src1) + ADD_1)
;     dst = sat(half(dst)  + half(t)    + ADD_1)
; where sat() is the unsigned byte saturation performed by paddusb.
; NOTE(review): MASK_AND is presumably 0x7f and ADD_1 presumably 0x01
; in every byte, making half() a per-byte shift - confirm against the
; constant definitions, which are outside this excerpt.
;
; Stack arguments (offset symbols defined elsewhere in the file):
;     source, dest, h, lx2, lx
; Registers: esi, edi, ecx, ebx, edx are saved and restored; eax is
;     not written.  MMX state is released with emms before returning.
; The row counter is tested after the row is processed, so `h` must be
; non-zero on entry.
;-----------------------------------------------------------------------
recva:
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    edx

        movq    mm6, [ADD_1]            ; mm6 = per-row additive constant
        movq    mm7, [MASK_AND]         ; mm7 = mask applied after each shift
        mov     edx, [esp+lx]           ; edx = offset of the second source row
        mov     ebx, [esp+lx2]          ; ebx = per-row pointer step
        mov     ecx, [esp+h]            ; ecx = rows remaining
        mov     edi, [esp+dest]         ; edi = destination row
        mov     esi, [esp+source]       ; esi = first source row

.row:
        ; Fetch everything before any store so that source/destination
        ; overlap behaves exactly as it always has.
        movq    mm0, [esi]              ; src0, bytes 0-7
        movq    mm2, [esi+8]            ; src0, bytes 8-15
        movq    mm1, [esi+edx]          ; src1, bytes 0-7
        movq    mm3, [esi+edx+8]        ; src1, bytes 8-15
        movq    mm4, [edi]              ; dst,  bytes 0-7
        movq    mm5, [edi+8]            ; dst,  bytes 8-15

        ; ---- bytes 0-7 ----
        psrlw   mm0, 1
        pand    mm0, mm7                ; mm0 = half(src0)
        psrlw   mm1, 1
        pand    mm1, mm7                ; mm1 = half(src1)
        paddusb mm0, mm1
        paddusb mm0, mm6                ; mm0 = t
        psrlw   mm0, 1
        pand    mm0, mm7                ; mm0 = half(t)
        psrlw   mm4, 1
        pand    mm4, mm7                ; mm4 = half(dst)
        paddusb mm4, mm0
        paddusb mm4, mm6                ; mm4 = new dst, bytes 0-7

        ; ---- bytes 8-15 ----
        psrlw   mm2, 1
        pand    mm2, mm7                ; mm2 = half(src0)
        psrlw   mm3, 1
        pand    mm3, mm7                ; mm3 = half(src1)
        paddusb mm2, mm3
        paddusb mm2, mm6                ; mm2 = t
        psrlw   mm2, 1
        pand    mm2, mm7                ; mm2 = half(t)
        psrlw   mm5, 1
        pand    mm5, mm7                ; mm5 = half(dst)
        paddusb mm5, mm2
        paddusb mm5, mm6                ; mm5 = new dst, bytes 8-15

        movq    [edi], mm4
        movq    [edi+8], mm5

        add     esi, ebx                ; step both pointers to the next row
        add     edi, ebx
        dec     ecx
        jnz     near .row

        emms                            ; leave the FPU usable for the caller
        pop     edx
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret
align 16
;-----------------------------------------------------------------------
; recvac
;
; 8-byte-wide row loop: for each of `h` rows, reads 8 bytes at [esi]
; and 8 bytes at [esi+lx], combines them, then blends the result into
; the 8 bytes at [edi] in place.  Both pointers advance by `lx2` after
; every row.
;
; Per byte, with half(x) = (x word-shifted right by 1) AND MASK_AND:
;     t   = sat(half(src0) + half(src1) + ADD_1)
;     dst = sat(half(dst)  + half(t)    + ADD_1)
; where sat() is the unsigned byte saturation performed by paddusb.
; NOTE(review): MASK_AND is presumably 0x7f and ADD_1 presumably 0x01
; in every byte - confirm against the constant definitions, which are
; outside this excerpt.
;
; Stack arguments (offset symbols defined elsewhere in the file):
;     source, dest, h, lx2, lx
; Registers: esi, edi, ecx, ebx, edx are saved and restored; eax is
;     not written.  MMX state is released with emms before returning.
; The row counter is tested after the row is processed, so `h` must be
; non-zero on entry.
;-----------------------------------------------------------------------
recvac:
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    edx

        movq    mm6, [ADD_1]            ; mm6 = per-row additive constant
        movq    mm5, [MASK_AND]         ; mm5 = mask applied after each shift
        mov     edx, [esp+lx]           ; edx = offset of the second source row
        mov     ebx, [esp+lx2]          ; ebx = per-row pointer step
        mov     ecx, [esp+h]            ; ecx = rows remaining
        mov     edi, [esp+dest]         ; edi = destination row
        mov     esi, [esp+source]       ; esi = first source row

.row:
        ; All three loads happen before the store, as before.
        movq    mm0, [esi]              ; src0
        movq    mm1, [esi+edx]          ; src1
        movq    mm4, [edi]              ; dst

        psrlw   mm0, 1
        pand    mm0, mm5                ; mm0 = half(src0)
        psrlw   mm1, 1
        pand    mm1, mm5                ; mm1 = half(src1)
        paddusb mm0, mm1
        paddusb mm0, mm6                ; mm0 = t
        psrlw   mm0, 1
        pand    mm0, mm5                ; mm0 = half(t)

        psrlw   mm4, 1
        pand    mm4, mm5                ; mm4 = half(dst)
        paddusb mm4, mm0
        paddusb mm4, mm6                ; mm4 = new dst

        movq    [edi], mm4

        add     esi, ebx                ; step both pointers to the next row
        add     edi, ebx
        dec     ecx
        jnz     .row

        emms                            ; leave the FPU usable for the caller
        pop     edx
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret
; Stack-argument offsets used by add_block_mmx and set_block_mmx.
; Both routines push five 32-bit registers (esi, edi, ecx, ebx, edx)
; before reading their arguments, hence 5 * 4 = 20 bytes of register
; storage between esp and the caller's frame.
%assign RegisterStorageSize 20
; The arguments sit at consecutive dwords, in the order rfp, bp, iincr.
; NOTE(review): the extra +4 on the first offset presumably skips the
; return address, and LocalFrameSize is defined outside this excerpt -
; confirm both against the rest of the file.
%assign rfp LocalFrameSize + RegisterStorageSize + 4
%assign bp LocalFrameSize + RegisterStorageSize + 8
%assign iincr LocalFrameSize + RegisterStorageSize + 12
; FIXME clipping needs to be done
align 16
;-----------------------------------------------------------------------
; add_block_mmx
;
; Adds eight rows of signed 16-bit values to eight rows of unsigned
; bytes, in place.  For each row:
;     - 8 bytes are read from [edi] and zero-extended to 8 words,
;     - the 8 words at [esi] .. [esi+15] are added with signed word
;       saturation (paddsw),
;     - the sums are packed back to bytes with unsigned saturation
;       (packuswb clamps every word to 0..255) and stored to [edi].
; The byte pointer advances by `iincr` per row, the word pointer by
; 16 bytes per row.
;
; Stack arguments (offsets from the %assign block above the routine):
;     rfp   -> edi, byte rows that are read and rewritten
;     bp    -> esi, 8 rows of 8 words
;     iincr -> ebx, byte-row stride
; Registers: esi, edi, ecx, ebx, edx are saved and restored; eax is
;     not written.  MMX state is released with emms before returning.
;
; ecx and edx are not used by the body, but their push/pop pairs must
; stay: the argument offsets assume five saved registers
; (RegisterStorageSize = 20).  The former `mov ecx,8` was a dead store
; left over from a counted loop - the rows are unrolled with %rep - and
; has been removed.
;-----------------------------------------------------------------------
add_block_mmx:
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    edx

        mov     esi, [esp+bp]           ; esi = word rows
        mov     edi, [esp+rfp]          ; edi = byte rows (in/out)
        mov     ebx, [esp+iincr]        ; ebx = byte-row stride

        ; An earlier variant loaded a PLUS_384 bias into mm7 and added it
        ; to both halves before packing; that path is disabled.
        pxor    mm2, mm2                ; mm2 = 0, used to zero-extend bytes

%rep 8
        movq    mm0, [edi]              ; 8 destination bytes
        movq    mm1, mm0
        punpcklbw mm0, mm2              ; bytes 0-3 -> words
        punpckhbw mm1, mm2              ; bytes 4-7 -> words
        paddsw  mm0, [esi]              ; + words 0-3
        paddsw  mm1, [esi+8]            ; + words 4-7
        packuswb mm0, mm1               ; clamp to 0..255 and repack
        movq    [edi], mm0
        add     edi, ebx                ; next byte row
        add     esi, 16                 ; next word row
%endrep

        emms                            ; leave the FPU usable for the caller
        pop     edx
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret
align 16
;-----------------------------------------------------------------------
; set_block_mmx
;
; Converts eight rows of signed 16-bit values to eight rows of unsigned
; bytes.  For each row the 8 words are biased by PLUS_128 using signed
; word saturation (paddsw), packed to bytes with unsigned saturation
; (packuswb clamps every word to 0..255) and stored.  Two rows are
; handled per unrolled step, four steps in total.
; NOTE(review): PLUS_128 presumably holds 128 in each 16-bit lane -
; confirm against its definition, which is outside this excerpt.
;
; Stack arguments (offsets from the %assign block earlier in the file):
;     rfp   -> edi, destination byte rows (written only)
;     bp    -> esi, 8 rows of 8 words
;     iincr -> ebx, destination row stride
; Registers: esi, edi, ecx, ebx, edx are saved and restored; eax is
;     not written.  ecx and edx are unused here, but their push/pop
;     pairs keep the five-register layout the argument offsets assume.
;     MMX state is released with emms before returning.
;-----------------------------------------------------------------------
set_block_mmx:
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    edx

        movq    mm7, [PLUS_128]         ; mm7 = bias added to every word
        mov     ebx, [esp+iincr]        ; ebx = destination row stride
        mov     edi, [esp+rfp]          ; edi = destination rows
        mov     esi, [esp+bp]           ; esi = word rows

%rep 4
        ; Load two rows of words (all loads precede the stores).
        movq    mm0, [esi]              ; row A, words 0-3
        movq    mm1, [esi+8]            ; row A, words 4-7
        movq    mm2, [esi+16]           ; row B, words 0-3
        movq    mm3, [esi+24]           ; row B, words 4-7

        ; Bias every word.
        paddsw  mm0, mm7
        paddsw  mm1, mm7
        paddsw  mm2, mm7
        paddsw  mm3, mm7

        ; Clamp to 0..255 and narrow to bytes.
        packuswb mm0, mm1               ; row A -> 8 bytes
        packuswb mm2, mm3               ; row B -> 8 bytes

        movq    [edi], mm0              ; store row A
        add     esi, 32                 ; next pair of word rows
        add     edi, ebx
        movq    [edi], mm2              ; store row B
        add     edi, ebx
%endrep

        emms                            ; leave the FPU usable for the caller
        pop     edx
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret
;
;_64_minus_index: dd 64,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34
; dd 33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
;
;extern _buf_pointer
;extern _end_buf
;extern _bit_count
;
;
;global _getbits
;_getbits:
;
; mov eax, [_bit_count] ; Number of valid bits in MM0
; mov ecx, [ESP+4] ; Parameter. How many bits should we
; ; read.
;
; sub eax,ecx ; Do we have enough bits in MM0
; jl new64bit ; If not get new 64 bits
;
; movd MM3,[_64_minus_index+ecx*4] ;MM3 = 64 - number of needed bits.
; movq MM2,MM0
;
; movd mm1,ecx ; # of bits to read
; psrlq MM2,MM3 ; MM2 now has valid bitstream in least
; ; significant part
;
; mov [_bit_count],eax ; Update number of valid bits.
;
; movd eax,MM2 ; move the result into eax
; psllq MM0,MM1 ; throw away those bits
;
; ret
;
;new64bit:
; movd MM3,[_64_minus_index+ecx*4] ;MM3 = 64 - number of requested bits
; ;(for shifting)
; movq MM2,MM0 ; copy left over bits
;
; mov edx,[_buf_pointer] ;pointer to bitstream
; mov ecx,[_end_buf] ;read pointer to end of buffer
;
; add edx,8 ;update the pointer
; add eax,64 ;eax = 64- # of bits we missed in old
; ;group
;
; cmp edx,ecx ;do we have another qword to read
; mov [_buf_pointer],edx ;save new value
;
; mov ecx,[edx-8] ;read next qword (dword here)
; mov edx,[edx-4] ;(dword here)
;
; jge do_refill ;do_refill
;refill:
; ; now convert from big-endian to little-endian,
; ; but make use of the leftover bits (MM2) before using these
; bswap edx ;swapping the first 32 bit
; bswap ecx ;swapping the second 32 bit
;
; movd mm4,ecx ;second 32 bit in mm4
; psrlq mm2,mm3 ;mm2 has remaining bits in least
; ;significant part with room for new
; ;bits to right of it
; movd mm1,edx ;move first 32 bit
; psllq mm4,32 ;shift second 32 bit to upper part of
; ;register
;
; movd mm3,eax ;mm3 gets the shift counter
; por mm4,mm1 ;combine the 64 swapped data into mm4
;
; movq mm0,mm4 ;save new word in mm0 for next time
; psrlq mm4,mm3 ;mm4 = new bits we now need in least
; ;significant part
;
; mov [_bit_count],eax ;Save bit count for next time
; por mm2,mm4 ;combine remaining bits with the bits
; ;from new word
;
; movd mm1,[_64_minus_index+4*eax] ;# of bits we missed in old group
;
; movd eax,mm2 ;return bits in eax
; psllq mm0,mm1 ;remove the bits we just
; ;read from mm0
;
; RET
;do_refill:
; PUSH EAX
; PUSH EDX
; PUSH ECX
; CALL _refill_buffer
; POP ECX
; POP EDX
; POP EAX
; JMP refill
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -