📄 yuv12-rgb16.s

📁 ac3的解码程序
💻 S
📖 第 1 页 / 共 2 页
字号:
上一页 12

  sar        ebx, 1
  add        esi, ebx
  add        edx, esi
  neg        ebx
  mov		[esp+FrameWidth],ebx

;  Register Usage (as used by the code below):
;    ebx - negative chroma column index; reloaded from FrameWidth for every
;          line pair and stepped up by 4 until it reaches zero
;    esi - v plane row pointer, read as [esi+ebx]
;    edx - u plane row pointer, read as [edx+ebx]
;    ebp - luma cursor inside the inner loop (read as [ebp+2*ebx]);
;          used as aspect-counter scratch in PrepareChromaLine
;    edi - output cursor for the 16-bit pixels
;    eax - scratch; holds tmpCCOPitch while the odd line is stored
;
;------------------------------------------------------------------------------
; PrepareChromaLine: set-up executed once per pair of luma lines.
;   Decrements AspectCount by 2 and chooses the output pitch for the odd line:
;     - counter still above zero: tmpCCOPitch = CCOPitch (normal case)
;     - otherwise: tmpCCOPitch = 0 and the counter is reloaded by adding
;       AspectAdjustmentCount. With a zero pitch the odd-line stores in the
;       inner loop go to [edi+0], i.e. the same address as the even-line
;       stores, and the post-loop "add edi,[esp+tmpCCOPitch]" adds nothing.
;   NOTE(review): the branch is the unsigned "ja"; the disabled older version
;   of this logic further down used the signed "jg". They differ only if the
;   counter can become negative -- confirm.
PrepareChromaLine:
  mov		ebp,[esp+AspectCount]      ; ebp = remaining aspect count
  mov		ebx,[esp+FrameWidth]       ; ebx = negative column index for the inner loop
  sub    ebp,2                     ; two lines consumed; sets the flags tested by ja
  mov		 eax,[esp+CCOPitch]        ; mov leaves the flags from sub untouched
  mov		[esp+tmpCCOPitch],eax      ; default: odd line goes one pitch below the even line
   ja     continue                ; counter still > 0 (unsigned): keep the default

  xor    eax,eax                   ; counter expired: use a zero pitch for the odd line
  add		ebp,[esp+AspectAdjustmentCount] ; reload the counter
  mov		[esp+tmpCCOPitch],eax
continue:
  mov		[esp+AspectCount],ebp      ; write the updated counter back

; do_next_8x2_block: inner loop, one iteration per 8 luma pixels on two lines
; (even line first, then odd line) sharing 4 u and 4 v samples.
;
; This first part computes R, G and B bytes for the even line and leaves the
; chroma contributions in the temp_mmx scratch area for the odd line:
;   [temp_mmx+esp+ 0]  v-128, 4 words
;   [temp_mmx+esp+ 8]  chroma G contribution, 4 words
;   [temp_mmx+esp+16]  chroma B contribution, low 4 pixels
;   [temp_mmx+esp+24]  chroma R contribution, low 4 pixels
;   [temp_mmx+esp+32]  chroma R contribution, high 4 pixels
;   [temp_mmx+esp+40]  chroma B contribution, high 4 pixels
; Result registers on exit from this part:
;   mm6 = 8 G bytes, mm1 = 8 B bytes, mm2 = 8 R bytes (each clamped, see below)
; The constants Minusg, UVtG, Yadd, Ymul, UtB and VtR are defined outside this
; excerpt; their roles below are taken from the original comments.
do_next_8x2_block:
  mov		ebp,[esp+tmpYCursorEven]      ; ebp = luma cursor of the even line
; here is even line
  movd      mm1, [edx+ebx]         ; 4 u values
  pxor       mm0, mm0               ; mm0=0
  movd      mm2, [esi+ebx]         ; 4 v values
  punpcklbw  mm1, mm0               ; widen the 4 u bytes to words
  psubw      mm1, [Minusg]            ; u-128 as words (per original note)
  punpcklbw  mm2, mm0               ; widen the 4 v bytes to words
  psubw      mm2, [Minusg]            ; v-128 as words (per original note)
  movq       mm3, mm1               ; copy of u-128 for the high uv pairs
  movq       mm5, mm1               ; copy of u-128 for the B computation
  punpcklwd  mm1, mm2               ; interleave the 2 low u,v pairs
  pmaddwd    mm1, [UVtG]            ; multiply-add each pair -> 2 dword chroma G terms
   punpckhwd  mm3, mm2               ; interleave the 2 high u,v pairs
  pmaddwd    mm3, [UVtG]            ; 2 more dword chroma G terms
  movq       [temp_mmx+esp], mm2     ; save v-128
  movq       mm6, [ebp+2*ebx]       ; mm6 has 8 y pixels
  psubusb    mm6, [Yadd]              ; unsigned-saturating subtract: 8 y-16 pixels (per original note)
   packssdw   mm1, mm3               ; pack the 4 chroma G terms to signed words
  movq       mm7, mm6               ; save the 8 y-16 pixels
   punpcklbw  mm6, mm0               ; mm6 = 4 low y-16 as words
  pmullw     mm6, [Ymul]            ; scaled luma, low 4
   punpckhbw  mm7, mm0               ; mm7 = 4 high y-16 as words
  pmullw     mm7, [Ymul]            ; scaled luma, high 4
   movq       mm4, mm1               ; copy of the chroma G terms for the high half
  movq       [temp_mmx+esp+8], mm1   ; save 4 chroma G values
   punpcklwd  mm1, mm1               ; duplicate the low 2 chroma G terms (one per 2 pixels)
  movq       mm0, mm6               ; mm0 = scaled luma, low 4 (kept for B and R)
   punpckhwd  mm4, mm4               ; duplicate the high 2 chroma G terms
  movq       mm3, mm7               ; mm3 = scaled luma, high 4 (kept for B and R)
   psubw      mm6, mm1               ; 4 low G = luma - chroma G
  psraw      mm6, [esp+GRightShift] ; scale low G down
   psubw      mm7, mm4               ; 4 high G values in signed 16 bit
  movq       mm2, mm5               ; second copy of u-128 for the high half
   punpcklwd  mm5, mm5               ; replicate the 2 low u pixels
  pmullw     mm5, [UtB]             ; low chroma B contribution
   punpckhwd  mm2, mm2               ; replicate the 2 high u pixels
  psraw      mm7, [esp+GRightShift] ; scale high G down
   pmullw     mm2, [UtB]             ; high chroma B contribution
  packuswb   mm6, mm7               ; mm6: G7 G6 G5 G4 G3 G2 G1 G0
  movq       [temp_mmx+esp+16], mm5  ; low chroma B
   paddw      mm5, mm0               ; 4 low B values in signed 16 bit
  movq       [temp_mmx+esp+40], mm2  ; high chroma B
   paddw      mm2, mm3               ; 4 high B values in signed 16 bit
  psraw      mm5, [esp+BRightShift] ; low B scaled down (original note: by 6+(8-5))
  psraw      mm2, [esp+BRightShift] ; high B scaled down (original note: by 6+(8-5))
  packuswb   mm5, mm2               ; mm5: B7 B6 B5 B4 B3 B2 B1 B0

  movq       mm2, [temp_mmx+esp]     ; reload the 4 v-128 words
   movq       mm1, mm5               ; mm1 = B bytes (mm5 is free from here)
  movq       mm7, mm2               ; copy of v-128 for the high half
   punpcklwd  mm2, mm2               ; replicate the 2 low v pixels
  pmullw     mm2, [VtR]             ; low chroma R contribution
   punpckhwd  mm7, mm7               ; replicate the 2 high v pixels
  pmullw     mm7, [VtR]             ; high chroma R contribution
  paddusb    mm1, [esp+BUpperLimit] ; clamp step 1: saturating add pins large B at 0FFh
  movq       [temp_mmx+esp+24], mm2  ; low chroma R
  paddw      mm2, mm0               ; 4 low R values in signed 16 bit
  psraw      mm2, [esp+RRightShift] ; low R scaled down (original note: by 6+(8-5))
   pxor       mm4, mm4               ; mm4=0 for 8->16 conversion; NOTE(review): mm4 is overwritten before any later read in this loop
  movq       [temp_mmx+esp+32], mm7  ; high chroma R
   paddw      mm7, mm3               ; 4 high R values in signed 16 bit
  psraw      mm7, [esp+RRightShift] ; high R scaled down (original note: by 6+(8-5))
  psubusb    mm1, [esp+BUpperLimit] ; clamp step 2: B = min(B, 0FFh - limit)
   packuswb   mm2, mm7               ; mm2: R7 R6 R5 R4 R3 R2 R1 R0
  paddusb    mm6, [esp+GUpperLimit] ; same add/subtract clamp for G
  psubusb    mm6, [esp+GUpperLimit] ; G = min(G, 0FFh - limit)
  paddusb    mm2, [esp+RUpperLimit] ; same add/subtract clamp for R
  psubusb    mm2, [esp+RUpperLimit] ; R = min(R, 0FFh - limit)

; here we are packing from RGB24 to RGB16 (even line)
; input:
       ; mm6: G7 G6 G5 G4 G3 G2 G1 G0
       ; mm1: B7 B6 B5 B4 B3 B2 B1 B0
       ; mm2: R7 R6 R5 R4 R3 R2 R1 R0
; assuming 8 original pixels in 0-H representation on mm6, mm1, mm2
; when  H=2**xBITS-1 (x is for R G B)
; output:
;        mm1- result: 4 low RGB16  (stored to [edi] at the end of this part)
;        mm7- result: 4 high RGB16 (stored to [edi+8] later, inside the odd-line code)
; using: mm0- zero register
;        mm3- temporary results
; algorithm (as given by the original author, for 5-bit R):
;   for i in 0..7:
;     RGB[i] = 256*(R[i] * 2^(8-5)) + (G[i] * 2^5) + B[i]
;   the actual shift counts are read from RLeftShift and GLeftShift.

  psllq      mm2, [esp+RLeftShift]  ; position R in the most significant part of the byte
                                    ; (a 64-bit shift on byte data: presumably safe because R was clamped above -- confirm)
   movq       mm7, mm1               ; mm7: copy of B for the high 4 pixels

; note: no need for shift to place B on the least significant part of the byte
;   R in left position, B in the right position so they can be combined

  punpcklbw  mm1, mm2               ; mm1: 4 low 16 bit RB (B in the low byte, R in the high byte)
   pxor       mm0, mm0               ; mm0: 0
  punpckhbw  mm7, mm2               ; mm7: 4 high 16 bit RB
   movq       mm3, mm6               ; mm3: G
  punpcklbw  mm6, mm0               ; mm6: low 4 G 16 bit
  psllw      mm6, [esp+GLeftShift]  ; shift low G into position (original note: 5 positions)
  punpckhbw  mm3, mm0               ; mm3: high 4 G 16 bit
   por        mm1, mm6               ; mm1: low RGB16
  psllw      mm3, [esp+GLeftShift]  ; shift high G into position (original note: 5 positions)
  por        mm7, mm3               ; mm7: high RGB16

  mov		ebp,[esp+tmpYCursorOdd]          ; moved to here to save cycles before odd line
  movq       [edi], mm1             ; store 4 low even-line pixels; original note: aligned

;- start odd line
; Computes R, G and B bytes for the 8 odd-line pixels, reusing the chroma
; contributions that the even-line code saved in temp_mmx (no chroma reload).
; On entry: ebp = odd-line luma cursor, mm7 = 4 high even-line RGB16 pixels
;           that still have to be written (done at [edi+8] below).
; On exit:  mm0 = 8 R bytes, mm1 = 8 G bytes, mm2 = 8 B bytes (each clamped),
;           eax = tmpCCOPitch.
  movq       mm1, [ebp+2*ebx]       ; mm1 has 8 y pixels
   pxor       mm2, mm2               ; mm2 = 0 for byte->word widening
  psubusb    mm1, [Yadd]              ; mm1 has 8 pixels y-16 (unsigned-saturating subtract)
  movq       mm5, mm1               ; copy for the high 4 pixels
   punpcklbw  mm1, mm2               ; 4 low y-16 pixels as words
  pmullw     mm1, [Ymul]              ; low 4 luminance contribution
   punpckhbw  mm5, mm2               ; 4 high y-16
  pmullw     mm5,  [Ymul]              ; high 4 luminance contribution
  movq       [edi+8], mm7           ; store 4 high even-line pixels; original note: aligned
   movq       mm0, mm1               ; mm0 = low luma, becomes low R
  paddw      mm0, [temp_mmx+esp+24]  ; low 4 R
   movq       mm6, mm5               ; mm6 = high luma, kept for B
  psraw      mm0, [esp+RRightShift] ; low R scaled down (original note: by 6+(8-5))
  paddw      mm5, [temp_mmx+esp+32]  ; high 4 R
   movq       mm2, mm1               ; mm2 = low luma, becomes low B
  psraw      mm5, [esp+RRightShift] ; high R scaled down (original note: by 6+(8-5))
  paddw      mm2, [temp_mmx+esp+16]  ; low 4 B
   packuswb   mm0, mm5               ; mm0: R7 R6 R5 R4 R3 R2 R1 R0
  psraw      mm2, [esp+BRightShift] ; low B scaled down (original note: by 6+(8-5))
   movq       mm5, mm6               ; mm5 = high luma again, becomes high G
  paddw      mm6, [temp_mmx+esp+40]  ; high 4 B
  psraw      mm6, [esp+BRightShift] ; high B scaled down (original note: by 6+(8-5))
  movq       mm3, [temp_mmx+esp+8]   ; the 4 saved chroma G words
  packuswb   mm2, mm6               ; mm2: B7 B6 B5 B4 B3 B2 B1 B0
   movq       mm4, mm3               ; copy for the high half
  punpcklwd  mm3, mm3               ; replicate low 2
  punpckhwd  mm4, mm4               ; replicate high 2
   psubw      mm1, mm3               ;  4 low G = luma - chroma G
  psraw      mm1, [esp+GRightShift] ; low G scaled down
   psubw      mm5, mm4               ;  4 high G values in signed 16 bit
  psraw      mm5, [esp+GRightShift] ; high G scaled down
  paddusb    mm2, [esp+BUpperLimit] ; mm2: clamp step 1, saturating add pins large B at 0FFh
   packuswb   mm1, mm5               ; mm1: G7 G6 G5 G4 G3 G2 G1 G0
  psubusb    mm2, [esp+BUpperLimit] ; clamp step 2: B = min(B, 0FFh - limit)
  paddusb    mm1, [esp+GUpperLimit] ; same add/subtract clamp for G
  psubusb    mm1, [esp+GUpperLimit]
  paddusb    mm0, [esp+RUpperLimit] ; same add/subtract clamp for R
  mov		eax,[esp+tmpCCOPitch]          ; eax = output offset of the odd line (0 when the line is dropped)
  psubusb    mm0, [esp+RUpperLimit]

; here we are packing from RGB24 to RGB16 (odd line), then closing the loop
; input:
       ; mm1: G7 G6 G5 G4 G3 G2 G1 G0
       ; mm2: B7 B6 B5 B4 B3 B2 B1 B0
       ; mm0: R7 R6 R5 R4 R3 R2 R1 R0
       ; eax: tmpCCOPitch (byte offset from the even-line output to the odd-line output)
; output:
;        mm2- result: 4 low RGB16
;        mm7- result: 4 high RGB16
; using: mm4- zero register
;        mm3- temporary results

  psllq       mm0, [esp+RLeftShift] ; position R in the most significant part of the byte
   movq        mm7, mm2              ; mm7: Save B

; note: no need for shift to place B on the least significant part of the byte
;   R in left position, B in the right position so they can be combined

  punpcklbw  mm2, mm0               ; mm2: 4 low 16 bit RB
   pxor       mm4, mm4               ; mm4: 0
  movq       mm3, mm1               ; mm3: G
   punpckhbw  mm7, mm0               ; mm7: 4 high 16 bit RB
  punpcklbw  mm1, mm4               ; mm1: low 4 G 16 bit
  punpckhbw  mm3, mm4               ; mm3: high 4 G 16 bit
  psllw      mm1, [esp+GLeftShift]  ; shift low G into position (original note: 5 positions)
   por        mm2, mm1               ; mm2: low RGB16
  psllw      mm3, [esp+GLeftShift]  ; shift high G into position (original note: 5 positions)
  por        mm7, mm3               ; mm7: high RGB16
  movq       [edi+eax], mm2         ; store 4 low odd-line pixels
  movq       [edi+eax+8], mm7       ; store 4 high odd-line pixels; original note: aligned
  add        edi, 16                ; output advances 16 bytes (8 pixels of 16 bit)
   add        ebx, 4                 ; 4 chroma samples = 8 luma pixels consumed (luma is indexed by 2*ebx)
  jl         near do_next_8x2_block      ; ebx still negative: more pixels on this line pair

  ; End of one line pair: move the output, luma and chroma pointers to the
  ; next pair and loop back to PrepareChromaLine while lines remain.
  add		edi,[esp+CCOSkipDistance]        ; go to begin of next line
  add 		edi,[esp+tmpCCOPitch]           ; skip odd line (if it is needed)
; --- disabled older aspect-ratio logic (Leax/Sebp/Addeax look like load/store
; --- macros from the original source; the live version is in PrepareChromaLine)
; Leax       AspectCount
; Lebp       CCOPitch               ; skip odd line

; sub        eax, 2
; jg         @f

; Addeax     AspectBaseCount
; xor        ebp, ebp

;@@:
;  Seax       AspectCount
;  add        edi, ebp

  mov		eax,[esp+YPitch]                 ; eax = luma pitch
  mov		ebp,[esp+tmpYCursorOdd]          ; start from the odd line just processed
  add        ebp, eax       ; skip one line
;  lea        ebp, [ebp+2*eax]       ; skip two lines (disabled)
  mov		[esp+tmpYCursorEven],ebp         ; new even line = old odd line + pitch
;  Sebp       tmpYCursorOdd          (disabled)

  add        ebp, eax       ; skip one line
  mov		[esp+tmpYCursorOdd],ebp          ; new odd line = new even line + pitch
;  Lebp       tmpYCursorEven         (disabled)
;  lea        ebp, [ebp+2*eax]       (disabled)
;  Sebp       tmpYCursorEven         (disabled)


  add		esi,[esp+ChromaPitch]            ; next v row
  add		edx,[esp+ChromaPitch]            ; next u row


; --- disabled older termination test based on a luma limit pointer
;  Leax       YLimit                  ; Done with last line?
;  cmp        ebp, eax
;  jbe        PrepareChromaLine
   sub      word [esp+FrameHeight],2 ; two lines done; NOTE(review): only the low 16 bits of FrameHeight are updated
   ja       near PrepareChromaLine   ; loop while the remaining count is above zero (unsigned, no borrow)


;------------------------------------------------------------------------------
; finish: common exit path of the conversion routine.
; Leaves MMX mode, releases the local frame and restores four registers.
; NOTE(review): the matching prologue is outside this excerpt; the pops below
; presumably mirror pushes of esi, edi, ebp, ebx in that order -- confirm.
finish:
  emms                              ; empty the MMX state so x87 code can run afterwards
  add        esp, LocalFrameSize    ; drop the local variable area

  pop        ebx
  pop        ebp
  pop        edi
  pop        esi
  ret
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -