jidctasm.pas

来自「DELPHI版的JPEG文件解码源程序」· PAS 代码 · 共 793 行 · 第 1/2 页
PAS
793 行
  imul edx, (-FIX_1_961570560)
  add  eax, edx                        { z3 = eax }

    {Inc(tmp0, z1 + z3);}
  mov   ebx, z1
  add	ebx, eax
  add	tmp0, ebx

    {tmp2 := (tmp2) * INT32(FIX_3_072711026); { sqrt(2) * ( c1+c3+c5-c7) }
    {Inc(tmp2, z2 + z3);}
  mov   ebx, tmp2
  imul  ebx, FIX_3_072711026
  mov	edx, z2                        { z2 = edx }
  add   ebx, edx
  add   eax, ebx
  mov	tmp2, eax

    {Inc(tmp1, z2 + z4);}
  mov   eax, z4                        { z4 = eax }
  add   edx, eax
  add   tmp1, edx

    {tmp3 := (tmp3) * INT32(FIX_1_501321110); { sqrt(2) * ( c1+c3-c5-c7) }
    {Inc(tmp3, z1 + z4);}
  mov	edx, tmp3
  imul  edx, FIX_1_501321110

  add	edx, eax
  add   edx, z1                        { tmp3 = edx }

    { Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 }

    {wsptr^[DCTSIZE*0] := int (DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS));}
    {wsptr^[DCTSIZE*7] := int (DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS));}    
  mov	eax, tmp10
  add   eax, ROUND_CONST
  lea   ebx, [eax+edx]
  sar	ebx, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*0], ebx

  sub	eax, edx
  sar	eax, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*7], eax

    {wsptr^[DCTSIZE*1] := int (DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS));}
    {wsptr^[DCTSIZE*6] := int (DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS));}
  mov	eax, tmp11
  add   eax, ROUND_CONST
  mov   edx, tmp2
  lea	ebx, [eax+edx]
  sar	ebx, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*1], ebx

  sub	eax, edx
  sar	eax, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*6], eax

    {wsptr^[DCTSIZE*2] := int (DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS));}
    {wsptr^[DCTSIZE*5] := int (DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS));}
  mov	eax, tmp12
  add   eax, ROUND_CONST
  mov   edx, tmp1
  lea	ebx, [eax+edx]
  sar	ebx, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*2], ebx

  sub	eax, edx
  sar	eax, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*5], eax

    {wsptr^[DCTSIZE*3] := int (DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS));}
    {wsptr^[DCTSIZE*4] := int (DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS));}    
  mov	eax, tmp13
  add   eax, ROUND_CONST
  mov   edx, tmp0
  lea   ebx, [eax+edx]
  sar	ebx, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*3], ebx

  sub	eax, edx
  sar	eax, CONST_BITS-PASS1_BITS
  mov	DWORD PTR [ecx+wrkDCTSIZE*4], eax

    {Inc(JCOEF_PTR(inptr));		{ advance pointers to next column }
    {Inc(ISLOW_MULT_TYPE_PTR(quantptr));
    Inc(int_ptr(wsptr));}
  dec	ctr
  je	@loop519

  add   esi, Type JCOEF
  add	edi, Type ISLOW_MULT_TYPE
  add	ecx, Type int  { int_ptr }
  {end;}
	jmp	@loop518
@loop519:
  { Save to memory what we've registerized for the preceding loop.
    (This step currently emits no code: pass 2 starts right below.) }

  { Pass 2: process rows from work array, store into output array. }
  { Note that we must descale the results by a factor of 8 == 2**3, }
  { and also undo the PASS1_BITS scaling. }

  {wsptr := @workspace;}
  lea	esi, workspace

  {for ctr := 0 to pred(DCTSIZE) do
  begin}
  mov	ctr, 0
@loop523:

    {outptr := output_buf^[ctr];}
  mov	eax, ctr
  mov	ebx, output_buf
  mov	edi, DWORD PTR [ebx+eax*4]           { 4 = SizeOf(pointer) }

    {Inc(JSAMPLE_PTR(outptr), output_col);}
  add	edi, output_col

    { Rows of zeroes can be exploited in the same way as we did with columns.
      However, the column calculation has created many nonzero AC terms, so
      the simplification applies less often (typically 5% to 10% of the time).
      On machines with very fast multiplication, it's possible that the
      test takes more time than it's worth.  In that case this section
      may be commented out. }

{$ifndef NO_ZERO_ROW_TEST}
    {if ((wsptr^[1]) or (wsptr^[2]) or (wsptr^[3]) or (wsptr^[4]) or
        (wsptr^[5]) or (wsptr^[6]) or (wsptr^[7]) = 0) then
    begin}
	mov	eax, DWORD PTR [esi+4*1]
	or	eax, DWORD PTR [esi+4*2]
	or	eax, DWORD PTR [esi+4*3]
        jne     @loop525            { Nomssi: early exit path may help }
	or	eax, DWORD PTR [esi+4*4]
	or	eax, DWORD PTR [esi+4*5]
	or	eax, DWORD PTR [esi+4*6]
	or	eax, DWORD PTR [esi+4*7]
	jne	@loop525

      { AC terms all zero }
      {JSAMPLE(dcval_) := range_limit^[int(DESCALE(INT32(wsptr^[0]),
                          PASS1_BITS+3)) and RANGE_MASK];}
	mov	eax, DWORD PTR [esi+4*0]
	add	eax, (INT32(1) shl (PASS1_BITS+3-1))
	sar	eax, PASS1_BITS+3
	and	eax, RANGE_MASK
        mov     ebx, range_limit
	mov	al, BYTE PTR [ebx+eax]
        mov     ah, al

      {outptr^[0] := dcval_;
      outptr^[1] := dcval_;
      outptr^[2] := dcval_;
      outptr^[3] := dcval_;
      outptr^[4] := dcval_;
      outptr^[5] := dcval_;
      outptr^[6] := dcval_;
      outptr^[7] := dcval_;}

	stosw
	stosw
	stosw
	stosw

      {Inc(int_ptr(wsptr), DCTSIZE);	{ advance pointer to next row }
      {continue;}
	add esi, wrkDCTSIZE
	inc	ctr
	cmp	ctr, DCTSIZE
	jl	@loop523
	jmp @loop524
    {end;}
@loop525:
{$endif}


    { Even part: reverse the even part of the forward DCT. }
    { The rotator is sqrt(2)*c(-6). }

    {z2 := INT32 (wsptr^[2]);}
  mov	edx, DWORD PTR [esi+4*2]                   { z2 = edx }

    {z3 := INT32 (wsptr^[6]);}
  mov	ecx, DWORD PTR [esi+4*6]                   { z3 = ecx }

    {z1 := (z2 + z3) * INT32(FIX_0_541196100);}
  lea   eax, [edx+ecx]
  imul  eax, FIX_0_541196100
  mov	ebx, eax                                   { z1 = ebx }

    {tmp2 := z1 + (z3) * INT32(- FIX_1_847759065);}
  imul  ecx, (-FIX_1_847759065)
  add	ecx, ebx                                   { tmp2 = ecx }

    {tmp3 := z1 + (z2) * INT32(FIX_0_765366865);}
  imul  edx, FIX_0_765366865
  add	ebx, edx                                   { tmp3 = ebx }

    {tmp0 := (INT32(wsptr^[0]) + INT32(wsptr^[4])) shl CONST_BITS;}
    {tmp1 := (INT32(wsptr^[0]) - INT32(wsptr^[4])) shl CONST_BITS;}
  mov	edx, DWORD PTR [esi+4*4]
  mov   eax, DWORD PTR [esi+4*0]
  sub   eax, edx
  add   edx, edx
  add   edx, eax
  shl	edx, CONST_BITS              { tmp0 = edx }
  shl	eax, CONST_BITS              { tmp1 = eax }

    {tmp10 := tmp0 + tmp3;}
    {tmp13 := tmp0 - tmp3;}
  sub   edx, ebx
  mov	tmp13, edx
  add   ebx, ebx
  add   edx, ebx
  mov	tmp10, edx

    {tmp11 := tmp1 + tmp2;}
    {tmp12 := tmp1 - tmp2;}
  lea   ebx, [ecx+eax]
  mov	tmp11, ebx
  sub	eax, ecx
  mov	tmp12, eax

    { Odd part per figure 8; the matrix is unitary and hence its
      transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively. }

{ The following assignments no longer produce code: wsptr has been
  optimized into esi, so it is more efficient to read these values
  directly from [esi+4*n].
    tmp0 := INT32(wsptr^[7]);
    tmp1 := INT32(wsptr^[5]);
    tmp2 := INT32(wsptr^[3]);
    tmp3 := INT32(wsptr^[1]); }

    {z2 := tmp1 + tmp2;}
    {z2 := (z2) * INT32(- FIX_2_562915447); { sqrt(2) * (-c1-c3) }
  mov	ebx, DWORD PTR [esi+4*3]              { tmp2 }
  mov   ecx, DWORD PTR [esi+4*5]              { tmp1 }
  lea   eax, [ebx+ecx]
  imul  eax, (-FIX_2_562915447)
  mov	z2, eax

    {z3 := tmp0 + tmp2;}
  mov	edx, DWORD PTR [esi+4*7]              { tmp0 }
  add   ebx, edx                              { old z3 = ebx }
  mov	eax, ebx
    {z3 := (z3) * INT32(- FIX_1_961570560); { sqrt(2) * (-c3-c5) }
  imul eax, (-FIX_1_961570560)
  mov	z3, eax

    {z1 := tmp0 + tmp3;}
    {z1 := (z1) * INT32(- FIX_0_899976223); { sqrt(2) * (c7-c3) }
  mov	eax, DWORD PTR [esi+4*1]               { tmp3 }
  add	edx, eax
  imul  edx, (-FIX_0_899976223)                { z1 = edx }

    {z4 := tmp1 + tmp3;}
  add	eax, ecx                              { +tmp1 }
  add	ebx, eax                              { z3 + z4 = ebx }
    {z4 := (z4) * INT32(- FIX_0_390180644); { sqrt(2) * (c5-c3) }
  imul eax, (-FIX_0_390180644)                { z4 = eax }

    {z5 := (z3 + z4) * INT32(FIX_1_175875602); { sqrt(2) * c3 }
    {Inc(z3, z5);}
  imul ebx, FIX_1_175875602
  mov  ecx, z3
  add  ecx, ebx                                { ecx = z3 }

    {Inc(z4, z5);}
  add ebx, eax                                 { z4 = ebx }

    {tmp0 := (tmp0) * INT32(FIX_0_298631336); { sqrt(2) * (-c1+c3+c5-c7) }
    {Inc(tmp0, z1 + z3);}
  mov   eax, DWORD PTR [esi+4*7]
  imul  eax, FIX_0_298631336
  add   eax, edx
  add   eax, ecx
  mov	tmp0, eax

    {tmp1 := (tmp1) * INT32(FIX_2_053119869); { sqrt(2) * ( c1+c3-c5+c7) }
    {Inc(tmp1, z2 + z4);}
  mov  eax, DWORD PTR [esi+4*5]
  imul eax, FIX_2_053119869
  add  eax, z2
  add  eax, ebx
  mov  tmp1, eax

    {tmp2 := (tmp2) * INT32(FIX_3_072711026); { sqrt(2) * ( c1+c3+c5-c7) }
    {Inc(tmp2, z2 + z3);}
  mov	eax, DWORD PTR [esi+4*3]
  imul  eax, FIX_3_072711026
  add   eax, z2
  add   ecx, eax                      { ecx = tmp2 }

    {tmp3 := (tmp3) * INT32(FIX_1_501321110); { sqrt(2) * ( c1+c3-c5-c7) }
    {Inc(tmp3, z1 + z4);}
  mov	eax, DWORD PTR [esi+4*1]
  imul  eax, FIX_1_501321110
  add   eax, edx
  add   ebx, eax                   { ebx = tmp3 }

    { Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 }

    {outptr^[0] := range_limit^[ int(DESCALE(tmp10 + tmp3,
                      CONST_BITS+PASS1_BITS+3)) and RANGE_MASK]; }
    {outptr^[7] := range_limit^[ int(DESCALE(tmp10 - tmp3,
                        CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}

  mov	edx, tmp10
  add   edx, ROUND_CONST_2
  lea	eax, [ebx+edx]
  sub   edx, ebx

  shr	eax, CONST_BITS+PASS1_BITS+3
  and	eax, RANGE_MASK
  mov   ebx, range_limit           { once for all }
  mov	al, BYTE PTR [ebx+eax]
  mov   [edi+0], al

  shr	edx, CONST_BITS+PASS1_BITS+3
  and	edx, RANGE_MASK
  mov	al, BYTE PTR [ebx+edx]
  mov   [edi+7], al

    {outptr^[1] := range_limit^[ int(DESCALE(tmp11 + tmp2,
                        CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}
  mov	eax, tmp11
  add   eax, ROUND_CONST_2
  lea	edx, [eax+ecx]
  shr	edx, CONST_BITS+PASS1_BITS+3
  and	edx, RANGE_MASK
  mov	dl, BYTE PTR [ebx+edx]
  mov   [edi+1], dl

    {outptr^[6] := range_limit^[ int(DESCALE(tmp11 - tmp2,
			CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}
  sub	eax, ecx
  shr	eax, CONST_BITS+PASS1_BITS+3
  and	eax, RANGE_MASK
  mov	al, BYTE PTR [ebx+eax]
  mov   [edi+6], al

    {outptr^[2] := range_limit^[ int(DESCALE(tmp12 + tmp1,
			CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}
  mov	eax, tmp12
  add   eax, ROUND_CONST_2
  mov   ecx, tmp1
  lea	edx, [eax+ecx]
  shr	edx, CONST_BITS+PASS1_BITS+3
  and	edx, RANGE_MASK
  mov	dl, BYTE PTR [ebx+edx]
  mov   [edi+2], dl

    {outptr^[5] := range_limit^[ int(DESCALE(tmp12 - tmp1,
			CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}
  sub	eax, ecx
  shr	eax, CONST_BITS+PASS1_BITS+3
  and	eax, RANGE_MASK
  mov	al, BYTE PTR [ebx+eax]
  mov   [edi+5], al

    {outptr^[3] := range_limit^[ int(DESCALE(tmp13 + tmp0,
			CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}
  mov	eax, tmp13
  add   eax, ROUND_CONST_2
  mov   ecx, tmp0
  lea   edx, [eax+ecx]
  shr	edx, CONST_BITS+PASS1_BITS+3
  and	edx, RANGE_MASK
  mov	dl, BYTE PTR [ebx+edx]
  mov   [edi+3], dl

    {outptr^[4] := range_limit^[ int(DESCALE(tmp13 - tmp0,
			CONST_BITS+PASS1_BITS+3)) and RANGE_MASK];}
  sub	eax, ecx
  shr	eax, CONST_BITS+PASS1_BITS+3
  and	eax, RANGE_MASK
  mov	al, BYTE PTR [ebx+eax]
  mov   [edi+4], al

    {Inc(int_ptr(wsptr), DCTSIZE);	{ advance pointer to next row }
  add	esi, wrkDCTSIZE
  add	edi, DCTSIZE

  {end;}
  inc	ctr
  cmp	ctr, DCTSIZE
  jl	@loop523

@loop524:
@loop496:
  pop   ebx
  pop   esi
  pop   edi
end;

end.
jidctasm.pas - 源码说明

本页面展示了「DELPHI版的JPEG文件解码源程序」中的 jidctasm.pas 源码文件，采用 PAS 编程语言编写，共 793 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与DELPHI相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?