⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_arm.asm

📁 大名鼎鼎的CE下播放软件,TCPMP的源代码!!!2410下可以流畅的解QVGA的H264,MPEG4等格式.
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;*****************************************************************************
;*
;* This program is free software ; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
;*
;* $Id: idct_arm.asm 284 2005-10-04 08:54:26Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************

	AREA	|.text|, CODE

	EXPORT IDCT_Block4x8
	EXPORT IDCT_Block8x8
	EXPORT IDCT_Block4x8Swap
	EXPORT IDCT_Block8x8Swap

; MCol8 $Name,$Rotate,$Pitch
;
; Emits a procedure $Name that runs one 8-point inverse DCT pass over eight
; signed 16-bit coefficients and stores eight 16-bit results.
;
; $Pitch   byte distance between consecutive inputs
;          (coefficient k is loaded from [r6 + k*$Pitch])
; $Rotate  0       : results are stored starting at r6
;          non-zero: r9 is added to r6 after the loads, so results are stored
;                    starting at r6+r9 and r6 is returned advanced by r9
;
; In:   r6 = Block (address of the first input coefficient)
;       r9 = output offset (only read when $Rotate is non-zero)
;       lr = return address
; Out:  eight halfwords stored at offsets 0x00,0x10,...,0x70 from the output base
; Uses: r0-r5, r9-r12 and the flags are overwritten
;       r7,r8 are not touched by this code (callers rely on that)
;
; Four code paths are selected by which inputs are non-zero:
;   DC only              constant fill with x0<<3
;   $Name.Mode1          only blk[0] and blk[1] non-zero
;   $Name.Mode2          blk[1],blk[3],blk[5],blk[7] all zero
;   $Name.Mode3          general case

	macro
	MCol8 $Name,$Rotate,$Pitch

$Name PROC

; Register assignment right after the loads (blk[k] = halfword at r6 + k*$Pitch):
; r10 = x0 = blk[0]
; r4  = x1 = blk[4]
; r2  = x2 = blk[6]
; r1  = x3 = blk[2]
; r3  = x4 = blk[1]
; r12 = x5 = blk[7]
; r0  = x6 = blk[5]
; r5  = x7 = blk[3]
; r11 = x8  
; r9  = tmp (x567)

	ldrsh     r4, [r6, #4*$Pitch]	; x1 = blk[4]
	ldrsh     r0, [r6, #5*$Pitch]	; x6 = blk[5]
	ldrsh     r12,[r6, #7*$Pitch]	; x5 = blk[7]
	ldrsh     r5, [r6, #3*$Pitch]	; x7 = blk[3]
	ldrsh     r2, [r6, #6*$Pitch]	; x2 = blk[6]
	ldrsh     r1, [r6, #2*$Pitch]	; x3 = blk[2]
	ldrsh     r3, [r6, #1*$Pitch]	; x4 = blk[1]
	ldrsh     r10,[r6]				; x0 = blk[0]
; With $Rotate the caller's r9 is consumed here, before r9 is reused as a
; temporary; from now on r6 is the output base.
	if $Rotate
	add		  r6,r6,r9
	endif

	orr       r9, r12, r0			; r9 = x5|x6
	orr       r9, r9, r5			; r9 = x5|x6|x7 (kept for the Mode2 test)
	orr       r11, r9, r2
	orr       r11, r11, r4
	orrs      r11, r11, r1			; r11 = x1|x2|x3|x5|x6|x7, sets Z when all are zero

	bne       $Name.Mode2 
 	cmp       r3, #0				; only x0 and x4 can be non-zero here
	bne       $Name.Mode1
; DC-only path. Without $Rotate an all-zero input returns without storing
; anything; for Col8 (pitch 16, in place) the eight output slots are the eight
; inputs and therefore already zero. With $Rotate the output base differs from
; the input, so the (possibly zero) constant is always stored.
	if $Rotate=0
	cmp       r10, #0
	beq       $Name.Zero
	endif
	mov       r10, r10, lsl #3		; every output = x0 << 3
	strh      r10, [r6]
	strh      r10, [r6, #0x10]
	strh      r10, [r6, #0x20]
	strh      r10, [r6, #0x30]
	strh      r10, [r6, #0x40]
	strh      r10, [r6, #0x50]
	strh      r10, [r6, #0x60]
	strh      r10, [r6, #0x70]
$Name.Zero
	mov		pc,lr

; Mode1: only x0 (blk[0]) and x4 (blk[1]) are non-zero.
; Each output is ((x0<<11) + 128 +/- Wn*x4) >> 8 with Wn in {W1,W3,W5,W7}.
$Name.Mode1							;x0,x4
	mov       r11, r3
	mov       r2, #0x8D, 30  ; 0x8D ror 30 = 0x234 = 564
	orr       r2, r2, #1			; 565 = W7
	mov       r9, r3
	mul       r2, r11, r2			; r2 = W7 * x4
	mov       r11, #0xB1, 28  ; 0xB1 ror 28 = 0xB10 = 2832
	orr       r11, r11, #9			; 2841 = W1
	mul       r4, r9, r11			; r4 = W1 * x4
	mov       r11, #0x96, 28  ; 0x96 ror 28 = 0x960 = 2400
	orr       r11, r11, #8			; 2408 = W3
	mul       r5, r9, r11			; r5 = W3 * x4
	mov       r11, #0x19, 26  ; 0x19 ror 26 = 0x640 = 1600
	mov       r1, r10, lsl #11		; r1 = x0 << 11
	orr       r11, r11, #9			; 1609 = W5
	mul       r0, r3, r11			; r0 = W5 * x4
	add       r1, r1, #0x80  ; r1 = (x0 << 11) + 128 (rounding for the >> 8 below)

	add       r3, r4, r1			; x0 + W1*x4
	add       r11, r5, r1			; x0 + W3*x4
	mov       r3, r3, asr #8
	mov       r11, r11, asr #8
	strh      r3, [r6]
	strh      r11, [r6, #0x10]  ; 0x10 = 16

	add       r3, r0, r1			; x0 + W5*x4
	add       r11, r2, r1			; x0 + W7*x4
	mov       r3, r3, asr #8
	mov       r11, r11, asr #8
	strh      r3, [r6, #0x20]  ; 0x20 = 32
	strh      r11, [r6, #0x30]  ; 0x30 = 48

	sub       r3, r1, r2			; x0 - W7*x4
	sub       r11, r1, r0			; x0 - W5*x4
	mov       r3, r3, asr #8
	mov       r11, r11, asr #8
	strh      r3, [r6, #0x40]  ; 0x40 = 64
	strh      r11, [r6, #0x50]  ; 0x50 = 80

	sub       r3, r1, r5			; x0 - W3*x4
	sub       r11, r1, r4			; x0 - W1*x4
	mov       r3, r3, asr #8
	mov       r11, r11, asr #8
	strh      r3, [r6, #0x60]  ; 0x60 = 96
	strh      r11, [r6, #0x70]  ; 0x70 = 112
	mov		pc,lr

; Reached whenever any of x1,x2,x3,x5,x6,x7 is non-zero. If x4..x7 are all
; zero the cheaper even-only butterfly below is used, otherwise Mode3.
; With x4..x7 zero the outputs are mirror-symmetric (out[k] == out[7-k]), so
; only four values are computed and each is stored twice.
$Name.Mode2						;x0,x1,x2,x3
	orrs      r11, r9, r3			; x4|x5|x6|x7
	bne       $Name.Mode3
	mov       r3, r10, lsl #11
	add       r3, r3, #128			; r3 = (x0 << 11) + 128
	mov       r9, #0x45, 28  ; 0x45 ror 28 = 0x450 = 1104
	add       r5, r3, r4, lsl #11	; r5 = x8 = x0 + (x1 << 11)
	add       r11, r2, r1			; r11 = x2 + x3
	orr       r9, r9, #4			; 1108 = W6
	sub       r3, r3, r4, lsl #11	; r3 = x0 - (x1 << 11)
	mul       r4, r11, r9			; r4 = x1 = W6 * (x3 + x2)
	mov       r11, #0x3B, 26  ; 0x3B ror 26 = 0xEC0 = 3776
	orr       r11, r11, #8			; 3784 = W2+W6
	mul       r11, r2, r11			; (W2+W6) * x2
	sub       r2, r4, r11			; r2 = x2 = x1 - (W2+W6) * x2
	mov       r11, #0x62, 28  ; 0x62 ror 28 = 0x620 = 1568 = W2-W6
	mul       r11, r1, r11			; (W2-W6) * x3
	add       r0, r2, r3			; r0 = x0 + x2
	add       r1, r11, r4			; r1 = x3 = x1 + (W2-W6) * x3
	add       r4, r5, r1			; r4 = x8 + x3
	sub       r3, r3, r2			; r3 = x0 - x2
	sub       r5, r5, r1			; r5 = x8 - x3
	mov       r1, r4, asr #8		; out[0] = out[7]
	mov       r3, r3, asr #8		; out[2] = out[5]
	mov       r2, r0, asr #8		; out[1] = out[6]
	mov       r4, r5, asr #8		; out[3] = out[4]
	strh      r1, [r6,#0x00]
	strh      r2, [r6,#0x10]
	strh      r3, [r6,#0x20]
	strh      r4, [r6,#0x30]
	strh      r4, [r6,#0x40] 
	strh      r3, [r6,#0x50] 
	strh      r2, [r6,#0x60] 
	strh      r1, [r6,#0x70] 
	mov		pc,lr

; Mode3: general case, all eight inputs may be non-zero.
; Multiplier constants are built in r9 with mov/orr (positive) or mvn/eor
; (negative) pairs because they do not fit a single ARM immediate.
$Name.Mode3						;x0,x1,x2,x3,x4,x5,x6,x7

	mov     r9, #0x8D, 30  
	orr     r9, r9, #1			;W7 = 565
	add     r11, r12, r3
	mul     r11, r9, r11		;x8 = W7 * (x5 + x4)

	mov     r9, #0x8E, 28  
	orr     r9, r9, #4			;W1-W7 = 2276
	mla     r3, r9, r3, r11		;x4 = x8 + (W1-W7) * x4

	mvn     r9, #0xD40
	eor     r9, r9, #0xD		;-W1-W7 = -3406
	mla     r12, r9, r12, r11	;x5 = x8 + (-W1-W7) * x5

	mov     r9, #0x96, 28		;0x960 = 2400
	orr     r9, r9, #8			;W3 = 2408
	add     r11, r0, r5
	mul     r11, r9, r11		;x8 = W3 * (x6 + x7)
								
	mvn     r9, #0x310
	eor     r9, r9, #0xE		;W5-W3 = -799
	mla     r0, r9, r0, r11		;x6 = x8 + (W5-W3) * x6

	mvn     r9, #0xFB0			;-W3-W5 = -4017
	mla     r5, r9, r5, r11		;x7 = x8 + (-W3-W5) * x7

	mov     r10, r10, lsl #11
	add     r10, r10, #128		;x0 = (x0 << 11) + 128
	add		r11, r10,r4,lsl #11 ;x8 = x0 + (x1 << 11)
	sub		r10, r10,r4,lsl #11 ;x0 = x0 - (x1 << 11)

	mov     r9, #0x45, 28  
	orr     r9, r9, #4			;W6 = 1108
	add		r4, r1, r2
	mul		r4, r9, r4			;x1 = W6 * (x3 + x2)

	mvn     r9, #0xEC0
	eor     r9, r9, #0x7		;-W2-W6 = -3784
	mla     r2, r9, r2, r4		;x2 = x1 + (-W2-W6) * x2

	mov     r9, #0x620			;W2-W6 = 1568
	mla     r1, r9, r1, r4		;x3 = x1 + (W2-W6) * x3

	add		r4, r3, r0			;x1 = x4 + x6
	sub		r3, r3, r0			;x4 -= x6
	add		r0, r12,r5			;x6 = x5 + x7
	sub		r12,r12,r5			;x5 -= x7
	add		r5, r11,r1			;x7 = x8 + x3
	sub		r11,r11,r1			;x8 -= x3
	add		r1, r10,r2			;x3 = x0 + x2
	sub		r10,r10,r2			;x0 -= x2

	add		r9, r3, r12			;x4 + x5
	sub		r3, r3, r12			;x4 - x5
	mov		r12, #181
	mul		r2, r9, r12			;181 * (x4 + x5)
	mul		r9, r3, r12			;181 * (x4 - x5)
	add		r2, r2, #128		;x2 = 181 * (x4 + x5) + 128
	add		r3, r9, #128		;x4 = 181 * (x4 - x5) + 128
								;(the >> 8 of x2 and x4 is folded into the asr #8 operands below)

	add		r9,r5,r4			
	sub		r5,r5,r4			
	mov		r9,r9,asr #8		;(x7 + x1) >> 8
	mov		r5,r5,asr #8		;(x7 - x1) >> 8
	strh	r9,[r6,#0x00]
	strh	r5,[r6,#0x70]

	add		r9,r1,r2,asr #8
	sub		r1,r1,r2,asr #8			
	mov		r9,r9,asr #8		;(x3 + x2) >> 8
	mov		r1,r1,asr #8		;(x3 - x2) >> 8
	strh	r9,[r6,#0x10]
	strh	r1,[r6,#0x60]

	add		r9,r10,r3,asr #8			
	sub		r10,r10,r3,asr #8			
	mov		r9,r9,asr #8		;(x0 + x4) >> 8
	mov		r10,r10,asr #8		;(x0 - x4) >> 8
	strh	r9,[r6,#0x20]
	strh	r10,[r6,#0x50]

	add		r9,r11,r0			
	sub		r11,r11,r0			
	mov		r9,r9,asr #8		;(x8 + x6) >> 8
	mov		r11,r11,asr #8		;(x8 - x6) >> 8
	strh	r9,[r6,#0x30]
	strh	r11,[r6,#0x40]

	mov		pc,lr
	mend

; Col8    : inputs 16 bytes apart, results stored starting at r6.
; Col8Swap: inputs 2 bytes apart, results stored starting at r6+r9
;           (16 bytes apart), r6 returned advanced by r9.
	MCol8 Col8,0,16
	MCol8 Col8Swap,1,2

; RowConst
;
; Produces one 8-pixel output row from a single DC value.
;
; In:   r0 = Block[0]; the value applied to the pixels is (r0 + 32) >> 6
;       r6 = Block (not used here)
;       r7 = Src, or 0 when there is no source row
;       r8 = Dst
;       lr = return address
; Out:  8 bytes stored at [r8], using two word stores
;       Src == 0 : every pixel = the value clamped to 0..255
;       Src != 0 : every pixel = Src pixel + value, saturated to 0..255;
;                  r7 is advanced by 8
; Uses: r0-r5, r10-r12 and the flags are overwritten; r8 is preserved
; Note: Src and Dst are accessed with word ldr/str - assumes both are
;       word aligned (TODO confirm against callers)
;
; The Src != 0 paths add the value to four pixels at a time. The per-byte
; carry-out is recovered as (a&b) | ((a^b) & ~sum), bit 7 of every byte is
; kept (CarryMask) and turned into an 0xFF mask for each overflowing byte.
; The mask is ORed into the sum BEFORE the carries that leaked into the next
; byte are subtracted: a byte that wrapped to 0x00 while also receiving a
; carry would otherwise borrow from its upper neighbour and make that pixel
; one too small.

	ALIGN 16
RowConst PROC

	add     r0, r0, #0x20  ; rounding term for the >> 6 below
	cmp     r7, #0
	mov     r3, r0, asr #6			; r3 = DC value applied to every pixel
	beq     RowConst_NoSrc
	cmp     r3, #0
	beq		RowConst_Zero
	blt     RowConst_Sub

RowConst_Add
	cmp     r3, #255				; anything above 255 saturates every pixel anyway;
	movgt   r3, #255				; clamp so the byte replication below stays in its lane
	ldr     r0, CarryMask
	ldr     r2, [r7]
	orr     r3, r3, r3, lsl #8
	orr     r3, r3, r3, lsl #16		; r3 = value replicated into all four bytes
	add     r4, r2, r3				; r4 = raw sum (carries leak into the next byte)
	eor     r11, r2, r3
	and     r2, r3, r2
	bic     r11, r11, r4
	orr     r11, r11, r2			; r11 = carry-out of every bit position
	and     r5, r11, r0				; r5 = bit 7 set for every byte that carried out
	mov     r12, r5, lsl #1			; r12 = the carries as they entered the next byte
	sub     r11, r12, r5, lsr #7	; r11 = 0xFF in every byte that carried out
	orr     r4, r4, r11				; pin those bytes to 0xFF so the sub cannot borrow
	sub     r10, r4, r12			; remove the leaked carries
	ldr     r2, [r7, #4]
	orr     r11, r11, r10			; saturated result
	str     r11, [r8]
	add     r4, r2, r3				; same again for pixels 4..7
	eor     r11, r2, r3
	and     r2, r3, r2
	bic     r11, r11, r4
	orr     r11, r11, r2
	and     r5, r11, r0
	mov     r12, r5, lsl #1
	sub     r11, r12, r5, lsr #7
	orr     r4, r4, r11
	sub     r10, r4, r12
	orr     r11, r11, r10
	str     r11, [r8, #4]
	add		r7, r7, #8			;source stride
	mov		pc,lr

; Negative value: computed as ~sat_add(~src, -value), i.e. a saturating
; subtract that floors every pixel at 0.
RowConst_Sub
	ldr     r0, CarryMask
	ldr     r2, [r7]
	rsb     r3, r3, #0				; r3 = magnitude
	cmp     r3, #255				; same clamp as in RowConst_Add
	movgt   r3, #255
	orr     r3, r3, r3, lsl #8
	orr     r3, r3, r3, lsl #16
	mvn		r2, r2
	add     r4, r2, r3
	eor     r11, r2, r3
	and     r2, r3, r2
	bic     r11, r11, r4
	orr     r11, r11, r2
	and     r5, r11, r0
	mov     r12, r5, lsl #1
	sub     r11, r12, r5, lsr #7
	orr     r4, r4, r11
	sub     r10, r4, r12
	ldr     r2, [r7, #4]
	orr     r11, r11, r10
	mvn		r11, r11
	str     r11, [r8]
	mvn		r2, r2
	add     r4, r2, r3
	eor     r11, r2, r3
	and     r2, r3, r2
	bic     r11, r11, r4
	orr     r11, r11, r2
	and     r5, r11, r0
	mov     r12, r5, lsl #1
	sub     r11, r12, r5, lsr #7
	orr     r4, r4, r11
	sub     r10, r4, r12
	orr     r11, r11, r10
	mvn		r11, r11
	str     r11, [r8, #4]
	add		r7, r7, #8			;source stride
	mov		pc,lr

; Value is zero: plain 8-byte copy from Src to Dst.
RowConst_Zero
	ldr     r1, [r7]
	ldr     r2, [r7, #4]
	str     r1, [r8]
	str     r2, [r8, #4]
	add		r7, r7, #8			;source stride
	mov		pc,lr

; No source row: clamp the value to 0..255 and fill the row with it.
RowConst_NoSrc
	cmp     r3, #0
	movmi   r3, #0					; negative -> 0
	cmppl   r3, #255				; only compared when r3 was not negative
	movgt   r3, #255				; above 255 -> 255
	orr     r3, r3, r3, lsl #8
	orr     r3, r3, r3, lsl #16
	str     r3, [r8]
	str     r3, [r8, #4]
	mov		pc,lr

	ENDP

; CarryMask has bit 7 of every byte set; RowConst loads it with
; "ldr r0, CarryMask" to pick out the per-byte carry flags.
; W1..W7 are 16-bit IDCT weights. MCol8 builds the same values inline with
; mov/orr pairs; the table itself is not referenced in the part of the file
; shown here. nW5 holds W5 negated (0xF9B7 is -1609 as a 16-bit value).
CarryMask	DCD 0x80808080
W1			DCW	2841                 ; 2048*sqrt(2)*cos(1*pi/16) 
W3			DCW 2408                 ; 2048*sqrt(2)*cos(3*pi/16) 
nW5			DCW 0xF9B7 ;-1609        ; -(2048*sqrt(2)*cos(5*pi/16))
W6			DCW 1108                 ; 2048*sqrt(2)*cos(6*pi/16) 
W7			DCW 565                  ; 2048*sqrt(2)*cos(7*pi/16) 
W2			DCW 2676                 ; 2048*sqrt(2)*cos(2*pi/16) 

; IDCT_Block4x8Swap
;
; Entry: r0 = Block, r1 = Dst, r2 = DstStride, r3 = Src
;
; Pushes {BlockEnd, DstStride, r4-r12, lr}, where BlockEnd = Block + 256, then
; sets up the register roles used by the rest of the routine:
;   r6 = Block, r7 = Src, r8 = Dst
;
; Col8Swap is called four times. Call i (i = 0..3) reads its inputs at
; Block + 16*i and, through r9, stores its results starting at
; Block + 128 + 2*i. Col8Swap returns with r6 advanced by r9, so r6 is
; stepped back between calls. After the last call r6 is set to Block + 128
; and control continues at Row4_Loop.

	ALIGN 16
IDCT_Block4x8Swap PROC

	add		r0, r0, #256			; r0 = BlockEnd
	stmdb   sp!, {r0, r2, r4 - r12, lr}  ; r0=BlockEnd r2=DstStride
	mov	    r8, r1					; Dst
	mov		r7, r3					; Src
	sub		r6, r0, #256			; Block

	mov		r9, #128				; i=0: output offset 128 - 0*16 + 0*2
	bl      Col8Swap

	sub     r6, r6, #112			; r6 = Block + 16
	mov		r9, #114				; i=1: output offset 128 - 1*16 + 1*2
	bl      Col8Swap

	sub     r6, r6, #98				; r6 = Block + 32
	mov		r9, #100				; i=2: output offset 128 - 2*16 + 2*2
	bl      Col8Swap

	sub     r6, r6, #84				; r6 = Block + 48
	mov		r9, #86					; i=3: output offset 128 - 3*16 + 3*2
	bl      Col8Swap

	sub     r6, r6, #6				; r6 = Block + 128
	b		Row4_Loop

	ALIGN 16
IDCT_Block4x8 PROC

	add		r0, r0, #128
	stmdb   sp!, {r0, r2, r4 - r12, lr}  ; r0=BlockEnd r2=DstStride
	sub		r6, r0, #128	;Block
	mov		r7, r3			;Src
	mov	    r8, r1			;Dst

	bl      Col8  
	add     r6, r6, #2
	bl      Col8  
	add     r6, r6, #2
	bl      Col8  
	add     r6, r6, #2

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -