📄 idct_arm.asm
字号:
;*****************************************************************************
;*
;* This program is free software ; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;*
;* $Id: idct_arm.asm 284 2005-10-04 08:54:26Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************
; All code and the constant tables of this file are placed in the |.text| code area.
AREA |.text|, CODE
; Public entry points. Only IDCT_Block4x8Swap and the start of IDCT_Block4x8 are
; visible in this part of the file; the 8x8 variants are presumably defined
; further down -- confirm.
EXPORT IDCT_Block4x8
EXPORT IDCT_Block8x8
EXPORT IDCT_Block4x8Swap
EXPORT IDCT_Block8x8Swap
;-----------------------------------------------------------------------
; MCol8 $Name,$Rotate,$Pitch
;
; Emits a procedure $Name that performs one 8-point 1-D inverse DCT pass
; on signed 16-bit values.
;
; $Pitch  : byte distance between consecutive inputs; input k is read from
;           [r6 + k*$Pitch], k = 0..7.
; $Rotate : when non-zero, r9 is added to r6 after the loads, so the results
;           go to (r6 + r9); the "everything is zero" early-out is left out
;           in that variant. When zero, results overwrite the data at r6.
; Results : eight halfwords stored at [r6 + k*16], k = 0..7 (fixed 16-byte
;           pitch, independent of $Pitch).
;
; In      : r6 = Block (input pointer), r9 = output offset (read only when
;           $Rotate is non-zero), lr = return address
; Out     : r6 = r6 + r9 when $Rotate is non-zero, otherwise unchanged
; Clobbers: r0-r5, r9-r12, flags
; Returns with "mov pc,lr".
;
; Four code paths are selected by which inputs are non-zero:
;   DC only       : only x0 may be non-zero
;   $Name.Mode1   : only x0 and x4 (input 1)
;   $Name.Mode2   : only x0, x1, x2, x3 (inputs 0, 4, 6, 2)
;   $Name.Mode3   : general case
;
; The multipliers are built from immediates; their values equal the
; W1/W2/W3/W5/W6/W7 entries (and sums/differences of them) of the constant
; table that follows RowConst.
;
; NOTE(review): "$Name PROC" has no matching ENDP inside the macro --
; confirm the assembler in use accepts that.
;-----------------------------------------------------------------------
; r6 Block
; r7,r8 must be saved (no instruction in this macro writes r7 or r8)
macro
MCol8 $Name,$Rotate,$Pitch
$Name PROC
; Register roles after the loads below (x-names as used in the comments):
; r10 = x0
; r4 = x1
; r2 = x2
; r1 = x3
; r3 = x4
; r12 = x5
; r0 = x6
; r5 = x7
; r11 = x8
; r9 = tmp (x567)
ldrsh r4, [r6, #4*$Pitch]
ldrsh r0, [r6, #5*$Pitch]
ldrsh r12,[r6, #7*$Pitch]
ldrsh r5, [r6, #3*$Pitch]
ldrsh r2, [r6, #6*$Pitch]
ldrsh r1, [r6, #2*$Pitch]
ldrsh r3, [r6, #1*$Pitch]
ldrsh r10,[r6]
; Rotate variant: from here on r6 addresses the output location.
if $Rotate
add r6,r6,r9
endif
; r9 = x5|x6|x7, r11 = r9|x2|x1|x3 : choose the cheapest path that is still exact.
orr r9, r12, r0
orr r9, r9, r5
orr r11, r9, r2
orr r11, r11, r4
orrs r11, r11, r1
bne $Name.Mode2 ; some input other than x0/x4 is non-zero
cmp r3, #0
bne $Name.Mode1 ; only x0 and x4 can be non-zero
; In-place variant only: an all-zero input needs no stores at all.
if $Rotate=0
cmp r10, #0
beq $Name.Zero
endif
; DC only: every output is x0 << 3, which equals ((x0 << 11) + 128) >> 8.
mov r10, r10, lsl #3
strh r10, [r6]
strh r10, [r6, #0x10]
strh r10, [r6, #0x20]
strh r10, [r6, #0x30]
strh r10, [r6, #0x40]
strh r10, [r6, #0x50]
strh r10, [r6, #0x60]
strh r10, [r6, #0x70]
$Name.Zero
mov pc,lr
$Name.Mode1 ;x0,x4
; out[k] = ((x0 << 11) + 128 +/- Wn * x4) >> 8 with Wn = W1, W3, W5, W7
mov r11, r3 ; copy of x4 used as multiplicand
mov r2, #0x8D, 30 ; 0x234 = 564
orr r2, r2, #1 ; 565 (W7)
mov r9, r3 ; second copy of x4
mul r2, r11, r2 ; r2 = W7 * x4
mov r11, #0xB1, 28 ; 0xB10 = 2832
orr r11, r11, #9 ; 2841 (W1)
mul r4, r9, r11 ; r4 = W1 * x4
mov r11, #0x96, 28 ; 0x960 = 2400
orr r11, r11, #8 ; 2408 (W3)
mul r5, r9, r11 ; r5 = W3 * x4
mov r11, #0x19, 26 ; 0x640 = 1600
mov r1, r10, lsl #11 ; r1 = x0 << 11
orr r11, r11, #9 ; 1609 (W5)
mul r0, r3, r11 ; r0 = W5 * x4
add r1, r1, #0x80 ; 0x80 = 128 (rounding for the >> 8)
add r3, r4, r1
add r11, r5, r1
mov r3, r3, asr #8
mov r11, r11, asr #8
strh r3, [r6] ; out[0] = (x0' + W1*x4) >> 8
strh r11, [r6, #0x10] ; 0x10 = 16, out[1] = (x0' + W3*x4) >> 8
add r3, r0, r1
add r11, r2, r1
mov r3, r3, asr #8
mov r11, r11, asr #8
strh r3, [r6, #0x20] ; 0x20 = 32, out[2] = (x0' + W5*x4) >> 8
strh r11, [r6, #0x30] ; 0x30 = 48, out[3] = (x0' + W7*x4) >> 8
sub r3, r1, r2
sub r11, r1, r0
mov r3, r3, asr #8
mov r11, r11, asr #8
strh r3, [r6, #0x40] ; 0x40 = 64, out[4] = (x0' - W7*x4) >> 8
strh r11, [r6, #0x50] ; 0x50 = 80, out[5] = (x0' - W5*x4) >> 8
sub r3, r1, r5
sub r11, r1, r4
mov r3, r3, asr #8
mov r11, r11, asr #8
strh r3, [r6, #0x60] ; 0x60 = 96, out[6] = (x0' - W3*x4) >> 8
strh r11, [r6, #0x70] ; 0x70 = 112, out[7] = (x0' - W1*x4) >> 8
mov pc,lr
$Name.Mode2 ;x0,x1,x2,x3
; r9 still holds x5|x6|x7 from the dispatch above.
orrs r11, r9, r3 ; any of x4..x7 non-zero -> general case
bne $Name.Mode3
; With x4..x7 all zero the outputs are mirror-symmetric: out[k] == out[7-k].
mov r3, r10, lsl #11
add r3, r3, #128 ; r3 = (x0 << 11) + 128
mov r9, #0x45, 28 ; 0x450 = 1104
add r5, r3, r4, lsl #11 ; r5 = x0 + (x1 << 11)
add r11, r2, r1 ; x2 + x3
orr r9, r9, #4 ; 1108 (W6)
sub r3, r3, r4, lsl #11 ; r3 = x0 - (x1 << 11)
mul r4, r11, r9 ; r4 = W6 * (x3 + x2)
mov r11, #0x3B, 26 ; 0xEC0 = 3776
orr r11, r11, #8 ; 3784 (W2+W6)
mul r11, r2, r11
sub r2, r4, r11 ; r2 = W6*(x3+x2) - (W2+W6)*x2
mov r11, #0x62, 28 ; 0x620 = 1568 (W2-W6)
mul r11, r1, r11
add r0, r2, r3
add r1, r11, r4 ; r1 = W6*(x3+x2) + (W2-W6)*x3
add r4, r5, r1
sub r3, r3, r2
sub r5, r5, r1
mov r1, r4, asr #8
mov r3, r3, asr #8
mov r2, r0, asr #8
mov r4, r5, asr #8
strh r1, [r6,#0x00]
strh r2, [r6,#0x10]
strh r3, [r6,#0x20]
strh r4, [r6,#0x30]
strh r4, [r6,#0x40]
strh r3, [r6,#0x50]
strh r2, [r6,#0x60]
strh r1, [r6,#0x70]
mov pc,lr
$Name.Mode3 ;x0,x1,x2,x3,x4,x5,x6,x7
; General case. Negative multipliers are formed with mvn (+ eor for the low bits).
mov r9, #0x8D, 30
orr r9, r9, #1 ;W7
add r11, r12, r3
mul r11, r9, r11 ;x8 = W7 * (x5 + x4)
mov r9, #0x8E, 28
orr r9, r9, #4 ;W1-W7
mla r3, r9, r3, r11 ;x4 = x8 + (W1-W7) * x4
mvn r9, #0xD40
eor r9, r9, #0xD ;-W1-W7
mla r12, r9, r12, r11 ;x5 = x8 + (-W1-W7) * x5
mov r9, #0x96, 28 ; 0x960 = 2400
orr r9, r9, #8 ;W3
add r11, r0, r5
mul r11, r9, r11 ;x8 = W3 * (x6 + x7)
mvn r9, #0x310
eor r9, r9, #0xE ;W5-W3
mla r0, r9, r0, r11 ;x6 = x8 + (W5-W3) * x6
mvn r9, #0xFB0 ;-W3-W5
mla r5, r9, r5, r11 ;x7 = x8 + (-W3-W5) * x7
mov r10, r10, lsl #11
add r10, r10, #128 ;x0 = (x0 << 11) + 128
add r11, r10,r4,lsl #11 ;x8 = x0 + (x1 << 11)
sub r10, r10,r4,lsl #11 ;x0 = x0 - (x1 << 11)
mov r9, #0x45, 28
orr r9, r9, #4 ;W6
add r4, r1, r2
mul r4, r9, r4 ;x1 = W6 * (x3 + x2)
mvn r9, #0xEC0
eor r9, r9, #0x7 ;-W2-W6
mla r2, r9, r2, r4 ;x2 = x1 + (-W2-W6) * x2
mov r9, #0x620 ;W2-W6
mla r1, r9, r1, r4 ;x3 = x1 + (W2-W6) * x3
add r4, r3, r0 ;x1 = x4 + x6
sub r3, r3, r0 ;x4 -= x6
add r0, r12,r5 ;x6 = x5 + x7
sub r12,r12,r5 ;x5 -= x7
add r5, r11,r1 ;x7 = x8 + x3
sub r11,r11,r1 ;x8 -= x3
add r1, r10,r2 ;x3 = x0 + x2
sub r10,r10,r2 ;x0 -= x2
add r9, r3, r12 ;x4 + x5
sub r3, r3, r12 ;x4 - x5
mov r12, #181
mul r2, r9, r12 ;181 * (x4 + x5)
mul r9, r3, r12 ;181 * (x4 - x5)
add r2, r2, #128 ;x2 = 181 * (x4 + x5) + 128
add r3, r9, #128 ;x4 = 181 * (x4 - x5) + 128
; x2 and x4 still need their own >> 8; it is folded into the operand shifts below.
add r9,r5,r4
sub r5,r5,r4
mov r9,r9,asr #8 ;(x7 + x1) >> 8
mov r5,r5,asr #8 ;(x7 - x1) >> 8
strh r9,[r6,#0x00]
strh r5,[r6,#0x70]
add r9,r1,r2,asr #8
sub r1,r1,r2,asr #8
mov r9,r9,asr #8 ;(x3 + x2) >> 8
mov r1,r1,asr #8 ;(x3 - x2) >> 8
strh r9,[r6,#0x10]
strh r1,[r6,#0x60]
add r9,r10,r3,asr #8
sub r10,r10,r3,asr #8
mov r9,r9,asr #8 ;(x0 + x4) >> 8
mov r10,r10,asr #8 ;(x0 - x4) >> 8
strh r9,[r6,#0x20]
strh r10,[r6,#0x50]
add r9,r11,r0
sub r11,r11,r0
mov r9,r9,asr #8 ;(x8 + x6) >> 8
mov r11,r11,asr #8 ;(x8 - x6) >> 8
strh r9,[r6,#0x30]
strh r11,[r6,#0x40]
mov pc,lr
mend
; Col8    : inputs 16 bytes apart, results written back over them (no r9 use).
MCol8 Col8,0,16
; Col8Swap: inputs 2 bytes apart (8 consecutive halfwords), results written
;           16 bytes apart starting at r6 + r9.
MCol8 Col8Swap,1,2
;-----------------------------------------------------------------------
; RowConst - write one 8-byte output row from a single constant
;            d = (Block[0] + 32) >> 6.
;
; In  : r0 = Block[0]
;       r7 = Src, or 0 when there is no source row
;       r8 = Dst (8 bytes are written)
;       lr = return address
;       (r6 = Block is listed by the callers' convention; it is not used here)
; Out : Src != 0 : Dst[i] = clamp(Src[i] + d, 0, 255), i = 0..7; r7 += 8
;       Src == 0 : Dst[i] = clamp(d, 0, 255),          i = 0..7; r7 unchanged
; Clobbers: r0, r2-r5, r10-r12 (Add/Sub paths), r0-r3 (Zero path),
;           r0, r3 (NoSrc path), flags
;
; Both source words are loaded before the first store, so Dst may equal Src.
; NOTE(review): the Add/Sub paths replicate |d| into every byte and therefore
; assume 0 < |d| <= 255 -- this is not checked here; confirm against callers.
;
; Packed saturating add used by the Add/Sub paths (a = four source bytes,
; b = |d| replicated, M = 0x80808080):
;   lo    = (a & ~M) + (b & ~M)          low 7 bits of every byte are added
;                                        on their own, so no carry can cross
;                                        a byte boundary; bit 7 of each byte
;                                        of lo is the carry into bit 7
;   sum   = lo ^ ((a ^ b) & M)           per-byte (a + b) mod 256
;   carry = ((a & b) | ((a ^ b) & lo)) & M   per-byte carry out of bit 7
;   sat   = (carry << 1) - (carry >> 7)  0xFF in every byte that overflowed
;   out   = sum | sat
; A single 32-bit "add a, b" must not be used for the sum: a byte that adds up
; to exactly 0xFF and receives a carry from the byte below is then seen as
; overflowing and the byte above it ends up one too low (a = 0x0000FEFF, d = 1
; would give 0x0100FFFF instead of 0x0101FFFF).
;-----------------------------------------------------------------------
        ALIGN 16
RowConst PROC
        add     r0, r0, #0x20           ; + 32: rounding for the >> 6
        cmp     r7, #0                  ; Src == 0 ?
        mov     r3, r0, asr #6          ; r3 = d (mov leaves the flags alone)
        beq     RowConst_NoSrc
        cmp     r3, #0
        beq     RowConst_Zero           ; d == 0 : plain copy
        blt     RowConst_Sub            ; d <  0 : saturating subtract

RowConst_Add                            ; d > 0 : Dst = min(Src + d, 255)
        ldr     r0, CarryMask           ; r0 = M = 0x80808080
        ldr     r2, [r7]                ; a = Src[0..3]
        orr     r3, r3, r3, lsl #8
        orr     r3, r3, r3, lsl #16     ; b = d in all four bytes

        bic     r4, r2, r0              ; a & ~M
        bic     r5, r3, r0              ; b & ~M
        add     r4, r4, r5              ; lo
        eor     r11, r2, r3             ; a ^ b
        and     r2, r2, r3              ; a & b
        and     r10, r11, r0            ; (a ^ b) & M
        eor     r10, r10, r4            ; sum
        and     r11, r11, r4            ; (a ^ b) & lo
        orr     r11, r11, r2            ; bit 7 of each byte = carry out
        ldr     r2, [r7, #4]            ; a = Src[4..7], fetched before the store
        and     r5, r11, r0             ; carry
        mov     r12, r5, lsl #1
        sub     r11, r12, r5, lsr #7    ; sat
        orr     r11, r11, r10
        str     r11, [r8]

        bic     r4, r2, r0              ; same sequence for the second word
        bic     r5, r3, r0
        add     r4, r4, r5
        eor     r11, r2, r3
        and     r2, r2, r3
        and     r10, r11, r0
        eor     r10, r10, r4
        and     r11, r11, r4
        orr     r11, r11, r2
        and     r5, r11, r0
        mov     r12, r5, lsl #1
        sub     r11, r12, r5, lsr #7
        orr     r11, r11, r10
        str     r11, [r8, #4]
        add     r7, r7, #8              ;source stride
        mov     pc, lr

RowConst_Sub                            ; d < 0 : Dst = max(Src - |d|, 0)
                                        ;       = ~min(~Src + |d|, 255)
        ldr     r0, CarryMask           ; r0 = M = 0x80808080
        ldr     r2, [r7]                ; Src[0..3]
        rsb     r3, r3, #0              ; |d|
        orr     r3, r3, r3, lsl #8
        orr     r3, r3, r3, lsl #16     ; b = |d| in all four bytes
        mvn     r2, r2                  ; a = ~Src[0..3]

        bic     r4, r2, r0              ; a & ~M
        bic     r5, r3, r0              ; b & ~M
        add     r4, r4, r5              ; lo
        eor     r11, r2, r3             ; a ^ b
        and     r2, r2, r3              ; a & b
        and     r10, r11, r0            ; (a ^ b) & M
        eor     r10, r10, r4            ; sum
        and     r11, r11, r4            ; (a ^ b) & lo
        orr     r11, r11, r2            ; bit 7 of each byte = carry out
        ldr     r2, [r7, #4]            ; Src[4..7], fetched before the store
        and     r5, r11, r0             ; carry
        mov     r12, r5, lsl #1
        sub     r11, r12, r5, lsr #7    ; sat
        orr     r11, r11, r10
        mvn     r11, r11                ; undo the complement
        str     r11, [r8]

        mvn     r2, r2                  ; a = ~Src[4..7]
        bic     r4, r2, r0
        bic     r5, r3, r0
        add     r4, r4, r5
        eor     r11, r2, r3
        and     r2, r2, r3
        and     r10, r11, r0
        eor     r10, r10, r4
        and     r11, r11, r4
        orr     r11, r11, r2
        and     r5, r11, r0
        mov     r12, r5, lsl #1
        sub     r11, r12, r5, lsr #7
        orr     r11, r11, r10
        mvn     r11, r11
        str     r11, [r8, #4]
        add     r7, r7, #8              ;source stride
        mov     pc, lr

RowConst_Zero                           ; d == 0 : Dst = Src
        ldr     r1, [r7]
        ldr     r2, [r7, #4]
        str     r1, [r8]
        str     r2, [r8, #4]
        add     r7, r7, #8              ;source stride
        mov     pc, lr

RowConst_NoSrc                          ; no source: fill the row with clamp(d, 0, 255)
        cmp     r3, #0
        movmi   r3, #0                  ; d < 0   -> 0
        cmppl   r3, #255                ; only compared when d >= 0
        movgt   r3, #255                ; d > 255 -> 255 (GT is false when d was negative)
        orr     r3, r3, r3, lsl #8
        orr     r3, r3, r3, lsl #16     ; same byte in all four lanes
        str     r3, [r8]
        str     r3, [r8, #4]
        mov     pc, lr
        ENDP
; Literal loaded by RowConst with "ldr r0, CarryMask": bit 7 of each of the four bytes.
CarryMask DCD 0x80808080
; 16-bit IDCT multipliers, Wk = 2048*sqrt(2)*cos(k*pi/16) rounded to an integer.
; The code visible above this table builds the same values from immediates and
; does not load these entries; they are presumably read further down -- confirm.
W1 DCW 2841 ; 2048*sqrt(2)*cos(1*pi/16)
W3 DCW 2408 ; 2048*sqrt(2)*cos(3*pi/16)
nW5 DCW 0xF9B7 ;-1609 as a signed halfword ; -(2048*sqrt(2)*cos(5*pi/16))
W6 DCW 1108 ; 2048*sqrt(2)*cos(6*pi/16)
W7 DCW 565 ; 2048*sqrt(2)*cos(7*pi/16)
W2 DCW 2676 ; 2048*sqrt(2)*cos(2*pi/16)
;-----------------------------------------------------------------------
; IDCT_Block4x8Swap
;
; In  : r0 = Block, r1 = Dst, r2 = DstStride, r3 = Src, lr = return address
;
; Saves {BlockEnd = Block + 256, DstStride, r4-r12, lr} on the stack, sets up
;       r6 = Block, r7 = Src, r8 = Dst
; and runs Col8Swap on the first four 16-byte rows of Block. Row k is read from
; Block + 16*k and its eight results are written 16 bytes apart starting at
; Block + 128 + 2*k (r9 carries that distance for Col8Swap, which adds it
; to r6). Finally r6 is moved back to Block + 128 and control passes to
; Row4_Loop.
;
; NOTE(review): the saved registers are not restored here; presumably the code
; reached through Row4_Loop unwinds the frame and returns -- confirm.
;-----------------------------------------------------------------------
        ALIGN 16
IDCT_Block4x8Swap PROC
        add     r0, r0, #0x100          ; r0 = Block + 256
        stmfd   sp!, {r0, r2, r4-r12, lr} ; r0=BlockEnd r2=DstStride
        sub     r6, r0, #0x100          ;Block
        mov     r8, r1                  ;Dst
        mov     r7, r3                  ;Src

        mov     r9, #128                ; 128 - 0*16 + 0*2 : row 0 -> Block+128
        bl      Col8Swap                ; returns with r6 = Block + 128

        sub     r6, r6, #112            ; + 1*16 - 0*2 - 128 : r6 = Block + 16
        mov     r9, #114                ; 128 - 1*16 + 1*2   : row 1 -> Block+130
        bl      Col8Swap

        sub     r6, r6, #98             ; + 2*16 - 1*2 - 128 : r6 = Block + 32
        mov     r9, #100                ; 128 - 2*16 + 2*2   : row 2 -> Block+132
        bl      Col8Swap

        sub     r6, r6, #84             ; + 3*16 - 2*2 - 128 : r6 = Block + 48
        mov     r9, #86                 ; 128 - 3*16 + 3*2   : row 3 -> Block+134
        bl      Col8Swap

        sub     r6, r6, #6              ; r6 = Block + 128
        b       Row4_Loop
ALIGN 16
IDCT_Block4x8 PROC
add r0, r0, #128
stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride
sub r6, r0, #128 ;Block
mov r7, r3 ;Src
mov r8, r1 ;Dst
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -