📄 imdct_l_arm.s
字号:
/*****************************************************************************
* Copyright (C) 2000-2001 Andre McCurdy  <armccurdy@yahoo.co.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY, without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*
*****************************************************************************
*
* Notes:
*
*
*****************************************************************************
*
* $Id: imdct_l_arm.S,v 1.7 2001/03/25 20:03:34 rob Rel $
*
* 2001/03/24:  Andre McCurdy <armccurdy@yahoo.co.uk>
*   - Corrected PIC unsafe loading of address of 'imdct36_long_karray'
*
* 2000/09/20:  Robert Leslie <rob@mars.org>
*   - Added a global symbol with leading underscore per suggestion of
*     Simon Burge to support linking with the a.out format.
*
* 2000/09/15:  Robert Leslie <rob@mars.org>
*   - Fixed a small bug where flags were changed before a conditional branch.
*
* 2000/09/15:  Andre McCurdy <armccurdy@yahoo.co.uk>
*   - Applied Nicolas Pitre's rounding optimisation in all remaining places.
*
* 2000/09/09:  Nicolas Pitre <nico@cam.org>
*   - Optimized rounding + scaling operations.
*
* 2000/08/09:  Andre McCurdy <armccurdy@yahoo.co.uk>
*   - Original created.
*
*****************************************************************************/

/*
   III_imdct_l / _III_imdct_l  (both labels name the same entry point)

   Syntax: GNU as, 32-bit ARM ('@' starts a comment that runs to end of line,
   so every instruction must sit on its own line).

   On entry:

      r0 = pointer to 18 element input  array
      r1 = pointer to 36 element output array
      r2 = windowing block type

   Arithmetic used throughout the visible code:

      - Products are accumulated as 64-bit values in register pairs written
        "lo..hi" (e.g. r2..r3: r2 = low word, r3 = high word) using
        smull / smlal.
      - Each result is taken from bits [59..28] of its accumulator with
        "movs rX, rX, lsr #28 ; adc rX, rX, rY, lsl #4".  The movs leaves
        bit 27 in the carry flag and the adc adds it back, so the value is
        rounded rather than truncated.


   Stack frame created during execution of the function:

   Initial
   Stack
   pointer
   minus:    Holds:

       0
       4     lr
       8     r11
       12    r10
       16    r9
       20    r8
       24    r7
       28    r6
       32    r5
       36    r4
       40    r2 : windowing block type

       44    ct00 high
       48    ct00 low
       52    ct01 high
       56    ct01 low

       60    ct04 high
       64    ct04 low
       68    ct06 high
       72    ct06 low

       76    ct05 high
       80    ct05 low
       84    ct03 high
       88    ct03 low

       92   -ct05 high
       96   -ct05 low
       100  -ct07 high
       104  -ct07 low

       108   ct07 high
       112   ct07 low
       116   ct02 high
       120   ct02 low
*/

    @ Windowing block types (values of the r2 argument).
    .equ    BLOCK_MODE_NORMAL, 0
    .equ    BLOCK_MODE_START,  1
    .equ    BLOCK_MODE_STOP,   3

    @ Byte offsets of the 18 input words (used with r0).
    .equ    X0 , 0x00
    .equ    X1 , 0x04
    .equ    X2 , 0x08
    .equ    X3 , 0x0C
    .equ    X4 , 0x10
    .equ    X5 , 0x14
    .equ    X6 , 0x18
    .equ    X7 , 0x1c
    .equ    X8 , 0x20
    .equ    X9 , 0x24
    .equ    X10, 0x28
    .equ    X11, 0x2c
    .equ    X12, 0x30
    .equ    X13, 0x34
    .equ    X14, 0x38
    .equ    X15, 0x3c
    .equ    X16, 0x40
    .equ    X17, 0x44

    @ Byte offsets of the 36 output words (used with r1).
    .equ    x0 , 0x00
    .equ    x1 , 0x04
    .equ    x2 , 0x08
    .equ    x3 , 0x0C
    .equ    x4 , 0x10
    .equ    x5 , 0x14
    .equ    x6 , 0x18
    .equ    x7 , 0x1c
    .equ    x8 , 0x20
    .equ    x9 , 0x24
    .equ    x10, 0x28
    .equ    x11, 0x2c
    .equ    x12, 0x30
    .equ    x13, 0x34
    .equ    x14, 0x38
    .equ    x15, 0x3c
    .equ    x16, 0x40
    .equ    x17, 0x44
    .equ    x18, 0x48
    .equ    x19, 0x4c
    .equ    x20, 0x50
    .equ    x21, 0x54
    .equ    x22, 0x58
    .equ    x23, 0x5c
    .equ    x24, 0x60
    .equ    x25, 0x64
    .equ    x26, 0x68
    .equ    x27, 0x6c
    .equ    x28, 0x70
    .equ    x29, 0x74
    .equ    x30, 0x78
    .equ    x31, 0x7c
    .equ    x32, 0x80
    .equ    x33, 0x84
    .equ    x34, 0x88
    .equ    x35, 0x8c

    @ Multiplier constants.  Results are extracted from bits [59..28] of the
    @ 64-bit accumulators, which is consistent with these being scaled by
    @ 2^28 -- TODO confirm against the C reference implementation.
    .equ    K00, 0x0ffc19fd
    .equ    K01, 0x00b2aa3e
    .equ    K02, 0x0fdcf549
    .equ    K03, 0x0216a2a2
    .equ    K04, 0x0f9ee890
    .equ    K05, 0x03768962
    .equ    K06, 0x0f426cb5
    .equ    K07, 0x04cfb0e2
    .equ    K08, 0x0ec835e8
    .equ    K09, 0x061f78aa
    .equ    K10, 0x0e313245
    .equ    K11, 0x07635284
    .equ    K12, 0x0d7e8807
    .equ    K13, 0x0898c779
    .equ    K14, 0x0cb19346
    .equ    K15, 0x09bd7ca0
    .equ    K16, 0x0bcbe352
    .equ    K17, 0x0acf37ad

    .equ    minus_K02, 0xf0230ab7       @ two's complement of K02

    @ WL0..WL17 are not referenced in the code visible here; presumably the
    @ window coefficients used later in the routine -- TODO confirm.
    .equ    WL0 , 0x00b2aa3e
    .equ    WL1 , 0x0216a2a2
    .equ    WL2 , 0x03768962
    .equ    WL3 , 0x04cfb0e2
    .equ    WL4 , 0x061f78aa
    .equ    WL5 , 0x07635284
    .equ    WL6 , 0x0898c779
    .equ    WL7 , 0x09bd7ca0
    .equ    WL8 , 0x0acf37ad
    .equ    WL9 , 0x0bcbe352
    .equ    WL10, 0x0cb19346
    .equ    WL11, 0x0d7e8807
    .equ    WL12, 0x0e313245
    .equ    WL13, 0x0ec835e8
    .equ    WL14, 0x0f426cb5
    .equ    WL15, 0x0f9ee890
    .equ    WL16, 0x0fdcf549
    .equ    WL17, 0x0ffc19fd


@*****************************************************************************


    .text
    .align

    .global III_imdct_l
    .global _III_imdct_l

III_imdct_l:
_III_imdct_l:

    stmdb   sp!, { r2, r4 - r11, lr }   @ all callee saved regs, plus arg3

    ldr     r4, =K08                    @ r4 =  K08
    ldr     r5, =K09                    @ r5 =  K09
    ldr     r8, [r0, #X4]               @ r8 =  X4
    ldr     r9, [r0, #X13]              @ r9 =  X13
    rsb     r6, r4, #0                  @ r6 = -K08
    rsb     r7, r5, #0                  @ r7 = -K09

    smull   r2, r3, r4, r8              @ r2..r3  = (X4 * K08)
    smlal   r2, r3, r5, r9              @ r2..r3  = (X4 * K08) + (X13 *  K09) = ct01

    smull   r10, lr, r8, r5             @ r10..lr = (X4 * K09)
    smlal   r10, lr, r9, r6             @ r10..lr = (X4 * K09) + (X13 * -K08) = ct00

    ldr     r8, [r0, #X7]               @ r8 = X7
    ldr     r9, [r0, #X16]              @ r9 = X16

    stmdb   sp!, { r2, r3, r10, lr }    @ stack ct00_h, ct00_l, ct01_h, ct01_l

    add     r8, r8, r9                  @ r8 = (X7 + X16)
    ldr     r9, [r0, #X1]               @ r9 = X1

    smlal   r2, r3, r6, r8              @ r2..r3  = ct01 + ((X7 + X16) * -K08)
    smlal   r2, r3, r7, r9              @ r2..r3 += (X1  * -K09)

    ldr     r7, [r0, #X10]              @ r7 = X10 (the -K09 copy is no longer needed)

    rsbs    r10, r10, #0                @ negate low word, setting carry for the rsc
    rsc     lr, lr, #0                  @ r10..lr  = -ct00

    smlal   r2, r3, r5, r7              @ r2..r3  += (X10 *  K09) = ct06

    smlal   r10, lr, r9, r6             @ r10..lr  = -ct00 + ( X1        * -K08)
    smlal   r10, lr, r8, r5             @ r10..lr +=         ((X7 + X16) *  K09)
    smlal   r10, lr, r7, r4             @ r10..lr +=         ( X10       *  K08) = ct04

    stmdb   sp!, { r2, r3, r10, lr }    @ stack ct04_h, ct04_l, ct06_h, ct06_l

    @----

    ldr     r7, [r0, #X0]
    ldr     r8, [r0, #X11]
    ldr     r9, [r0, #X12]
    sub     r7, r7, r8
    sub     r7, r7, r9                  @ r7 = (X0 - X11 - X12) = ct14

    ldr     r9, [r0, #X3]
    ldr     r8, [r0, #X8]
    ldr     r11, [r0, #X15]
    sub     r8, r8, r9
    add     r8, r8, r11                 @ r8 = (X8 - X3 + X15) = ct16

    add     r11, r7, r8                 @ r11 = ct14 + ct16 = ct18

    smlal   r2, r3, r6, r11             @ r2..r3 = ct06 + ((X0 - X11 - X3 + X15 + X8 - X12) * -K08)

    ldr     r6, [r0, #X2]               @ r6 is reused from here on (-K08 no longer needed)
    ldr     r9, [r0, #X9]
    ldr     r12, [r0, #X14]
    sub     r6, r6, r9
    sub     r6, r6, r12                 @ r6 = (X2 - X9 - X14) = ct15

    ldr     r9, [r0, #X5]
    ldr     r12, [r0, #X6]
    sub     r9, r9, r12
    ldr     r12, [r0, #X17]
    sub     r9, r9, r12                 @ r9 = (X5 - X6 - X17) = ct17

    add     r12, r9, r6                 @ r12 = ct15 + ct17 = ct19

    smlal   r2, r3, r5, r12             @ r2..r3 += ((X2 - X9 + X5 - X6 - X17 - X14) * K09)

    smlal   r10, lr, r11, r5            @ r10..lr = ct04 + (ct18 * K09)
    smlal   r10, lr, r12, r4            @ r10..lr = ct04 + (ct18 * K09) + (ct19 * K08)

    movs    r2, r2, lsr #28             @ carry = bit 27 (rounding bit)
    adc     r2, r2, r3, lsl #4          @ r2 = bits[59..28] of r2..r3
    str     r2, [r1, #x22]              @ store result x22

    movs    r10, r10, lsr #28           @ carry = bit 27 (rounding bit)
    adc     r10, r10, lr, lsl #4        @ r10 = bits[59..28] of r10..lr
    str     r10, [r1, #x4]              @ store result x4

    @----

    ldmia   sp, { r2, r3, r4, r5 }      @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp)

    @ r2..r3 = ct06
    @ r4..r5 = ct04
    @ r6     = ct15
    @ r7     = ct14
    @ r8     = ct16
    @ r9     = ct17
    @ r10    = .
    @ r11    = .
    @ r12    = .
    @ lr     = .

    ldr     r10, =K03                   @ r10 = K03
    ldr     lr,  =K15                   @ lr  = K15

    smlal   r2, r3, r10, r7             @ r2..r3 = ct06 + (ct14 * K03)
    smlal   r4, r5, lr, r7              @ r4..r5 = ct04 + (ct14 * K15)

    ldr     r12, =K14                   @ r12 =  K14
    rsb     r10, r10, #0                @ r10 = -K03

    smlal   r2, r3, lr, r6              @ r2..r3 += (ct15 *  K15)
    smlal   r4, r5, r10, r6             @ r4..r5 += (ct15 * -K03)
    smlal   r2, r3, r12, r8             @ r2..r3 += (ct16 *  K14)

    ldr     r11, =minus_K02             @ r11 = -K02
    rsb     r12, r12, #0                @ r12 = -K14

    smlal   r4, r5, r12, r9             @ r4..r5 += (ct17 * -K14)
    smlal   r2, r3, r11, r9             @ r2..r3 += (ct17 * -K02)
    smlal   r4, r5, r11, r8             @ r4..r5 += (ct16 * -K02)

    movs    r2, r2, lsr #28             @ carry = bit 27 (rounding bit)
    adc     r2, r2, r3, lsl #4          @ r2 = bits[59..28] of r2..r3
    str     r2, [r1, #x7]               @ store result x7

    @ NOTE(review): the listing ends here without an epilogue.  The register
    @ save area and both 4-word blocks pushed above are still on the stack and
    @ r4..r5 still hold an unfinished accumulator.  Presumably the routine
    @ continues past this excerpt -- confirm against the complete file.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -