; asmpoly.s
;/* ***** BEGIN LICENSE BLOCK *****
; * Version: RCSL 1.0/RPSL 1.0
; *
; * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
; *
; * The contents of this file, and the files included with this file, are
; * subject to the current version of the RealNetworks Public Source License
; * Version 1.0 (the "RPSL") available at
; * http://www.helixcommunity.org/content/rpsl unless you have licensed
; * the file under the RealNetworks Community Source License Version 1.0
; * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
; * in which case the RCSL will apply. You may also obtain the license terms
; * directly from RealNetworks. You may not use this file except in
; * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
; * applicable to this file, the RCSL. Please see the applicable RPSL or
; * RCSL for the rights, obligations and limitations governing use of the
; * contents of the file.
; *
; * This file is part of the Helix DNA Technology. RealNetworks is the
; * developer of the Original Code and owns the copyrights in the portions
; * it created.
; *
; * This file, and the files included with this file, is distributed and made
; * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
; * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
; * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
; * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
; *
; * Technology Compatibility Kit Test Suite(s) Location:
; *    http://www.helixcommunity.org/content/tck
; *
; * Contributor(s):
; *
; * ***** END LICENSE BLOCK ***** */

; ---------------------------------------------------------------------------
; Polyphase filter inner loops, stereo and mono variants.
;
; Syntax : ARM armasm (AREA / RN / MACRO..MEND / FUNCTION..ENDFUNC / END).
; Inputs : r0 = pcm, r1 = vbuf, r2 = coefBase (see PCM / VB1 / COEF below).
; Saved  : both functions push r4-r11 and r14 on entry and pop r4-r11 and pc
;          on exit; r0-r3 and r12 are overwritten.
; Stack  : 8 bytes of locals per function:
;              [sp, #0] = loop counter (I)
;              [sp, #4] = pcm pointer
; ---------------------------------------------------------------------------

        AREA    |.text|, CODE, READONLY

; ---------------------------------------------------------------------------
; Register names. Several names share one physical register, which is why
; the pcm pointer and the loop counter are spilled to the stack:
;   r0  : PCM and VLO
;   r12 : CF1, SIGN and I
;   r14 : CF2 and MAXPOS (also reused as a temporary store pointer)
; ---------------------------------------------------------------------------
PCM     RN      r0
VB1     RN      r1
COEF    RN      r2

VLO     RN      r0              ; must push PCM ptr to stack during inner loop
VHI     RN      r3              ; temp variable

SUM1LL  RN      r4
SUM1LH  RN      r5
SUM2LL  RN      r6
SUM2LH  RN      r7
SUM1RL  RN      r8
SUM1RH  RN      r9
SUM2RL  RN      r10
SUM2RH  RN      r11

CF1     RN      r12
CF2     RN      r14

SIGN    RN      r12             ; used for clipping - after discarding CF1
MAXPOS  RN      r14             ; used for clipping - after discarding CF2

I       RN      r12             ; overlay loop counter with CF1, SIGN

; Rounding constant preloaded into the low word of every 64-bit accumulator:
; 1 << 25, i.e. half of the 2^26 that C64TOS shifts out.
        GBLA    RNDVAL
RNDVAL  SETA    (1 << ((32 - 12) + (6 - 1)))

; ---------------------------------------------------------------------------
; C64TOS - clip 64-bit accumulator to short (no rounding)
;   xl, xh = value (lo 32, hi 32)
;   input assumed to have 6 fraction bits
;   sign   = temp variable to use for sign
;   maxPos = 0x00007fff (takes 2 instr. to generate - calculating
;            once and using repeatedly saves if you do several CTOS in a row)
; Result is left in xl. Overwrites sign and the condition flags.
; ---------------------------------------------------------------------------
        MACRO
        C64TOS  $xl, $xh, $sign, $maxPos

        mov     $xl, $xl, lsr #(20+6)           ; xl = (xh:xl) >> 26, low part
        orr     $xl, $xl, $xh, lsl #(12-6)      ;      ... merged with high part
        mov     $sign, $xl, ASR #31             ; sign = 0 or 0xffffffff
        cmp     $sign, $xl, ASR #15             ; differs if xl is outside 16-bit signed range
        eorne   $xl, $sign, $maxPos             ; saturate: sign ^ maxPos

        MEND    ; C64TOS

; ---------------------------------------------------------------------------
; STEREO PROCESSING
; ---------------------------------------------------------------------------

; MC0S - 1 sample per channel (sample 0): loads two coefficients and
;        accumulates vb1[x]*c1 - vb1[23-x]*c2 into SUM1L, and the same with
;        vb1 offset by 32 words into SUM1R.
;   x = vb1 offset
; Advances COEF by 2 words. Overwrites CF1, CF2, VLO (r0), VHI.
        MACRO
        MC0S    $x

        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*($x))]
        ldr     VHI, [VB1, #(4*(23 - $x))]

        smlal   SUM1LL, SUM1LH, VLO, CF1
        ldr     VLO, [VB1, #(4*(32 + $x))]
        rsb     CF2, CF2, #0                    ; CF2 = -CF2
        smlal   SUM1LL, SUM1LH, VHI, CF2
        ldr     VHI, [VB1, #(4*(32 + 23 - $x))]

        smlal   SUM1RL, SUM1RH, VLO, CF1
        smlal   SUM1RL, SUM1RH, VHI, CF2

        MEND    ; MC0S

; MC1S - 1 sample per channel (sample 16): loads one coefficient and
;        accumulates vb1[x]*c1 into SUM1L and vb1[32+x]*c1 into SUM1R.
;   x = vb1 offset
; Advances COEF by 1 word. Overwrites CF1, VLO (r0), VHI.
        MACRO
        MC1S    $x

        ldr     CF1, [COEF], #4
        ldr     VLO, [VB1, #(4*($x))]
        ldr     VHI, [VB1, #(4*(32 + $x))]

        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM1RL, SUM1RH, VHI, CF1

        MEND    ; MC1S

; MC2S - process 2 taps, 2 samples per channel
;   x = vb1 offset
; Per channel:  SUM1 += vLo*c1 - vHi*c2
;               SUM2 += vLo*c2 + vHi*c1
; Advances COEF by 2 words. Overwrites CF1, CF2, VLO (r0), VHI.
        MACRO
        MC2S    $x

        ; load data as far as possible in advance of using it
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*($x))]
        ldr     VHI, [VB1, #(4*(23 - $x))]

        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM2LL, SUM2LH, VLO, CF2
        rsb     CF2, CF2, #0                    ; CF2 = -c2
        smlal   SUM2LL, SUM2LH, VHI, CF1
        smlal   SUM1LL, SUM1LH, VHI, CF2

        ldr     VHI, [VB1, #(4*(32 + 23 - $x))]
        ldr     VLO, [VB1, #(4*(32 + $x))]

        smlal   SUM1RL, SUM1RH, VHI, CF2        ; CF2 still holds -c2 here
        smlal   SUM2RL, SUM2RH, VHI, CF1
        rsb     CF2, CF2, #0                    ; CF2 = +c2 again
        smlal   SUM1RL, SUM1RH, VLO, CF1
        smlal   SUM2RL, SUM2RH, VLO, CF2

        MEND    ; MC2S

; ---------------------------------------------------------------------------
; void PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase)
;
; In    : r0 = pcm, r1 = vbuf, r2 = coefBase
; Out   : interleaved 16-bit samples stored through pcm:
;           halfwords 0 and 1          (sample 0,  from MC0S)
;           halfwords 2*16 and 2*16+1  (sample 16, from MC1S)
;           15 loop iterations, each storing one pair at the advancing pcm
;           pointer (SUM1) and one pair 4*i halfwords beyond it (SUM2),
;           i = 15 down to 1
; Stack : 9 saved registers + 8 bytes of locals
; ---------------------------------------------------------------------------
xmp3_PolyphaseStereo    FUNCTION
        EXPORT  xmp3_PolyphaseStereo

        stmfd   sp!, {r4-r11, r14}

        ; clear out stack space for 2 local variables (4 bytes each)
        sub     sp, sp, #8
        str     PCM, [sp, #4]                   ; sp[1] = pcm pointer (r0 is reused as VLO)

        ; special case, output sample 0
        mov     SUM1LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0

        MC0S    0
        MC0S    1
        MC0S    2
        MC0S    3
        MC0S    4
        MC0S    5
        MC0S    6
        MC0S    7

        ldr     PCM, [sp, #4]                   ; load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           ; MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*0)]
        strh    SUM1RL, [PCM, #(2*1)]

        ; special case, output sample 16
        add     COEF, COEF, #(4*(256-16))       ; coef = coefBase + 256 (was coefBase + 16 after MC0S block)
        add     VB1, VB1, #(4*1024)             ; vb1 = vbuf + 64*16

        mov     SUM1LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM1RH, #0

        MC1S    0
        MC1S    1
        MC1S    2
        MC1S    3
        MC1S    4
        MC1S    5
        MC1S    6
        MC1S    7

        ldr     PCM, [sp, #4]                   ; load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           ; MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*(2*16+0))]
        strh    SUM1RL, [PCM, #(2*(2*16+1))]

        ; main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17
        sub     COEF, COEF, #(4*(264-16))       ; coef = coefBase + 16 (was coefBase + 264 after MC1S block)
        sub     VB1, VB1, #(4*(1024-64))        ; vb1 = vbuf + 64 (was vbuf + 64*16 after MC1S block)
        mov     I, #15                          ; loop counter, count down
        add     PCM, PCM, #(2*2)                ; pcm+=2

LoopPS
        str     I, [sp, #0]                     ; sp[0] = i (loop counter)
        str     PCM, [sp, #4]                   ; sp[1] = pcm (pointer to pcm buffer)

        mov     SUM1LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1RL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM2LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM2RL, #RNDVAL                 ; load rndVal (low 32)

        mov     SUM1LH, #0
        mov     SUM1RH, #0
        mov     SUM2LH, #0
        mov     SUM2RH, #0

        MC2S    0
        MC2S    1
        MC2S    2
        MC2S    3
        MC2S    4
        MC2S    5
        MC2S    6
        MC2S    7

        add     VB1, VB1, #(4*64)               ; vb1 += 64

        ldr     PCM, [sp, #4]                   ; load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           ; MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM1RL, SUM1RH, SIGN, MAXPOS
        C64TOS  SUM2LL, SUM2LH, SIGN, MAXPOS
        C64TOS  SUM2RL, SUM2RH, SIGN, MAXPOS

        ldr     I, [sp, #0]                     ; load loop counter (r12 was used as SIGN)
        add     CF2, PCM, I, lsl #3             ; CF2 = PCM + 4*i (short offset)
        strh    SUM2LL, [CF2], #2               ; *(pcm + 2*2*i + 0)
        strh    SUM2RL, [CF2], #2               ; *(pcm + 2*2*i + 1)

        strh    SUM1LL, [PCM], #2               ; *(pcm + 0)
        strh    SUM1RL, [PCM], #2               ; *(pcm + 1)

        subs    I, I, #1
        bne     LoopPS

        ; restore stack pointer
        add     sp, sp, #8

        ldmfd   sp!, {r4-r11, pc}
        ENDFUNC

; ---------------------------------------------------------------------------
; MONO PROCESSING
; ---------------------------------------------------------------------------

; MC0M - 1 sample (sample 0): loads two coefficients and accumulates
;        vb1[x]*c1 - vb1[23-x]*c2 into SUM1L.
;   x = vb1 offset
; Advances COEF by 2 words. Overwrites CF1, CF2, VLO (r0), VHI.
        MACRO
        MC0M    $x

        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*($x))]
        ldr     VHI, [VB1, #(4*(23 - $x))]

        rsb     CF2, CF2, #0                    ; CF2 = -CF2
        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM1LL, SUM1LH, VHI, CF2

        MEND    ; MC0M

; MC1M - 1 sample (sample 16): loads one coefficient and accumulates
;        vb1[x]*c1 into SUM1L.
;   x = vb1 offset
; Advances COEF by 1 word. Overwrites CF1, VLO (r0).
        MACRO
        MC1M    $x

        ldr     CF1, [COEF], #4
        ldr     VLO, [VB1, #(4*($x))]
        smlal   SUM1LL, SUM1LH, VLO, CF1

        MEND    ; MC1M

; MC2M - process 2 taps, 2 samples
;   x = vb1 offset
;   SUM1 += vLo*c1 - vHi*c2
;   SUM2 += vLo*c2 + vHi*c1
; Advances COEF by 2 words. Overwrites CF1, CF2 (left negated), VLO (r0), VHI.
        MACRO
        MC2M    $x

        ; load data as far as possible in advance of using it
        ldr     CF1, [COEF], #4
        ldr     CF2, [COEF], #4
        ldr     VLO, [VB1, #(4*($x))]
        ldr     VHI, [VB1, #(4*(23 - $x))]

        smlal   SUM1LL, SUM1LH, VLO, CF1
        smlal   SUM2LL, SUM2LH, VLO, CF2
        rsb     CF2, CF2, #0                    ; CF2 = -c2
        smlal   SUM1LL, SUM1LH, VHI, CF2
        smlal   SUM2LL, SUM2LH, VHI, CF1

        MEND    ; MC2M

; ---------------------------------------------------------------------------
; void PolyphaseMono(short *pcm, int *vbuf, const int *coefBase)
;
; In    : r0 = pcm, r1 = vbuf, r2 = coefBase
; Out   : 16-bit samples stored through pcm:
;           halfword 0   (sample 0,  from MC0M)
;           halfword 16  (sample 16, from MC1M)
;           15 loop iterations, each storing one sample at the advancing pcm
;           pointer (SUM1) and one sample 2*i halfwords beyond it (SUM2),
;           i = 15 down to 1
; Stack : 9 saved registers + 8 bytes of locals
; ---------------------------------------------------------------------------
xmp3_PolyphaseMono      FUNCTION
        EXPORT  xmp3_PolyphaseMono

        stmfd   sp!, {r4-r11, r14}

        ; clear out stack space for 2 local variables (4 bytes each)
        sub     sp, sp, #8
        str     PCM, [sp, #4]                   ; sp[1] = pcm pointer (r0 is reused as VLO)

        ; special case, output sample 0
        mov     SUM1LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1LH, #0

        MC0M    0
        MC0M    1
        MC0M    2
        MC0M    3
        MC0M    4
        MC0M    5
        MC0M    6
        MC0M    7

        ldr     PCM, [sp, #4]                   ; load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           ; MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        strh    SUM1LL, [PCM, #(2*0)]

        ; special case, output sample 16
        add     COEF, COEF, #(4*(256-16))       ; coef = coefBase + 256 (was coefBase + 16 after MC0M block)
        add     VB1, VB1, #(4*1024)             ; vb1 = vbuf + 64*16

        mov     SUM1LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1LH, #0

        MC1M    0
        MC1M    1
        MC1M    2
        MC1M    3
        MC1M    4
        MC1M    5
        MC1M    6
        MC1M    7

        ldr     PCM, [sp, #4]                   ; load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           ; MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS

        strh    SUM1LL, [PCM, #(2*16)]

        ; main convolution loop: sum1L = samples 1, 2, 3, ... 15   sum2L = samples 31, 30, ... 17
        sub     COEF, COEF, #(4*(264-16))       ; coef = coefBase + 16 (was coefBase + 264 after MC1M block)
        sub     VB1, VB1, #(4*(1024-64))        ; vb1 = vbuf + 64 (was vbuf + 64*16 after MC1M block)
        mov     I, #15                          ; loop counter, count down
        add     PCM, PCM, #(2)                  ; pcm++

LoopPM
        str     I, [sp, #0]                     ; sp[0] = i (loop counter)
        str     PCM, [sp, #4]                   ; sp[1] = pcm (pointer to pcm buffer)

        mov     SUM1LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM2LL, #RNDVAL                 ; load rndVal (low 32)
        mov     SUM1LH, #0
        mov     SUM2LH, #0

        MC2M    0
        MC2M    1
        MC2M    2
        MC2M    3
        MC2M    4
        MC2M    5
        MC2M    6
        MC2M    7

        add     VB1, VB1, #(4*64)               ; vb1 += 64

        ldr     PCM, [sp, #4]                   ; load pcm pointer
        mov     MAXPOS, #0x7f00
        orr     MAXPOS, MAXPOS, #0xff           ; MAXPOS = 0x7fff

        C64TOS  SUM1LL, SUM1LH, SIGN, MAXPOS
        C64TOS  SUM2LL, SUM2LH, SIGN, MAXPOS

        ldr     I, [sp, #0]                     ; load loop counter (r12 was used as SIGN)
        add     CF2, PCM, I, lsl #2             ; CF2 = PCM + 2*i (short offset)
        strh    SUM2LL, [CF2], #2               ; *(pcm + 2*i + 0)
        strh    SUM1LL, [PCM], #2               ; *(pcm + 0) ; pcm++

        subs    I, I, #1
        bne     LoopPM

        ; restore stack pointer
        add     sp, sp, #8

        ldmfd   sp!, {r4-r11, pc}
        ENDFUNC

        END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -