@ File: imdct_l_arm.s
@ [code-viewer page text, not part of the source] Font size:
movs r4, r4, lsr #28
adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5
str r4, [r1, #x1] @ store result x1
@----
@ Produce outputs x25 and x19.
@ Two signed 64-bit sums are built in register pairs (low word, high word):
@   r4..r5 = ct04 - K14*ct15 - K03*ct17 + K15*ct16 - K02*ct14   -> x25
@   r2..r3 = -ct06 - K14*ct14 - K03*ct16 - K15*ct17 + K02*ct15  -> x19
@ Each sum is then reduced to a 32-bit result by taking bits[59..28].
@ r1 is the base of the output array; x25 / x19 are symbolic byte offsets
@ defined outside this excerpt.
ldmia sp, { r2, r3, r4, r5 } @ r2..r3 = ct06, r4..r5 = ct04 (dont update sp)
@ Register contents at this point (r6..r12 and lr were set up before this
@ excerpt; the list below is the original author's record of them):
@ r2..r3 = ct06
@ r4..r5 = ct04
@ r6 = ct15
@ r7 = ct14
@ r8 = ct16
@ r9 = ct17
@ r10 = -K03
@ r11 = -K02
@ r12 = -K14
@ lr = K15
@ 64-bit negate: rsbs computes 0 - low and records the borrow in the carry
@ flag, rsc then computes 0 - high using that borrow.
rsbs r2, r2, #0
rsc r3, r3, #0 @ r2..r3 = -ct06
@ smlal RdLo, RdHi, Rm, Rs adds the signed 64-bit product Rm*Rs to RdHi:RdLo.
smlal r2, r3, r12, r7 @ r2..r3 = -ct06 + (ct14 * -K14)
smlal r2, r3, r10, r8 @ r2..r3 += (ct16 * -K03)
smlal r4, r5, r12, r6 @ r4..r5 = ct04 + (ct15 * -K14)
smlal r4, r5, r10, r9 @ r4..r5 += (ct17 * -K03)
smlal r4, r5, lr, r8 @ r4..r5 += (ct16 * K15)
smlal r4, r5, r11, r7 @ r4..r5 += (ct14 * -K02)
@ The r4..r5 sum is complete; flip the sign of the two constants that the
@ r2..r3 sum needs with the opposite sign. They stay flipped afterwards.
rsb lr, lr, #0 @ lr = -K15
rsb r11, r11, #0 @ r11 = K02
smlal r2, r3, lr, r9 @ r2..r3 += (ct17 * -K15)
smlal r2, r3, r11, r6 @ r2..r3 += (ct15 * K02)
@ Extract bits[59..28] of the 64-bit sum. movs ... lsr #28 leaves the last
@ bit shifted out (bit 27) in the carry flag, and adc adds it back in, so
@ the result is rounded rather than truncated.
movs r4, r4, lsr #28
adc r4, r4, r5, lsl #4 @ r4 = bits[59..28] of r4..r5
str r4, [r1, #x25] @ store result x25
movs r2, r2, lsr #28
adc r2, r2, r3, lsl #4 @ r2 = bits[59..28] of r2..r3
str r2, [r1, #x19] @ store result x19
@----
@ Build the 64-bit temporaries ct05, ct03, ct07 and ct02 from ct01, ct00 and
@ the inputs X1, X7, X10, X16 (read through r0), and push them on the stack
@ for the table-driven loop that follows.
@ Stack layout once this segment has finished (byte offsets from the new sp,
@ each entry a low word followed by a high word):
@   +0  ct02    +8  ct07    +16 -ct07
@   +24 -ct05   +32 ct03    +40 ct05
ldr r2, [sp, #16] @ r2 = ct01_l
ldr r3, [sp, #20] @ r3 = ct01_h
ldr r6, [r0, #X1]
ldr r8, [r0, #X7]
ldr r9, [r0, #X10]
ldr r7, [r0, #X16]
@ 64-bit negate of ct01 (rsbs sets the borrow, rsc consumes it), then copy
@ it so both accumulators start from -ct01.
rsbs r2, r2, #0
rsc r3, r3, #0 @ r2..r3 = -ct01
mov r4, r2
mov r5, r3 @ r4..r5 = -ct01
@ r2..r3 = -ct01
@ r4..r5 = -ct01
@ r6 = X1
@ r7 = X16
@ r8 = X7
@ r9 = X10
@ r10 = -K03
@ r11 = K02
@ r12 = -K14
@ lr = -K15
@ The two accumulations are interleaved instruction by instruction; they are
@ independent of each other (r4..r5 becomes ct05, r2..r3 becomes ct03).
smlal r4, r5, r12, r7 @ r4..r5 = -ct01 + (X16 * -K14)
smlal r2, r3, lr, r9 @ r2..r3 = -ct01 + (X10 * -K15)
smlal r4, r5, r10, r8 @ r4..r5 += (X7 * -K03)
smlal r2, r3, r10, r7 @ r2..r3 += (X16 * -K03)
smlal r4, r5, r11, r9 @ r4..r5 += (X10 * K02)
smlal r2, r3, r12, r8 @ r2..r3 += (X7 * -K14)
@ Flip the sign of K15 and K02 in place for the remaining two terms.
rsb lr, lr, #0 @ lr = K15
rsb r11, r11, #0 @ r11 = -K02
smlal r4, r5, lr, r6 @ r4..r5 += (X1 * K15) = ct05
smlal r2, r3, r11, r6 @ r2..r3 += (X1 * -K02) = ct03
@ stmdb stores the lowest-numbered register at the lowest address, so after
@ this push memory reads (ascending) ct03_l, ct03_h, ct05_l, ct05_h.
stmdb sp!, { r2, r3, r4, r5 } @ stack ct05_h, ct05_l, ct03_h, ct03_l
rsbs r4, r4, #0
rsc r5, r5, #0 @ r4..r5 = -ct05
stmdb sp!, { r4, r5 } @ stack -ct05_h, -ct05_l
@ 24 bytes have been pushed since the ct01 loads above, so these offsets
@ (48/52) address what was at sp+24/sp+28 at the start of this segment.
ldr r2, [sp, #48] @ r2 = ct00_l
ldr r3, [sp, #52] @ r3 = ct00_h
rsb r10, r10, #0 @ r10 = K03
@ Negate ct00 into a second register pair; r2..r3 keeps the original ct00.
rsbs r4, r2, #0
rsc r5, r3, #0 @ r4..r5 = -ct00
@ r2..r3 = ct00
@ r4..r5 = -ct00
@ r6 = X1
@ r7 = X16
@ r8 = X7
@ r9 = X10
@ r10 = K03
@ r11 = -K02
@ r12 = -K14
@ lr = K15
smlal r4, r5, r10, r6 @ r4..r5 = -ct00 + (X1 * K03)
smlal r2, r3, r10, r9 @ r2..r3 = ct00 + (X10 * K03)
smlal r4, r5, r12, r9 @ r4..r5 += (X10 * -K14)
smlal r2, r3, r12, r6 @ r2..r3 += (X1 * -K14)
smlal r4, r5, r11, r7 @ r4..r5 += (X16 * -K02)
smlal r4, r5, lr, r8 @ r4..r5 += (X7 * K15) = ct07
@ ct07 is complete; flip K15 and K02 again for the last two ct02 terms.
rsb lr, lr, #0 @ lr = -K15
rsb r11, r11, #0 @ r11 = K02
smlal r2, r3, r11, r8 @ r2..r3 += (X7 * K02)
smlal r2, r3, lr, r7 @ r2..r3 += (X16 * -K15) = ct02
rsbs r6, r4, #0
rsc r7, r5, #0 @ r6..r7 = -ct07
@ Six words pushed; ascending in memory: ct02, ct07, -ct07 (low word first).
stmdb sp!, { r2 - r7 } @ stack -ct07_h, -ct07_l, ct07_h, ct07_l, ct02_h, ct02_l
@----
@ Table-driven output loop.
@ Each pass consumes one 13-word row of imdct36_long_karray: twelve
@ coefficients that are multiplied with X0, X2, X3, X5, X6, X8, X9, X11,
@ X12, X14, X15, X17 (read through r0) and summed into the 64-bit pair
@ r3..r4, followed by one packed control word (loaded into r10):
@   bits[31..16] byte offset from sp of the stacked 64-bit ct term to add
@   bits[15..8]  byte offset from r1 at which the 32-bit result is stored
@   bits[7..0]   non-zero only in the last row; ends the loop
@ Register use inside the loop: r2 = table read pointer (advanced by the
@ ldmia write-back), r5..r12 = scratch for coefficients and inputs.
@ Reading pc in ARM state yields the address of the current instruction
@ plus 8, which is why 8 is subtracted from the label distance below.
add r2, pc, #(imdct36_long_karray-.-8) @ r2 = base address of Knn array (PIC safe ?)
loop:
ldr r12, [r0, #X0]
ldmia r2!, { r5 - r11 } @ first 7 words from Karray element
@ smull starts a fresh signed 64-bit sum; every later term uses smlal.
smull r3, r4, r5, r12 @ sum = (Kxx * X0)
ldr r12, [r0, #X2]
ldr r5, [r0, #X3]
smlal r3, r4, r6, r12 @ sum += (Kxx * X2)
ldr r12, [r0, #X5]
ldr r6, [r0, #X6]
smlal r3, r4, r7, r5 @ sum += (Kxx * X3)
smlal r3, r4, r8, r12 @ sum += (Kxx * X5)
ldr r12, [r0, #X8]
ldr r5, [r0, #X9]
smlal r3, r4, r9, r6 @ sum += (Kxx * X6)
smlal r3, r4, r10, r12 @ sum += (Kxx * X8)
smlal r3, r4, r11, r5 @ sum += (Kxx * X9)
@ Second half of the row: five more coefficients in r5..r9 and the packed
@ control word in r10.
ldmia r2!, { r5 - r10 } @ final 6 words from Karray element
ldr r11, [r0, #X11]
ldr r12, [r0, #X12]
smlal r3, r4, r5, r11 @ sum += (Kxx * X11)
ldr r11, [r0, #X14]
ldr r5, [r0, #X15]
smlal r3, r4, r6, r12 @ sum += (Kxx * X12)
smlal r3, r4, r7, r11 @ sum += (Kxx * X14)
ldr r11, [r0, #X17]
smlal r3, r4, r8, r5 @ sum += (Kxx * X15)
smlal r3, r4, r9, r11 @ sum += (Kxx * X17)
@ Control word bits[31..16] select which stacked ct term joins the sum.
add r5, sp, r10, lsr #16 @ create index back into stack for required ctxx
ldmia r5, { r6, r7 } @ r6..r7 = ctxx
@ r8 = control word with its upper half discarded: the store offset is now
@ in bits[31..24] and the completion flag in bits[23..16].
mov r8, r10, lsl #16 @ push ctxx index off the top end
@ 64-bit add of the ct term: adds sets the carry, adc propagates it.
adds r3, r3, r6 @ add low words
adc r4, r4, r7 @ add high words, with carry
@ Take bits[59..28]; the carry left by movs (bit 27) rounds the result.
movs r3, r3, lsr #28
adc r3, r3, r4, lsl #4 @ r3 = bits[59..28] of r3..r4
@ r8 lsr #24 leaves only the store offset as the index from r1.
str r3, [r1, r8, lsr #24] @ push completion flag off the bottom end
@ Shifting out the store offset leaves just the completion flag; movs sets
@ Z when that flag is zero, so beq repeats the loop for the next row.
movs r8, r8, lsl #8 @ push result location index off the top end
beq loop @ loop back if completion flag not set
b imdct_l_windowing @ branch to windowing stage if looping finished
@ Coefficient table read sequentially by the loop above: 12 rows of 13 words.
@ Words 0..11 of a row are the Knn multipliers applied, in this order, to
@ X0, X2, X3, X5, X6, X8, X9, X11, X12, X14, X15, X17.
@ Word 12 is a packed control word:
@   bits[31..16] byte offset from sp of the stacked 64-bit ct term that is
@                added to the sum (0x00, 0x08, 0x10, 0x18, 0x20 or 0x28)
@   bits[15..8]  byte offset into the output array (r1) for the result
@   bits[7..0]   completion flag; 0x01 in the final row only
@ The K00..K17 values themselves are defined outside this excerpt.
imdct36_long_karray:
.word K17, -K13, K10, -K06, -K05, K01, -K00, K04, -K07, K11, K12, -K16, 0x00000000
.word K13, K07, K16, K01, K10, -K05, K04, -K11, K00, -K17, K06, -K12, 0x00200800
.word K11, K17, K05, K12, -K01, K06, -K07, K00, -K13, K04, -K16, K10, 0x00200c00
.word K07, K00, -K12, K05, -K16, -K10, K11, -K17, K04, K13, K01, K06, 0x00001400
.word K05, K10, -K00, -K17, K07, -K13, K12, K06, -K16, K01, -K11, -K04, 0x00181800
.word K01, K05, -K07, -K11, K13, K17, -K16, -K12, K10, K06, -K04, -K00, 0x00102000
.word -K16, K12, -K11, K07, K04, -K00, -K01, K05, -K06, K10, K13, -K17, 0x00284800
.word -K12, K06, K17, -K00, -K11, K04, K05, -K10, K01, K16, -K07, -K13, 0x00085000
.word -K10, K16, K04, -K13, -K00, K07, K06, -K01, -K12, -K05, K17, K11, 0x00105400
.word -K06, -K01, K13, K04, K17, -K11, -K10, -K16, -K05, K12, K00, K07, 0x00185c00
.word -K04, -K11, -K01, K16, K06, K12, K13, -K07, -K17, -K00, -K10, -K05, 0x00006000
.word -K00, -K04, -K06, -K10, -K12, -K16, -K17, -K13, -K11, -K07, -K05, -K01, 0x00206801
@----
@-------------------------------------------------------------------------
@----
@ Entry to the windowing stage, reached from the end of the table loop.
@ Loads the window mode and the first nine outputs, then dispatches: stop
@ blocks branch to stop_block_x0_to_x17, every other mode falls through.
@ NOTE(review): the offset 80 presumably skips the ct temporaries and the
@ registers saved at function entry - confirm against the prologue, which
@ is outside this excerpt.
imdct_l_windowing:
ldr r11, [sp, #80] @ fetch function parameter 3 from out of the stack
@ r0 is reused as a data register from here on; the X[] input pointer it
@ held is overwritten. Write-back advances r1 by nine words.
ldmia r1!, { r0, r2 - r9 } @ load 9 words from x0, update pointer
@ r0 = x0
@ r1 = &x[9]
@ r2 = x1
@ r3 = x2
@ r4 = x3
@ r5 = x4
@ r6 = x5
@ r7 = x6
@ r8 = x7
@ r9 = x8
@ r10 = .
@ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
@ r12 = .
@ lr = .
cmp r11, #BLOCK_MODE_STOP @ setup flags
@ rsb without the S suffix leaves the flags from cmp intact for the beq.
rsb r10, r0, #0 @ r10 = -x0 (DONT change flags !!)
beq stop_block_x0_to_x17
@ start and normal blocks are treated the same for x[0]..x[17]
@ Windowing for normal and start blocks, first half: x[9]..x[17].
@ As the original comments state, x[9+i] is taken as -x[8-i], so the nine
@ values already loaded in registers are negated, multiplied by
@ window_l[9]..window_l[17] and stored at r1 (= &x[9]).
@ Each product is a signed 64-bit value reduced to bits[59..28]; the carry
@ left by movs ... lsr #28 (bit 27) is added by adc, rounding the result.
@ r5 and lr hold the 64-bit product, r12 the current window coefficient.
@ WL0..WL17 are constants defined outside this excerpt; "ldr r12, =WLn"
@ is the assembler's load-constant pseudo-instruction.
normal_block_x0_to_x17:
ldr r12, =WL9 @ r12 = window_l[9]
@ Negate and reverse the order in place. Each rsb overwrites a register
@ whose old value was consumed by the previous rsb, ending with r5 free
@ for use as scratch.
rsb r0, r9, #0 @ r0 = -x8
rsb r9, r2, #0 @ r9 = -x1
rsb r2, r8, #0 @ r2 = -x7
rsb r8, r3, #0 @ r8 = -x2
rsb r3, r7, #0 @ r3 = -x6
rsb r7, r4, #0 @ r7 = -x3
rsb r4, r6, #0 @ r4 = -x5
rsb r6, r5, #0 @ r6 = -x4
@ r0 = -x8
@ r1 = &x[9]
@ r2 = -x7
@ r3 = -x6
@ r4 = -x5
@ r5 = .
@ r6 = -x4
@ r7 = -x3
@ r8 = -x2
@ r9 = -x1
@ r10 = -x0
@ r11 = window mode: (0 == normal), (1 == start block), (3 == stop block)
@ r12 = window_l[9]
@ lr = .
@ Pattern repeated nine times: multiply, load the next coefficient, then
@ round the product back into the register its input came from.
smull r5, lr, r12, r0 @ r5..lr = (window_l[9] * (x[9] == -x[8]))
ldr r12, =WL10 @ r12 = window_l[10]
movs r5, r5, lsr #28
adc r0, r5, lr, lsl #4 @ r0 = bits[59..28] of windowed x9
smull r5, lr, r12, r2 @ r5..lr = (window_l[10] * (x[10] == -x[7]))
ldr r12, =WL11 @ r12 = window_l[11]
movs r5, r5, lsr #28
adc r2, r5, lr, lsl #4 @ r2 = bits[59..28] of windowed x10
smull r5, lr, r12, r3 @ r5..lr = (window_l[11] * (x[11] == -x[6]))
ldr r12, =WL12 @ r12 = window_l[12]
movs r5, r5, lsr #28
adc r3, r5, lr, lsl #4 @ r3 = bits[59..28] of windowed x11
smull r5, lr, r12, r4 @ r5..lr = (window_l[12] * (x[12] == -x[5]))
ldr r12, =WL13 @ r12 = window_l[13]
movs r5, r5, lsr #28
adc r4, r5, lr, lsl #4 @ r4 = bits[59..28] of windowed x12
smull r5, lr, r12, r6 @ r5..lr = (window_l[13] * (x[13] == -x[4]))
ldr r12, =WL14 @ r12 = window_l[14]
movs r5, r5, lsr #28
adc r6, r5, lr, lsl #4 @ r6 = bits[59..28] of windowed x13
smull r5, lr, r12, r7 @ r5..lr = (window_l[14] * (x[14] == -x[3]))
ldr r12, =WL15 @ r12 = window_l[15]
movs r5, r5, lsr #28
adc r7, r5, lr, lsl #4 @ r7 = bits[59..28] of windowed x14
smull r5, lr, r12, r8 @ r5..lr = (window_l[15] * (x[15] == -x[2]))
ldr r12, =WL16 @ r12 = window_l[16]
movs r5, r5, lsr #28
adc r8, r5, lr, lsl #4 @ r8 = bits[59..28] of windowed x15
smull r5, lr, r12, r9 @ r5..lr = (window_l[16] * (x[16] == -x[1]))
ldr r12, =WL17 @ r12 = window_l[17]
movs r5, r5, lsr #28
adc r9, r5, lr, lsl #4 @ r9 = bits[59..28] of windowed x16
smull r5, lr, r12, r10 @ r5..lr = (window_l[17] * (x[17] == -x[0]))
@ window_l[0] is loaded ahead of time for the code that follows the store.
ldr r12, =WL0 @ r12 = window_l[0]
movs r5, r5, lsr #28
adc r10, r5, lr, lsl #4 @ r10 = bits[59..28] of windowed x17
@ Registers are stored in ascending register order, so memory receives
@ x9 (r0), x10..x12 (r2..r4), x13..x17 (r6..r10); scratch r5 is skipped.
stmia r1, { r0, r2 - r4, r6 - r10 } @ store windowed x[9] .. x[17]
ldmdb r1!, { r0, r2 - r9 } @ load 9 words downto (and including) x0
smull r10, lr, r12, r0 @ r10..lr = (window_l[0] * x[0])
ldr r12, =WL1 @ r12 = window_l[1]
movs r10, r10, lsr #28
adc r0, r10, lr, lsl #4 @ r0 = bits[59..28] of windowed x0
smull r10, lr, r12, r2 @ r10..lr = (window_l[1] * x[1])
ldr r12, =WL2 @ r12 = window_l[2]
movs r10, r10, lsr #28
adc r2, r10, lr, lsl #4 @ r2 = bits[59..28] of windowed x1
smull r10, lr, r12, r3 @ r10..lr = (window_l[2] * x[2])
ldr r12, =WL3 @ r12 = window_l[3]
movs r10, r10, lsr #28
adc r3, r10, lr, lsl #4 @ r3 = bits[59..28] of windowed x2
smull r10, lr, r12, r4 @ r10..lr = (window_l[3] * x[3])
ldr r12, =WL4 @ r12 = window_l[4]
movs r10, r10, lsr #28
adc r4, r10, lr, lsl #4 @ r4 = bits[59..28] of windowed x3
smull r10, lr, r12, r5 @ r10..lr = (window_l[4] * x[4])
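@ ---------------------------------------------------------------------
@ [code-viewer page text, not part of the source; the listing is cut off
@ above this point]
@ Keyboard shortcuts:
@   Copy code            Ctrl + C
@   Search code          Ctrl + F
@   Full-screen mode     F11
@   Toggle theme         Ctrl + Shift + D
@   Show shortcuts       ?
@   Increase font size   Ctrl + =
@   Decrease font size   Ctrl + -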