📄 idct_arm.asm
字号:
bl Col8
sub r6, r6, #6
Row4_Loop
ldrsh r4, [r6, #4] ;x3
ldrsh r5, [r6, #6] ;x7
ldrsh r3, [r6, #2] ;x4
ldrsh r0, [r6] ;x0
orr r11, r5, r4
orrs r11, r11, r3
bne Row4_NoConst
bl RowConst
b Row4_Next
Row4_NoConst
cmp r7, #0
ldrsh r10, W7
ldrsh r11, W1
mov r2, #4
add r0, r0, #32
mov r0, r0, lsl #8 ;x0
mla r14, r3, r10, r2 ;x5 = x4 * W7 + 4
ldrsh r10, W3
mla r3, r11, r3, r2 ;x4 = x4 * W1 + 4
mov r14, r14, asr #3 ;x5 >>= 3
ldrsh r11, nW5
mla r12, r5, r10, r2 ;x6 = x7 * W3 + 4
mov r3, r3, asr #3 ;x4 >>= 3
ldrsh r10, W6
mla r5, r11, r5, r2 ;x7 = x7 * -W5 + 4
ldrsh r11, W2
add r9, r3, r12, asr #3 ;x1 = x4 + (x6 >> 3)
sub r3, r3, r12, asr #3 ;x4 = x4 - (x6 >> 3)
mla r12, r4, r10, r2 ;x2 = x3 * W6 + 4
mla r4, r11, r4, r2 ;x3 = x3 * W2 + 4
add r2, r14, r5, asr #3 ;x6 = x5 + (x7 >> 3)
sub r5, r14, r5, asr #3 ;x5 = x5 - (x7 >> 3)
add r14, r0, r4, asr #3 ;x7 = x0 + (x3 >> 3)
sub r4, r0, r4, asr #3 ;x8 = x0 - (x3 >> 3)
add r10, r0, r12, asr #3;x3 = x0 + (x2 >> 3)
sub r0, r0, r12, asr #3 ;x0 = x0 - (x2 >> 3)
add r1, r5, r3
mov r11, #181
mul r12, r1, r11 ;x2 = 181 * (x5 + x4)
sub r3, r3, r5
mul r1, r3, r11 ;x4 = 181 * (x4 - x5)
add r12, r12, #128 ;x2 += 128
add r3, r1, #128 ;x4 += 128
add r1, r14, r9 ;x5 = x7 + x1
sub r5, r14, r9 ;x1 = x7 - x1
add r11, r10, r12, asr #8 ;x7 = x3 + (x2 >> 8)
sub r14, r10, r12, asr #8 ;x2 = x3 - (x2 >> 8)
add r9, r0, r3, asr #8 ;x3 = x0 + (x4 >> 8)
sub r3, r0, r3, asr #8 ;x4 = x0 - (x4 >> 8)
add r12, r4, r2 ;x0 = x8 + x6
sub r4, r4, r2 ;x6 = x8 - x6
beq Row4_NoSrc
ldrb r0, [r7]
ldrb r2, [r7, #7]
ldrb r10, [r7, #1]
add r1, r0, r1, asr #14
add r5, r2, r5, asr #14
add r11, r10, r11, asr #14
ldrb r2, [r7, #6]
ldrb r0, [r7, #2]
ldrb r10, [r7, #5]
add r14, r2, r14, asr #14
add r9, r0, r9, asr #14
ldrb r0, [r7, #3]
ldrb r2, [r7, #4]
add r3, r10, r3, asr #14
add r12, r0, r12, asr #14
add r4, r2, r4, asr #14
add r7, r7, #8 ;source stride
Row4_Sat
orr r0, r5, r14
orr r0, r0, r4
orr r0, r0, r1
orr r0, r0, r12
orr r0, r0, r11
orr r0, r0, r9
orr r0, r0, r3
bics r0, r0, #0xFF ; 0xFF = 255
beq Row4_Write
mov r0, #0xFFFFFF00
tst r1, r0
movne r1, #0xFF
movmi r1, #0x00
tst r11, r0
movne r11, #0xFF
movmi r11, #0x00
tst r9, r0
movne r9, #0xFF
movmi r9, #0x00
tst r12, r0
movne r12, #0xFF
movmi r12, #0x00
tst r4, r0
movne r4, #0xFF
movmi r4, #0x00
tst r3, r0
movne r3, #0xFF
movmi r3, #0x00
tst r14, r0
movne r14, #0xFF
movmi r14, #0x00
tst r5, r0
movne r5, #0xFF
movmi r5, #0x00
Row4_Write
strb r1, [r8]
strb r11,[r8, #1]
strb r9, [r8, #2]
strb r12,[r8, #3]
strb r4, [r8, #4]
strb r3, [r8, #5]
strb r14,[r8, #6]
strb r5, [r8, #7]
Row4_Next
ldr r2, [sp, #4] ;DstStride
ldr r1, [sp, #0] ;BlockEnd
add r6,r6,#16 ;Block += 16
add r8,r8,r2 ;Dst += DstStride
cmp r6,r1
bne Row4_Loop
ldmia sp!, {r0,r2,r4 - r12, pc}
Row4_NoSrc
mov r5, r5, asr #14
mov r14, r14, asr #14
mov r12, r12, asr #14
mov r1, r1, asr #14
mov r11, r11, asr #14
mov r9, r9, asr #14
mov r3, r3, asr #14
mov r4, r4, asr #14
b Row4_Sat
ENDP
; r6 Block
; r7 Src
; r8 Dst
ALIGN 16
;-----------------------------------------------------------------------
; IDCT_Block8x8Swap
;
; Entry point that runs eight Col8Swap passes and then joins the row
; pass of IDCT_Block8x8 at Row8_Loop, reusing that routine's loop and
; epilogue.
;
; In (as used by the code below):
;   r0 = Block      (saved on the stack as BlockEnd = Block + 256)
;   r1 = Dst        (copied to r8)
;   r2 = DstStride  (saved on the stack, reloaded in Row8_Next)
;   r3 = Src        (copied to r7)
;
; Registers handed to Col8Swap / Row8_Loop:
;   r6 = current block pointer, r7 = Src, r8 = Dst,
;   r9 = per-call constant 128 - k*16 + k*2 (k = 0..7)
;
; NOTE(review): Col8Swap is not visible in this part of the file. The
; r6 corrections below (k*16 - (k-1)*2 - 128) are consistent with
; Col8Swap returning with r6 advanced by r9, i.e. reading at
; Block + k*16 and writing at Block + 128 + k*2 -- confirm against
; Col8Swap itself.
; NOTE(review): no ENDP is visible for this PROC before the next PROC
; begins; confirm the assembler in use accepts that.
;-----------------------------------------------------------------------
IDCT_Block8x8Swap PROC
add r0, r0, #256
stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride; [sp]=BlockEnd, [sp,#4]=DstStride
sub r6, r0, #256 ;Block (undo the +256 to recover the original pointer)
mov r7, r3 ;Src
mov r8, r1 ;Dst
; Eight unrolled Col8Swap calls. Before call k: r9 = 128 - 14*k.
; After call k (k < 7): r6 is adjusted by (k+1)*16 - k*2 - 128.
mov r9,#128-0*16+0*2
bl Col8Swap
mov r9,#128-1*16+1*2
add r6, r6, #1*16-0*2-128
bl Col8Swap
mov r9,#128-2*16+2*2
add r6, r6, #2*16-1*2-128
bl Col8Swap
mov r9,#128-3*16+3*2
add r6, r6, #3*16-2*2-128
bl Col8Swap
mov r9,#128-4*16+4*2
add r6, r6, #4*16-3*2-128
bl Col8Swap
mov r9,#128-5*16+5*2
add r6, r6, #5*16-4*2-128
bl Col8Swap
mov r9,#128-6*16+6*2
add r6, r6, #6*16-5*2-128
bl Col8Swap
mov r9,#128-7*16+7*2
add r6, r6, #7*16-6*2-128
bl Col8Swap
; Step r6 back by 14 bytes (seven 2-byte column steps) and continue with
; the shared row pass; Row8_Loop runs until r6 reaches the saved BlockEnd
; and then returns through its own ldmia.
sub r6, r6, #14
b Row8_Loop
ALIGN 16
;-----------------------------------------------------------------------
; IDCT_Block8x8
;
; 8x8 inverse DCT: eight Col8 passes over the coefficient block, then a
; row pass that writes 8 clamped bytes per row to Dst, optionally adding
; 8 source bytes per row.
;
; In (as used by the code below):
;   r0 = Block      pointer to signed 16-bit coefficients, 16 bytes per
;                   row; BlockEnd = Block + 128 is saved on the stack
;   r1 = Dst        output bytes, advanced by DstStride per row
;   r2 = DstStride  saved on the stack, reloaded in Row8_Next
;   r3 = Src        if non-zero, 8 bytes per row are added to the result
;                   and Src advances by a fixed 8 bytes per row;
;                   if zero, the result is written without a source
;
; Register roles in the row loop:
;   r6 = current row in Block, r7 = Src, r8 = Dst
;   r0-r5, r9-r12, r14 = scratch (lr is saved/restored via the stack)
;
; Stack frame: [sp,#0] = BlockEnd, [sp,#4] = DstStride, then r4-r12, lr.
; Returns by popping the saved lr into pc.
;
; Row8_Loop is also entered from IDCT_Block8x8Swap with the same frame
; layout (its BlockEnd is Block + 256).
; Col8 and RowConst are defined outside this block.
;-----------------------------------------------------------------------
IDCT_Block8x8 PROC
add r0, r0, #128
stmdb sp!, {r0, r2, r4 - r12, lr} ; r0=BlockEnd r2=DstStride
sub r6, r0, #128 ;Block
mov r7, r3 ;Src
mov r8, r1 ;Dst
; Column pass: one Col8 call per column, stepping r6 by 2 bytes
; (one 16-bit coefficient) between calls.
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
add r6, r6, #2
bl Col8
sub r6, r6, #14 ; undo the seven +2 steps: r6 = start of first row
; Row pass: one iteration per 16-byte row until r6 == BlockEnd.
Row8_Loop
ldrsh r0, [r6] ;x0
ldrsh r3, [r6, #2] ;x4
ldrsh r4, [r6, #4] ;x3
ldrsh r5, [r6, #6] ;x7
ldrsh r9, [r6, #8] ;x1
ldrsh r2, [r6, #10] ;x6
ldrsh r14,[r6, #12] ;x2
ldrsh r1, [r6, #14] ;x5
; OR together every input except x0; Z is set only if all seven are zero.
orr r11, r3, r4
orr r11, r11, r5
orr r11, r11, r9
orr r11, r11, r2
orr r11, r11, r14
orrs r11, r11, r1
bne Row8_NoConst
; Only x0 can be non-zero: hand the row to RowConst (defined elsewhere).
; bl overwrites lr/r14 (x2), which is no longer needed on this path.
bl RowConst
b Row8_Next
; Constant table read with PC-relative ldrsh below. It is never executed:
; the unconditional branch above skips it and Row8_NoConst is reached
; only through the bne. Negative values are stored as 16-bit two's
; complement and sign-extended by ldrsh.
_W3 DCW 2408 ; 2048*sqrt(2)*cos(3*pi/16)
_W6 DCW 1108 ; 2048*sqrt(2)*cos(6*pi/16)
_W7 DCW 565 ; 2048*sqrt(2)*cos(7*pi/16)
_W1_nW7 DCW 2276 ; W1 - W7
_nW1_nW7 DCW 0xF2B2 ;-3406 = -W1 - W7
_W5_nW3 DCW 0xFCE1 ;-799 = W5 - W3
_nW2_nW6 DCW 0xF138 ;-3784 = -W2 - W6
ALIGN 4
Row8_NoConst
; The flags from this compare are consumed by "beq Row8_NoSrc" far below.
; Nothing in between may set flags (all mul/mla/add/sub/mov are the
; non-S forms), so do not insert flag-setting instructions here.
cmp r7, #0
add r0, r0, #32
ldrsh r10, _W7
mov r0, r0, lsl #11 ;x0 = (x0 + 32) << 11 (32<<11 is the rounding term for the final >>17)
ldrsh r12, _W1_nW7
add r11,r3,r1
mul r11,r10,r11 ;x8 = W7 * (x4 + x5)
ldrsh r10, _nW1_nW7
mla r3, r12, r3, r11 ;x4 = x8 + (W1-W7) * x4
ldrsh r12, _W3
mla r1, r10, r1, r11 ;x5 = x8 + (-W1-W7) * x5
ldrsh r10, _W5_nW3
add r11,r2,r5 ;x6 + x7
mul r11,r12,r11 ;x8 = W3 * (x6 + x7)
mvn r12, #0xFB0 ;-W3-W5 (~0xFB0 = -4017)
mla r2,r10,r2,r11 ;x6 = x8 + (W5-W3) * x6
ldrsh r10, _W6
mla r5,r12,r5,r11 ;x7 = x8 + (-W3-W5) * x7
ldrsh r12, _nW2_nW6
add r11, r0, r9, lsl #11;x8 = x0 + (x1 << 11)
sub r0, r0, r9, lsl #11 ;x0 = x0 - (x1 << 11)
add r9, r4, r14
mul r9, r10, r9 ;x1 = W6 * (x3 + x2)
mov r10, #0x620 ;W2-W6 (0x620 = 1568)
mla r14, r12, r14, r9 ;x2 = x1 + (-W2-W6) * x2
mov r12, #181
mla r4, r10, r4, r9 ;x3 = x1 + (W2-W6) * x3
add r9, r3, r2 ;x1 = x4 + x6
sub r3, r3, r2 ;x4 = x4 - x6
add r2, r1, r5 ;x6 = x5 + x7
sub r1, r1, r5 ;x5 = x5 - x7
add r5, r11, r4 ;x7 = x8 + x3
sub r11, r11, r4 ;x8 = x8 - x3
add r4, r0, r14 ;x3 = x0 + x2
sub r0, r0, r14 ;x0 = x0 - x2
add r3, r3, #4 ; rounding term for the >>3 below (carried into both sum and difference)
add r14, r3, r1 ;x2 = x4 + x5 + 4
sub r3, r3, r1 ;x4 = x4 - x5 + 4
mov r10, #16 ; rounding term for the >>5 applied when these products are used
mov r14, r14, asr #3
mov r3, r3, asr #3
mla r14, r12, r14, r10 ;x2 = 181 * ((x4 + x5 + 4) >> 3) + 16
mla r3, r12, r3, r10 ;x4 = 181 * ((x4 - x5 + 4) >> 3) + 16
; Final butterflies. Output position -> register:
;   0:r1  1:r5  2:r4  3:r0  4:r2  5:r3  6:r14  7:r9
add r1, r5, r9 ;x5 = x7 + x1
sub r9, r5, r9 ;x1 = x7 - x1
add r5, r4, r14, asr #5 ;x7 = x3 + (x2 >> 5)
sub r14,r4, r14, asr #5 ;x2 = x3 - (x2 >> 5)
add r4, r0, r3, asr #5 ;x3 = x0 + (x4 >> 5)
sub r3, r0, r3, asr #5 ;x4 = x0 - (x4 >> 5)
add r0, r11, r2 ;x0 = x8 + x6
sub r2, r11, r2 ;x6 = x8 - x6
beq Row8_NoSrc ; Z still comes from "cmp r7, #0": taken when Src == 0
; Src present: each result is scaled down by 17 bits and added to the
; source byte at the matching position. Loads are interleaved with the
; adds; r10-r12 are free as temporaries at this point.
ldrb r10, [r7]
ldrb r12, [r7, #7]
ldrb r11, [r7, #1]
add r1, r10, r1, asr #17
add r9, r12, r9, asr #17
add r5, r11, r5, asr #17
ldrb r10, [r7, #6]
ldrb r12, [r7, #2]
ldrb r11, [r7, #5]
add r14, r10, r14, asr #17
add r4, r12, r4, asr #17
ldrb r10, [r7, #3]
ldrb r12, [r7, #4]
add r3, r11, r3, asr #17
add r0, r10, r0, asr #17
add r2, r12, r2, asr #17
add r7, r7, #8 ;source stride (fixed 8 bytes per row)
; Clamp the eight results to 0..255.
Row8_Sat
; Fast path: OR all eight values; if no bit above bit 7 is set in any of
; them, every value is already in 0..255 and can be stored as is.
orr r10, r1, r9
orr r10, r10, r5
orr r10, r10, r14
orr r10, r10, r4
orr r10, r10, r3
orr r10, r10, r0
orr r10, r10, r2
bics r10, r10, #0xFF ; 0xFF = 255
beq Row8_Write
; Slow path, per value: tst against 0xFFFFFF00 sets NE when the value is
; outside 0..255 and MI when it is negative. Out of range -> 255, then
; negative overrides that with 0.
mov r10, #0xFFFFFF00
tst r1, r10
movne r1, #0xFF
movmi r1, #0x00
tst r9, r10
movne r9, #0xFF
movmi r9, #0x00
tst r5, r10
movne r5, #0xFF
movmi r5, #0x00
tst r14, r10
movne r14, #0xFF
movmi r14, #0x00
tst r4, r10
movne r4, #0xFF
movmi r4, #0x00
tst r3, r10
movne r3, #0xFF
movmi r3, #0x00
tst r0, r10
movne r0, #0xFF
movmi r0, #0x00
tst r2, r10
movne r2, #0xFF
movmi r2, #0x00
; Store the eight output bytes of this row.
Row8_Write
strb r1, [r8]
strb r5, [r8, #1]
strb r4, [r8, #2]
strb r0, [r8, #3]
strb r2, [r8, #4]
strb r3, [r8, #5]
strb r14,[r8, #6]
strb r9, [r8, #7]
; Advance to the next row; also the join point after RowConst.
Row8_Next
ldr r2, [sp, #4] ;DstStride
ldr r1, [sp, #0] ;BlockEnd
add r6,r6,#16 ;Block += 16
add r8,r8,r2 ;Dst += DstStride
cmp r6,r1
bne Row8_Loop
ldmia sp!, {r0,r2,r4 - r12, pc} ; pop the frame pushed on entry; loading pc returns to the caller
; No source block: just scale the results down by 17 bits, then clamp.
Row8_NoSrc
mov r1, r1, asr #17
mov r9, r9, asr #17
mov r5, r5, asr #17
mov r14, r14, asr #17
mov r4, r4, asr #17
mov r3, r3, asr #17
mov r0, r0, asr #17
mov r2, r2, asr #17
b Row8_Sat
ENDP
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -