📄 idct.s
字号:
;************************************************************************
; (c)1996-1997 Copyright M-Pact, Inc. All rights reserved
;
; Revision 1.0
;
;
;
; Module: SUB_SubBandSynthesis
;
; Initial version:Jinshi Huang 12/9/96
; Jinshi Huang 12/1/98 for LS388/LS500
; Calling : SUB_compute_new_v
; Called by: Main
; Return: r26, samples
; Param in: r21 (segment), r20 (channel)
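; Temp reg: r0-r7, r9-r12, r15, r16, r22, r29 (used by jsr) -- as seen in the visible part of this routine
; AGR reg: AGR0, AGR2, and AGR1 (AGR5 instead of AGR1 when EFFECT is defined); AGRMod0-3/AGRSiz0-3 are reset on entry
; Local buffer: local_fraction, local_new_v, local_buf0, local_buf1, local_sample_d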
;************************************************************************
.nolist
#include "regdef.h"
#include "memory.h"
#include "constant.h"
#include "user.h"
.list
.text
.globl SUB_SubBandSynthesis
SUB_SubBandSynthesis:
; inputs: r20 = channel; r21 = segment
movi AGRMod0, 0
movi AGRSiz0, 0xffff ;linear addressing
movi AGRMod1, 0
movi AGRSiz1, 0xffff ;linear addressing
movi AGRMod2, 0
movi AGRSiz2, 0xffff ;linear addressing
movi AGRMod3, 0
movi AGRSiz3, 0xffff ;linear addressing
;--------------------- fast IDCT ------------------------------------
multi r0, r20, 3*SBLIMIT
multi r1, r21, SBLIMIT //input in [chan][seg][i] format
// multi r0, r20, SBLIMIT
// multi r1, r21, 2*SBLIMIT //input in [seg][chan][i] format
nop
nop //pipeline
add r22, r0, r1
shl r22, 2 // to byte addr
addi r22, local_fraction_byte
jsr r29,SUB_compute_new_v // fast IDCT
;------------ put IDCT results into buffers -------------------------
multi r12, r20, 256*4 // channel * buffer_size (*4 for bytes)
;sample 0
movw r4, Lwrite_ptr //pipeline. as byte offset
movb r9, LBbuf_flag
.if 1 // debug
addi r10, r12, F2Abuf0 // buf0 [channel]
add r10, r4 // buf0 [channel][write_ptr]
movw r0, %local_new_v_byte //new_v[0]
dswr r0, r10
;samples 1 to 30
tsti r9, 0
beq buf_flag_0
movi r5, F2Abuf1+16*4
movi r6, F2Abuf0+16*4
j write_to_local_buf
buf_flag_0:
movi r5, F2Abuf0+16*4
movi r6, F2Abuf1+16*4
write_to_local_buf:
movi BlockSkip, 15 ; with skip of 16 words
movi BlockSize, 0
movi DMASize, 14 ;store 15 words
; copy samples 1 to 15
movi LocalAddr, local_new_v_word+1 ;start with sample 1
add r15, r5, r12 // buf1/0 [channel]
add r15, r4 // bufOffsetPtr[16+write_ptr]
shr r15, 2 //in word addr
dmawr r15 ;bufOffsetPtr[(i+1)*16+write_ptr]=new_v[i+1]
wait_dma_1_15:
mov r2,StatusPort
tsti r2,DmaDoneBit
bz wait_dma_1_15 ;wait for dma done
; copy samples 16 to 30
movi LocalAddr, local_new_v_word+16 ;start with sample 16
add r16, r6, r12 // buf0/1 [channel]
add r16, r4 // bufOffsetPtr[16+write_ptr]
shr r16, 2 //in word addr
dmawr r16 ;bufOffsetPtr[(i+1)*16+write_ptr]=new_v[i+16]
wait_dma_16_30:
mov r2,StatusPort
tsti r2,DmaDoneBit
bz wait_dma_16_30 ;wait for dma done
;sample 31
addi r11, r12, F2Abuf1 // buf1 [channel]
add r11, r4 // buf1 [channel][write_ptr]
movw r0, %(local_new_v_byte+31*4) //new_v[31]
dswr r0, r11
.endif
;--------------- retrieve data from buffer -------------------------------
movi BlockSkip, 0 ; with no skip
movi BlockSize, 31 ;get 32 words each time
movi DMASize, 31
addi r5, r12, F2Abuf0 ;buf0[channel]
shr r5, 2 ; to word addr
movi r1, local_buf0_word
CLR_TrapReg
loop 8, read_buf0 ; total of 256 words
mov LocalAddr, r1
dmarr r5
wait_dma_buf0:
mov r2,StatusPort
tsti r2,DmaDoneBit
bz wait_dma_buf0 ;wait for dma done
addi r1, 32
addi r5, 32
read_buf0:
SET_TrapReg
addi r5, r12, F2Abuf1 ;buf1[channel]
shr r5, 2 ; to word addr
movi r1, local_buf1_word
CLR_TrapReg
loop 8, read_buf1 ; total of 256 words
mov LocalAddr, r1
dmarr r5
wait_dma_buf1:
mov r2,StatusPort
tsti r2,DmaDoneBit
bz wait_dma_buf1 ;wait for dma done
addi r1, 32
addi r5, 32
read_buf1:
SET_TrapReg
.if DB_BUF0
movi r10,local_buf0_byte
rswi r10,0x500 // begining of dump addr
movi r10,local_buf0_byte+256*4
rswi r10,0x504 // end of dump addr
movi r10,1
rswi r10,0x508 // start dumping
.endif
.if DB_BUF1
movi r10,local_buf1_byte
rswi r10,0x500 // begining of dump addr
movi r10,local_buf1_byte+256*4
rswi r10,0x504 // end of dump addr
movi r10,1
rswi r10,0x508 // start dumping
.endif
;--------------- do windowing to produce 32 PCM samples ------------------
// li r10, 0x48fbfe // scaled down, then left shift
li r10, 0x510d12 // ZhangWei:make 0db sinewave output p-p-v=6.3V
// May cause clipping for 0dB signal??
// dlw r10,0xff30
multi r0, r20, 3*SBLIMIT //output in [chan][seg][i] format
multi r1, r21, SBLIMIT
movi AGRAdr0,LFAnew_win_byte ;new_win. pipeline
#ifdef EFFECT
movi AGRSiz5, 15*4 ;circular buffer with length = 16
#else //EFFECT
movi AGRSiz1, 15*4 ;circular buffer with length = 16
#endif //EFFECT
add r22, r0, r1
shl r22, 2 // to byte addr
addi r22, local_sample_d_byte
mov AGRAdr2, r22
; -----------------------PCM sample 0
#ifdef EFFECT
movi AGRInc0,1*4
#endif //EFFECT
addi r1, r4, local_buf0_byte
#ifdef EFFECT
mov AGRAdr5, r1 ;buf0 [channel][write_ptr]
#else //EFFECT
mov AGRAdr1, r1 ;buf0 [channel][write_ptr]
#endif //EFFECT
movi r2, 0
mulf r0, r2, r3 ;sum = 0
CLR_TrapReg
loop 8, windowing_0
#ifdef EFFECT
madd r0, i0, a5(-1*4) ;sum+=new_win[2*i]*bufOffsetPtr[k--]
msub r0, i0, a5(-1*4) ;sum-=new_win[2*i+1]*bufOffsetPtr[k--]
#else //EFFECT
mov r2, a0(1*4)
madd r0, r2, a1(-1*4) ;sum+=new_win[2*i]*bufOffsetPtr[k--]
mov r2, a0(1*4)
msub r0, r2, a1(-1*4) ;sum-=new_win[2*i+1]*bufOffsetPtr[k--]
#endif //EFFECT
windowing_0:
nop
mulf r6,r0,r10
/*
nop
nop
nop
shr r0,r7,21
shl r7,2
shl r6,2
or r6,r0
*/
;----------------------- PCM samples 1 to 15
tsti r9, 0
beq buf_flag_0_a
addi r3, r4, local_buf1_byte+16*4 ;r3=buf1[channel][16+write_ptr]
j cont_a
buf_flag_0_a:
addi r3, r4, local_buf0_byte+16*4 ;r3=buf0[channel][16+write_ptr]
cont_a:
shr r0,r7,21
shl r7,2
shl r6,2
or r6,r0
CLR_TrapReg
loop 15, windowing_1_to_15
#ifdef EFFECT
mov AGRAdr5, r3 ;r3=buf_base+16+write_ptr
#else //EFFECT
mov AGRAdr1, r3 ;r3=buf_base+16+write_ptr
#endif //EFFECT
movi r2, 0
mulf r0, r2, r3 ;sum = 0
loop 16, inner_loop_1_to_15
#ifdef EFFECT
madd r0, i0, a5(-1*4) ;sum+=new_win[2*i]*bufOffsetPtr[k--]
#else //EFFECT
mov r2, a0(1*4)
madd r0, r2, a1(-1*4) ;sum+=new_win[2*i]*bufOffsetPtr[k--]
#endif //EFFECT
inner_loop_1_to_15:
rnd a2(1*4), r6 ;saturate to 24-bit,save sample 0, then other
// addi r3, 16*4 ;buf_base += 16. *4 for bytes. pipeline
mulf r6,r0,r10
// nop
addi r3, 16*4 ;buf_base += 16. *4 for bytes. pipeline
nop
nop
shr r0,r7,21
shl r7,2
shl r6,2
or r6,r0
windowing_1_to_15:
SET_TrapReg
;--------------------- PCM sample 16
addi r3, r4, local_buf1_byte
#ifdef EFFECT
mov AGRAdr5, r3 ;buf1 [channel][write_ptr]
#else //EFFECT
mov AGRAdr1, r3 ;buf1 [channel][write_ptr]
#endif //EFFECT
movi r2, 0
mulf r0, r2, r3 ;sum = 0
#ifdef EFFECT
movi AGRInc0,2*4
mov r2, a5(-1*4) ;inc to odd address
#else //EFFECT
mov r2, a1(-1*4) ;inc to odd address
#endif //EFFECT
mov r2, a0(1*4) ;inc to odd address
CLR_TrapReg
loop 8, windowing_16
#ifdef EFFECT
madd r0, i0, a5(-2*4);sum+=new_win[16*16+2*i+1]*bufOffsetPtr[k--]
#else //EFFECT
mov r2, a0(2*4)
madd r0, r2, a1(-2*4);sum+=new_win[16*16+2*i+1]*bufOffsetPtr[k--]
#endif //EFFECT
windowing_16:
mov r2, a0(-1*4) ;dec to even address. pipeline
rnd a2(1*4), r6 ;saturate to 24-bit,save sample 15
mulf r6,r0,r10
/*
nop
nop
nop
shr r0,r7,21
shl r7,2
shl r6,2
or r6,r0
*/
;----------------------- PCM samples 17 to 31
tsti r9, 0
beq buf_flag_0_b
addi r3, r4, local_buf1_byte+15*16*4 ;r3=buf1[channel][15*16+write_ptr]
j cont_b
buf_flag_0_b:
addi r3, r4, local_buf0_byte+15*16*4 ;r3=buf0[channel][15*16+write_ptr]
cont_b:
shr r0,r7,21
shl r7,2
shl r6,2
or r6,r0
#ifdef EFFECT
movi AGRInc0,1*4
#endif //EFFECT
CLR_TrapReg
loop 15, windowing_17_to_32 ;for (j=17; j<32; j++) ...
#ifdef EFFECT
mov AGRAdr5, r3 ;r3=buf_base+15*16+write_ptr
#else //EFFECT
mov AGRAdr1, r3 ;r3=buf_base+15*16+write_ptr
#endif //EFFECT
movi r2, 0
mulf r0, r2, r3 ;sum = 0
loop 8, inner_loop_17_to_32
#ifdef EFFECT
msub r0, i0, a5(-1*4) ;sum-=new_win[j*16+2*i]*bufOffsetPtr[k--]
madd r0, i0, a5(-1*4) ;sum+=new_win[j*16+2*i]*bufOffsetPtr[k--]
#else //EFFECT
mov r2, a0(1*4)
msub r0, r2, a1(-1*4) ;sum-=new_win[j*16+2*i]*bufOffsetPtr[k--]
mov r2, a0(1*4)
madd r0, r2, a1(-1*4) ;sum+=new_win[j*16+2*i]*bufOffsetPtr[k--]
#endif //EFFECT
inner_loop_17_to_32:
rnd a2(1*4), r6 ;saturate to 24-bit,save sample 16, then other
// subi r3, 16*4 ;buf_base -= 16. *4 for bytes
mulf r6,r0,r10
// nop
subi r3, 16*4 ;buf_base -= 16. *4 for bytes
nop
nop
shr r0,r7,21
shl r7,2
shl r6,2
or r6,r0
windowing_17_to_32:
SET_TrapReg
;----------------- update write_ptr and buf_flag if needed ----------
tsti r20, 0
bne no_write_ptr_update ;update only for channel 0
addi r4, 4 ;write_ptr++
andi r4, 0x003f ;modulo-16*4 (*4 for bytes)
movw Lwrite_ptr, r4
xori r9, 0x0001 ;toggle buf_flag
movb LBbuf_flag, r9
no_write_ptr_update:
#ifdef EFFECT
movi AGRSiz5, 0xffff ;back to linear addressing
#else //EFFECT
movi AGRSiz1, 0xffff ;back to linear addressing
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -