📄 idct.s
字号:
#endif //EFFECT
rnd a2(1*4), r6 ;saturate to 24-bit,save sample 32
.if DB_SAMP_D
multi r0, r20, 3*SBLIMIT //output in [chan][seg][i] format
multi r1, r21, SBLIMIT
nop
nop
nop
add r22, r0, r1
shl r22, 2 //to byte addr
addi r10, r22, local_sample_d_byte
rswi r10,0x500 // begining of dump addr
addi r10, r22, local_sample_d_byte+32*4
rswi r10,0x504 // end of dump addr
movi r10,1
rswi r10,0x508 // start dumping
.endif
j r26 ;return to calling routine
;************************************************************************
; (c)1996-1997 Copyright M-Pact, Inc. All rights reserved
;
; Revision 1.0
;
;
;
; Module: SUB_compute_new_v
;
; Initial version: Jinshi Huang 12/9/96
;
; Calling: none
; Called by: SubBandSynthesis
; Return: r29; new_v
; Param in: r22 (dct_in)
; Temp reg: r0-r7, r9-r17 (as written by the code reviewed; may be incomplete)
; AGR reg: AGRAdr0-AGRAdr3
; Local buffer: local_p_buffer_byte, local_pp_buffer_byte, local_new_v_byte,
;               local_cos1_32_byte, local_cos1_64_byte
;************************************************************************
// .globl SUB_compute_new_v
SUB_compute_new_v:
; Entry point. Per the routine header r22 carries dct_in; the code below
; uses r22 as the byte address of 32 consecutive 4-byte input words.
;
; Optional debug dump, assembled only when DB_DCT_IN is non-zero: the start
; address (r22), the end address (r22 + 32*4) and a start flag of 1 are
; written with rswi to 0x500, 0x504 and 0x508. r10 is scratch here.
.if DB_DCT_IN
rswi r22,0x500 // begining of dump addr
addi r10, r22, 32*4
rswi r10,0x504 // end of dump addr
movi r10,1
rswi r10,0x508 // start dumping
.endif
; Constants kept in registers for the rest of the routine. mupi places its
; 16-bit immediate in the upper half of the register (0x0004 gives
; 0x00040000, as the existing comment on r9 shows), presumably clearing the
; lower half:
;   r9 = 0x00040000  rounding constant; in the code reviewed it is only
;                    referenced by commented-out instructions
;   r4 = 0x007fffff  max + (mupi 0x007f, then ori 0xffff)
;   r5 = 0xff800000  max -
; r4 and r5 are used later both as clamp limits and as mulf/madd operands.
mupi r9, 0x0004 ; 0x040000 for rounding
mupi r4, 0x007f
ori r4, 0xffff //max +
mupi r5, 0xff80 //max -
//================ scale input down by 2 bit to avoid overflow ====
; NOTE(review): the title above says 2 bits (x0.25), but the active factor
; is 0x000c8811. If 0x00200000 is 0.25, as the commented-out mupi line
; states, then 0x000c8811 is roughly 0.098 - confirm which is intended.
.if 1
; AGRAdr0 points at dct_in[0] and AGRAdr1 at dct_in[16], so the two halves
; of the input are scaled side by side, 16 words each.
mov AGRAdr0, r22
addi r1, r22, 16*4
mov AGRAdr1, r1
// mupi r10, 0x0020 //0.25
// mupi r10, 0x0010 //0.125 scaled down by 6 dB more to match
//the volume of AC-3. This is also used to
//reduce overflow. The final output will
//not match that of the MPEG C code.
li r10, 0xc8811 // optimal value for Abex TVD-581A Track 21.
; CLR_TrapReg and SET_TrapReg are defined outside the code reviewed; they
; bracket every loop in this routine. TODO confirm what they control.
CLR_TrapReg
; Each of the 16 iterations multiplies one word per half by r10 and stores
; the rnd result back to the slot it was read from. Presumably a0(0) reads
; without moving the pointer and a0(1*4) accesses and then advances the
; pointer by 4 bytes - confirm against the ISA manual.
loop 16, scale_dct_in
mulf r0, r10, a0(0)
mulf r2, r10, a1(0)
rnd a0(1*4), r0
rnd a1(1*4), r2
scale_dct_in:
SET_TrapReg
.endif
;================= first 16 values ===========================
;------------------ pre pass -----------------------------------
; Folds the 32 input words into 16 sums written to local_p_buffer:
;   p[i] = dct_in[i] + dct_in[31 - i],  i = 0..15
; AGRAdr0 walks forward from dct_in[0], AGRAdr1 walks backward from
; dct_in[31], AGRAdr2 walks forward through local_p_buffer.
mov AGRAdr0, r22 ;dct_in
addi r1, r22, 31*4
mov AGRAdr1, r1 ;dct_in + 31*4
movi AGRAdr2, local_p_buffer_byte
CLR_TrapReg
; The sum is formed as r4*x1 + r4*x2 with mulf/madd and stored through rnd.
; r4 is set to 0x007fffff (max +) at routine entry, so this presumably acts
; as a saturating add of the two operands; the nop is assumed to be a
; pipeline slot between madd and rnd - TODO confirm.
loop 16, dct_loop_1_1
mulf r0, r4, a0(1*4) ;*x1++
madd r0, r4, a1(-1*4) ;*x2--
nop
rnd a2(1*4), r0 //append. rnd to saturate ??
dct_loop_1_1:
SET_TrapReg
;-------------------- pass 1-4 --------------------------------
; Four butterfly passes over the 16 values left in local_p_buffer.
; Loop state, with the values taken in passes 0, 1, 2, 3:
;   r10  pass parity, 0 = even, 1 = odd (selects the buffer direction)
;   r11  groups per pass            1, 2, 4, 8
;   r12  butterflies per group      8, 4, 2, 1
;   r13  group span in bytes        64, 32, 16, 8
;   r15  group stride in bytes      64, 32, 16, 8
;   r14  coefficient pointer, starting at local_cos1_32_byte
movi r10, 0 ;init pass count
movi r11, 1 ;init group count
movi r12, 8 ;init butterfly count
movi r13, 16*4 ;offset
movi r14, local_cos1_32_byte
movi r15, 16*4
;%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
CLR_TrapReg
loop 4, dct_pass_loop
; Ping-pong buffers: even passes read local_p_buffer and write
; local_pp_buffer, odd passes go the other way. The fourth pass is odd, so
; the final result ends up in local_p_buffer.
tsti r10, 1 ;test LSB of pass count
bnz dct_odd_pass
movi r6, local_p_buffer_byte
movi r7, local_p_buffer_byte
movi AGRAdr2, local_pp_buffer_byte
j dct_pass_cont
dct_odd_pass:
movi r6, local_pp_buffer_byte
movi r7, local_pp_buffer_byte
movi AGRAdr2, local_p_buffer_byte
dct_pass_cont:
; r6 = first word of the first group, r7 = its last word (r6 + r13 - 4).
subi r2, r13, 1*4
add r7, r2
;=======================================
loop r11, dct_group_loop
mov AGRAdr0, r6
mov AGRAdr1, r7
;--------------------------------------
; First half of the group output: sums in[k] + in[last - k], formed as
; r4*a + r4*b and stored through rnd. AGRAdr2 is not reset per group, so
; the output is written contiguously.
loop r12, dct_butterfly_loop_1
mulf r0, r4, a0(1*4)
madd r0, r4, a1(-1*4)
nop
rnd a2(1*4), r0
dct_butterfly_loop_1:
;--------------------------------------
; Second half of the group output: differences in[k] - in[last - k], each
; multiplied by a coefficient read through AGRAdr3 (reset to r14 for every
; group). The loop is software pipelined: the first difference is formed
; here and each iteration forms the one for the next iteration, so the
; last iteration reads one extra pair whose difference is not used.
mov AGRAdr0, r6
mov AGRAdr1, r7
mov AGRAdr3, r14 ;cos coeffs
nop ;pipeline
mov r0, a0(1*4)
sub r1, r0, a1(-1*4)
;--------------------------------------
loop r12, dct_butterfly_loop_2
; The coefficient word is used twice: mulf takes it as a fraction, and its
; top 8 bits (shr by 24) are multiplied in separately as an integer part;
; the two products are added in r2. Presumably a3(0*4) reads without
; advancing and a3(1*4) reads the same word and then steps to the next
; coefficient - TODO confirm.
mulf r2, r1, a3(0*4) ;pipeline)
mov r0, a0(1*4)
rnd r2
shr r17, a3(1*4), 24 //get integer portion
mult r16, r17, r1
sub r1, r0, a1(-1*4)
add r2, r16
// add r3, r9 //rnd
// shr r3, 19 //to 4 LSBs
// shl r2, 4 //scaled by 16
// or r2, r3 //append
; Clamp r2 to the range r5 (max -) .. r4 (max +); r3 is scratch for the
; two comparisons.
sub r3, r2, r4
blte check_neg_2
mov r2, r4
j save_r2_2
check_neg_2:
sub r3, r2, r5
bgte save_r2_2
mov r2, r5
save_r2_2:
mov a2(1*4), r2 //append. rnd to saturate ??
dct_butterfly_loop_2:
;--------------------------------------
add r6, r15 ;to next group
add r7, r15
dct_group_loop:
;======================================
; Set up the next pass: twice the groups, half the butterflies, half the
; span and stride, flipped parity. r14 advances by the new r15, which
; equals the byte size of the coefficients used by the pass just finished
; (4 bytes per butterfly of one group).
shl r11, 1 ;double group count
shr r12, 1 ;half butterfly count
shr r13, 1 ;half offset
shr r15, 1 ;??
addi r10, 1 ;inc pass count
andi r10, 1 // RH 10-14-97
add r14, r15 ;to next set of cos coeffs
dct_pass_loop:
SET_TrapReg
;%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
;----------------------- copy the results to new_v buffer ----------
; Combines the 16 values in local_p_buffer into 16 entries of local_new_v:
; indices 0, 2, 4, 6, 8, 10, 12, 14, 17, 19, 21, 23, 25, 27, 29 and 31.
; AGRAdr0 starts at p[6] and AGRAdr2 at new_v[19]. Every a0()/a2() access
; carries a byte offset that appears to be applied after the access, so
; each access also positions the pointer for the next one; tracing the
; offsets under that reading reproduces the p[] and new_v[] indices named
; in the inline comments.
; r2 = 0 is the minuend used to negate a value with sub.
; Each step exists in two assembly-time variants selected by NO_SAT:
;   NO_SAT non-zero: plain mov/add/sub arithmetic
;   NO_SAT zero:     mulf/madd with r4 (max +) or r5 (max -) as the
;                    multiplier, finished with rnd, which a comment
;                    elsewhere in this file describes as saturating to
;                    24 bits
movi AGRAdr0, local_p_buffer_byte + 6*4
movi AGRAdr2, local_new_v_byte + 19*4
movi r2, 0
; Step 1: new_v[19] and new_v[27] from p4..p7.
.if NO_SAT
mov r0, a0(1*4) ;p6
add r0, a0(-2*4) ;tmp=p6+p7
mov r1, a0(-1*4) ;p5
add r1, r0 ;p5+tmp
movi r2, 0
sub a2(8*4), r2, r1 ;new_v[19] = -(p5+tmp)
mov r1, a0(7*4) ;p4
add r1, r0 ;p4+tmp
sub a2(-17*4), r2, r1 ;new_v[27] = -(p4+tmp)
.else
mulf r10, r4, a0(1*4) ; p6
madd r10, r4, a0(-2*4) ; tmp=p6+p7
nop
rnd r12, r10 ; r12=tmp
madd r10, r4, a0(-1*4); p5+tmp
nop
rnd r10
sub a2(8*4), r2, r10 ;new_v[19] = -(p5+tmp)
mulf r10, r5, r12 ; -tmp
madd r10, r5, a0(7*4)
nop
rnd a2(-17*4), r10 ; new_v[27] = -(p4+tmp)
.endif
; Step 2: new_v[10] and new_v[6] from p11, p15 and p13.
.if NO_SAT
mov r0, a0(4*4) ;p11
add r0, a0(-2*4) ;tmp=p11+p15
mov a2(-4*4), r0 ;new_v[10] = tmp
mov r1, a0(1*4) ;p13
add a2(23*4), r1, r0 ;new_v[6] = p13 + tmp
.else
mulf r10, r4, a0(4*4); p11
madd r10, r4, a0(-2*4); tmp=p11+p15
nop
rnd a2(-4*4), r10 ; new_v[10] = tmp
madd r10, r4, a0(1*4); p13
nop
rnd a2(23*4), r10 ; ;new_v[6] = p13 + tmp
.endif
; Step 3: new_v[29] and new_v[17]. tmp = p14+p15 stays live (r0 in the
; NO_SAT variant, r12 in the other) because step 4 builds on it.
.if NO_SAT
mov r0, a0(1*4)
add r0, a0(-7*4) ;tmp=p14+p15
mov r1, a0(4*4) ;p8
add r1, a0(-3*4) ;p8+p12
add r1, r0 ;p8+p12+tmp
sub a2(-12*4), r2, r1 ;new_v[29]= - (p8+p12+tmp)
mov r1, a0(4*4) ;p9
add r1, a0(-3*4) ;p9+p13
add r1, r0 ;p9+p13+tmp
sub a2(4*4), r2, r1 ;new_v[17]= - (p9+p13+tmp)
.else
mulf r10, r4, a0(1*4)
madd r10, r4, a0(-7*4);tmp=p14+p15
nop
rnd r12, r10 ; r12=tmp
mulf r10, r5, r12 ; -tmp
madd r10, r5, a0(4*4)
madd r10, r5, a0(-3*4);-(p8+p12+tmp)
nop
rnd a2(-12*4), r10 ; new_v[29]= - (p8+p12+tmp)
mulf r10, r5, r12 ; -tmp
madd r10, r5, a0(4*4)
madd r10, r5, a0(-3*4)
nop
rnd a2(4*4), r10 ;new_v[17]= - (p9+p13+tmp)
.endif
; Step 4: new_v[21] and new_v[25]; tmp from step 3 is extended by p10+p11.
; In the mulf/madd variant r12 is reloaded with the already negated sum,
; which is why the second use multiplies it by r4 rather than r5.
.if NO_SAT
add r0, a0(1*4) ;+p10
add r0, a0(2*4) ;tmp += p10+p11
add r1, r0, a0(-1*4) ;p13+tmp
sub a2(4*4), r2, r1 ;new_v[21]= - (p13+tmp)
add r1, r0, a0(-3*4) ;p12+tmp
sub a2(-23*4), r2, r1 ;new_v[25]= - (p12+tmp)
.else
mulf r10, r5, r12 ; -tmp
madd r10, r5, a0(1*4); -p10
madd r10, r5, a0(2*4); -tmp -= -p10-p11 (new tmp)
nop
rnd r12, r10 ; r12=-tmp
madd r10, r5, a0(-1*4);
nop
rnd a2(4*4), r10 ;new_v[21]= - (p13+tmp)
mulf r10, r4, r12 ; -tmp
madd r10, r5, a0(-3*4)
nop
rnd a2(-23*4), r10 ;new_v[25]= - (p12+tmp)
.endif
; Step 5: new_v[2] and new_v[4].
.if NO_SAT
mov r0, a0(4*4) ;p9
add r0, a0(2*4) ;p9+p13
add r0, a0(-10*4) ;p9+p13+p15
mov a2(2*4), r0 ;new_v[2]=p9+p13+p15
mov r0, a0(2*4) ;p5
add r0, a0(-7*4) ;p5+p7
mov a2(27*4), r0 ;new_v[4]=p5+p7
.else
mulf r10, r4, a0(4*4); p9
madd r10, r4, a0(2*4); p9+p13
madd r10, r4, a0(-10*4); p9+p13+p15
nop
rnd a2(2*4), r10 ; new_v[2]=p9+p13+p15
mulf r10, r4, a0(2*4); p5
madd r10, r4, a0(-7*4); p5+p7
nop
rnd a2(27*4), r10 ; new_v[4]=p5+p7
.endif
; Step 6: direct copies (new_v[31] negated) plus new_v[23]. The first ten
; instructions are the same in both variants; only the final negated sum
; differs.
.if NO_SAT
sub r0, r2, a0(1*4)
mov a2(-31*4), r0 ;new_v[31]=-p0
mov r0, a0(2*4)
mov a2(8*4), r0 ;new_v[0]=p1
mov r0, a0(4*4)
mov a2(4*4), r0 ;new_v[8]=p3
mov r0, a0(8*4)
mov a2(2*4), r0 ;new_v[12]=p7
mov r0, a0(-13*4)
mov a2(9*4), r0 ;new_v[14]=p15
mov r0, a0(1*4) ;p2
add r0, a0(0*4) ;p2+p3
sub a2(0*4), r2, r0 ;new_v[23]= -(p2+p3)
.else
sub r0, r2, a0(1*4)
mov a2(-31*4), r0 ;new_v[31]=-p0
mov r0, a0(2*4)
mov a2(8*4), r0 ;new_v[0]=p1
mov r0, a0(4*4)
mov a2(4*4), r0 ;new_v[8]=p3
mov r0, a0(8*4)
mov a2(2*4), r0 ;new_v[12]=p7
mov r0, a0(-13*4)
mov a2(9*4), r0 ;new_v[14]=p15
mulf r10, r5, a0(1*4); -p2
madd r10, r5, a0(0*4); -p2-p3
nop
rnd a2(0*4), r10 ; new_v[23]= -(p2+p3)
.endif
;================= second 16 values ===========================
;------------------ pre pass -----------------------------------
; Folds the 32 input words into 16 scaled differences written to
; local_p_buffer:
;   p[i] = clamp((dct_in[i] - dct_in[31 - i]) * coef[i]),  i = 0..15
; with the coefficients read from local_cos1_64_byte through AGRAdr3.
; AGRAdr0 walks forward from dct_in[0], AGRAdr1 backward from dct_in[31].
mov AGRAdr0, r22 ;dct_in
addi r1, r22, 31*4
mov AGRAdr1, r1 ;dct_in + 31*4
movi AGRAdr2, local_p_buffer_byte
movi AGRAdr3, local_cos1_64_byte ;cos coeffs
nop ;pipeline
; Software pipelined: the first difference is formed here, and each
; iteration forms the difference for the next one, so the last iteration
; reads one extra pair whose difference is not used by this loop.
mov r0, a0(1*4)
sub r1, r0, a1(-1*4)
CLR_TrapReg
loop 16, dct_loop_2_1
; The coefficient word is used twice: mulf takes it as a fraction, and its
; top 8 bits (shr by 24) are multiplied in separately as an integer part;
; the two products are added in r2. Presumably a3(0*4) reads without
; advancing and a3(1*4) reads the same word and then steps to the next
; coefficient - TODO confirm.
mulf r2, r1, a3(0*4) ;pipeline
mov r0, a0(1*4)
rnd r2
shr r17, a3(1*4), 24 //get integer portion
mult r16, r17, r1
sub r1, r0, a1(-1*4)
add r2, r16
// add r3, r9 //rnd
// shr r3, 19 //to 4 LSBs
// shl r2, 4 //scaled by 16
// or r2, r3 //append
; Clamp r2 to the range r5 (max -) .. r4 (max +), both set at routine
; entry; r3 is scratch for the two comparisons.
sub r3, r2, r4
blte check_neg_2_1
mov r2, r4
j save_r2_2_1
check_neg_2_1:
sub r3, r2, r5
bgte save_r2_2_1
mov r2, r5
save_r2_2_1:
mov a2(1*4), r2 //append. rnd to saturate ??
dct_loop_2_1:
SET_TrapReg
;-------------------- pass 1-4 --------------------------------
movi r10, 0 ;init pass count
movi r11, 1 ;init group count
movi r12, 8 ;init butterfly count
movi r13, 16*4 ;offset
movi r14, local_cos1_32_byte
movi r15, 16*4
;%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
CLR_TrapReg
loop 4, dct_pass_loop_2
tsti r10, 1 ;test LSB of pass count
bnz dct_odd_pass_2
movi r6, local_p_buffer_byte
movi r7, local_p_buffer_byte
movi AGRAdr2, local_pp_buffer_byte
j dct_pass_cont_2
dct_odd_pass_2:
movi r6, local_pp_buffer_byte
movi r7, local_pp_buffer_byte
movi AGRAdr2, local_p_buffer_byte
dct_pass_cont_2:
subi r2, r13, 1*4
add r7, r2
;=======================================
loop r11, dct_group_loop_2
mov AGRAdr0, r6
mov AGRAdr1, r7
;--------------------------------------
loop r12, dct_butterfly_loop_1_2
mulf r0, r4, a0(1*4)
madd r0, r4, a1(-1*4)
nop
rnd a2(1*4), r0
dct_butterfly_loop_1_2:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -