📄 quant_h263_ia64.s
字号:
mov ar.lc = r2
br.ret.sptk.many b0
.endp dequant_h263_intra_ia64#
// uint32_t quant_h263_inter_ia64(int16_t *coeff, const int16_t *data, const uint32_t quant)
//
// H.263 inter-block quantisation of 64 coefficients, written as a
// software-pipelined (modulo-scheduled) br.ctop loop.
//
// Register usage as established by the code below:
//   r32 = coeff (output pointer), r33 = data (input pointer), r34 = quant
//         -- all three are copied to static registers before the loop because
//            r32.. become rotating registers once the loop starts
//   r8  = running sum of the quantised magnitudes (left in r8 at return)
//   r2 / r9 / r10 / r17 = saved ar.lc / ar.pfs / pr / ar.ec, restored on exit
//   r15 = read pointer (data), r18 = write pointer (coeff)
//   r20 = quant_m_2, r21 = quant_d_2, r29 = mult
// NOTE(review): the .common below reserves an 8-byte, 8-aligned common symbol
// that nothing in this procedure references -- presumably a leftover of the
// compiler-generated skeleton; confirm before removing.
.common quant_h263_inter#,8,8
.align 16
.global quant_h263_inter_ia64#
.proc quant_h263_inter_ia64#
quant_h263_inter_ia64:
//*******************************************************
//* *
//* const uint32_t mult = multipliers[quant]; *
//* const uint16_t quant_m_2 = quant << 1; *
//* const uint16_t quant_d_2 = quant >> 1; *
//* int sum = 0; *
//* uint32_t i; *
//* int16_t acLevel,acL; *
//* *
//*******************************************************/
LL=3 // LL = load latency
//if LL is changed, you'll also have to change the .pred.rel... parts below!
.prologue
// r14 = address of the linkage-table (GOT) slot that holds &multipliers
addl r14 = @ltoff(multipliers#), gp
// r15 = quant << 2 : byte offset of a 4-byte entry (it is read with ld4 below)
dep.z r15 = r34, 2, 32
.save ar.lc, r2
mov r2 = ar.lc
;;
.body
// New frame: 24 registers (r32-r55), all of them rotating.
alloc r9=ar.pfs,0,24,0,24
mov r17 = ar.ec
mov r10 = pr
// r14 = &multipliers (loaded from the linkage table)
ld8 r14 = [r14]
extr.u r16 = r34, 1, 16 //r16 = quant_d_2
dep.z r20 = r34, 1, 15 //r20 = quant_m_2
;;
// r15 = &multipliers[quant]
add r15 = r15, r14
mov r21 = r16 //r21 = quant_d_2
mov r8 = r0 //r8 = sum = 0
mov pr.rot = 0 //p16-p63 = 0
;;
// r15 = mult = multipliers[quant]
ld4 r15 = [r15]
// r14 = 63 : ar.lc holds (trip count - 1) for the 64 coefficients
addl r14 = 63, r0
mov pr.rot = 1 << 16 //p16=1
;;
mov ar.lc = r14
// epilogue count = number of pipeline stages (stage 0 .. stage LL+8)
mov ar.ec = LL+9
mov r29 = r15 //r29 = mult
;;
mov r15 = r33 //r15 = data
mov r18 = r32 //r18 = coeff
;;
// Rotating register / predicate names. A value written as name[k] in one pass
// is visible as name[k+1] in the next pass, so the index counts "stages since
// the write".
.rotr ac1[LL+3], ac2[8], ac3[2]
.rotp p[LL+9], cmp1[8], cmp1neg[8],cmp2[5], cmp2neg[2]
//*******************************************************************************
//* *
//* for (i = 0; i < 64; i++) { *
//* acL=acLevel = data[i]; *
//* acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2; *
//* if (acLevel < quant_m_2){ *
//* acLevel = 0; *
//* } *
//* acLevel = (acLevel * mult) >> SCALEBITS; *
//* sum += acLevel; *
//* coeff[i] = ((acL < 0)?-acLevel:acLevel); *
//* } *
//* *
//*******************************************************************************/
//
// Pipeline stages for one coefficient (the number in each trailing comment):
//   0     load data[i]
//   LL    sign-extend the loaded 16-bit value
//   LL+1  ac2 = -acLevel ; cmp1 / cmp1neg = sign test
//   LL+2  ac2 = |acLevel| - quant_d_2 (one of two mutually exclusive subs)
//   LL+3  sign-extend ac2
//   LL+4  cmp2 / cmp2neg = threshold test against quant_m_2
//   LL+5  zero ac2 when below the threshold
//   LL+6  ac2 = (ac2 * mult) >> 16
//   LL+7  ac3 = ac2 with the original sign restored
//   LL+8  store ac3 to coeff[i] ; add ac2 to the sum when it passed the threshold
// Instructions from different stages are interleaved in the bundles below, so
// source order is not stage order.
.explicit
.L58:
// p29 = cmp1[1], p37 = cmp1neg[1]: written as a complementary pair one pass
// earlier, so the two predicated writes to ac2[1] below never both execute.
.pred.rel "clear", p29, p37
.pred.rel "mutex", p29, p37
//pipeline stage
{.mmi
(p[0]) ld2 ac1[0] = [r15],2 // 0 acL=acLevel = data[i];
(p[LL+1]) sub ac2[0] = r0, ac1[LL+1] // LL+1 ac2=-acLevel
(p[LL]) sxt2 ac1[LL] = ac1[LL] // LL
}
{.mmi
(p[LL+1]) cmp4.le cmp1[0], cmp1neg[0] = r0, ac1[LL+1] // LL+1 cmp1 = (0<=acLevel) ; cmp1neg = !(0<=acLevel)
(p[LL+4]) cmp4.le cmp2[0], cmp2neg[0] = r20, ac2[3] // LL+4 cmp2 = (quant_m_2 <= acLevel) ; cmp2neg = !(quant_m_2 <= acLevel)
(cmp1[1]) sub ac2[1] = ac1[LL+2], r21 // LL+2 acLevel = acLevel - quant_d_2;
}
{.mmi
(cmp2neg[1]) mov ac2[4] = r0 // LL+5 if (acLevel < quant_m_2) acLevel=0;
(cmp1neg[1]) sub ac2[1] = ac2[1], r21 // LL+2 acLevel = ac2 - quant_d_2; (ac2 already holds -acLevel here)
(p[LL+3]) sxt2 ac2[2] = ac2[2] // LL+3
}
{.mmi
// p34 = cmp1[6], p42 = cmp1neg[6]: the same sign test, six passes later
.pred.rel "mutex", p34, p42
(cmp1[6]) mov ac3[0] = ac2[6] // LL+7 ac3 = acLevel;
(cmp1neg[6]) sub ac3[0] = r0, ac2[6] // LL+7 ac3 = -acLevel;
(p[LL+6]) pmpyshr2.u ac2[5] = r29, ac2[5], 16 // LL+6 acLevel = (acLevel * mult) >> SCALEBITS;
}
{.mib
(p[LL+8]) st2 [r18] = ac3[1] , 2 // LL+8 coeff[i] = ac3;
(cmp2[4]) add r8 = r8, ac2[7] // LL+8 sum += acLevel; (only when the threshold test passed)
br.ctop.sptk.few .L58
;;
}
.pred.rel "clear", p29, p37
.default
// Restore the caller's ar.ec, ar.lc, predicates and ar.pfs, then return.
mov ar.ec = r17
;;
mov ar.lc = r2
mov pr = r10, -1
mov ar.pfs = r9
br.ret.sptk.many b0
.endp quant_h263_inter_ia64#
// void dequant_h263_inter_ia64(int16_t *data, const int16_t *coeff, const uint32_t quant)
//
// H.263 inter-block dequantisation of 64 coefficients, written as a
// software-pipelined (modulo-scheduled) br.ctop loop.
//
// Register usage as established by the code below:
//   r32 = data (output pointer), r33 = coeff (input pointer), r34 = quant
//         -- copied to static registers before the loop because r32.. become
//            rotating registers once the loop starts
//   r2 / r9 / r16 / r17 = saved ar.lc / ar.pfs / ar.ec / pr, restored on exit
//   r14 = read pointer (coeff), r18 = write pointer (data)
//   r15 = quant_add, r29 = quant_m_2
//   r20 = 2047 (upper clamp), r21 = -2048 (lower clamp)
// NOTE(review): the .common below reserves an 8-byte, 8-aligned common symbol
// that nothing in this procedure references -- presumably a leftover of the
// compiler-generated skeleton; confirm before removing.
.common dequant_h263_inter#,8,8
.align 16
.global dequant_h263_inter_ia64#
.proc dequant_h263_inter_ia64#
dequant_h263_inter_ia64:
//***********************************************************************
//* *
//* const uint16_t quant_m_2 = quant << 1; *
//* const uint16_t quant_add = (quant & 1 ? quant : quant - 1); *
//* uint32_t i; *
//* *
//***********************************************************************
// LL must be assigned before its first use ("mov ar.ec = LL+10" below): the
// assembler substitutes the value LL holds at the point of use, so a later
// assignment would leave the epilogue count based on whatever LL was before.
LL=2 // LL := load latency
//if LL is changed, you'll also have to change the .pred.rel... parts below!
    .prologue
    andcm r14 = 1, r34 // r14 = 1 if quant is even, 0 if quant is odd
    dep.z r29 = r34, 1, 15 // r29 = quant_m_2
    // New frame: 32 registers (r32-r63), all of them rotating.
    alloc r9=ar.pfs,0,32,0,32
    .save ar.lc, r2
    mov r2 = ar.lc
    ;;
    .body
    sub r15 = r34, r14 // r15 = quant_add = (quant & 1 ? quant : quant - 1)
    addl r14 = 63, r0 // ar.lc holds (trip count - 1) for the 64 coefficients
    addl r21 = -2048, r0 // lower clamp
    addl r20 = 2047, r0 // upper clamp
    mov r16 = ar.ec
    mov r17 = pr
    ;;
    zxt2 r15 = r15
    mov ar.lc = r14
    mov pr.rot = 0 // p16-p63 = 0
    ;;
    adds r14 = 0, r33 // r14 = coeff
    mov r18 = r32 // r18 = data
    // epilogue count = number of pipeline stages (stage 0 .. stage LL+9)
    mov ar.ec = LL+10
    mov pr.rot = 1 << 16 // p16 = 1
    ;;
//*******************************************************************************
//* *
//*for (i = 0; i < 64; i++) { *
//* int16_t acLevel = coeff[i]; *
//* *
//* if (acLevel == 0) *
//* { *
//* data[i] = 0; *
//* } *
//* else if (acLevel < 0) *
//* { *
//* acLevel = acLevel * quant_m_2 - quant_add; *
//* data[i] = (acLevel >= -2048 ? acLevel : -2048); *
//* } *
//* else // if (acLevel > 0) *
//* { *
//* acLevel = acLevel * quant_m_2 + quant_add; *
//* data[i] = (acLevel <= 2047 ? acLevel : 2047); *
//* } *
//* } *
//* *
//*******************************************************************************/
//
// Pipeline stages for one coefficient (the number in each trailing comment):
//   0     load coeff[i]
//   LL    sign-extend the loaded 16-bit value
//   LL+1  cmp1neg = (acLevel == 0) ; cmp2 / cmp2neg = sign test
//   LL+2  x = clamp limit for this sign (2047 or -2048) ; ac1 *= quant_m_2
//   LL+3  ac1 += quant_add (non-negative) or ac1 -= quant_add (negative)
//   LL+4  sign-extend ac1
//   LL+5  y1 / y2 = the two candidate results, ordered by sign
//   LL+6  cmp3 / cmp3neg = (limit <= ac1)
//   LL+7  ac1 = y1 if cmp3 else y2   (the clamped value)
//   LL+8  ac1 = 0 if the input coefficient was 0
//   LL+9  store ac1 to data[i]
// Instructions from different stages are interleaved in the bundles below, so
// source order is not stage order.
    // Rotating register / predicate names. A value written as name[k] in one
    // pass is visible as name[k+1] in the next pass.
    .rotr ac1[LL+10], x[5], y1[3], y2[3]
    .rotp p[LL+10] , cmp1neg[8], cmp2[5], cmp2neg[5],cmp3[2], cmp3neg[2]
    .explicit
//pipeline stage
.L60:
    // With the .rotp layout above: p36-p40 = cmp2[0..4], p41-p45 = cmp2neg[0..4],
    // p46-p47 = cmp3[0..1], p48-p49 = cmp3neg[0..1]. Each pair below is a
    // compare result and its complement, so the predicated writes they guard
    // never both execute.
    .pred.rel "clear", p36
    .pred.rel "mutex", p47, p49
    .pred.rel "mutex", p46, p48
    .pred.rel "mutex", p40, p45
    .pred.rel "mutex", p39, p44
    .pred.rel "mutex", p38, p43
    .pred.rel "mutex", p37, p42
    .pred.rel "mutex", p36, p41
    {.mmi
    (p[0]) ld2 ac1[0] = [r14] ,2 // 0 acLevel = coeff[i];
    (p[LL+1]) cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1] // LL+1 cmp1neg = (acLevel == 0); p6 is not read anywhere
    (p[LL]) sxt2 ac1[LL] = ac1[LL] // LL
    }
    {.mmi
    (p[LL+1]) cmp4.le cmp2[0], cmp2neg[0] = r0, ac1[LL+1] // LL+1 cmp2 = (0 <= acLevel)
    (cmp2[1]) mov x[0] = r20 // LL+2 non-negative: limit = 2047
    (p[LL+2]) pmpyshr2.u ac1[LL+2] = r29, ac1[LL+2], 0 // LL+2 acLevel *= quant_m_2
    }
    {.mmi
    (cmp2neg[1]) mov x[0] = r21 // LL+2 negative: limit = -2048
    (cmp2[2]) add ac1[LL+3] = ac1[LL+3], r15 // LL+3 acLevel += quant_add
    (cmp2neg[2]) sub ac1[LL+3] = ac1[LL+3], r15 // LL+3 acLevel -= quant_add
    }
    {.mmi
    (cmp2neg[4]) mov y1[0] = ac1[LL+5] // LL+5 negative: y1 = value
    (cmp2neg[4]) mov y2[0] = x[3] // LL+5 negative: y2 = limit
    (p[LL+4]) sxt2 ac1[LL+4] = ac1[LL+4] // LL+4
    }
    {.mmi
    (cmp2[4]) mov y1[0] = x[3] // LL+5 non-negative: y1 = limit
    (cmp2[4]) mov y2[0] = ac1[LL+5] // LL+5 non-negative: y2 = value
    (p[LL+6]) cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6] // LL+6 cmp3 = (limit <= acLevel)
    }
    {.mmi
    (cmp3[1]) mov ac1[LL+7] = y1[2] // LL+7 limit <= value: take y1
    (cmp3neg[1]) mov ac1[LL+7] = y2[2] // LL+7 otherwise: take y2
    (cmp1neg[7]) mov ac1[LL+8] = r0 // LL+8 coefficient was 0 -> result 0
    }
    {.mbb
    (p[LL+9]) st2 [r18] = ac1[LL+9] ,2 // LL+9 data[i] = acLevel;
    nop.b 0x0
    br.ctop.sptk.few .L60
    ;;
    }
    .pred.rel "clear", p36
    .default
    // Restore the caller's ar.lc, ar.pfs, ar.ec and predicates, then return.
    mov ar.lc = r2
    mov ar.pfs = r9
    mov ar.ec = r16
    mov pr = r17, -1
    ;;
    br.ret.sptk.many b0
.endp dequant_h263_inter_ia64#
.ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -