// quant_h263_ia64.s
//*******************************************************************************
//* *
//* The functions quant_inter and dequant_inter have been software-pipelined; *
//* the pmpyshr2 instruction is used. *
//* *
//* by Christian Engel and Hans-Joachim Daniels *
//* christian.engel@ira.uka.de hans-joachim.daniels@ira.uka.de *
//* *
//* This was made for the ia64 DivX laboratory (yes, it was really called *
//* that; originally OpenDivX was intended, but it died shortly before our *
//* work started, as you probably already know), held at the *
//* Universität Karlsruhe (TH) between April and July 2002 *
//* http://www.info.uni-karlsruhe.de/~rubino/ia64p/ *
//* *
//*******************************************************************************/
//-----------------------------------------------------------------------------
// File preamble and the read-only reciprocal table used by the quantiser.
//-----------------------------------------------------------------------------
.file "quant_h263_ia64.s"
// Assembler annotation: p1-p5 and p16-p63 are to be treated as unchanged
// across calls when the assembler reasons about predicate relations.
.pred.safe_across_calls p1-p5,p16-p63
.section .rodata
.align 4
.type multipliers#,@object
// 32 entries of 4 bytes each = 128 bytes.
.size multipliers#,128
// multipliers[q] = floor(65536 / (2*q)) + 1 for q = 1..31.
// Multiplying a non-negative value by multipliers[q] and keeping the product
// shifted right by 16 bits therefore approximates a division by 2*q.
// quant_h263_intra_ia64 indexes this table with its third argument (r34).
multipliers:
data4 0 // q = 0: no reciprocal exists; the slot keeps the table indexable by q
data4 32769 // q = 1: 65536/2 + 1
data4 16385 // q = 2: 65536/4 + 1
data4 10923
data4 8193
data4 6554
data4 5462
data4 4682
data4 4097 // q = 8
data4 3641
data4 3277
data4 2979
data4 2731
data4 2521
data4 2341
data4 2185
data4 2049 // q = 16
data4 1928
data4 1821
data4 1725
data4 1639
data4 1561
data4 1490
data4 1425
data4 1366 // q = 24
data4 1311
data4 1261
data4 1214
data4 1171
data4 1130
data4 1093
data4 1058 // q = 31: floor(65536/62) + 1
//-----------------------------------------------------------------------------
// quant_h263_intra_ia64
//
// Quantises one block of 64 signed 16-bit values.
// Presumed C prototype (names are not visible in this file -- TODO confirm):
//   void quant_h263_intra_ia64(int16_t *out, const int16_t *in,
//                              uint32_t quant, uint32_t dcscalar);
//
// Inputs (stacked registers, see the alloc below):
//   r32  destination; halfwords 0..63 are written
//   r33  source; halfwords 0..63 are read and sign-extended
//   r34  quantiser q; used as index into multipliers[] and as threshold 2*q
//        (assumed to be 1..31, the range the table covers -- TODO confirm)
//   r35  divisor applied to element 0 only
//
// Result written to memory:
//   out[0] = (in[0] + (r35 >> 1)) / r35   if in[0] >  0
//   out[0] = (in[0] - (r35 >> 1)) / r35   if in[0] <= 0     (via __divdi3)
//   out[i] = 0                                         if |in[i]| <  2*q
//   out[i] = sign(in[i]) * ((|in[i]| * multipliers[q]) >> 16)  otherwise,
//            for i = 1..63
//
// Frame: sp is lowered by 32 bytes. ar.lc is saved at [sp+24]; f2 is spilled
// at [sp+32], which is the sp value on entry.
// NOTE(review): the f2 spill therefore uses the 16 bytes at the caller's sp;
// confirm this matches the scratch-area rule of the IA-64 software conventions.
//
// Values that live across the __divdi3 call: r32, r33 (inputs), r36 (2*q),
// r37 (return address), r38 (ar.pfs), f2 (multiplier). r14-r19 and f6 are
// only used as temporaries and are re-initialised after the call.
//-----------------------------------------------------------------------------
.global __divdi3#
.text
.align 16
.global quant_h263_intra_ia64#
.proc quant_h263_intra_ia64#
quant_h263_intra_ia64:
.prologue
.save ar.pfs, r38
// 4 inputs (r32-r35), 3 locals (r36-r38), 2 outputs (r39-r40), no rotating regs.
alloc r38 = ar.pfs, 4, 3, 2, 0
// r16 = entry sp - 8, i.e. new sp + 24: the slot for ar.lc.
adds r16 = -8, r12
.fframe 32
adds r12 = -32, r12
mov r17 = ar.lc
// r14 = address of the linkage-table entry that holds &multipliers.
addl r14 = @ltoff(multipliers#), gp
// r15 = in[0] (16 bits, sign-extended two lines further down).
ld2 r15 = [r33]
;;
.savesp ar.lc, 24
// Save ar.lc at sp+24; the post-increment leaves r16 = entry sp (sp+32).
st8 [r16] = r17, 8
// r14 = &multipliers[0].
ld8 r14 = [r14]
sxt2 r15 = r15
;;
.save.f 0x1
// f2 holds the multiplier for the whole function, so its old value is saved.
stf.spill [r16] = f2
.save rp, r37
mov r37 = b0
.body
// r36 = (q & 0x7fff) << 1: the threshold 2*q used in the loop.
dep.z r36 = r34, 1, 15
// r16 = q * 4: byte offset of multipliers[q].
dep.z r16 = r34, 2, 32
// p6 = (in[0] <= 0), p7 = (in[0] > 0).
cmp4.ge p6, p7 = 0, r15
;;
add r16 = r16, r14
;;
ld4 r16 = [r16]
;;
// f2 = multipliers[q] as an integer significand for xma.
setf.sig f2 = r16
(p6) br.cond.dptk .L8
// in[0] > 0: dividend = in[0] + (r35 >> 1); divisor = sign-extended r35.
extr r39 = r35, 1, 31
sxt4 r40 = r35
;;
add r39 = r39, r15
br .L21
;;
.L8:
// in[0] <= 0: dividend = in[0] - (r35 >> 1); divisor = sign-extended r35.
extr r39 = r35, 1, 31
sxt4 r40 = r35
;;
sub r39 = r15, r39
;;
.L21:
// Dividend is widened to 64 bits; r39/r40 are the two outgoing arguments.
sxt4 r39 = r39
br.call.sptk.many b0 = __divdi3#
;;
// Loop count register = 62, so the br.cloop loop body runs 63 times.
addl r14 = 62, r0
// out[0] = quotient returned in r8.
st2 [r32] = r8
// r19 = element index i, starting at 1.
addl r19 = 1, r0
;;
mov ar.lc = r14
;;
.L20:
// r17 = i * 2: byte offset of element i.
dep.z r17 = r19, 1, 32
;;
add r15 = r17, r33
adds r19 = 1, r19
;;
ld2 r14 = [r15]
;;
// r14 = x = in[i], sign-extended.
sxt2 r14 = r14
;;
mov r16 = r14
mov r18 = r14
;;
// r15 = -x; p8 = (2*q <= x), p9 = (2*q > x); p6 = (x >= 0).
sub r15 = r0, r16
cmp4.le p8, p9 = r36, r16
cmp4.le p6, p7 = r0, r16
;;
// r14 = -x truncated to 16 bits and sign-extended (magnitude for x < 0).
sxt2 r14 = r15
(p6) br.cond.dptk .L14
;;
// ---- x < 0 ----
mov r16 = r14
// r18 = &out[i].
add r18 = r17, r32
;;
setf.sig f6 = r16
// p6 = (2*q <= |x|): store the quantised value; p7: store zero.
cmp4.le p6, p7 = r36, r16
mov r15 = r18
;;
// f6 = |x| * multipliers[q] (f0 contributes zero to the sum).
xma.l f6 = f6, f2, f0
(p7) st2 [r18] = r0
;;
getf.sig r14 = f6
;;
// Keep bits 16..31 of the product, i.e. the product >> 16.
extr r14 = r14, 16, 16
;;
// Restore the sign.
sub r14 = r0, r14
;;
(p6) st2 [r15] = r14
br .L12
.L14:
// ---- x >= 0 ---- p8/p9 from the compare above still select the store.
.pred.rel "mutex", p8, p9
setf.sig f6 = r18
// r16 = &out[i].
add r16 = r17, r32
;;
// f6 = x * multipliers[q].
xma.l f6 = f6, f2, f0
mov r15 = r16
(p9) st2 [r16] = r0
;;
getf.sig r14 = f6
;;
// Keep bits 16..31 of the product, i.e. the product >> 16.
extr r14 = r14, 16, 16
;;
(p8) st2 [r15] = r14
.L12:
br.cloop.sptk.few .L20
// Epilogue: r18 = sp + 24, the ar.lc save slot.
adds r18 = 24, r12
;;
// Reload ar.lc; the post-increment moves r18 to sp + 32, the f2 spill slot.
ld8 r19 = [r18], 8
mov ar.pfs = r38
mov b0 = r37
;;
mov ar.lc = r19
ldf.fill f2 = [r18]
.restore sp
adds r12 = 32, r12
br.ret.sptk.many b0
.endp quant_h263_intra_ia64#
// Two common symbols, each 8 bytes in size with 8-byte alignment. Nothing in
// the visible code reads or writes them; presumably they back pointer-sized
// variables of the same names defined in C -- TODO confirm.
.common quant_h263_intra#,8,8
.common dequant_h263_intra#,8,8
.align 16
.global dequant_h263_intra_ia64#
.proc dequant_h263_intra_ia64#
dequant_h263_intra_ia64:
.prologue
ld2 r14 = [r33]
andcm r15 = 1, r34
setf.sig f8 = r35
;;
sxt2 r14 = r14
sub r15 = r34, r15
addl r16 = -2048, r0
;;
setf.sig f6 = r14
setf.sig f7 = r15
shladd r34 = r34, 1, r0
;;
xma.l f8 = f6, f8, f0
.save ar.lc, r2
mov r2 = ar.lc
;;
.body
getf.sig r14 = f8
setf.sig f6 = r34
;;
sxt2 r15 = r14
st2 [r32] = r14
;;
cmp4.le p6, p7 = r16, r15
;;
(p7) st2 [r32] = r16
(p7) br.cond.dptk .L32
addl r14 = 2047, r0
;;
cmp4.ge p6, p7 = r14, r15
;;
(p7) st2 [r32] = r14
.L32:
addl r14 = 62, r0
addl r19 = 1, r0
addl r22 = 2048, r0
addl r21 = -2048, r0
addl r20 = 2047, r0
;;
mov ar.lc = r14
;;
.L56:
dep.z r16 = r19, 1, 32
;;
add r14 = r16, r33
add r17 = r16, r32
adds r19 = 1, r19
;;
ld2 r15 = [r14]
;;
sxt2 r15 = r15
;;
cmp4.ne p6, p7 = 0, r15
cmp4.le p8, p9 = r0, r15
;;
(p7) st2 [r17] = r0
(p7) br.cond.dpnt .L36
add r18 = r16, r32
sub r17 = r0, r15
;;
mov r14 = r18
(p8) br.cond.dptk .L40
setf.sig f8 = r17
;;
xma.l f8 = f6, f8, f7
;;
getf.sig r15 = f8
;;
cmp4.lt p6, p7 = r22, r15
sub r16 = r0, r15
;;
(p7) st2 [r14] = r16
(p6) st2 [r14] = r21
br .L36
.L40:
setf.sig f8 = r15
;;
xma.l f8 = f6, f8, f7
;;
getf.sig r15 = f8
;;
cmp4.le p6, p7 = r20, r15
;;
(p6) mov r14 = r20
(p7) mov r14 = r15
;;
st2 [r18] = r14
.L36:
br.cloop.sptk.few .L56
;;
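// NOTE: the text that followed here was keyboard-shortcut help captured from a
// web code viewer, not part of the source. The listing ends inside
// dequant_h263_intra_ia64: its ar.lc restore, return branch and .endp are
// not present.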