📄 rs32asm.s
字号:
/*
 * Assemble this file with "gasp -a rsasm.s | as -o rsasm.o"
 * Fast encoder for (255,223) Reed-Solomon code over GF(256)
 * Called as
 *   rse32(unsigned char data[223],unsigned char parity[32]);
 * This code started as output of gcc -S, then was heavily hand
 * massaged to exploit the Pentium dual pipeline
 * Runs at about 30 megabits/sec on a 133 MHz Pentium
 * Phil Karn, KA9Q July 5, 1997
 */
/*
 * rse32(unsigned char data[223], unsigned char parity[32])
 *
 * Encoder for the (255,223) Reed-Solomon code over GF(256).
 *
 * In:     8(%ebp)  -> data, 223 bytes (read from index 222 down to 0)
 *         12(%ebp) -> parity, 32 bytes (written once, after all data is read)
 * Out:    %eax = 0
 * Saves:  %ebp, %ebx, %esi, %edi
 * Trash:  %ecx, %edx, flags
 *
 * Stack frame:
 *   -32(%ebp) .. -1(%ebp)   32-byte shift register; byte Rnn is kept at
 *                           -32+nn(%ebp), so R00 is at -32 and R31 at -1
 *   -36, -40, -44(%ebp)     saved %edi, %esi, %ebx
 *
 * Gtab is treated as 8 tables of 256 dwords; table k starts at
 * Gtab+1024*k and is XORed into bytes R(4k)..R(4k+3).  Every table is
 * indexed by the feedback byte (data ^ R31).
 * NOTE(review): Gtab is only reserved here (.comm); its contents are
 * presumably filled in by code outside this file before the first call.
 *
 * The instruction order inside the main loop is hand-scheduled for the
 * Pentium's two pipelines -- do not reorder without measuring.
 */
        .comm   Gtab,8192,4             /* 8 tables * 256 entries * 4 bytes */
        .text
        .globl  rse32
        .type   rse32,@function
rse32:
        pushl   %ebp
        movl    %esp,%ebp
        subl    $32,%esp                /* room for the 32-byte shift register */
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        movl    8(%ebp),%ebx            /* %ebx -> data */
        movl    12(%ebp),%esi           /* %esi -> output parity bytes */

        /* init parity bytes to zeroes, 8 dwords from index 7 down to 0 */
        movl    $7,%eax
.Lclear_parity:
        movl    $0,-32(%ebp,%eax,4)
        decl    %eax
        jns     .Lclear_parity

        movl    $222,%edi               /* index of last data byte; counts down to 0 */
.Lencode_byte:
        xorl    %eax,%eax               /* upper 24 bits stay 0: %eax indexes Gtab */
        movl    -4(%ebp),%ecx           /* %ecx = (R31,R30,R29,R28) */
        movb    (%edi,%ebx),%al         /* %al = data byte */
        roll    $8,%ecx                 /* %ecx = (R30,R29,R28,R31) */
        movl    -8(%ebp),%edx           /* %edx = (R27,R26,R25,R24) */
        xorb    %cl,%al                 /* %al = R31 ^ data (feedback term) */
        shll    $8,%edx                 /* %edx = (R26,R25,R24,0) */
        movb    -5(%ebp),%cl            /* %ecx = (R30,R29,R28,R27) */
        movb    -9(%ebp),%dl            /* %edx = (R26,R25,R24,R23) */
        xorl    Gtab+7168(,%eax,4),%ecx /* %ecx = (R31',R30',R29',R28') */
        xorl    Gtab+6144(,%eax,4),%edx /* %edx = (R27',R26',R25',R24') */
        movl    %ecx,-4(%ebp)
        movl    %edx,-8(%ebp)

        movl    -12(%ebp),%ecx          /* %ecx = (R23,R22,R21,R20) */
        movl    -16(%ebp),%edx          /* %edx = (R19,R18,R17,R16) */
        shll    $8,%ecx                 /* %ecx = (R22,R21,R20,0) */
        shll    $8,%edx                 /* %edx = (R18,R17,R16,0) */
        movb    -13(%ebp),%cl           /* %ecx = (R22,R21,R20,R19) */
        movb    -17(%ebp),%dl           /* %edx = (R18,R17,R16,R15) */
        xorl    Gtab+5120(,%eax,4),%ecx
        xorl    Gtab+4096(,%eax,4),%edx
        movl    %ecx,-12(%ebp)
        movl    %edx,-16(%ebp)

        movl    -20(%ebp),%ecx          /* %ecx = (R15,R14,R13,R12) */
        movl    -24(%ebp),%edx          /* %edx = (R11,R10,R09,R08) */
        shll    $8,%ecx                 /* %ecx = (R14,R13,R12,0) */
        shll    $8,%edx                 /* %edx = (R10,R09,R08,0) */
        movb    -21(%ebp),%cl           /* %ecx = (R14,R13,R12,R11) */
        movb    -25(%ebp),%dl           /* %edx = (R10,R09,R08,R07) */
        xorl    Gtab+3072(,%eax,4),%ecx
        xorl    Gtab+2048(,%eax,4),%edx
        movl    %ecx,-20(%ebp)
        movl    %edx,-24(%ebp)

        movl    -28(%ebp),%ecx          /* %ecx = (R07,R06,R05,R04) */
        movl    -32(%ebp),%edx          /* %edx = (R03,R02,R01,R00) */
        shll    $8,%ecx                 /* %ecx = (R06,R05,R04,0) */
        shll    $8,%edx                 /* %edx = (R02,R01,R00,0); nothing shifts into R00 */
        movb    -29(%ebp),%cl           /* %ecx = (R06,R05,R04,R03) */
        xorl    Gtab(,%eax,4),%edx
        xorl    Gtab+1024(,%eax,4),%ecx
        movl    %edx,-32(%ebp)
        movl    %ecx,-28(%ebp)

        decl    %edi
        jns     .Lencode_byte           /* loop until the index goes negative */

        /* Copy parity bytes to user buffer, 8 dwords from index 7 down to 0 */
        movl    $7,%edx
.Lcopy_parity:
        movl    -32(%ebp,%edx,4),%eax
        movl    %eax,(%esi,%edx,4)
        decl    %edx
        jns     .Lcopy_parity

        xorl    %eax,%eax               /* return 0 */
        leal    -44(%ebp),%esp          /* %esp -> saved %ebx (32 locals + 3 pushes) */
        popl    %ebx
        popl    %esi
        popl    %edi
        movl    %ebp,%esp
        popl    %ebp
        ret
        .size   rse32,.-rse32
/*
 * DOSYN R
 *
 * This macro evaluates the input polynomial (which has 255 elements)
 * at four consecutive values of alpha**n and stores the results.
 * The instructions are ordered to avoid address generation interlocks
 * and to encourage parallel execution in the Pentium's two pipelines,
 * so do not reorder them without measuring.
 *
 * Input:   %esi -> input buffer (bytes 254 down to 0 are read)
 *          12(%ebp) -> output buffer (bytes R .. R+3 are written)
 * Uses:    char Mtab[32][256], a multiplication lookup table;
 *          rows R .. R+3 are used by this expansion
 * Trashes: eax, ebx, ecx, edx, edi, flags
 */
.MACRO DOSYN R
LOCAL loop
        /* Clear the four accumulators; the upper bytes must be zero  */
        /* because each full register is used as a byte index below.  */
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        xorl    %ecx,%ecx
        xorl    %edx,%edx
        movl    $254,%edi                       /* index of last input byte */
loop:   movb    Mtab+256*(\R+0)(%eax),%al       /* table lookup on accumulator 0 */
        movb    (%edi,%esi),%ah                 /* %ah = next input byte */
        movb    Mtab+256*(\R+1)(%ebx),%bl       /* table lookup on accumulator 1 */
        movb    Mtab+256*(\R+2)(%ecx),%cl       /* table lookup on accumulator 2 */
        movb    Mtab+256*(\R+3)(%edx),%dl       /* table lookup on accumulator 3 */
        xorb    %ah,%al                         /* add the input byte to each one */
        xorb    %ah,%bl
        xorb    %ah,%cl
        xorb    %ah,%dl
        xorb    %ah,%ah                         /* %eax is a clean 0..255 index again */
        decl    %edi
        jns     loop                            /* until the index goes negative */
        movl    12(%ebp),%edi                   /* %edi -> output buffer */
        movb    %al,\R(%edi)
        movb    %bl,\R+1(%edi)
        movb    %cl,\R+2(%edi)
        movb    %dl,\R+3(%edi)
.ENDM
/*
 * Mtab: 8192 bytes, byte-aligned -- the char Mtab[32][256] multiplication
 * lookup table that DOSYN indexes.
 * NOTE(review): only reserved here; presumably filled in by code outside
 * this file before rssyndrome is called.
 */
.comm Mtab,8192,1
.text
.globl rssyndrome
.type rssyndrome,@function
/*
 * rssyndrome
 *
 * Runs DOSYN eight times (R = 0, 4, ..., 28); each expansion reads the
 * 255-byte input buffer and stores four result bytes, so 32 bytes are
 * written to the output buffer in total.
 *
 * In:    8(%ebp)  -> input buffer (255 bytes are read)
 *        12(%ebp) -> output buffer (32 bytes are written)
 * Saves: %ebp, %edi, %esi, %ebx
 * %eax is not set before returning; it holds whatever the last DOSYN
 * expansion left in it.
 * NOTE(review): the C prototype is not shown in this file; it is
 * presumably rssyndrome(unsigned char in[255], unsigned char out[32]) --
 * confirm against the caller.
 */
rssyndrome:
pushl %ebp
movl %esp,%ebp
pushl %edi /* DOSYN trashes %edi and %ebx, and %esi is loaded below */
pushl %esi
pushl %ebx
movl 8(%ebp),%esi /* %esi -> input buffer, as DOSYN expects */
DOSYN 0 /* output bytes 0..3 */
DOSYN 4 /* output bytes 4..7 */
DOSYN 8 /* output bytes 8..11 */
DOSYN 12 /* output bytes 12..15 */
DOSYN 16 /* output bytes 16..19 */
DOSYN 20 /* output bytes 20..23 */
DOSYN 24 /* output bytes 24..27 */
DOSYN 28 /* output bytes 28..31 */
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -