⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ssebfly27.s

📁 FEC Optimized viterbi code
💻 S
字号:
/* Intel SIMD (SSE) implementation of Viterbi ACS butterflies   for 64-state (k=7) convolutional code   Copyright 2001 Phil Karn, KA9Q   This code may be used under the terms of the GNU Lesser General Public License (LGPL)   int update_viterbi27_blk_sse(struct v27 *vp,unsigned char syms[],int nbits) ; */	# SSE (64-bit integer SIMD) version	# Requires Pentium III or better	# These are offsets into struct v27, defined in viterbi27.h	.set DP,128	.set OLDMETRICS,132	.set NEWMETRICS,136.text	.global update_viterbi27_blk_sse,Branchtab27_sse	.type update_viterbi27_blk_sse,@function	.align 16	update_viterbi27_blk_sse:	pushl %ebp	movl %esp,%ebp	pushl %esi	pushl %edi	pushl %edx	pushl %ebx		movl 8(%ebp),%edx	# edx = vp	testl %edx,%edx	jnz  0f	movl -1,%eax	jmp  err		0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics	movl NEWMETRICS(%edx),%edi	# edi -> new metrics	movl DP(%edx),%edx	# edx -> decisions1:	movl 16(%ebp),%eax	# eax = nbits	decl %eax	jl   2f			# passed zero, we're done	movl %eax,16(%ebp)	xorl %eax,%eax	movl 12(%ebp),%ebx	# %ebx = syms	movb (%ebx),%al	movd %eax,%mm6		# mm6[0] = first symbol	movb 1(%ebx),%al	movd %eax,%mm5		# mm5[0] = second symbol	addl $2,%ebx	movl %ebx,12(%ebp)	punpcklbw %mm6,%mm6	# mm6[1] = mm6[0]	punpcklbw %mm5,%mm5	movq thirtyones,%mm7	pshufw $0,%mm6,%mm6	# copy low word to upper 3	pshufw $0,%mm5,%mm5	# mm6 now contains first symbol in each byte, mm5 the second	# each invocation of this macro does 8 butterflies in parallel	.MACRO butterfly GROUP	# compute branch metrics	movq Branchtab27_sse+(8*\GROUP),%mm4	movq Branchtab27_sse+32+(8*\GROUP),%mm3	pxor %mm6,%mm4	pxor %mm5,%mm3	pavgb %mm3,%mm4			# mm4 contains branch metrics	psrlw $3,%mm4	pand %mm7,%mm4		movq (8*\GROUP)(%esi),%mm0	# Incoming path metric, high bit = 0	movq ((8*\GROUP)+32)(%esi),%mm3	# Incoming path metric, high bit = 1	movq %mm0,%mm2	movq %mm3,%mm1	paddusb %mm4,%mm0	paddusb %mm4,%mm3		# invert branch metrics. This works only because they're 5 bits	pxor %mm7,%mm4		paddusb %mm4,%mm1	paddusb %mm4,%mm2		# Find survivors, leave in mm0,2	pminub %mm1,%mm0	pminub %mm3,%mm2	# get decisions, leave in mm1,3	pcmpeqb %mm0,%mm1	pcmpeqb %mm2,%mm3		# interleave and store new branch metrics in mm0,2	movq %mm0,%mm4	punpckhbw %mm2,%mm0	# interleave second 8 new metrics	punpcklbw %mm2,%mm4	# interleave first 8 new metrics	movq %mm0,(16*\GROUP+8)(%edi)	movq %mm4,(16*\GROUP)(%edi)	# interleave decisions, accumulate into %ebx	movq %mm1,%mm4	punpckhbw %mm3,%mm1	punpcklbw %mm3,%mm4	# Due to an error in the Intel instruction set ref (the register	# fields are swapped), gas assembles pmovmskb incorrectly	# See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html	.byte 0x0f,0xd7,0xc1	# pmovmskb %mm1,%eax	shll $((16*\GROUP+8)&31),%eax	orl %eax,%ebx	.byte 0x0f,0xd7,0xc4	# pmovmskb %mm4,%eax	shll $((16*\GROUP)&31),%eax	orl %eax,%ebx	.endm	# invoke macro 4 times for a total of 32 butterflies	xorl %ebx,%ebx		# clear decisions	butterfly GROUP=0	butterfly GROUP=1	movl %ebx,(%edx)	# stash first 32 decisions	xorl %ebx,%ebx	butterfly GROUP=2	butterfly GROUP=3	movl %ebx,4(%edx)	# stash second 32 decisions	addl $8,%edx		# bump decision pointer			# see if we have to normalize	movl (%edi),%eax	# extract first output metric	andl $255,%eax	cmpl $150,%eax		# is it greater than 150?	movl $0,%eax	jle done		# No, no need to normalize	# Normalize by finding smallest metric and subtracting it	# from all metrics	movq (%edi),%mm0	pminub 8(%edi),%mm0	pminub 16(%edi),%mm0	pminub 24(%edi),%mm0	pminub 32(%edi),%mm0	pminub 40(%edi),%mm0	pminub 48(%edi),%mm0	pminub 56(%edi),%mm0	# mm0 contains 8 smallest metrics	# crunch down to single lowest metric	movq %mm0,%mm1	psrlq $32,%mm0	pminub %mm1,%mm0	movq %mm0,%mm1	psrlq $16,%mm0	pminub %mm1,%mm0	movq %mm0,%mm1	psrlq $8,%mm0	pminub %mm1,%mm0	punpcklbw %mm0,%mm0	# expand to all 8 bytes	pshufw $0,%mm0,%mm0	# mm0 now contains lowest metric in all 8 bytes	# subtract it from every output metric	# Trashes %mm7	.macro PSUBUSBM REG,MEM	movq \MEM,%mm7	psubusb \REG,%mm7	movq %mm7,\MEM	.endm		PSUBUSBM %mm0,(%edi)	PSUBUSBM %mm0,8(%edi)	PSUBUSBM %mm0,16(%edi)	PSUBUSBM %mm0,24(%edi)	PSUBUSBM %mm0,32(%edi)	PSUBUSBM %mm0,40(%edi)	PSUBUSBM %mm0,48(%edi)	PSUBUSBM %mm0,56(%edi)	movd %mm0,%eax	and $0xff,%eaxdone:	# swap metrics	movl %esi,%eax	movl %edi,%esi	movl %eax,%edi	jmp 1b	2:	emms	movl 8(%ebp),%ebx	# ebx = vp	# stash metric pointers	movl %esi,OLDMETRICS(%ebx)	movl %edi,NEWMETRICS(%ebx)	movl %edx,DP(%ebx)	# stash incremented value of vp->dp	xorl %eax,%eaxerr:	popl %ebx	popl %edx	popl %edi	popl %esi	popl %ebp	ret	.data		.align 16thirtyones:	.byte 31,31,31,31,31,31,31,31		

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -