lbn960jx.s

来自「包含哈希,对称以及非对称的经典算法包含经典事例」· S 代码 · 共 251 行
251 行
# Assembly-language bignum primitives for the i960 Jx series.
#
# The Jx series is a fairly straightforward single-instruction-issue
# implementation, with a 1-cycle-issue 4-cycle-latency non-pipelined
# multiplier that we can use.  Note also that loads which hit in the
# cache have 2 cycles of latency and stores stall until all pending
# loads are done.
#
# What is intensely annoying about the i960 is that it uses the same
# flags for all conditional branches (even compare-and-branch sets the
# flags) AND for the carry bit.  Further, it is hard to manipulate
# that bit.
#
# Calling conventions:
# The r registers are all local, if you set them up.  There's an alternative
# calling convention that uses bal (branch and link) and doesn't set them up.
# Currently, all of these functions are designed to work that way.
# NOTE(review): the routines below touch only g registers and all end in
#	`ret`; confirm which of the two conventions "that way" refers to.
# g0-g7 are argument registers and volatile across calls.  return in g0-g3.
# g8-g11 are extra argument registers, and volatile if used, but
#	preserved if not.  Here, they are not.
# g12 is used for PIC, and is preserved.
# g13 is a pointer to a structure return value, if used, and is volatile.
#	(All three routines below use it as scratch for the current *in word.)
# g14 is magic, and is used as a return address in the branch-and-link
#	convention, and as a pointer to an argument block if the arguments
#	won't fit in registers, but is usually hardwired 0 and must be
#	returned set to zero (0).
# g15 is the frame pointer, and shouldn't be messed with.
# The AC (condition codes) are all volatile.
# The fp registers are all volatile, but irrelevant.
#
# NOTE(review): every routine fetches and multiplies the first word of "in"
# before it looks at the length, so len >= 1 appears to be assumed; confirm
# against the callers.
#
# BNWORD32
# lbnMulAdd1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
# This adds "k" * "in" to "len" words of "out" and returns the word of
# carry.
#
# For doing multiply-add, the 960 is a bit annoying because it uses
# the same status bits for the carry flag and for the loop indexing
# computation, and doesn't have an "add with carry out but not carry in"
# instruction.  Fortunately, we can arrange to have the loop indexing
# leave the carry bit clear most of the time.
#
# The basic sequence of the loop is:
# 1. Multiply k * *in++ -> high, low
# 2. Addc carry word and carry bit to low
# 3. Addc carry bit to high, producing carry word (note: cannot generate carry!)
# 4. Addc low to *out++
#
# Note that the carry bit set in step 4 is used in step 2.  The only place
# in this loop that the carry flag isn't in use is between steps 3 and 4,
# so we have to rotate the loop to place the loop indexing operations here.
# (Which consist of a compare-and-decrement and a conditional branch.)
# The loop above ignores the details of when to do loads and stores, which
# have some flexibility, but must be carefully scheduled to avoid stalls.
#
# The first iteration has no carry word in, so it requires only steps 1 and 4,
# and since we begin the loop with step 4, it boils down to just step 1
# followed by the loop indexing (which clears the carry bit in preparation
# for step 4).
#
# The loop body is unrolled twice: the length is halved up front, an even
# length enters at the second copy (ma_loop1) and an odd length at the
# first copy (ma_loop), or goes straight to ma_done when len == 1.
#
# Arguments are passed as follows:
# g0 - out pointer
# g1 - in pointer
# g2 - length
# g3 - k
# The other registers are used as follows.
# g4 - low word of product
# g5 - high word of product
# g6 - current word of "out"
# g7 - carry word
# g13 - current word of "in"
# The carry word is returned in g0.
	.globl _lbnMulAdd1_32
_lbnMulAdd1_32:
	ld	(g1),g13	# Fetch *in
	addo	g1,4,g1		# Increment in
	emul	g13,g3,g4	# Do multiply (step 1), product in g4/g5
	ld	(g0),g6		# Fetch *out
	chkbit	0,g2		# Check if loop counter was odd
	shro	1,g2,g2		# Divide loop counter by 2
	mov	g5,g7		# Move high word to carry
	bno	ma_loop1	# If even, jump to ma_loop1
	cmpo	0,g2		# If odd, was it 1 (now 0)?
	be	ma_done		# If equal (carry set), jump to ending code

# Entered with carry bit clear
ma_loop:
	ld	(g1),g13	# Fetch *in
	addc	g4,g6,g6	# Add low to *out (step 4), generate carry
	emul	g13,g3,g4	# Do multiply (step 1)
	st	g6,(g0)		# Write out *out
	addo	g0,4,g0		# Increment out
	addo	g1,4,g1		# Increment in
	ld	(g0),g6		# Fetch next *out
	addc	g7,g4,g4	# Add carries to low (step 2)
	addc	g5,0,g7		# Add carry bit to high (step 3) & clear carry
# Second copy of the loop body; also the entry point for an even length.
ma_loop1:
	ld	(g1),g13	# Fetch *in
	addc	g4,g6,g6	# Add low to *out (step 4), generate carry
	emul	g13,g3,g4	# Do multiply (step 1)
	st	g6,(g0)		# Write out *out
	addo	g0,4,g0		# Increment out
	addo	g1,4,g1		# Increment in
	ld	(g0),g6		# Fetch next *out
	addc	g7,g4,g4	# Add carries to low (step 2)
	addc	g5,0,g7		# Add carry bit to high (step 3) & clear carry
	cmpdeco	1,g2,g2		# Loop indexing: compare counter with 1, then decrement
	bne	ma_loop		# More pairs to do (carry left clear for step 4)

# When we come here, carry is *set*, and we still have to do step 4
ma_done:
	cmpi	0,1		# Clear carry (equal flag)
	addc	g4,g6,g6	# Add low to *out (step 4), generate carry
	st	g6,(g0)		# Write out *out
	addc	g7,0,g0		# Add carry bit and word to produce return value
	ret

# Now, multiply N by 1 is similarly annoying.  We only have one add in the
# whole loop, which should just be able to leave its carry output in the
# carry flag for the next iteration, but we need the condition codes to do
# loop testing.  *Sigh*.
#
# void
# lbnMulN1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
# This multiplies "len" words of "in" by "k" and stores the len+1 word
# result in "out".
#
# To avoid having to do a move after the first iteration, for the first
# step, g4/g5 is the product.  For second step, g6/g7 is used for product
# storage and g5 is the carry in.  It alternates from then on.
#
# Arguments are passed as follows:
# g0 - out pointer
# g1 - in pointer
# g2 - length
# g3 - k
# The other registers are used as follows.
# g4/g5 - low/high word of product (first half of the unrolled loop)
# g6/g7 - low/high word of product (second half of the unrolled loop)
# g13 - current word of "in"
	.globl _lbnMulN1_32
_lbnMulN1_32:
	ld	(g1),g13	# Fetch *in
	addo	g1,4,g1		# Increment in
	emul	g13,g3,g4	# Do multiply (step 1), product in g4/g5
	chkbit	0,g2		# Check if loop counter was odd
	shro	1,g2,g2		# Divide loop counter by 2
	bno	m_loop1		# If even, jump to m_loop1
	mov	g4,g6		# Odd: m_loop/m_done expect the result in g6...
	cmpo	0,g2		# If counter was odd, was it 1 (now 0)?
	mov	g5,g7		# ...and the carry word in g7
	be	m_done		# If equal (carry set), jump to ending code

# Entered with carry bit clear
m_loop:	# Result in g6, carry word in g7
	ld	(g1),g13	# Fetch *in
	addo	g1,4,g1		# Increment in
	emul	g13,g3,g4	# Do multiply (step 1)
	st	g6,(g0)		# Write out *out
	addo	g0,4,g0		# Increment out
	addc	g7,g4,g4	# Add carries to low (step 2)
# No need to add carry bit here, because it'll get remembered until next addc.
#	addc	g5,0,g5 	# Add carry bit to high (step 3)
m_loop1:	# Result in g4, carry word in g5 (plus any pending carry bit)
	ld	(g1),g13	# Fetch *in
	addo	g1,4,g1		# Increment in
	emul	g13,g3,g6	# Do multiply (step 1), product in g6/g7
	st	g4,(g0)		# Write out *out
	addo	g0,4,g0		# Increment out
	addc	g5,g6,g6	# Add carries to low (step 2)
	addc	g7,0,g7		# Add carry bit to high (step 3)
	cmpdeco	1,g2,g2		# Loop indexing: compare counter with 1, then decrement
	bne	m_loop

# When we come here, we have to store g6 and the carry word in g7.
m_done:
	st	g6,(g0)		# Write out the last low word
	st	g7,4(g0)	# Write out the carry word to the following word of out
	ret

# BNWORD32
# lbnMulSub1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k)
# This subtracts "k" * "in" from "len" words of "out" and returns the word of
# borrow.
#
# This is similar to multiply-add, but actually a bit more obnoxious,
# because of the carry situation.  The 960 uses a carry (rather than a borrow)
# bit on subtracts, so the carry bit should be 1 for a subc to do the
# same thing as an ordinary subo.  So we use two carry chains: one from
# the add of the low-order words to the high-order carry word, and a second,
# which uses an extra register, to connect the subtracts.  This avoids
# the need to fiddle with inverting the bit in the usual case.
#
# Arguments are passed as follows:
# g0 - out pointer
# g1 - in pointer
# g2 - length
# g3 - k
# The other registers are used as follows.
# g4 - low word of product
# g5 - high word of product
# g6 - current word of "out"
# g7 - carry word
# g13 - current word of "in"
# g14 - remembered carry bit (1 = no borrow pending); set back to 0 before
#	returning
# The borrow word is returned in g0.
	.globl _lbnMulSub1_32
_lbnMulSub1_32:
	ld	(g1),g13	# Fetch *in
	addo	g1,4,g1		# Increment in
	emul	g13,g3,g4	# Do multiply (step 1), product in g4/g5
	ld	(g0),g6		# Fetch *out
	chkbit	0,g2		# Check if loop counter was odd
	mov	1,g14		# Set remembered carry for first iteration
	shro	1,g2,g2		# Divide loop counter by 2
	mov	g5,g7		# Move high word to carry
	bno	ms_loop1	# If even, jump to ms_loop1
	cmpo	0,g2		# If odd, was it 1 (now 0)?
	be	ms_done		# If equal (carry set), jump to ending code

# Entered with carry bit clear
ms_loop:
	ld	(g1),g13	# Fetch *in
	cmpi	g14,1		# Carry (equal flag) = remembered carry for subtract
	subc	g4,g6,g6	# Subtract low from *out (step 4), gen. carry
	emul	g13,g3,g4	# Do multiply (step 1)
	addc	0,0,g14		# g14 = carry, then clear carry
	st	g6,(g0)		# Write out *out
	addo	g0,4,g0		# Increment out
	addo	g1,4,g1		# Increment in
	ld	(g0),g6		# Fetch next *out
	addc	g7,g4,g4	# Add carries to low (step 2)
	addc	g5,0,g7		# Add carry bit to high (step 3)
# Second copy of the loop body; also the entry point for an even length.
ms_loop1:
	ld	(g1),g13	# Fetch *in
	cmpi	g14,1		# Carry (equal flag) = remembered carry for subtract
	subc	g4,g6,g6	# Subtract low from *out (step 4), gen. carry
	emul	g13,g3,g4	# Do multiply (step 1)
	addc	0,0,g14		# g14 = carry, then clear carry
	st	g6,(g0)		# Write out *out
	addo	g0,4,g0		# Increment out
	addo	g1,4,g1		# Increment in
	ld	(g0),g6		# Fetch next *out
	addc	g7,g4,g4	# Add carries to low (step 2)
	addc	g5,0,g7		# Add carry bit to high (step 3)
	cmpdeco	1,g2,g2		# Loop indexing: compare counter with 1, then decrement
	bne	ms_loop

# When we come here, carry is *set*, and we still have to do step 4
ms_done:
	cmpi	g14,1		# Carry (equal flag) = remembered carry for subtract
	subc	g4,g6,g6	# Subtract low from *out (step 4), generate carry
	st	g6,(g0)		# Write out *out
	subc	0,0,g14		# g14 = -1 if no carry (borrow), 0 if carry
	subo	g14,g7,g0	# g0 = g7 - g14: carry word plus borrow bit, the return value
	mov	0,g14		# Restore g14 to 0 for return
	ret
lbn960jx.s - 源码说明

本页面展示了「包含哈希,对称以及非对称的经典算法包含经典事例」中的 lbn960jx.s 源码文件，该文件为汇编语言（.s）源文件，共 251 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与对称相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?