📄 lib1funcs.asm

📁 linux下的gcc编译器
💻 ASM
字号:
;; libgcc routines for the Hitachi H8/300 CPU.
;; Contributed by Steve Chamberlain <sac@cygnus.com>
;; Optimizations by Toshiyasu Morita <toshiyasu.morita@hsa.hitachi.com>

/* Copyright (C) 1994, 2000, 2001, 2002 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* Assembler register definitions.  
*/

/* A0..A3 are the argument/scratch registers, S0..S2 the saved registers.
   The L and H suffixes name the low and high byte of each 16-bit register.  */

#define A0 r0
#define A0L r0l
#define A0H r0h

#define A1 r1
#define A1L r1l
#define A1H r1h

#define A2 r2
#define A2L r2l
#define A2H r2h

#define A3 r3
#define A3L r3l
#define A3H r3h

#define S0 r4
#define S0L r4l
#define S0H r4h

#define S1 r5
#define S1L r5l
#define S1H r5h

#define S2 r6
#define S2L r6l
#define S2H r6h

#ifdef __H8300__
#define MOVP	mov.w	/* pointers are 16 bits */
#define ADDP	add.w
#define CMPP	cmp.w
#define PUSHP	push
#define POPP	pop

#define A0P	r0
#define A1P	r1
#define A2P	r2
#define A3P	r3
#define S0P	r4
#define S1P	r5
#define S2P	r6
#endif

#if defined (__H8300H__) || defined (__H8300S__)
#define MOVP	mov.l	/* pointers are 32 bits */
#define ADDP	add.l
#define CMPP	cmp.l
#define PUSHP	push.l
#define POPP	pop.l

#define A0P	er0
#define A1P	er1
#define A2P	er2
#define A3P	er3
#define S0P	er4
#define S1P	er5
#define S2P	er6

#define A0E	e0
#define A1E	e1
#define A2E	e2
#define A3E	e3
#endif

#ifdef __H8300H__
#ifdef __NORMAL_MODE__
	.h8300hn
#else
	.h8300h
#endif
#endif

#ifdef __H8300S__
#ifdef __NORMAL_MODE__
	.h8300sn
#else
	.h8300s
#endif
#endif

#ifdef L_cmpsi2
#ifdef __H8300__
;; ___cmpsi2: three-way signed compare of two 32-bit values.
;;
;; In:   first operand in A0 (high word) / A1 (low word)
;;       second operand in A2 (high word) / A3 (low word)
;; Out:  A0 = 0 if first < second, 1 if equal, 2 if first > second
;;
;; The high words are compared as signed values; only when they are equal
;; are the low words compared, as unsigned values.
	.section .text
	.align 2
	.global ___cmpsi2
___cmpsi2:
	cmp.w	A0,A2		; high words
	bne	.L2
	cmp.w	A1,A3		; high words equal, look at low words
	bne	.L4
	mov.w	#1,A0		; equal
	rts
.L2:
	bgt	.L5		; signed: second high word is the bigger one
.L3:
	mov.w	#2,A0		; first > second
	rts
.L4:
	bls	.L3		; unsigned: second low word is the smaller one
.L5:
	sub.w	A0,A0		; first < second
	rts
	.end
#endif
#endif /* L_cmpsi2 */

#ifdef L_ucmpsi2
#ifdef __H8300__
;; ___ucmpsi2: three-way unsigned compare of two 32-bit values.
;;
;; In:   first operand in A0 (high word) / A1 (low word)
;;       second operand in A2 (high word) / A3 (low word)
;; Out:  A0 = 0 if first < second, 1 if equal, 2 if first > second
;;
;; Same shape as ___cmpsi2, but the high words are compared unsigned too.
	.section .text
	.align 2
	.global ___ucmpsi2
___ucmpsi2:
	cmp.w	A0,A2		; high words
	bne	.L2
	cmp.w	A1,A3		; high words equal, look at low words
	bne	.L4
	mov.w	#1,A0		; equal
	rts
.L2:
	bhi	.L5		; unsigned: second high word is the bigger one
.L3:
	mov.w	#2,A0		; first > second
	rts
.L4:
	bls	.L3		; unsigned: second low word is the smaller one
.L5:
	sub.w	A0,A0		; first < second
	rts
	.end
#endif
#endif /* L_ucmpsi2 */

#ifdef L_divhi3

;; HImode divides for the H8/300.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

; general purpose normalize routine
;
; dividend in A0
; divisor in A1
; turns both into +ve numbers, and leaves what the answer sign
; should be in A2L (1 = negative quotient, 0 = non-negative)

#ifdef __H8300__
	.section .text
	.align 2
divnorm:
	mov.b	#0x0,A2L
	or	A0H,A0H		; is dividend >= 0
	bge	_lab1
	not	A0H		; no - then make it +ve
	not	A0L
	adds	#1,A0
	xor	#0x1,A2L	; and remember that in A2L
_lab1:
	or	A1H,A1H		; look at divisor
	bge	_lab2
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
	xor	#0x1,A2L	; and toggle sign of result
_lab2:
	rts

;; Basically the same, except that only the sign of the dividend (A0)
;; determines the sign of the result: A2L is set when A0 was negative and
;; is not toggled for a negative divisor.
modnorm:
	mov.b	#0x0,A2L
	or	A0H,A0H		; is dividend >= 0
	bge	_lab7
	not	A0H		; no - then make it +ve
	not	A0L
	adds	#1,A0
	xor	#0x1,A2L	; and remember that in A2L
_lab7:
	or	A1H,A1H		; look at divisor
	bge	_lab8
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
_lab8:
	rts

; A0=A0/A1 signed

	.global	___divhi3
___divhi3:
	bsr	divnorm
	bsr	___udivhi3
; shared tail: negate A0 when A2L says the answer is negative
negans:
	or	A2L,A2L		; should answer be negative ?
	beq	_lab4
	not	A0H		; yes, so make it so
	not	A0L
	adds	#1,A0
_lab4:
	rts

; A0=A0%A1 signed

	.global	___modhi3
___modhi3:
	bsr	modnorm
	bsr	___udivhi3
	mov	A3,A0		; remainder is left in A3 by ___udivhi3
	bra	negans

; A0=A0%A1 unsigned

	.global	___umodhi3
___umodhi3:
	bsr	___udivhi3
	mov	A3,A0		; remainder is left in A3 by ___udivhi3
	rts

; A0=A0/A1 unsigned
; A3=A0%A1 unsigned
; A2H trashed
; D high 8 bits of denom
; d low 8 bits of denom
; N high 8 bits of num
; n low 8 bits of num
; M high 8 bits of mod
; m low 8 bits of mod
; Q high 8 bits of quot
; q low 8 bits of quot
; P preserve

; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
; see how to partition up the expression.

	.global	___udivhi3
___udivhi3:
				; A0 A1 A2 A3
				; Nn Dd       P
	sub.w	A3,A3		; Nn Dd xP 00
	or	A1H,A1H
	bne	divlongway
	or	A0H,A0H
	beq	_lab6

; we know that D == 0 and N is != 0
	mov.b	A0H,A3L		; Nn Dd xP 0N
	divxu	A1L,A3		;          MQ
	mov.b	A3L,A0H	 	; Q
; dealt with N, do n
_lab6:
	mov.b	A0L,A3L		;           n
	divxu	A1L,A3		;          mq
	mov.b	A3L,A0L		; Qq
	mov.b	A3H,A3L		;           m
	mov.b	#0x0,A3H	; Qq       0m
	rts

; D != 0 - which means the denominator does not fit in 8 bits, so the
;          16/8 divide cannot be used: loop around (shift and subtract,
;          8 times) to get the result.

divlongway:
	mov.b	A0H,A3L		; Nn Dd xP 0N
	mov.b	#0x0,A0H	; high byte of answer has to be zero
	mov.b	#0x8,A2H	;       8
div8:
	add.b	A0L,A0L		; n*=2
	rotxl	A3L		; Make remainder bigger
	rotxl	A3H
	sub.w	A1,A3		; remainder -= denom
	bhs	setbit		; set a bit ?
	add.w	A1,A3		;  no : too far , remainder += denom

	dec	A2H
	bne	div8		; next bit
	rts

setbit:
	inc	A0L		; do insert bit
	dec	A2H
	bne	div8		; next bit
	rts

#endif /* __H8300__ */
#endif /* L_divhi3 */

#ifdef L_divsi3

;; 4 byte integer divides for the H8/300.
;;
;; We have one routine which does all the work and lots of
;; little ones which prepare the args and massage the sign.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

	.section .text
	.align 2

; Put abs SIs into r0/r1 and r2/r3, and leave in r6l (S2L) a 1 if the
; result has to be negated afterwards, 0 otherwise.
; This function is here to keep branch displacements small.

#ifdef __H8300__

divnorm:
	mov.b	#0,S2L		; keep the sign in S2
	mov.b	A0H,A0H		; is the numerator -ve
	bge	postive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H

	mov.b	#1,S2L		; the sign will be -ve
postive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	postive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor	#1,S2L		; toggle result sign
postive2:
	rts

;; Basically the same, except that only the sign of the numerator
;; determines the sign of the result (S2L is not toggled for a negative
;; denominator).
modnorm:
	mov.b	#0,S2L		; keep the sign in S2
	mov.b	A0H,A0H		; is the numerator -ve
	bge	mpostive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H

	mov.b	#1,S2L		; the sign will be -ve
mpostive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	mpostive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
mpostive2:
	rts

#else /* __H8300H__ */

divnorm:
	mov.b	#0,S2L		; keep the sign in S2
	mov.l	A0P,A0P		; is the numerator -ve
	bge	postive

	neg.l	A0P		; negate arg
	mov.b	#1,S2L		; the sign will be -ve

postive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	postive2

	neg.l	A1P		; negate arg
	xor.b	#1,S2L		; toggle result sign

postive2:
	rts

;; Basically the same, except that only the sign of the numerator
;; determines the sign of the result (S2L is not toggled for a negative
;; denominator).
modnorm:
	mov.b	#0,S2L		; keep the sign in S2
	mov.l	A0P,A0P		; is the numerator -ve
	bge	mpostive

	neg.l	A0P		; negate arg
	mov.b	#1,S2L		; the sign will be -ve

mpostive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	mpostive2

	neg.l	A1P		; negate arg

mpostive2:
	rts

#endif

; The four entry points below share one frame layout: S2P, S0P and S1P are
; pushed in that order, and exitdiv pops them again after fixing the sign.
;
; numerator in A0/A1 (A0P for H8/300H)
; denominator in A2/A3 (A1P for H8/300H)
; result in A0/A1 (A0P for H8/300H)

; signed remainder
	.global	___modsi3
___modsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P

	bsr	modnorm
	bsr	divmodsi4
	; move the remainder into the result registers
#ifdef __H8300__
	mov	S0,A0
	mov	S1,A1
#else
	mov.l	S0P,A0P
#endif
	bra	exitdiv

; unsigned quotient
	.global	___udivsi3
___udivsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	mov.b	#0,S2L		; keep sign low
	bsr	divmodsi4
	bra	exitdiv

; unsigned remainder
	.global	___umodsi3
___umodsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	mov.b	#0,S2L		; keep sign low
	bsr	divmodsi4
	; move the remainder into the result registers
#ifdef __H8300__
	mov	S0,A0
	mov	S1,A1
#else
	mov.l	S0P,A0P
#endif
	bra	exitdiv

; signed quotient
	.global	___divsi3
___divsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	jsr	divnorm
	jsr	divmodsi4

	; examine what the sign should be
exitdiv:
	POPP	S1P
	POPP	S0P

	or	S2L,S2L		; S2L != 0 means the result must be negated
	beq	reti

	; should be -ve
#ifdef __H8300__
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
#else /* __H8300H__ */
	neg.l	A0P
#endif

reti:
	POPP	S2P
	rts

	; takes A0/A1 numerator (A0P for H8/300H)
	; A2/A3 denominator (A1P for H8/300H)
	; returns A0/A1 quotient (A0P for H8/300H)
	; S0/S1 remainder (S0P for H8/300H)
	; trashes S2 (only S2H is written; the sign flag in S2L survives)

#ifdef __H8300__

divmodsi4:
	sub.w	S0,S0		; zero play area
	mov.w	S0,S1
	mov.b	A2H,S2H		; OR together the top three denominator bytes
	or	A2L,S2H
	or	A3H,S2H
	bne	DenHighZero	; some of them set: shift and subtract
	; the denominator fits in A3L, so use the 16/8 divide a byte at a
	; time, starting at the first non-zero numerator byte
	mov.b	A0H,A0H
	bne	NumByte0Zero
	mov.b	A0L,A0L
	bne	NumByte1Zero
	mov.b	A1H,A1H
	bne	NumByte2Zero
	bra	NumByte3Zero
NumByte0Zero:
	mov.b	A0H,S1L
	divxu	A3L,S1
	mov.b	S1L,A0H
NumByte1Zero:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
NumByte2Zero:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
NumByte3Zero:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L		; final remainder byte into S1L
	mov.b	#0x0,S1H
	rts

; have to do the divide by shift and test
; (despite the label name, we get here when the upper denominator bytes
; are NOT all zero)
DenHighZero:
	; pre-shift by one byte: top numerator byte goes into the remainder
	mov.b	A0H,S1L
	mov.b	A0L,A0H
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H		; only do 24 iterations

nextbit:
	add.w	A1,A1		; double the answer guess
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L		; double remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1		; does it all fit
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	setone

	add.w	A3,S1		; no, restore mistake
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	nextbit
	rts

setone:
	inc	A1L		; record a 1 quotient bit
	dec	S2H
	bne	nextbit
	rts

#else /* __H8300H__ */

divmodsi4:
	sub.l	S0P,S0P		; zero play area
	mov.w	A1E,A1E		; denominator top word 0?
	bne	DenHighZero

	; do it the easy way, see page 107 in manual
	mov.w	A0E,A2
	extu.l	A2P
	divxu.w	A1,A2P
	mov.w	A2E,A0E
	divxu.w	A1,A0P
	mov.w	A0E,S0
	mov.w	A2,A0E
	extu.l	S0P
	rts

; (despite the label name, we get here when the denominator top word is
; NOT zero)
DenHighZero:
	; pre-shift by one byte: top numerator byte goes into the remainder
	mov.w	A0E,A2
	mov.b	A2H,S0L
	mov.b	A2L,A2H
	mov.b	A0H,A2L
	mov.w	A2,A0E
	mov.b	A0L,A0H
	mov.b	#0,A0L
	mov.b	#24,S2H		; only do 24 iterations

nextbit:
	shll.l	A0P		; double the answer guess
	rotxl.l	S0P		; double remainder
	sub.l	A1P,S0P		; does it all fit?
	bhs	setone

	add.l	A1P,S0P		; no, restore mistake
	dec	S2H
	bne	nextbit
	rts

setone:
	inc	A0L		; record a 1 quotient bit
	dec	S2H
	bne	nextbit
	rts

#endif
#endif /* L_divsi3 */

#ifdef L_mulhi3

;; HImode multiply.
; The H8/300 only has an 8*8->16 multiply.
; The answer is the same as:
;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore A1.h * A0.h because that will fall off the top)
; A0 in
; A1 in
; A0 answer
; A2 and A3 are used as scratch

#ifdef __H8300__
	.section .text
	.align 2
	.global	___mulhi3
___mulhi3:
	mov.b	A1L,A2L		; A2l gets srcb.l
	mulxu	A0L,A2		; A2 gets first sub product

	mov.b	A0H,A3L		; prepare for
	mulxu	A1L,A3		; second sub product

	add.b	A3L,A2H		; sum first two terms

	mov.b	A1H,A3L		; third sub product
	mulxu	A0L,A3

	add.b	A3L,A2H		; almost there
	mov.w	A2,A0		; that is
	rts

#endif
#endif /* L_mulhi3 */

#ifdef L_mulsi3

;; SImode multiply.
;;
;; I think that shift and add may be sufficient for this.  Using the
;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  
;; This way the inner loop uses maybe 20 cycles + overhead, but terminates
;; quickly on small args.
;;
;; A0/A1 src_a
;; A2/A3 src_b
;;
;;  while (a)
;;    {
;;      if (a & 1)
;;        r += b;
;;      a >>= 1;
;;      b <<= 1;
;;    }

	.section .text
	.align 2

#ifdef __H8300__

;; ___mulsi3 (H8/300): 32 x 32 -> 32 bit multiply by shift and add.
;; In:   a in A0 (high word) / A1 (low word), b in A2 (high) / A3 (low)
;; Out:  product in A0/A1
;; S0/S1 accumulate the result; S0, S1 and S2 are saved and restored.
	.global	___mulsi3
___mulsi3:
	PUSHP	S0P
	PUSHP	S1P
	PUSHP	S2P

	sub.w	S0,S0		; r = 0
	sub.w	S1,S1

	; while (a)
_top:
	mov.w	A0,A0
	bne	_more
	mov.w	A1,A1
	beq	_done
_more:
	; if (a & 1)
	bld	#0,A1L		; carry = bit 0 of a
	bcc	_nobit
	; r += b
	add.w	A3,S1
	addx	A2L,S0L
	addx	A2H,S0H
_nobit:
	; a >>= 1
	shlr	A0H
	rotxr	A0L
	rotxr	A1H
	rotxr	A1L

	; b <<= 1
	add.w	A3,A3
	addx	A2L,A2L
	addx	A2H,A2H
	bra 	_top

_done:
	mov.w	S0,A0		; return r
	mov.w	S1,A1
	POPP	S2P
	POPP	S1P
	POPP	S0P
	rts

#else /* __H8300H__ */

;
; mulsi3 for H8/300H - based on Hitachi SH implementation
;
; by Toshiyasu Morita
;
; Old code:
;
; 16b * 16b = 372 states (worst case)
; 32b * 32b = 724 states (worst case)
;
; New code:
;
; 16b * 16b =  48 states
; 16b * 32b =  72 states
; 32b * 32b =  92 states
;
; With er0 = a:b and er1 = c:d (high word : low word) the low 32 bits of
; the product are b*d + ((a*d + c*b) << 16); a*c does not contribute.
; Each cross product is skipped when its high-word factor is zero.
; er2 and er3 are used as scratch; the result is returned in er0.
;

	.global	___mulsi3
___mulsi3:
	mov.w	r1,r2   ; ( 2 states) b * d
	mulxu	r0,er2  ; (22 states)

	mov.w	e0,r3   ; ( 2 states) a * d
	beq	L_skip1 ; ( 4 states)
	mulxu	r1,er3  ; (22 states)
	add.w	r3,e2   ; ( 2 states)

L_skip1:
	mov.w	e1,r3   ; ( 2 states) c * b
	beq	L_skip2 ; ( 4 states)
	mulxu	r0,er3  ; (22 states)
	add.w	r3,e2   ; ( 2 states)

L_skip2:
	mov.l	er2,er0	; ( 2 states)
	rts		; (10 states)

#endif
#endif /* L_mulsi3 */

#ifdef L_fixunssfsi_asm
/* For the h8300 we use asm to save some bytes, to
   allow more programs to fit into the tiny address
   space.  For the H8/300H and H8S, the C version is good enough.  */
#ifdef __H8300__
/* We still treat NANs different than libgcc2.c, but then, the
   behavior is undefined anyways.  */

/* ___fixunssfsi: convert a single-precision float to an unsigned
   32-bit integer.

   In:   float in r0 (high word) / r1 (low word); r0h holds the sign bit
	 and the top seven exponent bits, A0L holds the lowest exponent
	 bit and the top seven fraction bits.  (Register layout as implied
	 by the byte shuffle at L_shift8 - confirm against the ABI.)
   Out:  result in A0 (high word) / A1 (low word).

   A top byte of 0x4f covers the biased exponents 158 and 159, i.e.
   values from 2^31 up to (but not including) 2^33.  Anything whose top
   byte is below that as a signed byte (smaller magnitudes and all
   negative inputs) is handed to ___fixsfsi.  The compare constant used
   to be 0x47, which sent every value from 32768.0 upwards down the
   large-number path and produced wrong results.  */
	.global	___fixunssfsi
___fixunssfsi:
	cmp.b #0x4f,r0h		; at least 2^31 ?
	bge Large_num
	jmp     @___fixsfsi	; no: the signed conversion handles it
Large_num:
	bhi L_huge_num		; top byte above 0x4f: saturate
	xor.b #0x80,A0L		; low exponent bit clear means [2^31, 2^32);
	bmi L_shift8		; the xor also plants the implicit leading 1
L_huge_num:
	mov.w #65535,A0		; out of range: return 0xffffffff
	mov.w A0,A1
	rts
L_shift8:
	; 24-bit significand in A0L:A1H:A1L, shifted left 8 bits, is the result
	mov.b A0L,A0H
	mov.b A1H,A0L
	mov.b A1L,A1H
	mov.b #0,A1L
	rts
#endif
#endif /* L_fixunssfsi_asm */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -