memcpy.s

来自「Glibc 2.3.2源代码(解压后有100多M)」· S 代码 · 共 920 行 · 第 1/3 页
920 行
/* Copy SIZE bytes from SRC to DEST.
   For UltraSPARC.
   Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller (davem@caip.rutgers.edu) and
		  Jakub Jelinek (jakub@redhat.com).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* Syntax and conventions used below:
     - SPARC V9 assembly, run through the C preprocessor.
     - An instruction indented by one extra space sits in the delay slot
       of the branch directly above it.
     - The trailing IEU0/IEU1/Load/Store/CTI/GRU "Group" remarks are the
       original authors' instruction-scheduling annotations.  */

#include <sysdep.h>
#include <asm/asi.h>

/* If XCC has not been defined before this point: define USE_BPR, mark
   %g2, %g3 and %g6 as scratch registers for the assembler, and make XCC
   expand to xcc (the 64-bit condition codes).
   NOTE(review): presumably a 32-bit wrapper pre-defines XCC before
   including this file -- confirm against the other sysdeps directories.  */
#ifndef XCC
#define USE_BPR
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#define XCC	xcc
#endif

/* Bit mask 4; by its name the FEF (FPU enable) bit of %fprs.  It is not
   referenced in the part of the file visible to this review.  */
#define FPRS_FEF	4

/* FREG_FROB: emit eight faligndata instructions.  Each one combines an
   adjacent pair of the nine double registers f1..f9 and writes the result
   to %f48, %f50, ... %f62, i.e. it produces one realigned 64-byte block
   in %f48-%f62.  The byte offset used by faligndata is whatever the last
   alignaddr left in the GSR.  */
#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)		\
	faligndata	%f1, %f2, %f48;				\
	faligndata	%f2, %f3, %f50;				\
	faligndata	%f3, %f4, %f52;				\
	faligndata	%f4, %f5, %f54;				\
	faligndata	%f5, %f6, %f56;				\
	faligndata	%f6, %f7, %f58;				\
	faligndata	%f7, %f8, %f60;				\
	faligndata	%f8, %f9, %f62;

/* MAIN_LOOP_CHUNK: one step of the block-copy loop (six instructions).
     - ldda loads from [src] through %asi into the registers starting at
       fdest;
     - src and dest each advance by 0x40;
     - len is decremented by 0x40 and the condition codes are set;
     - if len reached zero, control goes to jmptgt;
     - the stda in the delay slot (executed on both paths, the branch is
       not annulled) stores the registers starting at fsrc to the
       pre-increment dest address through %asi.
   %asi is set to ASI_BLK_P at label 202 before these macros run.  */
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
	ldda		[%src] %asi, %fdest;			\
	add		%src, 0x40, %src;			\
	add		%dest, 0x40, %dest;			\
	subcc		%len, 0x40, %len;			\
	be,pn		%xcc, jmptgt;				\
	 stda		%fsrc, [%dest - 0x40] %asi;

/* LOOP_CHUNK1/2/3: MAIN_LOOP_CHUNK storing %f48 and reloading the register
   group that starts at %f0, %f16 or %f32 respectively.  */
#define LOOP_CHUNK1(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

/* STORE_SYNC: store the registers starting at fsrc to [dest] through %asi
   and advance dest by 0x40.  The macro itself contains no barrier; every
   use below is followed by an explicit "membar #Sync".  */
#define STORE_SYNC(dest, fsrc)					\
	stda		%fsrc, [%dest] %asi;			\
	add		%dest, 0x40, %dest;

/* STORE_JUMP: like STORE_SYNC, then branch always to target.  The macro
   ends on the branch, so the instruction written after it at the use
   site ("membar #Sync" below) occupies the delay slot.  */
#define STORE_JUMP(dest, fsrc, target)				\
	stda		%fsrc, [%dest] %asi;			\
	add		%dest, 0x40, %dest;			\
	ba,pt		%xcc, target;

/* VISLOOP_PAD: fifteen nops.  One loop variant below is 113 instructions;
   the padding brings it to 128 instructions = 512 bytes, the stride the
   computed jump at label 203 relies on (index << 9).  */
#define VISLOOP_PAD nop; nop; nop; nop;				\
		    nop; nop; nop; nop;				\
		    nop; nop; nop; nop;				\
		    nop; nop; nop;

/* FINISH_VISCHUNK: subtract 8 from left; if the result is negative go to
   205f.  The faligndata in the delay slot always runs and leaves the next
   realigned doubleword in %f48.  On fall-through that doubleword is stored
   to [dest] and dest advances by 8.  Label 205 is outside the visible part
   of the file.  */
#define FINISH_VISCHUNK(dest, f0, f1, left)			\
	subcc		%left, 8, %left;			\
	bl,pn		%xcc, 205f;				\
	 faligndata	%f0, %f1, %f48;				\
	std		%f48, [%dest];				\
	add		%dest, 8, %dest;

/* UNEVEN_VISCHUNK: subtract 8 from left; if the result is negative go to
   205f.  The fsrc1 in the delay slot copies f0 to f1 on both paths.  The
   fall-through then branches always to 204f with the annul bit set, so no
   delay-slot instruction is executed for that branch.  Labels 204 and 205
   are outside the visible part of the file.  */
#define UNEVEN_VISCHUNK(dest, f0, f1, left)			\
	subcc		%left, 8, %left;			\
	bl,pn		%xcc, 205f;				\
	 fsrc1		%f0, %f1;				\
	ba,a,pt		%xcc, 204f;

	/* Macros for non-VIS memcpy code. */

/* MOVE_BIGCHUNK: copy 32 bytes from src + offset to dst + offset.  The
   source is read with four 64-bit loads; every doubleword is written as
   two 32-bit stores (low word at +4, then the high word, obtained with
   srlx 32, at +0).  t0-t3 are clobbered.
   NOTE(review): presumably used when dst is only 4-byte aligned.  */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)		\
	ldx		[%src + offset + 0x00], %t0;		\
	ldx		[%src + offset + 0x08], %t1;		\
	ldx		[%src + offset + 0x10], %t2;		\
	ldx		[%src + offset + 0x18], %t3;		\
	stw		%t0, [%dst + offset + 0x04];		\
	srlx		%t0, 32, %t0;				\
	stw		%t0, [%dst + offset + 0x00];		\
	stw		%t1, [%dst + offset + 0x0c];		\
	srlx		%t1, 32, %t1;				\
	stw		%t1, [%dst + offset + 0x08];		\
	stw		%t2, [%dst + offset + 0x14];		\
	srlx		%t2, 32, %t2;				\
	stw		%t2, [%dst + offset + 0x10];		\
	stw		%t3, [%dst + offset + 0x1c];		\
	srlx		%t3, 32, %t3;				\
	stw		%t3, [%dst + offset + 0x18];

/* MOVE_BIGALIGNCHUNK: copy 64 bytes from src + offset to dst + offset with
   64-bit loads and stores, as two groups of 32 bytes.  t0-t3 are
   clobbered.  */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src + offset + 0x00], %t0;		\
	ldx		[%src + offset + 0x08], %t1;		\
	ldx		[%src + offset + 0x10], %t2;		\
	ldx		[%src + offset + 0x18], %t3;		\
	stx		%t0, [%dst + offset + 0x00];		\
	stx		%t1, [%dst + offset + 0x08];		\
	stx		%t2, [%dst + offset + 0x10];		\
	stx		%t3, [%dst + offset + 0x18];		\
	ldx		[%src + offset + 0x20], %t0;		\
	ldx		[%src + offset + 0x28], %t1;		\
	ldx		[%src + offset + 0x30], %t2;		\
	ldx		[%src + offset + 0x38], %t3;		\
	stx		%t0, [%dst + offset + 0x20];		\
	stx		%t1, [%dst + offset + 0x28];		\
	stx		%t2, [%dst + offset + 0x30];		\
	stx		%t3, [%dst + offset + 0x38];

/* MOVE_LASTCHUNK: copy the 16 bytes that end at src - offset to the 16
   bytes that end at dst - offset.  64-bit loads, 32-bit stores; t2 and t3
   receive the high words, t0 and t1 keep the loaded values.  */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src - offset - 0x10], %t0;		\
	ldx		[%src - offset - 0x08], %t1;		\
	stw		%t0, [%dst - offset - 0x0c];		\
	srlx		%t0, 32, %t2;				\
	stw		%t2, [%dst - offset - 0x10];		\
	stw		%t1, [%dst - offset - 0x04];		\
	srlx		%t1, 32, %t3;				\
	stw		%t3, [%dst - offset - 0x08];

/* MOVE_LASTALIGNCHUNK: same 16-byte range as MOVE_LASTCHUNK, copied with
   64-bit loads and stores.  */
#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)		\
	ldx		[%src - offset - 0x10], %t0;		\
	ldx		[%src - offset - 0x08], %t1;		\
	stx		%t0, [%dst - offset - 0x10];		\
	stx		%t1, [%dst - offset - 0x08];

	/* Macros for non-VIS memmove code. */

/* RMOVE_BIGCHUNK: mirror image of MOVE_BIGCHUNK for copying towards lower
   addresses: copies the 32 bytes that end at src - offset to the 32 bytes
   that end at dst - offset, using 64-bit loads and 32-bit stores.
   t0-t3 are clobbered.  */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src - offset - 0x20], %t0;		\
	ldx		[%src - offset - 0x18], %t1;		\
	ldx		[%src - offset - 0x10], %t2;		\
	ldx		[%src - offset - 0x08], %t3;		\
	stw		%t0, [%dst - offset - 0x1c];		\
	srlx		%t0, 32, %t0;				\
	stw		%t0, [%dst - offset - 0x20];		\
	stw		%t1, [%dst - offset - 0x14];		\
	srlx		%t1, 32, %t1;				\
	stw		%t1, [%dst - offset - 0x18];		\
	stw		%t2, [%dst - offset - 0x0c];		\
	srlx		%t2, 32, %t2;				\
	stw		%t2, [%dst - offset - 0x10];		\
	stw		%t3, [%dst - offset - 0x04];		\
	srlx		%t3, 32, %t3;				\
	stw		%t3, [%dst - offset - 0x08];

/* RMOVE_BIGALIGNCHUNK: copies the 64 bytes that end at src - offset to the
   64 bytes that end at dst - offset with 64-bit loads and stores; the
   upper 32 bytes are moved first.  t0-t3 are clobbered.  */
#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src - offset - 0x20], %t0;		\
	ldx		[%src - offset - 0x18], %t1;		\
	ldx		[%src - offset - 0x10], %t2;		\
	ldx		[%src - offset - 0x08], %t3;		\
	stx		%t0, [%dst - offset - 0x20];		\
	stx		%t1, [%dst - offset - 0x18];		\
	stx		%t2, [%dst - offset - 0x10];		\
	stx		%t3, [%dst - offset - 0x08];		\
	ldx		[%src - offset - 0x40], %t0;		\
	ldx		[%src - offset - 0x38], %t1;		\
	ldx		[%src - offset - 0x30], %t2;		\
	ldx		[%src - offset - 0x28], %t3;		\
	stx		%t0, [%dst - offset - 0x40];		\
	stx		%t1, [%dst - offset - 0x38];		\
	stx		%t2, [%dst - offset - 0x30];		\
	stx		%t3, [%dst - offset - 0x28];

/* RMOVE_LASTCHUNK: copy the 16 bytes starting at src + offset to
   dst + offset with 64-bit loads and 32-bit stores; t2 and t3 receive the
   high words.  */
#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src + offset + 0x00], %t0;		\
	ldx		[%src + offset + 0x08], %t1;		\
	stw		%t0, [%dst + offset + 0x04];		\
	srlx		%t0, 32, %t2;				\
	stw		%t2, [%dst + offset + 0x00];		\
	stw		%t1, [%dst + offset + 0x0c];		\
	srlx		%t1, 32, %t3;				\
	stw		%t3, [%dst + offset + 0x08];

/* RMOVE_LASTALIGNCHUNK: same 16-byte range as RMOVE_LASTCHUNK, copied with
   64-bit loads and stores.  */
#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)		\
	ldx		[%src + offset + 0x00], %t0;		\
	ldx		[%src + offset + 0x08], %t1;		\
	stx		%t0, [%dst + offset + 0x00];		\
	stx		%t1, [%dst + offset + 0x08];

	.text
	.align		32

/* bcopy
   NOTE(review): the code matches the conventional prototype
   void bcopy (const void *src, void *dest, size_t n), i.e. on entry
   %o0 = src, %o1 = dest, %o2 = n -- confirm against the C header.

   The two pointer arguments are swapped so that %o0 = dest and %o1 = src
   afterwards, then:
     - if (%o1 - %o0 on entry) >= n as an unsigned compare, control goes
       to 210f;
     - otherwise, if n is non-zero, control goes to 220f;
     - otherwise the function returns.
   The add in the delay slot of brnz is not annulled, so %o0 becomes
   dest + n on both of the last two paths.  Without USE_BPR the length is
   first zero-extended from 32 bits.  Labels 210 and 220 are outside the
   visible part of the file.
   Clobbers %g3, %o4 and the integer condition codes.  */
ENTRY(bcopy)
	sub		%o1, %o0, %o4			/* IEU0		Group		*/
	mov		%o0, %g3			/* IEU1				*/
	cmp		%o4, %o2			/* IEU1		Group		*/
	mov		%o1, %o0			/* IEU0				*/
	bgeu,pt		%XCC, 210f			/* CTI				*/
	 mov		%g3, %o1			/* IEU0		Group		*/
#ifndef USE_BPR
	srl		%o2, 0, %o2			/* IEU1				*/
#endif
	brnz,pn		%o2, 220f			/* CTI		Group		*/
	 add		%o0, %o2, %o0			/* IEU0				*/
	retl
	 nop
END(bcopy)

/* 200: prologue of the VIS block copy; it is entered by a branch from code
   outside the visible part of the file.  Register roles as used by the
   loads and stores below: %o0 = dest, %o1 = src, %o2 = remaining length.
   The first branch consumes condition codes set by the branching code,
   and %g2 is read before anything visible writes it.
   NOTE(review): presumably %g2 = dest & 7 and the flags are the result of
   that test -- confirm at the branch site.

   Not-taken path: %g2 becomes 8 - %g2 and is subtracted from the length,
   then %g2 bytes are copied with byte loads/stores: a single byte first
   if dest is odd (loop 1), then two bytes per iteration (loop 2).  */
	.align		32
200:	be,pt		%xcc, 201f			/* CTI				*/
	 andcc		%o0, 0x38, %g5			/* IEU1		Group		*/
	mov		8, %g1				/* IEU0				*/
	sub		%g1, %g2, %g2			/* IEU0		Group		*/
	andcc		%o0, 1, %g0			/* IEU1				*/
	be,pt		%icc, 2f			/* CTI				*/
	 sub		%o2, %g2, %o2			/* IEU0		Group		*/
1:	ldub		[%o1], %o5			/* Load		Group		*/
	add		%o1, 1, %o1			/* IEU0				*/
	add		%o0, 1, %o0			/* IEU1				*/
	subcc		%g2, 1, %g2			/* IEU1		Group		*/
	be,pn		%xcc, 3f			/* CTI				*/
	 stb		%o5, [%o0 - 1]			/* Store			*/
2:	ldub		[%o1], %o5			/* Load		Group		*/
	add		%o0, 2, %o0			/* IEU0				*/
	ldub		[%o1 + 1], %g3			/* Load		Group		*/
	subcc		%g2, 2, %g2			/* IEU1		Group		*/
	stb		%o5, [%o0 - 2]			/* Store			*/
	add		%o1, 2, %o1			/* IEU0				*/
	bne,pt		%xcc, 2b			/* CTI		Group		*/
	 stb		%g3, [%o0 - 1]			/* Store			*/
3:	andcc		%o0, 0x38, %g5			/* IEU1		Group		*/

/* 201: %g5 = dest & 0x38 with the flags set accordingly (by the andcc at
   label 3 or by the delay slot at label 200).  If it is zero, go straight
   to 202.  Otherwise %g5 becomes 64 - %g5, is subtracted from the length,
   and that many bytes are copied 8 at a time: alignaddr rounds the source
   address down to a doubleword in %g1 and records the byte offset for
   faligndata; the loop alternates between %f4/%f6 as the "previous" and
   "next" source doublewords and stores each realigned result from %f0.  */
201:	be,pt		%icc, 202f			/* CTI				*/
	 mov		64, %g1				/* IEU0				*/
	fmovd		%f0, %f2			/* FPU				*/
	sub		%g1, %g5, %g5			/* IEU0		Group		*/
	alignaddr	%o1, %g0, %g1			/* GRU		Group		*/
	ldd		[%g1], %f4			/* Load		Group		*/
	sub		%o2, %g5, %o2			/* IEU0				*/
1:	ldd		[%g1 + 0x8], %f6		/* Load		Group		*/
	add		%g1, 0x8, %g1			/* IEU0		Group		*/
	subcc		%g5, 8, %g5			/* IEU1				*/
	faligndata	%f4, %f6, %f0			/* GRU		Group		*/
	std		%f0, [%o0]			/* Store			*/
	add		%o1, 8, %o1			/* IEU0		Group		*/
	be,pn		%xcc, 202f			/* CTI				*/
	 add		%o0, 8, %o0			/* IEU1				*/
	ldd		[%g1 + 0x8], %f4		/* Load		Group		*/
	add		%g1, 8, %g1			/* IEU0				*/
	subcc		%g5, 8, %g5			/* IEU1				*/
	faligndata	%f6, %f4, %f0			/* GRU		Group		*/
	std		%f0, [%o0]			/* Store			*/
	add		%o1, 8, %o1			/* IEU0				*/
	bne,pt		%xcc, 1b			/* CTI		Group		*/
	 add		%o0, 8, %o0			/* IEU0				*/

/* 202: set up the block loop.
     - membar orders earlier memory accesses before the block transfers;
       %asi is set to ASI_BLK_P for the ldda/stda instructions.
     - %g6 = (len - 0x40) rounded down to a multiple of 0x40; a further
       0x80 is subtracted after the first block load.  It is the byte
       counter decremented by the LOOP_CHUNK macros.
     - %g2 = (src >> 3) & 7, the doubleword index of src inside its
       64-byte block; it selects the loop variant at label 203.
     - %g3 = ((len - %g6) rounded down to a multiple of 8) - 0x10, and
       %o2 = len - %g6 - %g3.
       NOTE(review): these look like the doubleword tail and the final
       byte remainder handled after the block loop -- confirm in the part
       of the file not visible here.
     - %g1 = original src + %g6 + %g3.
     - %o1 is rounded down to a 64-byte boundary and three consecutive
       blocks are preloaded into %f0.., %f16.. and %f32...
     - alignaddr with %g0 as destination is executed only for its side
       effect of recording the low bits of src for faligndata.  */
202:	membar	  #LoadStore | #StoreStore | #StoreLoad	/* LSU		Group		*/
	wr		%g0, ASI_BLK_P, %asi		/* LSU		Group		*/
	subcc		%o2, 0x40, %g6			/* IEU1		Group		*/
	mov		%o1, %g1			/* IEU0				*/
	andncc		%g6, (0x40 - 1), %g6		/* IEU1		Group		*/
	srl		%g1, 3, %g2			/* IEU0				*/
	sub		%o2, %g6, %g3			/* IEU0		Group		*/
	andn		%o1, (0x40 - 1), %o1		/* IEU1				*/
	and		%g2, 7, %g2			/* IEU0		Group		*/
	andncc		%g3, 0x7, %g3			/* IEU1				*/
	fmovd		%f0, %f2			/* FPU				*/
	sub		%g3, 0x10, %g3			/* IEU0		Group		*/
	sub		%o2, %g6, %o2			/* IEU1				*/
	alignaddr	%g1, %g0, %g0			/* GRU		Group		*/
	add		%g1, %g6, %g1			/* IEU0		Group		*/
	subcc		%o2, %g3, %o2			/* IEU1				*/
	ldda		[%o1 + 0x00] %asi, %f0		/* LSU		Group		*/
	add		%g1, %g3, %g1			/* IEU0				*/
	ldda		[%o1 + 0x40] %asi, %f16		/* LSU		Group		*/
	sub		%g6, 0x80, %g6			/* IEU0				*/
	ldda		[%o1 + 0x80] %asi, %f32		/* LSU		Group		*/
							/* Clk1		Group 8-(	*/
							/* Clk2		Group 8-(	*/
							/* Clk3		Group 8-(	*/
							/* Clk4		Group 8-(	*/

/* 203: computed jump into the loop variant for this source offset.
   %g5 = address of label 203 + (300f - 203b) = address of label 300;
   %g2 = index << 9, i.e. index * 512; jmpl discards the return address
   (%g0).  The delay slot moves %o1 past the three preloaded blocks.  */
203:	rd		%pc, %g5			/* PDU		Group 8-(	*/
	addcc		%g5, %lo(300f - 203b), %g5	/* IEU1		Group		*/
	sll		%g2, 9, %g2			/* IEU0				*/
	jmpl		%g5 + %g2, %g0			/* CTI		Group brk forced*/
	 addcc		%o1, 0xc0, %o1			/* IEU1		Group		*/

/* Loop variants, laid out 512 bytes apart starting at label 300 (3x0 is
   the variant for doubleword index x).  Do not add or remove instructions
   inside a variant: the jmpl above depends on each one being exactly 128
   instructions including VISLOOP_PAD.

   Within a variant the three preloaded register groups rotate: FREG_FROB
   realigns one group into %f48-%f62, then LOOP_CHUNKn refills the group
   just consumed from the source and stores %f48-%f62.  The closing branch
   returns to 3x0b+4 because its delay slot already executes the first
   faligndata of the first FREG_FROB.

   When the byte counter reaches zero in LOOP_CHUNKn, control goes to
   3xn, which realigns and stores the two groups still held in registers
   and then leaves through STORE_JUMP with "membar #Sync" in its delay
   slot.  The exit labels (400, 416, 432, 402, 418) are outside the
   visible part of the file.  */
	.align		512		/* OK, here comes the fun part... */
300:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)	LOOP_CHUNK1(o1, o0, g6, 301f)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)	LOOP_CHUNK2(o1, o0, g6, 302f)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)	LOOP_CHUNK3(o1, o0, g6, 303f)
	b,pt		%xcc, 300b+4; faligndata %f0, %f2, %f48
301:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)	STORE_JUMP(o0, f48, 400f) membar #Sync
302:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)	STORE_JUMP(o0, f48, 416f) membar #Sync
303:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)	STORE_JUMP(o0, f48, 432f) membar #Sync
	VISLOOP_PAD

/* Variant for doubleword index 1: identical structure, with every
   register window shifted up by one double register (starts at %f2).
   The listing available to this review stops after label 312; label 313,
   referenced by LOOP_CHUNK3 below, is not visible.  */
310:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)	LOOP_CHUNK1(o1, o0, g6, 311f)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)	LOOP_CHUNK2(o1, o0, g6, 312f)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)	LOOP_CHUNK3(o1, o0, g6, 313f)
	b,pt		%xcc, 310b+4; faligndata %f2, %f4, %f48
311:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)	STORE_JUMP(o0, f48, 402f) membar #Sync
312:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)	STORE_JUMP(o0, f48, 418f) membar #Sync
memcpy.s - 源码说明

本页面展示了「Glibc 2.3.2源代码(解压后有100多M)」中的 memcpy.s 源码文件，采用 S 编程语言编写，共 920 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Glibc相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?