📄 viscopy.s
Font size:
/* $Id: VIScopy.S,v 1.27 2002/02/09 19:49:30 davem Exp $
 * VIScopy.S: High speed copy operations utilizing the UltraSparc
 *            Visual Instruction Set.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
 */

#include "VIS.h"

/* VIS code can be used for numerous copy/set operation variants.
 * It can be made to work in the kernel, one single instance,
 * for all of memcpy, copy_to_user, and copy_from_user by setting
 * the ASI src/dest globals correctly.  Furthermore it can
 * be used for kernel-->kernel page copies as well, a hook label
 * is put in here just for this purpose.
 *
 * For userland, compiling this without __KERNEL__ defined makes
 * it work just fine as a generic libc bcopy and memcpy.
 * If for userland it is compiled with a 32bit gcc (but you need
 * -Wa,-Av9a for as), the code will just rely on lower 32bits of
 * IEU registers, if you compile it with 64bit gcc (ie. define
 * __sparc_v9__), the code will use full 64bit.
 */

#ifdef __KERNEL__

#include <asm/visasm.h>
#include <asm/thread_info.h>

/* Kernel return paths: restore the caller's %asi from the thread_info
 * current-DS byte, return 0 in %o0.  The FPU variants additionally run
 * VISExit to leave the VIS/FPU state (see asm/visasm.h).  The final
 * "wr" sits in the retl delay slot.
 */
#define FPU_CLEAN_RETL					\
	ldub	[%g6 + TI_CURRENT_DS], %o1;		\
	VISExit						\
	clr	%o0;					\
	retl;						\
	 wr	%o1, %g0, %asi;
#define FPU_RETL					\
	ldub	[%g6 + TI_CURRENT_DS], %o1;		\
	VISExit						\
	clr	%o0;					\
	retl;						\
	 wr	%o1, %g0, %asi;
#define NORMAL_RETL					\
	ldub	[%g6 + TI_CURRENT_DS], %o1;		\
	clr	%o0;					\
	retl;						\
	 wr	%o1, %g0, %asi;

/* EX/EX2/EXO2/EXVISN/EXT: wrap one instruction "x,y" and emit a matching
 * __ex_table entry so a fault at label 98 is redirected to the fixup code
 * (label 99 or one of the VIScopyfixup_* handlers defined elsewhere in
 * this file).  "a, b, %o1" computes the residual count in the handler's
 * branch delay slot.
 */
#define EX(x,y,a,b)				\
98:	x,y;					\
	.section .fixup;			\
	.align 4;				\
99:	ba VIScopyfixup_ret;			\
	 a, b, %o1;				\
	.section __ex_table;			\
	.align 4;				\
	.word 98b, 99b;				\
	.text;					\
	.align 4;
#define EX2(x,y,c,d,e,a,b)			\
98:	x,y;					\
	.section .fixup;			\
	.align 4;				\
99:	c, d, e;				\
	ba VIScopyfixup_ret;			\
	 a, b, %o1;				\
	.section __ex_table;			\
	.align 4;				\
	.word 98b, 99b;				\
	.text;					\
	.align 4;
#define EXO2(x,y)				\
98:	x,y;					\
	.section __ex_table;			\
	.align 4;				\
	.word 98b, VIScopyfixup_reto2;		\
	.text;					\
	.align 4;
#define EXVISN(x,y,n)				\
98:	x,y;					\
	.section __ex_table;			\
	.align 4;				\
	.word 98b, VIScopyfixup_vis##n;		\
	.text;					\
	.align 4;
/* EXT: range-style entry (start..end of a copy loop -> one handler). */
#define EXT(start,end,handler)			\
	.section __ex_table;			\
	.align 4;				\
	.word start, 0, end, handler;		\
	.text;					\
	.align 4;

#else /* !__KERNEL__ */

/* Userland build: no exception tables; return dest pointer from %g6.
 * Without REGS_64BIT the FPU paths also clear %fprs on the way out.
 */
#ifdef REGS_64BIT
#define FPU_CLEAN_RETL				\
	retl;					\
	 mov	%g6, %o0;
#define FPU_RETL				\
	retl;					\
	 mov	%g6, %o0;
#else
#define FPU_CLEAN_RETL				\
	wr	%g0, FPRS_FEF, %fprs;		\
	retl;					\
	 mov	%g6, %o0;
#define FPU_RETL				\
	wr	%g0, FPRS_FEF, %fprs;		\
	retl;					\
	 mov	%g6, %o0;
#endif
#define NORMAL_RETL				\
	retl;					\
	 mov	%g6, %o0;
/* Faults cannot be fixed up in userland: the EX* wrappers degenerate
 * to the bare instruction and EXT to nothing.
 */
#define EX(x,y,a,b)		x,y
#define EX2(x,y,c,d,e,a,b)	x,y
#define EXO2(x,y)		x,y
#define EXVISN(x,y,n)		x,y
#define EXT(a,b,c)

#endif /* !__KERNEL__ */

#define EXVIS(x,y)  EXVISN(x,y,0)
#define EXVIS1(x,y) EXVISN(x,y,1)
#define EXVIS2(x,y) EXVISN(x,y,2)
#define EXVIS3(x,y) EXVISN(x,y,3)
#define EXVIS4(x,y) EXVISN(x,y,4)

/* Produce 64 realigned bytes in %f48-%f62 from nine source FP regs. */
#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)	\
	faligndata	%f1, %f2, %f48;			\
	faligndata	%f2, %f3, %f50;			\
	faligndata	%f3, %f4, %f52;			\
	faligndata	%f4, %f5, %f54;			\
	faligndata	%f5, %f6, %f56;			\
	faligndata	%f6, %f7, %f58;			\
	faligndata	%f7, %f8, %f60;			\
	faligndata	%f8, %f9, %f62;

/* One 64-byte block-load + block-store iteration; branches to jmptgt
 * when len reaches zero (dest advance happens in the delay slot).
 */
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
	EXVIS(LDBLK [%src] ASIBLK, %fdest);			\
	ASI_SETDST_BLK						\
	EXVIS(STBLK %fsrc, [%dest] ASIBLK);			\
	add		%src, 0x40, %src;			\
	subcc		%len, 0x40, %len;			\
	be,pn		%xcc, jmptgt;				\
	 add		%dest, 0x40, %dest;			\
	ASI_SETSRC_BLK

/* The three unrolled loop stages rotate the FP destination bank
 * (%f0 / %f16 / %f32) while always storing from %f48.
 */
#define LOOP_CHUNK1(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

#define STORE_SYNC(dest, fsrc)				\
	EXVIS(STBLK %fsrc, [%dest] ASIBLK);		\
	add		%dest, 0x40, %dest;

#ifdef __KERNEL__
/* Final block store then jump out of the loop.  The kernel variant also
 * toggles asi_dest via ASI_BLK_XOR1 (presumably switching block-ASI
 * flavor for the tail -- see VIS.h; confirm against the ASI definitions).
 */
#define STORE_JUMP(dest, fsrc, target)			\
	srl		asi_dest, 3, %g5;		\
	EXVIS2(STBLK %fsrc, [%dest] ASIBLK);		\
	xor		asi_dest, ASI_BLK_XOR1, asi_dest; \
	add		%dest, 0x40, %dest;		\
	xor		asi_dest, %g5, asi_dest;	\
	ba,pt		%xcc, target;
#else
#define STORE_JUMP(dest, fsrc, target)			\
	EXVIS2(STBLK %fsrc, [%dest] ASIBLK);		\
	add		%dest, 0x40, %dest;		\
	ba,pt		%xcc, target;
#endif

/* Padding so the userland loop bodies keep the same code-size/alignment
 * as the (larger) kernel STORE_JUMP expansion.
 */
#ifndef __KERNEL__
#define VISLOOP_PAD nop; nop; nop; nop; \
		    nop; nop; nop; nop; \
		    nop; nop; nop; nop; \
		    nop; nop; nop;
#else
#define VISLOOP_PAD
#endif

/* Drain one realigned 8-byte doubleword of the tail; fall to vis_out
 * when fewer than 8 bytes remain.
 */
#define FINISH_VISCHUNK(dest, f0, f1, left)		\
	ASI_SETDST_NOBLK				\
	subcc		%left, 8, %left;		\
	bl,pn		%xcc, vis_out;			\
	 faligndata	%f0, %f1, %f48;			\
	EXVIS3(STDF %f48, [%dest] ASINORMAL);		\
	add		%dest, 8, %dest;
#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
	subcc		%left, 8, %left;		\
	bl,pn		%xcc, vis_out;			\
	 fsrc1		%f0, %f1;
#define UNEVEN_VISCHUNK(dest, f0, f1, left)		\
	UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)	\
	ba,a,pt		%xcc, vis_out_slk;

/* Macros for non-VIS memcpy code. */

#ifdef REGS_64BIT

/* 32 bytes via four 8-byte loads; stored as word pairs (high word via
 * srlx) so the destination only needs 4-byte alignment.
 */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ASI_SETSRC_NOBLK				\
	LDX	[%src + offset + 0x00] ASINORMAL, %t0;	\
	LDX	[%src + offset + 0x08] ASINORMAL, %t1;	\
	LDX	[%src + offset + 0x10] ASINORMAL, %t2;	\
	LDX	[%src + offset + 0x18] ASINORMAL, %t3;	\
	ASI_SETDST_NOBLK				\
	STW	%t0, [%dst + offset + 0x04] ASINORMAL;	\
	srlx	%t0, 32, %t0;				\
	STW	%t0, [%dst + offset + 0x00] ASINORMAL;	\
	STW	%t1, [%dst + offset + 0x0c] ASINORMAL;	\
	srlx	%t1, 32, %t1;				\
	STW	%t1, [%dst + offset + 0x08] ASINORMAL;	\
	STW	%t2, [%dst + offset + 0x14] ASINORMAL;	\
	srlx	%t2, 32, %t2;				\
	STW	%t2, [%dst + offset + 0x10] ASINORMAL;	\
	STW	%t3, [%dst + offset + 0x1c] ASINORMAL;	\
	srlx	%t3, 32, %t3;				\
	STW	%t3, [%dst + offset + 0x18] ASINORMAL;

/* 64 bytes, both sides 8-byte aligned: straight LDX/STX pairs. */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ASI_SETSRC_NOBLK				\
	LDX	[%src + offset + 0x00] ASINORMAL, %t0;	\
	LDX	[%src + offset + 0x08] ASINORMAL, %t1;	\
	LDX	[%src + offset + 0x10] ASINORMAL, %t2;	\
	LDX	[%src + offset + 0x18] ASINORMAL, %t3;	\
	ASI_SETDST_NOBLK				\
	STX	%t0, [%dst + offset + 0x00] ASINORMAL;	\
	STX	%t1, [%dst + offset + 0x08] ASINORMAL;	\
	STX	%t2, [%dst + offset + 0x10] ASINORMAL;	\
	STX	%t3, [%dst + offset + 0x18] ASINORMAL;	\
	ASI_SETSRC_NOBLK				\
	LDX	[%src + offset + 0x20] ASINORMAL, %t0;	\
	LDX	[%src + offset + 0x28] ASINORMAL, %t1;	\
	LDX	[%src + offset + 0x30] ASINORMAL, %t2;	\
	LDX	[%src + offset + 0x38] ASINORMAL, %t3;	\
	ASI_SETDST_NOBLK				\
	STX	%t0, [%dst + offset + 0x20] ASINORMAL;	\
	STX	%t1, [%dst + offset + 0x28] ASINORMAL;	\
	STX	%t2, [%dst + offset + 0x30] ASINORMAL;	\
	STX	%t3, [%dst + offset + 0x38] ASINORMAL;

/* Trailing 16 bytes, addressed downward from the end (negative offset). */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ASI_SETSRC_NOBLK				\
	LDX	[%src - offset - 0x10] ASINORMAL, %t0;	\
	LDX	[%src - offset - 0x08] ASINORMAL, %t1;	\
	ASI_SETDST_NOBLK				\
	STW	%t0, [%dst - offset - 0x0c] ASINORMAL;	\
	srlx	%t0, 32, %t2;				\
	STW	%t2, [%dst - offset - 0x10] ASINORMAL;	\
	STW	%t1, [%dst - offset - 0x04] ASINORMAL;	\
	srlx	%t1, 32, %t3;				\
	STW	%t3, [%dst - offset - 0x08] ASINORMAL;
#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)	\
	ASI_SETSRC_NOBLK				\
	LDX	[%src - offset - 0x10] ASINORMAL, %t0;	\
	LDX	[%src - offset - 0x08] ASINORMAL, %t1;	\
	ASI_SETDST_NOBLK				\
	STX	%t0, [%dst - offset - 0x10] ASINORMAL;	\
	STX	%t1, [%dst - offset - 0x08] ASINORMAL;

#else /* !REGS_64BIT */

/* 32-bit register variants: plain word loads/stores, no ASI overrides. */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	lduw	[%src + offset + 0x00], %t0;		\
	lduw	[%src + offset + 0x04], %t1;		\
	lduw	[%src + offset + 0x08], %t2;		\
	lduw	[%src + offset + 0x0c], %t3;		\
	stw	%t0, [%dst + offset + 0x00];		\
	stw	%t1, [%dst + offset + 0x04];		\
	stw	%t2, [%dst + offset + 0x08];		\
	stw	%t3, [%dst + offset + 0x0c];		\
	lduw	[%src + offset + 0x10], %t0;		\
	lduw	[%src + offset + 0x14], %t1;		\
	lduw	[%src + offset + 0x18], %t2;		\
	lduw	[%src + offset + 0x1c], %t3;		\
	stw	%t0, [%dst + offset + 0x10];		\
	stw	%t1, [%dst + offset + 0x14];		\
	stw	%t2, [%dst + offset + 0x18];		\
	stw	%t3, [%dst + offset + 0x1c];
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduw	[%src - offset - 0x10], %t0;		\
	lduw	[%src - offset - 0x0c], %t1;		\
	lduw	[%src - offset - 0x08], %t2;		\
	lduw	[%src - offset - 0x04], %t3;		\
	stw	%t0, [%dst - offset - 0x10];		\
	stw	%t1, [%dst - offset - 0x0c];		\
	stw	%t2, [%dst - offset - 0x08];		\
	stw	%t3, [%dst - offset - 0x04];

#endif /* !REGS_64BIT */

#ifdef __KERNEL__
	.section	__ex_table,#alloc
	.section	.fixup,#alloc,#execinstr
#endif

	.text
	.align	32

	.globl	memcpy
	.type	memcpy,@function
	.globl	bcopy
	.type	bcopy,@function

#ifdef __KERNEL__
/* Entry stubs: each sets the asi_src/asi_dest globals for its direction
 * and falls into the common __memcpy_entry (defined past this excerpt)
 * when the length %o2 is nonzero; a zero length returns 0 immediately.
 */
memcpy_private:
memcpy:		mov		ASI_P, asi_src			! IEU0	Group
		brnz,pt		%o2, __memcpy_entry		! CTI
		 mov		ASI_P, asi_dest			! IEU1

		retl
		 clr		%o0

		.align		32
		.globl		__copy_from_user
		.type		__copy_from_user,@function
/* NOTE: no retl here -- a zero length falls through into __copy_to_user,
 * whose brnz is likewise not taken, reaching the shared retl/clr below.
 */
__copy_from_user:rd		%asi, asi_src			! IEU0	Group
		brnz,pt		%o2, __memcpy_entry		! CTI
		 mov		ASI_P, asi_dest			! IEU1

		.globl		__copy_to_user
		.type		__copy_to_user,@function
__copy_to_user:	mov		ASI_P, asi_src			! IEU0	Group
		brnz,pt		%o2, __memcpy_entry		! CTI
		 rd		%asi, asi_dest			! IEU1

		retl						! CTI	Group
		 clr		%o0				! IEU0	Group

		.globl		__copy_in_user
		.type		__copy_in_user,@function
__copy_in_user:	rd		%asi, asi_src			! IEU0	Group
		brnz,pt		%o2, __memcpy_entry		! CTI
		 mov		asi_src, asi_dest		! IEU1

		retl						! CTI	Group
		 clr		%o0				! IEU0	Group
#endif

/* bcopy(src, dst, len): swap %o0/%o1 into memcpy argument order and
 * tail into memcpy_private for non-negative lengths.
 */
bcopy:		or		%o0, 0, %g3			! IEU0	Group
		addcc		%o1, 0, %o0			! IEU1
		brgez,pt	%o2, memcpy_private		! CTI
		 or		%g3, 0, %o1			! IEU0	Group

		retl						! CTI	Group brk forced
		 clr		%o0				! IEU0

#ifdef __KERNEL__

/* Boot-time patching for UltraSPARC-III: overwrite the first two
 * instructions of OLD with "ba <NEW>; nop" so the cheetah-optimized
 * U3 copy routines are used instead.  BRANCH_ALWAYS is the opcode
 * template; the word displacement is OR-ed into it.
 */
#define BRANCH_ALWAYS	0x10680000
#define NOP		0x01000000
#define ULTRA3_DO_PATCH(OLD, NEW)	\
	sethi	%hi(NEW), %g1;		\
	or	%g1, %lo(NEW), %g1;	\
	sethi	%hi(OLD), %g2;		\
	or	%g2, %lo(OLD), %g2;	\
	sub	%g1, %g2, %g1;		\
	sethi	%hi(BRANCH_ALWAYS), %g3; \
	srl	%g1, 2, %g1;		\
	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
	or	%g3, %g1, %g3;		\
	stw	%g3, [%g2];		\
	sethi	%hi(NOP), %g3;		\
	or	%g3, %lo(NOP), %g3;	\
	stw	%g3, [%g2 + 0x4];	\
	flush	%g2;

	.globl	cheetah_patch_copyops
cheetah_patch_copyops:
	ULTRA3_DO_PATCH(memcpy, U3memcpy)
	ULTRA3_DO_PATCH(__copy_from_user, U3copy_from_user)
	ULTRA3_DO_PATCH(__copy_to_user, U3copy_to_user)
	ULTRA3_DO_PATCH(__copy_in_user, U3copy_in_user)
	retl
	 nop
#undef BRANCH_ALWAYS
#undef NOP
#undef ULTRA3_DO_PATCH
#endif /* __KERNEL__ */

	.align	32
/* NOTE(review): the file is truncated here by the extraction -- this
 * andcc is the first instruction of the common copy entry whose labels
 * and remaining body lie beyond the visible region.
 */
#ifdef __KERNEL__
	andcc		%o0, 7, %g2			! IEU1	Group
#endif
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -