📄 cpu_asm.c
字号:
/* cpu_asm.c ===> cpu_asm.S or cpu_asm.s * * This file contains the basic algorithms for all assembly code used * in an specific CPU port of RTEMS. These algorithms must be implemented * in assembly language * * NOTE: This is supposed to be a .S or .s file NOT a C file. * * COPYRIGHT (c) 1989-1999. * On-Line Applications Research Corporation (OAR). * * The license and distribution terms for this file may be * found in the file LICENSE in this distribution or at * http://www.rtems.com/license/LICENSE. * * This file adapted from no_bsp board library of the RTEMS distribution. * The body has been modified for the Bender Or1k implementation by * Chris Ziomkowski. <chris@asics.ws> *//* * This is supposed to be an assembly file. This means that system.h * and cpu.h should not be included in a "real" cpu_asm file. An * implementation in assembly should include "cpu_asm.h> */#include <rtems/system.h>#include <rtems/score/cpu.h>/* #include "cpu_asm.h> *//* * _CPU_Context_save_fp_context * * This routine is responsible for saving the FP context * at *fp_context_ptr. If the point to load the FP context * from is changed then the pointer is modified by this routine. * * Sometimes a macro implementation of this is in cpu.h which dereferences * the ** and a similarly named routine in this file is passed something * like a (Context_Control_fp *). The general rule on making this decision * is to avoid writing assembly language. * * or1k specific Information: * * This implementation of RTEMS considers the concept of * "fast context switching", as defined in the or1k architecture * specification. Whether or not this makes a significant * impact on speed is dubious, however it is not a significant * impediment to include it. It probably wastes a few cycles on * every floating point context switch. * * This implementation will currently not work on a processor where * the integer unit and floating point unit are not the same size. I * am waiting on an architecture change to make this feasible. It * should work fine on 64 bit architectures, except for the fact that * the variables are declared as 32 bits. This shouldn't really make * a difference, as the fact that they must be registers should force * them into a 64 bit word anyway. * * The decision as to whether to do 32 or 64 bit saves is performed * at run time based on the configuration of the CPUCFGR register. This * takes a performance hit of a few cycles, but this should be a very * small percentage of the total number of cycles necessary to do the * save, and doesn't require special code for 32 or 64 bit versions. * * ADDITIONAL INFORMATION: * * It has been unanimously agreed that floating point will not be * included in the initial releases of the Or1k chips, and that * significant changes to the floating point architecture may * occur before any such release will ever be implemented. The code * below is therefore never called and never used. */void _CPU_Context_save_fp( void **fp_context_ptr){ register unsigned32 temp; register unsigned32 address = (unsigned32)(*fp_context_ptr); register unsigned32 xfer; register unsigned32 loop; /* %0 is a temporary register which is used for several values throughout the code. %3 contains the address to save the context, and is modified during the course of the context save. %1 is a second dummy register which is used during transfer of the floating point value to memory. %2 is an end of loop marker which is compared against the pointer %3. */ asm volatile ("l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */ "l.andi %0,%0,0x380 \n\t" /* OF32S or OV64S or OF64S */ "l.sfnei %0,0x0 \n\t" "l.bf _L_nofps \n\t" /* exit if no floating point */ "l.sfeqi %0,0x080 \n\t" /* (DELAY) single precision? */ "l.mfspr %0,r0,0x11 \n\t" /* Load Status Register */ "l.srli %0,%0,58 \n\t" /* Move CID into low byte*32 */ "l.bnf _L_spfp_loops \n\t" /* Branch on single precision */ "l.addi %2,%0,0x20 \n" /* Terminating condition */ /**** Double Precision Floating Point Section ****/ "_L_dpfp_loops: \n\t" "l.mfspr %1,%0,0x600 \n\t" /* Load VFRx */ "l.sd 0(%3),%1 \n\t" /* Save VFRx */ "l.addi %0,%0,0x01 \n\t" /* Increment counter */ "l.sfeq %0,%2 \n\t" /* Branch if incomplete */ "l.bf _L_dpfp_loops \n\t" "l.addi %3,%3,0x08 \n\t" /* (DELAY) update pointer */ "l.bnf _L_nofps \n\t" /* exit */ "l.nop \n" /**** Single Precision Floating Point Section ****/ "_L_spfp_loops: \n\t" "l.mfspr %1,%0,0x600 \n\t" /* Load VFRx */ "l.sw 0(%3),%1 \n\t" /* Save VFRx */ "l.addi %0,%0,0x01 \n\t" /* Increment counter */ "l.sfeq %0,%2 \n\t" /* Branch if incomplete */ "l.bf _L_spfp_loops \n\t" "l.addi %3,%3,0x04 \n" /* (DELAY) update pointer */ "_L_nofps: \n\t" /* End of context save */ : "=&r" (temp), "=r" (xfer), "=&r" (loop), "+r" (address));}/* * _CPU_Context_restore_fp_context * * This routine is responsible for restoring the FP context * at *fp_context_ptr. If the point to load the FP context * from is changed then the pointer is modified by this routine. * * */void _CPU_Context_restore_fp( void **fp_context_ptr){ register unsigned32 temp; register unsigned32 address = (unsigned32)(*fp_context_ptr); register unsigned32 xfer; register unsigned32 loop; /* The reverse of Context_save_fp */ /* %0 is a temporary register which is used for several values throughout the code. %1 contains the address to save the context, and is modified during the course of the context save. %2 is a second dummy register which is used during transfer of the floating point value to memory. %3 is an end of loop marker which is compared against the pointer %1. */ asm volatile ("l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */ "l.andi %0,%0,0x380 \n\t" /* OF32S or OV64S or OF64S */ "l.sfnei %0,0x0 \n\t" "l.bf _L_nofpr \n\t" /* exit if no floating point */ "l.sfeqi %0,0x080 \n\t" /* (DELAY) single precision? */ "l.mfspr %0,r0,0x11 \n\t" /* Load Status Register */ "l.srli %0,%0,58 \n\t" /* Move CID into low byte*32 */ "l.bnf _L_spfp_loopr \n\t" /* Branch on single precision */ "l.addi %3,%0,0x20 \n" /* Terminating condition */ /**** Double Precision Floating Point Section ****/ "_L_dpfp_loopr: \n\t" "l.mfspr %2,%0,0x600 \n\t" /* Load VFRx */ "l.sd 0(%1),%2 \n\t" /* Save VFRx */ "l.addi %0,%0,0x01 \n\t" /* Increment counter */ "l.sfeq %0,%3 \n\t" /* Branch if incomplete */ "l.bf _L_dpfp_loopr \n\t" "l.addi %1,%1,0x08 \n\t" /* (DELAY) update pointer */ "l.bnf _L_nofpr \n\t" /* exit */ "l.nop \n" /**** Single Precision Floating Point Section ****/ "_L_spfp_loopr: \n\t" "l.mfspr %2,%0,0x600 \n\t" /* Load VFRx */ "l.sw 0(%1),%2 \n\t" /* Save VFRx */ "l.addi %0,%0,0x01 \n\t" /* Increment counter */ "l.sfeq %0,%3 \n\t" /* Branch if incomplete */ "l.bf _L_spfp_loopr \n\t" "l.addi %1,%1,0x04 \n" /* (DELAY) update pointer */ "_L_nofpr: \n\t" /* End of context save */ : "=&r" (temp), "+r" (address), "=r" (xfer), "=&r" (loop));}/* _CPU_Context_switch * * This routine performs a normal non-FP context switch. * * NO_CPU Specific Information: * * XXX document implementation including references if appropriate */void _CPU_Context_switch( Context_Control *run, Context_Control *heir){ register unsigned32 temp1 = 0; register unsigned32 temp2 = 0; /* This function is really tricky. When this function is called, we should save our state as we need it, and then grab the new state from the pointer. We then do a longjump to this code, replacing the current stack pointer with the new environment. This function never returns. Instead, at some later time, another person will call context switch with our pointer in the heir variable, and they will longjump to us. We will then continue. Let's see how this works... */ /* Technically, we could probably not worry about saving r3 and r4, since these are parameters guaranteed to be saved by the calling function. We could also probably get away without saving r11, as that is filled in by the return statement. But as a first cut I'm in favor of just saving everything.... */ /* We could be more efficient and use compile time directives for 32 or 64 bit, but this will allow the code to run on everything without modification. Feel free to comment the relevant sections out if you don't need it. */ /* We should probably write this whole routine in assembly so that we can have seperate entry points for self restore or context switch. You can't jump to local labels from inline assembly across function calls, and I don't feel like embedding all the .global directives here...it really screws up the debugger. Oh well, what's 2 more instructions and a branch really cost... */ /* One thing which we should do is check for 32 or 64 bit models first, and then do one branch to the appropriate code section. Currently, we check the architecture bit in CPUCFGR twice. Once during the load section and again during restore. That is inefficient, and considering this code is huge anyway, saving the few bytes simply doesn't make any practical sense. FIX THIS LATER. */ /* Note that this routine assumes software context switches are done with the same CID. In other words, it will not manage the CIDs and assign a new one as necessary. If you tell it to restore a context at CID 2, and the current one is at CID 4, it will do what it is told. It will overwrite the registers for context ID 2, meaning they are irretrievably lost. I hope you saved them earlier.... */ /* Note that you can have a context jump anywhere you want, although by default we will jump to the L_restore label. If you then modify the location in the Context_Control structure, it will continue whereever you told it to go. Note however that you had better also have cleaned up the stack and frame pointers though, because they are probably still set with the values obtained from entering this function... */ asm volatile ("l.sfeqi %3,0x0 \n\t" /* Is this a self restore? */ "l.bf _L_restore \n\t" /* Yes it is...go there */ "l.nop \n\t" "l.lwz %0,0(%3) \n\t" /* Prefetch new context */ "l.mfspr %2,r0,0x11 \n\t" /* Status Register */ "l.sw 0(%1),%2 \n\t" /* Save it */ "l.srli %2,%2,28 \n\t" /* Move CID into low byte */ "l.mfspr %0,%2,0x20 \n\t" /* Offset from EPCR */ "l.sw 4(%1),%0 \n\t" /* Store it */ "l.mfspr %0,%2,0x30 \n\t" /* Offset from EEAR */ "l.sw 8(%1),%0 \n\t" /* Store it */ "l.mfspr %0,%2,0x40 \n\t" /* Offset from ESR */ "l.sw 12(%1),%0 \n\t" /* Store it */ "l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */ "l.andi %0,%0,0x40 \n\t" /* OB64S */ "l.sfnei %0,0x0 \n\t" "l.bf _L_64bit \n\t" /* 64 bit architecture */ "l.movhi %0,hi(_L_restore)\n\t" /**** 32 bit implementation ****/ "l.ori %0,%0,lo(_L_restore)\n\t" "l.sw 140(%1),%0 \n\t" /* Save the PC */ "l.lwz %0,140(%3) \n\t" /* New PC. Expect cache miss */ "l.sw 16(%1),r1 \n\t" "l.sw 20(%1),r2 \n\t" "l.sw 24(%1),r3 \n\t" "l.sw 28(%1),r4 \n\t" "l.sw 32(%1),r5 \n\t" "l.sw 36(%1),r6 \n\t" "l.sw 40(%1),r7 \n\t" "l.sw 44(%1),r8 \n\t" "l.sw 48(%1),r9 \n\t" "l.sw 52(%1),r10 \n\t" "l.sw 56(%1),r11 \n\t" "l.sw 60(%1),r12 \n\t" "l.sw 64(%1),r13 \n\t" "l.sw 68(%1),r14 \n\t" "l.sw 72(%1),r15 \n\t" "l.sw 76(%1),r16 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -