📄 usercopy.s
字号:
/* t2 = number of bytes to flush: dcache_size, but never more than NBPG */
	lw	t2,dcache_size
	.set	noreorder
	nop
	mfc0	t3,C0_SR		# save sr
	nop
	.set	reorder

	.set	noreorder
	li	v0,SR_ISC		# interrupts off, isolate caches
	mtc0	v0,C0_SR
	bltu	t2,NBPG,1f		# cache is smaller than region
	nop
	li	t2,NBPG
1:	addu	t2,a0			# ending address + 1
	move	t0,a0			# cache must be isolated by now
	.set	reorder

	/* one byte store to each of 8 consecutive words (32 bytes) per pass */
1:	sb	zero,0(t0)
	sb	zero,4(t0)
	sb	zero,8(t0)
	sb	zero,12(t0)
	sb	zero,16(t0)
	sb	zero,20(t0)
	sb	zero,24(t0)
	addu	t0,32
	sb	zero,-4(t0)
	bltu	t0,t2,1b

	.set	noreorder
	nop				# ensure isolated stores out of pipe
	nop
	nop
	mtc0	t3,C0_SR		# un-isolate, enable interrupts
	nop				# ensure cache unisolated
	nop
	nop
	.set	reorder
#ifdef CACHETRICKS
	/* bump the 16-bit dcachecnt[] entry selected by the page number in a0 */
	lw	v0,dcachemask
	srl	t1,a0,PGSHIFT
	and	t1,v0
	sll	t1,1			# cachecnt index
	lhu	t0,dcachecnt(t1)
	addu	t0,1
	sh	t0,dcachecnt(t1)
#endif /* CACHETRICKS */
	j	ra
	END(kn01_page_dflush)

/*
 * Config_cache() -- determine sizes of i and d caches
 * Sizes stored in globals dcache_size and icache_size
 *
 * Takes no arguments.  Sequence:
 *	- save ra and s0 in a CONFIGFRM-byte frame, save SR in s0 and
 *	  clear SR (interrupts disabled);
 *	- jump to the K1BASE alias of the following code so the probing
 *	  runs uncached;
 *	- call size_cache() and store the result in dcache_size;
 *	- set SR_SWC (swap caches), call size_cache() again and store the
 *	  result in icache_size;
 *	- clear SR again (caches swapped back), jump back to the normal
 *	  (cached) address, restore SR, s0, ra and sp.
 * The nop runs after each store and each mtc0 are deliberate padding;
 * do not let the assembler or an editor remove or reorder them.
 */
CONFIGFRM=	(4*4)+4+4		# 4 arg saves, ra, and a saved register
NESTED(config_cache, CONFIGFRM, zero)
	subu	sp,CONFIGFRM
	sw	ra,CONFIGFRM-4(sp)
	sw	s0,CONFIGFRM-8(sp)	# save s0 on stack
	.set	noreorder
	nop
	mfc0	s0,C0_SR		# save SR
	nop
	mtc0	zero,C0_SR		# disable interrupts
	nop
	.set	reorder

	.set	noreorder
	la	v0,1f
	or	v0,K1BASE
	j	v0			# run uncached
	nop

1:	jal	size_cache
	nop
	sw	v0,dcache_size
	nop				# make sure sw out of pipe
	nop
	nop
	nop
	li	v0,SR_SWC		# swap caches
	mtc0	v0,C0_SR
	nop				# ensure caches stable
	nop
	nop
	nop
	jal	size_cache
	nop
	sw	v0,icache_size
	nop				# make sure sw out of pipe
	nop
	nop
	nop
	mtc0	zero,C0_SR		# swap back caches
	nop
	nop
	nop
	nop
	la	t0,1f
	j	t0			# back to cached mode
	nop

1:	mtc0	s0,C0_SR		# restore SR
	nop
	lw	s0,CONFIGFRM-8(sp)	# restore old s0
	lw	ra,CONFIGFRM-4(sp)
	addu	sp,CONFIGFRM
	j	ra
	nop
	.set	reorder
	END(config_cache)

/*
 * size_cache()
 * return size of current data cache
 *
 * Takes no arguments.  With SR_ISC set, zeroes the words at
 * K0BASE + MINCACHE, K0BASE + 2*MINCACHE, ... (doubling up to MAXCACHE),
 * writes a -1 marker at K0BASE + 0, then reads the same offsets back in
 * increasing order.  v0 returns the first offset that reads non-zero,
 * or 0 if none does.  SR is restored from t0 before returning.
 * Uses t0, v0 and v1; touches no stack.
 */
LEAF(size_cache)
	.set	noreorder
	mfc0	t0,C0_SR		# save current sr
	nop				# LDSLOT
	or	v0,t0,SR_ISC		# isolate cache
	nop				# make sure no stores in pipe
	mtc0	v0,C0_SR
	nop				# make sure isolated
	nop
	nop
	/*
	 * Clear cache size
* boundaries to known state.
	 */
	li	v0,MINCACHE
1:	sw	zero,K0BASE(v0)
	sll	v0,1
	ble	v0,+MAXCACHE,1b
	nop				# BDSLOT
	li	v0,-1
	sw	v0,K0BASE(zero)		# store marker in cache
	li	v0,MINCACHE		# MIN cache size
2:	lw	v1,K0BASE(v0)		# Look for marker
	nop				# LDSLOT
	bne	v1,zero,3f		# found marker
	nop				# BDSLOT
	sll	v0,1			# cache size * 2
	ble	v0,+MAXCACHE,2b		# keep looking
	nop
	move	v0,zero			# must be no cache
	.set	reorder

	.set	noreorder
	nop
3:	mtc0	t0,C0_SR		# restore sr
	nop
	.set	reorder

	.set	noreorder
	nop
	nop				# make sure unisolated
	nop
	nop
	nop
	nop
	.set	reorder
	j	ra
	END(size_cache)

/*
 * Byte-order independent names for the unaligned word instructions.
 * The copy*str code uses LWS/SWS at offset 0 (first byte) and LWB/SWB
 * at offset 3 (last byte) of an unaligned word, so the lwl/lwr and
 * swl/swr roles swap between MIPSEB and little-endian builds.
 */
#ifdef MIPSEB
#    define LWS lwl
#    define LWB lwr
#    define SWS swl
#    define SWB swr
#else
#    define LWS lwr
#    define LWB lwl
#    define SWS swr
#    define SWB swl
#endif

/*
 * Copy a null terminated string from the user address space into
 * the kernel address space.
 *
 * copyinstr(user_src, kernel_dest, maxlength, &lencopied)
 *	a0 = user_src, a1 = kernel_dest, a2 = maxlength, a3 = &lencopied
 * returns:
 *	0 - success
 *	EFAULT - user_src not accessible
 *	ENAMETOOLONG - string exceeded maxlength
 *
 * A user_src with the sign bit set is rejected through cstrerror;
 * otherwise control passes to _cpyiostr, which arms the nofault word
 * and falls into the common copy loop.
 *
 * NOTE(review): unlike copystr, maxlength (a2) is not tested for a
 * negative value before entering _cpyiostr -- confirm that no caller
 * can pass one.
 */
LEAF(copyinstr)
#ifdef ASSERTIONS
	/* the nofault word must not already be armed */
	lw	v0,u+PCB_CPUPTR
	lw	v0,CPU_NOFAULT(v0)
	beq	v0,zero,8f
	PANIC("recursive nofault")
8:
#endif /* ASSERTIONS */
	bgez	a0,_cpyiostr		# user_src must be in kuseg
	b	cstrerror
	END(copyinstr)

/*
 * Copy a null terminated string from the kernel address space into
 * user address space.
 *
 * copyoutstr(kernel_src, user_dest, maxlength, &lencopied)
 *	a0 = kernel_src, a1 = user_dest, a2 = maxlength, a3 = &lencopied
 * returns:
 *	0 - success
 *	EFAULT - user_dest not accessible
 *	ENAMETOOLONG - string exceeded maxlength
 *
 * Same structure as copyinstr, except that the address checked for
 * the sign bit is the destination (a1).
 *
 * NOTE(review): as in copyinstr, a negative maxlength (a2) is not
 * rejected here -- confirm against callers.
 */
LEAF(copyoutstr)
#ifdef ASSERTIONS
	/* the nofault word must not already be armed */
	lw	v0,u+PCB_CPUPTR
	lw	v0,CPU_NOFAULT(v0)
	beq	v0,zero,8f
	PANIC("recursive nofault")
8:
#endif /* ASSERTIONS */
	bgez	a1,_cpyiostr		# user_dest must be in kuseg
	b	cstrerror
	END(copyoutstr)

/*
 * Copy a null terminated string from one point to another in
 * kernel address space.
*
 * copystr(src, dest, maxlength, &lencopied)
 *	a0 = src, a1 = dest, a2 = maxlength, a3 = &lencopied (may be 0)
 * returns:
 *	0 - success
 *	EFAULT - address not accessible (bogus length)
 *	ENAMETOOLONG - string exceeded maxlength
 *
 * On success the value stored through &lencopied counts the
 * terminating null byte; on ENAMETOOLONG it is maxlength.
 *
 * Entry points:
 *	copystr    - rejects a negative maxlength, then enters _cpystr
 *	_cpyiostr  - used by copyinstr/copyoutstr; stores NF_COPYSTR in
 *		     the nofault word, then falls into _cpystr
 *	_cpystr    - the copy itself
 */
LEAF(copystr)
	bgez	a2,_cpystr
	b	cstrerror

XLEAF(_cpyiostr)
	.set	noreorder
	li	v0,NF_COPYSTR
	lw	v1,u+PCB_CPUPTR
	nop
	sw	v0,CPU_NOFAULT(v1)	# prepare for the worst
	.set	reorder

XLEAF(_cpystr)
	/*
	 * start up first word
	 * adjust pointers so that a0 points to next word
	 * t7 = a0 adjusted by same amount minus one
	 * t0,t1,t2,t3 are filled with 4 consecutive bytes
	 * t4 is filled with the same 4 bytes in a single word
	 *
	 * v0 holds the original maxlength for the whole routine;
	 * t5 = a1 | 3 is the last byte of the first destination word and
	 * t6 = number of bytes from a1 to the end of that word.
	 */
	.set	noreorder
	ble	a2,4,$dumbcpy		# not enough for a word
	move	v0,a2			# BDSLOT save copy of maxlength
	lb	t0,0(a0)
	nop				# LDSLOT
	beq	t0,zero,$cpy1ch
	or	t5,a1,3			# BDSLOT get an early start
	lb	t1,1(a0)
	subu	t6,t5,a1		# LDSLOT number of char in 1st word of dst - 1
	beq	t1,zero,$cpy2ch
	addu	t7,a0,t6		# BDSLOT offset starting pt for source string
	lb	t2,2(a0)
	nop				# LDSLOT
	beq	t2,zero,$cpy3ch
	LWS	t4,0(a0)		# BDSLOT safe: always in same word as 0(a0)
	lb	t3,3(a0)		# LDSLOT
	LWB	t4,3(a0)		# LDSLOT fill out word
	beq	t3,zero,$cpy4ch		# LDSLOT
	addu	t6,1			# BDSLOT chars stored by SWS
	blt	a2,t6,$cpy4ch		# out of space
	addu	a0,t6			# BDSLOT adjust source pointer
	SWS	t4,0(a1)		# store entire or part word
	subu	a1,t5,3			# adjust destination ptr
	subu	a2,t6			# decr maxlength

	/*
	 * inner loop
	 * at this point the destination is word aligned and t7
	 * points 1 byte before the corresponding source location
	 *
	 * Each pass advances a1 by a word first, so on entry a1 is the
	 * word that was just (partly) written.  The four source bytes are
	 * tested one at a time; any null byte leaves the loop through
	 * $cpy1ch/$cpy2ch/$cpy3ch, or through the fall-through after the
	 * word store when the fourth byte is the null.
	 */
1:	ble	a2,4,$dumbcpy
	addu	a1,4			# BDSLOT
	lb	t0,1(t7)
	addu	t7,4			# LDSLOT
	beq	t0,zero,$cpy1ch
	nop				# BDSLOT
	lb	t1,1+1-4(t7)
	nop				# LDSLOT
	beq	t1,zero,$cpy2ch
	nop				# BDSLOT
	lb	t2,2+1-4(t7)
	addu	a0,4			# LDSLOT adjust source pointer
	beq	t2,zero,$cpy3ch
	LWS	t4,0+1-4(t7)		# BDSLOT
	subu	a2,4			# LDSLOT
	bltz	a2,$nsp4ch		# no room for 4
	lb	t3,3+1-4(t7)		# BDSLOT
	LWB	t4,3+1-4(t7)		# LDSLOT
	bne	t3,zero,1b		# LDSLOT
	sw	t4,0(a1)		# BDSLOT
	b	$cpyok
	nop				# BDSLOT
$cpy4ch:
	/*
	 * 4 bytes left to store
	 */
	subu	a2,4
	bgez	a2,$do4ch		# room left
	nop				# BDSLOT
	b	$nsp3ch			# try 3 characters
/* Delay slot of the branch to $nsp3ch: a2 becomes (room left - 3). */
	addu	a2,1			# BDSLOT
$do4ch:
	/* unaligned 4-byte store of t4 at a1 */
	SWS	t4,0(a1)
	b	$cpyok
	SWB	t4,3(a1)		# BDSLOT
$cpy3ch:
	/*
	 * 3 bytes left to store
	 */
	subu	a2,3
	bgez	a2,$do3ch		# room left
	nop				# BDSLOT
	b	$nsp2ch			# no space for 3, see if 2 will fit
	addu	a2,1			# BDSLOT
$cpy2ch:
	/*
	 * 2 bytes left to store
	 */
	subu	a2,2
	bgez	a2,$do2ch		# room left
	nop				# BDSLOT
	b	$nsp1ch			# no space for 2, see if 1 will fit
	addu	a2,1			# BDSLOT
$cpy1ch:
	/*
	 * 1 last byte to store
	 */
	subu	a2,1
	bgez	a2,$do1ch		# room left
	nop				# BDSLOT
	b	$nospace		# no space at all
	addu	a2,1			# BDSLOT

	/* fall-through chain: entering at $doNch stores bytes N-1 .. 0 */
$do3ch:
	sb	t2,2(a1)
$do2ch:
	sb	t1,1(a1)
$do1ch:
	sb	t0,0(a1)
$cpyok:
	/*
	 * copy complete, calculate length copied if necessary and
	 * return
	 */
	subu	a2,v0,a2		# bytes copied = maxlength - rem
	move	v0,zero			# success return code
$cpyexit:
	/*
	 * Common exit: v0 = return code, a2 = length to report.
	 * Clears the nofault word and stores a2 through a3 unless a3 is 0.
	 * Assembled in reorder mode, so the assembler fills the delay slots
	 * and the sw below is skipped when the beq is taken.
	 */
	.set	reorder
	lw	v1,u+PCB_CPUPTR
	sw	zero,CPU_NOFAULT(v1)
	beq	a3,zero,1f		# &lencopied == 0 ?
	sw	a2,0(a3)		# no, return lencopied
1:	j	ra
	.set	noreorder

/*
 * not enough room to move one word, do stupid byte copy
 */
$dumbcpy:
	beq	a2,zero,$nospace	# no room
	nop				# BDSLOT
	lbu	t0,0(a0)
	subu	a2,1			# LDSLOT decr count
	addu	a0,1			# bump source ptr
	sb	t0,0(a1)
	bne	t0,zero,$dumbcpy	# not null terminator
	addu	a1,1			# BDSLOT bump dest ptr
	b	$cpyok
	nop				# BDSLOT

/*
 * ran out of space, copy as many characters as possible
 */
$nsp4ch:
	addu	a2,1
$nsp3ch:
	bgez	a2,$ndo3ch		# room for 3
	addu	a2,1			# BDSLOT
$nsp2ch:
	bgez	a2,$ndo2ch		# room for 2
	addu	a2,1			# BDSLOT
$nsp1ch:
	bgez	a2,$ndo1ch		# room for 1
	nop				# BDSLOT
	b	$nospace
	nop				# BDSLOT

	/* fall-through chain: entering at $ndoNch stores bytes N-1 .. 0 */
$ndo3ch:
	sb	t2,2(a1)
$ndo2ch:
	sb	t1,1(a1)
$ndo1ch:
	sb	t0,0(a1)
$nospace:
	/*
	 * Ran out of space, length copied is always maxlength
	 */
	move	a2,v0			# copied max length
	b	$cpyexit
	li	v0,ENAMETOOLONG		# BDSLOT string too big
	.set	reorder
	/* the procedure was opened with LEAF(copystr), so close that name */
	END(copystr)

/*
 * handle address fault for copy*str routines
 *
 * Clears the nofault word and returns EFAULT in v0.  copyinstr,
 * copyoutstr and copystr branch here when their argument check fails.
 * NOTE(review): presumably also the target the trap handler resumes at
 * when a fault is taken with NF_COPYSTR armed -- confirm in the
 * nofault dispatch code.
 */
LEAF(cstrerror)
	lw	v0,u+PCB_CPUPTR
	sw	zero,CPU_NOFAULT(v0)
	li	v0,EFAULT
	j	ra
	END(cstrerror)

#ifdef oldmips
/*
 * hwcpout is going from 32 bit bus to 16 bit bus, hwcpin is opposite.
 * a0 is the source.  a1 is the destination.  a2 is the byte-count.
* a3 is (usually) the value a0 should have when the current move
 * loop is done.  v0,v1,t0,t1,t2 are scratch regs, used for alignment
 * and moves.  Does not save any regs.  No return value.
 *
 * Basic Algorithm:
 *	First check if the count passed is < HWMINCOPY.  If yes,
 *	jump to byte copy routine, it isn't worth hassling.
 *	Then, try to align the 32 bit side on a word boundary.
 *	If you can't, just byte copy.  This happens very rarely, the
 *	typical case is both sides word aligned.  Then do as many
 *	16 byte copy loops as you can, then do as many 2 byte copy
 *	iterations as you can, then pick up the dregs with byte copies.
 *	This assumes typical case is full aligned, 20-112 bytes.
 */

/*
 * HWMINCOPY is the minimum copy size on which we try to align and
 * use half-word rather than byte copy loop.  Mash thought 8-12
 * was a good number for bcopy(), seems to me that the outside of
 * that range would suit here.  Why?  We are assuming cache hit rate
 * of 0%.  Our half-word loop is less efficient in its use of the
 * WB and of cycles than his full word loop.  MORE THOUGHT HERE PLEASE.
 */
#define HWMINCOPY 12

LEAF(hwcpout)
/*
 * first, check for alignment possibilities
 */
	/*
	 * tmp is a scratch word in .sdata; the copy loops store zero to it
	 * between device stores (see the "WB gather" comments).
	 */
	.sdata
tmp:	.word	0			# used for WB flush
	.text
	xor	v0, a0, a1		# bash src & dst for align chk
	blt	a2, HWMINCOPY, hbytecopy	# too short, just byte copy
	and	v0, 1			# low-order bit for align chk
	subu	v1, zero, a0		# -src; BDSLOT
	bne	v0, zero, hbytecopy	# src and dst not alignable

/*
 * src and dst agree in address bit 0 (only that bit of src ^ dst was
 * tested), so once src is word aligned dst is at least half-word
 * aligned.  v1 = (-src) & 3 is the number of bytes to copy singly
 * before src reaches a word boundary.
 */
	and	v1, 3			# number of bytes til aligned
	subu	a2, v1			# bcount -= alignment
	addu	a3,v1,a0		# end of align move
	beq	v1, zero, hblkcopy	# already aligned

/*
 * This is the easy way, could maybe be done better.  The problem
 * is that lwl/r and swl/r will not work on the 16 bit side.  Since
 * worst case is three times through, the math to do the possible
 * half-word copy does not seem worth it, nor does the shifting to
 * use the lwl/r from the 32 bit side.
*/
	/* byte copy until a0 reaches a3 (the word-aligned source address) */
1:					# tight loop
	lb	v0, 0(a0)
	addu	a0, 1
	sb	v0, 0(a1)
	addu	a1, 1
	sw	zero, tmp		# ensure no WB gather
	bne	a0, a3, 1b

/*
 * 16 byte block, aligned copy loop (for big reads/writes)
 * We must outfox the WB on 16 bit stores, else the card will
 * punt the data.  This explains the somewhat esoteric ordering of
 * the stores.  If we write consecutive half-words to the same
 * word address, we lose.
 *
 * Four words are loaded from the source and written as eight
 * half-words at destination offsets 2, 6, 0, 10, 4, 14, 8, 12, so no
 * two successive stores fall in the same destination word.
 *
 * NOTE(review): the low 16 bits of each word go to the higher
 * half-word address (offset 2, 6, ...) and the high 16 bits to the
 * lower one, i.e. big-endian half-word order -- confirm this path is
 * only built for big-endian targets.
 */
hblkcopy:
	and	a3, a2, ~15		# total space in 16 byte chunks
	subu	a2, a3			# count after by-16 byte loop done
	beq	a3, zero, hwordcopy	# less than 16 bytes to copy
	addu	a3, a0			# source endpoint
	.set	noreorder
1:	lw	v0, 0(a0)
	addu	a0, 16			# src += 16 ; no other delay slot...
	lw	t0, -12(a0)
	sh	v0, 2(a1)
	srl	v0, 16
	lw	t1, -8(a0)
	sh	t0, 6(a1)
	sh	v0, 0(a1)
	srl	t0, 16
	lw	t2, -4(a0)
	sh	t1, 10(a1)
	sh	t0, 4(a1)
	srl	t1, 16
	sh	t2, 14(a1)
	sh	t1, 8(a1)
	srl	t2, 16
	sh	t2, 12(a1)
	bne	a0, a3, 1b
	addu	a1, 16			# dst += 16
	.set	reorder

/*
 * copy what ever is left, but is aligned, in half-words
 *
 * a3 is first the end of the remaining source range; it is then
 * rounded down to an even address for the half-word loop and a2 is
 * left holding the 0 or 1 trailing bytes for hbytecopy.
 */
hwordcopy:
	addu	a3, a2, a0		# source endpoint;
	ble	a2, 1, hbytecopy

/*
 * This could maybe be done better?
 */
	and	t0, a3, ~1		# catch tail
	subu	a2, a3, t0
	move	a3, t0
1:					# tight loop
	.set	noreorder
	lh	v0, 0(a0)
	addu	a0, 2			#LDSLOT
	sh	v0, 0(a1)
	addu	a1, 2
	bne	a0, a3, 1b
	sw	zero, tmp		# ensure no WB gather
	.set	reorder

/*
 * Brute force byte copy loop, pick up the dregs.  Also pick up copies
 * that are unalignable, doing the math to be smarter under the
 * 16 bit constraints turns out to lose.
 *
 * Entered with a0 = source, a1 = destination, a2 = bytes left; a count
 * that is zero or negative copies nothing.
 */
hbytecopy:
	addu	a3, a2, a0		# source endpoint; BDSLOT
	ble	a2, zero, hcopydone	# nothing left to copy, or bad length
1:					# tight loop
	.set	noreorder
	lb	v0, 0(a0)
	addu	a0, 1			# incr src address
	sb	v0, 0(a1)
	addu	a1, 1			# incr dst address
	bne	a0, a3, 1b
	sw	zero, tmp		# ensure no WB gather
	.set	reorder

hcopydone:
	j	ra
	END(hwcpout)

LEAF(hwcpin)
/*
 * first, check for alignment possibilities
 */
	xor	v0, a0, a1		# bash src & dst for align chk
	blt	a2, HWMINCOPY, hbytecopy	# too short, just byte copy
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -