📄 big_copy.s
字号:
/* @(#)big_copy.s 1.1 92/07/30 SMI */

	.seg	"text"
	.align	4

/*
 * Copyright (c) 1987 by Sun Microsystems, Inc.
 */

/*
 * SPARC block copy routines: kcopy, bcopy and pgcopy (assembled only
 * when `never' is defined) and ovbcopy (always assembled).
 *
 * Reading notes:
 *   - Every branch, call, ret, retl and jmp is followed by a delay slot
 *     instruction that executes before control is transferred.
 *   - On a conditional branch the `,a' suffix annuls the delay slot when
 *     the branch is NOT taken; `b,a' always annuls its delay slot.
 *   - After `save' the caller's %o registers are visible as %i registers.
 */

#include <machine/asm_linkage.h>
#include <sys/errno.h>

#ifdef never
#include <sys/param.h>
#include <sun4/mmu.h>
#include "assym.s"

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns EIO on pagefault error, 0 if all ok
 *
 * int
 * kcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Shares do_copy with bcopy; the only difference is that %o3 carries
 * the address of copyerr, which do_copy installs in u.u_lofault.
 */
	ENTRY(kcopy)
	sethi	%hi(copyerr), %o3	! copyerr is lofault value
	b	do_copy			! common code
	or	%o3, %lo(copyerr), %o3	! delay slot, finish address of copyerr

/*
 * We got here because of a fault during kcopy.
 * Still running in the window created by do_copy, so %l6 holds the
 * u.u_lofault value that was saved there.
 */
copyerr:
	st	%l6, [_u+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g0, EIO, %o0		! return (EIO)

/*
 * Copy a block of storage - must not overlap ( from + len <= to).
 * Registers: l6 - saved u.u_lofault
 *
 * bcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * After the save in do_copy: i0 - from, i1 - to, i2 - count,
 * i3 - lofault value to install (0 for bcopy, copyerr for kcopy).
 * Counts below 12 are copied a byte at a time.
 */
	ENTRY(bcopy)
	mov	0, %o3			! no lofault value
do_copy:
	save	%sp, -SA(MINFRAME), %sp	! get another window
	ld	[_u+U_LOFAULT], %l6	! save u.u_lofault
	cmp	%i2, 12			! for small counts
	bl	bytecp			! just copy bytes
	st	%i3, [_u+U_LOFAULT]	! install new vector (delay slot, both paths)
	!
	! Probe to see if word access is allowed (i.e. not VME_D16)
	! This assumes that the source and destination will not
	! change to VME_D16 during the bcopy. This will get a data
	! fault with be_vmeserr set if unsuccessful. Trap will
	! then return to bcopy_vme16. This is gross but fast.
	!
	.global	_bcopy_probe, _ebcopy_probe
_bcopy_probe:
	andn	%i0, 3, %l0		! align source
	ld	[%l0], %g0		! probe source
	andn	%i1, 3, %l0		! align dest
	ld	[%l0], %g0		! probe dest
_ebcopy_probe:
	!
	! use aligned transfers where possible
	!
	xor	%i0, %i1, %o4		! xor from and to address
	btst	7, %o4			! if lower three bits zero
	bz	aldoubcp		! can align on double boundary
	btst	3, %o4			! if lower two bits zero
	bz	alwordcp		! can align on word boundary
	btst	3, %i0			! delay slot, from address unaligned?
	!
	! use aligned reads and writes where possible
	! this differs from wordcp in that it copes
	! with odd alignment between source and destination
	! using word reads and writes with the proper shifts
	! in between to align transfers to and from memory
	! i0 - src address, i1 - dest address, i2 - count
	! i3, i4 - tmps used for generating complete word
	! i5 (word to write)
	! l0 size in bits of upper part of source word (US)
	! l1 size in bits of lower part of source word (LS = 32 - US)
	! l2 size in bits of upper part of destination word (UD)
	! l3 size in bits of lower part of destination word (LD = 32 - UD)
	! l4 number of bytes leftover after aligned transfers complete
	! l5 the number 32
	!
	! The condition codes tested by the next bz still come from the
	! btst 3, %i0 in the delay slot above; mov and clr do not alter them.
	!
	mov	32, %l5			! load an oft-needed constant
	bz	align_dst_only
	btst	3, %i1			! is destination address aligned?
	clr	%i4			! clear registers used in either case
	bz	align_src_only
	clr	%l0
	!
	! both source and destination addresses are unaligned
	!
1:					! align source
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment size of upper source (US)
	bnz,a	1b
	sll	%i4, 8, %i4		! make room for next byte

	sub	%l5, %l0, %l1		! generate shift left count (LS)
	sll	%i4, %l1, %i4		! prepare to get rest
	ld	[%i0], %i3		! read a word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
	or	%i4, %i5, %i5		! merge
	mov	24, %l3			! align destination
1:
	srl	%i5, %l3, %i4		! prepare to write a single byte
	stb	%i4, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)

	sub	%l5, %l3, %l2		! generate shift left count (UD)
	sll	%i5, %l2, %i5		! move leftover into upper bytes
	cmp	%l2, %l0		! cmp # req'd to fill dst w old src left
	bg	more_needed		! need more to fill than we have
	nop

	sll	%i3, %l1, %i3		! clear upper used byte(s)
	srl	%i3, %l1, %i3		! get the odd bytes between alignments
	sub	%l0, %l2, %l0		! regenerate shift count
	sub	%l5, %l0, %l1		! generate new shift left count (LS)
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	srl	%i3, %l0, %i4
	or	%i5, %i4, %i5
	st	%i5, [%i1]		! write a word
	subcc	%i2, 4, %i2		! decrement count
	bz	unalign_out
	add	%i1, 4, %i1		! increment destination address

	b	2f
	sll	%i3, %l1, %i5		! get leftover into upper bits
more_needed:
	sll	%i3, %l0, %i3		! save remaining byte(s)
	srl	%i3, %l0, %i3
	sub	%l2, %l0, %l1		! regenerate shift count
	sub	%l5, %l1, %l0		! generate new shift left count
	sll	%i3, %l1, %i4		! move to fill empty space
	b	3f
	or	%i5, %i4, %i5		! merge to complete word
	!
	! the source address is aligned and destination is not
	!
align_dst_only:
	ld	[%i0], %i4		! read a word
	add	%i0, 4, %i0		! increment source address
	mov	24, %l0			! initial shift alignment count
1:
	srl	%i4, %l0, %i3		! prepare to write a single byte
	stb	%i3, [%i1]		! write a byte
	add	%i1, 1, %i1		! increment destination address
	sub	%i2, 1, %i2		! decrement count
	btst	3, %i1			! is destination aligned?
	bnz,a	1b
	sub	%l0, 8, %l0		! delay slot, decrement shift count
xfer:
	sub	%l5, %l0, %l1		! generate shift left count
	sll	%i4, %l1, %i5		! get leftover
3:
	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
	!
	! main shifted-word loop: one source word read and one
	! destination word written per pass
	!
2:
	ld	[%i0], %i3		! read a source word
	add	%i0, 4, %i0		! increment source address
	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
	st	%i5, [%i1]		! write a destination word
	subcc	%i2, 4, %i2		! decrement count
	bz	unalign_out		! check if done
	add	%i1, 4, %i1		! increment destination address
	b	2b			! loop
	sll	%i3, %l1, %i5		! get leftover
unalign_out:
	tst	%l4			! any bytes leftover?
	bz	cpdone
	.empty				! allow next instruction in delay slot
1:
	sub	%l0, 8, %l0		! decrement shift
	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
	stb	%i4, [%i1]		! write a byte
	subcc	%l4, 1, %l4		! decrement count
	bz	cpdone			! done?
	add	%i1, 1, %i1		! increment destination
	tst	%l0			! any more previously read bytes
	bnz	1b			! we have leftover bytes
	mov	%l4, %i2		! delay slot, mv cnt where bytecp wants
	b,a	bytecp			! let bcopy do the rest
	!
	! the destination address is aligned and the source is not
	!
align_src_only:
	ldub	[%i0], %i3		! read a byte from source address
	add	%i0, 1, %i0		! increment source address
	or	%i4, %i3, %i4		! or in with previous bytes (if any)
	btst	3, %i0			! is source aligned?
	add	%l0, 8, %l0		! increment shift count (US)
	bnz,a	align_src_only
	sll	%i4, 8, %i4		! make room for next byte
	b,a	xfer
	!
	! if from address unaligned for double-word moves,
	! move bytes till it is, if count is < 56 it could take
	! longer to align the thing than to do the transfer
	! in word size chunks right away
	!
aldoubcp:
	cmp	%i2, 56			! if count < 56, use wordcp, it takes
	bl,a	alwordcp		! longer to align doubles than words
	mov	3, %o0			! mask for word alignment
	call	alignit			! copy bytes until aligned
	mov	7, %o0			! mask for double alignment
	!
	! source and destination are now double-word aligned
	! see if transfer is large enough to gain by loop unrolling
	!
	cmp	%i2, 512		! if less than 512 bytes
	bge,a	blkcopy			! just copy double-words (overwrite i3)
	mov	0x100, %i3		! blk copy chunk size for unrolled loop
	!
	! i3 has aligned count returned by alignit
	!
	and	%i2, 7, %i2		! unaligned leftover count
5:
	ldd	[%i0], %o4		! read from address
	add	%i0, 8, %i0		! inc from address
	std	%o4, [%i1]		! write at destination address
	subcc	%i3, 8, %i3		! dec count
	bg	5b
	add	%i1, 8, %i1		! delay slot, inc to address
wcpchk:
	cmp	%i2, 4			! see if we can copy a word
	bl	bytecp			! if 3 or less bytes use bytecp
	!
	! for leftover bytes we fall into wordcp, if needed
	! (the and below is also the delay slot of the bl above; it leaves
	! a count of 3 or less unchanged)
	!
wordcp:
	and	%i2, 3, %i2		! unaligned leftover count
5:
	ld	[%i0], %o4		! read from address
	add	%i0, 4, %i0		! inc from address
	st	%o4, [%i1]		! write at destination address
	subcc	%i3, 4, %i3		! dec count
	bg	5b
	add	%i1, 4, %i1		! delay slot, inc to address
	b,a	bytecp

	! we come here to align copies on word boundaries
alwordcp:
	call	alignit			! go word-align it
	mov	3, %o0			! bits that must be zero to be aligned
	b,a	wordcp
	!
	! byte copy, works with any alignment
	! entered at bytecp with i2 = number of bytes still to copy; the
	! annulled delay slot load only happens when another byte remains
	!
1:
	add	%i0, 1, %i0		! inc from address
	stb	%o4, [%i1]		! write to address
	add	%i1, 1, %i1		! inc to address
bytecp:
	subcc	%i2, 1, %i2		! dec count
	bge,a	1b			! loop till done
	ldub	[%i0], %o4		! read from address
	!
	! common successful exit for bcopy, kcopy and pgcopy
	!
cpdone:
	st	%l6, [_u+U_LOFAULT]	! restore old u.u_lofault
	ret
	restore	%g0, 0, %o0		! return (0)

/*
 * Common code used to align transfers on word and doubleword
 * boundaries.  Aligns source and destination and returns a count
 * of aligned bytes to transfer in %i3
 *
 * Called with `call' from inside the do_copy window (no new window):
 *	%o0 - alignment mask (3 or 7); i0 - from; i1 - to; i2 - count
 * Copies single bytes until (from & mask) == 0, then returns with
 *	%i3 = count & ~mask
 * Uses %o4 as the byte temporary.
 */
1:
	inc	%i0			! inc from
	stb	%o4, [%i1]		! write a byte
	inc	%i1			! inc to
	dec	%i2			! dec count
alignit:
	btst	%o0, %i0		! %o0 is bit mask to check for alignment
	bnz,a	1b
	ldub	[%i0], %o4		! read next byte

	retl
	andn	%i2, %o0, %i3		! return size of aligned bytes

/*
 * Copy a page of memory.
 * Assumes double word alignment and a count >= 256.
 *
 * pgcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Every exit from this routine goes through cpdone, which stores %l6
 * into u.u_lofault.  %l6 is therefore loaded with the current
 * u.u_lofault right after the save, so that cpdone writes back the
 * value that was already there instead of the undefined contents of
 * the new window's %l6.
 */
	ENTRY(pgcopy)
	save	%sp, -SA(MINFRAME), %sp	! get another window
	ld	[_u+U_LOFAULT], %l6	! value for cpdone to store back
	mov	0x100, %i3
	!
	! loops have been unrolled so that 64 instructions(16 cache-lines)
	! are used; 256 bytes are moved each time through the loop
	! i0 - from; i1 - to; i2 - count; i3 - chunksize; l0-l3 - tmp
	!
	! We read a whole cache line and then we write it to
	! minimize thrashing.
	!
	! bcopy also enters here (at blkcopy) for double-word aligned
	! copies of 512 bytes or more.
	!
blkcopy:
	ldd	[%i0+0xf8], %l0		! 0xfc
	ldd	[%i0+0xf0], %l2
	std	%l0, [%i1+0xf8]
	std	%l2, [%i1+0xf0]

	ldd	[%i0+0xe8], %l0		! 0xec
	ldd	[%i0+0xe0], %l2
	std	%l0, [%i1+0xe8]
	std	%l2, [%i1+0xe0]

	ldd	[%i0+0xd8], %l0		! 0xdc
	ldd	[%i0+0xd0], %l2
	std	%l0, [%i1+0xd8]
	std	%l2, [%i1+0xd0]

	ldd	[%i0+0xc8], %l0		! 0xcc
	ldd	[%i0+0xc0], %l2
	std	%l0, [%i1+0xc8]
	std	%l2, [%i1+0xc0]

	ldd	[%i0+0xb8], %l0		! 0xbc
	ldd	[%i0+0xb0], %l2
	std	%l0, [%i1+0xb8]
	std	%l2, [%i1+0xb0]

	ldd	[%i0+0xa8], %l0		! 0xac
	ldd	[%i0+0xa0], %l2
	std	%l0, [%i1+0xa8]
	std	%l2, [%i1+0xa0]

	ldd	[%i0+0x98], %l0		! 0x9c
	ldd	[%i0+0x90], %l2
	std	%l0, [%i1+0x98]
	std	%l2, [%i1+0x90]

	ldd	[%i0+0x88], %l0		! 0x8c
	ldd	[%i0+0x80], %l2
	std	%l0, [%i1+0x88]
	std	%l2, [%i1+0x80]

	ldd	[%i0+0x78], %l0		! 0x7c
	ldd	[%i0+0x70], %l2
	std	%l0, [%i1+0x78]
	std	%l2, [%i1+0x70]

	ldd	[%i0+0x68], %l0		! 0x6c
	ldd	[%i0+0x60], %l2
	std	%l0, [%i1+0x68]
	std	%l2, [%i1+0x60]

	ldd	[%i0+0x58], %l0		! 0x5c
	ldd	[%i0+0x50], %l2
	std	%l0, [%i1+0x58]
	std	%l2, [%i1+0x50]

	ldd	[%i0+0x48], %l0		! 0x4c
	ldd	[%i0+0x40], %l2
	std	%l0, [%i1+0x48]
	std	%l2, [%i1+0x40]

	ldd	[%i0+0x38], %l0		! 0x3c
	ldd	[%i0+0x30], %l2
	std	%l0, [%i1+0x38]
	std	%l2, [%i1+0x30]

	ldd	[%i0+0x28], %l0		! 0x2c
	ldd	[%i0+0x20], %l2
	std	%l0, [%i1+0x28]
	std	%l2, [%i1+0x20]

	ldd	[%i0+0x18], %l0		! 0x1c
	ldd	[%i0+0x10], %l2
	std	%l0, [%i1+0x18]
	std	%l2, [%i1+0x10]

	ldd	[%i0+0x8], %l0		! 0x0c
	ldd	[%i0], %l2
	std	%l0, [%i1+0x8]
	std	%l2, [%i1]
instr:
	sub	%i2, %i3, %i2		! decrement count
	add	%i0, %i3, %i0		! increment from address
	cmp	%i2, 0x100		! enough to do another block?
	bge	blkcopy			! yes, do another chunk
	add	%i1, %i3, %i1		! increment to address
	tst	%i2			! all done yet?
	ble	cpdone			! yes, return
	cmp	%i2, 15			! can we do more cache lines
	bg,a	1f
	andn	%i2, 15, %i3		! %i3 bytes left, aligned (to 16 bytes)
	b	wcpchk
	andn	%i2, 3, %i3		! %i3 bytes left, aligned to 4 bytes
	!
	! Jump into the tail of the unrolled code above.  Each group of
	! four instructions (16 bytes of code) copies 16 bytes of data, so
	! starting %i3 bytes before instr copies offsets 0 .. %i3-1 and
	! then falls into instr, which advances the pointers by %i3.
	!
1:
	set	instr, %o5		! address of copy instructions
	sub	%o5, %i3, %o5		! jmp address relative to instr
	jmp	%o5
	nop
#endif /* never */

/*
 * Block copy with possibly overlapped operands.
 *
 * ovbcopy(from, to, count)
 *	caddr_t from, to;
 *	u_int count;
 *
 * Leaf routine: no register window is allocated.
 *	%o0 - from; %o1 - to; %o2 - count; %o3 - tmp
 * The count is tested with signed branches, so zero or a value with
 * the top bit set returns immediately without copying.
 * If count <= abs(from - to) the operands do not overlap and control
 * is passed to _bcopy with the arguments still in %o0-%o2.
 * Otherwise bytes are copied one at a time, backwards when from < to
 * (unsigned compare) and forwards when from >= to.
 */
	ENTRY(ovbcopy)
	tst	%o2			! check count
	bg,a	1f			! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! return
	nop
1:
	bneg,a	2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	ble	_bcopy			! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	ov_bkwd			! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bg	ov_fwd			! loop till done
	inc	%o1			! inc to address
	retl				! return
	nop
	!
	! Copy backwards.
	! %o2 is used both as the remaining count and as the byte offset,
	! so the last pass (offset 0) still does its store in the delay slot.
	!
ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bg	ov_bkwd			! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst
	retl				! return
	nop

#ifdef never
/*
 * Zero a block of storage, returning an error code if we
 * take a kernel pagefault which cannot be resolved.
 * Returns EIO on pagefault error, 0 if all ok
 *
 * int
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -