filemap.c
int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long left, count = desc->count;

	if (size > count)
		size = count;

	kaddr = kmap(page);
	left = __copy_to_user(desc->buf, kaddr + offset, size);
	kunmap(page);

	if (left) {
		size -= left;
		desc->error = -EFAULT;
	}
	desc->count = count - size;
	desc->written += size;
	desc->buf += size;
	return size;
}

/*
 * This is the "read()" routine for all filesystems
 * that can use the page cache directly.
 */
ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
{
	ssize_t retval;

	if ((ssize_t) count < 0)
		return -EINVAL;

	if (filp->f_flags & O_DIRECT)
		goto o_direct;

	retval = -EFAULT;
	if (access_ok(VERIFY_WRITE, buf, count)) {
		retval = 0;

		if (count) {
			read_descriptor_t desc;

			desc.written = 0;
			desc.count = count;
			desc.buf = buf;
			desc.error = 0;
			do_generic_file_read(filp, ppos, &desc, file_read_actor);

			retval = desc.written;
			if (!retval)
				retval = desc.error;
		}
	}
 out:
	return retval;

 o_direct:
	{
		loff_t pos = *ppos, size;
		struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
		struct inode *inode = mapping->host;

		retval = 0;
		if (!count)
			goto out; /* skip atime */
		size = inode->i_size;
		if (pos < size) {
			if (pos + count > size)
				count = size - pos;
			retval = generic_file_direct_IO(READ, filp, buf, count, pos);
			if (retval > 0)
				*ppos = pos + retval;
		}
		UPDATE_ATIME(filp->f_dentry->d_inode);
		goto out;
	}
}

static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
	ssize_t written;
	unsigned long count = desc->count;
	struct file *file = (struct file *) desc->buf;

	if (size > count)
		size = count;

	if (file->f_op->sendpage) {
		written = file->f_op->sendpage(file, page, offset,
					       size, &file->f_pos, size < count);
	} else {
		char *kaddr;
		mm_segment_t old_fs;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		kaddr = kmap(page);
		written = file->f_op->write(file, kaddr + offset,
					    size, &file->f_pos);
		kunmap(page);
		set_fs(old_fs);
	}
	if (written < 0) {
		desc->error = written;
		written = 0;
	}
	desc->count = count - written;
	desc->written += written;
	return written;
}
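/*
 * A minimal userspace sketch (not part of filemap.c) of the
 * read-descriptor/actor pattern used by generic_file_read() above:
 * do_generic_file_read() walks the page cache and hands each page to
 * an actor such as file_read_actor(), which advances the descriptor.
 * Everything below -- struct read_desc, demo_read_actor(), the fake
 * "pages" -- is illustrative, not a kernel API.
 */
#include <stdio.h>
#include <string.h>

struct read_desc {
	size_t written;		/* bytes copied so far */
	size_t count;		/* bytes still wanted */
	char *buf;		/* destination cursor */
	int error;
};

/* Copy up to `size` bytes of one "page" into the descriptor's buffer. */
static size_t demo_read_actor(struct read_desc *desc, const char *page,
			      size_t offset, size_t size)
{
	if (size > desc->count)
		size = desc->count;
	memcpy(desc->buf, page + offset, size);	/* the kernel uses __copy_to_user() */
	desc->count -= size;
	desc->written += size;
	desc->buf += size;
	return size;
}

int main(void)
{
	const char *pages[] = { "hello, ", "page cache" };	/* a fake two-page file */
	char out[32] = "";
	struct read_desc desc = { 0, sizeof(out) - 1, out, 0 };
	size_t i;

	/* This loop stands in for do_generic_file_read(). */
	for (i = 0; i < 2 && desc.count; i++)
		demo_read_actor(&desc, pages[i], 0, strlen(pages[i]));
	printf("read %zu bytes: \"%s\"\n", desc.written, out);
	return 0;
}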
asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, size_t count)
{
	ssize_t retval;
	struct file * in_file, * out_file;
	struct inode * in_inode, * out_inode;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in_file = fget(in_fd);
	if (!in_file)
		goto out;
	if (!(in_file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -EINVAL;
	in_inode = in_file->f_dentry->d_inode;
	if (!in_inode)
		goto fput_in;
	if (!in_inode->i_mapping->a_ops->readpage)
		goto fput_in;
	retval = locks_verify_area(FLOCK_VERIFY_READ, in_inode, in_file, in_file->f_pos, count);
	if (retval)
		goto fput_in;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out_file = fget(out_fd);
	if (!out_file)
		goto fput_in;
	if (!(out_file->f_mode & FMODE_WRITE))
		goto fput_out;
	retval = -EINVAL;
	if (!out_file->f_op || !out_file->f_op->write)
		goto fput_out;
	out_inode = out_file->f_dentry->d_inode;
	retval = locks_verify_area(FLOCK_VERIFY_WRITE, out_inode, out_file, out_file->f_pos, count);
	if (retval)
		goto fput_out;

	retval = 0;
	if (count) {
		read_descriptor_t desc;
		loff_t pos = 0, *ppos;

		retval = -EFAULT;
		ppos = &in_file->f_pos;
		if (offset) {
			if (get_user(pos, offset))
				goto fput_out;
			ppos = &pos;
		}

		desc.written = 0;
		desc.count = count;
		desc.buf = (char *) out_file;
		desc.error = 0;
		do_generic_file_read(in_file, ppos, &desc, file_send_actor);

		retval = desc.written;
		if (!retval)
			retval = desc.error;
		if (offset)
			put_user(pos, offset);
	}

fput_out:
	fput(out_file);
fput_in:
	fput(in_file);
out:
	return retval;
}

static ssize_t do_readahead(struct file *file, unsigned long index, unsigned long nr)
{
	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
	unsigned long max;

	if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
		return -EINVAL;

	/* Limit it to the size of the file.. */
	max = (mapping->host->i_size + ~PAGE_CACHE_MASK) >> PAGE_CACHE_SHIFT;
	if (index > max)
		return 0;
	max -= index;
	if (nr > max)
		nr = max;

	/* And limit it to a sane percentage of the inactive list.. */
	max = nr_inactive_pages / 2;
	if (nr > max)
		nr = max;

	while (nr) {
		page_cache_read(file, index);
		index++;
		nr--;
	}
	return 0;
}

asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct file *file;

	ret = -EBADF;
	file = fget(fd);
	if (file) {
		if (file->f_mode & FMODE_READ) {
			unsigned long start = offset >> PAGE_CACHE_SHIFT;
			unsigned long len = (count + ((long)offset & ~PAGE_CACHE_MASK)) >> PAGE_CACHE_SHIFT;
			ret = do_readahead(file, start, len);
		}
		fput(file);
	}
	return ret;
}
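/*
 * A userspace sketch (not part of filemap.c) exercising the two system
 * calls implemented above: readahead(2) to prime the page cache and
 * sendfile(2) to copy file data in-kernel through file_send_actor().
 * The /tmp paths are placeholders and error handling is abbreviated.
 */
#define _GNU_SOURCE		/* for readahead() */
#include <fcntl.h>
#include <stdio.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	off_t offset = 0;
	int in = open("/tmp/src.dat", O_RDONLY);	/* placeholder path */
	int out = open("/tmp/dst.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (in < 0 || out < 0 || fstat(in, &st) < 0) {
		perror("setup");
		return 1;
	}

	/* Purely advisory: start paging the source file in now. */
	if (readahead(in, 0, st.st_size) < 0)
		perror("readahead");

	/* One in-kernel copy; the kernel advances `offset` for us. */
	if (sendfile(out, in, &offset, st.st_size) < 0)
		perror("sendfile");

	close(in);
	close(out);
	return 0;
}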
/*
 * Read-ahead and flush behind for MADV_SEQUENTIAL areas. Since we are
 * sure this is sequential access, we don't need a flexible read-ahead
 * window size -- we can always use a large fixed size window.
 */
static void nopage_sequential_readahead(struct vm_area_struct * vma,
	unsigned long pgoff, unsigned long filesize)
{
	unsigned long ra_window;

	ra_window = get_max_readahead(vma->vm_file->f_dentry->d_inode);
	ra_window = CLUSTER_OFFSET(ra_window + CLUSTER_PAGES - 1);

	/* vm_raend is zero if we haven't read ahead in this area yet. */
	if (vma->vm_raend == 0)
		vma->vm_raend = vma->vm_pgoff + ra_window;

	/*
	 * If we've just faulted the page half-way through our window,
	 * then schedule reads for the next window, and release the
	 * pages in the previous window.
	 */
	if ((pgoff + (ra_window >> 1)) == vma->vm_raend) {
		unsigned long start = vma->vm_pgoff + vma->vm_raend;
		unsigned long end = start + ra_window;

		if (end > ((vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff))
			end = (vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff;
		if (start > end)
			return;

		while ((start < end) && (start < filesize)) {
			if (read_cluster_nonblocking(vma->vm_file, start, filesize) < 0)
				break;
			start += CLUSTER_PAGES;
		}
		run_task_queue(&tq_disk);

		/* if we're far enough past the beginning of this area,
		   recycle pages that are in the previous window. */
		if (vma->vm_raend > (vma->vm_pgoff + ra_window + ra_window)) {
			unsigned long window = ra_window << PAGE_SHIFT;

			end = vma->vm_start + (vma->vm_raend << PAGE_SHIFT);
			end -= window + window;
			filemap_sync(vma, end - window, window, MS_INVALIDATE);
		}

		vma->vm_raend += ra_window;
	}

	return;
}

/*
 * filemap_nopage() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 */
struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int unused)
{
	int error;
	struct file *file = area->vm_file;
	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
	struct inode *inode = mapping->host;
	struct page *page, **hash;
	unsigned long size, pgoff, endoff;

	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;

retry_all:
	/*
	 * An external ptracer can access pages that normally aren't
	 * accessible..
	 */
	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if ((pgoff >= size) && (area->vm_mm == current->mm))
		return NULL;

	/* The "size" of the file, as far as mmap is concerned, isn't bigger than the mapping */
	if (size > endoff)
		size = endoff;

	/*
	 * Do we have something in the page cache already?
	 */
	hash = page_hash(mapping, pgoff);
retry_find:
	page = __find_get_page(mapping, pgoff, hash);
	if (!page)
		goto no_cached_page;

	/*
	 * Ok, found a page in the page cache, now we need to check
	 * that it's up-to-date.
	 */
	if (!Page_Uptodate(page))
		goto page_not_uptodate;

success:
	/*
	 * Try read-ahead for sequential areas.
	 */
	if (VM_SequentialReadHint(area))
		nopage_sequential_readahead(area, pgoff, size);

	/*
	 * Found the page and have a reference on it, need to check sharing
	 * and possibly copy it over to another page..
	 */
	mark_page_accessed(page);
	flush_page_to_ram(page);
	return page;

no_cached_page:
	/*
	 * If the requested offset is within our file, try to read a whole
	 * cluster of pages at once.
	 *
	 * Otherwise, we're off the end of a privately mapped file,
	 * so we need to map a zero page.
	 */
	if ((pgoff < size) && !VM_RandomReadHint(area))
		error = read_cluster_nonblocking(file, pgoff, size);
	else
		error = page_cache_read(file, pgoff);

	/*
	 * The page we want has now been added to the page cache.
	 * In the unlikely event that someone removed it in the
	 * meantime, we'll just come back here and read it again.
	 */
	if (error >= 0)
		goto retry_find;

	/*
	 * An error return from page_cache_read can result if the
	 * system is low on memory, or a problem occurs while trying
	 * to schedule I/O.
	 */
	if (error == -ENOMEM)
		return NOPAGE_OOM;
	return NULL;

page_not_uptodate:
	lock_page(page);

	/* Did it get unhashed while we waited for it? */
	if (!page->mapping) {
		UnlockPage(page);
		page_cache_release(page);
		goto retry_all;
	}

	/* Did somebody else get it up-to-date? */
	if (Page_Uptodate(page)) {
		UnlockPage(page);
		goto success;
	}

	if (!mapping->a_ops->readpage(file, page)) {
		wait_on_page(page);
		if (Page_Uptodate(page))
			goto success;
	}

	/*
	 * Umm, take care of errors if the page isn't up-to-date.
	 * Try to re-read it _once_. We do this synchronously,
	 * because there really aren't any performance issues here
	 * and we need to check for errors.
	 */
	lock_page(page);

	/* Somebody truncated the page on us? */
	if (!page->mapping) {
		UnlockPage(page);
		page_cache_release(page);
		goto retry_all;
	}

	/* Somebody else successfully read it in? */
	if (Page_Uptodate(page)) {
		UnlockPage(page);
		goto success;
	}
	ClearPageError(page);
	if (!mapping->a_ops->readpage(file, page)) {
		wait_on_page(page);
		if (Page_Uptodate(page))
			goto success;
	}

	/*
	 * Things didn't work out. Return zero to tell the
	 * mm layer so, possibly freeing the page cache page first.
	 */
	page_cache_release(page);
	return NULL;
}
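/*
 * A userspace sketch (not part of filemap.c) of the path that ends in
 * filemap_nopage(): each first touch of a mapped page faults it in
 * through the page cache, and madvise(MADV_SEQUENTIAL) sets the hint
 * that VM_SequentialReadHint() tests above, enabling the fixed-window
 * readahead in nopage_sequential_readahead(). The path is a placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	long pg = sysconf(_SC_PAGESIZE);
	int fd = open("/tmp/data.bin", O_RDONLY);	/* placeholder path */
	unsigned char *p;
	off_t i;
	unsigned long sum = 0;

	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return 1;
	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Declare sequential access before touching the pages. */
	madvise(p, st.st_size, MADV_SEQUENTIAL);

	/* Every first touch of a page is a fault serviced by filemap_nopage(). */
	for (i = 0; i < st.st_size; i += pg)
		sum += p[i];
	printf("touched %lld bytes, checksum %lu\n", (long long) st.st_size, sum);

	munmap(p, st.st_size);
	close(fd);
	return 0;
}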
/* Called with mm->page_table_lock held to protect against other
 * threads/the swapper from ripping pte's out from under us.
 */
static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
	unsigned long address, unsigned int flags)
{
	pte_t pte = *ptep;

	if (pte_present(pte)) {
		struct page *page = pte_page(pte);
		if (VALID_PAGE(page) && !PageReserved(page) && ptep_test_and_clear_dirty(ptep)) {
			flush_tlb_page(vma, address);
			set_page_dirty(page);
		}
	}
	return 0;
}

static inline int filemap_sync_pte_range(pmd_t * pmd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned long offset, unsigned int flags)
{
	pte_t * pte;
	unsigned long end;
	int error;

	if (pmd_none(*pmd))
		return 0;
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		return 0;
	}
	pte = pte_offset(pmd, address);
	offset += address & PMD_MASK;
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte(pte, vma, address + offset, flags);
		address += PAGE_SIZE;
		pte++;
	} while (address && (address < end));
	return error;
}

static inline int filemap_sync_pmd_range(pgd_t * pgd,
	unsigned long address, unsigned long size,
	struct vm_area_struct *vma, unsigned int flags)
{
	pmd_t * pmd;
	unsigned long offset, end;
	int error;

	if (pgd_none(*pgd))
		return 0;
	if (pgd_bad(*pgd)) {
		pgd_ERROR(*pgd);
		pgd_clear(pgd);
		return 0;
	}
	pmd = pmd_offset(pgd, address);
	offset = address & PGDIR_MASK;
	address &= ~PGDIR_MASK;
	end = address + size;
	if (end > PGDIR_SIZE)
		end = PGDIR_SIZE;
	error = 0;
	do {
		error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address && (address < end));
	return error;
}
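/*
 * A userspace sketch (not part of filemap.c) of the caller these
 * filemap_sync_*() walkers serve: msync(2) walks the page tables of a
 * mapped range, and filemap_sync_pte() transfers each PTE's dirty bit
 * to its struct page so the data can be written back. The path is a
 * placeholder and the file is assumed to be at least one page long.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 4096;
	int fd = open("/tmp/state.bin", O_RDWR);	/* placeholder path */
	char *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	memcpy(p, "dirty", 5);	/* the write fault marks the PTE dirty */

	/* Synchronously flush the range; this is what drives filemap_sync(). */
	if (msync(p, len, MS_SYNC) < 0)
		return 1;

	munmap(p, len);
	close(fd);
	return 0;
}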