📄 filemap.c
        /*
         * Do we have something in the page cache already?
         */
retry_find:
        page = find_lock_page(mapping, vmf->pgoff);

        /*
         * For sequential accesses, we use the generic readahead logic.
         */
        if (VM_SequentialReadHint(vma)) {
                if (!page) {
                        page_cache_sync_readahead(mapping, ra, file,
                                                        vmf->pgoff, 1);
                        page = find_lock_page(mapping, vmf->pgoff);
                        if (!page)
                                goto no_cached_page;
                }
                if (PageReadahead(page)) {
                        page_cache_async_readahead(mapping, ra, file, page,
                                                        vmf->pgoff, 1);
                }
        }

        if (!page) {
                unsigned long ra_pages;

                ra->mmap_miss++;

                /*
                 * Do we miss much more than hit in this file? If so,
                 * stop bothering with read-ahead. It will only hurt.
                 */
                if (ra->mmap_miss > MMAP_LOTSAMISS)
                        goto no_cached_page;

                /*
                 * To keep the pgmajfault counter straight, we need to
                 * check did_readaround, as this is an inner loop.
                 */
                if (!did_readaround) {
                        ret = VM_FAULT_MAJOR;
                        count_vm_event(PGMAJFAULT);
                }
                did_readaround = 1;
                ra_pages = max_sane_readahead(file->f_ra.ra_pages);
                if (ra_pages) {
                        pgoff_t start = 0;

                        if (vmf->pgoff > ra_pages / 2)
                                start = vmf->pgoff - ra_pages / 2;
                        do_page_cache_readahead(mapping, file, start, ra_pages);
                }
                page = find_lock_page(mapping, vmf->pgoff);
                if (!page)
                        goto no_cached_page;
        }

        if (!did_readaround)
                ra->mmap_miss--;

        /*
         * We have a locked page in the page cache, now we need to check
         * that it's up-to-date. If not, it is going to be due to an error.
         */
        if (unlikely(!PageUptodate(page)))
                goto page_not_uptodate;

        /* Must recheck i_size under page lock */
        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        if (unlikely(vmf->pgoff >= size)) {
                unlock_page(page);
                page_cache_release(page);
                return VM_FAULT_SIGBUS;
        }

        /*
         * Found the page and have a reference on it.
         */
        ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
        vmf->page = page;
        return ret | VM_FAULT_LOCKED;

no_cached_page:
        /*
         * We're only likely to ever get here if MADV_RANDOM is in
         * effect.
         */
        error = page_cache_read(file, vmf->pgoff);

        /*
         * The page we want has now been added to the page cache.
         * In the unlikely event that someone removed it in the
         * meantime, we'll just come back here and read it again.
         */
        if (error >= 0)
                goto retry_find;

        /*
         * An error return from page_cache_read can result if the
         * system is low on memory, or a problem occurs while trying
         * to schedule I/O.
         */
        if (error == -ENOMEM)
                return VM_FAULT_OOM;
        return VM_FAULT_SIGBUS;

page_not_uptodate:
        /* IO error path */
        if (!did_readaround) {
                ret = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
        }

        /*
         * Umm, take care of errors if the page isn't up-to-date.
         * Try to re-read it _once_. We do this synchronously,
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
        ClearPageError(page);
        error = mapping->a_ops->readpage(file, page);
        if (!error) {
                wait_on_page_locked(page);
                if (!PageUptodate(page))
                        error = -EIO;
        }
        page_cache_release(page);

        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;

        /* Things didn't work out. Return zero to tell the mm layer so. */
        shrink_readahead_size_eio(file, ra);
        return VM_FAULT_SIGBUS;
}
EXPORT_SYMBOL(filemap_fault);

struct vm_operations_struct generic_file_vm_ops = {
        .fault = filemap_fault,
};

/* This is used for a general mmap of a disk file */
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
        struct address_space *mapping = file->f_mapping;

        if (!mapping->a_ops->readpage)
                return -ENOEXEC;
        file_accessed(file);
        vma->vm_ops = &generic_file_vm_ops;
        vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
}

/*
 * This is for filesystems which do not implement ->writepage.
 */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
                return -EINVAL;
        return generic_file_mmap(file, vma);
}
#else
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
        return -ENOSYS;
}
int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
{
        return -ENOSYS;
}
#endif /* CONFIG_MMU */

EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);

static struct page *__read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
{
        struct page *page;
        int err;
repeat:
        page = find_get_page(mapping, index);
        if (!page) {
                page = page_cache_alloc_cold(mapping);
                if (!page)
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
                if (unlikely(err)) {
                        page_cache_release(page);
                        if (err == -EEXIST)
                                goto repeat;
                        /* Presumably ENOMEM for radix tree node */
                        return ERR_PTR(err);
                }
                err = filler(data, page);
                if (err < 0) {
                        page_cache_release(page);
                        page = ERR_PTR(err);
                }
        }
        return page;
}

/**
 * read_cache_page_async - read into page cache, fill it if needed
 * @mapping:    the page's address_space
 * @index:      the page index
 * @filler:     function to perform the read
 * @data:       destination for read data
 *
 * Same as read_cache_page, but don't wait for page to become unlocked
 * after submitting it to the filler.
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page but don't wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
struct page *read_cache_page_async(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
{
        struct page *page;
        int err;

retry:
        page = __read_cache_page(mapping, index, filler, data);
        if (IS_ERR(page))
                return page;
        if (PageUptodate(page))
                goto out;

        lock_page(page);
        if (!page->mapping) {
                unlock_page(page);
                page_cache_release(page);
                goto retry;
        }
        if (PageUptodate(page)) {
                unlock_page(page);
                goto out;
        }
        err = filler(data, page);
        if (err < 0) {
                page_cache_release(page);
                return ERR_PTR(err);
        }
out:
        mark_page_accessed(page);
        return page;
}
EXPORT_SYMBOL(read_cache_page_async);

/**
 * read_cache_page - read into page cache, fill it if needed
 * @mapping:    the page's address_space
 * @index:      the page index
 * @filler:     function to perform the read
 * @data:       destination for read data
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page then wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
struct page *read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
{
        struct page *page;

        page = read_cache_page_async(mapping, index, filler, data);
        if (IS_ERR(page))
                goto out;
        wait_on_page_locked(page);
        if (!PageUptodate(page)) {
                page_cache_release(page);
                page = ERR_PTR(-EIO);
        }
 out:
        return page;
}
EXPORT_SYMBOL(read_cache_page);

/*
 * The logic we want is
 *
 *      if suid or (sgid and xgrp)
 *              remove privs
 */
int should_remove_suid(struct dentry *dentry)
{
        mode_t mode = dentry->d_inode->i_mode;
        int kill = 0;

        /* suid always must be killed */
        if (unlikely(mode & S_ISUID))
                kill = ATTR_KILL_SUID;

        /*
         * sgid without any exec bits is just a mandatory locking mark; leave
         * it alone. If some exec bits are set, it's a real sgid; kill it.
         */
        if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
                kill |= ATTR_KILL_SGID;

        if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
                return kill;

        return 0;
}
EXPORT_SYMBOL(should_remove_suid);

static int __remove_suid(struct dentry *dentry, int kill)
{
        struct iattr newattrs;

        newattrs.ia_valid = ATTR_FORCE | kill;
        return notify_change(dentry, &newattrs);
}

int file_remove_suid(struct file *file)
{
        struct dentry *dentry = file->f_path.dentry;
        int killsuid = should_remove_suid(dentry);
        int killpriv = security_inode_need_killpriv(dentry);
        int error = 0;

        if (killpriv < 0)
                return killpriv;
        if (killpriv)
                error = security_inode_killpriv(dentry);
        if (!error && killsuid)
                error = __remove_suid(dentry, killsuid);

        return error;
}
EXPORT_SYMBOL(file_remove_suid);

static size_t __iovec_copy_from_user_inatomic(char *vaddr,
                        const struct iovec *iov, size_t base, size_t bytes)
{
        size_t copied = 0, left = 0;

        while (bytes) {
                char __user *buf = iov->iov_base + base;
                int copy = min(bytes, iov->iov_len - base);

                base = 0;
                left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
                copied += copy;
                bytes -= copy;
                vaddr += copy;
                iov++;

                if (unlikely(left))
                        break;
        }
        return copied - left;
}

/*
 * Copy as much as we can into the page and return the number of bytes which
 * were successfully copied. If a fault is encountered then return the number
 * of bytes which were copied.
 */
size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr;
        size_t copied;

        BUG_ON(!in_atomic());
        kaddr = kmap_atomic(page, KM_USER0);
        if (likely(i->nr_segs == 1)) {
                int left;
                char __user *buf = i->iov->iov_base + i->iov_offset;
                left = __copy_from_user_inatomic_nocache(kaddr + offset,
                                                        buf, bytes);
                copied = bytes - left;
        } else {
                copied = __iovec_copy_from_user_inatomic(kaddr + offset,
                                                i->iov, i->iov_offset, bytes);
        }
        kunmap_atomic(kaddr, KM_USER0);

        return copied;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

/*
 * This has the same side effects and return value as
 * iov_iter_copy_from_user_atomic().
 * The difference is that it attempts to resolve faults.
 * Page must not be locked.
 */
size_t iov_iter_copy_from_user(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr;
        size_t copied;

        kaddr = kmap(page);
        if (likely(i->nr_segs == 1)) {
                int left;
                char __user *buf = i->iov->iov_base + i->iov_offset;
                left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
                copied = bytes - left;
        } else {
                copied = __iovec_copy_from_user_inatomic(kaddr + offset,
                                                i->iov, i->iov_offset, bytes);
        }
        kunmap(page);
        return copied;
}
EXPORT_SYMBOL(iov_iter_copy_from_user);

void iov_iter_advance(struct iov_iter *i, size_t bytes)
{
        BUG_ON(i->count < bytes);

        if (likely(i->nr_segs == 1)) {
                i->iov_offset += bytes;
                i->count -= bytes;
        } else {
                const struct iovec *iov = i->iov;
                size_t base = i->iov_offset;

                /*
                 * The !iov->iov_len check ensures we skip over unlikely
                 * zero-length segments (without overrunning the iovec).
                 */
                while (bytes || unlikely(i->count && !iov->iov_len)) {
                        int copy;

                        copy = min(bytes, iov->iov_len - base);
                        BUG_ON(!i->count || i->count < copy);
                        i->count -= copy;
                        bytes -= copy;
                        base += copy;
                        if (iov->iov_len == base) {
                                iov++;
                                base = 0;
                        }
                }
                i->iov = iov;
                i->iov_offset = base;
        }
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Fault in the first iovec of the given iov_iter, to a maximum length
 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 * accessed (ie. because it is an invalid address).
 *
 * writev-intensive code may want this to prefault several iovecs -- that
 * would be possible (callers must not rely on the fact that _only_ the
 * first iovec will be faulted with the current implementation).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
        char __user *buf = i->iov->iov_base + i->iov_offset;
        bytes = min(bytes, i->iov->iov_len - i->iov_offset);
        return fault_in_pages_readable(buf, bytes);
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(struct iov_iter *i)
{
        const struct iovec *iov = i->iov;
        if (i->nr_segs == 1)
                return i->count;
        else
                return min(i->count, iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

/*
 * Performs necessary checks before doing a write
 *
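
The generic mmap entry points above are meant to be wired directly into a filesystem's file_operations; once .mmap points at generic_file_mmap(), page faults on the mapping are serviced by filemap_fault() through generic_file_vm_ops. The following is a minimal sketch of that wiring, not part of filemap.c; the table name example_file_operations is hypothetical, while the individual helpers are the stock generic ones from this kernel era.

#include <linux/fs.h>

/*
 * Hedged sketch: a file_operations table for a filesystem that relies
 * on the generic mmap path. Faults on mappings created through .mmap
 * below are handled by filemap_fault() via generic_file_vm_ops.
 */
static const struct file_operations example_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
};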
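
read_cache_page() returns either an uptodate, referenced page or an ERR_PTR(), and callers commonly pass the address_space's own ->readpage as the filler. The sketch below is a hedged illustration of that calling convention (the helper name example_read_index is hypothetical), including the page_cache_release() the caller owes once it is done with the page.

#include <linux/err.h>
#include <linux/pagemap.h>

/*
 * Hedged usage sketch (not part of filemap.c): bring page @index of
 * @mapping uptodate via read_cache_page(), using the address_space's
 * own ->readpage as the filler, then drop the reference again.
 */
static int example_read_index(struct address_space *mapping, pgoff_t index)
{
        struct page *page;

        page = read_cache_page(mapping, index,
                (int (*)(void *, struct page *))mapping->a_ops->readpage,
                NULL);
        if (IS_ERR(page))
                return PTR_ERR(page);   /* -ENOMEM, -EIO, ... */

        /* Page is uptodate and referenced here; use its contents as needed. */
        page_cache_release(page);
        return 0;
}

Passing NULL as the filler data mirrors the read_mapping_page() wrapper; many ->readpage implementations ignore their struct file argument, but a filesystem whose readpage needs the file must pass it instead.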
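
The iov_iter helpers above are designed to cooperate in the buffered write path: fault the user buffer in while sleeping is still allowed, perform the copy atomically under kmap_atomic(), then advance the iterator by however many bytes actually landed in the page. The sketch below shows that ordering only; example_fill_page is a hypothetical name, and it omits the ->write_begin/->write_end calls, locking, and the short-copy retry logic of the real generic_perform_write().

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>

/*
 * Hedged sketch: copy up to @bytes from the iov_iter into @page at
 * @offset. Returns the number of bytes copied, or 0 if the user
 * buffer could not be faulted in.
 */
static size_t example_fill_page(struct page *page, struct iov_iter *i,
                                unsigned long offset, size_t bytes)
{
        size_t copied;

        /* Pre-fault while we may still sleep; the atomic copy must not fault. */
        if (iov_iter_fault_in_readable(i, bytes))
                return 0;

        /* Satisfies the BUG_ON(!in_atomic()) in the atomic copy helper. */
        pagefault_disable();
        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        pagefault_enable();

        /* Only advance by what actually made it into the page. */
        iov_iter_advance(i, copied);
        return copied;
}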