📄 filemap.c
        /*
         * Do we have something in the page cache already?
         */
retry_find:
        page = find_lock_page(mapping, vmf->pgoff);

        /*
         * For sequential accesses, we use the generic readahead logic.
         */
        if (VM_SequentialReadHint(vma)) {
                if (!page) {
                        page_cache_sync_readahead(mapping, ra, file,
                                                        vmf->pgoff, 1);
                        page = find_lock_page(mapping, vmf->pgoff);
                        if (!page)
                                goto no_cached_page;
                }
                if (PageReadahead(page)) {
                        page_cache_async_readahead(mapping, ra, file, page,
                                                        vmf->pgoff, 1);
                }
        }

        if (!page) {
                unsigned long ra_pages;

                ra->mmap_miss++;

                /*
                 * Do we miss much more than hit in this file? If so,
                 * stop bothering with read-ahead. It will only hurt.
                 */
                if (ra->mmap_miss > MMAP_LOTSAMISS)
                        goto no_cached_page;

                /*
                 * To keep the pgmajfault counter straight, we need to
                 * check did_readaround, as this is an inner loop.
                 */
                if (!did_readaround) {
                        ret = VM_FAULT_MAJOR;
                        count_vm_event(PGMAJFAULT);
                }
                did_readaround = 1;
                ra_pages = max_sane_readahead(file->f_ra.ra_pages);
                if (ra_pages) {
                        pgoff_t start = 0;

                        if (vmf->pgoff > ra_pages / 2)
                                start = vmf->pgoff - ra_pages / 2;
                        do_page_cache_readahead(mapping, file, start, ra_pages);
                }
                page = find_lock_page(mapping, vmf->pgoff);
                if (!page)
                        goto no_cached_page;
        }

        if (!did_readaround)
                ra->mmap_miss--;

        /*
         * We have a locked page in the page cache, now we need to check
         * that it's up-to-date. If not, it is going to be due to an error.
         */
        if (unlikely(!PageUptodate(page)))
                goto page_not_uptodate;

        /* Must recheck i_size under page lock */
        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        if (unlikely(vmf->pgoff >= size)) {
                unlock_page(page);
                page_cache_release(page);
                return VM_FAULT_SIGBUS;
        }

        /*
         * Found the page and have a reference on it.
         */
        ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
        vmf->page = page;
        return ret | VM_FAULT_LOCKED;

no_cached_page:
        /*
         * We're only likely to ever get here if MADV_RANDOM is in
         * effect.
         */
        error = page_cache_read(file, vmf->pgoff);

        /*
         * The page we want has now been added to the page cache.
         * In the unlikely event that someone removed it in the
         * meantime, we'll just come back here and read it again.
         */
        if (error >= 0)
                goto retry_find;

        /*
         * An error return from page_cache_read can result if the
         * system is low on memory, or a problem occurs while trying
         * to schedule I/O.
         */
        if (error == -ENOMEM)
                return VM_FAULT_OOM;
        return VM_FAULT_SIGBUS;

page_not_uptodate:
        /* IO error path */
        if (!did_readaround) {
                ret = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
        }

        /*
         * Umm, take care of errors if the page isn't up-to-date.
         * Try to re-read it _once_. We do this synchronously,
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
        ClearPageError(page);
        error = mapping->a_ops->readpage(file, page);
        if (!error) {
                wait_on_page_locked(page);
                if (!PageUptodate(page))
                        error = -EIO;
        }
        page_cache_release(page);

        if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;

        /* Things didn't work out. Return zero to tell the mm layer so. */
        shrink_readahead_size_eio(file, ra);
        return VM_FAULT_SIGBUS;
}
EXPORT_SYMBOL(filemap_fault);

struct vm_operations_struct generic_file_vm_ops = {
        .fault = filemap_fault,
};

/* This is used for a general mmap of a disk file */
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
        struct address_space *mapping = file->f_mapping;

        if (!mapping->a_ops->readpage)
                return -ENOEXEC;
        file_accessed(file);
        vma->vm_ops = &generic_file_vm_ops;
        vma->vm_flags |= VM_CAN_NONLINEAR;
        return 0;
}

/*
 * This is for filesystems which do not implement ->writepage.
 */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
                return -EINVAL;
        return generic_file_mmap(file, vma);
}
#else
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
        return -ENOSYS;
}
int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
{
        return -ENOSYS;
}
#endif /* CONFIG_MMU */

EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);

static struct page *__read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
{
        struct page *page;
        int err;
repeat:
        page = find_get_page(mapping, index);
        if (!page) {
                page = page_cache_alloc_cold(mapping);
                if (!page)
                        return ERR_PTR(-ENOMEM);
                err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
                if (unlikely(err)) {
                        page_cache_release(page);
                        if (err == -EEXIST)
                                goto repeat;
                        /* Presumably ENOMEM for radix tree node */
                        return ERR_PTR(err);
                }
                err = filler(data, page);
                if (err < 0) {
                        page_cache_release(page);
                        page = ERR_PTR(err);
                }
        }
        return page;
}

/**
 * read_cache_page_async - read into page cache, fill it if needed
 * @mapping:    the page's address_space
 * @index:      the page index
 * @filler:     function to perform the read
 * @data:       destination for read data
 *
 * Same as read_cache_page, but don't wait for page to become unlocked
 * after submitting it to the filler.
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page but don't wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
struct page *read_cache_page_async(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
{
        struct page *page;
        int err;

retry:
        page = __read_cache_page(mapping, index, filler, data);
        if (IS_ERR(page))
                return page;
        if (PageUptodate(page))
                goto out;

        lock_page(page);
        if (!page->mapping) {
                unlock_page(page);
                page_cache_release(page);
                goto retry;
        }
        if (PageUptodate(page)) {
                unlock_page(page);
                goto out;
        }
        err = filler(data, page);
        if (err < 0) {
                page_cache_release(page);
                return ERR_PTR(err);
        }
out:
        mark_page_accessed(page);
        return page;
}
EXPORT_SYMBOL(read_cache_page_async);

/**
 * read_cache_page - read into page cache, fill it if needed
 * @mapping:    the page's address_space
 * @index:      the page index
 * @filler:     function to perform the read
 * @data:       destination for read data
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page then wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
struct page *read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
{
        struct page *page;

        page = read_cache_page_async(mapping, index, filler, data);
        if (IS_ERR(page))
                goto out;
        wait_on_page_locked(page);
        if (!PageUptodate(page)) {
                page_cache_release(page);
                page = ERR_PTR(-EIO);
        }
 out:
        return page;
}
EXPORT_SYMBOL(read_cache_page);

/*
 * The logic we want is
 *
 *      if suid or (sgid and xgrp)
 *              remove privs
 */
int should_remove_suid(struct dentry *dentry)
{
        mode_t mode = dentry->d_inode->i_mode;
        int kill = 0;

        /* suid always must be killed */
        if (unlikely(mode & S_ISUID))
                kill = ATTR_KILL_SUID;

        /*
         * sgid without any exec bits is just a mandatory locking mark; leave
         * it alone. If some exec bits are set, it's a real sgid; kill it.
         */
        if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
                kill |= ATTR_KILL_SGID;

        if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
                return kill;

        return 0;
}
EXPORT_SYMBOL(should_remove_suid);

static int __remove_suid(struct dentry *dentry, int kill)
{
        struct iattr newattrs;

        newattrs.ia_valid = ATTR_FORCE | kill;
        return notify_change(dentry, &newattrs);
}

int file_remove_suid(struct file *file)
{
        struct dentry *dentry = file->f_path.dentry;
        int killsuid = should_remove_suid(dentry);
        int killpriv = security_inode_need_killpriv(dentry);
        int error = 0;

        if (killpriv < 0)
                return killpriv;
        if (killpriv)
                error = security_inode_killpriv(dentry);
        if (!error && killsuid)
                error = __remove_suid(dentry, killsuid);

        return error;
}
EXPORT_SYMBOL(file_remove_suid);

static size_t __iovec_copy_from_user_inatomic(char *vaddr,
                        const struct iovec *iov, size_t base, size_t bytes)
{
        size_t copied = 0, left = 0;

        while (bytes) {
                char __user *buf = iov->iov_base + base;
                int copy = min(bytes, iov->iov_len - base);

                base = 0;
                left = __copy_from_user_inatomic_nocache(vaddr, buf, copy);
                copied += copy;
                bytes -= copy;
                vaddr += copy;
                iov++;

                if (unlikely(left))
                        break;
        }
        return copied - left;
}

/*
 * Copy as much as we can into the page and return the number of bytes which
 * were successfully copied. If a fault is encountered then return the number
 * of bytes which were copied.
 */
size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr;
        size_t copied;

        BUG_ON(!in_atomic());
        kaddr = kmap_atomic(page, KM_USER0);
        if (likely(i->nr_segs == 1)) {
                int left;
                char __user *buf = i->iov->iov_base + i->iov_offset;
                left = __copy_from_user_inatomic_nocache(kaddr + offset,
                                                        buf, bytes);
                copied = bytes - left;
        } else {
                copied = __iovec_copy_from_user_inatomic(kaddr + offset,
                                                i->iov, i->iov_offset, bytes);
        }
        kunmap_atomic(kaddr, KM_USER0);

        return copied;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

/*
 * This has the same side effects and return value as
 * iov_iter_copy_from_user_atomic().
 * The difference is that it attempts to resolve faults.
 * Page must not be locked.
 */
size_t iov_iter_copy_from_user(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr;
        size_t copied;

        kaddr = kmap(page);
        if (likely(i->nr_segs == 1)) {
                int left;
                char __user *buf = i->iov->iov_base + i->iov_offset;
                left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
                copied = bytes - left;
        } else {
                copied = __iovec_copy_from_user_inatomic(kaddr + offset,
                                                i->iov, i->iov_offset, bytes);
        }
        kunmap(page);
        return copied;
}
EXPORT_SYMBOL(iov_iter_copy_from_user);

void iov_iter_advance(struct iov_iter *i, size_t bytes)
{
        BUG_ON(i->count < bytes);

        if (likely(i->nr_segs == 1)) {
                i->iov_offset += bytes;
                i->count -= bytes;
        } else {
                const struct iovec *iov = i->iov;
                size_t base = i->iov_offset;

                /*
                 * The !iov->iov_len check ensures we skip over unlikely
                 * zero-length segments (without overrunning the iovec).
                 */
                while (bytes || unlikely(i->count && !iov->iov_len)) {
                        int copy;

                        copy = min(bytes, iov->iov_len - base);
                        BUG_ON(!i->count || i->count < copy);
                        i->count -= copy;
                        bytes -= copy;
                        base += copy;
                        if (iov->iov_len == base) {
                                iov++;
                                base = 0;
                        }
                }
                i->iov = iov;
                i->iov_offset = base;
        }
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Fault in the first iovec of the given iov_iter, to a maximum length
 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 * accessed (ie. because it is an invalid address).
 *
 * writev-intensive code may want this to prefault several iovecs -- that
 * would be possible (callers must not rely on the fact that _only_ the
 * first iovec will be faulted with the current implementation).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
        char __user *buf = i->iov->iov_base + i->iov_offset;
        bytes = min(bytes, i->iov->iov_len - i->iov_offset);
        return fault_in_pages_readable(buf, bytes);
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(struct iov_iter *i)
{
        const struct iovec *iov = i->iov;
        if (i->nr_segs == 1)
                return i->count;
        else
                return min(i->count, iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

/*
 * Performs necessary checks before doing a write
 *
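
The generic mmap entry points above are meant to be wired directly into a filesystem's file_operations; once .mmap points at generic_file_mmap(), page faults on the mapping are serviced by filemap_fault() through generic_file_vm_ops. The following is a minimal sketch of that wiring, not part of filemap.c; the table name example_file_operations is hypothetical, while the individual helpers are the stock generic ones from this kernel era.

#include <linux/fs.h>

/*
 * Hedged sketch: a file_operations table for a filesystem that relies
 * on the generic mmap path. Faults on mappings created through .mmap
 * below are handled by filemap_fault() via generic_file_vm_ops.
 */
static const struct file_operations example_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
};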
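
read_cache_page() returns either an uptodate, referenced page or an ERR_PTR(), and callers commonly pass the address_space's own ->readpage as the filler. The sketch below is a hedged illustration of that calling convention (the helper name example_read_index is hypothetical), including the page_cache_release() the caller owes once it is done with the page.

#include <linux/err.h>
#include <linux/pagemap.h>

/*
 * Hedged usage sketch (not part of filemap.c): bring page @index of
 * @mapping uptodate via read_cache_page(), using the address_space's
 * own ->readpage as the filler, then drop the reference again.
 */
static int example_read_index(struct address_space *mapping, pgoff_t index)
{
        struct page *page;

        page = read_cache_page(mapping, index,
                (int (*)(void *, struct page *))mapping->a_ops->readpage,
                NULL);
        if (IS_ERR(page))
                return PTR_ERR(page);   /* -ENOMEM, -EIO, ... */

        /* Page is uptodate and referenced here; use its contents as needed. */
        page_cache_release(page);
        return 0;
}

Passing NULL as the filler data mirrors the read_mapping_page() wrapper; many ->readpage implementations ignore their struct file argument, but a filesystem whose readpage needs the file must pass it instead.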
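
The iov_iter helpers above are designed to cooperate in the buffered write path: fault the user buffer in while sleeping is still allowed, perform the copy atomically under kmap_atomic(), then advance the iterator by however many bytes actually landed in the page. The sketch below shows that ordering only; example_fill_page is a hypothetical name, and it omits the ->write_begin/->write_end calls, locking, and the short-copy retry logic of the real generic_perform_write().

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>

/*
 * Hedged sketch: copy up to @bytes from the iov_iter into @page at
 * @offset. Returns the number of bytes copied, or 0 if the user
 * buffer could not be faulted in.
 */
static size_t example_fill_page(struct page *page, struct iov_iter *i,
                                unsigned long offset, size_t bytes)
{
        size_t copied;

        /* Pre-fault while we may still sleep; the atomic copy must not fault. */
        if (iov_iter_fault_in_readable(i, bytes))
                return 0;

        /* Satisfies the BUG_ON(!in_atomic()) in the atomic copy helper. */
        pagefault_disable();
        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        pagefault_enable();

        /* Only advance by what actually made it into the page. */
        iov_iter_advance(i, copied);
        return copied;
}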