filemap.c
 * It is going insane. Fix it by quickly scaling down the readahead size.
 */
static void shrink_readahead_size_eio(struct file *filp,
					struct file_ra_state *ra)
{
	if (!ra->ra_pages)
		return;
	ra->ra_pages /= 4;
}

/**
 * do_generic_file_read - generic file read routine
 * @filp:	the file to read
 * @ppos:	current file position
 * @desc:	read_descriptor
 * @actor:	read method
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */
static void do_generic_file_read(struct file *filp, loff_t *ppos,
		read_descriptor_t *desc, read_actor_t actor)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	pgoff_t index;
	pgoff_t last_index;
	pgoff_t prev_index;
	unsigned long offset;	/* offset into pagecache page */
	unsigned int prev_offset;
	int error;

	index = *ppos >> PAGE_CACHE_SHIFT;
	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
	last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page;
		pgoff_t end_index;
		loff_t isize;
		unsigned long nr, ret;

		cond_resched();
find_page:
		page = find_get_page(mapping, index);
		if (!page) {
			page_cache_sync_readahead(mapping,
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL))
				goto no_cached_page;
		}
		if (PageReadahead(page)) {
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		if (!PageUptodate(page)) {
			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
					!mapping->a_ops->is_partially_uptodate)
				goto page_not_up_to_date;
			if (!trylock_page(page))
				goto page_not_up_to_date;
			if (!mapping->a_ops->is_partially_uptodate(page,
								desc, offset))
				goto page_not_up_to_date_locked;
			unlock_page(page);
		}
page_ok:
		/*
		 * i_size must be checked after we know the page is Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */

		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index)) {
			page_cache_release(page);
			goto out;
		}

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_CACHE_SIZE;
		if (index == end_index) {
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (nr <= offset) {
				page_cache_release(page);
				goto out;
			}
		}
		nr = nr - offset;

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * When a sequential read accesses a page several times,
		 * only mark it as accessed the first time.
		 */
		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;
		prev_offset = offset;

		page_cache_release(page);
		if (ret == nr && desc->count)
			continue;
		goto out;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		error = lock_page_killable(page);
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		if (!page->mapping) {
			unlock_page(page);
			page_cache_release(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		/* Start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			if (error == AOP_TRUNCATED_PAGE) {
				page_cache_release(page);
				goto find_page;
			}
			goto readpage_error;
		}

		if (!PageUptodate(page)) {
			error = lock_page_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_inode_pages got it
					 */
					unlock_page(page);
					page_cache_release(page);
					goto find_page;
				}
				unlock_page(page);
				shrink_readahead_size_eio(filp, ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		desc->error = error;
		page_cache_release(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		page = page_cache_alloc_cold(mapping);
		if (!page) {
			desc->error = -ENOMEM;
			goto out;
		}
		error = add_to_page_cache_lru(page, mapping,
						index, GFP_KERNEL);
		if (error) {
			page_cache_release(page);
			if (error == -EEXIST)
				goto find_page;
			desc->error = error;
			goto out;
		}
		goto readpage;
	}

out:
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_CACHE_SHIFT;
	ra->prev_pos |= prev_offset;

	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(filp);
}
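/*
 * Illustrative sketch, not part of the original file: the "actor"
 * callback above receives (desc, page, offset, nr) and must return how
 * many bytes it consumed, updating desc->count and desc->written itself
 * (file_read_actor below is the canonical implementation). A
 * hypothetical actor that consumes data without copying it anywhere,
 * e.g. just to warm the page cache, could look like this:
 */
static int discard_actor(read_descriptor_t *desc, struct page *page,
			 unsigned long offset, unsigned long size)
{
	if (size > desc->count)
		size = desc->count;
	/* claim the bytes as consumed without touching a user buffer */
	desc->count -= size;
	desc->written += size;
	return size;
}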
int file_read_actor(read_descriptor_t *desc, struct page *page,
			unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long left, count = desc->count;

	if (size > count)
		size = count;

	/*
	 * Faults on the destination of a read are common, so do it before
	 * taking the kmap.
	 */
	if (!fault_in_pages_writeable(desc->arg.buf, size)) {
		kaddr = kmap_atomic(page, KM_USER0);
		left = __copy_to_user_inatomic(desc->arg.buf,
						kaddr + offset, size);
		kunmap_atomic(kaddr, KM_USER0);
		if (left == 0)
			goto success;
	}

	/* Do it the slow way */
	kaddr = kmap(page);
	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
	kunmap(page);

	if (left) {
		size -= left;
		desc->error = -EFAULT;
	}
success:
	desc->count = count - size;
	desc->written += size;
	desc->arg.buf += size;
	return size;
}

/*
 * Performs necessary checks before doing a write
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @count:	number of bytes to write
 * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
 *
 * Adjust number of segments and amount of bytes to write (nr_segs should be
 * properly initialized first). Returns appropriate error code that caller
 * should return or zero in case that write should be allowed.
 */
int generic_segment_checks(const struct iovec *iov,
			unsigned long *nr_segs, size_t *count, int access_flags)
{
	unsigned long seg;
	size_t cnt = 0;

	for (seg = 0; seg < *nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		cnt += iv->iov_len;
		if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(access_flags, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		*nr_segs = seg;
		cnt -= iv->iov_len;	/* This segment is no good */
		break;
	}
	*count = cnt;
	return 0;
}
EXPORT_SYMBOL(generic_segment_checks);
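/*
 * Usage sketch, hypothetical and not in the original file: a
 * filesystem's ->aio_write() path would validate its iovec like this
 * before copying from userspace. VERIFY_READ is passed because a write
 * reads the user buffers; generic_file_aio_read() below passes
 * VERIFY_WRITE for the mirror-image reason.
 */
static inline ssize_t example_write_checks(const struct iovec *iov,
					   unsigned long *nr_segs)
{
	size_t count;
	int err;

	err = generic_segment_checks(iov, nr_segs, &count, VERIFY_READ);
	if (err)
		return err;
	return count;	/* bytes the caller may attempt to write */
}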
/**
 * generic_file_aio_read - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @pos:	current file position
 *
 * This is the "read()" routine for all filesystems
 * that can use the page cache directly.
 */
ssize_t
generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	ssize_t retval;
	unsigned long seg;
	size_t count;
	loff_t *ppos = &iocb->ki_pos;

	count = 0;
	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
	if (retval)
		return retval;

	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
	if (filp->f_flags & O_DIRECT) {
		loff_t size;
		struct address_space *mapping;
		struct inode *inode;

		mapping = filp->f_mapping;
		inode = mapping->host;
		if (!count)
			goto out; /* skip atime */
		size = i_size_read(inode);
		if (pos < size) {
			retval = filemap_write_and_wait_range(mapping, pos,
					pos + iov_length(iov, nr_segs) - 1);
			if (!retval) {
				retval = mapping->a_ops->direct_IO(READ, iocb,
							iov, pos, nr_segs);
			}
			if (retval > 0)
				*ppos = pos + retval;
			if (retval) {
				file_accessed(filp);
				goto out;
			}
		}
	}

	for (seg = 0; seg < nr_segs; seg++) {
		read_descriptor_t desc;

		desc.written = 0;
		desc.arg.buf = iov[seg].iov_base;
		desc.count = iov[seg].iov_len;
		if (desc.count == 0)
			continue;
		desc.error = 0;
		do_generic_file_read(filp, ppos, &desc, file_read_actor);
		retval += desc.written;
		if (desc.error) {
			retval = retval ?: desc.error;
			break;
		}
		if (desc.count > 0)
			break;
	}
out:
	return retval;
}
EXPORT_SYMBOL(generic_file_aio_read);

static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
	     pgoff_t index, unsigned long nr)
{
	if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
		return -EINVAL;

	force_page_cache_readahead(mapping, filp, index,
					max_sane_readahead(nr));
	return 0;
}

SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct file *file;

	ret = -EBADF;
	file = fget(fd);
	if (file) {
		if (file->f_mode & FMODE_READ) {
			struct address_space *mapping = file->f_mapping;
			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
			unsigned long len = end - start + 1;
			ret = do_readahead(mapping, file, start, len);
		}
		fput(file);
	}
	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
{
	return SYSC_readahead((int) fd, offset, (size_t) count);
}
SYSCALL_ALIAS(sys_readahead, SyS_readahead);
#endif
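/*
 * Userspace usage sketch for the syscall defined above (illustrative,
 * not part of the original file). glibc exposes it as readahead(2) when
 * _GNU_SOURCE is defined:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	int fd = open("data.bin", O_RDONLY);	// hypothetical input file
 *	if (fd >= 0)
 *		readahead(fd, 0, 1 << 20);	// prefetch the first 1 MiB
 *
 * The call only populates the page cache so that later read()s can be
 * served from memory; as the PAGE_CACHE_SHIFT arithmetic above shows,
 * the offset and count are widened to whole-page boundaries.
 */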
#ifdef CONFIG_MMU
/**
 * page_cache_read - adds requested page to the page cache if not already there
 * @file:	file to read
 * @offset:	page index
 *
 * This adds the requested page to the page cache if it isn't already there,
 * and schedules an I/O to read in its contents from disk.
 */
static int page_cache_read(struct file *file, pgoff_t offset)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page;
	int ret;

	do {
		page = page_cache_alloc_cold(mapping);
		if (!page)
			return -ENOMEM;

		ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
		if (ret == 0)
			ret = mapping->a_ops->readpage(file, page);
		else if (ret == -EEXIST)
			ret = 0; /* losing race to add is OK */

		page_cache_release(page);

	} while (ret == AOP_TRUNCATED_PAGE);

	return ret;
}

#define MMAP_LOTSAMISS	(100)

/**
 * filemap_fault - read in file data for page fault handling
 * @vma:	vma in which the fault was taken
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 */
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int error;
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct file_ra_state *ra = &file->f_ra;
	struct inode *inode = mapping->host;
	struct page *page;
	pgoff_t size;
	int did_readaround = 0;
	int ret = 0;

	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	/* If we don't want any read-ahead, don't bother */
	if (VM_RandomReadHint(vma))
		goto no_cached_page;