filemap.c
 * It is going insane. Fix it by quickly scaling down the readahead size.
 */
static void shrink_readahead_size_eio(struct file *filp,
					struct file_ra_state *ra)
{
	if (!ra->ra_pages)
		return;
	ra->ra_pages /= 4;
}

/**
 * do_generic_file_read - generic file read routine
 * @filp:	the file to read
 * @ppos:	current file position
 * @desc:	read_descriptor
 * @actor:	read method
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */
static void do_generic_file_read(struct file *filp, loff_t *ppos,
		read_descriptor_t *desc, read_actor_t actor)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	pgoff_t index;
	pgoff_t last_index;
	pgoff_t prev_index;
	unsigned long offset;	/* offset into pagecache page */
	unsigned int prev_offset;
	int error;

	index = *ppos >> PAGE_CACHE_SHIFT;
	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
	last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	for (;;) {
		struct page *page;
		pgoff_t end_index;
		loff_t isize;
		unsigned long nr, ret;

		cond_resched();
find_page:
		page = find_get_page(mapping, index);
		if (!page) {
			page_cache_sync_readahead(mapping,
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL))
				goto no_cached_page;
		}
		if (PageReadahead(page)) {
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		if (!PageUptodate(page)) {
			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
					!mapping->a_ops->is_partially_uptodate)
				goto page_not_up_to_date;
			if (!trylock_page(page))
				goto page_not_up_to_date;
			if (!mapping->a_ops->is_partially_uptodate(page,
								desc, offset))
				goto page_not_up_to_date_locked;
			unlock_page(page);
		}
page_ok:
		/*
		 * i_size must be checked after we know the page is Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */

		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
		if (unlikely(!isize || index > end_index)) {
			page_cache_release(page);
			goto out;
		}

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_CACHE_SIZE;
		if (index == end_index) {
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (nr <= offset) {
				page_cache_release(page);
				goto out;
			}
		}
		nr = nr - offset;

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * When a sequential read accesses a page several times,
		 * only mark it as accessed the first time.
		 */
		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;
		prev_offset = offset;

		page_cache_release(page);
		if (ret == nr && desc->count)
			continue;
		goto out;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		error = lock_page_killable(page);
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		if (!page->mapping) {
			unlock_page(page);
			page_cache_release(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		/* Start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			if (error == AOP_TRUNCATED_PAGE) {
				page_cache_release(page);
				goto find_page;
			}
			goto readpage_error;
		}

		if (!PageUptodate(page)) {
			error = lock_page_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_inode_pages got it
					 */
					unlock_page(page);
					page_cache_release(page);
					goto find_page;
				}
				unlock_page(page);
				shrink_readahead_size_eio(filp, ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		desc->error = error;
		page_cache_release(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		page = page_cache_alloc_cold(mapping);
		if (!page) {
			desc->error = -ENOMEM;
			goto out;
		}
		error = add_to_page_cache_lru(page, mapping,
						index, GFP_KERNEL);
		if (error) {
			page_cache_release(page);
			if (error == -EEXIST)
				goto find_page;
			desc->error = error;
			goto out;
		}
		goto readpage;
	}

out:
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_CACHE_SHIFT;
	ra->prev_pos |= prev_offset;

	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
	file_accessed(filp);
}
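/*
 * Illustrative sketch, not part of the original file: the "actor"
 * callback above receives (desc, page, offset, nr) and must return how
 * many bytes it consumed, updating desc->count and desc->written itself
 * (file_read_actor below is the canonical implementation). A
 * hypothetical actor that consumes data without copying it anywhere,
 * e.g. just to warm the page cache, could look like this:
 */
static int discard_actor(read_descriptor_t *desc, struct page *page,
			 unsigned long offset, unsigned long size)
{
	if (size > desc->count)
		size = desc->count;
	/* claim the bytes as consumed without touching a user buffer */
	desc->count -= size;
	desc->written += size;
	return size;
}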
int file_read_actor(read_descriptor_t *desc, struct page *page,
			unsigned long offset, unsigned long size)
{
	char *kaddr;
	unsigned long left, count = desc->count;

	if (size > count)
		size = count;

	/*
	 * Faults on the destination of a read are common, so do it before
	 * taking the kmap.
	 */
	if (!fault_in_pages_writeable(desc->arg.buf, size)) {
		kaddr = kmap_atomic(page, KM_USER0);
		left = __copy_to_user_inatomic(desc->arg.buf,
						kaddr + offset, size);
		kunmap_atomic(kaddr, KM_USER0);
		if (left == 0)
			goto success;
	}

	/* Do it the slow way */
	kaddr = kmap(page);
	left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
	kunmap(page);

	if (left) {
		size -= left;
		desc->error = -EFAULT;
	}
success:
	desc->count = count - size;
	desc->written += size;
	desc->arg.buf += size;
	return size;
}

/*
 * Performs necessary checks before doing a write
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @count:	number of bytes to write
 * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
 *
 * Adjust number of segments and amount of bytes to write (nr_segs should be
 * properly initialized first). Returns appropriate error code that caller
 * should return or zero in case that write should be allowed.
 */
int generic_segment_checks(const struct iovec *iov,
			unsigned long *nr_segs, size_t *count, int access_flags)
{
	unsigned long seg;
	size_t cnt = 0;

	for (seg = 0; seg < *nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		cnt += iv->iov_len;
		if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(access_flags, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		*nr_segs = seg;
		cnt -= iv->iov_len;	/* This segment is no good */
		break;
	}
	*count = cnt;
	return 0;
}
EXPORT_SYMBOL(generic_segment_checks);
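/*
 * Usage sketch, hypothetical and not in the original file: a
 * filesystem's ->aio_write() path would validate its iovec like this
 * before copying from userspace. VERIFY_READ is passed because a write
 * reads the user buffers; generic_file_aio_read() below passes
 * VERIFY_WRITE for the mirror-image reason.
 */
static inline ssize_t example_write_checks(const struct iovec *iov,
					   unsigned long *nr_segs)
{
	size_t count;
	int err;

	err = generic_segment_checks(iov, nr_segs, &count, VERIFY_READ);
	if (err)
		return err;
	return count;	/* bytes the caller may attempt to write */
}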
/**
 * generic_file_aio_read - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @pos:	current file position
 *
 * This is the "read()" routine for all filesystems
 * that can use the page cache directly.
 */
ssize_t
generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	ssize_t retval;
	unsigned long seg;
	size_t count;
	loff_t *ppos = &iocb->ki_pos;

	count = 0;
	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
	if (retval)
		return retval;

	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
	if (filp->f_flags & O_DIRECT) {
		loff_t size;
		struct address_space *mapping;
		struct inode *inode;

		mapping = filp->f_mapping;
		inode = mapping->host;
		if (!count)
			goto out; /* skip atime */
		size = i_size_read(inode);
		if (pos < size) {
			retval = filemap_write_and_wait_range(mapping, pos,
					pos + iov_length(iov, nr_segs) - 1);
			if (!retval) {
				retval = mapping->a_ops->direct_IO(READ, iocb,
							iov, pos, nr_segs);
			}
			if (retval > 0)
				*ppos = pos + retval;
			if (retval) {
				file_accessed(filp);
				goto out;
			}
		}
	}

	for (seg = 0; seg < nr_segs; seg++) {
		read_descriptor_t desc;

		desc.written = 0;
		desc.arg.buf = iov[seg].iov_base;
		desc.count = iov[seg].iov_len;
		if (desc.count == 0)
			continue;
		desc.error = 0;
		do_generic_file_read(filp, ppos, &desc, file_read_actor);
		retval += desc.written;
		if (desc.error) {
			retval = retval ?: desc.error;
			break;
		}
		if (desc.count > 0)
			break;
	}
out:
	return retval;
}
EXPORT_SYMBOL(generic_file_aio_read);

static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
	     pgoff_t index, unsigned long nr)
{
	if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
		return -EINVAL;

	force_page_cache_readahead(mapping, filp, index,
					max_sane_readahead(nr));
	return 0;
}

SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct file *file;

	ret = -EBADF;
	file = fget(fd);
	if (file) {
		if (file->f_mode & FMODE_READ) {
			struct address_space *mapping = file->f_mapping;
			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
			unsigned long len = end - start + 1;
			ret = do_readahead(mapping, file, start, len);
		}
		fput(file);
	}
	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
{
	return SYSC_readahead((int) fd, offset, (size_t) count);
}
SYSCALL_ALIAS(sys_readahead, SyS_readahead);
#endif
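/*
 * Userspace usage sketch for the syscall defined above (illustrative,
 * not part of the original file). glibc exposes it as readahead(2) when
 * _GNU_SOURCE is defined:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	int fd = open("data.bin", O_RDONLY);	// hypothetical input file
 *	if (fd >= 0)
 *		readahead(fd, 0, 1 << 20);	// prefetch the first 1 MiB
 *
 * The call only populates the page cache so that later read()s can be
 * served from memory; as the PAGE_CACHE_SHIFT arithmetic above shows,
 * the offset and count are widened to whole-page boundaries.
 */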
#ifdef CONFIG_MMU
/**
 * page_cache_read - adds requested page to the page cache if not already there
 * @file:	file to read
 * @offset:	page index
 *
 * This adds the requested page to the page cache if it isn't already there,
 * and schedules an I/O to read in its contents from disk.
 */
static int page_cache_read(struct file *file, pgoff_t offset)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page;
	int ret;

	do {
		page = page_cache_alloc_cold(mapping);
		if (!page)
			return -ENOMEM;

		ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
		if (ret == 0)
			ret = mapping->a_ops->readpage(file, page);
		else if (ret == -EEXIST)
			ret = 0; /* losing race to add is OK */

		page_cache_release(page);

	} while (ret == AOP_TRUNCATED_PAGE);

	return ret;
}

#define MMAP_LOTSAMISS	(100)

/**
 * filemap_fault - read in file data for page fault handling
 * @vma:	vma in which the fault was taken
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 */
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int error;
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct file_ra_state *ra = &file->f_ra;
	struct inode *inode = mapping->host;
	struct page *page;
	pgoff_t size;
	int did_readaround = 0;
	int ret = 0;

	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (vmf->pgoff >= size)
		return VM_FAULT_SIGBUS;

	/* If we don't want any read-ahead, don't bother */
	if (VM_RandomReadHint(vma))
		goto no_cached_page;