📄 filemap.c
字号:
* Can adjust writing position or amount of bytes to write. * Returns appropriate error code that caller should return or * zero in case that write should be allowed. */inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk){ struct inode *inode = file->f_mapping->host; unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; if (unlikely(*pos < 0)) return -EINVAL; if (!isblk) { /* FIXME: this is for backwards compatibility with 2.4 */ if (file->f_flags & O_APPEND) *pos = i_size_read(inode); if (limit != RLIM_INFINITY) { if (*pos >= limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; } if (*count > limit - (typeof(limit))*pos) { *count = limit - (typeof(limit))*pos; } } } /* * LFS rule */ if (unlikely(*pos + *count > MAX_NON_LFS && !(file->f_flags & O_LARGEFILE))) { if (*pos >= MAX_NON_LFS) { return -EFBIG; } if (*count > MAX_NON_LFS - (unsigned long)*pos) { *count = MAX_NON_LFS - (unsigned long)*pos; } } /* * Are we about to exceed the fs block limit ? * * If we have written data it becomes a short write. If we have * exceeded without writing data we send a signal and return EFBIG. * Linus frestrict idea will clean these up nicely.. */ if (likely(!isblk)) { if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { if (*count || *pos > inode->i_sb->s_maxbytes) { return -EFBIG; } /* zero-length writes at ->s_maxbytes are OK */ } if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) *count = inode->i_sb->s_maxbytes - *pos; } else {#ifdef CONFIG_BLOCK loff_t isize; if (bdev_read_only(I_BDEV(inode))) return -EPERM; isize = i_size_read(inode); if (*pos >= isize) { if (*count || *pos > isize) return -ENOSPC; } if (*pos + *count > isize) *count = isize - *pos;#else return -EPERM;#endif } return 0;}EXPORT_SYMBOL(generic_write_checks);int pagecache_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata){ const struct address_space_operations *aops = mapping->a_ops; return aops->write_begin(file, mapping, pos, len, flags, pagep, fsdata);}EXPORT_SYMBOL(pagecache_write_begin);int pagecache_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata){ const struct address_space_operations *aops = mapping->a_ops; mark_page_accessed(page); return aops->write_end(file, mapping, pos, len, copied, page, fsdata);}EXPORT_SYMBOL(pagecache_write_end);ssize_tgeneric_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long *nr_segs, loff_t pos, loff_t *ppos, size_t count, size_t ocount){ struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t written; size_t write_len; pgoff_t end; if (count != ocount) *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count); write_len = iov_length(iov, *nr_segs); end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT; written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); if (written) goto out; /* * After a write we want buffered reads to be sure to go to disk to get * the new data. We invalidate clean cached page from the region we're * about to write. We do this *before* the write so that we can return * without clobbering -EIOCBQUEUED from ->direct_IO(). */ if (mapping->nrpages) { written = invalidate_inode_pages2_range(mapping, pos >> PAGE_CACHE_SHIFT, end); /* * If a page can not be invalidated, return 0 to fall back * to buffered write. */ if (written) { if (written == -EBUSY) return 0; goto out; } } written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs); /* * Finally, try again to invalidate clean pages which might have been * cached by non-direct readahead, or faulted in by get_user_pages() * if the source of the write was an mmap'ed region of the file * we're writing. Either one is a pretty crazy thing to do, * so we don't support it 100%. If this invalidation * fails, tough, the write still worked... */ if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, pos >> PAGE_CACHE_SHIFT, end); } if (written > 0) { loff_t end = pos + written; if (end > i_size_read(inode) && !S_ISBLK(inode->i_mode)) { i_size_write(inode, end); mark_inode_dirty(inode); } *ppos = end; } /* * Sync the fs metadata but not the minor inode changes and * of course not the data as we did direct DMA for the IO. * i_mutex is held, which protects generic_osync_inode() from * livelocking. AIO O_DIRECT ops attempt to sync metadata here. */out: if ((written >= 0 || written == -EIOCBQUEUED) && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); if (err < 0) written = err; } return written;}EXPORT_SYMBOL(generic_file_direct_write);/* * Find or create a page at the given pagecache position. Return the locked * page. This function is specifically for buffered writes. */struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index, unsigned flags){ int status; struct page *page; gfp_t gfp_notmask = 0; if (flags & AOP_FLAG_NOFS) gfp_notmask = __GFP_FS;repeat: page = find_lock_page(mapping, index); if (likely(page)) return page; page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); if (!page) return NULL; status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL & ~gfp_notmask); if (unlikely(status)) { page_cache_release(page); if (status == -EEXIST) goto repeat; return NULL; } return page;}EXPORT_SYMBOL(grab_cache_page_write_begin);static ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos){ struct address_space *mapping = file->f_mapping; const struct address_space_operations *a_ops = mapping->a_ops; long status = 0; ssize_t written = 0; unsigned int flags = 0; /* * Copies from kernel address space cannot fail (NFSD is a big user). */ if (segment_eq(get_fs(), KERNEL_DS)) flags |= AOP_FLAG_UNINTERRUPTIBLE; do { struct page *page; pgoff_t index; /* Pagecache index for current page */ unsigned long offset; /* Offset into pagecache page */ unsigned long bytes; /* Bytes to write to page */ size_t copied; /* Bytes copied from user */ void *fsdata; offset = (pos & (PAGE_CACHE_SIZE - 1)); index = pos >> PAGE_CACHE_SHIFT; bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, iov_iter_count(i));again: /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. * * Not only is this an optimisation, but it is also required * to check that the address is actually valid, when atomic * usercopies are used, below. */ if (unlikely(iov_iter_fault_in_readable(i, bytes))) { status = -EFAULT; break; } status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); if (unlikely(status)) break; pagefault_disable(); copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); pagefault_enable(); flush_dcache_page(page); status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); if (unlikely(status < 0)) break; copied = status; cond_resched(); iov_iter_advance(i, copied); if (unlikely(copied == 0)) { /* * If we were unable to copy any data at all, we must * fall back to a single segment length write. * * If we didn't fallback here, we could livelock * because not all segments in the iov can be copied at * once without a pagefault. */ bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, iov_iter_single_seg_count(i)); goto again; } pos += copied; written += copied; balance_dirty_pages_ratelimited(mapping); } while (iov_iter_count(i)); return written ? written : status;}ssize_tgeneric_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos, loff_t *ppos, size_t count, ssize_t written){ struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; const struct address_space_operations *a_ops = mapping->a_ops; struct inode *inode = mapping->host; ssize_t status; struct iov_iter i; iov_iter_init(&i, iov, nr_segs, count, written); status = generic_perform_write(file, &i, pos); if (likely(status >= 0)) { written += status; *ppos = pos + status; /* * For now, when the user asks for O_SYNC, we'll actually give * O_DSYNC */ if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (!a_ops->writepage || !is_sync_kiocb(iocb)) status = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); } } /* * If we get here for O_DIRECT writes then we must have fallen through * to buffered writes (block instantiation inside i_size). So we sync * the file data here, to try to honour O_DIRECT expectations. */ if (unlikely(file->f_flags & O_DIRECT) && written) status = filemap_write_and_wait_range(mapping, pos, pos + written - 1); return written ? written : status;}EXPORT_SYMBOL(generic_file_buffered_write);static ssize_t__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t *ppos){ struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; size_t ocount; /* original count */ size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos; ssize_t written; ssize_t err; ocount = 0; err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); if (err) return err; count = ocount; pos = *ppos; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; written = 0; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; if (count == 0) goto out; err = file_remove_suid(file); if (err) goto out; file_update_time(file); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; ssize_t written_buffered; written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, count, ocount); if (written < 0 || written == count) goto out; /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ pos += written; count -= written; written_buffered = generic_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count, written); /* * If generic_file_buffered_write() retuned a synchronous error * then we want to return the number of bytes which were * direct-written, or the error code if that was zero. Note * that this differs from normal direct-io semantics, which * will return -EFOO even if some bytes were written. */ if (written_buffered < 0) { err = written_buffered; goto out; } /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. */ endbyte = pos + written_buffered - written - 1; err = do_sync_mapping_range(file->f_mapping, pos, endbyte, SYNC_FILE_RANGE_WAIT_BEFORE| SYNC_FILE_RANGE_WRITE| SYNC_FILE_RANGE_WAIT_AFTER); if (err == 0) { written = written_buffered; invalidate_mapping_pages(mapping, pos >> PAGE_CACHE_SHIFT, endbyte >> PAGE_CACHE_SHIFT); } else { /* * We don't know how much we wrote, so just return * the number of bytes which were direct-written */ } } else { written = generic_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count, written); }out: current->backing_dev_info = NULL; return written ? written : err;}ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos){ struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; BUG_ON(iocb->ki_pos != pos); ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { ssize_t err; err = sync_page_range_nolock(inode, mapping, pos, ret); if (err < 0) ret = err; } return ret;}EXPORT_SYMBOL(generic_file_aio_write_nolock);ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos){ struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { ssize_t err; err = sync_page_range(in
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -