📄 filemap.c

📁 最新最稳定的Linux内存管理模块源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
 * Can adjust writing position or amount of bytes to write. * Returns appropriate error code that caller should return or * zero in case that write should be allowed. */inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk){	struct inode *inode = file->f_mapping->host;	unsigned long limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;        if (unlikely(*pos < 0))                return -EINVAL;	if (!isblk) {		/* FIXME: this is for backwards compatibility with 2.4 */		if (file->f_flags & O_APPEND)                        *pos = i_size_read(inode);		if (limit != RLIM_INFINITY) {			if (*pos >= limit) {				send_sig(SIGXFSZ, current, 0);				return -EFBIG;			}			if (*count > limit - (typeof(limit))*pos) {				*count = limit - (typeof(limit))*pos;			}		}	}	/*	 * LFS rule	 */	if (unlikely(*pos + *count > MAX_NON_LFS &&				!(file->f_flags & O_LARGEFILE))) {		if (*pos >= MAX_NON_LFS) {			return -EFBIG;		}		if (*count > MAX_NON_LFS - (unsigned long)*pos) {			*count = MAX_NON_LFS - (unsigned long)*pos;		}	}	/*	 * Are we about to exceed the fs block limit ?	 *	 * If we have written data it becomes a short write.  If we have	 * exceeded without writing data we send a signal and return EFBIG.	 * Linus frestrict idea will clean these up nicely..	 */	if (likely(!isblk)) {		if (unlikely(*pos >= inode->i_sb->s_maxbytes)) {			if (*count || *pos > inode->i_sb->s_maxbytes) {				return -EFBIG;			}			/* zero-length writes at ->s_maxbytes are OK */		}		if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))			*count = inode->i_sb->s_maxbytes - *pos;	} else {#ifdef CONFIG_BLOCK		loff_t isize;		if (bdev_read_only(I_BDEV(inode)))			return -EPERM;		isize = i_size_read(inode);		if (*pos >= isize) {			if (*count || *pos > isize)				return -ENOSPC;		}		if (*pos + *count > isize)			*count = isize - *pos;#else		return -EPERM;#endif	}	return 0;}EXPORT_SYMBOL(generic_write_checks);int pagecache_write_begin(struct file *file, struct address_space *mapping,				loff_t pos, unsigned len, unsigned flags,				struct page **pagep, void **fsdata){	const struct address_space_operations *aops = mapping->a_ops;	return aops->write_begin(file, mapping, pos, len, flags,							pagep, fsdata);}EXPORT_SYMBOL(pagecache_write_begin);int pagecache_write_end(struct file *file, struct address_space *mapping,				loff_t pos, unsigned len, unsigned copied,				struct page *page, void *fsdata){	const struct address_space_operations *aops = mapping->a_ops;	mark_page_accessed(page);	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);}EXPORT_SYMBOL(pagecache_write_end);ssize_tgeneric_file_direct_write(struct kiocb *iocb, const struct iovec *iov,		unsigned long *nr_segs, loff_t pos, loff_t *ppos,		size_t count, size_t ocount){	struct file	*file = iocb->ki_filp;	struct address_space *mapping = file->f_mapping;	struct inode	*inode = mapping->host;	ssize_t		written;	size_t		write_len;	pgoff_t		end;	if (count != ocount)		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);	write_len = iov_length(iov, *nr_segs);	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);	if (written)		goto out;	/*	 * After a write we want buffered reads to be sure to go to disk to get	 * the new data.  We invalidate clean cached page from the region we're	 * about to write.  We do this *before* the write so that we can return	 * without clobbering -EIOCBQUEUED from ->direct_IO().	 */	if (mapping->nrpages) {		written = invalidate_inode_pages2_range(mapping,					pos >> PAGE_CACHE_SHIFT, end);		/*		 * If a page can not be invalidated, return 0 to fall back		 * to buffered write.		 */		if (written) {			if (written == -EBUSY)				return 0;			goto out;		}	}	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);	/*	 * Finally, try again to invalidate clean pages which might have been	 * cached by non-direct readahead, or faulted in by get_user_pages()	 * if the source of the write was an mmap'ed region of the file	 * we're writing.  Either one is a pretty crazy thing to do,	 * so we don't support it 100%.  If this invalidation	 * fails, tough, the write still worked...	 */	if (mapping->nrpages) {		invalidate_inode_pages2_range(mapping,					      pos >> PAGE_CACHE_SHIFT, end);	}	if (written > 0) {		loff_t end = pos + written;		if (end > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {			i_size_write(inode,  end);			mark_inode_dirty(inode);		}		*ppos = end;	}	/*	 * Sync the fs metadata but not the minor inode changes and	 * of course not the data as we did direct DMA for the IO.	 * i_mutex is held, which protects generic_osync_inode() from	 * livelocking.  AIO O_DIRECT ops attempt to sync metadata here.	 */out:	if ((written >= 0 || written == -EIOCBQUEUED) &&	    ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {		int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);		if (err < 0)			written = err;	}	return written;}EXPORT_SYMBOL(generic_file_direct_write);/* * Find or create a page at the given pagecache position. Return the locked * page. This function is specifically for buffered writes. */struct page *grab_cache_page_write_begin(struct address_space *mapping,					pgoff_t index, unsigned flags){	int status;	struct page *page;	gfp_t gfp_notmask = 0;	if (flags & AOP_FLAG_NOFS)		gfp_notmask = __GFP_FS;repeat:	page = find_lock_page(mapping, index);	if (likely(page))		return page;	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);	if (!page)		return NULL;	status = add_to_page_cache_lru(page, mapping, index,						GFP_KERNEL & ~gfp_notmask);	if (unlikely(status)) {		page_cache_release(page);		if (status == -EEXIST)			goto repeat;		return NULL;	}	return page;}EXPORT_SYMBOL(grab_cache_page_write_begin);static ssize_t generic_perform_write(struct file *file,				struct iov_iter *i, loff_t pos){	struct address_space *mapping = file->f_mapping;	const struct address_space_operations *a_ops = mapping->a_ops;	long status = 0;	ssize_t written = 0;	unsigned int flags = 0;	/*	 * Copies from kernel address space cannot fail (NFSD is a big user).	 */	if (segment_eq(get_fs(), KERNEL_DS))		flags |= AOP_FLAG_UNINTERRUPTIBLE;	do {		struct page *page;		pgoff_t index;		/* Pagecache index for current page */		unsigned long offset;	/* Offset into pagecache page */		unsigned long bytes;	/* Bytes to write to page */		size_t copied;		/* Bytes copied from user */		void *fsdata;		offset = (pos & (PAGE_CACHE_SIZE - 1));		index = pos >> PAGE_CACHE_SHIFT;		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,						iov_iter_count(i));again:		/*		 * Bring in the user page that we will copy from _first_.		 * Otherwise there's a nasty deadlock on copying from the		 * same page as we're writing to, without it being marked		 * up-to-date.		 *		 * Not only is this an optimisation, but it is also required		 * to check that the address is actually valid, when atomic		 * usercopies are used, below.		 */		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {			status = -EFAULT;			break;		}		status = a_ops->write_begin(file, mapping, pos, bytes, flags,						&page, &fsdata);		if (unlikely(status))			break;		pagefault_disable();		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);		pagefault_enable();		flush_dcache_page(page);		status = a_ops->write_end(file, mapping, pos, bytes, copied,						page, fsdata);		if (unlikely(status < 0))			break;		copied = status;		cond_resched();		iov_iter_advance(i, copied);		if (unlikely(copied == 0)) {			/*			 * If we were unable to copy any data at all, we must			 * fall back to a single segment length write.			 *			 * If we didn't fallback here, we could livelock			 * because not all segments in the iov can be copied at			 * once without a pagefault.			 */			bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,						iov_iter_single_seg_count(i));			goto again;		}		pos += copied;		written += copied;		balance_dirty_pages_ratelimited(mapping);	} while (iov_iter_count(i));	return written ? written : status;}ssize_tgeneric_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,		unsigned long nr_segs, loff_t pos, loff_t *ppos,		size_t count, ssize_t written){	struct file *file = iocb->ki_filp;	struct address_space *mapping = file->f_mapping;	const struct address_space_operations *a_ops = mapping->a_ops;	struct inode *inode = mapping->host;	ssize_t status;	struct iov_iter i;	iov_iter_init(&i, iov, nr_segs, count, written);	status = generic_perform_write(file, &i, pos);	if (likely(status >= 0)) {		written += status;		*ppos = pos + status;		/*		 * For now, when the user asks for O_SYNC, we'll actually give		 * O_DSYNC		 */		if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {			if (!a_ops->writepage || !is_sync_kiocb(iocb))				status = generic_osync_inode(inode, mapping,						OSYNC_METADATA|OSYNC_DATA);		}  	}		/*	 * If we get here for O_DIRECT writes then we must have fallen through	 * to buffered writes (block instantiation inside i_size).  So we sync	 * the file data here, to try to honour O_DIRECT expectations.	 */	if (unlikely(file->f_flags & O_DIRECT) && written)		status = filemap_write_and_wait_range(mapping,					pos, pos + written - 1);	return written ? written : status;}EXPORT_SYMBOL(generic_file_buffered_write);static ssize_t__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,				unsigned long nr_segs, loff_t *ppos){	struct file *file = iocb->ki_filp;	struct address_space * mapping = file->f_mapping;	size_t ocount;		/* original count */	size_t count;		/* after file limit checks */	struct inode 	*inode = mapping->host;	loff_t		pos;	ssize_t		written;	ssize_t		err;	ocount = 0;	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);	if (err)		return err;	count = ocount;	pos = *ppos;	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);	/* We can write back this queue in page reclaim */	current->backing_dev_info = mapping->backing_dev_info;	written = 0;	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));	if (err)		goto out;	if (count == 0)		goto out;	err = file_remove_suid(file);	if (err)		goto out;	file_update_time(file);	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */	if (unlikely(file->f_flags & O_DIRECT)) {		loff_t endbyte;		ssize_t written_buffered;		written = generic_file_direct_write(iocb, iov, &nr_segs, pos,							ppos, count, ocount);		if (written < 0 || written == count)			goto out;		/*		 * direct-io write to a hole: fall through to buffered I/O		 * for completing the rest of the request.		 */		pos += written;		count -= written;		written_buffered = generic_file_buffered_write(iocb, iov,						nr_segs, pos, ppos, count,						written);		/*		 * If generic_file_buffered_write() retuned a synchronous error		 * then we want to return the number of bytes which were		 * direct-written, or the error code if that was zero.  Note		 * that this differs from normal direct-io semantics, which		 * will return -EFOO even if some bytes were written.		 */		if (written_buffered < 0) {			err = written_buffered;			goto out;		}		/*		 * We need to ensure that the page cache pages are written to		 * disk and invalidated to preserve the expected O_DIRECT		 * semantics.		 */		endbyte = pos + written_buffered - written - 1;		err = do_sync_mapping_range(file->f_mapping, pos, endbyte,					    SYNC_FILE_RANGE_WAIT_BEFORE|					    SYNC_FILE_RANGE_WRITE|					    SYNC_FILE_RANGE_WAIT_AFTER);		if (err == 0) {			written = written_buffered;			invalidate_mapping_pages(mapping,						 pos >> PAGE_CACHE_SHIFT,						 endbyte >> PAGE_CACHE_SHIFT);		} else {			/*			 * We don't know how much we wrote, so just return			 * the number of bytes which were direct-written			 */		}	} else {		written = generic_file_buffered_write(iocb, iov, nr_segs,				pos, ppos, count, written);	}out:	current->backing_dev_info = NULL;	return written ? written : err;}ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,		const struct iovec *iov, unsigned long nr_segs, loff_t pos){	struct file *file = iocb->ki_filp;	struct address_space *mapping = file->f_mapping;	struct inode *inode = mapping->host;	ssize_t ret;	BUG_ON(iocb->ki_pos != pos);	ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,			&iocb->ki_pos);	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {		ssize_t err;		err = sync_page_range_nolock(inode, mapping, pos, ret);		if (err < 0)			ret = err;	}	return ret;}EXPORT_SYMBOL(generic_file_aio_write_nolock);ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,		unsigned long nr_segs, loff_t pos){	struct file *file = iocb->ki_filp;	struct address_space *mapping = file->f_mapping;	struct inode *inode = mapping->host;	ssize_t ret;	BUG_ON(iocb->ki_pos != pos);	mutex_lock(&inode->i_mutex);	ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,			&iocb->ki_pos);	mutex_unlock(&inode->i_mutex);	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {		ssize_t err;		err = sync_page_range(in
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -