inode.c
	ret = walk_page_buffers(handle, page_buffers(page), from,
				to, &partial, write_end_fn);
	if (!partial)
		SetPageUptodate(page);
	if (pos+copied > inode->i_size)
		i_size_write(inode, pos+copied);
	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
	if (inode->i_size > EXT4_I(inode)->i_disksize) {
		EXT4_I(inode)->i_disksize = inode->i_size;
		ret2 = ext4_mark_inode_dirty(handle, inode);
		if (!ret)
			ret = ret2;
	}

	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	unlock_page(page);
	page_cache_release(page);

	return ret ? ret : copied;
}

/*
 * bmap() is special.  It gets used by applications such as lilo and by
 * the swapper to find the on-disk block of a specific piece of data.
 *
 * Naturally, this is dangerous if the block concerned is still in the
 * journal.  If somebody makes a swapfile on an ext4 data-journaling
 * filesystem and enables swap, then they may get a nasty shock when the
 * data getting swapped to that swapfile suddenly gets overwritten by
 * the original zeros written out previously to the journal and
 * awaiting writeback in the kernel's buffer cache.
 *
 * So, if we see any bmap calls here on a modified, data-journaled file,
 * take extra steps to flush any blocks which might be in the cache.
 */
static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	journal_t *journal;
	int err;

	if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
		/*
		 * This is a REALLY heavyweight approach, but the use of
		 * bmap on dirty files is expected to be extremely rare:
		 * only if we run lilo or swapon on a freshly made file
		 * do we expect this to happen.
		 *
		 * (bmap requires CAP_SYS_RAWIO so this does not
		 * represent an unprivileged user DOS attack --- we'd be
		 * in trouble if mortal users could trigger this path at
		 * will.)
		 *
		 * NB. EXT4_STATE_JDATA is not set on files other than
		 * regular files.  If somebody wants to bmap a directory
		 * or symlink and gets confused because the buffer
		 * hasn't yet been flushed to disk, they deserve
		 * everything they get.
		 */
		EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA;
		journal = EXT4_JOURNAL(inode);
		jbd2_journal_lock_updates(journal);
		err = jbd2_journal_flush(journal);
		jbd2_journal_unlock_updates(journal);

		if (err)
			return 0;
	}

	return generic_block_bmap(mapping, block, ext4_get_block);
}

static int bget_one(handle_t *handle, struct buffer_head *bh)
{
	get_bh(bh);
	return 0;
}

static int bput_one(handle_t *handle, struct buffer_head *bh)
{
	put_bh(bh);
	return 0;
}

static int jbd2_journal_dirty_data_fn(handle_t *handle,
				      struct buffer_head *bh)
{
	if (buffer_mapped(bh))
		return ext4_journal_dirty_data(handle, bh);
	return 0;
}
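/*
 * Illustrative userspace counterpart (not part of inode.c): ->bmap above
 * ultimately services the FIBMAP ioctl, which is how tools such as lilo
 * or swapon map a file's logical blocks to on-disk block numbers.  A
 * minimal, hedged sketch of such a caller; the file path is hypothetical
 * and, as the comment above notes, FIBMAP needs CAP_SYS_RAWIO.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FIBMAP, FIGETBSZ */

int main(void)
{
	int fd = open("/boot/vmlinuz", O_RDONLY);	/* hypothetical path */
	int blksz;
	int blk = 0;					/* logical block 0 */

	if (fd < 0)
		return 1;
	if (ioctl(fd, FIGETBSZ, &blksz) < 0)		/* filesystem block size */
		return 1;
	if (ioctl(fd, FIBMAP, &blk) < 0)		/* needs CAP_SYS_RAWIO */
		return 1;
	printf("block size %d, logical block 0 -> physical block %d\n",
	       blksz, blk);
	close(fd);
	return 0;
}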
/*
 * Note that we always start a transaction even if we're not journalling
 * data.  This is to preserve ordering: any hole instantiation within
 * __block_write_full_page -> ext4_get_block() should be journalled
 * along with the data so we don't crash and then get metadata which
 * refers to old data.
 *
 * In all journalling modes block_write_full_page() will start the I/O.
 *
 * Problem:
 *
 * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
 *		ext4_writepage()
 *
 * Similar for:
 *
 * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
 *
 * Same applies to ext4_get_block().  We will deadlock on various things like
 * lock_journal and i_truncate_mutex.
 *
 * Setting PF_MEMALLOC here doesn't work - too many internal memory
 * allocations fail.
 *
 * 16May01: If we're reentered then journal_current_handle() will be
 *	    non-zero.  We simply *return*.
 *
 * 1 July 2001: @@@ FIXME:
 *   In journalled data mode, a data buffer may be metadata against the
 *   current transaction.  But the same file is part of a shared mapping
 *   and someone does a writepage() on it.
 *
 *   We will move the buffer onto the async_data list, but *after* it has
 *   been dirtied.  So there's a small window where we have dirty data on
 *   BJ_Metadata.
 *
 *   Note that this only applies to the last partial page in the file.  The
 *   bit which block_write_full_page() uses prepare/commit for.  (That's
 *   broken code anyway: it's wrong for msync()).
 *
 *   It's a rare case: affects the final partial page, for journalled data
 *   where the file is subject to both write() and writepage() in the same
 *   transaction.  To fix it we'll need a custom block_write_full_page().
 *   We'll probably need that anyway for journalling writepage() output.
 *
 * We don't honour synchronous mounts for writepage().  That would be
 * disastrous.  Any write() or metadata operation will sync the fs for
 * us.
 *
 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
 * we don't need to open a transaction here.
 */
static int ext4_ordered_writepage(struct page *page,
				  struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *page_bufs;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));

	/*
	 * We give up here if we're reentered, because it might be for a
	 * different filesystem.
	 */
	if (ext4_journal_current_handle())
		goto out_fail;

	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));

	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, inode->i_sb->s_blocksize,
				(1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	page_bufs = page_buffers(page);
	walk_page_buffers(handle, page_bufs, 0,
			PAGE_CACHE_SIZE, NULL, bget_one);

	ret = block_write_full_page(page, ext4_get_block, wbc);

	/*
	 * The page can become unlocked at any point now, and
	 * truncate can then come in and change things.  So we
	 * can't touch *page from now on.  But *page_bufs is
	 * safe due to elevated refcount.
	 */

	/*
	 * And attach them to the current transaction.  But only if
	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
	 * and generally junk.
	 */
	if (ret == 0) {
		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
					NULL, jbd2_journal_dirty_data_fn);
		if (!ret)
			ret = err;
	}
	walk_page_buffers(handle, page_bufs, 0,
			PAGE_CACHE_SIZE, NULL, bput_one);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}

static int ext4_writeback_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	if (ext4_journal_current_handle())
		goto out_fail;

	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
		ret = nobh_writepage(page, ext4_get_block, wbc);
	else
		ret = block_write_full_page(page, ext4_get_block, wbc);

	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}

static int ext4_journalled_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	if (ext4_journal_current_handle())
		goto no_write;

	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto no_write;
	}

	if (!page_has_buffers(page) || PageChecked(page)) {
		/*
		 * It's mmapped pagecache.  Add buffers and journal it.  There
		 * doesn't seem much point in redirtying the page here.
		 */
		ClearPageChecked(page);
		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
					ext4_get_block);
		if (ret != 0) {
			ext4_journal_stop(handle);
			goto out_unlock;
		}
		ret = walk_page_buffers(handle, page_buffers(page), 0,
			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);

		err = walk_page_buffers(handle, page_buffers(page), 0,
				PAGE_CACHE_SIZE, NULL, write_end_fn);
		if (ret == 0)
			ret = err;
		EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
		unlock_page(page);
	} else {
		/*
		 * It may be a page full of checkpoint-mode buffers.  We don't
		 * really know unless we go poke around in the buffer_heads.
		 * But block_write_full_page will do the right thing.
		 */
		ret = block_write_full_page(page, ext4_get_block, wbc);
	}
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
out:
	return ret;

no_write:
	redirty_page_for_writepage(wbc, page);
out_unlock:
	unlock_page(page);
	goto out;
}

static int ext4_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, ext4_get_block);
}

static int
ext4_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}

static void ext4_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);

	/*
	 * If it's a full truncate we just forget about the pending dirtying
	 */
	if (offset == 0)
		ClearPageChecked(page);

	jbd2_journal_invalidatepage(journal, page, offset);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);

	WARN_ON(PageChecked(page));
	if (!page_has_buffers(page))
		return 0;
	return jbd2_journal_try_to_free_buffers(journal, page, wait);
}

/*
 * If the O_DIRECT write will extend the file then add this inode to the
 * orphan list.  So recovery will truncate it back to the original size
 * if the machine crashes during the write.
 *
 * If the O_DIRECT write is instantiating holes inside i_size and the machine
 * crashes then stale disk data _may_ be exposed inside the file.
 */
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext4_inode_info *ei = EXT4_I(inode);
	handle_t *handle = NULL;
	ssize_t ret;
	int orphan = 0;
	size_t count = iov_length(iov, nr_segs);

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		handle = ext4_journal_start(inode, DIO_CREDITS);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}
		if (final_size > inode->i_size) {
			ret = ext4_orphan_add(handle, inode);
			if (ret)
				goto out_stop;
			orphan = 1;
			ei->i_disksize = inode->i_size;
		}
	}

	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				 offset, nr_segs,
				 ext4_get_block, NULL);

	/*
	 * Reacquire the handle: ext4_get_block() can restart the transaction
	 */
	handle = ext4_journal_current_handle();

out_stop:
	if (handle) {
		int err;

		if (orphan && inode->i_nlink)
			ext4_orphan_del(handle, inode);
		if (orphan && ret > 0) {
			loff_t end = offset + ret;
			if (end > inode->i_size) {
				ei->i_disksize = end;
				i_size_write(inode, end);
				/*
				 * We're going to return a positive `ret'
				 * here due to non-zero-length I/O, so there's
				 * no way of reporting error returns from
				 * ext4_mark_inode_dirty() to userspace.  So
				 * ignore it.
				 */
				ext4_mark_inode_dirty(handle, inode);
			}
		}
		err = ext4_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	return ret;
}
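/*
 * Illustrative userspace counterpart (not part of inode.c): the kind of
 * extending O_DIRECT write that takes the orphan-add path in
 * ext4_direct_IO() above.  A minimal sketch only -- the file path is
 * hypothetical, and the buffer, offset, and length are aligned because
 * O_DIRECT requires it.
 */
#define _GNU_SOURCE		/* for O_DIRECT */
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int write_direct_example(void)
{
	void *buf;
	int fd = open("/mnt/ext4/testfile",		/* hypothetical path */
		      O_WRONLY | O_CREAT | O_DIRECT, 0644);

	if (fd < 0)
		return -1;
	/* O_DIRECT wants an aligned buffer; 4096 covers common block sizes. */
	if (posix_memalign(&buf, 4096, 4096)) {
		close(fd);
		return -1;
	}
	memset(buf, 0xab, 4096);
	/* Extending write: if the machine crashes mid-I/O, recovery
	 * truncates the orphaned inode back to its original size. */
	if (pwrite(fd, buf, 4096, 0) != 4096) {
		free(buf);
		close(fd);
		return -1;
	}
	free(buf);
	close(fd);
	return 0;
}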
/*
 * Pages can be marked dirty completely asynchronously from ext4's journalling
 * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
 * much here because ->set_page_dirty is called under VFS locks.  The page is
 * not necessarily locked.
 *
 * We cannot just dirty the page and leave attached buffers clean, because the
 * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
 * or jbddirty because all the journalling code will explode.
 *
 * So what we do is to mark the page "pending dirty" and next time writepage
 * is called, propagate that into the buffers appropriately.
 */
static int ext4_journalled_set_page_dirty(struct page *page)
{
	SetPageChecked(page);
	return __set_page_dirty_nobuffers(page);
}
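/*
 * Illustrative userspace counterpart (not part of inode.c): writing through
 * a shared mapping is exactly the asynchronous dirtying path described in
 * the comment above -- the page is dirtied by the VM, not by write(), and
 * only later does writepage() propagate that into the buffers.  Sketch only;
 * the path is hypothetical and the file is assumed to already be at least
 * one page long.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int dirty_via_mmap_example(void)
{
	int fd = open("/mnt/ext4/mapped", O_RDWR);	/* hypothetical path */
	char *p;

	if (fd < 0)
		return -1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return -1;
	}
	p[0] = 'x';			/* dirties the page via the VM */
	msync(p, 4096, MS_SYNC);	/* writeback goes through ->writepage */
	munmap(p, 4096);
	close(fd);
	return 0;
}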
static const struct address_space_operations ext4_ordered_aops = {
	.readpage	= ext4_readpage,
	.readpages	= ext4_readpages,
	.writepage	= ext4_ordered_writepage,
	.sync_page	= block_sync_page,
	.write_begin	= ext4_write_begin,
	.write_end	= ext4_ordered_write_end,
	.bmap		= ext4_bmap,
	.invalidatepage	= ext4_invalidatepage,
	.releasepage	= ext4_releasepage,
	.direct_IO	= ext4_direct_IO,
	.migratepage	= buffer_migrate_page,