inode.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 2,219 行 · 第 1/5 页

C
2,219
字号
 *   current transaction.  But the same file is part of a shared mapping *   and someone does a writepage() on it. * *   We will move the buffer onto the async_data list, but *after* it has *   been dirtied. So there's a small window where we have dirty data on *   BJ_Metadata. * *   Note that this only applies to the last partial page in the file.  The *   bit which block_write_full_page() uses prepare/commit for.  (That's *   broken code anyway: it's wrong for msync()). * *   It's a rare case: affects the final partial page, for journalled data *   where the file is subject to bith write() and writepage() in the same *   transction.  To fix it we'll need a custom block_write_full_page(). *   We'll probably need that anyway for journalling writepage() output. * * We don't honour synchronous mounts for writepage().  That would be * disastrous.  Any write() or metadata operation will sync the fs for * us. * * AKPM2: if all the page's buffers are mapped to disk and !data=journal, * we don't need to open a transaction here. */static int ext3_ordered_writepage(struct page *page,			struct writeback_control *wbc){	struct inode *inode = page->mapping->host;	struct buffer_head *page_bufs;	handle_t *handle = NULL;	int ret = 0;	int err;	J_ASSERT(PageLocked(page));	/*	 * We give up here if we're reentered, because it might be for a	 * different filesystem.	 */	if (ext3_journal_current_handle())		goto out_fail;	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));	if (IS_ERR(handle)) {		ret = PTR_ERR(handle);		goto out_fail;	}	if (!page_has_buffers(page)) {		create_empty_buffers(page, inode->i_sb->s_blocksize,				(1 << BH_Dirty)|(1 << BH_Uptodate));	}	page_bufs = page_buffers(page);	walk_page_buffers(handle, page_bufs, 0,			PAGE_CACHE_SIZE, NULL, bget_one);	ret = block_write_full_page(page, ext3_get_block, wbc);	/*	 * The page can become unlocked at any point now, and	 * truncate can then come in and change things.  So we	 * can't touch *page from now on.  
But *page_bufs is	 * safe due to elevated refcount.	 */	/*	 * And attach them to the current transaction.  But only if 	 * block_write_full_page() succeeded.  Otherwise they are unmapped,	 * and generally junk.	 */	if (ret == 0) {		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,					NULL, journal_dirty_data_fn);		if (!ret)			ret = err;	}	walk_page_buffers(handle, page_bufs, 0,			PAGE_CACHE_SIZE, NULL, bput_one);	err = ext3_journal_stop(handle);	if (!ret)		ret = err;	return ret;out_fail:	redirty_page_for_writepage(wbc, page);	unlock_page(page);	return ret;}static int ext3_writeback_writepage(struct page *page,				struct writeback_control *wbc){	struct inode *inode = page->mapping->host;	handle_t *handle = NULL;	int ret = 0;	int err;	if (ext3_journal_current_handle())		goto out_fail;	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));	if (IS_ERR(handle)) {		ret = PTR_ERR(handle);		goto out_fail;	}	ret = block_write_full_page(page, ext3_get_block, wbc);	err = ext3_journal_stop(handle);	if (!ret)		ret = err;	return ret;out_fail:	redirty_page_for_writepage(wbc, page);	unlock_page(page);	return ret;}static int ext3_journalled_writepage(struct page *page,				struct writeback_control *wbc){	struct inode *inode = page->mapping->host;	handle_t *handle = NULL;	int ret = 0;	int err;	if (ext3_journal_current_handle())		goto no_write;	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));	if (IS_ERR(handle)) {		ret = PTR_ERR(handle);		goto no_write;	}	if (!page_has_buffers(page) || PageChecked(page)) {		/*		 * It's mmapped pagecache.  Add buffers and journal it.  There		 * doesn't seem much point in redirtying the page here.		 
*/		ClearPageChecked(page);		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,					ext3_get_block);		if (ret != 0)			goto out_unlock;		ret = walk_page_buffers(handle, page_buffers(page), 0,			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);		err = walk_page_buffers(handle, page_buffers(page), 0,				PAGE_CACHE_SIZE, NULL, commit_write_fn);		if (ret == 0)			ret = err;		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;		unlock_page(page);	} else {		/*		 * It may be a page full of checkpoint-mode buffers.  We don't		 * really know unless we go poke around in the buffer_heads.		 * But block_write_full_page will do the right thing.		 */		ret = block_write_full_page(page, ext3_get_block, wbc);	}	err = ext3_journal_stop(handle);	if (!ret)		ret = err;out:	return ret;no_write:	redirty_page_for_writepage(wbc, page);out_unlock:	unlock_page(page);	goto out;}static int ext3_readpage(struct file *file, struct page *page){	return mpage_readpage(page, ext3_get_block);}static intext3_readpages(struct file *file, struct address_space *mapping,		struct list_head *pages, unsigned nr_pages){	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);}static int ext3_invalidatepage(struct page *page, unsigned long offset){	journal_t *journal = EXT3_JOURNAL(page->mapping->host);	/*	 * If it's a full truncate we just forget about the pending dirtying	 */	if (offset == 0)		ClearPageChecked(page);	return journal_invalidatepage(journal, page, offset);}static int ext3_releasepage(struct page *page, int wait){	journal_t *journal = EXT3_JOURNAL(page->mapping->host);	WARN_ON(PageChecked(page));	return journal_try_to_free_buffers(journal, page, wait);}/* * If the O_DIRECT write will extend the file then add this inode to the * orphan list.  So recovery will truncate it back to the original size * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine * crashes then stale disk data _may_ be exposed inside the file. 
*/
/*
 * ->direct_IO for the ordered and writeback address_space_operations.
 *
 * For writes a handle is started with DIO_CREDITS; if the write would end
 * beyond i_size the inode is put on the orphan list first and i_disksize is
 * set to the current i_size.  The I/O itself is done by blockdev_direct_IO()
 * with ext3_direct_io_get_blocks.  Afterwards the inode is taken off the
 * orphan list again and, if data was written past i_size, both i_size and
 * i_disksize are advanced to the end of the written range.
 *
 * Returns the blockdev_direct_IO() result, or a negative error from
 * ext3_journal_start() / ext3_orphan_add().  An ext3_journal_stop() error
 * is reported only when the result would otherwise be 0.
 */
static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext3_inode_info *ei = EXT3_I(inode);
	handle_t *handle = NULL;
	ssize_t ret;
	int orphan = 0;
	size_t count = iov_length(iov, nr_segs);

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		handle = ext3_journal_start(inode, DIO_CREDITS);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}
		if (final_size > inode->i_size) {
			ret = ext3_orphan_add(handle, inode);
			if (ret)
				goto out_stop;
			orphan = 1;
			ei->i_disksize = inode->i_size;
		}
	}

	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				 offset, nr_segs,
				 ext3_direct_io_get_blocks, NULL);

	/*
	 * Reacquire the handle: ext3_direct_io_get_block() can restart the
	 * transaction
	 */
	handle = journal_current_handle();

out_stop:
	if (handle) {
		int err;

		if (orphan) {
			ext3_orphan_del(handle, inode);
			if (ret > 0) {
				loff_t end = offset + ret;

				if (end > inode->i_size) {
					ei->i_disksize = end;
					i_size_write(inode, end);
					/*
					 * ret is positive here, and that is
					 * what gets returned, so there is no
					 * way to report a failure of
					 * ext3_mark_inode_dirty() to the
					 * caller.  Its result is deliberately
					 * ignored.
					 */
					ext3_mark_inode_dirty(handle, inode);
				}
			}
		}
		err = ext3_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	return ret;
}

/*
 * Pages can be marked dirty completely asynchronously from ext3's journalling
 * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
 * much here because ->set_page_dirty is called under VFS locks.  The page is
 * not necessarily locked.
 *
 * We cannot just dirty the page and leave attached buffers clean, because the
 * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
 * or jbddirty because all the journalling code will explode.
 *
 * So what we do is to mark the page "pending dirty" and next time writepage
 * is called, propagate that into the buffers appropriately.
*/static int ext3_journalled_set_page_dirty(struct page *page){	SetPageChecked(page);	return __set_page_dirty_nobuffers(page);}static struct address_space_operations ext3_ordered_aops = {	.readpage	= ext3_readpage,	.readpages	= ext3_readpages,	.writepage	= ext3_ordered_writepage,	.sync_page	= block_sync_page,	.prepare_write	= ext3_prepare_write,	.commit_write	= ext3_ordered_commit_write,	.bmap		= ext3_bmap,	.invalidatepage	= ext3_invalidatepage,	.releasepage	= ext3_releasepage,	.direct_IO	= ext3_direct_IO,};static struct address_space_operations ext3_writeback_aops = {	.readpage	= ext3_readpage,	.readpages	= ext3_readpages,	.writepage	= ext3_writeback_writepage,	.sync_page	= block_sync_page,	.prepare_write	= ext3_prepare_write,	.commit_write	= ext3_writeback_commit_write,	.bmap		= ext3_bmap,	.invalidatepage	= ext3_invalidatepage,	.releasepage	= ext3_releasepage,	.direct_IO	= ext3_direct_IO,};static struct address_space_operations ext3_journalled_aops = {	.readpage	= ext3_readpage,	.readpages	= ext3_readpages,	.writepage	= ext3_journalled_writepage,	.sync_page	= block_sync_page,	.prepare_write	= ext3_prepare_write,	.commit_write	= ext3_journalled_commit_write,	.set_page_dirty	= ext3_journalled_set_page_dirty,	.bmap		= ext3_bmap,	.invalidatepage	= ext3_invalidatepage,	.releasepage	= ext3_releasepage,};void ext3_set_aops(struct inode *inode){	if (ext3_should_order_data(inode))		inode->i_mapping->a_ops = &ext3_ordered_aops;	else if (ext3_should_writeback_data(inode))		inode->i_mapping->a_ops = &ext3_writeback_aops;	else		inode->i_mapping->a_ops = &ext3_journalled_aops;}/* * ext3_block_truncate_page() zeroes out a mapping from file offset `from' * up to the end of the block which corresponds to `from'. * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. 
*/
/*
 * @handle:  handle passed to the journal calls made on the buffer
 * @page:    page containing `from'; it is unlocked and released on every
 *           return path
 * @mapping: address space whose host inode supplies the block size
 * @from:    file offset at which zeroing starts
 *
 * Returns 0 when the tail was zeroed, when the buffer is flagged freed, or
 * when the block turns out to be a hole; -EIO if the block could not be
 * read; otherwise the error returned by the journal call that failed.
 */
static int ext3_block_truncate_page(handle_t *handle, struct page *page,
		struct address_space *mapping, loff_t from)
{
	struct inode *inode = mapping->host;
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize = inode->i_sb->s_blocksize;
	/* Bytes from `offset' up to the end of its block. */
	unsigned length = blocksize - (offset & (blocksize - 1));
	/* Block number of the first block in this page. */
	unsigned iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
	unsigned block_end;
	struct buffer_head *bh;
	void *kaddr;
	int err = 0;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Step through the page's buffers to the one that contains "offset" */
	bh = page_buffers(page);
	for (block_end = blocksize; block_end <= offset; block_end += blocksize) {
		bh = bh->b_this_page;
		iblock++;
	}

	if (buffer_freed(bh)) {
		BUFFER_TRACE(bh, "freed: skip");
		goto unlock;
	}

	if (!buffer_mapped(bh)) {
		BUFFER_TRACE(bh, "unmapped");
		ext3_get_block(inode, iblock, bh, 0);
		/* Still unmapped after the lookup: it's a hole, nothing to do */
		if (!buffer_mapped(bh)) {
			BUFFER_TRACE(bh, "still unmapped");
			goto unlock;
		}
	}

	/* The block is mapped; make sure its contents are up to date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Read error: give up without touching the data */
		if (!buffer_uptodate(bh)) {
			err = -EIO;
			goto unlock;
		}
	}

	if (ext3_should_journal_data(inode)) {
		BUFFER_TRACE(bh, "get write access");
		err = ext3_journal_get_write_access(handle, bh);
		if (err)
			goto unlock;
	}

	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + offset, 0, length);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	BUFFER_TRACE(bh, "zeroed end of block");

	/* err is still 0 at this point; only the dirtying calls can change it */
	if (ext3_should_journal_data(inode)) {
		err = ext3_journal_dirty_metadata(handle, bh);
	} else {
		if (ext3_should_order_data(inode))
			err = ext3_journal_dirty_data(handle, bh);
		mark_buffer_dirty(bh);
	}

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}

/*
 * Probably it should be a library function...
search for first non-zero word * or memcmp with zero_page, whatever is better for particular architecture. * Linus? */static inline int all_zeroes(__le32 *p, __le32 *q)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?