inode.c

From "Linux Kernel 2.6.9 for OMAP1710" · C source code · 2,219 lines total · page 1 of 5

C
2,219
字号
	if (handle->h_transaction->t_state == T_LOCKED) {
		/*
		 * Huge direct-io writes can hold off commits for long
		 * periods of time.  Let this commit run.
		 */
		ext3_journal_stop(handle);
		handle = ext3_journal_start(inode, DIO_CREDITS);
		if (IS_ERR(handle))
			ret = PTR_ERR(handle);
		goto get_block;
	}

	if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
		/*
		 * Getting low on buffer credits...
		 */
		ret = ext3_journal_extend(handle, DIO_CREDITS);
		if (ret > 0) {
			/*
			 * Couldn't extend the transaction.  Start a new one.
			 */
			ret = ext3_journal_restart(handle, DIO_CREDITS);
		}
	}
get_block:
	if (ret == 0)
		ret = ext3_get_block_handle(handle, inode, iblock,
					bh_result, create, 0);
	bh_result->b_size = (1 << inode->i_blkbits);
	return ret;
}

/*
 * `handle' can be NULL if create is zero
 */
struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
				long block, int create, int * errp)
{
	struct buffer_head dummy;
	int fatal = 0, err;

	J_ASSERT(handle != NULL || create == 0);

	/*
	 * Map the block through a throwaway buffer_head first; only if the
	 * mapping succeeds do we fetch the real buffer from the cache.
	 */
	dummy.b_state = 0;
	dummy.b_blocknr = -1000;
	buffer_trace_init(&dummy.b_history);
	*errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
	if (!*errp && buffer_mapped(&dummy)) {
		struct buffer_head *bh;
		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
		if (buffer_new(&dummy)) {
			J_ASSERT(create != 0);
			J_ASSERT(handle != 0);

			/* Now that we do not always journal data, we
			   should keep in mind whether this should
			   always journal the new buffer as metadata.
			   For now, regular file writes use
			   ext3_get_block instead, so it's not a
			   problem. */
			lock_buffer(bh);
			BUFFER_TRACE(bh, "call get_create_access");
			fatal = ext3_journal_get_create_access(handle, bh);
			/* freshly allocated block: zero it before exposing */
			if (!fatal && !buffer_uptodate(bh)) {
				memset(bh->b_data, 0, inode->i_sb->s_blocksize);
				set_buffer_uptodate(bh);
			}
			unlock_buffer(bh);
			BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
			err = ext3_journal_dirty_metadata(handle, bh);
			if (!fatal)
				fatal = err;
		} else {
			BUFFER_TRACE(bh, "not a new buffer");
		}
		if (fatal) {
			*errp = fatal;
			brelse(bh);
			bh = NULL;
		}
		return bh;
	}
	return NULL;
}

/*
 * Like ext3_getblk(), but also reads the block from disk if it is not
 * already uptodate.  Returns the buffer_head, or NULL with *err set.
 */
struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode,
			       int block, int create, int *err)
{
	struct buffer_head * bh;
	int prev_blocks;

	prev_blocks = inode->i_blocks;

	bh = ext3_getblk (handle, inode, block, create, err);
	if (!bh)
		return bh;
#ifdef EXT3_PREALLOCATE
	/*
	 * If the inode has grown, and this is a directory, then use a few
	 * more of the preallocated blocks to keep directory fragmentation
	 * down.  The preallocated blocks are guaranteed to be contiguous.
	 */
	if (create &&
	    S_ISDIR(inode->i_mode) &&
	    inode->i_blocks > prev_blocks &&
	    EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
				    EXT3_FEATURE_COMPAT_DIR_PREALLOC)) {
		int i;
		struct buffer_head *tmp_bh;

		for (i = 1;
		     EXT3_I(inode)->i_prealloc_count &&
		     i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
		     i++) {
			/*
			 * ext3_getblk will zero out the contents of the
			 * directory for us
			 */
			tmp_bh = ext3_getblk(handle, inode,
						block+i, create, err);
			if (!tmp_bh) {
				brelse (bh);
				return 0;
			}
			brelse (tmp_bh);
		}
	}
#endif
	if (buffer_uptodate(bh))
		return bh;
	/* not uptodate: issue a synchronous read and wait for completion */
	ll_rw_block (READ, 1, &bh);
	wait_on_buffer (bh);
	if (buffer_uptodate(bh))
		return bh;
	brelse (bh);
	*err = -EIO;
	return NULL;
}

/*
 * Walk the buffers attached to a page, applying `fn' to each buffer that
 * overlaps the byte range [from, to).  The walk stops at the first error
 * from `fn', and that error is returned.  If `partial' is non-NULL it is
 * set when a buffer outside the range is not uptodate.
 */
static int walk_page_buffers(	handle_t *handle,
				struct buffer_head *head,
				unsigned from,
				unsigned to,
				int *partial,
				int (*fn)(	handle_t *handle,
						struct buffer_head *bh))
{
	struct buffer_head *bh;
	unsigned block_start, block_end;
	unsigned blocksize = head->b_size;
	int err, ret = 0;
	struct buffer_head *next;

	for (	bh = head, block_start = 0;
		ret == 0 && (bh != head || !block_start);
	    	block_start = block_end, bh = next)
	{
		next = bh->b_this_page;
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (partial && !buffer_uptodate(bh))
				*partial = 1;
			continue;
		}
		err = (*fn)(handle, bh);
		if (!ret)
			ret = err;
	}
	return ret;
}

/*
 * To preserve ordering, it is essential that the hole instantiation and
 * the data write be encapsulated in a single transaction.  We cannot
 * close off a transaction and start a new one between the ext3_get_block()
 * and the commit_write().  So doing the journal_start at the start of
 * prepare_write() is the right place.
 *
 * Also, this function can nest inside ext3_writepage() ->
 * block_write_full_page(). In that case, we *know* that ext3_writepage()
 * has generated enough buffer credits to do the whole page.  So we won't
 * block on the journal in that case, which is good, because the caller may
 * be PF_MEMALLOC.
 *
 * By accident, ext3 can be reentered when a transaction is open via
 * quota file writes.  If we were to commit the transaction while thus
 * reentered, there can be a deadlock - we would be holding a quota
 * lock, and the commit would never complete if another thread had a
 * transaction open and was blocking on the quota lock - a ranking
 * violation.
 *
 * So what we do is to rely on the fact that journal_stop/journal_start
 * will _not_ run commit under these circumstances because handle->h_ref
 * is elevated.  We'll still have enough credits for the tiny quotafile
 * write.
 */
static int do_journal_get_write_access(handle_t *handle,
				       struct buffer_head *bh)
{
	/* unmapped or freed buffers need no journal write access */
	if (!buffer_mapped(bh) || buffer_freed(bh))
		return 0;
	return ext3_journal_get_write_access(handle, bh);
}

static int ext3_prepare_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
	handle_t *handle;
	int retries = 0;

retry:
	handle = ext3_journal_start(inode, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}
	ret = block_prepare_write(page, from, to, ext3_get_block);
	if (ret)
		goto prepare_write_failed;

	/* in data=journal mode the data buffers themselves are journalled */
	if (ext3_should_journal_data(inode)) {
		ret = walk_page_buffers(handle, page_buffers(page),
				from, to, NULL, do_journal_get_write_access);
	}
prepare_write_failed:
	if (ret)
		ext3_journal_stop(handle);
	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
out:
	return ret;
}

/* Dirty a data buffer in the journal; abort the handle on failure. */
static int
ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
{
	int err = journal_dirty_data(handle, bh);
	if (err)
		ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__,
						bh, handle,err);
	return err;
}

/* For commit_write() in data=journal mode */
static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
{
	if (!buffer_mapped(bh) || buffer_freed(bh))
		return 0;
	set_buffer_uptodate(bh);
	return ext3_journal_dirty_metadata(handle, bh);
}

/*
 * We need to pick up the new inode size which generic_commit_write gave us
 * `file' can be NULL - eg, when called from page_symlink().
 *
 * ext3 never places buffers on inode->i_mapping->private_list.  metadata
 * buffers are managed internally.
 */
static int ext3_ordered_commit_write(struct file *file, struct page *page,
			     unsigned from, unsigned to)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = page->mapping->host;
	int ret = 0, ret2;

	ret = walk_page_buffers(handle, page_buffers(page),
		from, to, NULL, ext3_journal_dirty_data);

	if (ret == 0) {
		/*
		 * generic_commit_write() will run mark_inode_dirty() if i_size
		 * changes.  So let's piggyback the i_disksize mark_inode_dirty
		 * into that.
		 */
		loff_t new_i_size;

		new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
		if (new_i_size > EXT3_I(inode)->i_disksize)
			EXT3_I(inode)->i_disksize = new_i_size;
		ret = generic_commit_write(file, page, from, to);
	}
	ret2 = ext3_journal_stop(handle);
	if (!ret)
		ret = ret2;
	return ret;
}

/* commit_write for data=writeback mode: update i_disksize, then commit. */
static int ext3_writeback_commit_write(struct file *file, struct page *page,
			     unsigned from, unsigned to)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = page->mapping->host;
	int ret = 0, ret2;
	loff_t new_i_size;

	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
	if (new_i_size > EXT3_I(inode)->i_disksize)
		EXT3_I(inode)->i_disksize = new_i_size;
	ret = generic_commit_write(file, page, from, to);
	ret2 = ext3_journal_stop(handle);
	if (!ret)
		ret = ret2;
	return ret;
}

/* commit_write for data=journal mode: journal the data blocks as metadata. */
static int ext3_journalled_commit_write(struct file *file,
			struct page *page, unsigned from, unsigned to)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = page->mapping->host;
	int ret = 0, ret2;
	int partial = 0;
	loff_t pos;

	/*
	 * Here we duplicate the generic_commit_write() functionality
	 */
	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	ret = walk_page_buffers(handle, page_buffers(page), from,
				to, &partial, commit_write_fn);
	if (!partial)
		SetPageUptodate(page);
	if (pos > inode->i_size)
		i_size_write(inode, pos);
	/* flag the inode so a later bmap() knows journalled data may exist */
	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
	if (inode->i_size > EXT3_I(inode)->i_disksize) {
		EXT3_I(inode)->i_disksize = inode->i_size;
		ret2 = ext3_mark_inode_dirty(handle, inode);
		if (!ret)
			ret = ret2;
	}
	ret2 = ext3_journal_stop(handle);
	if (!ret)
		ret = ret2;
	return ret;
}

/*
 * bmap() is special.  It gets used by applications such as lilo and by
 * the swapper to find the on-disk block of a specific piece of data.
 *
 * Naturally, this is dangerous if the block concerned is still in the
 * journal.  If somebody makes a swapfile on an ext3 data-journaling
 * filesystem and enables swap, then they may get a nasty shock when the
 * data getting swapped to that swapfile suddenly gets overwritten by
 * the original zero's written out previously to the journal and
 * awaiting writeback in the kernel's buffer cache.
 *
 * So, if we see any bmap calls here on a modified, data-journaled file,
 * take extra steps to flush any blocks which might be in the cache.
 */
static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	journal_t *journal;
	int err;

	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
		/*
		 * This is a REALLY heavyweight approach, but the use of
		 * bmap on dirty files is expected to be extremely rare:
		 * only if we run lilo or swapon on a freshly made file
		 * do we expect this to happen.
		 *
		 * (bmap requires CAP_SYS_RAWIO so this does not
		 * represent an unprivileged user DOS attack --- we'd be
		 * in trouble if mortal users could trigger this path at
		 * will.)
		 *
		 * NB. EXT3_STATE_JDATA is not set on files other than
		 * regular files.  If somebody wants to bmap a directory
		 * or symlink and gets confused because the buffer
		 * hasn't yet been flushed to disk, they deserve
		 * everything they get.
		 */
		EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;
		journal = EXT3_JOURNAL(inode);
		journal_lock_updates(journal);
		err = journal_flush(journal);
		journal_unlock_updates(journal);
		if (err)
			return 0;
	}

	return generic_block_bmap(mapping,block,ext3_get_block);
}

/* Pin a buffer_head (refcount++) during a walk_page_buffers() pass. */
static int bget_one(handle_t *handle, struct buffer_head *bh)
{
	get_bh(bh);
	return 0;
}

/* Drop the reference taken by bget_one(). */
static int bput_one(handle_t *handle, struct buffer_head *bh)
{
	put_bh(bh);
	return 0;
}

/* Journal-dirty a data buffer, skipping unmapped ones. */
static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
{
	if (buffer_mapped(bh))
		return ext3_journal_dirty_data(handle, bh);
	return 0;
}

/*
 * Note that we always start a transaction even if we're not journalling
 * data.  This is to preserve ordering: any hole instantiation within
 * __block_write_full_page -> ext3_get_block() should be journalled
 * along with the data so we don't crash and then get metadata which
 * refers to old data.
 *
 * In all journalling modes block_write_full_page() will start the I/O.
 *
 * Problem:
 *
 *	ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
 *		ext3_writepage()
 *
 * Similar for:
 *
 *	ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
 *
 * Same applies to ext3_get_block().  We will deadlock on various things like
 * lock_journal and i_truncate_sem.
 *
 * Setting PF_MEMALLOC here doesn't work - too many internal memory
 * allocations fail.
 *
 * 16May01: If we're reentered then journal_current_handle() will be
 *	    non-zero. We simply *return*.
 *
 * 1 July 2001: @@@ FIXME:
 *   In journalled data mode, a data buffer may be metadata against the

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?