commit.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 842 行 · 第 1/2 页

C
842
字号
/*
 * linux/fs/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>

/*
 * Default IO end handler for temporary BJ_IO buffer_heads.
 *
 * Propagates the IO completion result into the buffer's uptodate flag
 * and unlocks the buffer so that anyone waiting on it can proceed.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
}

/*
 * When an ext3-ordered file is truncated, it is possible that many pages are
 * not successfully freed, because they are attached to a committing
 * transaction.  After the transaction commits, these pages are left on the
 * LRU, with no ->mapping, and with attached buffers.  These pages are
 * trivially reclaimable by the VM, but their apparent absence upsets the VM
 * accounting, and it makes the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under journal_datalist_lock.  The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page;

	/* A dirty buffer, or one with other references, cannot be stripped. */
	if (buffer_dirty(bh))
		goto nope;
	if (atomic_read(&bh->b_count) != 1)
		goto nope;
	page = bh->b_page;
	if (!page)
		goto nope;
	/* A page which still has a ->mapping was not truncated. */
	if (page->mapping)
		goto nope;

	/* OK, it's a truncated page */
	if (TestSetPageLocked(page))
		goto nope;	/* lost the page trylock: leave it alone */

	/*
	 * Take a page reference before dropping the caller's buffer ref,
	 * so the page cannot go away while we strip its buffers.
	 */
	page_cache_get(page);
	__brelse(bh);
	try_to_free_buffers(page);
	unlock_page(page);
	page_cache_release(page);
	return;

nope:
	/* Could not strip the page; just drop the caller's buffer ref. */
	__brelse(bh);
}

/*
 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
 */
static int inverted_lock(journal_t *journal, struct buffer_head *bh)
{
	if (!jbd_trylock_bh_state(bh)) {
		/* On the failure path the caller must retake j_list_lock. */
		spin_unlock(&journal->j_list_lock);
		schedule();
		return 0;
	}
	return 1;
}

/*
 * journal_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
 */
void journal_commit_transaction(journal_t *journal)
{
	transaction_t *commit_transaction;
	struct journal_head *jh, *new_jh, *descriptor;
	struct buffer_head *wbuf[64];
	int bufs;
	int flags;
	int err;
	unsigned long blocknr;
	char *tagp = NULL;
	journal_header_t *header;
	journal_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
	int i;

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

#ifdef COMMIT_STATS
	spin_lock(&journal->j_list_lock);
	summarise_journal_usage(journal);
	spin_unlock(&journal->j_list_lock);
#endif

	/* Do we need to erase the effects of a prior journal_flush?
*/	if (journal->j_flags & JFS_FLUSHED) {		jbd_debug(3, "super block updated\n");		journal_update_superblock(journal, 1);	} else {		jbd_debug(3, "superblock not updated\n");	}	J_ASSERT(journal->j_running_transaction != NULL);	J_ASSERT(journal->j_committing_transaction == NULL);	commit_transaction = journal->j_running_transaction;	J_ASSERT(commit_transaction->t_state == T_RUNNING);	jbd_debug(1, "JBD: starting commit of transaction %d\n",			commit_transaction->t_tid);	spin_lock(&journal->j_state_lock);	commit_transaction->t_state = T_LOCKED;	spin_lock(&commit_transaction->t_handle_lock);	while (commit_transaction->t_updates) {		DEFINE_WAIT(wait);		prepare_to_wait(&journal->j_wait_updates, &wait,					TASK_UNINTERRUPTIBLE);		if (commit_transaction->t_updates) {			spin_unlock(&commit_transaction->t_handle_lock);			spin_unlock(&journal->j_state_lock);			schedule();			spin_lock(&journal->j_state_lock);			spin_lock(&commit_transaction->t_handle_lock);		}		finish_wait(&journal->j_wait_updates, &wait);	}	spin_unlock(&commit_transaction->t_handle_lock);	J_ASSERT (commit_transaction->t_outstanding_credits <=			journal->j_max_transaction_buffers);	/*	 * First thing we are allowed to do is to discard any remaining	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume	 * that there are no such buffers: if a large filesystem	 * operation like a truncate needs to split itself over multiple	 * transactions, then it may try to do a journal_restart() while	 * there are still BJ_Reserved buffers outstanding.  These must	 * be released cleanly from the current transaction.	 *	 * In this case, the filesystem must still reserve write access	 * again before modifying the buffer in the new transaction, but	 * we do not require it to remember exactly which old buffers it	 * has reserved.  This is consistent with the existing behaviour	 * that multiple journal_get_write_access() calls to the same	 * buffer are perfectly permissable.	 
*/	while (commit_transaction->t_reserved_list) {		jh = commit_transaction->t_reserved_list;		JBUFFER_TRACE(jh, "reserved, unused: refile");		/*		 * A journal_get_undo_access()+journal_release_buffer() may		 * leave undo-committed data.		 */		if (jh->b_committed_data) {			struct buffer_head *bh = jh2bh(jh);			jbd_lock_bh_state(bh);			if (jh->b_committed_data) {				kfree(jh->b_committed_data);				jh->b_committed_data = NULL;			}			jbd_unlock_bh_state(bh);		}		journal_refile_buffer(journal, jh);	}	/*	 * Now try to drop any written-back buffers from the journal's	 * checkpoint lists.  We do this *before* commit because it potentially	 * frees some memory	 */	spin_lock(&journal->j_list_lock);	__journal_clean_checkpoint_list(journal);	spin_unlock(&journal->j_list_lock);	jbd_debug (3, "JBD: commit phase 1\n");	/*	 * Switch to a new revoke table.	 */	journal_switch_revoke_table(journal);	commit_transaction->t_state = T_FLUSH;	journal->j_committing_transaction = commit_transaction;	journal->j_running_transaction = NULL;	commit_transaction->t_log_start = journal->j_head;	wake_up(&journal->j_wait_transaction_locked);	spin_unlock(&journal->j_state_lock);	jbd_debug (3, "JBD: commit phase 2\n");	/*	 * Now start flushing things to disk, in the order they appear	 * on the transaction lists.  Data blocks go first.	 */	err = 0;	/*	 * Whenever we unlock the journal and sleep, things can get added	 * onto ->t_sync_datalist, so we have to keep looping back to	 * write_out_data until we *know* that the list is empty.	 */	bufs = 0;	/*	 * Cleanup any flushed data buffers from the data list.  Even in	 * abort mode, we want to flush this out as soon as possible.	 
*/write_out_data:	cond_resched();	spin_lock(&journal->j_list_lock);	while (commit_transaction->t_sync_datalist) {		struct buffer_head *bh;		jh = commit_transaction->t_sync_datalist;		commit_transaction->t_sync_datalist = jh->b_tnext;		bh = jh2bh(jh);		if (buffer_locked(bh)) {			BUFFER_TRACE(bh, "locked");			if (!inverted_lock(journal, bh))				goto write_out_data;			__journal_unfile_buffer(jh);			__journal_file_buffer(jh, commit_transaction,						BJ_Locked);			jbd_unlock_bh_state(bh);			if (need_resched()) {				spin_unlock(&journal->j_list_lock);				goto write_out_data;			}		} else {			if (buffer_dirty(bh)) {				BUFFER_TRACE(bh, "start journal writeout");				get_bh(bh);				wbuf[bufs++] = bh;				if (bufs == ARRAY_SIZE(wbuf)) {					jbd_debug(2, "submit %d writes\n",							bufs);					spin_unlock(&journal->j_list_lock);					ll_rw_block(WRITE, bufs, wbuf);					journal_brelse_array(wbuf, bufs);					bufs = 0;					goto write_out_data;				}			} else {				BUFFER_TRACE(bh, "writeout complete: unfile");				if (!inverted_lock(journal, bh))					goto write_out_data;				__journal_unfile_buffer(jh);				jbd_unlock_bh_state(bh);				journal_remove_journal_head(bh);				put_bh(bh);				if (need_resched()) {					spin_unlock(&journal->j_list_lock);					goto write_out_data;				}			}		}	}	if (bufs) {		spin_unlock(&journal->j_list_lock);		ll_rw_block(WRITE, bufs, wbuf);		journal_brelse_array(wbuf, bufs);		spin_lock(&journal->j_list_lock);	}	/*	 * Wait for all previously submitted IO to complete.	 
*/	while (commit_transaction->t_locked_list) {		struct buffer_head *bh;		jh = commit_transaction->t_locked_list->b_tprev;		bh = jh2bh(jh);		get_bh(bh);		if (buffer_locked(bh)) {			spin_unlock(&journal->j_list_lock);			wait_on_buffer(bh);			if (unlikely(!buffer_uptodate(bh)))				err = -EIO;			spin_lock(&journal->j_list_lock);		}		if (!inverted_lock(journal, bh)) {			put_bh(bh);			spin_lock(&journal->j_list_lock);			continue;		}		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {			__journal_unfile_buffer(jh);			jbd_unlock_bh_state(bh);			journal_remove_journal_head(bh);			put_bh(bh);		} else {			jbd_unlock_bh_state(bh);		}		put_bh(bh);		if (need_resched()) {			spin_unlock(&journal->j_list_lock);			cond_resched();			spin_lock(&journal->j_list_lock);		}	}	spin_unlock(&journal->j_list_lock);	journal_write_revoke_records(journal, commit_transaction);	jbd_debug(3, "JBD: commit phase 2\n");	/*	 * If we found any dirty or locked buffers, then we should have	 * looped back up to the write_out_data label.  If there weren't	 * any then journal_clean_data_list should have wiped the list	 * clean by now, so check that it is in fact empty.	 */	J_ASSERT (commit_transaction->t_sync_datalist == NULL);	jbd_debug (3, "JBD: commit phase 3\n");	/*	 * Way to go: we have now written out all of the data for a	 * transaction!  Now comes the tricky part: we need to write out	 * metadata.  Loop over the transaction's entire buffer list:	 */	commit_transaction->t_state = T_COMMIT;	descriptor = NULL;	bufs = 0;	while (commit_transaction->t_buffers) {		/* Find the next buffer to be journaled... */		jh = commit_transaction->t_buffers;		/* If we're in abort mode, we just un-journal the buffer and		   release it for background writing. 
*/		if (is_journal_aborted(journal)) {			JBUFFER_TRACE(jh, "journal is aborting: refile");			journal_refile_buffer(journal, jh);			/* If that was the last one, we need to clean up			 * any descriptor buffers which may have been			 * already allocated, even if we are now			 * aborting. */			if (!commit_transaction->t_buffers)				goto start_journal_io;			continue;		}		/* Make sure we have a descriptor block in which to		   record the metadata buffer. */		if (!descriptor) {			struct buffer_head *bh;			J_ASSERT (bufs == 0);			jbd_debug(4, "JBD: get descriptor\n");			descriptor = journal_get_descriptor_buffer(journal);			if (!descriptor) {				__journal_abort_hard(journal);				continue;			}			bh = jh2bh(descriptor);			jbd_debug(4, "JBD: got buffer %llu (%p)\n",				(unsigned long long)bh->b_blocknr, bh->b_data);			header = (journal_header_t *)&bh->b_data[0];			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);			tagp = &bh->b_data[sizeof(journal_header_t)];			space_left = bh->b_size - sizeof(journal_header_t);			first_tag = 1;			set_buffer_jwrite(bh);			set_buffer_dirty(bh);			wbuf[bufs++] = bh;			/* Record it so that we can wait for IO                           completion later */			BUFFER_TRACE(bh, "ph3: file as descriptor");

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?