📄 transaction.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
/** * void journal_lock_updates () - establish a transaction barrier. * @journal:  Journal to establish a barrier on. * * This locks out any further updates from being started, and blocks * until all existing updates have completed, returning only once the * journal is in a quiescent state with no updates running. * * The journal lock should not be held on entry. */void journal_lock_updates(journal_t *journal){	DEFINE_WAIT(wait);	spin_lock(&journal->j_state_lock);	++journal->j_barrier_count;	/* Wait until there are no running updates */	while (1) {		transaction_t *transaction = journal->j_running_transaction;		if (!transaction)			break;		spin_lock(&transaction->t_handle_lock);		if (!transaction->t_updates) {			spin_unlock(&transaction->t_handle_lock);			break;		}		prepare_to_wait(&journal->j_wait_updates, &wait,				TASK_UNINTERRUPTIBLE);		spin_unlock(&transaction->t_handle_lock);		spin_unlock(&journal->j_state_lock);		schedule();		finish_wait(&journal->j_wait_updates, &wait);		spin_lock(&journal->j_state_lock);	}	spin_unlock(&journal->j_state_lock);	/*	 * We have now established a barrier against other normal updates, but	 * we also need to barrier against other journal_lock_updates() calls	 * to make sure that we serialise special journal-locked operations	 * too.	 */	mutex_lock(&journal->j_barrier);}/** * void journal_unlock_updates (journal_t* journal) - release barrier * @journal:  Journal to release the barrier on. * * Release a transaction barrier obtained with journal_lock_updates(). * * Should be called without the journal lock held. */void journal_unlock_updates (journal_t *journal){	J_ASSERT(journal->j_barrier_count != 0);	mutex_unlock(&journal->j_barrier);	spin_lock(&journal->j_state_lock);	--journal->j_barrier_count;	spin_unlock(&journal->j_state_lock);	wake_up(&journal->j_wait_transaction_locked);}/* * Report any unexpected dirty buffers which turn up.  Normally those * indicate an error, but they can occur if the user is running (say) * tune2fs to modify the live filesystem, so we need the option of * continuing as gracefully as possible.  # * * The caller should already hold the journal lock and * j_list_lock spinlock: most callers will need those anyway * in order to probe the buffer's journaling state safely. */static void jbd_unexpected_dirty_buffer(struct journal_head *jh){	int jlist;	/* If this buffer is one which might reasonably be dirty	 * --- ie. data, or not part of this journal --- then	 * we're OK to leave it alone, but otherwise we need to	 * move the dirty bit to the journal's own internal	 * JBDDirty bit. */	jlist = jh->b_jlist;	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||	    jlist == BJ_Shadow || jlist == BJ_Forget) {		struct buffer_head *bh = jh2bh(jh);		if (test_clear_buffer_dirty(bh))			set_buffer_jbddirty(bh);	}}/* * If the buffer is already part of the current transaction, then there * is nothing we need to do.  If it is already part of a prior * transaction which we are still committing to disk, then we need to * make sure that we do not overwrite the old copy: we do copy-out to * preserve the copy going to disk.  We also account the buffer against * the handle's metadata buffer credits (unless the buffer is already * part of the transaction, that is). * */static intdo_get_write_access(handle_t *handle, struct journal_head *jh,			int force_copy){	struct buffer_head *bh;	transaction_t *transaction;	journal_t *journal;	int error;	char *frozen_buffer = NULL;	int need_copy = 0;	if (is_handle_aborted(handle))		return -EROFS;	transaction = handle->h_transaction;	journal = transaction->t_journal;	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);	JBUFFER_TRACE(jh, "entry");repeat:	bh = jh2bh(jh);	/* @@@ Need to check for errors here at some point. */	lock_buffer(bh);	jbd_lock_bh_state(bh);	/* We now hold the buffer lock so it is safe to query the buffer	 * state.  Is the buffer dirty?	 *	 * If so, there are two possibilities.  The buffer may be	 * non-journaled, and undergoing a quite legitimate writeback.	 * Otherwise, it is journaled, and we don't expect dirty buffers	 * in that state (the buffers should be marked JBD_Dirty	 * instead.)  So either the IO is being done under our own	 * control and this is a bug, or it's a third party IO such as	 * dump(8) (which may leave the buffer scheduled for read ---	 * ie. locked but not dirty) or tune2fs (which may actually have	 * the buffer dirtied, ugh.)  */	if (buffer_dirty(bh)) {		/*		 * First question: is this buffer already part of the current		 * transaction or the existing committing transaction?		 */		if (jh->b_transaction) {			J_ASSERT_JH(jh,				jh->b_transaction == transaction ||				jh->b_transaction ==					journal->j_committing_transaction);			if (jh->b_next_transaction)				J_ASSERT_JH(jh, jh->b_next_transaction ==							transaction);		}		/*		 * In any case we need to clean the dirty flag and we must		 * do it under the buffer lock to be sure we don't race		 * with running write-out.		 */		JBUFFER_TRACE(jh, "Unexpected dirty buffer");		jbd_unexpected_dirty_buffer(jh);	}	unlock_buffer(bh);	error = -EROFS;	if (is_handle_aborted(handle)) {		jbd_unlock_bh_state(bh);		goto out;	}	error = 0;	/*	 * The buffer is already part of this transaction if b_transaction or	 * b_next_transaction points to it	 */	if (jh->b_transaction == transaction ||	    jh->b_next_transaction == transaction)		goto done;	/*	 * If there is already a copy-out version of this buffer, then we don't	 * need to make another one	 */	if (jh->b_frozen_data) {		JBUFFER_TRACE(jh, "has frozen data");		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);		jh->b_next_transaction = transaction;		goto done;	}	/* Is there data here we need to preserve? */	if (jh->b_transaction && jh->b_transaction != transaction) {		JBUFFER_TRACE(jh, "owned by older transaction");		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);		J_ASSERT_JH(jh, jh->b_transaction ==					journal->j_committing_transaction);		/* There is one case we have to be very careful about.		 * If the committing transaction is currently writing		 * this buffer out to disk and has NOT made a copy-out,		 * then we cannot modify the buffer contents at all		 * right now.  The essence of copy-out is that it is the		 * extra copy, not the primary copy, which gets		 * journaled.  If the primary copy is already going to		 * disk then we cannot do copy-out here. */		if (jh->b_jlist == BJ_Shadow) {			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);			wait_queue_head_t *wqh;			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);			JBUFFER_TRACE(jh, "on shadow: sleep");			jbd_unlock_bh_state(bh);			/* commit wakes up all shadow buffers after IO */			for ( ; ; ) {				prepare_to_wait(wqh, &wait.wait,						TASK_UNINTERRUPTIBLE);				if (jh->b_jlist != BJ_Shadow)					break;				schedule();			}			finish_wait(wqh, &wait.wait);			goto repeat;		}		/* Only do the copy if the currently-owning transaction		 * still needs it.  If it is on the Forget list, the		 * committing transaction is past that stage.  The		 * buffer had better remain locked during the kmalloc,		 * but that should be true --- we hold the journal lock		 * still and the buffer is already on the BUF_JOURNAL		 * list so won't be flushed.		 *		 * Subtle point, though: if this is a get_undo_access,		 * then we will be relying on the frozen_data to contain		 * the new value of the committed_data record after the		 * transaction, so we HAVE to force the frozen_data copy		 * in that case. */		if (jh->b_jlist != BJ_Forget || force_copy) {			JBUFFER_TRACE(jh, "generate frozen data");			if (!frozen_buffer) {				JBUFFER_TRACE(jh, "allocate memory for buffer");				jbd_unlock_bh_state(bh);				frozen_buffer =					jbd_alloc(jh2bh(jh)->b_size,							 GFP_NOFS);				if (!frozen_buffer) {					printk(KERN_EMERG					       "%s: OOM for frozen_buffer\n",					       __FUNCTION__);					JBUFFER_TRACE(jh, "oom!");					error = -ENOMEM;					jbd_lock_bh_state(bh);					goto done;				}				goto repeat;			}			jh->b_frozen_data = frozen_buffer;			frozen_buffer = NULL;			need_copy = 1;		}		jh->b_next_transaction = transaction;	}	/*	 * Finally, if the buffer is not journaled right now, we need to make	 * sure it doesn't get written to disk before the caller actually	 * commits the new data	 */	if (!jh->b_transaction) {		JBUFFER_TRACE(jh, "no transaction");		J_ASSERT_JH(jh, !jh->b_next_transaction);		jh->b_transaction = transaction;		JBUFFER_TRACE(jh, "file as BJ_Reserved");		spin_lock(&journal->j_list_lock);		__journal_file_buffer(jh, transaction, BJ_Reserved);		spin_unlock(&journal->j_list_lock);	}done:	if (need_copy) {		struct page *page;		int offset;		char *source;		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),			    "Possible IO failure.\n");		page = jh2bh(jh)->b_page;		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;		source = kmap_atomic(page, KM_USER0);		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);		kunmap_atomic(source, KM_USER0);	}	jbd_unlock_bh_state(bh);	/*	 * If we are about to journal a buffer, then any revoke pending on it is	 * no longer valid	 */	journal_cancel_revoke(handle, jh);out:	if (unlikely(frozen_buffer))	/* It's usually NULL */		jbd_free(frozen_buffer, bh->b_size);	JBUFFER_TRACE(jh, "exit");	return error;}/** * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh:     bh to be used for metadata writes * @credits: variable that will receive credits for the buffer * * Returns an error code or 0 on success. * * In full data journalling mode the buffer may be of type BJ_AsyncData, * because we're write()ing a buffer which is also part of a shared mapping. */int journal_get_write_access(handle_t *handle, struct buffer_head *bh){	struct journal_head *jh = journal_add_journal_head(bh);	int rc;	/* We do not want to get caught playing with fields which the	 * log thread also manipulates.  Make sure that the buffer	 * completes any outstanding IO before proceeding. */	rc = do_get_write_access(handle, jh, 0);	journal_put_journal_head(jh);	return rc;}/* * When the user wants to journal a newly created buffer_head * (ie. getblk() returned a new buffer and we are going to populate it * manually rather than reading off disk), then we need to keep the * buffer_head locked until it has been completely filled with new * data.  In this case, we should be able to make the assertion that * the bh is not already part of an existing transaction. * * The buffer should already be locked by the caller by this point. * There is no lock ranking violation: it was a newly created, * unlocked buffer beforehand. *//** * int journal_get_create_access () - notify intent to use newly created bh * @handle: transaction to new buffer to * @bh: new buffer. * * Call this if you create a new bh. */int journal_get_create_access(handle_t *handle, struct buffer_head *bh){	transaction_t *transaction = handle->h_transaction;	journal_t *journal = transaction->t_journal;	struct journal_head *jh = journal_add_journal_head(bh);	int err;	jbd_debug(5, "journal_head %p\n", jh);	err = -EROFS;	if (is_handle_aborted(handle))		goto out;	err = 0;	JBUFFER_TRACE(jh, "entry");	/*	 * The buffer may already belong to this transaction due to pre-zeroing	 * in the filesystem's new_block code.  It may also be on the previous,	 * committing transaction's lists, but it HAS to be in Forget state in	 * that case: the transaction must have deleted the buffer for it to be	 * reused here.	 */	jbd_lock_bh_state(bh);	spin_lock(&journal->j_list_lock);	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||		jh->b_transaction == NULL ||		(jh->b_transaction == journal->j_committing_transaction &&			  jh->b_jlist == BJ_Forget)));	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));	if (jh->b_transaction == NULL) {		jh->b_transaction = transaction;		JBUFFER_TRACE(jh, "file as BJ_Reserved");		__journal_file_buffer(jh, transaction, BJ_Reserved);	} else if (jh->b_transaction == journal->j_committing_transaction) {		JBUFFER_TRACE(jh, "set next transaction");		jh->b_next_transaction = transaction;	}	spin_unlock(&journal->j_list_lock);	jbd_unlock_bh_state(bh);	/*	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect	 * blocks which contain freed but then revoked metadata.  We need	 * to cancel the revoke in case we end up freeing it yet again	 * and the reallocating as data - this would cause a second revoke,	 * which hits an assertion error.	 */	JBUFFER_TRACE(jh, "cancelling revoke");	journal_cancel_revoke(handle, jh);	journal_put_journal_head(jh);out:	return err;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -