📄 transaction.c
字号:
/** * void journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. * * This locks out any further updates from being started, and blocks * until all existing updates have completed, returning only once the * journal is in a quiescent state with no updates running. * * The journal lock should not be held on entry. */void journal_lock_updates(journal_t *journal){ DEFINE_WAIT(wait); spin_lock(&journal->j_state_lock); ++journal->j_barrier_count; /* Wait until there are no running updates */ while (1) { transaction_t *transaction = journal->j_running_transaction; if (!transaction) break; spin_lock(&transaction->t_handle_lock); if (!transaction->t_updates) { spin_unlock(&transaction->t_handle_lock); break; } prepare_to_wait(&journal->j_wait_updates, &wait, TASK_UNINTERRUPTIBLE); spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_updates, &wait); spin_lock(&journal->j_state_lock); } spin_unlock(&journal->j_state_lock); /* * We have now established a barrier against other normal updates, but * we also need to barrier against other journal_lock_updates() calls * to make sure that we serialise special journal-locked operations * too. */ mutex_lock(&journal->j_barrier);}/** * void journal_unlock_updates (journal_t* journal) - release barrier * @journal: Journal to release the barrier on. * * Release a transaction barrier obtained with journal_lock_updates(). * * Should be called without the journal lock held. */void journal_unlock_updates (journal_t *journal){ J_ASSERT(journal->j_barrier_count != 0); mutex_unlock(&journal->j_barrier); spin_lock(&journal->j_state_lock); --journal->j_barrier_count; spin_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_transaction_locked);}/* * Report any unexpected dirty buffers which turn up. Normally those * indicate an error, but they can occur if the user is running (say) * tune2fs to modify the live filesystem, so we need the option of * continuing as gracefully as possible. # * * The caller should already hold the journal lock and * j_list_lock spinlock: most callers will need those anyway * in order to probe the buffer's journaling state safely. */static void jbd_unexpected_dirty_buffer(struct journal_head *jh){ int jlist; /* If this buffer is one which might reasonably be dirty * --- ie. data, or not part of this journal --- then * we're OK to leave it alone, but otherwise we need to * move the dirty bit to the journal's own internal * JBDDirty bit. */ jlist = jh->b_jlist; if (jlist == BJ_Metadata || jlist == BJ_Reserved || jlist == BJ_Shadow || jlist == BJ_Forget) { struct buffer_head *bh = jh2bh(jh); if (test_clear_buffer_dirty(bh)) set_buffer_jbddirty(bh); }}/* * If the buffer is already part of the current transaction, then there * is nothing we need to do. If it is already part of a prior * transaction which we are still committing to disk, then we need to * make sure that we do not overwrite the old copy: we do copy-out to * preserve the copy going to disk. We also account the buffer against * the handle's metadata buffer credits (unless the buffer is already * part of the transaction, that is). * */static intdo_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy){ struct buffer_head *bh; transaction_t *transaction; journal_t *journal; int error; char *frozen_buffer = NULL; int need_copy = 0; if (is_handle_aborted(handle)) return -EROFS; transaction = handle->h_transaction; journal = transaction->t_journal; jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); JBUFFER_TRACE(jh, "entry");repeat: bh = jh2bh(jh); /* @@@ Need to check for errors here at some point. */ lock_buffer(bh); jbd_lock_bh_state(bh); /* We now hold the buffer lock so it is safe to query the buffer * state. Is the buffer dirty? * * If so, there are two possibilities. The buffer may be * non-journaled, and undergoing a quite legitimate writeback. * Otherwise, it is journaled, and we don't expect dirty buffers * in that state (the buffers should be marked JBD_Dirty * instead.) So either the IO is being done under our own * control and this is a bug, or it's a third party IO such as * dump(8) (which may leave the buffer scheduled for read --- * ie. locked but not dirty) or tune2fs (which may actually have * the buffer dirtied, ugh.) */ if (buffer_dirty(bh)) { /* * First question: is this buffer already part of the current * transaction or the existing committing transaction? */ if (jh->b_transaction) { J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_transaction == journal->j_committing_transaction); if (jh->b_next_transaction) J_ASSERT_JH(jh, jh->b_next_transaction == transaction); } /* * In any case we need to clean the dirty flag and we must * do it under the buffer lock to be sure we don't race * with running write-out. */ JBUFFER_TRACE(jh, "Unexpected dirty buffer"); jbd_unexpected_dirty_buffer(jh); } unlock_buffer(bh); error = -EROFS; if (is_handle_aborted(handle)) { jbd_unlock_bh_state(bh); goto out; } error = 0; /* * The buffer is already part of this transaction if b_transaction or * b_next_transaction points to it */ if (jh->b_transaction == transaction || jh->b_next_transaction == transaction) goto done; /* * If there is already a copy-out version of this buffer, then we don't * need to make another one */ if (jh->b_frozen_data) { JBUFFER_TRACE(jh, "has frozen data"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); jh->b_next_transaction = transaction; goto done; } /* Is there data here we need to preserve? */ if (jh->b_transaction && jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "owned by older transaction"); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction); /* There is one case we have to be very careful about. * If the committing transaction is currently writing * this buffer out to disk and has NOT made a copy-out, * then we cannot modify the buffer contents at all * right now. The essence of copy-out is that it is the * extra copy, not the primary copy, which gets * journaled. If the primary copy is already going to * disk then we cannot do copy-out here. */ if (jh->b_jlist == BJ_Shadow) { DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); wait_queue_head_t *wqh; wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); /* commit wakes up all shadow buffers after IO */ for ( ; ; ) { prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE); if (jh->b_jlist != BJ_Shadow) break; schedule(); } finish_wait(wqh, &wait.wait); goto repeat; } /* Only do the copy if the currently-owning transaction * still needs it. If it is on the Forget list, the * committing transaction is past that stage. The * buffer had better remain locked during the kmalloc, * but that should be true --- we hold the journal lock * still and the buffer is already on the BUF_JOURNAL * list so won't be flushed. * * Subtle point, though: if this is a get_undo_access, * then we will be relying on the frozen_data to contain * the new value of the committed_data record after the * transaction, so we HAVE to force the frozen_data copy * in that case. */ if (jh->b_jlist != BJ_Forget || force_copy) { JBUFFER_TRACE(jh, "generate frozen data"); if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", __FUNCTION__); JBUFFER_TRACE(jh, "oom!"); error = -ENOMEM; jbd_lock_bh_state(bh); goto done; } goto repeat; } jh->b_frozen_data = frozen_buffer; frozen_buffer = NULL; need_copy = 1; } jh->b_next_transaction = transaction; } /* * Finally, if the buffer is not journaled right now, we need to make * sure it doesn't get written to disk before the caller actually * commits the new data */ if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __journal_file_buffer(jh, transaction, BJ_Reserved); spin_unlock(&journal->j_list_lock); }done: if (need_copy) { struct page *page; int offset; char *source; J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), "Possible IO failure.\n"); page = jh2bh(jh)->b_page; offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; source = kmap_atomic(page, KM_USER0); memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); kunmap_atomic(source, KM_USER0); } jbd_unlock_bh_state(bh); /* * If we are about to journal a buffer, then any revoke pending on it is * no longer valid */ journal_cancel_revoke(handle, jh);out: if (unlikely(frozen_buffer)) /* It's usually NULL */ jbd_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error;}/** * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes * @credits: variable that will receive credits for the buffer * * Returns an error code or 0 on success. * * In full data journalling mode the buffer may be of type BJ_AsyncData, * because we're write()ing a buffer which is also part of a shared mapping. */int journal_get_write_access(handle_t *handle, struct buffer_head *bh){ struct journal_head *jh = journal_add_journal_head(bh); int rc; /* We do not want to get caught playing with fields which the * log thread also manipulates. Make sure that the buffer * completes any outstanding IO before proceeding. */ rc = do_get_write_access(handle, jh, 0); journal_put_journal_head(jh); return rc;}/* * When the user wants to journal a newly created buffer_head * (ie. getblk() returned a new buffer and we are going to populate it * manually rather than reading off disk), then we need to keep the * buffer_head locked until it has been completely filled with new * data. In this case, we should be able to make the assertion that * the bh is not already part of an existing transaction. * * The buffer should already be locked by the caller by this point. * There is no lock ranking violation: it was a newly created, * unlocked buffer beforehand. *//** * int journal_get_create_access () - notify intent to use newly created bh * @handle: transaction to new buffer to * @bh: new buffer. * * Call this if you create a new bh. */int journal_get_create_access(handle_t *handle, struct buffer_head *bh){ transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; struct journal_head *jh = journal_add_journal_head(bh); int err; jbd_debug(5, "journal_head %p\n", jh); err = -EROFS; if (is_handle_aborted(handle)) goto out; err = 0; JBUFFER_TRACE(jh, "entry"); /* * The buffer may already belong to this transaction due to pre-zeroing * in the filesystem's new_block code. It may also be on the previous, * committing transaction's lists, but it HAS to be in Forget state in * that case: the transaction must have deleted the buffer for it to be * reused here. */ jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); J_ASSERT_JH(jh, (jh->b_transaction == transaction || jh->b_transaction == NULL || (jh->b_transaction == journal->j_committing_transaction && jh->b_jlist == BJ_Forget))); J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); if (jh->b_transaction == NULL) { jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); __journal_file_buffer(jh, transaction, BJ_Reserved); } else if (jh->b_transaction == journal->j_committing_transaction) { JBUFFER_TRACE(jh, "set next transaction"); jh->b_next_transaction = transaction; } spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); /* * akpm: I added this. ext3_alloc_branch can pick up new indirect * blocks which contain freed but then revoked metadata. We need * to cancel the revoke in case we end up freeing it yet again * and the reallocating as data - this would cause a second revoke, * which hits an assertion error. */ JBUFFER_TRACE(jh, "cancelling revoke"); journal_cancel_revoke(handle, jh); journal_put_journal_head(jh);out: return err;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -