📄 replay.c
字号:
#include <linux/module.h>
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/freezer.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
/*
* Called under j_state_lock. Returns true if a transaction was started.
*/
int __log_start_commit(journal_t *journal, tid_t target)
{
/*
* Are we already doing a recent enough commit?
*/
if (!tid_geq(journal->j_commit_request, target)) {
/*
* We want a new commit: OK, mark the request and wakup the
* commit thread. We do _not_ do the commit ourselves.
*/
journal->j_commit_request = target;
jbd_debug(1, "JBD: requesting commit %d/%d\n",
journal->j_commit_request,
journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
return 1;
}
return 0;
}
int log_start_commit(journal_t *journal, tid_t tid)
{
int ret;
spin_lock(&journal->j_state_lock);
ret = __log_start_commit(journal, tid);
spin_unlock(&journal->j_state_lock);
return ret;
}
/*
* Journal abort has very specific semantics, which we describe
* for journal abort.
*
* Two internal function, which provide abort to te jbd layer
* itself are here.
*/
/*
* Quick version for internal journal use (doesn't lock the journal).
* Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
* and don't attempt to make any other journal updates.
*/
static void __journal_abort_hard(journal_t *journal)
{
transaction_t *transaction;
if (journal->j_flags & JFS_ABORT)
return;
spin_lock(&journal->j_state_lock);
journal->j_flags |= JFS_ABORT;
transaction = journal->j_running_transaction;
if (transaction)
__log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock);
}
/* Soft abort: record the abort error status in the journal superblock,
* but don't do any other IO. */
static void __journal_abort_soft (journal_t *journal, int err)
{
if (journal->j_flags & JFS_ABORT)
return;
if (!journal->j_errno)
journal->j_errno = err;
__journal_abort_hard(journal);
if (err)
journal_update_superblock(journal, 1);
}
/**
* void journal_abort () - Shutdown the journal immediately.
* @journal: the journal to shutdown.
* @errno: an error number to record in the journal indicating
* the reason for the shutdown.
*
* Perform a complete, immediate shutdown of the ENTIRE
* journal (not of a single transaction). This operation cannot be
* undone without closing and reopening the journal.
*
* The journal_abort function is intended to support higher level error
* recovery mechanisms such as the ext2/ext3 remount-readonly error
* mode.
*
* Journal abort has very specific semantics. Any existing dirty,
* unjournaled buffers in the main filesystem will still be written to
* disk by bdflush, but the journaling mechanism will be suspended
* immediately and no further transaction commits will be honoured.
*
* Any dirty, journaled buffers will be written back to disk without
* hitting the journal. Atomicity cannot be guaranteed on an aborted
* filesystem, but we _do_ attempt to leave as much data as possible
* behind for fsck to use for cleanup.
*
* Any attempt to get a new transaction handle on a journal which is in
* ABORT state will just result in an -EROFS error return. A
* journal_stop on an existing handle will return -EIO if we have
* entered abort state during the update.
*
* Recursive transactions are not disturbed by journal abort until the
* final journal_stop, which will receive the -EIO error.
*
* Finally, the journal_abort call allows the caller to supply an errno
* which will be recorded (if possible) in the journal superblock. This
* allows a client to record failure conditions in the middle of a
* transaction without having to complete the transaction to record the
* failure to disk. ext3_error, for example, now uses this
* functionality.
*
* Errors which originate from within the journaling layer will NOT
* supply an errno; a null errno implies that absolutely no further
* writes are done to the journal (unless there are any already in
* progress).
*
*/
void journal_abort(journal_t *journal, int err)
{
__journal_abort_soft(journal, err);
}
/**
* int journal_errno () - returns the journal's error state.
* @journal: journal to examine.
*
* This is the errno numbet set with journal_abort(), the last
* time the journal was mounted - if the journal was stopped
* without calling abort this will be 0.
*
* If the journal has been aborted on this mount time -EROFS will
* be returned.
*/
int journal_errno(journal_t *journal)
{
int err;
spin_lock(&journal->j_state_lock);
if (journal->j_flags & JFS_ABORT)
err = -EROFS;
else
err = journal->j_errno;
spin_unlock(&journal->j_state_lock);
return err;
}
/**
* int journal_clear_err () - clears the journal's error state
* @journal: journal to act on.
*
* An error must be cleared or Acked to take a FS out of readonly
* mode.
*/
int journal_clear_err(journal_t *journal)
{
int err = 0;
spin_lock(&journal->j_state_lock);
if (journal->j_flags & JFS_ABORT)
err = -EROFS;
else
journal->j_errno = 0;
spin_unlock(&journal->j_state_lock);
return err;
}
/**
* void journal_ack_err() - Ack journal err.
* @journal: journal to act on.
*
* An error must be cleared or Acked to take a FS out of readonly
* mode.
*/
void journal_ack_err(journal_t *journal)
{
spin_lock(&journal->j_state_lock);
if (journal->j_errno)
journal->j_flags |= JFS_ACK_ERR;
spin_unlock(&journal->j_state_lock);
}
int journal_blocks_per_page(struct inode *inode)
{
return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
}
/*
* Journal_head storage management
*/
static struct kmem_cache *journal_head_cache = NULL;
#ifdef CONFIG_JBD_DEBUG
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
static int journal_init_journal_head_cache(void)
{
int retval;
J_ASSERT(journal_head_cache == 0);
journal_head_cache = kmem_cache_create("journal_head",
sizeof(struct journal_head),
0, /* offset */
SLAB_TEMPORARY, /* flags */
NULL); /* ctor */
retval = 0;
if (journal_head_cache == 0) {
retval = -ENOMEM;
printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
}
return retval;
}
static void journal_destroy_journal_head_cache(void)
{
J_ASSERT(journal_head_cache != NULL);
kmem_cache_destroy(journal_head_cache);
journal_head_cache = NULL;
}
/*
* journal_head splicing and dicing
*/
static struct journal_head *journal_alloc_journal_head(void)
{
struct journal_head *ret;
static unsigned long last_warning;
#ifdef CONFIG_JBD_DEBUG
atomic_inc(&nr_journal_heads);
#endif
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
if (ret == NULL) {
jbd_debug(1, "out of memory for journal_head\n");
if (time_after(jiffies, last_warning + 5*HZ)) {
printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
__FUNCTION__);
last_warning = jiffies;
}
while (ret == NULL) {
yield();
ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
}
}
return ret;
}
static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD_DEBUG
atomic_dec(&nr_journal_heads);
memset(jh, JBD_POISON_FREE, sizeof(*jh));
#endif
kmem_cache_free(journal_head_cache, jh);
}
/*
* A journal_head is attached to a buffer_head whenever JBD has an
* interest in the buffer.
*
* Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
* is set. This bit is tested in core kernel code where we need to take
* JBD-specific actions. Testing the zeroness of ->b_private is not reliable
* there.
*
* When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
*
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
* A journal_head may be detached from its buffer_head when the journal_head's
* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
* Various places in JBD call journal_remove_journal_head() to indicate that the
* journal_head can be dropped if needed.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
* journal_head in this situation, journal_add_journal_head elevates the
* journal_head's b_jcount refcount by one. The caller must call
* journal_put_journal_head() to undo this.
*
* So the typical usage would be:
*
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = journal_add_journal_head(bh);
* ...
* jh->b_transaction = xxx;
* journal_put_journal_head(jh);
*
* Now, the journal_head's b_jcount is zero, but it is safe from being released
* because it has a non-zero b_transaction.
*/
/*
* Give a buffer_head a journal_head.
*
* Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
{
struct journal_head *jh;
struct journal_head *new_jh = NULL;
repeat:
if (!buffer_jbd(bh)) {
new_jh = journal_alloc_journal_head();
memset(new_jh, 0, sizeof(*new_jh));
}
jbd_lock_bh_journal_head(bh);
if (buffer_jbd(bh)) {
jh = bh2jh(bh);
} else {
J_ASSERT_BH(bh,
(atomic_read(&bh->b_count) > 0) ||
(bh->b_page && bh->b_page->mapping));
if (!new_jh) {
jbd_unlock_bh_journal_head(bh);
goto repeat;
}
jh = new_jh;
new_jh = NULL; /* We consumed it */
set_buffer_jbd(bh);
bh->b_private = jh;
jh->b_bh = bh;
get_bh(bh);
BUFFER_TRACE(bh, "added journal_head");
}
jh->b_jcount++;
jbd_unlock_bh_journal_head(bh);
if (new_jh)
journal_free_journal_head(new_jh);
return bh->b_private;
}
/*
* Grab a ref against this buffer_head's journal_head. If it ended up not
* having a journal_head, return NULL
*/
struct journal_head *journal_grab_journal_head(struct buffer_head *bh)
{
struct journal_head *jh = NULL;
jbd_lock_bh_journal_head(bh);
if (buffer_jbd(bh)) {
jh = bh2jh(bh);
jh->b_jcount++;
}
jbd_unlock_bh_journal_head(bh);
return jh;
}
static void __journal_remove_journal_head(struct buffer_head *bh)
{
struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
get_bh(bh);
if (jh->b_jcount == 0) {
if (jh->b_transaction == NULL &&
jh->b_next_transaction == NULL &&
jh->b_cp_transaction == NULL) {
J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head");
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
__FUNCTION__);
jbd_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
__FUNCTION__);
jbd_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
clear_buffer_jbd(bh);
__brelse(bh);
journal_free_journal_head(jh);
} else {
BUFFER_TRACE(bh, "journal_head was locked");
}
}
}
/*
* journal_remove_journal_head(): if the buffer isn't attached to a transaction
* and has a zero b_jcount then remove and release its journal_head. If we did
* see that the buffer is not used by any transaction we also "logically"
* decrement ->b_count.
*
* We in fact take an additional increment on ->b_count as a convenience,
* because the caller usually wants to do additional things with the bh
* after calling here.
* The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
* time. Once the caller has run __brelse(), the buffer is eligible for
* reaping by try_to_free_buffers().
*/
void journal_remove_journal_head(struct buffer_head *bh)
{
jbd_lock_bh_journal_head(bh);
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
}
/*
* Drop a reference on the passed journal_head. If it fell to zero then try to
* release the journal_head from the buffer_head.
*/
void journal_put_journal_head(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
if (!jh->b_jcount && !jh->b_transaction) {
__journal_remove_journal_head(bh);
__brelse(bh);
}
jbd_unlock_bh_journal_head(bh);
}
/*
* Log buffer allocation routines:
*/
int journal_next_log_block(journal_t *journal, unsigned long *retp)
{
unsigned long blocknr;
spin_lock(&journal->j_state_lock);
J_ASSERT(journal->j_free > 1);
blocknr = journal->j_head;
journal->j_head++;
journal->j_free--;
if (journal->j_head == journal->j_last)
journal->j_head = journal->j_first;
spin_unlock(&journal->j_state_lock);
return journal_bmap(journal, blocknr, retp);
}
/*
* Conversion of logical to physical block numbers for the journal
*
* On external journals the journal blocks are identity-mapped, so
* this is a no-op. If needed, we can use j_blk_offset - everything is
* ready.
*/
int journal_bmap(journal_t *journal, unsigned long blocknr,
unsigned long *retp)
{
int err = 0;
unsigned long ret;
if (journal->j_inode) {
ret = (unsigned long)bmap(journal->j_inode, (sector_t)blocknr);
if (ret)
*retp = ret;
else {
printk(KERN_ALERT "%s: journal block not found "
"at offset %lu ...\n",
__FUNCTION__,
blocknr);
err = -EIO;
__journal_abort_soft(journal, err);
}
} else {
*retp = blocknr; /* +journal->j_blk_offset */
}
return err;
}
/*
* We play buffer_head aliasing tricks to write data/metadata blocks to
* the journal without copying their contents, but for journal
* descriptor blocks we do need to generate bona fide buffers.
*
* After the caller of journal_get_descriptor_buffer() has finished modifying
* the buffer's contents they really should run flush_dcache_page(bh->b_page).
* But we don't bother doing that, so there will be coherency problems with
* mmaps of blockdevs which hold live JBD-controlled filesystems.
*/
struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
{
struct buffer_head *bh;
unsigned long blocknr;
int err;
err = journal_next_log_block(journal, &blocknr);
if (err)
return NULL;
bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
lock_buffer(bh);
memset(bh->b_data, 0, journal->j_blocksize);
set_buffer_uptodate(bh);
unlock_buffer(bh);
BUFFER_TRACE(bh, "return this buffer");
return journal_add_journal_head(bh);
}
/*
* Management for journal control blocks: functions to create and
* destroy journal_t structures, and to initialise and read existing
* journal blocks from disk. */
/* First: create and setup a journal_t object in memory. We initialise
* very few fields yet: that has to wait until we have created the
* journal structures from from scratch, or loaded them from disk. */
static journal_t * journal_init_common (void)
{
journal_t *journal;
int err;
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -