📄 journal.c
字号:
/* * linux/fs/jbd2/journal.c * * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 * * Copyright 1998 Red Hat corp --- All Rights Reserved * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. * * Generic filesystem journal-writing code; part of the ext2fs * journaling system. * * This file manages journals: areas of disk reserved for logging * transactional updates. This includes the kernel journaling thread * which is responsible for scheduling updates to the log. * * We do not actually manage the physical storage of the journal in this * file: that is left to a per-journal policy function, which allows us * to store the journal within a filesystem-specified area for ext2 * journaling (ext2 can use a reserved inode for storing the log). */#include <linux/module.h>#include <linux/time.h>#include <linux/fs.h>#include <linux/jbd2.h>#include <linux/errno.h>#include <linux/slab.h>#include <linux/init.h>#include <linux/mm.h>#include <linux/freezer.h>#include <linux/pagemap.h>#include <linux/kthread.h>#include <linux/poison.h>#include <linux/proc_fs.h>#include <linux/debugfs.h>#include <asm/uaccess.h>#include <asm/page.h>EXPORT_SYMBOL(jbd2_journal_start);EXPORT_SYMBOL(jbd2_journal_restart);EXPORT_SYMBOL(jbd2_journal_extend);EXPORT_SYMBOL(jbd2_journal_stop);EXPORT_SYMBOL(jbd2_journal_lock_updates);EXPORT_SYMBOL(jbd2_journal_unlock_updates);EXPORT_SYMBOL(jbd2_journal_get_write_access);EXPORT_SYMBOL(jbd2_journal_get_create_access);EXPORT_SYMBOL(jbd2_journal_get_undo_access);EXPORT_SYMBOL(jbd2_journal_dirty_data);EXPORT_SYMBOL(jbd2_journal_dirty_metadata);EXPORT_SYMBOL(jbd2_journal_release_buffer);EXPORT_SYMBOL(jbd2_journal_forget);#if 0EXPORT_SYMBOL(journal_sync_buffer);#endifEXPORT_SYMBOL(jbd2_journal_flush);EXPORT_SYMBOL(jbd2_journal_revoke);EXPORT_SYMBOL(jbd2_journal_init_dev);EXPORT_SYMBOL(jbd2_journal_init_inode);EXPORT_SYMBOL(jbd2_journal_update_format);EXPORT_SYMBOL(jbd2_journal_check_used_features);EXPORT_SYMBOL(jbd2_journal_check_available_features);EXPORT_SYMBOL(jbd2_journal_set_features);EXPORT_SYMBOL(jbd2_journal_create);EXPORT_SYMBOL(jbd2_journal_load);EXPORT_SYMBOL(jbd2_journal_destroy);EXPORT_SYMBOL(jbd2_journal_update_superblock);EXPORT_SYMBOL(jbd2_journal_abort);EXPORT_SYMBOL(jbd2_journal_errno);EXPORT_SYMBOL(jbd2_journal_ack_err);EXPORT_SYMBOL(jbd2_journal_clear_err);EXPORT_SYMBOL(jbd2_log_wait_commit);EXPORT_SYMBOL(jbd2_journal_start_commit);EXPORT_SYMBOL(jbd2_journal_force_commit_nested);EXPORT_SYMBOL(jbd2_journal_wipe);EXPORT_SYMBOL(jbd2_journal_blocks_per_page);EXPORT_SYMBOL(jbd2_journal_invalidatepage);EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);EXPORT_SYMBOL(jbd2_journal_force_commit);static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);static void __journal_abort_soft (journal_t *journal, int errno);/* * Helper function used to manage commit timeouts */static void commit_timeout(unsigned long __data){ struct task_struct * p = (struct task_struct *) __data; wake_up_process(p);}/* * kjournald2: The main thread function used to manage a logging device * journal. * * This kernel thread is responsible for two things: * * 1) COMMIT: Every so often we need to commit the current state of the * filesystem to disk. The journal thread is responsible for writing * all of the metadata buffers to disk. * * 2) CHECKPOINT: We cannot reuse a used section of the log file until all * of the data in that part of the log has been rewritten elsewhere on * the disk. Flushing these old buffers to reclaim space in the log is * known as checkpointing, and this thread is responsible for that job. */static int kjournald2(void *arg){ journal_t *journal = arg; transaction_t *transaction; /* * Set up an interval timer which can be used to trigger a commit wakeup * after the commit interval expires */ setup_timer(&journal->j_commit_timer, commit_timeout, (unsigned long)current); /* Record that the journal thread is running */ journal->j_task = current; wake_up(&journal->j_wait_done_commit); printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", journal->j_commit_interval / HZ); /* * And now, wait forever for commit wakeup events. */ spin_lock(&journal->j_state_lock);loop: if (journal->j_flags & JBD2_UNMOUNT) goto end_loop; jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); spin_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); jbd2_journal_commit_transaction(journal); spin_lock(&journal->j_state_lock); goto loop; } wake_up(&journal->j_wait_done_commit); if (freezing(current)) { /* * The simpler the better. Flushing journal isn't a * good idea, because that depends on threads that may * be already stopped. */ jbd_debug(1, "Now suspending kjournald2\n"); spin_unlock(&journal->j_state_lock); refrigerator(); spin_lock(&journal->j_state_lock); } else { /* * We assume on resume that commits are already there, * so we don't sleep */ DEFINE_WAIT(wait); int should_sleep = 1; prepare_to_wait(&journal->j_wait_commit, &wait, TASK_INTERRUPTIBLE); if (journal->j_commit_sequence != journal->j_commit_request) should_sleep = 0; transaction = journal->j_running_transaction; if (transaction && time_after_eq(jiffies, transaction->t_expires)) should_sleep = 0; if (journal->j_flags & JBD2_UNMOUNT) should_sleep = 0; if (should_sleep) { spin_unlock(&journal->j_state_lock); schedule(); spin_lock(&journal->j_state_lock); } finish_wait(&journal->j_wait_commit, &wait); } jbd_debug(1, "kjournald2 wakes\n"); /* * Were we woken up by a commit wakeup event? */ transaction = journal->j_running_transaction; if (transaction && time_after_eq(jiffies, transaction->t_expires)) { journal->j_commit_request = transaction->t_tid; jbd_debug(1, "woke because of timeout\n"); } goto loop;end_loop: spin_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); return 0;}static int jbd2_journal_start_thread(journal_t *journal){ struct task_struct *t; t = kthread_run(kjournald2, journal, "kjournald2"); if (IS_ERR(t)) return PTR_ERR(t); wait_event(journal->j_wait_done_commit, journal->j_task != 0); return 0;}static void journal_kill_thread(journal_t *journal){ spin_lock(&journal->j_state_lock); journal->j_flags |= JBD2_UNMOUNT; while (journal->j_task) { wake_up(&journal->j_wait_commit); spin_unlock(&journal->j_state_lock); wait_event(journal->j_wait_done_commit, journal->j_task == 0); spin_lock(&journal->j_state_lock); } spin_unlock(&journal->j_state_lock);}/* * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. * * Writes a metadata buffer to a given disk block. The actual IO is not * performed but a new buffer_head is constructed which labels the data * to be written with the correct destination disk block. * * Any magic-number escaping which needs to be done will cause a * copy-out here. If the buffer happens to start with the * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the * magic number is only written to the log for descripter blocks. In * this case, we copy the data and replace the first word with 0, and we * return a result code which indicates that this buffer needs to be * marked as an escaped buffer in the corresponding log descriptor * block. The missing word can then be restored when the block is read * during recovery. * * If the source buffer has already been modified by a new transaction * since we took the last commit snapshot, we use the frozen copy of * that data for IO. If we end up using the existing buffer_head's data * for the write, then we *have* to lock the buffer to prevent anyone * else from using and possibly modifying it while the IO is in * progress. * * The function returns a pointer to the buffer_heads to be used for IO. * * We assume that the journal has already been locked in this function. * * Return value: * <0: Error * >=0: Finished OK * * On success: * Bit 0 set == escape performed on the data * Bit 1 set == buffer copy-out performed (kfree the data after IO) */int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, unsigned long long blocknr){ int need_copy_out = 0; int done_copy_out = 0; int do_escape = 0; char *mapped_data; struct buffer_head *new_bh; struct journal_head *new_jh; struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); /* * The buffer really shouldn't be locked: only the current committing * transaction is allowed to write it, so nobody else is allowed * to do any IO. * * akpm: except if we're journalling data, and write() output is * also part of a shared mapping, and another thread has * decided to launch a writepage() against this buffer. */ J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); /* * If a new transaction has already done a buffer copy-out, then * we use that version of the data for the commit. */ jbd_lock_bh_state(bh_in);repeat: if (jh_in->b_frozen_data) { done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); new_offset = offset_in_page(jh_in->b_frozen_data); } else { new_page = jh2bh(jh_in)->b_page; new_offset = offset_in_page(jh2bh(jh_in)->b_data); } mapped_data = kmap_atomic(new_page, KM_USER0); /* * Check for escaping */ if (*((__be32 *)(mapped_data + new_offset)) == cpu_to_be32(JBD2_MAGIC_NUMBER)) { need_copy_out = 1; do_escape = 1; } kunmap_atomic(mapped_data, KM_USER0); /* * Do we need to do a data copy? */ if (need_copy_out && !done_copy_out) { char *tmp; jbd_unlock_bh_state(bh_in); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { jbd2_free(tmp, bh_in->b_size); goto repeat; } jh_in->b_frozen_data = tmp; mapped_data = kmap_atomic(new_page, KM_USER0); memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); kunmap_atomic(mapped_data, KM_USER0); new_page = virt_to_page(tmp); new_offset = offset_in_page(tmp); done_copy_out = 1; } /* * Did we need to do an escaping? Now we've done all the * copying, we can finally do so. */ if (do_escape) { mapped_data = kmap_atomic(new_page, KM_USER0); *((unsigned int *)(mapped_data + new_offset)) = 0; kunmap_atomic(mapped_data, KM_USER0); } /* keep subsequent assertions sane */ new_bh->b_state = 0; init_buffer(new_bh, NULL, NULL); atomic_set(&new_bh->b_count, 1); jbd_unlock_bh_state(bh_in); new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ set_bh_page(new_bh, new_page, new_offset); new_jh->b_transaction = NULL; new_bh->b_size = jh2bh(jh_in)->b_size; new_bh->b_bdev = transaction->t_journal->j_dev; new_bh->b_blocknr = blocknr; set_buffer_mapped(new_bh); set_buffer_dirty(new_bh); *jh_out = new_jh; /* * The to-be-written buffer needs to get moved to the io queue, * and the original buffer whose contents we are shadowing or * copying is moved to the transaction's shadow queue. */ JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); JBUFFER_TRACE(new_jh, "file as BJ_IO"); jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); return do_escape | (done_copy_out << 1);}/* * Allocation code for the journal file. Manage the space left in the * journal, so that we can begin checkpointing when appropriate. *//* * __jbd2_log_space_left: Return the number of free blocks left in the journal. * * Called with the journal already locked. * * Called under j_state_lock */int __jbd2_log_space_left(journal_t *journal){ int left = journal->j_free; assert_spin_locked(&journal->j_state_lock); /* * Be pessimistic here about the number of those free blocks which * might be required for log descriptor control blocks. */#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ left -= MIN_LOG_RESERVED_BLOCKS; if (left <= 0) return 0; left -= (left >> 3); return left;}/* * Called under j_state_lock. Returns true if a transaction was started. */int __jbd2_log_start_commit(journal_t *journal, tid_t target){ /* * Are we already doing a recent enough commit? */ if (!tid_geq(journal->j_commit_request, target)) { /* * We want a new commit: OK, mark the request and wakup the * commit thread. We do _not_ do the commit ourselves. */ journal->j_commit_request = target; jbd_debug(1, "JBD: requesting commit %d/%d\n", journal->j_commit_request, journal->j_commit_sequence); wake_up(&journal->j_wait_commit); return 1; } return 0;}int jbd2_log_start_commit(journal_t *journal, tid_t tid){ int ret; spin_lock(&journal->j_state_lock); ret = __jbd2_log_start_commit(journal, tid); spin_unlock(&journal->j_state_lock); return ret;}/* * Force and wait upon a commit if the calling process is not within * transaction. This is used for forcing out undo-protected data which contains * bitmaps, when the fs is running out of space. * * We can only force the running transaction if we don't have an active handle; * otherwise, we will deadlock. * * Returns true if a transaction was started. */int jbd2_journal_force_commit_nested(journal_t *journal){ transaction_t *transaction = NULL; tid_t tid; spin_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; __jbd2_log_start_commit(journal, transaction->t_tid); } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; if (!transaction) { spin_unlock(&journal->j_state_lock); return 0; /* Nothing to retry */ } tid = transaction->t_tid; spin_unlock(&journal->j_state_lock); jbd2_log_wait_commit(journal, tid); return 1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -