commit.c
/*
 * linux/fs/jbd2/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/*
 * Default IO end handler for temporary BJ_IO buffer_heads.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
}

/*
 * When an ext3-ordered file is truncated, it is possible that many pages are
 * not successfully freed, because they are attached to a committing
 * transaction.  After the transaction commits, these pages are left on the
 * LRU, with no ->mapping, and with attached buffers.  These pages are
 * trivially reclaimable by the VM, but their apparent absence upsets the VM
 * accounting, and it makes the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under journal_datalist_lock.  The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct page *page;

	if (buffer_dirty(bh))
		goto nope;
	if (atomic_read(&bh->b_count) != 1)
		goto nope;
	page = bh->b_page;
	if (!page)
		goto nope;
	if (page->mapping)
		goto nope;

	/* OK, it's a truncated page */
	if (TestSetPageLocked(page))
		goto nope;

	page_cache_get(page);
	__brelse(bh);
	try_to_free_buffers(page);
	unlock_page(page);
	page_cache_release(page);
	return;

nope:
	__brelse(bh);
}

/*
 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
 */
static int inverted_lock(journal_t *journal, struct buffer_head *bh)
{
	if (!jbd_trylock_bh_state(bh)) {
		spin_unlock(&journal->j_list_lock);
		schedule();
		return 0;
	}
	return 1;
}
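/*
 * A usage sketch, grounded in the two commit-path callers below: the
 * caller holds j_list_lock and then calls inverted_lock().  On failure
 * j_list_lock has already been dropped, so the caller must re-acquire
 * it, either after taking the bh_state lock the blocking way:
 *
 *	if (!inverted_lock(journal, bh)) {
 *		jbd_lock_bh_state(bh);
 *		spin_lock(&journal->j_list_lock);
 *	}
 *
 * or by dropping its buffer reference and restarting the list scan:
 *
 *	if (!inverted_lock(journal, bh)) {
 *		put_bh(bh);
 *		spin_lock(&journal->j_list_lock);
 *		continue;
 *	}
 */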
/* Done it all: now write the commit record.  We should have
 * cleaned up our previous buffers by now, so if we are in abort
 * mode we can now just skip the rest of the journal write
 * entirely.
 *
 * Returns 1 if the journal needs to be aborted or 0 on success
 */
static int journal_write_commit_record(journal_t *journal,
					transaction_t *commit_transaction)
{
	struct journal_head *descriptor;
	struct buffer_head *bh;
	int i, ret;
	int barrier_done = 0;

	if (is_journal_aborted(journal))
		return 0;

	descriptor = jbd2_journal_get_descriptor_buffer(journal);
	if (!descriptor)
		return 1;

	bh = jh2bh(descriptor);

	/* AKPM: buglet - add `i' to tmp! */
	for (i = 0; i < bh->b_size; i += 512) {
		journal_header_t *tmp = (journal_header_t*)bh->b_data;
		tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
		tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
		tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
	}

	JBUFFER_TRACE(descriptor, "write commit block");
	set_buffer_dirty(bh);
	if (journal->j_flags & JBD2_BARRIER) {
		set_buffer_ordered(bh);
		barrier_done = 1;
	}
	ret = sync_dirty_buffer(bh);
	/* is it possible for another commit to fail at roughly
	 * the same time as this one?  If so, we don't want to
	 * trust the barrier flag in the super, but instead want
	 * to remember if we sent a barrier request
	 */
	if (ret == -EOPNOTSUPP && barrier_done) {
		char b[BDEVNAME_SIZE];

		printk(KERN_WARNING
			"JBD: barrier-based sync failed on %s - "
			"disabling barriers\n",
			bdevname(journal->j_dev, b));
		spin_lock(&journal->j_state_lock);
		journal->j_flags &= ~JBD2_BARRIER;
		spin_unlock(&journal->j_state_lock);

		/* And try again, without the barrier */
		clear_buffer_ordered(bh);
		set_buffer_uptodate(bh);
		set_buffer_dirty(bh);
		ret = sync_dirty_buffer(bh);
	}
	put_bh(bh);		/* One for getblk() */
	jbd2_journal_put_journal_head(descriptor);

	return (ret == -EIO);
}
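/*
 * For reference, a sketch of the header that the loop above stamps
 * into the commit block, as declared in include/linux/jbd2.h (all
 * fields big-endian on disk):
 *
 *	typedef struct journal_header_s {
 *		__be32	h_magic;	- JBD2_MAGIC_NUMBER
 *		__be32	h_blocktype;	- JBD2_COMMIT_BLOCK here
 *		__be32	h_sequence;	- the committing transaction's tid
 *	} journal_header_t;
 */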
static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
{
	int i;

	for (i = 0; i < bufs; i++) {
		wbuf[i]->b_end_io = end_buffer_write_sync;
		/* We use up our safety reference in submit_bh() */
		submit_bh(WRITE, wbuf[i]);
	}
}

/*
 * Submit all the data buffers to disk
 */
static void journal_submit_data_buffers(journal_t *journal,
				transaction_t *commit_transaction)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	int locked;
	int bufs = 0;
	struct buffer_head **wbuf = journal->j_wbuf;

	/*
	 * Whenever we unlock the journal and sleep, things can get added
	 * onto ->t_sync_datalist, so we have to keep looping back to
	 * write_out_data until we *know* that the list is empty.
	 *
	 * Clean up any flushed data buffers from the data list.  Even in
	 * abort mode, we want to flush this out as soon as possible.
	 */
write_out_data:
	cond_resched();
	spin_lock(&journal->j_list_lock);

	while (commit_transaction->t_sync_datalist) {
		jh = commit_transaction->t_sync_datalist;
		bh = jh2bh(jh);
		locked = 0;

		/* Get reference just to make sure buffer does not disappear
		 * when we are forced to drop various locks */
		get_bh(bh);
		/* If the buffer is dirty, we need to submit IO and hence
		 * we need the buffer lock.  We try to lock the buffer without
		 * blocking.  If we fail, we need to drop j_list_lock and do
		 * blocking lock_buffer().
		 */
		if (buffer_dirty(bh)) {
			if (test_set_buffer_locked(bh)) {
				BUFFER_TRACE(bh, "needs blocking lock");
				spin_unlock(&journal->j_list_lock);
				/* Write out all data to prevent deadlocks */
				journal_do_submit_data(wbuf, bufs);
				bufs = 0;
				lock_buffer(bh);
				spin_lock(&journal->j_list_lock);
			}
			locked = 1;
		}
		/* We have to get bh_state lock.  Again out of order, sigh. */
		if (!inverted_lock(journal, bh)) {
			jbd_lock_bh_state(bh);
			spin_lock(&journal->j_list_lock);
		}
		/* Someone already cleaned up the buffer? */
		if (!buffer_jbd(bh)
			|| jh->b_transaction != commit_transaction
			|| jh->b_jlist != BJ_SyncData) {
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			BUFFER_TRACE(bh, "already cleaned up");
			put_bh(bh);
			continue;
		}
		if (locked && test_clear_buffer_dirty(bh)) {
			BUFFER_TRACE(bh, "needs writeout, adding to array");
			wbuf[bufs++] = bh;
			__jbd2_journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			if (bufs == journal->j_wbufsize) {
				spin_unlock(&journal->j_list_lock);
				journal_do_submit_data(wbuf, bufs);
				bufs = 0;
				goto write_out_data;
			}
		} else if (!locked && buffer_locked(bh)) {
			__jbd2_journal_file_buffer(jh, commit_transaction,
						BJ_Locked);
			jbd_unlock_bh_state(bh);
			put_bh(bh);
		} else {
			BUFFER_TRACE(bh, "writeout complete: unfile");
			__jbd2_journal_unfile_buffer(jh);
			jbd_unlock_bh_state(bh);
			if (locked)
				unlock_buffer(bh);
			jbd2_journal_remove_journal_head(bh);
			/* Once for our safety reference, once for
			 * jbd2_journal_remove_journal_head() */
			put_bh(bh);
			put_bh(bh);
		}

		if (lock_need_resched(&journal->j_list_lock)) {
			spin_unlock(&journal->j_list_lock);
			goto write_out_data;
		}
	}
	spin_unlock(&journal->j_list_lock);
	journal_do_submit_data(wbuf, bufs);
}

static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
				   unsigned long long block)
{
	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
	if (tag_bytes > JBD2_TAG_SIZE32)
		tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
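/*
 * A worked example of the split above for a journal carrying 64-bit
 * block numbers (tag_bytes > JBD2_TAG_SIZE32): with
 * block = 0x123456789ULL,
 *
 *	tag->t_blocknr      = cpu_to_be32(0x23456789);	- low 32 bits
 *	tag->t_blocknr_high = cpu_to_be32(0x00000001);	- high 32 bits
 *
 * For the unsigned long long used here, (block >> 31) >> 1 equals
 * block >> 32; the two-step shift presumably keeps the expression
 * well-defined even if the block type were only 32 bits wide, where
 * a single shift by 32 would be undefined behaviour.
 */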
/*
 * jbd2_journal_commit_transaction
 *
 * The primary function for committing a transaction to the log.  This
 * function is called by the journal thread to begin a complete commit.
 */
void jbd2_journal_commit_transaction(journal_t *journal)
{
	transaction_t *commit_transaction;
	struct journal_head *jh, *new_jh, *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned long long blocknr;
	char *tagp = NULL;
	journal_header_t *header;
	journal_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
	int i;
	int tag_bytes = journal_tag_bytes(journal);

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

#ifdef COMMIT_STATS
	spin_lock(&journal->j_list_lock);
	summarise_journal_usage(journal);
	spin_unlock(&journal->j_list_lock);
#endif

	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
	if (journal->j_flags & JBD2_FLUSHED) {
		jbd_debug(3, "super block updated\n");
		jbd2_journal_update_superblock(journal, 1);
	} else {
		jbd_debug(3, "superblock not updated\n");
	}

	J_ASSERT(journal->j_running_transaction != NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);

	commit_transaction = journal->j_running_transaction;
	J_ASSERT(commit_transaction->t_state == T_RUNNING);

	jbd_debug(1, "JBD: starting commit of transaction %d\n",
			commit_transaction->t_tid);

	spin_lock(&journal->j_state_lock);
	commit_transaction->t_state = T_LOCKED;

	spin_lock(&commit_transaction->t_handle_lock);
	while (commit_transaction->t_updates) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_updates, &wait,
					TASK_UNINTERRUPTIBLE);
		if (commit_transaction->t_updates) {
			spin_unlock(&commit_transaction->t_handle_lock);
			spin_unlock(&journal->j_state_lock);
			schedule();
			spin_lock(&journal->j_state_lock);
			spin_lock(&commit_transaction->t_handle_lock);
		}
		finish_wait(&journal->j_wait_updates, &wait);
	}
	spin_unlock(&commit_transaction->t_handle_lock);

	J_ASSERT(commit_transaction->t_outstanding_credits <=
			journal->j_max_transaction_buffers);

	/*
	 * First thing we are allowed to do is to discard any remaining
	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a jbd2_journal_restart() while
	 * there are still BJ_Reserved buffers outstanding.  These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved.  This is consistent with the existing behaviour
	 * that multiple jbd2_journal_get_write_access() calls to the same
	 * buffer are perfectly permissible.
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
		 * leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = jh2bh(jh);

			jbd_lock_bh_state(bh);
			jbd2_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			jbd_unlock_bh_state(bh);
		}
		jbd2_journal_refile_buffer(journal, jh);
	}

	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists.  We do this *before* commit because it potentially
	 * frees some memory
	 */
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_clean_checkpoint_list(journal);
	spin_unlock(&journal->j_list_lock);

	jbd_debug(3, "JBD: commit phase 1\n");

	/*
	 * Switch to a new revoke table.
	 */
	jbd2_journal_switch_revoke_table(journal);

	commit_transaction->t_state = T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	commit_transaction->t_log_start = journal->j_head;
	wake_up(&journal->j_wait_transaction_locked);
	spin_unlock(&journal->j_state_lock);

	jbd_debug(3, "JBD: commit phase 2\n");

	/*
	 * First, drop modified flag: all accesses to the buffers
	 * will be tracked for a new transaction only -bzzz
	 */
	spin_lock(&journal->j_list_lock);
	if (commit_transaction->t_buffers) {
		new_jh = jh = commit_transaction->t_buffers->b_tnext;
		do {
			J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
					new_jh->b_modified == 0);
			new_jh->b_modified = 0;
			new_jh = new_jh->b_tnext;
		} while (new_jh != jh);
	}
	spin_unlock(&journal->j_list_lock);

	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists.  Data blocks go first.
	 */
	err = 0;
	journal_submit_data_buffers(journal, commit_transaction);

	/*
	 * Wait for all previously submitted IO to complete.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_locked_list) {
		struct buffer_head *bh;

		jh = commit_transaction->t_locked_list->b_tprev;
		bh = jh2bh(jh);
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			if (unlikely(!buffer_uptodate(bh)))
				err = -EIO;
			spin_lock(&journal->j_list_lock);
		}
		if (!inverted_lock(journal, bh)) {
			put_bh(bh);
			spin_lock(&journal->j_list_lock);
			continue;
		}
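		/*
		 * Both j_list_lock and the bh_state lock are held again at
		 * this point: inverted_lock() returns 1 only with the
		 * bh_state lock taken, and the get_bh() reference from the
		 * top of the loop kept the buffer alive across the unlocked
		 * wait_on_buffer() window above.
		 */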