📄 journal.c
字号:
mlog(0, "Returned from journal_init_inode\n"); mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL); journal->j_journal = j_journal; journal->j_inode = inode; journal->j_bh = bh; ocfs2_set_journal_params(osb); journal->j_state = OCFS2_JOURNAL_LOADED; status = 0;done: if (status < 0) { if (meta_lock) ocfs2_meta_unlock(inode, 1); if (bh != NULL) brelse(bh); if (inode) { OCFS2_I(inode)->ip_open_count--; iput(inode); } } mlog_exit(status); return status;}static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, int dirty){ int status; unsigned int flags; struct ocfs2_journal *journal = osb->journal; struct buffer_head *bh = journal->j_bh; struct ocfs2_dinode *fe; mlog_entry_void(); fe = (struct ocfs2_dinode *)bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { /* This is called from startup/shutdown which will * handle the errors in a specific manner, so no need * to call ocfs2_error() here. */ mlog(ML_ERROR, "Journal dinode %"MLFu64" has invalid " "signature: %.*s", fe->i_blkno, 7, fe->i_signature); status = -EIO; goto out; } flags = le32_to_cpu(fe->id1.journal1.ij_flags); if (dirty) flags |= OCFS2_JOURNAL_DIRTY_FL; else flags &= ~OCFS2_JOURNAL_DIRTY_FL; fe->id1.journal1.ij_flags = cpu_to_le32(flags); status = ocfs2_write_block(osb, bh, journal->j_inode); if (status < 0) mlog_errno(status);out: mlog_exit(status); return status;}/* * If the journal has been kmalloc'd it needs to be freed after this * call. */void ocfs2_journal_shutdown(struct ocfs2_super *osb){ struct ocfs2_journal *journal = NULL; int status = 0; struct inode *inode = NULL; int num_running_trans = 0; mlog_entry_void(); if (!osb) BUG(); journal = osb->journal; if (!journal) goto done; inode = journal->j_inode; if (journal->j_state != OCFS2_JOURNAL_LOADED) goto done; /* need to inc inode use count as journal_destroy will iput. 
*/ if (!igrab(inode)) BUG(); num_running_trans = atomic_read(&(osb->journal->j_num_trans)); if (num_running_trans > 0) mlog(0, "Shutting down journal: must wait on %d " "running transactions!\n", num_running_trans); /* Do a commit_cache here. It will flush our journal, *and* * release any locks that are still held. * set the SHUTDOWN flag and release the trans lock. * the commit thread will take the trans lock for us below. */ journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN; /* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not * drop the trans_lock (which we want to hold until we * completely destroy the journal. */ if (osb->commit_task) { /* Wait for the commit thread */ mlog(0, "Waiting for ocfs2commit to exit....\n"); kthread_stop(osb->commit_task); osb->commit_task = NULL; } BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); if (ocfs2_mount_local(osb)) { journal_lock_updates(journal->j_journal); status = journal_flush(journal->j_journal); journal_unlock_updates(journal->j_journal); if (status < 0) mlog_errno(status); } if (status == 0) { /* * Do not toggle if flush was unsuccessful otherwise * will leave dirty metadata in a "clean" journal */ status = ocfs2_journal_toggle_dirty(osb, 0); if (status < 0) mlog_errno(status); } /* Shutdown the kernel journal system */ journal_destroy(journal->j_journal); OCFS2_I(inode)->ip_open_count--; /* unlock our journal */ ocfs2_meta_unlock(inode, 1); brelse(journal->j_bh); journal->j_bh = NULL; journal->j_state = OCFS2_JOURNAL_FREE;// up_write(&journal->j_trans_barrier);done: if (inode) iput(inode); mlog_exit_void();}static void ocfs2_clear_journal_error(struct super_block *sb, journal_t *journal, int slot){ int olderr; olderr = journal_errno(journal); if (olderr) { mlog(ML_ERROR, "File system error %d recorded in " "journal %u.\n", olderr, slot); mlog(ML_ERROR, "File system on device %s needs checking.\n", sb->s_id); journal_ack_err(journal); journal_clear_err(journal); }}int ocfs2_journal_load(struct 
ocfs2_journal *journal, int local){ int status = 0; struct ocfs2_super *osb; mlog_entry_void(); if (!journal) BUG(); osb = journal->j_osb; status = journal_load(journal->j_journal); if (status < 0) { mlog(ML_ERROR, "Failed to load journal!\n"); goto done; } ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); status = ocfs2_journal_toggle_dirty(osb, 1); if (status < 0) { mlog_errno(status); goto done; } /* Launch the commit thread */ if (!local) { osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt-%d", osb->osb_id); if (IS_ERR(osb->commit_task)) { status = PTR_ERR(osb->commit_task); osb->commit_task = NULL; mlog(ML_ERROR, "unable to launch ocfs2commit thread, " "error=%d", status); goto done; } } else osb->commit_task = NULL;done: mlog_exit(status); return status;}/* 'full' flag tells us whether we clear out all blocks or if we just * mark the journal clean */int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full){ int status; mlog_entry_void(); if (!journal) BUG(); status = journal_wipe(journal->j_journal, full); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); if (status < 0) mlog_errno(status);bail: mlog_exit(status); return status;}/* * JBD Might read a cached version of another nodes journal file. We * don't want this as this file changes often and we get no * notification on those changes. The only way to be sure that we've * got the most up to date version of those blocks then is to force * read them off disk. Just searching through the buffer cache won't * work as there may be pages backing this file which are still marked * up to date. 
We know things can't change on this file underneath us * as we have the lock by now :) */static int ocfs2_force_read_journal(struct inode *inode){ int status = 0; int i, p_blocks; u64 v_blkno, p_blkno;#define CONCURRENT_JOURNAL_FILL 32 struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; mlog_entry_void(); BUG_ON(inode->i_blocks != ocfs2_align_bytes_to_sectors(i_size_read(inode))); memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); mlog(0, "Force reading %lu blocks\n", (unsigned long)(inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))); v_blkno = 0; while (v_blkno < (inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, 1, &p_blkno, &p_blocks); if (status < 0) { mlog_errno(status); goto bail; } if (p_blocks > CONCURRENT_JOURNAL_FILL) p_blocks = CONCURRENT_JOURNAL_FILL; /* We are reading journal data which should not * be put in the uptodate cache */ status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), p_blkno, p_blocks, bhs, 0, NULL); if (status < 0) { mlog_errno(status); goto bail; } for(i = 0; i < p_blocks; i++) { brelse(bhs[i]); bhs[i] = NULL; } v_blkno += p_blocks; }bail: for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) if (bhs[i]) brelse(bhs[i]); mlog_exit(status); return status;}struct ocfs2_la_recovery_item { struct list_head lri_list; int lri_slot; struct ocfs2_dinode *lri_la_dinode; struct ocfs2_dinode *lri_tl_dinode; int lri_node_num;};/* Does the second half of the recovery process. By this point, the * node is marked clean and can actually be considered recovered, * hence it's no longer in the recovery map, but there's still some * cleanup we can do which shouldn't happen within the recovery thread * as locking in that context becomes very difficult if we are to take * recovering nodes into account. * * NOTE: This function can and will sleep on recovery of other nodes * during cluster locking, just like any other ocfs2 process. 
*/void ocfs2_complete_recovery(void *data){ int ret; struct ocfs2_super *osb = data; struct ocfs2_journal *journal = osb->journal; struct ocfs2_dinode *la_dinode, *tl_dinode; struct ocfs2_la_recovery_item *item; struct list_head *p, *n; LIST_HEAD(tmp_la_list); mlog_entry_void(); mlog(0, "completing recovery from keventd\n"); spin_lock(&journal->j_lock); list_splice_init(&journal->j_la_cleanups, &tmp_la_list); spin_unlock(&journal->j_lock); list_for_each_safe(p, n, &tmp_la_list) { item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); list_del_init(&item->lri_list); mlog(0, "Complete recovery for slot %d\n", item->lri_slot); la_dinode = item->lri_la_dinode; if (la_dinode) { mlog(0, "Clean up local alloc %"MLFu64"\n", la_dinode->i_blkno); ret = ocfs2_complete_local_alloc_recovery(osb, la_dinode); if (ret < 0) mlog_errno(ret); kfree(la_dinode); } tl_dinode = item->lri_tl_dinode; if (tl_dinode) { mlog(0, "Clean up truncate log %"MLFu64"\n", tl_dinode->i_blkno); ret = ocfs2_complete_truncate_log_recovery(osb, tl_dinode); if (ret < 0) mlog_errno(ret); kfree(tl_dinode); } ret = ocfs2_recover_orphans(osb, item->lri_slot, item->lri_node_num); if (ret < 0) mlog_errno(ret); kfree(item); } mlog(0, "Recovery completion\n"); mlog_exit_void();}/* NOTE: This function always eats your references to la_dinode and * tl_dinode, either manually on error, or by passing them to * ocfs2_complete_recovery */static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, int slot_num, struct ocfs2_dinode *la_dinode, struct ocfs2_dinode *tl_dinode, int node_num){ struct ocfs2_la_recovery_item *item; item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); if (!item) { /* Though we wish to avoid it, we are in fact safe in * skipping local alloc cleanup as fsck.ocfs2 is more * than capable of reclaiming unused space. 
*/ if (la_dinode) kfree(la_dinode); if (tl_dinode) kfree(tl_dinode); mlog_errno(-ENOMEM); return; } INIT_LIST_HEAD(&item->lri_list); item->lri_la_dinode = la_dinode; item->lri_slot = slot_num; item->lri_node_num = node_num; item->lri_tl_dinode = tl_dinode; spin_lock(&journal->j_lock); list_add_tail(&item->lri_list, &journal->j_la_cleanups); queue_work(ocfs2_wq, &journal->j_recovery_work); spin_unlock(&journal->j_lock);}/* Called by the mount code to queue recovery the last part of * recovery for it's own slot. */void ocfs2_complete_mount_recovery(struct ocfs2_super *osb){ struct ocfs2_journal *journal = osb->journal; if (osb->dirty) { /* No need to queue up our truncate_log as regular * cleanup will catch that. */ ocfs2_queue_recovery_completion(journal, osb->slot_num, osb->local_alloc_copy, NULL, osb->node_num); ocfs2_schedule_truncate_log_flush(osb, 0); osb->local_alloc_copy = NULL; osb->dirty = 0; }}static int __ocfs2_recovery_thread(void *arg){ int status; int node_num = O2NM_INVALID_NODE_NUM; struct ocfs2_super *osb = arg; mlog_entry_void(); status = ocfs2_wait_on_mount(osb); if (status < 0) { goto bail; }restart: status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); goto bail; } while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { node_num = ocfs2_node_map_first_set_bit(osb, &osb->recovery_map); if (node_num == O2NM_INVALID_NODE_NUM) { mlog(0, "Out of nodes to recover.\n"); break; } status = ocfs2_recover_node(osb, node_num); if (status < 0) { mlog(ML_ERROR, "Error %d recovering node %d on device (%u,%u)!\n", status, node_num, MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); mlog(ML_ERROR, "Volume requires unmount.\n"); continue; } ocfs2_recovery_map_clear(osb, node_num); } ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead * node(s) may have voted "no" on an inode delete earlier. A * revote is therefore required. 
*/
	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
					NULL, node_num);

bail:
	/* Re-check the recovery map under recovery_lock: if the last pass
	 * ended with status 0 and the map is not empty, go around again
	 * instead of exiting. */
	down(&osb->recovery_lock);
	if (!status &&
	    !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
		up(&osb->recovery_lock);
		goto restart;
	}

	osb->recovery_thread_task = NULL;
	mb(); /* sync with ocfs2_recovery_thread_running */
	wake_up(&osb->recovery_event);

	up(&osb->recovery_lock);

	mlog_exit(status);
	/* no one is calling kthread_stop() for us so the kthread() api
	 * requires that we call do_exit().  And it isn't exported, but
	 * complete_and_exit() seems to be a minimal wrapper around it. */
	complete_and_exit(NULL, status);
	return status;
}

/*
 * Record that node_num needs recovery and start the recovery kthread
 * unless osb->recovery_thread_task is already set.  Does nothing when
 * osb->disable_recovery is set.  Runs under osb->recovery_lock and
 * always wakes osb->recovery_event before returning.
 */
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
		   node_num, osb->node_num);

	down(&osb->recovery_lock);
	if (osb->disable_recovery)
		goto out;

	/* People waiting on recovery will wait on
	 * the recovery map to empty. */
	if (!ocfs2_recovery_map_set(osb, node_num))
		mlog(0, "node %d already be in recovery.\n", node_num);

	mlog(0, "starting recovery thread...\n");

	/* a thread is already recorded; no second one is started */
	if (osb->recovery_thread_task)
		goto out;

	osb->recovery_thread_task =  kthread_run(__ocfs2_recovery_thread, osb,
						 "ocfs2rec-%d", osb->osb_id);
	if (IS_ERR(osb->recovery_thread_task)) {
		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));
		/* do not leave an ERR_PTR behind in recovery_thread_task */
		osb->recovery_thread_task = NULL;
	}

out:
	up(&osb->recovery_lock);
	wake_up(&osb->recovery_event);

	mlog_exit_void();
}

/* Does the actual journal replay and marks the journal inode as
 * clean. Will only replay if the journal inode is marked dirty. */
static int ocfs2_replay_journal(struct ocfs2_super *osb,
				int node_num,
				int slot_num)
{
	int status;
	int got_lock = 0;
	unsigned int flags;
	struct inode *inode = NULL;
	struct ocfs2_dinode *fe;
	journal_t *journal = NULL;
	struct buffer_head *bh = NULL;

	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
					    slot_num);
	if (inode == NULL) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -