📄 journal.c
字号:
mlog(0, "Shutting down journal: must wait on %d " "running transactions!\n", num_running_trans); /* Do a commit_cache here. It will flush our journal, *and* * release any locks that are still held. * set the SHUTDOWN flag and release the trans lock. * the commit thread will take the trans lock for us below. */ journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN; /* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not * drop the trans_lock (which we want to hold until we * completely destroy the journal. */ if (osb->commit_task) { /* Wait for the commit thread */ mlog(0, "Waiting for ocfs2commit to exit....\n"); kthread_stop(osb->commit_task); osb->commit_task = NULL; } BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); if (ocfs2_mount_local(osb)) { journal_lock_updates(journal->j_journal); status = journal_flush(journal->j_journal); journal_unlock_updates(journal->j_journal); if (status < 0) mlog_errno(status); } if (status == 0) { /* * Do not toggle if flush was unsuccessful otherwise * will leave dirty metadata in a "clean" journal */ status = ocfs2_journal_toggle_dirty(osb, 0); if (status < 0) mlog_errno(status); } /* Shutdown the kernel journal system */ journal_destroy(journal->j_journal); OCFS2_I(inode)->ip_open_count--; /* unlock our journal */ ocfs2_meta_unlock(inode, 1); brelse(journal->j_bh); journal->j_bh = NULL; journal->j_state = OCFS2_JOURNAL_FREE;// up_write(&journal->j_trans_barrier);done: if (inode) iput(inode); mlog_exit_void();}static void ocfs2_clear_journal_error(struct super_block *sb, journal_t *journal, int slot){ int olderr; olderr = journal_errno(journal); if (olderr) { mlog(ML_ERROR, "File system error %d recorded in " "journal %u.\n", olderr, slot); mlog(ML_ERROR, "File system on device %s needs checking.\n", sb->s_id); journal_ack_err(journal); journal_clear_err(journal); }}int ocfs2_journal_load(struct ocfs2_journal *journal, int local){ int status = 0; struct ocfs2_super *osb; mlog_entry_void(); if (!journal) BUG(); osb = journal->j_osb; status = journal_load(journal->j_journal); if (status < 0) { mlog(ML_ERROR, "Failed to load journal!\n"); goto done; } ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); status = ocfs2_journal_toggle_dirty(osb, 1); if (status < 0) { mlog_errno(status); goto done; } /* Launch the commit thread */ if (!local) { osb->commit_task = kthread_run(ocfs2_commit_thread, osb, "ocfs2cmt"); if (IS_ERR(osb->commit_task)) { status = PTR_ERR(osb->commit_task); osb->commit_task = NULL; mlog(ML_ERROR, "unable to launch ocfs2commit thread, " "error=%d", status); goto done; } } else osb->commit_task = NULL;done: mlog_exit(status); return status;}/* 'full' flag tells us whether we clear out all blocks or if we just * mark the journal clean */int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full){ int status; mlog_entry_void(); BUG_ON(!journal); status = journal_wipe(journal->j_journal, full); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); if (status < 0) mlog_errno(status);bail: mlog_exit(status); return status;}/* * JBD Might read a cached version of another nodes journal file. We * don't want this as this file changes often and we get no * notification on those changes. The only way to be sure that we've * got the most up to date version of those blocks then is to force * read them off disk. Just searching through the buffer cache won't * work as there may be pages backing this file which are still marked * up to date. We know things can't change on this file underneath us * as we have the lock by now :) */static int ocfs2_force_read_journal(struct inode *inode){ int status = 0; int i; u64 v_blkno, p_blkno, p_blocks, num_blocks;#define CONCURRENT_JOURNAL_FILL 32ULL struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; mlog_entry_void(); memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); v_blkno = 0; while (v_blkno < num_blocks) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, &p_blocks, NULL); if (status < 0) { mlog_errno(status); goto bail; } if (p_blocks > CONCURRENT_JOURNAL_FILL) p_blocks = CONCURRENT_JOURNAL_FILL; /* We are reading journal data which should not * be put in the uptodate cache */ status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), p_blkno, p_blocks, bhs, 0, NULL); if (status < 0) { mlog_errno(status); goto bail; } for(i = 0; i < p_blocks; i++) { brelse(bhs[i]); bhs[i] = NULL; } v_blkno += p_blocks; }bail: for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) if (bhs[i]) brelse(bhs[i]); mlog_exit(status); return status;}struct ocfs2_la_recovery_item { struct list_head lri_list; int lri_slot; struct ocfs2_dinode *lri_la_dinode; struct ocfs2_dinode *lri_tl_dinode;};/* Does the second half of the recovery process. By this point, the * node is marked clean and can actually be considered recovered, * hence it's no longer in the recovery map, but there's still some * cleanup we can do which shouldn't happen within the recovery thread * as locking in that context becomes very difficult if we are to take * recovering nodes into account. * * NOTE: This function can and will sleep on recovery of other nodes * during cluster locking, just like any other ocfs2 process. */void ocfs2_complete_recovery(struct work_struct *work){ int ret; struct ocfs2_journal *journal = container_of(work, struct ocfs2_journal, j_recovery_work); struct ocfs2_super *osb = journal->j_osb; struct ocfs2_dinode *la_dinode, *tl_dinode; struct ocfs2_la_recovery_item *item, *n; LIST_HEAD(tmp_la_list); mlog_entry_void(); mlog(0, "completing recovery from keventd\n"); spin_lock(&journal->j_lock); list_splice_init(&journal->j_la_cleanups, &tmp_la_list); spin_unlock(&journal->j_lock); list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { list_del_init(&item->lri_list); mlog(0, "Complete recovery for slot %d\n", item->lri_slot); la_dinode = item->lri_la_dinode; if (la_dinode) { mlog(0, "Clean up local alloc %llu\n", (unsigned long long)le64_to_cpu(la_dinode->i_blkno)); ret = ocfs2_complete_local_alloc_recovery(osb, la_dinode); if (ret < 0) mlog_errno(ret); kfree(la_dinode); } tl_dinode = item->lri_tl_dinode; if (tl_dinode) { mlog(0, "Clean up truncate log %llu\n", (unsigned long long)le64_to_cpu(tl_dinode->i_blkno)); ret = ocfs2_complete_truncate_log_recovery(osb, tl_dinode); if (ret < 0) mlog_errno(ret); kfree(tl_dinode); } ret = ocfs2_recover_orphans(osb, item->lri_slot); if (ret < 0) mlog_errno(ret); kfree(item); } mlog(0, "Recovery completion\n"); mlog_exit_void();}/* NOTE: This function always eats your references to la_dinode and * tl_dinode, either manually on error, or by passing them to * ocfs2_complete_recovery */static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, int slot_num, struct ocfs2_dinode *la_dinode, struct ocfs2_dinode *tl_dinode){ struct ocfs2_la_recovery_item *item; item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS); if (!item) { /* Though we wish to avoid it, we are in fact safe in * skipping local alloc cleanup as fsck.ocfs2 is more * than capable of reclaiming unused space. */ if (la_dinode) kfree(la_dinode); if (tl_dinode) kfree(tl_dinode); mlog_errno(-ENOMEM); return; } INIT_LIST_HEAD(&item->lri_list); item->lri_la_dinode = la_dinode; item->lri_slot = slot_num; item->lri_tl_dinode = tl_dinode; spin_lock(&journal->j_lock); list_add_tail(&item->lri_list, &journal->j_la_cleanups); queue_work(ocfs2_wq, &journal->j_recovery_work); spin_unlock(&journal->j_lock);}/* Called by the mount code to queue recovery the last part of * recovery for it's own slot. */void ocfs2_complete_mount_recovery(struct ocfs2_super *osb){ struct ocfs2_journal *journal = osb->journal; if (osb->dirty) { /* No need to queue up our truncate_log as regular * cleanup will catch that. */ ocfs2_queue_recovery_completion(journal, osb->slot_num, osb->local_alloc_copy, NULL); ocfs2_schedule_truncate_log_flush(osb, 0); osb->local_alloc_copy = NULL; osb->dirty = 0; }}static int __ocfs2_recovery_thread(void *arg){ int status, node_num; struct ocfs2_super *osb = arg; mlog_entry_void(); status = ocfs2_wait_on_mount(osb); if (status < 0) { goto bail; }restart: status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); goto bail; } while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { node_num = ocfs2_node_map_first_set_bit(osb, &osb->recovery_map); if (node_num == O2NM_INVALID_NODE_NUM) { mlog(0, "Out of nodes to recover.\n"); break; } status = ocfs2_recover_node(osb, node_num); if (status < 0) { mlog(ML_ERROR, "Error %d recovering node %d on device (%u,%u)!\n", status, node_num, MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); mlog(ML_ERROR, "Volume requires unmount.\n"); continue; } ocfs2_recovery_map_clear(osb, node_num); } ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead * node(s) may have voted "no" on an inode delete earlier. A * revote is therefore required. */ ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, NULL);bail: mutex_lock(&osb->recovery_lock); if (!status && !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { mutex_unlock(&osb->recovery_lock); goto restart; } osb->recovery_thread_task = NULL; mb(); /* sync with ocfs2_recovery_thread_running */ wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); mlog_exit(status); /* no one is callint kthread_stop() for us so the kthread() api * requires that we call do_exit(). And it isn't exported, but * complete_and_exit() seems to be a minimal wrapper around it. */ complete_and_exit(NULL, status); return status;}void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num){ mlog_entry("(node_num=%d, osb->node_num = %d)\n", node_num, osb->node_num); mutex_lock(&osb->recovery_lock); if (osb->disable_recovery) goto out; /* People waiting on recovery will wait on * the recovery map to empty. */ if (!ocfs2_recovery_map_set(osb, node_num)) mlog(0, "node %d already be in recovery.\n", node_num); mlog(0, "starting recovery thread...\n"); if (osb->recovery_thread_task) goto out; osb->recovery_thread_task = kthread_run(__ocfs2_recovery_thread, osb, "ocfs2rec"); if (IS_ERR(osb->recovery_thread_task)) { mlog_errno((int)PTR_ERR(osb->recovery_thread_task)); osb->recovery_thread_task = NULL; }out: mutex_unlock(&osb->recovery_lock); wake_up(&osb->recovery_event); mlog_exit_void();}/* Does the actual journal replay and marks the journal inode as * clean. Will only replay if the journal inode is marked dirty. */static int ocfs2_replay_journal(struct ocfs2_super *osb, int node_num, int slot_num){ int status; int got_lock = 0; unsigned int flags; struct inode *inode = NULL; struct ocfs2_dinode *fe; journal_t *journal = NULL; struct buffer_head *bh = NULL; inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, slot_num); if (inode == NULL) { status = -EACCES; mlog_errno(status); goto done; } if (is_bad_inode(inode)) { status = -EACCES; iput(inode); inode = NULL; mlog_errno(status); goto done; } SET_INODE_JOURNAL(inode); status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); if (status < 0) { mlog(0, "status returned from ocfs2_meta_lock=%d\n", status); if (status != -ERESTARTSYS) mlog(ML_ERROR, "Could not lock journal!\n"); goto done; } got_lock = 1; fe = (struct ocfs2_dinode *) bh->b_data; flags = le32_to_cpu(fe->id1.journal1.ij_flags); if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { mlog(0, "No recovery required for node %d\n", node_num); goto done; } mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); status = ocfs2_force_read_journal(inode);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -