📄 journal.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
		mlog(0, "Shutting down journal: must wait on %d "		     "running transactions!\n",		     num_running_trans);	/* Do a commit_cache here. It will flush our journal, *and*	 * release any locks that are still held.	 * set the SHUTDOWN flag and release the trans lock.	 * the commit thread will take the trans lock for us below. */	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not	 * drop the trans_lock (which we want to hold until we	 * completely destroy the journal. */	if (osb->commit_task) {		/* Wait for the commit thread */		mlog(0, "Waiting for ocfs2commit to exit....\n");		kthread_stop(osb->commit_task);		osb->commit_task = NULL;	}	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);	if (ocfs2_mount_local(osb)) {		journal_lock_updates(journal->j_journal);		status = journal_flush(journal->j_journal);		journal_unlock_updates(journal->j_journal);		if (status < 0)			mlog_errno(status);	}	if (status == 0) {		/*		 * Do not toggle if flush was unsuccessful otherwise		 * will leave dirty metadata in a "clean" journal		 */		status = ocfs2_journal_toggle_dirty(osb, 0);		if (status < 0)			mlog_errno(status);	}	/* Shutdown the kernel journal system */	journal_destroy(journal->j_journal);	OCFS2_I(inode)->ip_open_count--;	/* unlock our journal */	ocfs2_meta_unlock(inode, 1);	brelse(journal->j_bh);	journal->j_bh = NULL;	journal->j_state = OCFS2_JOURNAL_FREE;//	up_write(&journal->j_trans_barrier);done:	if (inode)		iput(inode);	mlog_exit_void();}static void ocfs2_clear_journal_error(struct super_block *sb,				      journal_t *journal,				      int slot){	int olderr;	olderr = journal_errno(journal);	if (olderr) {		mlog(ML_ERROR, "File system error %d recorded in "		     "journal %u.\n", olderr, slot);		mlog(ML_ERROR, "File system on device %s needs checking.\n",		     sb->s_id);		journal_ack_err(journal);		journal_clear_err(journal);	}}int ocfs2_journal_load(struct ocfs2_journal *journal, int local){	int status = 0;	struct ocfs2_super *osb;	mlog_entry_void();	if (!journal)		BUG();	osb = journal->j_osb;	status = journal_load(journal->j_journal);	if (status < 0) {		mlog(ML_ERROR, "Failed to load journal!\n");		goto done;	}	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);	status = ocfs2_journal_toggle_dirty(osb, 1);	if (status < 0) {		mlog_errno(status);		goto done;	}	/* Launch the commit thread */	if (!local) {		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,					       "ocfs2cmt");		if (IS_ERR(osb->commit_task)) {			status = PTR_ERR(osb->commit_task);			osb->commit_task = NULL;			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "			     "error=%d", status);			goto done;		}	} else		osb->commit_task = NULL;done:	mlog_exit(status);	return status;}/* 'full' flag tells us whether we clear out all blocks or if we just * mark the journal clean */int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full){	int status;	mlog_entry_void();	BUG_ON(!journal);	status = journal_wipe(journal->j_journal, full);	if (status < 0) {		mlog_errno(status);		goto bail;	}	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);	if (status < 0)		mlog_errno(status);bail:	mlog_exit(status);	return status;}/* * JBD Might read a cached version of another nodes journal file. We * don't want this as this file changes often and we get no * notification on those changes. The only way to be sure that we've * got the most up to date version of those blocks then is to force * read them off disk. Just searching through the buffer cache won't * work as there may be pages backing this file which are still marked * up to date. We know things can't change on this file underneath us * as we have the lock by now :) */static int ocfs2_force_read_journal(struct inode *inode){	int status = 0;	int i;	u64 v_blkno, p_blkno, p_blocks, num_blocks;#define CONCURRENT_JOURNAL_FILL 32ULL	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];	mlog_entry_void();	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);	num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);	v_blkno = 0;	while (v_blkno < num_blocks) {		status = ocfs2_extent_map_get_blocks(inode, v_blkno,						     &p_blkno, &p_blocks, NULL);		if (status < 0) {			mlog_errno(status);			goto bail;		}		if (p_blocks > CONCURRENT_JOURNAL_FILL)			p_blocks = CONCURRENT_JOURNAL_FILL;		/* We are reading journal data which should not		 * be put in the uptodate cache */		status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),					   p_blkno, p_blocks, bhs, 0,					   NULL);		if (status < 0) {			mlog_errno(status);			goto bail;		}		for(i = 0; i < p_blocks; i++) {			brelse(bhs[i]);			bhs[i] = NULL;		}		v_blkno += p_blocks;	}bail:	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)		if (bhs[i])			brelse(bhs[i]);	mlog_exit(status);	return status;}struct ocfs2_la_recovery_item {	struct list_head	lri_list;	int			lri_slot;	struct ocfs2_dinode	*lri_la_dinode;	struct ocfs2_dinode	*lri_tl_dinode;};/* Does the second half of the recovery process. By this point, the * node is marked clean and can actually be considered recovered, * hence it's no longer in the recovery map, but there's still some * cleanup we can do which shouldn't happen within the recovery thread * as locking in that context becomes very difficult if we are to take * recovering nodes into account. * * NOTE: This function can and will sleep on recovery of other nodes * during cluster locking, just like any other ocfs2 process. */void ocfs2_complete_recovery(struct work_struct *work){	int ret;	struct ocfs2_journal *journal =		container_of(work, struct ocfs2_journal, j_recovery_work);	struct ocfs2_super *osb = journal->j_osb;	struct ocfs2_dinode *la_dinode, *tl_dinode;	struct ocfs2_la_recovery_item *item, *n;	LIST_HEAD(tmp_la_list);	mlog_entry_void();	mlog(0, "completing recovery from keventd\n");	spin_lock(&journal->j_lock);	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);	spin_unlock(&journal->j_lock);	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {		list_del_init(&item->lri_list);		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);		la_dinode = item->lri_la_dinode;		if (la_dinode) {			mlog(0, "Clean up local alloc %llu\n",			     (unsigned long long)le64_to_cpu(la_dinode->i_blkno));			ret = ocfs2_complete_local_alloc_recovery(osb,								  la_dinode);			if (ret < 0)				mlog_errno(ret);			kfree(la_dinode);		}		tl_dinode = item->lri_tl_dinode;		if (tl_dinode) {			mlog(0, "Clean up truncate log %llu\n",			     (unsigned long long)le64_to_cpu(tl_dinode->i_blkno));			ret = ocfs2_complete_truncate_log_recovery(osb,								   tl_dinode);			if (ret < 0)				mlog_errno(ret);			kfree(tl_dinode);		}		ret = ocfs2_recover_orphans(osb, item->lri_slot);		if (ret < 0)			mlog_errno(ret);		kfree(item);	}	mlog(0, "Recovery completion\n");	mlog_exit_void();}/* NOTE: This function always eats your references to la_dinode and * tl_dinode, either manually on error, or by passing them to * ocfs2_complete_recovery */static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,					    int slot_num,					    struct ocfs2_dinode *la_dinode,					    struct ocfs2_dinode *tl_dinode){	struct ocfs2_la_recovery_item *item;	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);	if (!item) {		/* Though we wish to avoid it, we are in fact safe in		 * skipping local alloc cleanup as fsck.ocfs2 is more		 * than capable of reclaiming unused space. */		if (la_dinode)			kfree(la_dinode);		if (tl_dinode)			kfree(tl_dinode);		mlog_errno(-ENOMEM);		return;	}	INIT_LIST_HEAD(&item->lri_list);	item->lri_la_dinode = la_dinode;	item->lri_slot = slot_num;	item->lri_tl_dinode = tl_dinode;	spin_lock(&journal->j_lock);	list_add_tail(&item->lri_list, &journal->j_la_cleanups);	queue_work(ocfs2_wq, &journal->j_recovery_work);	spin_unlock(&journal->j_lock);}/* Called by the mount code to queue recovery the last part of * recovery for it's own slot. */void ocfs2_complete_mount_recovery(struct ocfs2_super *osb){	struct ocfs2_journal *journal = osb->journal;	if (osb->dirty) {		/* No need to queue up our truncate_log as regular		 * cleanup will catch that. */		ocfs2_queue_recovery_completion(journal,						osb->slot_num,						osb->local_alloc_copy,						NULL);		ocfs2_schedule_truncate_log_flush(osb, 0);		osb->local_alloc_copy = NULL;		osb->dirty = 0;	}}static int __ocfs2_recovery_thread(void *arg){	int status, node_num;	struct ocfs2_super *osb = arg;	mlog_entry_void();	status = ocfs2_wait_on_mount(osb);	if (status < 0) {		goto bail;	}restart:	status = ocfs2_super_lock(osb, 1);	if (status < 0) {		mlog_errno(status);		goto bail;	}	while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {		node_num = ocfs2_node_map_first_set_bit(osb,							&osb->recovery_map);		if (node_num == O2NM_INVALID_NODE_NUM) {			mlog(0, "Out of nodes to recover.\n");			break;		}		status = ocfs2_recover_node(osb, node_num);		if (status < 0) {			mlog(ML_ERROR,			     "Error %d recovering node %d on device (%u,%u)!\n",			     status, node_num,			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));			mlog(ML_ERROR, "Volume requires unmount.\n");			continue;		}		ocfs2_recovery_map_clear(osb, node_num);	}	ocfs2_super_unlock(osb, 1);	/* We always run recovery on our own orphan dir - the dead	 * node(s) may have voted "no" on an inode delete earlier. A	 * revote is therefore required. */	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,					NULL);bail:	mutex_lock(&osb->recovery_lock);	if (!status &&	    !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {		mutex_unlock(&osb->recovery_lock);		goto restart;	}	osb->recovery_thread_task = NULL;	mb(); /* sync with ocfs2_recovery_thread_running */	wake_up(&osb->recovery_event);	mutex_unlock(&osb->recovery_lock);	mlog_exit(status);	/* no one is callint kthread_stop() for us so the kthread() api	 * requires that we call do_exit().  And it isn't exported, but	 * complete_and_exit() seems to be a minimal wrapper around it. */	complete_and_exit(NULL, status);	return status;}void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num){	mlog_entry("(node_num=%d, osb->node_num = %d)\n",		   node_num, osb->node_num);	mutex_lock(&osb->recovery_lock);	if (osb->disable_recovery)		goto out;	/* People waiting on recovery will wait on	 * the recovery map to empty. */	if (!ocfs2_recovery_map_set(osb, node_num))		mlog(0, "node %d already be in recovery.\n", node_num);	mlog(0, "starting recovery thread...\n");	if (osb->recovery_thread_task)		goto out;	osb->recovery_thread_task =  kthread_run(__ocfs2_recovery_thread, osb,						 "ocfs2rec");	if (IS_ERR(osb->recovery_thread_task)) {		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));		osb->recovery_thread_task = NULL;	}out:	mutex_unlock(&osb->recovery_lock);	wake_up(&osb->recovery_event);	mlog_exit_void();}/* Does the actual journal replay and marks the journal inode as * clean. Will only replay if the journal inode is marked dirty. */static int ocfs2_replay_journal(struct ocfs2_super *osb,				int node_num,				int slot_num){	int status;	int got_lock = 0;	unsigned int flags;	struct inode *inode = NULL;	struct ocfs2_dinode *fe;	journal_t *journal = NULL;	struct buffer_head *bh = NULL;	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,					    slot_num);	if (inode == NULL) {		status = -EACCES;		mlog_errno(status);		goto done;	}	if (is_bad_inode(inode)) {		status = -EACCES;		iput(inode);		inode = NULL;		mlog_errno(status);		goto done;	}	SET_INODE_JOURNAL(inode);	status = ocfs2_meta_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);	if (status < 0) {		mlog(0, "status returned from ocfs2_meta_lock=%d\n", status);		if (status != -ERESTARTSYS)			mlog(ML_ERROR, "Could not lock journal!\n");		goto done;	}	got_lock = 1;	fe = (struct ocfs2_dinode *) bh->b_data;	flags = le32_to_cpu(fe->id1.journal1.ij_flags);	if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {		mlog(0, "No recovery required for node %d\n", node_num);		goto done;	}	mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n",	     node_num, slot_num,	     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);	status = ocfs2_force_read_journal(inode);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -