📄 journal.c

📁 ocfs1.2.7 源码
💻 C
📖 第 1 页 / 共 3 页
字号:
	mlog(0, "Returned from journal_init_inode\n");	mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);	*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &		  OCFS2_JOURNAL_DIRTY_FL);	journal->j_journal = j_journal;	journal->j_inode = inode;	journal->j_bh = bh;	ocfs2_set_journal_params(osb);	journal->j_state = OCFS2_JOURNAL_LOADED;	status = 0;done:	if (status < 0) {		if (meta_lock)			ocfs2_meta_unlock(inode, 1);		if (bh != NULL)			brelse(bh);		if (inode) {			OCFS2_I(inode)->ip_open_count--;			iput(inode);		}	}	mlog_exit(status);	return status;}static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,				      int dirty){	int status;	unsigned int flags;	struct ocfs2_journal *journal = osb->journal;	struct buffer_head *bh = journal->j_bh;	struct ocfs2_dinode *fe;	mlog_entry_void();	fe = (struct ocfs2_dinode *)bh->b_data;	if (!OCFS2_IS_VALID_DINODE(fe)) {		/* This is called from startup/shutdown which will		 * handle the errors in a specific manner, so no need		 * to call ocfs2_error() here. */		mlog(ML_ERROR, "Journal dinode %"MLFu64"  has invalid "		     "signature: %.*s", fe->i_blkno, 7, fe->i_signature);		status = -EIO;		goto out;	}	flags = le32_to_cpu(fe->id1.journal1.ij_flags);	if (dirty)		flags |= OCFS2_JOURNAL_DIRTY_FL;	else		flags &= ~OCFS2_JOURNAL_DIRTY_FL;	fe->id1.journal1.ij_flags = cpu_to_le32(flags);	status = ocfs2_write_block(osb, bh, journal->j_inode);	if (status < 0)		mlog_errno(status);out:	mlog_exit(status);	return status;}/* * If the journal has been kmalloc'd it needs to be freed after this * call. */void ocfs2_journal_shutdown(struct ocfs2_super *osb){	struct ocfs2_journal *journal = NULL;	int status = 0;	struct inode *inode = NULL;	int num_running_trans = 0;	mlog_entry_void();	if (!osb)		BUG();	journal = osb->journal;	if (!journal)		goto done;	inode = journal->j_inode;	if (journal->j_state != OCFS2_JOURNAL_LOADED)		goto done;	/* need to inc inode use count as journal_destroy will iput. */	if (!igrab(inode))		BUG();	num_running_trans = atomic_read(&(osb->journal->j_num_trans));	if (num_running_trans > 0)		mlog(0, "Shutting down journal: must wait on %d "		     "running transactions!\n",		     num_running_trans);	/* Do a commit_cache here. It will flush our journal, *and*	 * release any locks that are still held.	 * set the SHUTDOWN flag and release the trans lock.	 * the commit thread will take the trans lock for us below. */	journal->j_state = OCFS2_JOURNAL_IN_SHUTDOWN;	/* The OCFS2_JOURNAL_IN_SHUTDOWN will signal to commit_cache to not	 * drop the trans_lock (which we want to hold until we	 * completely destroy the journal. */	if (osb->commit_task) {		/* Wait for the commit thread */		mlog(0, "Waiting for ocfs2commit to exit....\n");		kthread_stop(osb->commit_task);		osb->commit_task = NULL;	}	BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);	if (ocfs2_mount_local(osb)) {		journal_lock_updates(journal->j_journal);		status = journal_flush(journal->j_journal);		journal_unlock_updates(journal->j_journal);		if (status < 0)			mlog_errno(status);	}	if (status == 0) {		/*		 * Do not toggle if flush was unsuccessful otherwise		 * will leave dirty metadata in a "clean" journal		 */		status = ocfs2_journal_toggle_dirty(osb, 0);		if (status < 0)			mlog_errno(status);	}	/* Shutdown the kernel journal system */	journal_destroy(journal->j_journal);	OCFS2_I(inode)->ip_open_count--;	/* unlock our journal */	ocfs2_meta_unlock(inode, 1);	brelse(journal->j_bh);	journal->j_bh = NULL;	journal->j_state = OCFS2_JOURNAL_FREE;//	up_write(&journal->j_trans_barrier);done:	if (inode)		iput(inode);	mlog_exit_void();}static void ocfs2_clear_journal_error(struct super_block *sb,				      journal_t *journal,				      int slot){	int olderr;	olderr = journal_errno(journal);	if (olderr) {		mlog(ML_ERROR, "File system error %d recorded in "		     "journal %u.\n", olderr, slot);		mlog(ML_ERROR, "File system on device %s needs checking.\n",		     sb->s_id);		journal_ack_err(journal);		journal_clear_err(journal);	}}int ocfs2_journal_load(struct ocfs2_journal *journal, int local){	int status = 0;	struct ocfs2_super *osb;	mlog_entry_void();	if (!journal)		BUG();	osb = journal->j_osb;	status = journal_load(journal->j_journal);	if (status < 0) {		mlog(ML_ERROR, "Failed to load journal!\n");		goto done;	}	ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);	status = ocfs2_journal_toggle_dirty(osb, 1);	if (status < 0) {		mlog_errno(status);		goto done;	}	/* Launch the commit thread */	if (!local) {		osb->commit_task = kthread_run(ocfs2_commit_thread, osb,					       "ocfs2cmt-%d", osb->osb_id);		if (IS_ERR(osb->commit_task)) {			status = PTR_ERR(osb->commit_task);			osb->commit_task = NULL;			mlog(ML_ERROR, "unable to launch ocfs2commit thread, "			     "error=%d", status);			goto done;		}	} else		osb->commit_task = NULL;done:	mlog_exit(status);	return status;}/* 'full' flag tells us whether we clear out all blocks or if we just * mark the journal clean */int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full){	int status;	mlog_entry_void();	if (!journal)		BUG();	status = journal_wipe(journal->j_journal, full);	if (status < 0) {		mlog_errno(status);		goto bail;	}	status = ocfs2_journal_toggle_dirty(journal->j_osb, 0);	if (status < 0)		mlog_errno(status);bail:	mlog_exit(status);	return status;}/* * JBD Might read a cached version of another nodes journal file. We * don't want this as this file changes often and we get no * notification on those changes. The only way to be sure that we've * got the most up to date version of those blocks then is to force * read them off disk. Just searching through the buffer cache won't * work as there may be pages backing this file which are still marked * up to date. We know things can't change on this file underneath us * as we have the lock by now :) */static int ocfs2_force_read_journal(struct inode *inode){	int status = 0;	int i, p_blocks;	u64 v_blkno, p_blkno;#define CONCURRENT_JOURNAL_FILL 32	struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL];	mlog_entry_void();	BUG_ON(inode->i_blocks !=		     ocfs2_align_bytes_to_sectors(i_size_read(inode)));	memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);	mlog(0, "Force reading %lu blocks\n",	     (unsigned long)(inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9)));	v_blkno = 0;	while (v_blkno <	       (inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9))) {		status = ocfs2_extent_map_get_blocks(inode, v_blkno,						     1, &p_blkno,						     &p_blocks);		if (status < 0) {			mlog_errno(status);			goto bail;		}		if (p_blocks > CONCURRENT_JOURNAL_FILL)			p_blocks = CONCURRENT_JOURNAL_FILL;		/* We are reading journal data which should not		 * be put in the uptodate cache */		status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),					   p_blkno, p_blocks, bhs, 0,					   NULL);		if (status < 0) {			mlog_errno(status);			goto bail;		}		for(i = 0; i < p_blocks; i++) {			brelse(bhs[i]);			bhs[i] = NULL;		}		v_blkno += p_blocks;	}bail:	for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)		if (bhs[i])			brelse(bhs[i]);	mlog_exit(status);	return status;}struct ocfs2_la_recovery_item {	struct list_head	lri_list;	int			lri_slot;	struct ocfs2_dinode	*lri_la_dinode;	struct ocfs2_dinode	*lri_tl_dinode;	int 			lri_node_num;};/* Does the second half of the recovery process. By this point, the * node is marked clean and can actually be considered recovered, * hence it's no longer in the recovery map, but there's still some * cleanup we can do which shouldn't happen within the recovery thread * as locking in that context becomes very difficult if we are to take * recovering nodes into account. * * NOTE: This function can and will sleep on recovery of other nodes * during cluster locking, just like any other ocfs2 process. */void ocfs2_complete_recovery(void *data){	int ret;	struct ocfs2_super *osb = data;	struct ocfs2_journal *journal = osb->journal;	struct ocfs2_dinode *la_dinode, *tl_dinode;	struct ocfs2_la_recovery_item *item;	struct list_head *p, *n;	LIST_HEAD(tmp_la_list);	mlog_entry_void();	mlog(0, "completing recovery from keventd\n");	spin_lock(&journal->j_lock);	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);	spin_unlock(&journal->j_lock);	list_for_each_safe(p, n, &tmp_la_list) {		item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);		list_del_init(&item->lri_list);		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);		la_dinode = item->lri_la_dinode;		if (la_dinode) {			mlog(0, "Clean up local alloc %"MLFu64"\n",			     la_dinode->i_blkno);			ret = ocfs2_complete_local_alloc_recovery(osb,								  la_dinode);			if (ret < 0)				mlog_errno(ret);			kfree(la_dinode);		}		tl_dinode = item->lri_tl_dinode;		if (tl_dinode) {			mlog(0, "Clean up truncate log %"MLFu64"\n",			     tl_dinode->i_blkno);			ret = ocfs2_complete_truncate_log_recovery(osb,								   tl_dinode);			if (ret < 0)				mlog_errno(ret);			kfree(tl_dinode);		}		ret = ocfs2_recover_orphans(osb, item->lri_slot, 							item->lri_node_num);		if (ret < 0)			mlog_errno(ret);		kfree(item);	}	mlog(0, "Recovery completion\n");	mlog_exit_void();}/* NOTE: This function always eats your references to la_dinode and * tl_dinode, either manually on error, or by passing them to * ocfs2_complete_recovery */static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,					    int slot_num,					    struct ocfs2_dinode *la_dinode,					    struct ocfs2_dinode *tl_dinode,					    int node_num){	struct ocfs2_la_recovery_item *item;	item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_NOFS);	if (!item) {		/* Though we wish to avoid it, we are in fact safe in		 * skipping local alloc cleanup as fsck.ocfs2 is more		 * than capable of reclaiming unused space. */		if (la_dinode)			kfree(la_dinode);		if (tl_dinode)			kfree(tl_dinode);		mlog_errno(-ENOMEM);		return;	}	INIT_LIST_HEAD(&item->lri_list);	item->lri_la_dinode = la_dinode;	item->lri_slot = slot_num;	item->lri_node_num = node_num;	item->lri_tl_dinode = tl_dinode;	spin_lock(&journal->j_lock);	list_add_tail(&item->lri_list, &journal->j_la_cleanups);	queue_work(ocfs2_wq, &journal->j_recovery_work);	spin_unlock(&journal->j_lock);}/* Called by the mount code to queue recovery the last part of * recovery for it's own slot. */void ocfs2_complete_mount_recovery(struct ocfs2_super *osb){	struct ocfs2_journal *journal = osb->journal;	if (osb->dirty) {		/* No need to queue up our truncate_log as regular		 * cleanup will catch that. */		ocfs2_queue_recovery_completion(journal,						osb->slot_num,						osb->local_alloc_copy,						NULL,						osb->node_num);		ocfs2_schedule_truncate_log_flush(osb, 0);		osb->local_alloc_copy = NULL;		osb->dirty = 0;	}}static int __ocfs2_recovery_thread(void *arg){	int status;	int node_num = O2NM_INVALID_NODE_NUM;	struct ocfs2_super *osb = arg;	mlog_entry_void();	status = ocfs2_wait_on_mount(osb);	if (status < 0) {		goto bail;	}restart:	status = ocfs2_super_lock(osb, 1);	if (status < 0) {		mlog_errno(status);		goto bail;	}	while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {		node_num = ocfs2_node_map_first_set_bit(osb,							&osb->recovery_map);		if (node_num == O2NM_INVALID_NODE_NUM) {			mlog(0, "Out of nodes to recover.\n");			break;		}		status = ocfs2_recover_node(osb, node_num);		if (status < 0) {			mlog(ML_ERROR,			     "Error %d recovering node %d on device (%u,%u)!\n",			     status, node_num,			     MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));			mlog(ML_ERROR, "Volume requires unmount.\n");			continue;		}		ocfs2_recovery_map_clear(osb, node_num);	}	ocfs2_super_unlock(osb, 1);	/* We always run recovery on our own orphan dir - the dead	 * node(s) may have voted "no" on an inode delete earlier. A	 * revote is therefore required. */	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,					NULL, node_num);bail:	down(&osb->recovery_lock);	if (!status &&	    !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {		up(&osb->recovery_lock);		goto restart;	}	osb->recovery_thread_task = NULL;	mb(); /* sync with ocfs2_recovery_thread_running */	wake_up(&osb->recovery_event);	up(&osb->recovery_lock);	mlog_exit(status);	/* no one is callint kthread_stop() for us so the kthread() api	 * requires that we call do_exit().  And it isn't exported, but	 * complete_and_exit() seems to be a minimal wrapper around it. */	complete_and_exit(NULL, status);	return status;}void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num){	mlog_entry("(node_num=%d, osb->node_num = %d)\n",		   node_num, osb->node_num);	down(&osb->recovery_lock);	if (osb->disable_recovery)		goto out;	/* People waiting on recovery will wait on	 * the recovery map to empty. */	if (!ocfs2_recovery_map_set(osb, node_num))		mlog(0, "node %d already be in recovery.\n", node_num);	mlog(0, "starting recovery thread...\n");	if (osb->recovery_thread_task)		goto out;	osb->recovery_thread_task =  kthread_run(__ocfs2_recovery_thread, osb,						 "ocfs2rec-%d", osb->osb_id);	if (IS_ERR(osb->recovery_thread_task)) {		mlog_errno((int)PTR_ERR(osb->recovery_thread_task));		osb->recovery_thread_task = NULL;	}out:	up(&osb->recovery_lock);	wake_up(&osb->recovery_event);	mlog_exit_void();}/* Does the actual journal replay and marks the journal inode as * clean. Will only replay if the journal inode is marked dirty. */static int ocfs2_replay_journal(struct ocfs2_super *osb,				int node_num,				int slot_num){	int status;	int got_lock = 0;	unsigned int flags;	struct inode *inode = NULL;	struct ocfs2_dinode *fe;	journal_t *journal = NULL;	struct buffer_head *bh = NULL;	inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,					    slot_num);	if (inode == NULL) {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -