mft.c

来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 1,099 行 · 第 1/3 页

C
1,099
字号
{	ntfs_volume *vol = ni->vol;	struct page *page = ni->page;	unsigned int blocksize = vol->sb->s_blocksize;	int max_bhs = vol->mft_record_size / blocksize;	struct buffer_head *bhs[max_bhs];	struct buffer_head *bh, *head;	unsigned int block_start, block_end, m_start, m_end;	int i_bhs, nr_bhs, err = 0;	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);	BUG_ON(NInoAttr(ni));	BUG_ON(!max_bhs);	BUG_ON(!PageLocked(page));	/*	 * If the ntfs_inode is clean no need to do anything.  If it is dirty,	 * mark it as clean now so that it can be redirtied later on if needed.	 * There is no danger of races since the caller is holding the locks	 * for the mft record @m and the page it is in.	 */	if (!NInoTestClearDirty(ni))		goto done;	/* Make sure we have mapped buffers. */	if (!page_has_buffers(page)) {no_buffers_err_out:		ntfs_error(vol->sb, "Writing mft records without existing "				"buffers is not implemented yet.  %s",				ntfs_please_email);		err = -EOPNOTSUPP;		goto err_out;	}	bh = head = page_buffers(page);	if (!bh)		goto no_buffers_err_out;	nr_bhs = 0;	block_start = 0;	m_start = ni->page_ofs;	m_end = m_start + vol->mft_record_size;	do {		block_end = block_start + blocksize;		/*		 * If the buffer is outside the mft record, just skip it,		 * clearing it if it is dirty to make sure it is not written		 * out.  It should never be marked dirty but better be safe.		 */		if ((block_end <= m_start) || (block_start >= m_end)) {			if (buffer_dirty(bh)) {				ntfs_warning(vol->sb, "Clearing dirty mft "						"record page buffer.  %s",						ntfs_please_email);				clear_buffer_dirty(bh);			}			continue;		}		if (!buffer_mapped(bh)) {			ntfs_error(vol->sb, "Writing mft records without "					"existing mapped buffers is not "					"implemented yet.  %s",					ntfs_please_email);			err = -EOPNOTSUPP;			continue;		}		if (!buffer_uptodate(bh)) {			ntfs_error(vol->sb, "Writing mft records without "					"existing uptodate buffers is not "					"implemented yet.  %s",					ntfs_please_email);			err = -EOPNOTSUPP;			continue;		}		BUG_ON(!nr_bhs && (m_start != block_start));		BUG_ON(nr_bhs >= max_bhs);		bhs[nr_bhs++] = bh;		BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end));	} while (block_start = block_end, (bh = bh->b_this_page) != head);	if (unlikely(err))		goto cleanup_out;	/* Apply the mst protection fixups. */	err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);	if (err) {		ntfs_error(vol->sb, "Failed to apply mst fixups!");		goto cleanup_out;	}	flush_dcache_mft_record_page(ni);	/* Lock buffers and start synchronous write i/o on them. */	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {		struct buffer_head *tbh = bhs[i_bhs];		if (unlikely(test_set_buffer_locked(tbh)))			BUG();		BUG_ON(!buffer_uptodate(tbh));		if (buffer_dirty(tbh))			clear_buffer_dirty(tbh);		get_bh(tbh);		tbh->b_end_io = end_buffer_write_sync;		submit_bh(WRITE, tbh);	}	/* Synchronize the mft mirror now if not @sync. */	if (!sync && ni->mft_no < vol->mftmirr_size)		sync_mft_mirror(ni, m, sync);	/* Wait on i/o completion of buffers. */	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {		struct buffer_head *tbh = bhs[i_bhs];		wait_on_buffer(tbh);		if (unlikely(!buffer_uptodate(tbh))) {			err = -EIO;			/*			 * Set the buffer uptodate so the page & buffer states			 * don't become out of sync.			 */			if (PageUptodate(page))				set_buffer_uptodate(tbh);		}	}	/* If @sync, now synchronize the mft mirror. */	if (sync && ni->mft_no < vol->mftmirr_size)		sync_mft_mirror(ni, m, sync);	/* Remove the mst protection fixups again. */	post_write_mst_fixup((NTFS_RECORD*)m);	flush_dcache_mft_record_page(ni);	if (unlikely(err)) {		/* I/O error during writing.  This is really bad! */		ntfs_error(vol->sb, "I/O error while writing mft record "				"0x%lx!  Marking base inode as bad.  You "				"should unmount the volume and run chkdsk.",				ni->mft_no);		goto err_out;	}done:	ntfs_debug("Done.");	return 0;cleanup_out:	/* Clean the buffers. */	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)		clear_buffer_dirty(bhs[i_bhs]);err_out:	/*	 * Current state: all buffers are clean, unlocked, and uptodate.	 * The caller should mark the base inode as bad so that no more i/o	 * happens.  ->clear_inode() will still be invoked so all extent inodes	 * and other allocated memory will be freed.	 */	if (err == -ENOMEM) {		ntfs_error(vol->sb, "Not enough memory to write mft record.  "				"Redirtying so the write is retried later.");		mark_mft_record_dirty(ni);		err = 0;	}	return err;}/** * ntfs_mft_writepage - check if a metadata page contains dirty mft records * @page:	metadata page possibly containing dirty mft records * @wbc:	writeback control structure * * This is called from the VM when it wants to have a dirty $MFT/$DATA metadata * page cache page cleaned.  The VM has already locked the page and marked it * clean.  Instead of writing the page as a conventional ->writepage function * would do, we check if the page still contains any dirty mft records (it must * have done at some point in the past since the page was marked dirty) and if * none are found, i.e. all mft records are clean, we unlock the page and * return.  The VM is then free to do with the page as it pleases.  If on the * other hand we do find any dirty mft records in the page, we redirty the page * before unlocking it and returning so the VM knows that the page is still * busy and cannot be thrown out. * * Note, we do not actually write any dirty mft records here because they are * dirty inodes and hence will be written by the VFS inode dirty code paths. * There is no need to write them from the VM page dirty code paths, too and in * fact once we implement journalling it would be a complete nightmare having * two code paths leading to mft record writeout. */static int ntfs_mft_writepage(struct page *page, struct writeback_control *wbc){	struct inode *mft_vi = page->mapping->host;	struct super_block *sb = mft_vi->i_sb;	ntfs_volume *vol = NTFS_SB(sb);	u8 *maddr;	MFT_RECORD *m;	ntfs_inode **extent_nis;	unsigned long mft_no;	int nr, i, j;	BOOL is_dirty = FALSE;	BUG_ON(!PageLocked(page));	BUG_ON(PageWriteback(page));	BUG_ON(mft_vi != vol->mft_ino);	/* The first mft record number in the page. */	mft_no = page->index << (PAGE_CACHE_SHIFT - vol->mft_record_size_bits);	/* Number of mft records in the page. */	nr = PAGE_CACHE_SIZE >> vol->mft_record_size_bits;	BUG_ON(!nr);	ntfs_debug("Entering for %i inodes starting at 0x%lx.", nr, mft_no);	/* Iterate over the mft records in the page looking for a dirty one. */	maddr = (u8*)kmap(page);	for (i = 0; i < nr; ++i, ++mft_no, maddr += vol->mft_record_size) {		struct inode *vi;		ntfs_inode *ni, *eni;		ntfs_attr na;		na.mft_no = mft_no;		na.name = NULL;		na.name_len = 0;		na.type = AT_UNUSED;		/*		 * Check if the inode corresponding to this mft record is in		 * the VFS inode cache and obtain a reference to it if it is.		 */		ntfs_debug("Looking for inode 0x%lx in icache.", mft_no);		/*		 * For inode 0, i.e. $MFT itself, we cannot use ilookup5() from		 * here or we deadlock because the inode is already locked by		 * the kernel (fs/fs-writeback.c::__sync_single_inode()) and		 * ilookup5() waits until the inode is unlocked before		 * returning it and it never gets unlocked because		 * ntfs_mft_writepage() never returns.  )-:  Fortunately, we		 * have inode 0 pinned in icache for the duration of the mount		 * so we can access it directly.		 */		if (!mft_no) {			/* Balance the below iput(). */			vi = igrab(mft_vi);			BUG_ON(vi != mft_vi);		} else			vi = ilookup5(sb, mft_no, (test_t)ntfs_test_inode, &na);		if (vi) {			ntfs_debug("Inode 0x%lx is in icache.", mft_no);			/* The inode is in icache.  Check if it is dirty. */			ni = NTFS_I(vi);			if (!NInoDirty(ni)) {				/* The inode is not dirty, skip this record. */				ntfs_debug("Inode 0x%lx is not dirty, "						"continuing search.", mft_no);				iput(vi);				continue;			}			ntfs_debug("Inode 0x%lx is dirty, aborting search.",					mft_no);			/* The inode is dirty, no need to search further. */			iput(vi);			is_dirty = TRUE;			break;		}		ntfs_debug("Inode 0x%lx is not in icache.", mft_no);		/* The inode is not in icache. */		/* Skip the record if it is not a mft record (type "FILE"). */		if (!ntfs_is_mft_recordp((le32*)maddr)) {			ntfs_debug("Mft record 0x%lx is not a FILE record, "					"continuing search.", mft_no);			continue;		}		m = (MFT_RECORD*)maddr;		/*		 * Skip the mft record if it is not in use.  FIXME:  What about		 * deleted/deallocated (extent) inodes?  (AIA)		 */		if (!(m->flags & MFT_RECORD_IN_USE)) {			ntfs_debug("Mft record 0x%lx is not in use, "					"continuing search.", mft_no);			continue;		}		/* Skip the mft record if it is a base inode. */		if (!m->base_mft_record) {			ntfs_debug("Mft record 0x%lx is a base record, "					"continuing search.", mft_no);			continue;		}		/*		 * This is an extent mft record.  Check if the inode		 * corresponding to its base mft record is in icache.		 */		na.mft_no = MREF_LE(m->base_mft_record);		ntfs_debug("Mft record 0x%lx is an extent record.  Looking "				"for base inode 0x%lx in icache.", mft_no,				na.mft_no);		vi = ilookup5(sb, na.mft_no, (test_t)ntfs_test_inode,				&na);		if (!vi) {			/*			 * The base inode is not in icache.  Skip this extent			 * mft record.			 */			ntfs_debug("Base inode 0x%lx is not in icache, "					"continuing search.", na.mft_no);			continue;		}		ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);		/*		 * The base inode is in icache.  Check if it has the extent		 * inode corresponding to this extent mft record attached.		 */		ni = NTFS_I(vi);		down(&ni->extent_lock);		if (ni->nr_extents <= 0) {			/*			 * The base inode has no attached extent inodes.  Skip			 * this extent mft record.			 */			up(&ni->extent_lock);			iput(vi);			continue;		}		/* Iterate over the attached extent inodes. */		extent_nis = ni->ext.extent_ntfs_inos;		for (eni = NULL, j = 0; j < ni->nr_extents; ++j) {			if (mft_no == extent_nis[j]->mft_no) {				/*				 * Found the extent inode corresponding to this				 * extent mft record.				 */				eni = extent_nis[j];				break;			}		}		/*		 * If the extent inode was not attached to the base inode, skip		 * this extent mft record.		 */		if (!eni) {			up(&ni->extent_lock);			iput(vi);			continue;		}		/*		 * Found the extent inode corrsponding to this extent mft		 * record.  If it is dirty, no need to search further.		 */		if (NInoDirty(eni)) {			up(&ni->extent_lock);			iput(vi);			is_dirty = TRUE;			break;		}		/* The extent inode is not dirty, so do the next record. */		up(&ni->extent_lock);		iput(vi);	}	kunmap(page);	/* If a dirty mft record was found, redirty the page. */	if (is_dirty) {		ntfs_debug("Inode 0x%lx is dirty.  Redirtying the page "				"starting at inode 0x%lx.", mft_no,				page->index << (PAGE_CACHE_SHIFT -				vol->mft_record_size_bits));		redirty_page_for_writepage(wbc, page);		unlock_page(page);	} else {		/*		 * Keep the VM happy.  This must be done otherwise the		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though		 * the page is clean.		 */		BUG_ON(PageWriteback(page));		set_page_writeback(page);		unlock_page(page);		end_page_writeback(page);	}	ntfs_debug("Done.");	return 0;}#endif /* NTFS_RW */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?