📄 mft.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
	if (!nr_bhs)		goto done;	if (unlikely(err))		goto cleanup_out;	/* Apply the mst protection fixups. */	err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size);	if (err) {		ntfs_error(vol->sb, "Failed to apply mst fixups!");		goto cleanup_out;	}	flush_dcache_mft_record_page(ni);	/* Lock buffers and start synchronous write i/o on them. */	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {		struct buffer_head *tbh = bhs[i_bhs];		if (unlikely(test_set_buffer_locked(tbh)))			BUG();		BUG_ON(!buffer_uptodate(tbh));		clear_buffer_dirty(tbh);		get_bh(tbh);		tbh->b_end_io = end_buffer_write_sync;		submit_bh(WRITE, tbh);	}	/* Synchronize the mft mirror now if not @sync. */	if (!sync && ni->mft_no < vol->mftmirr_size)		ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync);	/* Wait on i/o completion of buffers. */	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {		struct buffer_head *tbh = bhs[i_bhs];		wait_on_buffer(tbh);		if (unlikely(!buffer_uptodate(tbh))) {			err = -EIO;			/*			 * Set the buffer uptodate so the page and buffer			 * states do not become out of sync.			 */			if (PageUptodate(page))				set_buffer_uptodate(tbh);		}	}	/* If @sync, now synchronize the mft mirror. */	if (sync && ni->mft_no < vol->mftmirr_size)		ntfs_sync_mft_mirror(vol, ni->mft_no, m, sync);	/* Remove the mst protection fixups again. */	post_write_mst_fixup((NTFS_RECORD*)m);	flush_dcache_mft_record_page(ni);	if (unlikely(err)) {		/* I/O error during writing.  This is really bad! */		ntfs_error(vol->sb, "I/O error while writing mft record "				"0x%lx!  Marking base inode as bad.  You "				"should unmount the volume and run chkdsk.",				ni->mft_no);		goto err_out;	}done:	ntfs_debug("Done.");	return 0;cleanup_out:	/* Clean the buffers. */	for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++)		clear_buffer_dirty(bhs[i_bhs]);err_out:	/*	 * Current state: all buffers are clean, unlocked, and uptodate.	 * The caller should mark the base inode as bad so that no more i/o	 * happens.  
->clear_inode() will still be invoked so all extent inodes	 * and other allocated memory will be freed.	 */	if (err == -ENOMEM) {		ntfs_error(vol->sb, "Not enough memory to write mft record.  "				"Redirtying so the write is retried later.");		mark_mft_record_dirty(ni);		err = 0;	} else		NVolSetErrors(vol);	return err;}/** * ntfs_may_write_mft_record - check if an mft record may be written out * @vol:	[IN]  ntfs volume on which the mft record to check resides * @mft_no:	[IN]  mft record number of the mft record to check * @m:		[IN]  mapped mft record to check * @locked_ni:	[OUT] caller has to unlock this ntfs inode if one is returned * * Check if the mapped (base or extent) mft record @m with mft record number * @mft_no belonging to the ntfs volume @vol may be written out.  If necessary * and possible the ntfs inode of the mft record is locked and the base vfs * inode is pinned.  The locked ntfs inode is then returned in @locked_ni.  The * caller is responsible for unlocking the ntfs inode and unpinning the base * vfs inode. * * Return 'true' if the mft record may be written out and 'false' if not. * * The caller has locked the page and cleared the uptodate flag on it which * means that we can safely write out any dirty mft records that do not have * their inodes in icache as determined by ilookup5() as anyone * opening/creating such an inode would block when attempting to map the mft * record in read_cache_page() until we are finished with the write out. * * Here is a description of the tests we perform: * * If the inode is found in icache we know the mft record must be a base mft * record.  If it is dirty, we do not write it and return 'false' as the vfs * inode write paths will result in the access times being updated which would * cause the base mft record to be redirtied and written out again.  
(We know * the access time update will modify the base mft record because Windows * chkdsk complains if the standard information attribute is not in the base * mft record.) * * If the inode is in icache and not dirty, we attempt to lock the mft record * and if we find the lock was already taken, it is not safe to write the mft * record and we return 'false'. * * If we manage to obtain the lock we have exclusive access to the mft record, * which also allows us safe writeout of the mft record.  We then set * @locked_ni to the locked ntfs inode and return 'true'. * * Note we cannot just lock the mft record and sleep while waiting for the lock * because this would deadlock due to lock reversal (normally the mft record is * locked before the page is locked but we already have the page locked here * when we try to lock the mft record). * * If the inode is not in icache we need to perform further checks. * * If the mft record is not a FILE record or it is a base mft record, we can * safely write it and return 'true'. * * We now know the mft record is an extent mft record.  We check if the inode * corresponding to its base mft record is in icache and obtain a reference to * it if it is.  If it is not, we can safely write it and return 'true'. * * We now have the base inode for the extent mft record.  We check if it has an * ntfs inode for the extent mft record attached and if not it is safe to write * the extent mft record and we return 'true'. * * The ntfs inode for the extent mft record is attached to the base inode so we * attempt to lock the extent mft record and if we find the lock was already * taken, it is not safe to write the extent mft record and we return 'false'. * * If we manage to obtain the lock we have exclusive access to the extent mft * record, which also allows us safe writeout of the extent mft record.  We * set the ntfs inode of the extent mft record clean and then set @locked_ni to * the now locked ntfs inode and return 'true'. 
 *
 * Note, the reason for actually writing dirty mft records here and not just
 * relying on the vfs inode dirty code paths is that we can have mft records
 * modified without them ever having actual inodes in memory.  Also we can have
 * dirty mft records with clean ntfs inodes in memory.  None of the described
 * cases would result in the dirty mft records being written out if we only
 * relied on the vfs inode dirty code paths.  And these cases can really occur
 * during allocation of new mft records and in particular when the
 * initialized_size of the $MFT/$DATA attribute is extended and the new space
 * is initialized using ntfs_mft_record_format().  The clean inode can then
 * appear if the mft record is reused for a new inode before it got written
 * out.
 *
 * Reference handling as implemented below: when 'true' is returned with
 * @locked_ni set, neither the vfs inode reference nor the ntfs inode count
 * taken in this function is dropped here.  On every other return path that is
 * reached after an inode was found in icache, the references taken in this
 * function are released before returning.
 */
bool ntfs_may_write_mft_record(ntfs_volume *vol, const unsigned long mft_no,
		const MFT_RECORD *m, ntfs_inode **locked_ni)
{
	struct super_block *sb = vol->sb;
	struct inode *mft_vi = vol->mft_ino;
	struct inode *vi;
	ntfs_inode *ni, *eni, **extent_nis;
	int i;
	ntfs_attr na;

	ntfs_debug("Entering for inode 0x%lx.", mft_no);
	/*
	 * Normally we do not return a locked inode so set @locked_ni to NULL.
	 */
	BUG_ON(!locked_ni);
	*locked_ni = NULL;
	/*
	 * Check if the inode corresponding to this mft record is in the VFS
	 * inode cache and obtain a reference to it if it is.
	 */
	ntfs_debug("Looking for inode 0x%lx in icache.", mft_no);
	/* Fill in the lookup key passed to ntfs_test_inode via the lookup. */
	na.mft_no = mft_no;
	na.name = NULL;
	na.name_len = 0;
	na.type = AT_UNUSED;
	/*
	 * Optimize inode 0, i.e. $MFT itself, since we have it in memory and
	 * we get here for it rather often.
	 */
	if (!mft_no) {
		/* Balance the below iput(). */
		vi = igrab(mft_vi);
		BUG_ON(vi != mft_vi);
	} else {
		/*
		 * Have to use ilookup5_nowait() since ilookup5() waits for the
		 * inode lock which causes ntfs to deadlock when a concurrent
		 * inode write via the inode dirty code paths and the page
		 * dirty code path of the inode dirty code path when writing
		 * $MFT occurs.
		 */
		vi = ilookup5_nowait(sb, mft_no, (test_t)ntfs_test_inode, &na);
	}
	if (vi) {
		ntfs_debug("Base inode 0x%lx is in icache.", mft_no);
		/* The inode is in icache. */
		ni = NTFS_I(vi);
		/* Take a reference to the ntfs inode. */
		atomic_inc(&ni->count);
		/* If the inode is dirty, do not write this record. */
		if (NInoDirty(ni)) {
			ntfs_debug("Inode 0x%lx is dirty, do not write it.",
					mft_no);
			/* Undo both references taken above before bailing. */
			atomic_dec(&ni->count);
			iput(vi);
			return false;
		}
		ntfs_debug("Inode 0x%lx is not dirty.", mft_no);
		/*
		 * The inode is not dirty, try to take the mft record lock.
		 * Only a trylock is used: the function header explains that
		 * sleeping on this lock with the page already locked would
		 * deadlock due to lock reversal.
		 */
		if (unlikely(!mutex_trylock(&ni->mrec_lock))) {
			ntfs_debug("Mft record 0x%lx is already locked, do "
					"not write it.", mft_no);
			/* Undo both references taken above before bailing. */
			atomic_dec(&ni->count);
			iput(vi);
			return false;
		}
		ntfs_debug("Managed to lock mft record 0x%lx, write it.",
				mft_no);
		/*
		 * The write has to occur while we hold the mft record lock so
		 * return the locked ntfs inode.  The references on @vi and
		 * @ni are deliberately not dropped on this path; the function
		 * header makes the caller responsible for unlocking the ntfs
		 * inode and unpinning the base vfs inode.
		 */
		*locked_ni = ni;
		return true;
	}
	ntfs_debug("Inode 0x%lx is not in icache.", mft_no);
	/* The inode is not in icache. */
	/* Write the record if it is not a mft record (type "FILE"). */
	if (!ntfs_is_mft_record(m->magic)) {
		ntfs_debug("Mft record 0x%lx is not a FILE record, write it.",
				mft_no);
		return true;
	}
	/* Write the mft record if it is a base inode. */
	if (!m->base_mft_record) {
		ntfs_debug("Mft record 0x%lx is a base record, write it.",
				mft_no);
		return true;
	}
	/*
	 * This is an extent mft record.  Check if the inode corresponding to
	 * its base mft record is in icache and obtain a reference to it if it
	 * is.
	 */
	/* Reuse the lookup key, now pointing at the base mft record number. */
	na.mft_no = MREF_LE(m->base_mft_record);
	ntfs_debug("Mft record 0x%lx is an extent record.  Looking for base "
			"inode 0x%lx in icache.", mft_no, na.mft_no);
	if (!na.mft_no) {
		/* Balance the below iput(). */
		vi = igrab(mft_vi);
		BUG_ON(vi != mft_vi);
	} else
		vi = ilookup5_nowait(sb, na.mft_no, (test_t)ntfs_test_inode,
				&na);
	if (!vi) {
		/*
		 * The base inode is not in icache, write this extent mft
		 * record.
		 */
		ntfs_debug("Base inode 0x%lx is not in icache, write the "
				"extent record.", na.mft_no);
		return true;
	}
	ntfs_debug("Base inode 0x%lx is in icache.", na.mft_no);
	/*
	 * The base inode is in icache.  Check if it has the extent inode
	 * corresponding to this extent mft record attached.
	 */
	ni = NTFS_I(vi);
	/* The extent inode array is only examined with extent_lock held. */
	mutex_lock(&ni->extent_lock);
	if (ni->nr_extents <= 0) {
		/*
		 * The base inode has no attached extent inodes, write this
		 * extent mft record.
		 */
		mutex_unlock(&ni->extent_lock);
		iput(vi);
		ntfs_debug("Base inode 0x%lx has no attached extent inodes, "
				"write the extent record.", na.mft_no);
		return true;
	}
	/* Iterate over the attached extent inodes. */
	extent_nis = ni->ext.extent_ntfs_inos;
	/* Linear scan; @eni stays NULL if no entry matches @mft_no. */
	for (eni = NULL, i = 0; i < ni->nr_extents; ++i) {
		if (mft_no == extent_nis[i]->mft_no) {
			/*
			 * Found the extent inode corresponding to this extent
			 * mft record.
			 */
			eni = extent_nis[i];
			break;
		}
	}
	/*
	 * If the extent inode was not attached to the base inode, write this
	 * extent mft record.
	 */
	if (!eni) {
		mutex_unlock(&ni->extent_lock);
		iput(vi);
		ntfs_debug("Extent inode 0x%lx is not attached to its base "
				"inode 0x%lx, write the extent record.",
				mft_no, na.mft_no);
		return true;
	}
	ntfs_debug("Extent inode 0x%lx is attached to its base inode 0x%lx.",
			mft_no, na.mft_no);
	/*
	 * Take a reference to the extent ntfs inode.  This is done while
	 * extent_lock is still held, i.e. before the lock is released below.
	 */
	atomic_inc(&eni->count);
	mutex_unlock(&ni->extent_lock);
	/*
	 * Found the extent inode corresponding to this extent mft record.
	 * Try to take the mft record lock.
	 */
	if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
		/* Drop the extent inode count and the base inode reference. */
		atomic_dec(&eni->count);
		iput(vi);
		ntfs_debug("Extent mft record 0x%lx is already locked, do "
				"not write it.", mft_no);
		return false;
	}
	ntfs_debug("Managed to lock extent mft record 0x%lx, write it.",
			mft_no);
	/*
	 * The function header states that the extent ntfs inode is set clean
	 * before it is returned; the debug message fires only if it was dirty.
	 */
	if (NInoTestClearDirty(eni))
		ntfs_debug("Extent inode 0x%lx is dirty, marking it clean.",
				mft_no);
	/*
	 * The write has to occur while we hold the mft record lock so return
	 * the locked extent ntfs inode.  The base inode reference in @vi and
	 * the count on @eni are not dropped on this path.
	 */
	*locked_ni = eni;
	return true;
}

/* Common suffix string for error messages; its users are not visible here. */
static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
		"chkdsk.";

/**
 * ntfs_mft_bitmap_find_and_alloc_free_rec_nolock - see name
 * @vol:	volume on which to search for a free mft record
 * @base_ni:	open base inode if allocating an extent mft record or NULL
 *
 * Search for a free mft record in the mft bitmap attribute on the ntfs volume
 * @vol.
 *
 * If @base_ni is NULL start the search at the default allocator position.
 *
 * If @base_ni is not NULL start the search at the mft record after the base
 * mft record @base_ni.
 *
 * Return the free mft record on success and -errno on error.  An error code of
 * -ENOSPC means that there are no free mft records in the currently
 * initialized mft bitmap.
 *
 * Locking: Caller must hold vol->mftbmp_lock for writing.
 */
static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
		ntfs_inode *base_ni)
{
	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
	unsigned long flags;
	struct address_space *mftbmp_mapping;
	u8 *buf, *byte;
	struct page *page;
	unsigned int page_ofs, size;
	u8 pass, b;

	ntfs_debug("Searching for free mft record in the currently "
			"initialized mft bitmap.");
	mftbmp_mapping = vol->mftbmp_ino->i_mapping;
	/*
	 * Set the end of the pass making sure we do not overflow the mft
	 * bitmap.
	 */
	read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -