📄 file.c
字号:
/*
 * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2007 Anton Altaparmakov
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>

#include <asm/page.h>
#include <asm/uaccess.h>

#include "attrib.h"
#include "bitmap.h"
#include "inode.h"
#include "debug.h"
#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
#include "ntfs.h"

/**
 * ntfs_file_open - called when an inode is about to be opened
 * @vi:		inode to be opened
 * @filp:	file structure describing the inode
 *
 * Limit file size to the page cache limit on architectures where unsigned long
 * is 32-bits.  This is the most we can do for now without overflowing the page
 * cache page index.  Doing it this way means we don't run into problems
 * because of existing too large files.  It would be better to allow the user
 * to read the beginning of the file but I doubt very much anyone is going to
 * hit this check on a 32-bit architecture, so there is no point in adding the
 * extra complexity required to support this.
 *
 * On 64-bit architectures, the check is hopefully optimized away by the
 * compiler.
 *
 * After the check passes, just call generic_file_open() to do its work.
 */
static int ntfs_file_open(struct inode *vi, struct file *filp)
{
	/*
	 * On 32-bit page-index architectures, refuse files whose size cannot
	 * be represented in the page cache; -EOVERFLOW is the standard errno
	 * for this condition.
	 */
	if (sizeof(unsigned long) < 8) {
		if (i_size_read(vi) > MAX_LFS_FILESIZE)
			return -EOVERFLOW;
	}
	return generic_file_open(vi, filp);
}

#ifdef NTFS_RW

/**
 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
 * @ni:			ntfs inode of the attribute to extend
 * @new_init_size:	requested new initialized size in bytes
 * @cached_page:	store any allocated but unused page here
 * @lru_pvec:		lru-buffering pagevec of the caller
 *
 * Extend the initialized size of an attribute described by the ntfs inode @ni
 * to @new_init_size bytes.  This involves zeroing any non-sparse space between
 * the old initialized size and @new_init_size both in the page cache and on
 * disk (if relevant complete pages are already uptodate in the page cache then
 * these are simply marked dirty).
 *
 * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
 * in the resident attribute case, it is tied to the initialized size and, in
 * the non-resident attribute case, it may not fall below the initialized size.
 *
 * Note that if the attribute is resident, we do not need to touch the page
 * cache at all.  This is because if the page cache page is not uptodate we
 * bring it uptodate later, when doing the write to the mft record since we
 * then already have the page mapped.  And if the page is uptodate, the
 * non-initialized region will already have been zeroed when the page was
 * brought uptodate and the region may in fact already have been overwritten
 * with new data via mmap() based writes, so we cannot just zero it.  And since
 * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
 * is unspecified, we choose not to do zeroing and thus we do not need to touch
 * the page at all.
For a more detailed explanation see ntfs_truncate() in * fs/ntfs/inode.c. * * @cached_page and @lru_pvec are just optimizations for dealing with multiple * pages. * * Return 0 on success and -errno on error. In the case that an error is * encountered it is possible that the initialized size will already have been * incremented some way towards @new_init_size but it is guaranteed that if * this is the case, the necessary zeroing will also have happened and that all * metadata is self-consistent. * * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be * held by the caller. */static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, struct page **cached_page, struct pagevec *lru_pvec){ s64 old_init_size; loff_t old_i_size; pgoff_t index, end_index; unsigned long flags; struct inode *vi = VFS_I(ni); ntfs_inode *base_ni; MFT_RECORD *m = NULL; ATTR_RECORD *a; ntfs_attr_search_ctx *ctx = NULL; struct address_space *mapping; struct page *page = NULL; u8 *kattr; int err; u32 attr_len; read_lock_irqsave(&ni->size_lock, flags); old_init_size = ni->initialized_size; old_i_size = i_size_read(vi); BUG_ON(new_init_size > ni->allocated_size); read_unlock_irqrestore(&ni->size_lock, flags); ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " "old_initialized_size 0x%llx, " "new_initialized_size 0x%llx, i_size 0x%llx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), (unsigned long long)old_init_size, (unsigned long long)new_init_size, old_i_size); if (!NInoAttr(ni)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; /* Use goto to reduce indentation and we need the label below anyway. 
*/ if (NInoNonResident(ni)) goto do_non_resident_extend; BUG_ON(old_init_size != old_i_size); m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); m = NULL; goto err_out; } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; goto err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { if (err == -ENOENT) err = -EIO; goto err_out; } m = ctx->mrec; a = ctx->attr; BUG_ON(a->non_resident); /* The total length of the attribute value. */ attr_len = le32_to_cpu(a->data.resident.value_length); BUG_ON(old_i_size != (loff_t)attr_len); /* * Do the zeroing in the mft record and update the attribute size in * the mft record. */ kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); memset(kattr + attr_len, 0, new_init_size - attr_len); a->data.resident.value_length = cpu_to_le32((u32)new_init_size); /* Finally, update the sizes in the vfs and ntfs inodes. */ write_lock_irqsave(&ni->size_lock, flags); i_size_write(vi, new_init_size); ni->initialized_size = new_init_size; write_unlock_irqrestore(&ni->size_lock, flags); goto done;do_non_resident_extend: /* * If the new initialized size @new_init_size exceeds the current file * size (vfs inode->i_size), we need to extend the file size to the * new initialized size. 
*/ if (new_init_size > old_i_size) { m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); m = NULL; goto err_out; } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; goto err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { if (err == -ENOENT) err = -EIO; goto err_out; } m = ctx->mrec; a = ctx->attr; BUG_ON(!a->non_resident); BUG_ON(old_i_size != (loff_t) sle64_to_cpu(a->data.non_resident.data_size)); a->data.non_resident.data_size = cpu_to_sle64(new_init_size); flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); /* Update the file size in the vfs inode. */ i_size_write(vi, new_init_size); ntfs_attr_put_search_ctx(ctx); ctx = NULL; unmap_mft_record(base_ni); m = NULL; } mapping = vi->i_mapping; index = old_init_size >> PAGE_CACHE_SHIFT; end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; do { /* * Read the page. If the page is not present, this will zero * the uninitialized regions for us. */ page = read_mapping_page(mapping, index, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto init_err_out; } if (unlikely(PageError(page))) { page_cache_release(page); err = -EIO; goto init_err_out; } /* * Update the initialized size in the ntfs inode. This is * enough to make ntfs_writepage() work. */ write_lock_irqsave(&ni->size_lock, flags); ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT; if (ni->initialized_size > new_init_size) ni->initialized_size = new_init_size; write_unlock_irqrestore(&ni->size_lock, flags); /* Set the page dirty so it gets written out. */ set_page_dirty(page); page_cache_release(page); /* * Play nice with the vm and the rest of the system. This is * very much needed as we can potentially be modifying the * initialised size from a very small value to a really huge * value, e.g. 
* f = open(somefile, O_TRUNC); * truncate(f, 10GiB); * seek(f, 10GiB); * write(f, 1); * And this would mean we would be marking dirty hundreds of * thousands of pages or as in the above example more than * two and a half million pages! * * TODO: For sparse pages could optimize this workload by using * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This * would be set in readpage for sparse pages and here we would * not need to mark dirty any pages which have this bit set. * The only caveat is that we have to clear the bit everywhere * where we allocate any clusters that lie in the page or that * contain the page. * * TODO: An even greater optimization would be for us to only * call readpage() on pages which are not in sparse regions as * determined from the runlist. This would greatly reduce the * number of pages we read and make dirty in the case of sparse * files. */ balance_dirty_pages_ratelimited(mapping); cond_resched(); } while (++index < end_index); read_lock_irqsave(&ni->size_lock, flags); BUG_ON(ni->initialized_size != new_init_size); read_unlock_irqrestore(&ni->size_lock, flags); /* Now bring in sync the initialized_size in the mft record. 
*/ m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); m = NULL; goto init_err_out; } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; goto init_err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { if (err == -ENOENT) err = -EIO; goto init_err_out; } m = ctx->mrec; a = ctx->attr; BUG_ON(!a->non_resident); a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);done: flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); if (ctx) ntfs_attr_put_search_ctx(ctx); if (m) unmap_mft_record(base_ni); ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.", (unsigned long long)new_init_size, i_size_read(vi)); return 0;init_err_out: write_lock_irqsave(&ni->size_lock, flags); ni->initialized_size = old_init_size; write_unlock_irqrestore(&ni->size_lock, flags);err_out: if (ctx) ntfs_attr_put_search_ctx(ctx); if (m) unmap_mft_record(base_ni); ntfs_debug("Failed. Returning error code %i.", err); return err;}/** * ntfs_fault_in_pages_readable - * * Fault a number of userspace pages into pagetables. * * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes * with more than two userspace pages as well as handling the single page case * elegantly. * * If you find this difficult to understand, then think of the while loop being * the following code, except that we do without the integer variable ret: * * do { * ret = __get_user(c, uaddr); * uaddr += PAGE_SIZE; * } while (!ret && uaddr < end); * * Note, the final __get_user() may well run out-of-bounds of the user buffer, * but _not_ out-of-bounds of the page the user buffer belongs to, and since * this is only a read and not a write, and since it is still in the same page, * it should not matter and this makes the code much simpler. 
*/static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, int bytes){ const char __user *end; volatile char c; /* Set @end to the first byte outside the last page we care about. */ end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) ;}/** * ntfs_fault_in_pages_readable_iovec - * * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. */static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, size_t iov_ofs, int bytes){ do { const char __user *buf; unsigned len; buf = iov->iov_base + iov_ofs; len = iov->iov_len - iov_ofs; if (len > bytes) len = bytes; ntfs_fault_in_pages_readable(buf, len); bytes -= len; iov++; iov_ofs = 0; } while (bytes);}/** * __ntfs_grab_cache_pages - obtain a number of locked pages * @mapping: address space mapping from which to obtain page cache pages * @index: starting index in @mapping at which to begin obtaining pages * @nr_pages: number of page cache pages to obtain * @pages: array of pages in which to return the obtained page cache pages * @cached_page: allocated but as yet unused page * @lru_pvec: lru-buffering pagevec of caller * * Obtain @nr_pages locked page cache pages from the mapping @maping and * starting at index @index. * * If a page is newly created, increment its refcount and add it to the * caller's lru-buffering pagevec @lru_pvec. * * This is the same as mm/filemap.c::__grab_cache_page(), except that @nr_pages * are obtained at once instead of just one page and that 0 is returned on * success and -errno on error. * * Note, the page locks are obtained in ascending page index order. 
*/static inline int __ntfs_grab_cache_pages(struct address_space *mapping, pgoff_t index, const unsigned nr_pages, struct page **pages, struct page **cached_page, struct pagevec *lru_pvec){ int err, nr; BUG_ON(!nr_pages); err = nr = 0; do { pages[nr] = find_lock_page(mapping, index); if (!pages[nr]) { if (!*cached_page) { *cached_page = page_cache_alloc(mapping); if (unlikely(!*cached_page)) { err = -ENOMEM; goto err_out; } } err = add_to_page_cache(*cached_page, mapping, index, GFP_KERNEL);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -