📄 file.c
字号:
/*
 * file.c - NTFS kernel file operations.  Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2007 Anton Altaparmakov
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>

#include <asm/page.h>
#include <asm/uaccess.h>

#include "attrib.h"
#include "bitmap.h"
#include "inode.h"
#include "debug.h"
#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
#include "ntfs.h"

/**
 * ntfs_file_open - called when an inode is about to be opened
 * @vi:		inode to be opened
 * @filp:	file structure describing the inode
 *
 * Limit file size to the page cache limit on architectures where unsigned long
 * is 32-bits.  This is the most we can do for now without overflowing the page
 * cache page index.  Doing it this way means we don't run into problems
 * because of existing too large files.  It would be better to allow the user
 * to read the beginning of the file but I doubt very much anyone is going to
 * hit this check on a 32-bit architecture, so there is no point in adding the
 * extra complexity required to support this.
 *
 * On 64-bit architectures, the check is hopefully optimized away by the
 * compiler.
 *
 * After the check passes, just call generic_file_open() to do its work.
 */
static int ntfs_file_open(struct inode *vi, struct file *filp)
{
	/*
	 * On 32-bit page-index architectures, refuse files whose size cannot
	 * be represented in the page cache; -EOVERFLOW is the standard errno
	 * for this condition.
	 */
	if (sizeof(unsigned long) < 8) {
		if (i_size_read(vi) > MAX_LFS_FILESIZE)
			return -EOVERFLOW;
	}
	return generic_file_open(vi, filp);
}

#ifdef NTFS_RW

/**
 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
 * @ni:			ntfs inode of the attribute to extend
 * @new_init_size:	requested new initialized size in bytes
 * @cached_page:	store any allocated but unused page here
 * @lru_pvec:		lru-buffering pagevec of the caller
 *
 * Extend the initialized size of an attribute described by the ntfs inode @ni
 * to @new_init_size bytes.  This involves zeroing any non-sparse space between
 * the old initialized size and @new_init_size both in the page cache and on
 * disk (if relevant complete pages are already uptodate in the page cache then
 * these are simply marked dirty).
 *
 * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
 * in the resident attribute case, it is tied to the initialized size and, in
 * the non-resident attribute case, it may not fall below the initialized size.
 *
 * Note that if the attribute is resident, we do not need to touch the page
 * cache at all.  This is because if the page cache page is not uptodate we
 * bring it uptodate later, when doing the write to the mft record since we
 * then already have the page mapped.  And if the page is uptodate, the
 * non-initialized region will already have been zeroed when the page was
 * brought uptodate and the region may in fact already have been overwritten
 * with new data via mmap() based writes, so we cannot just zero it.  And since
 * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
 * is unspecified, we choose not to do zeroing and thus we do not need to touch
 * the page at all.
For a more detailed explanation see ntfs_truncate() in * fs/ntfs/inode.c. * * @cached_page and @lru_pvec are just optimizations for dealing with multiple * pages. * * Return 0 on success and -errno on error. In the case that an error is * encountered it is possible that the initialized size will already have been * incremented some way towards @new_init_size but it is guaranteed that if * this is the case, the necessary zeroing will also have happened and that all * metadata is self-consistent. * * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be * held by the caller. */static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, struct page **cached_page, struct pagevec *lru_pvec){ s64 old_init_size; loff_t old_i_size; pgoff_t index, end_index; unsigned long flags; struct inode *vi = VFS_I(ni); ntfs_inode *base_ni; MFT_RECORD *m = NULL; ATTR_RECORD *a; ntfs_attr_search_ctx *ctx = NULL; struct address_space *mapping; struct page *page = NULL; u8 *kattr; int err; u32 attr_len; read_lock_irqsave(&ni->size_lock, flags); old_init_size = ni->initialized_size; old_i_size = i_size_read(vi); BUG_ON(new_init_size > ni->allocated_size); read_unlock_irqrestore(&ni->size_lock, flags); ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " "old_initialized_size 0x%llx, " "new_initialized_size 0x%llx, i_size 0x%llx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), (unsigned long long)old_init_size, (unsigned long long)new_init_size, old_i_size); if (!NInoAttr(ni)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; /* Use goto to reduce indentation and we need the label below anyway. 
*/ if (NInoNonResident(ni)) goto do_non_resident_extend; BUG_ON(old_init_size != old_i_size); m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); m = NULL; goto err_out; } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; goto err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { if (err == -ENOENT) err = -EIO; goto err_out; } m = ctx->mrec; a = ctx->attr; BUG_ON(a->non_resident); /* The total length of the attribute value. */ attr_len = le32_to_cpu(a->data.resident.value_length); BUG_ON(old_i_size != (loff_t)attr_len); /* * Do the zeroing in the mft record and update the attribute size in * the mft record. */ kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); memset(kattr + attr_len, 0, new_init_size - attr_len); a->data.resident.value_length = cpu_to_le32((u32)new_init_size); /* Finally, update the sizes in the vfs and ntfs inodes. */ write_lock_irqsave(&ni->size_lock, flags); i_size_write(vi, new_init_size); ni->initialized_size = new_init_size; write_unlock_irqrestore(&ni->size_lock, flags); goto done;do_non_resident_extend: /* * If the new initialized size @new_init_size exceeds the current file * size (vfs inode->i_size), we need to extend the file size to the * new initialized size. 
*/ if (new_init_size > old_i_size) { m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); m = NULL; goto err_out; } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; goto err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { if (err == -ENOENT) err = -EIO; goto err_out; } m = ctx->mrec; a = ctx->attr; BUG_ON(!a->non_resident); BUG_ON(old_i_size != (loff_t) sle64_to_cpu(a->data.non_resident.data_size)); a->data.non_resident.data_size = cpu_to_sle64(new_init_size); flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); /* Update the file size in the vfs inode. */ i_size_write(vi, new_init_size); ntfs_attr_put_search_ctx(ctx); ctx = NULL; unmap_mft_record(base_ni); m = NULL; } mapping = vi->i_mapping; index = old_init_size >> PAGE_CACHE_SHIFT; end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; do { /* * Read the page. If the page is not present, this will zero * the uninitialized regions for us. */ page = read_mapping_page(mapping, index, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto init_err_out; } if (unlikely(PageError(page))) { page_cache_release(page); err = -EIO; goto init_err_out; } /* * Update the initialized size in the ntfs inode. This is * enough to make ntfs_writepage() work. */ write_lock_irqsave(&ni->size_lock, flags); ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT; if (ni->initialized_size > new_init_size) ni->initialized_size = new_init_size; write_unlock_irqrestore(&ni->size_lock, flags); /* Set the page dirty so it gets written out. */ set_page_dirty(page); page_cache_release(page); /* * Play nice with the vm and the rest of the system. This is * very much needed as we can potentially be modifying the * initialised size from a very small value to a really huge * value, e.g. 
* f = open(somefile, O_TRUNC); * truncate(f, 10GiB); * seek(f, 10GiB); * write(f, 1); * And this would mean we would be marking dirty hundreds of * thousands of pages or as in the above example more than * two and a half million pages! * * TODO: For sparse pages could optimize this workload by using * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This * would be set in readpage for sparse pages and here we would * not need to mark dirty any pages which have this bit set. * The only caveat is that we have to clear the bit everywhere * where we allocate any clusters that lie in the page or that * contain the page. * * TODO: An even greater optimization would be for us to only * call readpage() on pages which are not in sparse regions as * determined from the runlist. This would greatly reduce the * number of pages we read and make dirty in the case of sparse * files. */ balance_dirty_pages_ratelimited(mapping); cond_resched(); } while (++index < end_index); read_lock_irqsave(&ni->size_lock, flags); BUG_ON(ni->initialized_size != new_init_size); read_unlock_irqrestore(&ni->size_lock, flags); /* Now bring in sync the initialized_size in the mft record. 
*/ m = map_mft_record(base_ni); if (IS_ERR(m)) { err = PTR_ERR(m); m = NULL; goto init_err_out; } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; goto init_err_out; } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) { if (err == -ENOENT) err = -EIO; goto init_err_out; } m = ctx->mrec; a = ctx->attr; BUG_ON(!a->non_resident); a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);done: flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); if (ctx) ntfs_attr_put_search_ctx(ctx); if (m) unmap_mft_record(base_ni); ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.", (unsigned long long)new_init_size, i_size_read(vi)); return 0;init_err_out: write_lock_irqsave(&ni->size_lock, flags); ni->initialized_size = old_init_size; write_unlock_irqrestore(&ni->size_lock, flags);err_out: if (ctx) ntfs_attr_put_search_ctx(ctx); if (m) unmap_mft_record(base_ni); ntfs_debug("Failed. Returning error code %i.", err); return err;}/** * ntfs_fault_in_pages_readable - * * Fault a number of userspace pages into pagetables. * * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes * with more than two userspace pages as well as handling the single page case * elegantly. * * If you find this difficult to understand, then think of the while loop being * the following code, except that we do without the integer variable ret: * * do { * ret = __get_user(c, uaddr); * uaddr += PAGE_SIZE; * } while (!ret && uaddr < end); * * Note, the final __get_user() may well run out-of-bounds of the user buffer, * but _not_ out-of-bounds of the page the user buffer belongs to, and since * this is only a read and not a write, and since it is still in the same page, * it should not matter and this makes the code much simpler. 
*/static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, int bytes){ const char __user *end; volatile char c; /* Set @end to the first byte outside the last page we care about. */ end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) ;}/** * ntfs_fault_in_pages_readable_iovec - * * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs. */static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov, size_t iov_ofs, int bytes){ do { const char __user *buf; unsigned len; buf = iov->iov_base + iov_ofs; len = iov->iov_len - iov_ofs; if (len > bytes) len = bytes; ntfs_fault_in_pages_readable(buf, len); bytes -= len; iov++; iov_ofs = 0; } while (bytes);}/** * __ntfs_grab_cache_pages - obtain a number of locked pages * @mapping: address space mapping from which to obtain page cache pages * @index: starting index in @mapping at which to begin obtaining pages * @nr_pages: number of page cache pages to obtain * @pages: array of pages in which to return the obtained page cache pages * @cached_page: allocated but as yet unused page * @lru_pvec: lru-buffering pagevec of caller * * Obtain @nr_pages locked page cache pages from the mapping @maping and * starting at index @index. * * If a page is newly created, increment its refcount and add it to the * caller's lru-buffering pagevec @lru_pvec. * * This is the same as mm/filemap.c::__grab_cache_page(), except that @nr_pages * are obtained at once instead of just one page and that 0 is returned on * success and -errno on error. * * Note, the page locks are obtained in ascending page index order. 
*/static inline int __ntfs_grab_cache_pages(struct address_space *mapping, pgoff_t index, const unsigned nr_pages, struct page **pages, struct page **cached_page, struct pagevec *lru_pvec){ int err, nr; BUG_ON(!nr_pages); err = nr = 0; do { pages[nr] = find_lock_page(mapping, index); if (!pages[nr]) { if (!*cached_page) { *cached_page = page_cache_alloc(mapping); if (unlikely(!*cached_page)) { err = -ENOMEM; goto err_out; } } err = add_to_page_cache(*cached_page, mapping, index, GFP_KERNEL);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -