/* inode.c — hugetlbfs inode operations (extracted from a code-viewer page) */
/*
 * hugetlbpage-backed filesystem.  Based on ramfs.
 *
 * William Irwin, 2002
 *
 * Copyright (C) 2002 Linus Torvalds.
 */

#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched.h>	/* remove ASAP */
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/mman.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>

#include <asm/uaccess.h>

/* some random number */
#define HUGETLBFS_MAGIC	0x958458f6

static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;

/*
 * hugetlbfs pages are never dirtied for writeback nor read ahead, so
 * opt out of both in the backing_dev_info.
 */
static struct backing_dev_info hugetlbfs_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

int sysctl_hugetlb_shm_group;

/* Mount-option tokens, matched by the generic option parser below. */
enum {
	Opt_size, Opt_nr_inodes,
	Opt_mode, Opt_uid, Opt_gid,
	Opt_err,
};

static match_table_t tokens = {
	{Opt_size,	"size=%s"},
	{Opt_nr_inodes,	"nr_inodes=%s"},
	{Opt_mode,	"mode=%o"},
	{Opt_uid,	"uid=%u"},
	{Opt_gid,	"gid=%u"},
	{Opt_err,	NULL},
};

/* Drop the pagevec's page references and reset it for reuse. */
static void huge_pagevec_release(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); ++i)
		put_page(pvec->pages[i]);

	pagevec_reinit(pvec);
}

/*
 * mmap() a hugetlbfs file: install hugetlb_vm_ops, reserve huge pages for
 * shared mappings, and grow i_size for writable mappings that extend it.
 */
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	loff_t len, vma_len;
	int ret;

	/*
	 * vma address alignment (but not the pgoff alignment) has
	 * already been checked by prepare_hugepage_range.  If you add
	 * any error returns here, do so after setting VM_HUGETLB, so
	 * is_vm_hugetlb_page tests below unmap_region go the right
	 * way when do_mmap_pgoff unwinds (may be important on powerpc
	 * and ia64).
	 */
	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
	vma->vm_ops = &hugetlb_vm_ops;

	/* pgoff must be huge-page aligned (expressed in PAGE_SIZE units) */
	if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
		return -EINVAL;

	vma_len = (loff_t)(vma->vm_end - vma->vm_start);

	mutex_lock(&inode->i_mutex);
	file_accessed(file);

	ret = -ENOMEM;
	/* len = end of the mapping measured from the start of the file */
	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

	if (vma->vm_flags & VM_MAYSHARE &&
	    hugetlb_reserve_pages(inode,
				  vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
				  len >> HPAGE_SHIFT))
		goto out;

	ret = 0;
	hugetlb_prefault_arch_hook(vma->vm_mm);

	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
		inode->i_size = len;
out:
	mutex_unlock(&inode->i_mutex);

	return ret;
}

/*
 * Called under down_write(mmap_sem).
 */

#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	/* request must be a whole number of huge pages and fit in TASK_SIZE */
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(addr, len))
			return -EINVAL;
		return addr;
	}

	/* honour a caller-supplied hint if the aligned range is free */
	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	start_addr = mm->free_area_cache;

	if (len <= mm->cached_hole_size)
		start_addr = TASK_UNMAPPED_BASE;

full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	/* linear scan of the vma list for a huge-page-aligned hole */
	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				goto full_search;
			}
			return -ENOMEM;
		}

		if (!vma || addr + len <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
#endif

/*
 * Copy up to @size bytes (capped at @count) from a huge page — addressed as
 * an array of base pages — out to the user buffer.  Returns the number of
 * bytes copied, or -EFAULT if nothing at all could be copied.
 */
static int
hugetlbfs_read_actor(struct page *page, unsigned long offset,
			char __user *buf, unsigned long count,
			unsigned long size)
{
	char *kaddr;
	unsigned long left, copied = 0;
	int i, chunksize;

	if (size > count)
		size = count;

	/* Find which 4k chunk and offset with in that chunk */
	i = offset >> PAGE_CACHE_SHIFT;
	offset = offset & ~PAGE_CACHE_MASK;

	while (size) {
		chunksize = PAGE_CACHE_SIZE;
		if (offset)
			chunksize -= offset;
		if (chunksize > size)
			chunksize = size;
		/* kmap one base page at a time; huge page may be in highmem */
		kaddr = kmap(&page[i]);
		left = __copy_to_user(buf, kaddr + offset, chunksize);
		kunmap(&page[i]);
		if (left) {
			/* partial copy: account what made it and stop */
			copied += (chunksize - left);
			break;
		}
		offset = 0;
		size -= chunksize;
		buf += chunksize;
		copied += chunksize;
		i++;
	}
	return copied ? copied : -EFAULT;
}

/*
 * Support for read() - Find the page attached to f_mapping and copy out the
 * data. Its *very* similar to do_generic_mapping_read(), we can't use that
 * since it has PAGE_CACHE_SIZE assumptions.
 */
static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
			size_t len, loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long index = *ppos >> HPAGE_SHIFT;	/* huge-page index */
	unsigned long offset = *ppos & ~HPAGE_MASK;	/* offset within it */
	unsigned long end_index;
	loff_t isize;
	ssize_t retval = 0;

	/* hold i_mutex while sampling i_size and walking the page cache */
	mutex_lock(&inode->i_mutex);

	/* validate length */
	if (len == 0)
		goto out;

	isize = i_size_read(inode);
	if (!isize)
		goto out;

	end_index = (isize - 1) >> HPAGE_SHIFT;
	for (;;) {
		struct page *page;
		int nr, ret;

		/* nr is the maximum number of bytes to copy from this page */
		nr = HPAGE_SIZE;
		if (index >= end_index) {
			if (index > end_index)
				goto out;
			/* last page: limit nr to the bytes below EOF */
			nr = ((isize - 1) & ~HPAGE_MASK) + 1;
			if (nr <= offset) {
				goto out;
			}
		}
		nr = nr - offset;

		/* Find the page */
		page = find_get_page(mapping, index);
		if (unlikely(page == NULL)) {
			/*
			 * We have a HOLE, zero out the user-buffer for the
			 * length of the hole or request.
			 */
			ret = len < nr ? len : nr;
			if (clear_user(buf, ret))
				ret = -EFAULT;
		} else {
			/*
			 * We have the page, copy it to user space buffer.
			 */
			ret = hugetlbfs_read_actor(page, offset, buf, len, nr);
		}
		if (ret < 0) {
			/* report the error only if nothing was copied yet */
			if (retval == 0)
				retval = ret;
			if (page)
				page_cache_release(page);
			goto out;
		}

		/* advance file position by the bytes actually copied */
		offset += ret;
		retval += ret;
		len -= ret;
		index += offset >> HPAGE_SHIFT;
		offset &= ~HPAGE_MASK;

		if (page)
			page_cache_release(page);

		/* short read or no more work */
		if ((ret != nr) || (len == 0))
			break;
	}
out:
	*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
	mutex_unlock(&inode->i_mutex);
	return retval;
}

/*
 * Read a page. Again trivial. If it didn't already exist
 * in the page cache, it is zero-filled.
 */
static int hugetlbfs_readpage(struct file *file, struct page * page)
{
	/* hugetlbfs pages are never read in from backing store */
	unlock_page(page);
	return -EINVAL;
}

static int hugetlbfs_write_begin(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	/* write(2) is not supported on hugetlbfs files */
	return -EINVAL;
}

static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	/* unreachable: write_begin always fails, so this must never be called */
	BUG();
	return -EINVAL;
}

/* Remove one huge page from the page cache and drop its reference. */
static void truncate_huge_page(struct page *page)
{
	cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
	ClearPageUptodate(page);
	remove_from_page_cache(page);
	put_page(page);
}

/*
 * Drop every page cached at or beyond @lstart and return the freed
 * huge-page reservation via hugetlb_unreserve_pages().
 */
static void truncate_hugepages(struct inode *inode, loff_t lstart)
{
	struct address_space *mapping = &inode->i_data;
	const pgoff_t start = lstart >> HPAGE_SHIFT;
	struct pagevec pvec;
	pgoff_t next;
	int i, freed = 0;

	pagevec_init(&pvec, 0);
	next = start;
	while (1) {
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			/* lookup came up empty mid-scan: rescan from start */
			next = start;
			continue;
		}

		for (i = 0; i < pagevec_count(&pvec); ++i) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			if (page->index > next)
				next = page->index;
			++next;
			truncate_huge_page(page);
			unlock_page(page);
			freed++;
		}
		huge_pagevec_release(&pvec);
	}
	/* a full truncate (lstart == 0) must leave the mapping empty */
	BUG_ON(!lstart && mapping->nrpages);
	hugetlb_unreserve_pages(inode, start, freed);
}

static void hugetlbfs_delete_inode(struct inode *inode)
{
	truncate_hugepages(inode, 0);
	clear_inode(inode);
}

/*
 * Tear down an inode whose last reference went away while it still has
 * links.  Entered with inode_lock held; always exits with it released
 * (see the __releases annotation).
 */
static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (!sb || (sb->s_flags & MS_ACTIVE)) {
			/* superblock still active: keep inode cached */
			spin_unlock(&inode_lock);
			return;
		}
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		/*
		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
		 * in our backing_dev_info.
		 */
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	truncate_hugepages(inode, 0);
	clear_inode(inode);
	destroy_inode(inode);
}

static void hugetlbfs_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		hugetlbfs_forget_inode(inode);
}

/* Unmap, from every vma in @root overlapping it, the range from @pgoff on. */
static inline void
hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
{
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;

	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
		unsigned long v_offset;

		/*
		 * Can the expression below overflow on 32-bit arches?
		 * No, because the prio_tree returns us only those vmas
		 * which overlap the truncated area starting at pgoff,
		 * and no vma on a 32-bit arch can span beyond the 4GB.
		 */
		if (vma->vm_pgoff < pgoff)
			v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT;
		else
			v_offset = 0;

		__unmap_hugepage_range(vma,
				vma->vm_start + v_offset, vma->vm_end);
	}
}

/*
 * Shrink the file to @offset (must be huge-page aligned): update i_size,
 * unmap all mappings beyond it, then drop the truncated pages.
 */
static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
	pgoff_t pgoff;
	struct address_space *mapping = inode->i_mapping;

	BUG_ON(offset & ~HPAGE_MASK);
	/* pgoff is in PAGE_SIZE units, as the prio tree expects */
	pgoff = offset >> PAGE_SHIFT;

	i_size_write(inode, offset);
	spin_lock(&mapping->i_mmap_lock);
	if (!prio_tree_empty(&mapping->i_mmap))
		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
	spin_unlock(&mapping->i_mmap_lock);
	truncate_hugepages(inode, offset);
	return 0;
}

static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	unsigned int ia_valid = attr->ia_valid;

	BUG_ON(!inode);

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	if (ia_valid & ATTR_SIZE) {
		error = -EINVAL;
		/* only huge-page-aligned sizes can be set */
		if (!(attr->ia_size & ~HPAGE_MASK))
			error = hugetlb_vmtruncate(inode, attr->ia_size);
		if (error)
			goto out;
		/* size handled above; keep inode_setattr from redoing it */
		attr->ia_valid &= ~ATTR_SIZE;
	}
	error = inode_setattr(inode, attr);
out:
	return error;
}

/*
 * Allocate and initialise a fresh hugetlbfs inode.
 * NOTE(review): the source is cut off inside this function — the remainder
 * (mode-dependent setup and the return) is not visible here.
 */
static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
					gid_t gid, int mode, dev_t dev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode) {
		struct hugetlbfs_inode_info *info;
		inode->i_mode = mode;
		inode->i_uid = uid;
		inode->i_gid = gid;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &hugetlbfs_aops;
		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		INIT_LIST_HEAD(&inode->i_mapping->private_list);
		info = HUGETLBFS_I(inode);
		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
		/*
		 * NOTE(review): source truncated here by the code-viewer
		 * page; the rest of hugetlbfs_get_inode and the remainder
		 * of inode.c are missing from this extract.
		 */