/* inode.c — hugetlbfs inode operations (extracted from a code-viewer page) */
/*
 * hugetlbpage-backed filesystem.  Based on ramfs.
 *
 * William Irwin, 2002
 *
 * Copyright (C) 2002 Linus Torvalds.
 */

#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched.h>	/* remove ASAP */
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/capability.h>
#include <linux/ctype.h>
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/parser.h>
#include <linux/mman.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>

#include <asm/uaccess.h>

/* some random number */
#define HUGETLBFS_MAGIC	0x958458f6

static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
static const struct inode_operations hugetlbfs_inode_operations;

/*
 * hugetlbfs pages are never dirtied for writeback nor read ahead, so
 * opt out of both in the backing_dev_info.
 */
static struct backing_dev_info hugetlbfs_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

int sysctl_hugetlb_shm_group;

/* Mount-option tokens, matched by the generic option parser below. */
enum {
	Opt_size, Opt_nr_inodes,
	Opt_mode, Opt_uid, Opt_gid,
	Opt_err,
};

static match_table_t tokens = {
	{Opt_size,	"size=%s"},
	{Opt_nr_inodes,	"nr_inodes=%s"},
	{Opt_mode,	"mode=%o"},
	{Opt_uid,	"uid=%u"},
	{Opt_gid,	"gid=%u"},
	{Opt_err,	NULL},
};

/* Drop the pagevec's page references and reset it for reuse. */
static void huge_pagevec_release(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); ++i)
		put_page(pvec->pages[i]);

	pagevec_reinit(pvec);
}

/*
 * mmap() a hugetlbfs file: install hugetlb_vm_ops, reserve huge pages for
 * shared mappings, and grow i_size for writable mappings that extend it.
 */
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	loff_t len, vma_len;
	int ret;

	/*
	 * vma address alignment (but not the pgoff alignment) has
	 * already been checked by prepare_hugepage_range.  If you add
	 * any error returns here, do so after setting VM_HUGETLB, so
	 * is_vm_hugetlb_page tests below unmap_region go the right
	 * way when do_mmap_pgoff unwinds (may be important on powerpc
	 * and ia64).
	 */
	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
	vma->vm_ops = &hugetlb_vm_ops;

	/* pgoff must be huge-page aligned (expressed in PAGE_SIZE units) */
	if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT))
		return -EINVAL;

	vma_len = (loff_t)(vma->vm_end - vma->vm_start);

	mutex_lock(&inode->i_mutex);
	file_accessed(file);

	ret = -ENOMEM;
	/* len = end of the mapping measured from the start of the file */
	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

	if (vma->vm_flags & VM_MAYSHARE &&
	    hugetlb_reserve_pages(inode,
				  vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
				  len >> HPAGE_SHIFT))
		goto out;

	ret = 0;
	hugetlb_prefault_arch_hook(vma->vm_mm);

	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
		inode->i_size = len;
out:
	mutex_unlock(&inode->i_mutex);

	return ret;
}

/*
 * Called under down_write(mmap_sem).
 */

#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	/* request must be a whole number of huge pages and fit in TASK_SIZE */
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(addr, len))
			return -EINVAL;
		return addr;
	}

	/* honour a caller-supplied hint if the aligned range is free */
	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	start_addr = mm->free_area_cache;

	if (len <= mm->cached_hole_size)
		start_addr = TASK_UNMAPPED_BASE;

full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	/* linear scan of the vma list for a huge-page-aligned hole */
	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				goto full_search;
			}
			return -ENOMEM;
		}

		if (!vma || addr + len <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
#endif

/*
 * Copy up to @size bytes (capped at @count) from a huge page — addressed as
 * an array of base pages — out to the user buffer.  Returns the number of
 * bytes copied, or -EFAULT if nothing at all could be copied.
 */
static int
hugetlbfs_read_actor(struct page *page, unsigned long offset,
			char __user *buf, unsigned long count,
			unsigned long size)
{
	char *kaddr;
	unsigned long left, copied = 0;
	int i, chunksize;

	if (size > count)
		size = count;

	/* Find which 4k chunk and offset with in that chunk */
	i = offset >> PAGE_CACHE_SHIFT;
	offset = offset & ~PAGE_CACHE_MASK;

	while (size) {
		chunksize = PAGE_CACHE_SIZE;
		if (offset)
			chunksize -= offset;
		if (chunksize > size)
			chunksize = size;
		/* kmap one base page at a time; huge page may be in highmem */
		kaddr = kmap(&page[i]);
		left = __copy_to_user(buf, kaddr + offset, chunksize);
		kunmap(&page[i]);
		if (left) {
			/* partial copy: account what made it and stop */
			copied += (chunksize - left);
			break;
		}
		offset = 0;
		size -= chunksize;
		buf += chunksize;
		copied += chunksize;
		i++;
	}
	return copied ? copied : -EFAULT;
}

/*
 * Support for read() - Find the page attached to f_mapping and copy out the
 * data. Its *very* similar to do_generic_mapping_read(), we can't use that
 * since it has PAGE_CACHE_SIZE assumptions.
 */
static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
			size_t len, loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long index = *ppos >> HPAGE_SHIFT;	/* huge-page index */
	unsigned long offset = *ppos & ~HPAGE_MASK;	/* offset within it */
	unsigned long end_index;
	loff_t isize;
	ssize_t retval = 0;

	/* hold i_mutex while sampling i_size and walking the page cache */
	mutex_lock(&inode->i_mutex);

	/* validate length */
	if (len == 0)
		goto out;

	isize = i_size_read(inode);
	if (!isize)
		goto out;

	end_index = (isize - 1) >> HPAGE_SHIFT;
	for (;;) {
		struct page *page;
		int nr, ret;

		/* nr is the maximum number of bytes to copy from this page */
		nr = HPAGE_SIZE;
		if (index >= end_index) {
			if (index > end_index)
				goto out;
			/* last page: limit nr to the bytes below EOF */
			nr = ((isize - 1) & ~HPAGE_MASK) + 1;
			if (nr <= offset) {
				goto out;
			}
		}
		nr = nr - offset;

		/* Find the page */
		page = find_get_page(mapping, index);
		if (unlikely(page == NULL)) {
			/*
			 * We have a HOLE, zero out the user-buffer for the
			 * length of the hole or request.
			 */
			ret = len < nr ? len : nr;
			if (clear_user(buf, ret))
				ret = -EFAULT;
		} else {
			/*
			 * We have the page, copy it to user space buffer.
			 */
			ret = hugetlbfs_read_actor(page, offset, buf, len, nr);
		}
		if (ret < 0) {
			/* report the error only if nothing was copied yet */
			if (retval == 0)
				retval = ret;
			if (page)
				page_cache_release(page);
			goto out;
		}

		/* advance file position by the bytes actually copied */
		offset += ret;
		retval += ret;
		len -= ret;
		index += offset >> HPAGE_SHIFT;
		offset &= ~HPAGE_MASK;

		if (page)
			page_cache_release(page);

		/* short read or no more work */
		if ((ret != nr) || (len == 0))
			break;
	}
out:
	*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
	mutex_unlock(&inode->i_mutex);
	return retval;
}

/*
 * Read a page. Again trivial. If it didn't already exist
 * in the page cache, it is zero-filled.
 */
static int hugetlbfs_readpage(struct file *file, struct page * page)
{
	/* hugetlbfs pages are never read in from backing store */
	unlock_page(page);
	return -EINVAL;
}

static int hugetlbfs_write_begin(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	/* write(2) is not supported on hugetlbfs files */
	return -EINVAL;
}

static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	/* unreachable: write_begin always fails, so this must never be called */
	BUG();
	return -EINVAL;
}

/* Remove one huge page from the page cache and drop its reference. */
static void truncate_huge_page(struct page *page)
{
	cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
	ClearPageUptodate(page);
	remove_from_page_cache(page);
	put_page(page);
}

/*
 * Drop every page cached at or beyond @lstart and return the freed
 * huge-page reservation via hugetlb_unreserve_pages().
 */
static void truncate_hugepages(struct inode *inode, loff_t lstart)
{
	struct address_space *mapping = &inode->i_data;
	const pgoff_t start = lstart >> HPAGE_SHIFT;
	struct pagevec pvec;
	pgoff_t next;
	int i, freed = 0;

	pagevec_init(&pvec, 0);
	next = start;
	while (1) {
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			/* lookup came up empty mid-scan: rescan from start */
			next = start;
			continue;
		}

		for (i = 0; i < pagevec_count(&pvec); ++i) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			if (page->index > next)
				next = page->index;
			++next;
			truncate_huge_page(page);
			unlock_page(page);
			freed++;
		}
		huge_pagevec_release(&pvec);
	}
	/* a full truncate (lstart == 0) must leave the mapping empty */
	BUG_ON(!lstart && mapping->nrpages);
	hugetlb_unreserve_pages(inode, start, freed);
}

static void hugetlbfs_delete_inode(struct inode *inode)
{
	truncate_hugepages(inode, 0);
	clear_inode(inode);
}

/*
 * Tear down an inode whose last reference went away while it still has
 * links.  Entered with inode_lock held; always exits with it released
 * (see the __releases annotation).
 */
static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (!sb || (sb->s_flags & MS_ACTIVE)) {
			/* superblock still active: keep inode cached */
			spin_unlock(&inode_lock);
			return;
		}
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		/*
		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
		 * in our backing_dev_info.
		 */
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	truncate_hugepages(inode, 0);
	clear_inode(inode);
	destroy_inode(inode);
}

static void hugetlbfs_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		hugetlbfs_forget_inode(inode);
}

/* Unmap, from every vma in @root overlapping it, the range from @pgoff on. */
static inline void
hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
{
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;

	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
		unsigned long v_offset;

		/*
		 * Can the expression below overflow on 32-bit arches?
		 * No, because the prio_tree returns us only those vmas
		 * which overlap the truncated area starting at pgoff,
		 * and no vma on a 32-bit arch can span beyond the 4GB.
		 */
		if (vma->vm_pgoff < pgoff)
			v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT;
		else
			v_offset = 0;

		__unmap_hugepage_range(vma,
				vma->vm_start + v_offset, vma->vm_end);
	}
}

/*
 * Shrink the file to @offset (must be huge-page aligned): update i_size,
 * unmap all mappings beyond it, then drop the truncated pages.
 */
static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
	pgoff_t pgoff;
	struct address_space *mapping = inode->i_mapping;

	BUG_ON(offset & ~HPAGE_MASK);
	/* pgoff is in PAGE_SIZE units, as the prio tree expects */
	pgoff = offset >> PAGE_SHIFT;

	i_size_write(inode, offset);
	spin_lock(&mapping->i_mmap_lock);
	if (!prio_tree_empty(&mapping->i_mmap))
		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
	spin_unlock(&mapping->i_mmap_lock);
	truncate_hugepages(inode, offset);
	return 0;
}

static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	unsigned int ia_valid = attr->ia_valid;

	BUG_ON(!inode);

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	if (ia_valid & ATTR_SIZE) {
		error = -EINVAL;
		/* only huge-page-aligned sizes can be set */
		if (!(attr->ia_size & ~HPAGE_MASK))
			error = hugetlb_vmtruncate(inode, attr->ia_size);
		if (error)
			goto out;
		/* size handled above; keep inode_setattr from redoing it */
		attr->ia_valid &= ~ATTR_SIZE;
	}
	error = inode_setattr(inode, attr);
out:
	return error;
}

/*
 * Allocate and initialise a fresh hugetlbfs inode.
 * NOTE(review): the source is cut off inside this function — the remainder
 * (mode-dependent setup and the return) is not visible here.
 */
static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
					gid_t gid, int mode, dev_t dev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode) {
		struct hugetlbfs_inode_info *info;
		inode->i_mode = mode;
		inode->i_uid = uid;
		inode->i_gid = gid;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &hugetlbfs_aops;
		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		INIT_LIST_HEAD(&inode->i_mapping->private_list);
		info = HUGETLBFS_I(inode);
		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
		/*
		 * NOTE(review): source truncated here by the code-viewer
		 * page; the rest of hugetlbfs_get_inode and the remainder
		 * of inode.c are missing from this extract.
		 */