filemap.c
		address = (address + PMD_SIZE) & PMD_MASK;
		pmd++;
	} while (address && (address < end));
	return error;
}

int filemap_sync(struct vm_area_struct * vma, unsigned long address,
	size_t size, unsigned int flags)
{
	pgd_t * dir;
	unsigned long end = address + size;
	int error = 0;

	/* Acquire the lock early; it may be possible to avoid dropping
	 * and reacquiring it repeatedly.
	 */
	spin_lock(&vma->vm_mm->page_table_lock);

	dir = pgd_offset(vma->vm_mm, address);
	flush_cache_range(vma->vm_mm, end - size, end);
	if (address >= end)
		BUG();
	do {
		error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
		address = (address + PGDIR_SIZE) & PGDIR_MASK;
		dir++;
	} while (address && (address < end));
	flush_tlb_range(vma->vm_mm, end - size, end);

	spin_unlock(&vma->vm_mm->page_table_lock);

	return error;
}

static struct vm_operations_struct generic_file_vm_ops = {
	nopage:		filemap_nopage,
};

/* This is used for a general mmap of a disk file */

int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
	struct inode *inode = mapping->host;

	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
		if (!mapping->a_ops->writepage)
			return -EINVAL;
	}
	if (!mapping->a_ops->readpage)
		return -ENOEXEC;
	UPDATE_ATIME(inode);
	vma->vm_ops = &generic_file_vm_ops;
	return 0;
}

/*
 * The msync() system call.
 */

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC initiates writeout of just the dirty mapped data.
 * This provides no guarantee of file integrity - things like indirect
 * blocks may not have started writeout.  MS_ASYNC is primarily useful
 * where the application knows that it has finished with the data and
 * wishes to intelligently schedule its own I/O traffic.
 */
static int msync_interval(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int flags)
{
	int ret = 0;
	struct file * file = vma->vm_file;

	if (file && (vma->vm_flags & VM_SHARED)) {
		ret = filemap_sync(vma, start, end-start, flags);

		if (!ret && (flags & (MS_SYNC|MS_ASYNC))) {
			struct inode * inode = file->f_dentry->d_inode;

			down(&inode->i_sem);
			ret = filemap_fdatasync(inode->i_mapping);
			if (flags & MS_SYNC) {
				int err;

				if (file->f_op && file->f_op->fsync) {
					err = file->f_op->fsync(file, file->f_dentry, 1);
					if (err && !ret)
						ret = err;
				}
				err = filemap_fdatawait(inode->i_mapping);
				if (err && !ret)
					ret = err;
			}
			up(&inode->i_sem);
		}
	}
	return ret;
}

asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error, error = -EINVAL;

	down_read(&current->mm->mmap_sem);
	if (start & ~PAGE_MASK)
		goto out;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		goto out;
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -EFAULT at the end.
	 */
	vma = find_vma(current->mm, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		error = -EFAULT;
		if (!vma)
			goto out;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -EFAULT;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					goto out;
			}
			error = unmapped_error;
			goto out;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			goto out;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
out:
	up_read(&current->mm->mmap_sem);
	return error;
}
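/*
 * Illustrative userspace sketch (not part of filemap.c): one way the
 * MS_ASYNC/MS_SYNC paths above can be reached.  Mapping the file MAP_SHARED
 * with PROT_WRITE is also what makes generic_file_mmap() above insist on a
 * writepage operation; the file name and length are made up for the example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		size_t len = 4096;
 *		int fd = open("data.bin", O_RDWR);
 *		char *p;
 *
 *		if (fd < 0)
 *			return 1;
 *		p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *
 *		memcpy(p, "hello", 5);
 *
 *		// MS_ASYNC: start writeout of the dirty mapped pages, no wait.
 *		msync(p, len, MS_ASYNC);
 *
 *		// MS_SYNC: write and wait (and fsync the file), so the data
 *		// is stable on disk before continuing.
 *		msync(p, len, MS_SYNC);
 *
 *		munmap(p, len);
 *		close(fd);
 *		return 0;
 *	}
 */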
static inline void setup_read_behavior(struct vm_area_struct * vma,
	int behavior)
{
	VM_ClearReadHint(vma);
	switch(behavior) {
		case MADV_SEQUENTIAL:
			vma->vm_flags |= VM_SEQ_READ;
			break;
		case MADV_RANDOM:
			vma->vm_flags |= VM_RAND_READ;
			break;
		default:
			break;
	}
	return;
}

static long madvise_fixup_start(struct vm_area_struct * vma,
	unsigned long end, int behavior)
{
	struct vm_area_struct * n;
	struct mm_struct * mm = vma->vm_mm;

	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!n)
		return -EAGAIN;
	*n = *vma;
	n->vm_end = end;
	setup_read_behavior(n, behavior);
	n->vm_raend = 0;
	if (n->vm_file)
		get_file(n->vm_file);
	if (n->vm_ops && n->vm_ops->open)
		n->vm_ops->open(n);
	vma->vm_pgoff += (end - vma->vm_start) >> PAGE_SHIFT;
	lock_vma_mappings(vma);
	spin_lock(&mm->page_table_lock);
	vma->vm_start = end;
	__insert_vm_struct(mm, n);
	spin_unlock(&mm->page_table_lock);
	unlock_vma_mappings(vma);
	return 0;
}

static long madvise_fixup_end(struct vm_area_struct * vma,
	unsigned long start, int behavior)
{
	struct vm_area_struct * n;
	struct mm_struct * mm = vma->vm_mm;

	n = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!n)
		return -EAGAIN;
	*n = *vma;
	n->vm_start = start;
	n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT;
	setup_read_behavior(n, behavior);
	n->vm_raend = 0;
	if (n->vm_file)
		get_file(n->vm_file);
	if (n->vm_ops && n->vm_ops->open)
		n->vm_ops->open(n);
	lock_vma_mappings(vma);
	spin_lock(&mm->page_table_lock);
	vma->vm_end = start;
	__insert_vm_struct(mm, n);
	spin_unlock(&mm->page_table_lock);
	unlock_vma_mappings(vma);
	return 0;
}

static long madvise_fixup_middle(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int behavior)
{
	struct vm_area_struct * left, * right;
	struct mm_struct * mm = vma->vm_mm;

	left = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!left)
		return -EAGAIN;
	right = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!right) {
		kmem_cache_free(vm_area_cachep, left);
		return -EAGAIN;
	}
	*left = *vma;
	*right = *vma;
	left->vm_end = start;
	right->vm_start = end;
	right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT;
	left->vm_raend = 0;
	right->vm_raend = 0;
	if (vma->vm_file)
		atomic_add(2, &vma->vm_file->f_count);

	if (vma->vm_ops && vma->vm_ops->open) {
		vma->vm_ops->open(left);
		vma->vm_ops->open(right);
	}
	vma->vm_pgoff += (start - vma->vm_start) >> PAGE_SHIFT;
	vma->vm_raend = 0;
	lock_vma_mappings(vma);
	spin_lock(&mm->page_table_lock);
	vma->vm_start = start;
	vma->vm_end = end;
	setup_read_behavior(vma, behavior);
	__insert_vm_struct(mm, left);
	__insert_vm_struct(mm, right);
	spin_unlock(&mm->page_table_lock);
	unlock_vma_mappings(vma);
	return 0;
}
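/*
 * Illustrative userspace sketch (not part of filemap.c): advising only the
 * interior of a mapping is the case that madvise_behavior() below routes to
 * madvise_fixup_middle() above, leaving three vm_area_structs behind - left
 * and right keep the old read hint, the middle one gets the new one.  The
 * anonymous mapping and its size are made up for the example.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		long page = sysconf(_SC_PAGESIZE);
 *		size_t len = 16 * page;
 *		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		if (p == MAP_FAILED)
 *			return 1;
 *
 *		// Advise pages 4..7 of the 16-page VMA: start and end fall
 *		// strictly inside the area, so the VMA is split three ways.
 *		madvise(p + 4 * page, 4 * page, MADV_SEQUENTIAL);
 *
 *		munmap(p, len);
 *		return 0;
 *	}
 */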
/*
 * We can potentially split a vm area into separate
 * areas, each area with its own behavior.
 */
static long madvise_behavior(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, int behavior)
{
	int error = 0;

	/* This caps the number of vma's this process can own */
	if (vma->vm_mm->map_count > MAX_MAP_COUNT)
		return -ENOMEM;

	if (start == vma->vm_start) {
		if (end == vma->vm_end) {
			setup_read_behavior(vma, behavior);
			vma->vm_raend = 0;
		} else
			error = madvise_fixup_start(vma, end, behavior);
	} else {
		if (end == vma->vm_end)
			error = madvise_fixup_end(vma, start, behavior);
		else
			error = madvise_fixup_middle(vma, start, end, behavior);
	}

	return error;
}

/*
 * Schedule all required I/O operations, then run the disk queue
 * to make sure they are started.  Do not wait for completion.
 */
static long madvise_willneed(struct vm_area_struct * vma,
	unsigned long start, unsigned long end)
{
	long error = -EBADF;
	struct file * file;
	unsigned long size, rlim_rss;

	/* Doesn't work if there's no mapped file. */
	if (!vma->vm_file)
		return error;
	file = vma->vm_file;
	size = (file->f_dentry->d_inode->i_size + PAGE_CACHE_SIZE - 1) >>
							PAGE_CACHE_SHIFT;

	start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	if (end > vma->vm_end)
		end = vma->vm_end;
	end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

	/* Make sure this doesn't exceed the process's max rss. */
	error = -EIO;
	rlim_rss = current->rlim ?  current->rlim[RLIMIT_RSS].rlim_cur :
				LONG_MAX; /* default: see resource.h */
	if ((vma->vm_mm->rss + (end - start)) > rlim_rss)
		return error;

	/* round to cluster boundaries if this isn't a "random" area. */
	if (!VM_RandomReadHint(vma)) {
		start = CLUSTER_OFFSET(start);
		end = CLUSTER_OFFSET(end + CLUSTER_PAGES - 1);

		while ((start < end) && (start < size)) {
			error = read_cluster_nonblocking(file, start, size);
			start += CLUSTER_PAGES;
			if (error < 0)
				break;
		}
	} else {
		while ((start < end) && (start < size)) {
			error = page_cache_read(file, start);
			start++;
			if (error < 0)
				break;
		}
	}

	/* Don't wait for someone else to push these requests. */
	run_task_queue(&tq_disk);

	return error;
}
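/*
 * Worked example (not part of filemap.c) of the index arithmetic in
 * madvise_willneed() above: user addresses are turned into file page indices
 * via vm_pgoff, then rounded out to cluster boundaries.  The numbers below
 * (a 4K page, a 4-page cluster, the vma layout) are assumptions made up for
 * the illustration, and the masking mirrors what CLUSTER_OFFSET() is assumed
 * to do - round a page index down to a CLUSTER_PAGES-aligned boundary.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned long vm_start = 0x40010000UL, vm_pgoff = 4;
 *		unsigned long start = 0x40013000UL, end = 0x40016000UL;
 *		unsigned long cluster = 4;	// assumed CLUSTER_PAGES
 *
 *		start = ((start - vm_start) >> 12) + vm_pgoff;	// 7
 *		end   = ((end   - vm_start) >> 12) + vm_pgoff;	// 10
 *
 *		// Round to cluster boundaries: file pages 4..11 get read
 *		// ahead, one cluster at a time.
 *		start = start & ~(cluster - 1);			// 4
 *		end   = (end + cluster - 1) & ~(cluster - 1);	// 12
 *
 *		printf("read file pages %lu..%lu\n", start, end - 1);
 *		return 0;
 *	}
 */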
/*
 * Application no longer needs these pages.  If the pages are dirty,
 * it's OK to just throw them away.  The app will be more careful about
 * data it wants to keep.  Be sure to free swap resources too.  The
 * zap_page_range call sets things up for refill_inactive to actually free
 * these pages later if no one else has touched them in the meantime,
 * although we could add these pages to a global reuse list for
 * refill_inactive to pick up before reclaiming other pages.
 *
 * NB: This interface discards data rather than pushes it out to swap,
 * as some implementations do.  This has performance implications for
 * applications like large transactional databases which want to discard
 * pages in anonymous maps after committing to backing store the data
 * that was kept in them.  There is no reason to write this data out to
 * the swap area if the application is discarding it.
 *
 * An interface that causes the system to free clean pages and flush
 * dirty pages is already available as msync(MS_INVALIDATE).
 */
static long madvise_dontneed(struct vm_area_struct * vma,
	unsigned long start, unsigned long end)
{
	if (vma->vm_flags & VM_LOCKED)
		return -EINVAL;

	zap_page_range(vma->vm_mm, start, end - start);
	return 0;
}

static long madvise_vma(struct vm_area_struct * vma, unsigned long start,
	unsigned long end, int behavior)
{
	long error = -EBADF;

	switch (behavior) {
	case MADV_NORMAL:
	case MADV_SEQUENTIAL:
	case MADV_RANDOM:
		error = madvise_behavior(vma, start, end, behavior);
		break;

	case MADV_WILLNEED:
		error = madvise_willneed(vma, start, end);
		break;

	case MADV_DONTNEED:
		error = madvise_dontneed(vma, start, end);
		break;

	default:
		error = -EINVAL;
		break;
	}

	return error;
}

/*
 * The madvise(2) system call.
 *
 * Applications can use madvise() to advise the kernel how it should
 * handle paging I/O in this VM area.  The idea is to help the kernel
 * use appropriate read-ahead and caching techniques.  The information
 * provided is advisory only, and can be safely disregarded by the
 * kernel without affecting the correct operation of the application.
 *
 * behavior values:
 *  MADV_NORMAL - the default behavior is to read clusters.  This
 *		results in some read-ahead and read-behind.
 *  MADV_RANDOM - the system should read the minimum amount of data
 *		on any access, since it is unlikely that the appli-
 *		cation will need more than what it asks for.
 *  MADV_SEQUENTIAL - pages in the given range will probably be accessed
 *		once, so they can be aggressively read ahead, and
 *		can be freed soon after they are accessed.
 *  MADV_WILLNEED - the application is notifying the system to read
 *		some pages ahead.
 *  MADV_DONTNEED - the application is finished with the given range,
 *		so the kernel can free resources associated with it.
 *
 * return values:
 *  zero    - success
 *  -EINVAL - start + len < 0, start is not page-aligned,
 *		"behavior" is not a valid value, or application
 *		is attempting to release locked or shared pages.
 *  -ENOMEM - addresses in the specified range are not currently
 *		mapped, or are outside the AS of the process.
 *  -EIO    - an I/O error occurred while paging in data.
 *  -EBADF  - map exists, but area maps something that isn't a file.
 *  -EAGAIN - a kernel resource was temporarily unavailable.
 */
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior)
{
	unsigned long end;
	struct vm_area_struct * vma;
	int unmapped_error = 0;
	int error = -EINVAL;

	down_write(&current->mm->mmap_sem);

	if (start & ~PAGE_MASK)
		goto out;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		goto out;

	error = 0;
	if (end == start)
		goto out;

	/*
	 * If the interval [start,end) covers some unmapped address
	 * ranges, just ignore them, but return -ENOMEM at the end.
	 */
	vma = find_vma(current->mm, start);
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;

		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}

		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = madvise_vma(vma, start, end, behavior);
				if (error)
					goto out;
			}
			error = unmapped_error;
			goto out;
		}

		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = madvise_vma(vma, start, vma->vm_end, behavior);
		if (error)
			goto out;
		start = vma->vm_end;
		vma = vma->vm_next;
	}

out:
	up_write(&current->mm->mmap_sem);
	return error;
}
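/*
 * Illustrative userspace sketch (not part of filemap.c): typical advisory
 * calls that reach sys_madvise() above.  The file name and the 1MB
 * read-ahead window are made up for the example.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/mman.h>
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("big.dat", O_RDONLY);
 *		struct stat st;
 *		long sum = 0;
 *		off_t i;
 *		char *p;
 *
 *		if (fd < 0 || fstat(fd, &st) < 0)
 *			return 1;
 *		p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 *		if (p == MAP_FAILED)
 *			return 1;
 *
 *		// One front-to-back pass: aggressive read-ahead is welcome,
 *		// and pages may be dropped soon after they are touched.
 *		madvise(p, st.st_size, MADV_SEQUENTIAL);
 *
 *		// Ask for the first chunk to be paged in right away.
 *		madvise(p, 1 << 20, MADV_WILLNEED);
 *
 *		for (i = 0; i < st.st_size; i++)
 *			sum += p[i];
 *		printf("sum = %ld\n", sum);
 *
 *		// Finished with the range; the kernel may free these pages.
 *		madvise(p, st.st_size, MADV_DONTNEED);
 *
 *		munmap(p, st.st_size);
 *		close(fd);
 *		return 0;
 *	}
 */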
/*
 * Later we can get more picky about what "in core" means precisely.
 * For now, simply check to see if the page is in the page cache,
 * and is up to date; i.e. that no page-in operation would be required
 * at this time if an application were to map and access this page.
 */
static unsigned char mincore_page(struct vm_area_struct * vma,