📄 llite_mmap.c
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * Copyright (c) 2001-2003 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#ifndef AUTOCONF_INCLUDED#include <linux/config.h>#endif#include <linux/kernel.h>#include <linux/mm.h>#include <linux/string.h>#include <linux/stat.h>#include <linux/errno.h>#include <linux/smp_lock.h>#include <linux/unistd.h>#include <linux/version.h>#include <asm/system.h>#include <asm/uaccess.h>#include <linux/fs.h>#include <linux/stat.h>#include <asm/uaccess.h>#include <linux/mm.h>#include <linux/pagemap.h>#include <linux/smp_lock.h>#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#include <linux/iobuf.h>#endif#define DEBUG_SUBSYSTEM S_LLITE#include <lustre_lite.h>#include "llite_internal.h"#include <linux/lustre_compat25.h>#define VMA_DEBUG(vma, fmt, arg...) \ CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld) inode(%p) " \ "ino(%lu) iname(%s): " fmt, vma, vma->vm_start, vma->vm_end, \ vma->vm_pgoff, vma->vm_file->f_dentry->d_inode, \ vma->vm_file->f_dentry->d_inode->i_ino, \ vma->vm_file->f_dentry->d_iname, ## arg); \struct ll_lock_tree_node { rb_node_t lt_node; struct list_head lt_locked_item; __u64 lt_oid; ldlm_policy_data_t lt_policy; struct lustre_handle lt_lockh; ldlm_mode_t lt_mode; struct inode *lt_inode;};int lt_get_mmap_locks(struct ll_lock_tree *tree, unsigned long addr, size_t count);#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, int *type);#elsestruct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, int unused);#endifstruct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start, __u64 end, ldlm_mode_t mode){ struct ll_lock_tree_node *node; OBD_ALLOC(node, sizeof(*node)); if (node == NULL) RETURN(ERR_PTR(-ENOMEM)); node->lt_inode = inode; node->lt_oid = ll_i2info(inode)->lli_smd->lsm_object_id; node->lt_policy.l_extent.start = start; node->lt_policy.l_extent.end = end; memset(&node->lt_lockh, 0, sizeof(node->lt_lockh)); INIT_LIST_HEAD(&node->lt_locked_item); node->lt_mode = mode; return node;}int lt_compare(struct ll_lock_tree_node *one, struct ll_lock_tree_node *two){ /* To avoid multiple fs deadlock */ if (one->lt_inode->i_sb->s_dev < two->lt_inode->i_sb->s_dev) return -1; if (one->lt_inode->i_sb->s_dev > two->lt_inode->i_sb->s_dev) return 1; if (one->lt_oid < two->lt_oid) return -1; if (one->lt_oid > two->lt_oid) return 1; if (one->lt_policy.l_extent.end < two->lt_policy.l_extent.start) return -1; if (one->lt_policy.l_extent.start > two->lt_policy.l_extent.end) return 1; return 0; /* they are the same object and overlap */}static void lt_merge(struct ll_lock_tree_node *dst, struct ll_lock_tree_node *src){ dst->lt_policy.l_extent.start = min(dst->lt_policy.l_extent.start, src->lt_policy.l_extent.start); dst->lt_policy.l_extent.end = max(dst->lt_policy.l_extent.end, src->lt_policy.l_extent.end); /* XXX could be a real call to the dlm to find superset modes */ if (src->lt_mode == LCK_PW && dst->lt_mode != LCK_PW) dst->lt_mode = LCK_PW;}static void lt_insert(struct ll_lock_tree *tree, struct ll_lock_tree_node *node){ struct ll_lock_tree_node *walk; rb_node_t **p, *parent; ENTRY;restart: p = &tree->lt_root.rb_node; parent = NULL; while (*p) { parent = *p; walk = rb_entry(parent, struct ll_lock_tree_node, lt_node); switch (lt_compare(node, walk)) { case -1: p = &(*p)->rb_left; break; case 1: p = &(*p)->rb_right; break; case 0: lt_merge(node, walk); rb_erase(&walk->lt_node, &tree->lt_root); OBD_FREE(walk, sizeof(*walk)); goto restart; break; default: LBUG(); break; } } rb_link_node(&node->lt_node, parent, p); rb_insert_color(&node->lt_node, &tree->lt_root); EXIT;}static struct ll_lock_tree_node *lt_least_node(struct ll_lock_tree *tree){ rb_node_t *rbnode; struct ll_lock_tree_node *node = NULL; for ( rbnode = tree->lt_root.rb_node; rbnode != NULL; rbnode = rbnode->rb_left) { if (rbnode->rb_left == NULL) { node = rb_entry(rbnode, struct ll_lock_tree_node, lt_node); break; } } RETURN(node);}int ll_tree_unlock(struct ll_lock_tree *tree){ struct ll_lock_tree_node *node; struct list_head *pos, *n; struct inode *inode; int rc = 0; ENTRY; list_for_each_safe(pos, n, &tree->lt_locked_list) { node = list_entry(pos, struct ll_lock_tree_node, lt_locked_item); inode = node->lt_inode; rc = ll_extent_unlock(tree->lt_fd, inode, ll_i2info(inode)->lli_smd, node->lt_mode, &node->lt_lockh); if (rc != 0) { /* XXX better message */ CERROR("couldn't unlock %d\n", rc); } list_del(&node->lt_locked_item); OBD_FREE(node, sizeof(*node)); } while ((node = lt_least_node(tree))) { rb_erase(&node->lt_node, &tree->lt_root); OBD_FREE(node, sizeof(*node)); } RETURN(rc);}int ll_tree_lock_iov(struct ll_lock_tree *tree, struct ll_lock_tree_node *first_node, const struct iovec *iov, unsigned long nr_segs, int ast_flags){ struct ll_lock_tree_node *node; int rc = 0; unsigned long seg; ENTRY; tree->lt_root.rb_node = NULL; INIT_LIST_HEAD(&tree->lt_locked_list); if (first_node != NULL) lt_insert(tree, first_node); /* To avoid such subtle deadlock case: client1 try to read file1 to * mmapped file2, on the same time, client2 try to read file2 to * mmapped file1.*/ for (seg = 0; seg < nr_segs; seg++) { const struct iovec *iv = &iov[seg]; rc = lt_get_mmap_locks(tree, (unsigned long)iv->iov_base, iv->iov_len); if (rc) GOTO(out, rc); } while ((node = lt_least_node(tree))) { struct inode *inode = node->lt_inode; rc = ll_extent_lock(tree->lt_fd, inode, ll_i2info(inode)->lli_smd, node->lt_mode, &node->lt_policy, &node->lt_lockh, ast_flags); if (rc != 0) GOTO(out, rc); rb_erase(&node->lt_node, &tree->lt_root); list_add_tail(&node->lt_locked_item, &tree->lt_locked_list); } RETURN(rc);out: ll_tree_unlock(tree); RETURN(rc);}int ll_tree_lock(struct ll_lock_tree *tree, struct ll_lock_tree_node *first_node, const char *buf, size_t count, int ast_flags){ struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; return ll_tree_lock_iov(tree, first_node, &local_iov, 1, ast_flags);}static ldlm_mode_t mode_from_vma(struct vm_area_struct *vma){ /* we only want to hold PW locks if the mmap() can generate * writes back to the file and that only happens in shared * writable vmas */ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) return LCK_PW; return LCK_PR;}static void policy_from_vma(ldlm_policy_data_t *policy, struct vm_area_struct *vma, unsigned long addr, size_t count){ policy->l_extent.start = ((addr - vma->vm_start) & CFS_PAGE_MASK) + ((__u64)vma->vm_pgoff << CFS_PAGE_SHIFT); policy->l_extent.end = (policy->l_extent.start + count - 1) | ~CFS_PAGE_MASK;}static struct vm_area_struct * our_vma(unsigned long addr, size_t count){ struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *ret = NULL; ENTRY; /* No MM (e.g. NFS)? No vmas too. */ if (!mm) RETURN(NULL); spin_lock(&mm->page_table_lock); for(vma = find_vma(mm, addr); vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) { if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage && vma->vm_flags & VM_SHARED) { ret = vma; break; } } spin_unlock(&mm->page_table_lock); RETURN(ret);}int lt_get_mmap_locks(struct ll_lock_tree *tree, unsigned long addr, size_t count){ struct vm_area_struct *vma; struct ll_lock_tree_node *node; ldlm_policy_data_t policy; struct inode *inode; ENTRY; if (count == 0) RETURN(0); /* we need to look up vmas on page aligned addresses */ count += addr & (~CFS_PAGE_MASK); addr &= CFS_PAGE_MASK; while ((vma = our_vma(addr, count)) != NULL) { LASSERT(vma->vm_file); inode = vma->vm_file->f_dentry->d_inode; policy_from_vma(&policy, vma, addr, count);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -