/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * mmap.c
 *
 * Code to deal with the mess that is clustered mmap.
 *
 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/signal.h>
#include <linux/rbtree.h>

#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
#include "journal.h"
#include "mmap.h"

#include "buffer_head_io.h"

static inline u64 ocfs2_binode_blkno(struct ocfs2_backing_inode *binode);
static inline struct rb_node *__ocfs2_buffer_lock_ctxt_root(
	struct ocfs2_buffer_lock_ctxt *ctxt);
static int ocfs2_buffer_lock_ctxt_insert(struct ocfs2_buffer_lock_ctxt *ctxt,
					 struct inode *inode,
					 struct ocfs2_backing_inode **binode_ret);
static int ocfs2_fill_ctxt_from_buf(struct super_block *sb,
				    struct inode *target_inode,
				    char __user *buf,
				    size_t size,
				    struct ocfs2_buffer_lock_ctxt *ctxt);
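/*
 * Fault path: unless the faulting task already holds this inode's
 * cluster locks (detected via the io marker list), ocfs2_nopage() blocks
 * signals, takes the metadata and then the data cluster lock at read
 * level, and drops an io marker on the inode so that a nested fault
 * won't try to lock again, before handing off to filemap_nopage().
 * Read level is enough because shared writable mappings are refused in
 * ocfs2_mmap() below.
 */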
static struct page *ocfs2_nopage(struct vm_area_struct *area,
				 unsigned long address,
				 int *type)
{
	int status, tmpstat, locked;
	struct inode *inode = area->vm_file->f_dentry->d_inode;
	struct page *page;
	sigset_t blocked, oldset;
	DECLARE_IO_MARKER(io_marker);

	mlog_entry("(inode %lu, address %lu)\n", inode->i_ino, address);

	locked = ocfs2_is_in_io_marker_list(inode, current);

	if (!locked) {
		/* For lack of a better error... Unfortunately returns
		 * from nopage aren't very expressive right now. */
		page = NOPAGE_SIGBUS;

		/* The best way to deal with signals in this path is
		 * to block them upfront, rather than allowing the
		 * locking paths to return -ERESTARTSYS. */
		sigfillset(&blocked);

		/* We should technically never get a bad status return
		 * from sigprocmask. */
		status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* Since we don't allow shared writable, we need only
		 * worry about read locking here. */
		status = ocfs2_meta_lock(inode, NULL, NULL, 0);
		if (status < 0) {
			mlog_errno(status);

			if (status == -ENOMEM)
				page = NOPAGE_OOM;
			goto bail_setmask;
		}

		status = ocfs2_data_lock(inode, 0);
		if (status < 0) {
			mlog_errno(status);

			if (status == -ENOMEM)
				page = NOPAGE_OOM;
			goto bail_unlock;
		}

		tmpstat = sigprocmask(SIG_SETMASK, &oldset, NULL);
		if (tmpstat < 0)
			mlog_errno(tmpstat);

		/* I'm not sure if we can somehow recurse back into
		 * nopage or not, but this doesn't cost us anything,
		 * so let's do it for now. */
		ocfs2_add_io_marker(inode, &io_marker);
	}

	page = filemap_nopage(area, address, type);

	if (!locked) {
		ocfs2_del_io_marker(inode, &io_marker);
		ocfs2_data_unlock(inode, 0);
		ocfs2_meta_unlock(inode, 0);
	}

bail:
	mlog_exit_ptr(page);
	return page;

bail_unlock:
	ocfs2_meta_unlock(inode, 0);

bail_setmask:
	tmpstat = sigprocmask(SIG_SETMASK, &oldset, NULL);
	if (tmpstat < 0)
		mlog_errno(tmpstat);

	mlog_exit_ptr(page);
	return page;
}

static struct vm_operations_struct ocfs2_file_vm_ops = {
	.nopage = ocfs2_nopage,
};

int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);

	/* We don't want to support shared writable mappings yet. */
	if (!ocfs2_mount_local(osb) &&
	    ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
	    ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
		mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
		/* This is -EINVAL because generic_file_readonly_mmap
		 * returns it in a similar situation. */
		return -EINVAL;
	}

	file_accessed(file);
	vma->vm_ops = &ocfs2_file_vm_ops;
	return 0;
}

static inline u64 ocfs2_binode_blkno(struct ocfs2_backing_inode *binode)
{
	struct inode *inode = binode->ba_inode;

	BUG_ON(!inode);

	return OCFS2_I(inode)->ip_blkno;
}

static inline struct rb_node *__ocfs2_buffer_lock_ctxt_root(
	struct ocfs2_buffer_lock_ctxt *ctxt)
{
	return ctxt->b_inodes.rb_node;
}

static int ocfs2_buffer_lock_ctxt_insert(struct ocfs2_buffer_lock_ctxt *ctxt,
					 struct inode *inode,
					 struct ocfs2_backing_inode **binode_ret)
{
	u64 blkno;
	struct ocfs2_backing_inode *tmp, *binode;
	struct rb_node *parent = NULL;
	struct rb_node **p = &ctxt->b_inodes.rb_node;

	BUG_ON(!ctxt);
	BUG_ON(!inode);

	blkno = OCFS2_I(inode)->ip_blkno;

	while (*p) {
		parent = *p;
		tmp = rb_entry(parent, struct ocfs2_backing_inode, ba_node);

		if (blkno < ocfs2_binode_blkno(tmp))
			p = &(*p)->rb_left;
		else if (blkno > ocfs2_binode_blkno(tmp))
			p = &(*p)->rb_right;
		else
			return 0; /* Don't insert duplicates */
	}

	binode = kcalloc(1, sizeof(struct ocfs2_backing_inode), GFP_KERNEL);
	if (!binode)
		return -ENOMEM;

	binode->ba_inode = inode;
	ocfs2_init_io_marker(&binode->ba_task);

	if (binode_ret)
		*binode_ret = binode;

	rb_link_node(&binode->ba_node, parent, p);
	rb_insert_color(&binode->ba_node, &ctxt->b_inodes);

	return 0;
}

static int ocfs2_fill_ctxt_from_buf(struct super_block *sb,
				    struct inode *target_inode,
				    char __user *buf,
				    size_t size,
				    struct ocfs2_buffer_lock_ctxt *ctxt)
{
	int status;
	unsigned long start = (unsigned long)buf;
	unsigned long end = start + size;
	struct inode *inode;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	/* Walk every vma that overlaps [buf, buf + size) and record the
	 * inode of any ocfs2 mapping on this sb that backs the buffer. */
	for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
		if (end <= vma->vm_start)
			break;

		if (vma->vm_ops == &ocfs2_file_vm_ops) {
			if (!vma->vm_file)
				continue;

			inode = vma->vm_file->f_dentry->d_inode;
			if (inode->i_sb == sb && inode != target_inode) {
				status = ocfs2_buffer_lock_ctxt_insert(ctxt,
								       inode,
								       NULL);
				if (status < 0)
					goto bail;
			}
		}
	}

	status = 0;
bail:
	return status;
}
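/*
 * ocfs2_setup_io_locks() collects the target inode plus every ocfs2
 * inode whose mmap()ed pages back the user buffer into the rb-tree
 * above, keyed by ip_blkno. Locking the inodes in ascending
 * block-number order gives all tasks a single global lock order, which
 * avoids ABBA deadlocks when two tasks each do I/O from a buffer that
 * is mapped over the other task's file.
 */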
int ocfs2_setup_io_locks(struct super_block *sb,
			 struct inode *target_inode,
			 char __user *buf,
			 size_t size,
			 struct ocfs2_buffer_lock_ctxt *ctxt,
			 struct ocfs2_backing_inode **target_binode)
{
	struct mm_struct *mm = current->mm;
	int skip_sem = (current->flags & PF_DUMPCORE) || !mm;
	int status;

	if (!skip_sem)
		down_read(&mm->mmap_sem);

	BUG_ON(__ocfs2_buffer_lock_ctxt_root(ctxt));

	/* We always insert target because it might not be backing part of
	 * the buffer - but it needs to be in there so that its lock gets
	 * ordered with everything else. */
	status = ocfs2_buffer_lock_ctxt_insert(ctxt, target_inode,
					       target_binode);

	/* knfsd, which lacks an mm, may call us to do I/O. Since the buffer
	 * is private to the kernel, there isn't any need to insert any other
	 * locks, so we can skip it.
	 *
	 * The pile of duct tape and mixed nuts that is NFS 1, universe 0 */
	if (!status && mm) {
		/* Now fill the tree with any inodes that back this
		 * buffer. If the target inode is in there, it will be
		 * skipped over. */
		status = ocfs2_fill_ctxt_from_buf(sb, target_inode, buf, size,
						  ctxt);
	}

	if (!skip_sem)
		up_read(&mm->mmap_sem);

	if (status < 0) {
		mlog_errno(status);
		ocfs2_unlock_buffer_inodes(ctxt);
		goto bail;
	}

	status = 0;
bail:
	return status;
}

/* Starting from pos, which can be NULL for the first call, give the
 * next backing inode that still needs locking. We return NULL when
 * there are none left or when we see last_inode. */
static struct ocfs2_backing_inode *
ocfs2_next_unlocked(struct ocfs2_buffer_lock_ctxt *ctxt,
		    struct inode *last_inode,
		    struct ocfs2_backing_inode *pos)
{
	struct ocfs2_backing_inode *binode = NULL;
	struct rb_node *node = NULL;

	if (pos == NULL) {
		if (ctxt->b_next_unlocked)
			binode = ctxt->b_next_unlocked;
		else
			node = rb_first(&ctxt->b_inodes);
	} else
		node = rb_next(&pos->ba_node);

	if (node)
		binode = rb_entry(node, struct ocfs2_backing_inode, ba_node);

	if (binode && last_inode && binode->ba_inode == last_inode)
		binode = NULL;

	/* This is just an optimization to skip nodes in the tree
	 * that we've already seen. If we're moving from one we've locked
	 * to one we haven't, then we mark this node in the ctxt so that
	 * we'll return to it in a future call after, say, hitting
	 * last_inode or EIOCBRETRY in lock_buffer_inodes. */
	if (pos && pos->ba_meta_locked && pos->ba_data_locked && binode)
		ctxt->b_next_unlocked = binode;

	return binode;
}

/* Will take locks on all inodes in the ctxt up until 'last_inode'. If
 * last_inode is NULL, then we take locks on everything. We mark lock
 * status on the context so we skip any that have already been
 * locked. On error we will completely abort the context. */
/* WARNING: If you get a failure case here, you *must* call
 * "ocfs2_unlock_buffer_inodes" as we may have left a few inodes under
 * cluster lock. */
int ocfs2_lock_buffer_inodes(struct ocfs2_buffer_lock_ctxt *ctxt,
			     struct inode *last_inode)
{
	int status;
	struct ocfs2_backing_inode *binode = NULL;
	struct inode *inode;

	while ((binode = ocfs2_next_unlocked(ctxt, last_inode, binode))) {
		/* The tricksy caller might have locked inodes themselves
		 * between calls. */
		if (binode->ba_meta_locked && binode->ba_data_locked)
			continue;

		inode = binode->ba_inode;

		if (!binode->ba_meta_locked) {
			status = ocfs2_meta_lock_full(inode, NULL, NULL,
						      binode->ba_lock_meta_level,
						      0, ctxt->b_cb,
						      ctxt->b_cb_data);
			if (status < 0) {
				if (status != -EIOCBRETRY)
					mlog_errno(status);
				goto bail;
			}

			binode->ba_meta_locked = 1;
		}

		if (!binode->ba_data_locked) {
			status = ocfs2_data_lock(inode,
						 binode->ba_lock_data_level);
			if (status < 0) {
				if (status == -EIOCBRETRY)
					goto bail;

				/* Clean up the metadata lock that we took
				 * above. */
				ocfs2_meta_unlock(inode,
						  binode->ba_lock_meta_level);

				/* NOTE: the source listing was truncated at
				 * this point; the close of this error path
				 * and of the function below is a minimal
				 * reconstruction following the pattern of
				 * the metadata-lock branch above. */
				binode->ba_meta_locked = 0;

				mlog_errno(status);
				goto bail;
			}

			binode->ba_data_locked = 1;
		}
	}

	status = 0;
bail:
	return status;
}
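/*
 * Rough usage sketch (hypothetical caller; the real users are the ocfs2
 * file I/O paths, and the ctxt initialization and lock-level setup are
 * assumed here rather than shown in this file):
 *
 *	status = ocfs2_setup_io_locks(sb, inode, buf, count, &ctxt,
 *				      &target_binode);
 *	if (status < 0)
 *		goto out;
 *
 *	status = ocfs2_lock_buffer_inodes(&ctxt, NULL);
 *	if (status >= 0)
 *		...do the actual I/O against 'inode'...
 *
 *	ocfs2_unlock_buffer_inodes(&ctxt);	<- per the WARNING above,
 *						   unlock even on failure
 * out:
 *	...
 */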