📄 aops.c

📁 ocfs1.4.1 oracle分布式文件系统
💻 C
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2002, 2004 Oracle.  All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */#include <linux/fs.h>#include <linux/slab.h>#include <linux/highmem.h>#include <linux/pagemap.h>#include <asm/byteorder.h>#include <linux/swap.h>#include <linux/pipe_fs_i.h>#include <linux/mpage.h>#define MLOG_MASK_PREFIX ML_FILE_IO#include <cluster/masklog.h>#include "ocfs2.h"#include "alloc.h"#include "aops.h"#include "dlmglue.h"#include "extent_map.h"#include "file.h"#include "inode.h"#include "journal.h"#include "suballoc.h"#include "super.h"#include "symlink.h"#include "buffer_head_io.h"static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,				   struct buffer_head *bh_result, int create){	int err = -EIO;	int status;	struct ocfs2_dinode *fe = NULL;	struct buffer_head *bh = NULL;	struct buffer_head *buffer_cache_bh = NULL;	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	void *kaddr;	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,		   (unsigned long long)iblock, bh_result, create);	BUG_ON(ocfs2_inode_is_fast_symlink(inode));	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {		mlog(ML_ERROR, "block offset > PATH_MAX: %llu",		     (unsigned long long)iblock);		goto bail;	}	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),				  OCFS2_I(inode)->ip_blkno,				  &bh, OCFS2_BH_CACHED, inode);	if (status < 0) {		mlog_errno(status);		goto bail;	}	fe = (struct ocfs2_dinode *) bh->b_data;	if (!OCFS2_IS_VALID_DINODE(fe)) {		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",		     (unsigned long long)le64_to_cpu(fe->i_blkno), 7,		     fe->i_signature);		goto bail;	}	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,						    le32_to_cpu(fe->i_clusters))) {		mlog(ML_ERROR, "block offset is outside the allocated size: "		     "%llu\n", (unsigned long long)iblock);		goto bail;	}	/* We don't use the page cache to create symlink data, so if	 * need be, copy it over from the buffer cache. */	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +			    iblock;		buffer_cache_bh = sb_getblk(osb->sb, blkno);		if (!buffer_cache_bh) {			mlog(ML_ERROR, "couldn't getblock for symlink!\n");			goto bail;		}		/* we haven't locked out transactions, so a commit		 * could've happened. Since we've got a reference on		 * the bh, even if it commits while we're doing the		 * copy, the data is still good. */		if (buffer_jbd(buffer_cache_bh)		    && ocfs2_inode_is_new(inode)) {			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);			if (!kaddr) {				mlog(ML_ERROR, "couldn't kmap!\n");				goto bail;			}			memcpy(kaddr + (bh_result->b_size * iblock),			       buffer_cache_bh->b_data,			       bh_result->b_size);			kunmap_atomic(kaddr, KM_USER0);			set_buffer_uptodate(bh_result);		}		brelse(buffer_cache_bh);	}	map_bh(bh_result, inode->i_sb,	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);	err = 0;bail:	if (bh)		brelse(bh);	mlog_exit(err);	return err;}static int ocfs2_get_block(struct inode *inode, sector_t iblock,			   struct buffer_head *bh_result, int create){	int err = 0;	unsigned int ext_flags;	u64 max_blocks = bh_result->b_size >> inode->i_blkbits;	u64 p_blkno, count, past_eof;	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,		   (unsigned long long)iblock, bh_result, create);	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)		mlog(ML_NOTICE, "get_block on system inode 0x%p (%lu)\n",		     inode, inode->i_ino);	if (S_ISLNK(inode->i_mode)) {		/* this always does I/O for some reason. */		err = ocfs2_symlink_get_block(inode, iblock, bh_result, create);		goto bail;	}	err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,					  &ext_flags);	if (err) {		mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "		     "%llu, NULL)\n", err, inode, (unsigned long long)iblock,		     (unsigned long long)p_blkno);		goto bail;	}	if (max_blocks < count)		count = max_blocks;	/*	 * ocfs2 never allocates in this function - the only time we	 * need to use BH_New is when we're extending i_size on a file	 * system which doesn't support holes, in which case BH_New	 * allows block_prepare_write() to zero.	 *	 * If we see this on a sparse file system, then a truncate has	 * raced us and removed the cluster. In this case, we clear	 * the buffers dirty and uptodate bits and let the buffer code	 * ignore it as a hole.	 */	if (create && p_blkno == 0 && ocfs2_sparse_alloc(osb)) {		clear_buffer_dirty(bh_result);		clear_buffer_uptodate(bh_result);		goto bail;	}	/* Treat the unwritten extent as a hole for zeroing purposes. */	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))		map_bh(bh_result, inode->i_sb, p_blkno);	bh_result->b_size = count << inode->i_blkbits;	if (!ocfs2_sparse_alloc(osb)) {		if (p_blkno == 0) {			err = -EIO;			mlog(ML_ERROR,			     "iblock = %llu p_blkno = %llu blkno=(%llu)\n",			     (unsigned long long)iblock,			     (unsigned long long)p_blkno,			     (unsigned long long)OCFS2_I(inode)->ip_blkno);			mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);			dump_stack();		}		past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));		mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,		     (unsigned long long)past_eof);		if (create && (iblock >= past_eof))			set_buffer_new(bh_result);	}bail:	if (err < 0)		err = -EIO;	mlog_exit(err);	return err;}int ocfs2_read_inline_data(struct inode *inode, struct page *page,			   struct buffer_head *di_bh){	void *kaddr;	loff_t size;	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;	if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {		ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",			    (unsigned long long)OCFS2_I(inode)->ip_blkno);		return -EROFS;	}	size = i_size_read(inode);	if (size > PAGE_CACHE_SIZE ||	    size > ocfs2_max_inline_data(inode->i_sb)) {		ocfs2_error(inode->i_sb,			    "Inode %llu has with inline data has bad size: %Lu",			    (unsigned long long)OCFS2_I(inode)->ip_blkno,			    (unsigned long long)size);		return -EROFS;	}	kaddr = kmap_atomic(page, KM_USER0);	if (size)		memcpy(kaddr, di->id2.i_data.id_data, size);	/* Clear the remaining part of the page */	memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);	flush_dcache_page(page);	kunmap_atomic(kaddr, KM_USER0);	SetPageUptodate(page);	return 0;}static int ocfs2_readpage_inline(struct inode *inode, struct page *page){	int ret;	struct buffer_head *di_bh = NULL;	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	BUG_ON(!PageLocked(page));	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));	ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,			       OCFS2_BH_CACHED, inode);	if (ret) {		mlog_errno(ret);		goto out;	}	ret = ocfs2_read_inline_data(inode, page, di_bh);out:	unlock_page(page);	brelse(di_bh);	return ret;}static int ocfs2_readpage(struct file *file, struct page *page){	struct inode *inode = page->mapping->host;	struct ocfs2_inode_info *oi = OCFS2_I(inode);	loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;	int ret, unlock = 1;	mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0));	ret = ocfs2_inode_lock_with_page(inode, NULL, 0, page);	if (ret != 0) {		if (ret == AOP_TRUNCATED_PAGE)			unlock = 0;		mlog_errno(ret);		goto out;	}	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {		ret = AOP_TRUNCATED_PAGE;		goto out_inode_unlock;	}	/*	 * i_size might have just been updated as we grabed the meta lock.  We	 * might now be discovering a truncate that hit on another node.	 * block_read_full_page->get_block freaks out if it is asked to read	 * beyond the end of a file, so we check here.  Callers	 * (generic_file_read, vm_ops->fault) are clever enough to check i_size	 * and notice that the page they just read isn't needed.	 *	 * XXX sys_readahead() seems to get that wrong?	 */	if (start >= i_size_read(inode)) {		zero_user_page(page, 0, PAGE_SIZE, KM_USER0);		SetPageUptodate(page);		ret = 0;		goto out_alloc;	}	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)		ret = ocfs2_readpage_inline(inode, page);	else		ret = block_read_full_page(page, ocfs2_get_block);	unlock = 0;out_alloc:	up_read(&OCFS2_I(inode)->ip_alloc_sem);out_inode_unlock:	ocfs2_inode_unlock(inode, 0);out:	if (unlock)		unlock_page(page);	mlog_exit(ret);	return ret;}/* * This is used only for read-ahead. Failures or difficult to handle * situations are safe to ignore. * * Right now, we don't bother with BH_Boundary - in-inode extent lists * are quite large (243 extents on 4k blocks), so most inodes don't * grow out to a tree. If need be, detecting boundary extents could * trivially be added in a future version of ocfs2_get_block(). */static int ocfs2_readpages(struct file *filp, struct address_space *mapping,			   struct list_head *pages, unsigned nr_pages){	int ret, err = -EIO;	struct inode *inode = mapping->host;	struct ocfs2_inode_info *oi = OCFS2_I(inode);	loff_t start;	struct page *last;	/*	 * Use the nonblocking flag for the dlm code to avoid page	 * lock inversion, but don't bother with retrying.	 */	ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK);	if (ret)		return err;	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {		ocfs2_inode_unlock(inode, 0);		return err;	}	/*	 * Don't bother with inline-data. There isn't anything	 * to read-ahead in that case anyway...	 */	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)		goto out_unlock;	/*	 * Check whether a remote node truncated this file - we just	 * drop out in that case as it's not worth handling here.	 */	last = list_entry(pages->prev, struct page, lru);	start = (loff_t)last->index << PAGE_CACHE_SHIFT;	if (start >= i_size_read(inode))		goto out_unlock;	err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block);out_unlock:	up_read(&oi->ip_alloc_sem);	ocfs2_inode_unlock(inode, 0);	return err;}/* Note: Because we don't support holes, our allocation has * already happened (allocation writes zeros to the file data) * so we don't have to worry about ordered writes in * ocfs2_writepage. * * ->writepage is called during the process of invalidating the page cache * during blocked lock processing.  It can't block on any cluster locks * to during block mapping.  It's relying on the fact that the block * mapping can't have disappeared under the dirty pages that it is * being asked to write back. */static int ocfs2_writepage(struct page *page, struct writeback_control *wbc){	int ret;	mlog_entry("(0x%p)\n", page);	ret = block_write_full_page(page, ocfs2_get_block, wbc);	mlog_exit(ret);	return ret;}/* * This is called from ocfs2_write_zero_page() which has handled it's * own cluster locking and has ensured allocation exists for those * blocks to be written. */int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,			       unsigned from, unsigned to){	int ret;	ret = block_prepare_write(page, from, to, ocfs2_get_block);	return ret;}/* Taken from ext3. We don't necessarily need the full blown * functionality yet, but IMHO it's better to cut and paste the whole * thing so we can avoid introducing our own bugs (and easily pick up * their fixes when they happen) --Mark */int walk_page_buffers(	handle_t *handle,			struct buffer_head *head,			unsigned from,			unsigned to,			int *partial,			int (*fn)(	handle_t *handle,					struct buffer_head *bh)){	struct buffer_head *bh;	unsigned block_start, block_end;	unsigned blocksize = head->b_size;	int err, ret = 0;	struct buffer_head *next;	for (	bh = head, block_start = 0;		ret == 0 && (bh != head || !block_start);	    	block_start = block_end, bh = next)	{		next = bh->b_this_page;		block_end = block_start + blocksize;		if (block_end <= from || block_start >= to) {			if (partial && !buffer_uptodate(bh))				*partial = 1;			continue;		}		err = (*fn)(handle, bh);		if (!ret)			ret = err;	}	return ret;}handle_t *ocfs2_start_walk_page_trans(struct inode *inode,							 struct page *page,							 unsigned from,							 unsigned to){	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);	handle_t *handle;	int ret = 0;	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);	if (IS_ERR(handle)) {		ret = -ENOMEM;		mlog_errno(ret);		goto out;	}	if (ocfs2_should_order_data(inode)) {		ret = walk_page_buffers(handle,					page_buffers(page),					from, to, NULL,					ocfs2_journal_dirty_data);		if (ret < 0) 			mlog_errno(ret);	}out:	if (ret) {
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -