📄 extent_map.c
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * extent_map.c
 *
 * In-memory extent map for OCFS2.  Man, this code was prettier in
 * the library.
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License, version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/rbtree.h>

#define MLOG_MASK_PREFIX ML_EXTENT_MAP
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "extent_map.h"
#include "inode.h"
#include "super.h"

#include "buffer_head_io.h"

/*
 * SUCK SUCK SUCK
 * Our headers are so bad that struct ocfs2_extent_map is in ocfs.h
 */

struct ocfs2_extent_map_entry {
        struct rb_node e_node;
        int e_tree_depth;
        struct ocfs2_extent_rec e_rec;
};

struct ocfs2_em_insert_context {
        int need_left;
        int need_right;
        struct ocfs2_extent_map_entry *new_ent;
        struct ocfs2_extent_map_entry *old_ent;
        struct ocfs2_extent_map_entry *left_ent;
        struct ocfs2_extent_map_entry *right_ent;
};

static kmem_cache_t *ocfs2_em_ent_cachep = NULL;

static struct ocfs2_extent_map_entry *
ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
                        u32 cpos, u32 clusters,
                        struct rb_node ***ret_p,
                        struct rb_node **ret_parent);
static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
                                         struct ocfs2_extent_map_entry *ent);
static int ocfs2_extent_map_find_leaf(struct inode *inode,
                                      u32 cpos, u32 clusters,
                                      struct ocfs2_extent_list *el);
static int ocfs2_extent_map_lookup_read(struct inode *inode,
                                        u32 cpos, u32 clusters,
                                        struct ocfs2_extent_map_entry **ret_ent);
static int ocfs2_extent_map_try_insert(struct inode *inode,
                                       struct ocfs2_extent_rec *rec,
                                       int tree_depth,
                                       struct ocfs2_em_insert_context *ctxt);

/*
 * Returns 1 only if the rec contains all the given clusters -- that is,
 * rec's cpos is <= the cluster's cpos and the rec endpoint (cpos +
 * clusters) is >= the argument's endpoint.
 */
static int ocfs2_extent_rec_contains_clusters(struct ocfs2_extent_rec *rec,
                                              u32 cpos, u32 clusters)
{
        if (le32_to_cpu(rec->e_cpos) > cpos)
                return 0;
        if (cpos + clusters > le32_to_cpu(rec->e_cpos) +
                              le32_to_cpu(rec->e_clusters))
                return 0;
        return 1;
}
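/*
 * Illustrative example, not part of the original extent_map.c: a record
 * with e_cpos = 100 and e_clusters = 50 covers the half-open cluster
 * range [100, 150).  A query of cpos = 120, clusters = 10, i.e.
 * [120, 130), is fully contained, so the helper above returns 1; a
 * query of cpos = 140, clusters = 20, i.e. [140, 160), runs past the
 * record's endpoint, so it returns 0.
 */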
/*
 * Find an entry in the tree that intersects the region passed in.
 * Note that this will find straddled intervals, it is up to the
 * callers to enforce any boundary conditions.
 *
 * Callers must hold ip_lock.  This lookup is not guaranteed to return
 * a tree_depth 0 match, and as such can race inserts if the lock
 * were not held.
 *
 * The rb_node garbage lets insertion share the search.  Trivial
 * callers pass NULL.
 */
static struct ocfs2_extent_map_entry *
ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
                        u32 cpos, u32 clusters,
                        struct rb_node ***ret_p,
                        struct rb_node **ret_parent)
{
        struct rb_node **p = &em->em_extents.rb_node;
        struct rb_node *parent = NULL;
        struct ocfs2_extent_map_entry *ent = NULL;

        while (*p) {
                parent = *p;
                ent = rb_entry(parent, struct ocfs2_extent_map_entry,
                               e_node);
                if ((cpos + clusters) <= le32_to_cpu(ent->e_rec.e_cpos)) {
                        p = &(*p)->rb_left;
                        ent = NULL;
                } else if (cpos >= (le32_to_cpu(ent->e_rec.e_cpos) +
                                    le32_to_cpu(ent->e_rec.e_clusters))) {
                        p = &(*p)->rb_right;
                        ent = NULL;
                } else
                        break;
        }

        if (ret_p != NULL)
                *ret_p = p;
        if (ret_parent != NULL)
                *ret_parent = parent;
        return ent;
}
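/*
 * Illustrative sketch, not part of the original file: the rbtree walk
 * in ocfs2_extent_map_lookup() above is an interval-overlap search.
 * In CPU-endian terms, the per-node decision reduces to the
 * hypothetical helper below, where -1 means "descend left", 1 means
 * "descend right", and 0 means the entry overlaps [cpos, cpos + clusters).
 */
static inline int em_search_direction(u32 ent_cpos, u32 ent_clusters,
                                      u32 cpos, u32 clusters)
{
        if ((cpos + clusters) <= ent_cpos)
                return -1;      /* query ends at or before the entry starts */
        if (cpos >= (ent_cpos + ent_clusters))
                return 1;       /* query starts at or after the entry ends */
        return 0;               /* the half-open intervals overlap */
}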
/*
 * Find the leaf containing the interval we want.  While we're on our
 * way down the tree, fill in every record we see at any depth, because
 * we might want it later.
 *
 * Note that this code is run without ip_lock.  That's because it
 * sleeps while reading.  If someone is also filling the extent list at
 * the same time we are, we might have to restart.
 */
static int ocfs2_extent_map_find_leaf(struct inode *inode,
                                      u32 cpos, u32 clusters,
                                      struct ocfs2_extent_list *el)
{
        int i, ret;
        struct buffer_head *eb_bh = NULL;
        u64 blkno;
        u32 rec_end;
        struct ocfs2_extent_block *eb;
        struct ocfs2_extent_rec *rec;

        /*
         * The bh data containing the el cannot change here, because
         * we hold alloc_sem.  So we can do this without other
         * locks.
         */
        while (el->l_tree_depth) {
                blkno = 0;
                for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
                        rec = &el->l_recs[i];
                        rec_end = (le32_to_cpu(rec->e_cpos) +
                                   le32_to_cpu(rec->e_clusters));

                        ret = -EBADR;
                        if (rec_end > OCFS2_I(inode)->ip_clusters) {
                                mlog_errno(ret);
                                ocfs2_error(inode->i_sb,
                                            "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
                                            i,
                                            le64_to_cpu(rec->e_blkno),
                                            OCFS2_I(inode)->ip_blkno,
                                            OCFS2_I(inode)->ip_clusters);
                                goto out_free;
                        }

                        if (rec_end <= cpos) {
                                ret = ocfs2_extent_map_insert(inode, rec,
                                                le16_to_cpu(el->l_tree_depth));
                                if (ret && (ret != -EEXIST)) {
                                        mlog_errno(ret);
                                        goto out_free;
                                }
                                continue;
                        }
                        if ((cpos + clusters) <= le32_to_cpu(rec->e_cpos)) {
                                ret = ocfs2_extent_map_insert(inode, rec,
                                                le16_to_cpu(el->l_tree_depth));
                                if (ret && (ret != -EEXIST)) {
                                        mlog_errno(ret);
                                        goto out_free;
                                }
                                continue;
                        }

                        /*
                         * We've found a record that matches our
                         * interval.  We don't insert it because we're
                         * about to traverse it.
                         */

                        /* Check to see if we're straddling */
                        ret = -ESRCH;
                        if (!ocfs2_extent_rec_contains_clusters(rec,
                                                                cpos,
                                                                clusters)) {
                                mlog_errno(ret);
                                goto out_free;
                        }

                        /*
                         * If we've already found a record, the el has
                         * two records covering the same interval.
                         * EEEK!
                         */
                        ret = -EBADR;
                        if (blkno) {
                                mlog_errno(ret);
                                ocfs2_error(inode->i_sb,
                                            "Multiple extents for (cpos = %u, clusters = %u) on inode %"MLFu64"; e_blkno %"MLFu64" and rec %d at e_blkno %"MLFu64"\n",
                                            cpos, clusters,
                                            OCFS2_I(inode)->ip_blkno,
                                            blkno, i,
                                            le64_to_cpu(rec->e_blkno));
                                goto out_free;
                        }

                        blkno = le64_to_cpu(rec->e_blkno);
                }

                /*
                 * We don't support holes, and we're still up
                 * in the branches, so we'd better have found someone
                 */
                ret = -EBADR;
                if (!blkno) {
                        ocfs2_error(inode->i_sb,
                                    "No record found for (cpos = %u, clusters = %u) on inode %"MLFu64"\n",
                                    cpos, clusters,
                                    OCFS2_I(inode)->ip_blkno);
                        mlog_errno(ret);
                        goto out_free;
                }

                if (eb_bh) {
                        brelse(eb_bh);
                        eb_bh = NULL;
                }
                ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
                                       blkno, &eb_bh, OCFS2_BH_CACHED,
                                       inode);
                if (ret) {
                        mlog_errno(ret);
                        goto out_free;
                }
                eb = (struct ocfs2_extent_block *)eb_bh->b_data;
                if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
                        OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
                        ret = -EIO;
                        goto out_free;
                }
                el = &eb->h_list;
        }

        if (el->l_tree_depth)
                BUG();

        for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
                rec = &el->l_recs[i];

                if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
                    OCFS2_I(inode)->ip_clusters) {
                        ret = -EBADR;
                        mlog_errno(ret);
                        ocfs2_error(inode->i_sb,
                                    "Extent %d at e_blkno %"MLFu64" of inode %"MLFu64" goes past ip_clusters of %u\n",
                                    i,
                                    le64_to_cpu(rec->e_blkno),
                                    OCFS2_I(inode)->ip_blkno,
                                    OCFS2_I(inode)->ip_clusters);
                        return ret;
                }

                ret = ocfs2_extent_map_insert(inode, rec,
                                              le16_to_cpu(el->l_tree_depth));
                if (ret && (ret != -EEXIST)) {
                        mlog_errno(ret);
                        goto out_free;
                }
        }

        ret = 0;

out_free:
        if (eb_bh)
                brelse(eb_bh);

        return ret;
}
/*
 * This lookup actually will read from disk.  It has one invariant:
 * It will never re-traverse blocks.  This means that all inserts should
 * be new regions or more granular regions (both allowed by insert).
 */
static int ocfs2_extent_map_lookup_read(struct inode *inode,
                                        u32 cpos, u32 clusters,
                                        struct ocfs2_extent_map_entry **ret_ent)
{
        int ret;
        u64 blkno;
        struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
        struct ocfs2_extent_map_entry *ent;
        struct buffer_head *bh = NULL;
        struct ocfs2_extent_block *eb;
        struct ocfs2_dinode *di;
        struct ocfs2_extent_list *el;

        spin_lock(&OCFS2_I(inode)->ip_lock);
        ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
        if (ent) {
                if (!ent->e_tree_depth) {
                        spin_unlock(&OCFS2_I(inode)->ip_lock);
                        *ret_ent = ent;
                        return 0;
                }
                blkno = le64_to_cpu(ent->e_rec.e_blkno);
                spin_unlock(&OCFS2_I(inode)->ip_lock);

                ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, &bh,
                                       OCFS2_BH_CACHED, inode);
                if (ret) {
                        mlog_errno(ret);
                        if (bh)
                                brelse(bh);
                        return ret;
                }
                eb = (struct ocfs2_extent_block *)bh->b_data;
                if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
                        OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
                        brelse(bh);
                        return -EIO;
                }
                el = &eb->h_list;
        } else {
                spin_unlock(&OCFS2_I(inode)->ip_lock);

                ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
                                       OCFS2_I(inode)->ip_blkno, &bh,
                                       OCFS2_BH_CACHED, inode);
                if (ret) {
                        mlog_errno(ret);
                        if (bh)
                                brelse(bh);
                        return ret;
                }
                di = (struct ocfs2_dinode *)bh->b_data;
                if (!OCFS2_IS_VALID_DINODE(di)) {
                        brelse(bh);
                        OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, di);
                        return -EIO;
                }
                el = &di->id2.i_list;
        }

        ret = ocfs2_extent_map_find_leaf(inode, cpos, clusters, el);
        brelse(bh);
        if (ret) {
                mlog_errno(ret);
                return ret;
        }

        ent = ocfs2_extent_map_lookup(em, cpos, clusters, NULL, NULL);
        if (!ent) {
                ret = -ESRCH;
                mlog_errno(ret);
                return ret;
        }

        if (ent->e_tree_depth)
                BUG();  /* FIXME: Make sure this isn't a corruption */

        *ret_ent = ent;

        return 0;
}

/*
 * Callers must hold ip_lock.  This can insert pieces of the tree,
 * thus racing lookup if the lock weren't held.
 */
static int ocfs2_extent_map_insert_entry(struct ocfs2_extent_map *em,
                                         struct ocfs2_extent_map_entry *ent)
{
        struct rb_node **p, *parent;
        struct ocfs2_extent_map_entry *old_ent;

        old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(ent->e_rec.e_cpos),
                                          le32_to_cpu(ent->e_rec.e_clusters),
                                          &p, &parent);
        if (old_ent)
                return -EEXIST;

        rb_link_node(&ent->e_node, parent, p);
        rb_insert_color(&ent->e_node, &em->em_extents);

        return 0;
}
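/*
 * Illustrative sketch, not part of the original file: when
 * ocfs2_extent_map_try_insert() below finds an existing entry with a
 * greater e_tree_depth (a less precise record from higher in the btree)
 * overlapping the new record, it may have to carve the old entry into a
 * left and/or right remainder around the new one.  In CPU-endian terms
 * the arithmetic it performs is:
 *
 *      left.e_cpos      = old.e_cpos
 *      left.e_clusters  = new.e_cpos - old.e_cpos
 *                                      (needed only if new.e_cpos > old.e_cpos)
 *      right.e_cpos     = new.e_cpos + new.e_clusters
 *      right.e_clusters = (old.e_cpos + old.e_clusters) - right.e_cpos
 *                                      (needed only if the old end > the new end)
 *
 * For example, splitting an old entry covering [100, 200) around a new
 * record covering [120, 150) leaves a left piece [100, 120) and a right
 * piece [150, 200).
 */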
/*
 * Simple rule: on any return code other than -EAGAIN, anything left
 * in the insert_context will be freed.
 *
 * Simple rule #2: A return code of -EEXIST from this function or
 * its calls to ocfs2_extent_map_insert_entry() signifies that another
 * thread beat us to the insert.  It is not an actual error, but it
 * tells the caller we have no more work to do.
 */
static int ocfs2_extent_map_try_insert(struct inode *inode,
                                       struct ocfs2_extent_rec *rec,
                                       int tree_depth,
                                       struct ocfs2_em_insert_context *ctxt)
{
        int ret;
        struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
        struct ocfs2_extent_map_entry *old_ent;

        ctxt->need_left = 0;
        ctxt->need_right = 0;
        ctxt->old_ent = NULL;

        spin_lock(&OCFS2_I(inode)->ip_lock);
        ret = ocfs2_extent_map_insert_entry(em, ctxt->new_ent);
        if (!ret) {
                ctxt->new_ent = NULL;
                goto out_unlock;
        }

        /* Since insert_entry failed, the map MUST have old_ent */
        old_ent = ocfs2_extent_map_lookup(em, le32_to_cpu(rec->e_cpos),
                                          le32_to_cpu(rec->e_clusters),
                                          NULL, NULL);
        if (!old_ent)
                BUG();

        if (old_ent->e_tree_depth < tree_depth) {
                /* Another thread beat us to the lower tree_depth */
                ret = -EEXIST;
                goto out_unlock;
        }

        if (old_ent->e_tree_depth == tree_depth) {
                /*
                 * Another thread beat us to this tree_depth.
                 * Let's make sure we agree with that thread (the
                 * extent_rec should be identical).
                 */
                if (!memcmp(rec, &old_ent->e_rec,
                            sizeof(struct ocfs2_extent_rec)))
                        ret = 0;
                else
                        /* FIXME: Should this be ESRCH/EBADR??? */
                        ret = -EEXIST;

                goto out_unlock;
        }

        /*
         * We do it in this order specifically so that no actual tree
         * changes occur until we have all the pieces we need.  We
         * don't want malloc failures to leave an inconsistent tree.
         * Whenever we drop the lock, another process could be
         * inserting.  Also note that, if another process just beat us
         * to an insert, we might not need the same pieces we needed
         * the first go round.  In the end, the pieces we need will
         * be used, and the pieces we don't will be freed.
         */
        ctxt->need_left = !!(le32_to_cpu(rec->e_cpos) >
                             le32_to_cpu(old_ent->e_rec.e_cpos));
        ctxt->need_right = !!((le32_to_cpu(old_ent->e_rec.e_cpos) +
                               le32_to_cpu(old_ent->e_rec.e_clusters)) >
                              (le32_to_cpu(rec->e_cpos) +
                               le32_to_cpu(rec->e_clusters)));
        ret = -EAGAIN;
        if (ctxt->need_left) {
                if (!ctxt->left_ent)
                        goto out_unlock;
                *(ctxt->left_ent) = *old_ent;
                ctxt->left_ent->e_rec.e_clusters =
                        cpu_to_le32(le32_to_cpu(rec->e_cpos) -
                                    le32_to_cpu(ctxt->left_ent->e_rec.e_cpos));
        }
        if (ctxt->need_right) {
                if (!ctxt->right_ent)
                        goto out_unlock;
                *(ctxt->right_ent) = *old_ent;
                ctxt->right_ent->e_rec.e_cpos =
                        cpu_to_le32(le32_to_cpu(rec->e_cpos) +
                                    le32_to_cpu(rec->e_clusters));
                ctxt->right_ent->e_rec.e_clusters =
                        cpu_to_le32((le32_to_cpu(old_ent->e_rec.e_cpos) +
                                     le32_to_cpu(old_ent->e_rec.e_clusters)) -
                                    le32_to_cpu(ctxt->right_ent->e_rec.e_cpos));