📄 file.c

📁 ocfs1.4.1 oracle分布式文件系统
💻 C
📖 第 1 页 / 共 5 页
字号:
	/*
	 * NOTE(review): this is the tail of a function whose signature and
	 * first declarations (including `status`) lie before this excerpt.
	 * Only what is visible from here on is documented.
	 */
	int free_extents;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
	enum ocfs2_alloc_restarted reason = RESTART_NONE;
	u32 bit_off, num_bits;
	u64 block;
	u8 flags = 0;

	/* Callers must always ask for at least one cluster. */
	BUG_ON(!clusters_to_add);

	if (mark_unwritten)
		flags = OCFS2_EXT_UNWRITTEN;

	/* A negative count is an error code from the helper. */
	free_extents = ocfs2_num_free_extents(osb, inode, fe);
	if (free_extents < 0) {
		status = free_extents;
		mlog_errno(status);
		goto leave;
	}

	/* there are two cases which could cause us to EAGAIN in the
	 * we-need-more-metadata case:
	 * 1) we haven't reserved *any*
	 * 2) we are so fragmented, we've needed to add metadata too
	 *    many times. */
	if (!free_extents && !meta_ac) {
		mlog(0, "we haven't reserved any metadata!\n");
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	} else if ((!free_extents)
		   && (ocfs2_alloc_context_bits_left(meta_ac)
		       < ocfs2_extend_meta_needed(fe))) {
		mlog(0, "filesystem is really fragmented...\n");
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	}

	/*
	 * The claim may return fewer clusters than requested (num_bits is
	 * asserted to be <= clusters_to_add below); any shortfall is
	 * reported at the end as -EAGAIN / RESTART_TRANS.
	 */
	status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
					clusters_to_add, &bit_off, &num_bits);
	if (status < 0) {
		/* -ENOSPC is passed up without being logged. */
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	/* reserve our write early -- insert_extent may update the inode */
	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
	status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
				     *logical_offset, block, num_bits,
				     flags, meta_ac);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* Account for what was just inserted and advance the caller's offset. */
	clusters_to_add -= num_bits;
	*logical_offset += num_bits;

	if (clusters_to_add) {
		/* Partial allocation: tell the caller to come back for the rest. */
		mlog(0, "need to alloc once more, clusters = %u, wanted = "
		     "%u\n", fe->i_clusters, clusters_to_add);
		status = -EAGAIN;
		reason = RESTART_TRANS;
	}

leave:
	mlog_exit(status);
	/* reason_ret is optional; it is written on every exit path when given. */
	if (reason_ret)
		*reason_ret = reason;
	return status;
}

/*
 * For a given allocation, determine which allocators will need to be
 * accessed, and lock them, reserving the appropriate number of bits.
 *
 * Sparse file systems call this from ocfs2_write_begin_nolock()
 * and ocfs2_allocate_unwritten_extents().
 *
 * File systems which don't support holes call this from
 * ocfs2_extend_allocation().
 *
 * @inode:            inode being extended; supplies the superblock and is
 *                    used in the debug log line.
 * @di:               on-disk inode handed to ocfs2_num_free_extents() and
 *                    ocfs2_reserve_new_metadata().
 * @clusters_to_add:  number of clusters to reserve; when zero, no data
 *                    reservation is attempted.
 * @extents_to_split: counted twice when sizing max_recs_needed.
 * @data_ac:          out parameter; may be NULL only when clusters_to_add
 *                    is zero (enforced by BUG_ON). Reset to NULL on entry.
 * @meta_ac:          out parameter; must not be NULL (it is dereferenced
 *                    unconditionally). Reset to NULL on entry, and freed
 *                    and reset to NULL again on any error.
 *
 * Returns 0 on success, otherwise the error value of the step that failed.
 */
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
			  u32 clusters_to_add, u32 extents_to_split,
			  struct ocfs2_alloc_context **data_ac,
			  struct ocfs2_alloc_context **meta_ac)
{
	int ret = 0, num_free_extents;
	/* Threshold the free extent count is compared against below. */
	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	*meta_ac = NULL;
	if (data_ac)
		*data_ac = NULL;

	BUG_ON(clusters_to_add != 0 && data_ac == NULL);

	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
	     "clusters_to_add = %u, extents_to_split = %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
	     le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);

	/* A negative count is an error code from the helper. */
	num_free_extents = ocfs2_num_free_extents(osb, inode, di);
	if (num_free_extents < 0) {
		ret = num_free_extents;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Sparse allocation file systems need to be more conservative
	 * with reserving room for expansion - the actual allocation
	 * happens while we've got a journal handle open so re-taking
	 * a cluster lock (because we ran out of room for another
	 * extent) will violate ordering rules.
	 *
	 * Most of the time we'll only be seeing this 1 cluster at a time
	 * anyway.
	 *
	 * Always lock for any unwritten extents - we might want to
	 * add blocks during a split.
	 */
	if (!num_free_extents ||
	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
		ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
		if (ret < 0) {
			/* -ENOSPC is passed up without being logged. */
			if (ret != -ENOSPC)
				mlog_errno(ret);
			goto out;
		}
	}

	/* Metadata-only request: nothing more to reserve. */
	if (clusters_to_add == 0)
		goto out;

	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

out:
	if (ret) {
		/* Undo the metadata reservation so the caller sees no context. */
		if (*meta_ac) {
			ocfs2_free_alloc_context(*meta_ac);
			*meta_ac = NULL;
		}

		/*
		 * We cannot have an error and a non null *data_ac.
		 *
		 * NOTE(review): the data reservation is the last step here, so
		 * this relies on ocfs2_reserve_clusters() leaving *data_ac NULL
		 * when it fails - confirm against that helper.
		 */
	}

	return ret;
}

/*
 * Grow the allocation of @inode by @clusters_to_add clusters.
 *
 * Reads the inode's dinode block, reserves allocators through
 * ocfs2_lock_allocators(), starts a transaction and then calls
 * ocfs2_do_extend_allocation(), repeating until the request is satisfied
 * or an error occurs. Two restart paths exist, selected by the reason
 * code that ocfs2_do_extend_allocation() reports through @why:
 *
 *  - RESTART_META:  commit the transaction, free both allocation
 *                   contexts and start over from restart_all.
 *  - RESTART_TRANS: extend the running transaction's credits and loop
 *                   back to restarted_transaction.
 *
 * @inode:           inode to extend.
 * @logical_start:   passed by address to ocfs2_do_extend_allocation().
 * @clusters_to_add: clusters still wanted; reduced after every pass by
 *                   the growth observed in ip_clusters.
 * @mark_unwritten:  forwarded to ocfs2_do_extend_allocation(); only legal
 *                   when ocfs2_sparse_alloc() is true (BUG_ON otherwise).
 *
 * Returns 0 on success, otherwise the error value of the failing step.
 * Any ocfs2_extend_trans() failure is reported as -ENOMEM.
 */
static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				     u32 clusters_to_add, int mark_unwritten)
{
	int status = 0;
	int restart_func = 0;
	int credits;
	u32 prev_clusters;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *fe = NULL;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);

	/*
	 * This function only exists for file systems which don't
	 * support holes.
	 */
	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
				  OCFS2_BH_CACHED, inode);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	fe = (struct ocfs2_dinode *) bh->b_data;
	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto leave;
	}

restart_all:
	/* On-disk and in-memory cluster counts must agree before allocating. */
	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);

	status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
				       &meta_ac);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* Clear it so the cleanup path does not commit an error pointer. */
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	/* reserve a write to the file entry early on - that way, if we
	 * run out of credits in the allocation path, we can still
	 * update i_size. */
	status = ocfs2_journal_access(handle, inode, bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* Snapshot so the progress made by this pass can be measured below. */
	prev_clusters = OCFS2_I(inode)->ip_clusters;

	status = ocfs2_do_extend_allocation(osb,
					    inode,
					    &logical_start,
					    clusters_to_add,
					    mark_unwritten,
					    bh,
					    handle,
					    data_ac,
					    meta_ac,
					    &why);
	/* -EAGAIN is not a failure here; it is handled through `why` below. */
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	/* Overwrites a possible -EAGAIN in status with the journal result. */
	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	spin_lock(&OCFS2_I(inode)->ip_lock);
	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
	spin_unlock(&OCFS2_I(inode)->ip_lock);

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			/* Acted upon in the cleanup path, after the commit. */
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    fe,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
	     le32_to_cpu(fe->i_clusters),
	     (unsigned long long)le64_to_cpu(fe->i_size));
	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
	     OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));

leave:
	/* Shared cleanup; also the turnaround point for a RESTART_META retry. */
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	/* Retry only if nothing failed; bh is kept across the restart. */
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}
	if (bh) {
		brelse(bh);
		bh = NULL;
	}

	mlog_exit(status);
	return status;
}

/* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->prepare_write() and
 * ->commit_write().
 *
 * Runs a zero-length prepare/commit write cycle (from == to) on the
 * page cache page that contains byte offset @size of @inode.
 *
 * @inode: inode whose mapping is used.
 * @size:  byte offset selecting the page and the in-page position.
 *
 * Returns 0 on success or a negative error code. */
static int ocfs2_write_zero_page(struct inode *inode,
				 u64 size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	unsigned long index;
	unsigned int offset;
	handle_t *handle = NULL;
	int ret;

	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
	/* ugh.  in prepare/commit_write, if from==to==start of block, we
	** skip the prepare.  make sure we never send an offset for the start
	** of a block
	*/
	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
		offset++;
	}

	index = size >> PAGE_CACHE_SHIFT;

	page = grab_cache_page(mapping, index);
	if (!page) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	/* A transaction is only started when ocfs2_should_order_data() says so. */
	if (ocfs2_should_order_data(inode)) {
		handle = ocfs2_start_walk_page_trans(inode, page, offset,
						     offset);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			handle = NULL;
			goto out_unlock;
		}
	}

	/* must not update i_size! */
	ret = block_commit_write(page, offset, offset);
	if (ret < 0)
		mlog_errno(ret);
	else
		ret = 0;	/* collapse any non-negative result to plain success */

	if (handle)
		ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out_unlock:
	/* The page is unlocked and released on success and on failure. */
	unlock_page(page);
	page_cache_release(page);
out:
	return ret;
}

/*
 * Call ocfs2_write_zero_page() once per filesystem block, starting at the
 * current i_size as aligned by ocfs2_align_bytes_to_blocks() and stopping
 * once the offset reaches @zero_to_size.
 *
 * @inode:        inode being extended.
 * @zero_to_size: exclusive upper bound, in bytes, of the range walked.
 *
 * Returns 0 on success (including when there is nothing to do) or the
 * first negative error from ocfs2_write_zero_page().
 */
static int ocfs2_zero_extend(struct inode *inode,
			     u64 zero_to_size)
{
	int ret = 0;
	u64 start_off;
	struct super_block *sb = inode->i_sb;

	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
	while (start_off < zero_to_size) {
		ret = ocfs2_write_zero_page(inode, start_off);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}

		start_off += sb->s_blocksize;

		/*
		 * Very large extends have the potential to lock up
		 * the cpu for extended periods of time.
		 */
		cond_resched();
	}

out:
	return ret;
}

/*
 * Extend @inode without leaving holes: allocate whatever clusters are
 * missing to cover @new_i_size, then run ocfs2_zero_extend() up to
 * @zero_to.
 *
 * @inode:      inode to extend.
 * @new_i_size: target size in bytes, used to compute the cluster count.
 * @zero_to:    byte offset handed to ocfs2_zero_extend().
 *
 * Returns 0 on success or the error from the failing step.
 */
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
{
	int ret;
	u32 clusters_to_add;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	/* Clusters needed for the new size, minus those already allocated. */
	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
	if (clusters_to_add < oi->ip_clusters)
		clusters_to_add = 0;	/* clamp: avoid unsigned wrap-around */
	else
		clusters_to_add -= oi->ip_clusters;

	if (clusters_to_add) {
		/* New clusters are appended after the current last cluster. */
		ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
						clusters_to_add, 0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * Call this even if we don't add any clusters to the tree. We
	 * still need to zero the area between the old i_size and the
	 * new i_size.
	 */
	ret = ocfs2_zero_extend(inode, zero_to);
	if (ret < 0)
		mlog_errno(ret);

out:
	return ret;
}

/*
 * Extend @inode to @new_i_size.
 *
 * NOTE(review): this function continues past the end of the excerpt; the
 * `out` and `out_update_size` labels and the return are not visible here,
 * so only the visible part is described.
 *
 * @inode:      inode to extend.
 * @di_bh:      buffer head of the inode's dinode block; must not be NULL.
 * @new_i_size: requested size in bytes; must not be smaller than the
 *              current i_size (enforced by BUG_ON).
 */
static int ocfs2_extend_file(struct inode *inode,
			     struct buffer_head *di_bh,
			     u64 new_i_size)
{
	int ret = 0;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	BUG_ON(!di_bh);

	/* setattr sometimes calls us like this. */
	if (new_i_size == 0)
		goto out;

	/* Size unchanged: nothing to do. */
	if (i_size_read(inode) == new_i_size)
		goto out;
	BUG_ON(new_i_size < i_size_read(inode));

	/*
	 * Fall through for converting inline data, even if the fs
	 * supports sparse files.
	 *
	 * The check for inline data here is legal - nobody can add
	 * the feature since we have i_mutex. We must check it again
	 * after acquiring ip_alloc_sem though, as paths like mmap
	 * might have raced us to converting the inode to extents.
	 */
	if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
	    && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		goto out_update_size;

	/*
	 * The alloc sem blocks people in read/write from reading our
	 * allocation until we're done changing it. We depend on
	 * i_mutex to block other extend/truncate calls while we're
	 * here.
	 */
	down_write(&oi->ip_alloc_sem);

	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/*
		 * We can optimize small extends by keeping the inodes
		 * inline data.
		 */
		if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
			up_write(&oi->ip_alloc_sem);
			goto out_update_size;
		}

		ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
		if (ret) {
			/* Drop the semaphore before leaving on the error path. */
			up_write(&oi->ip_alloc_sem);

			mlog_errno(ret);
			goto out;
		}
	}

	/* Only file systems without sparse support allocate and zero here. */
	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);

	up_write(&oi->ip_alloc_sem);

	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -