📄 aops.c
}

/*
 * Release the pages, the inode buffer head and the write context itself.
 */
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
{
        ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
        brelse(wc->w_di_bh);
        kfree(wc);
}

/*
 * Allocate and initialize a write context describing the cluster range
 * covered by a write of 'len' bytes at byte offset 'pos'.
 */
static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
                                  struct ocfs2_super *osb, loff_t pos,
                                  unsigned len, struct buffer_head *di_bh)
{
        u32 cend;
        struct ocfs2_write_ctxt *wc;

        wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
        if (!wc)
                return -ENOMEM;

        wc->w_cpos = pos >> osb->s_clustersize_bits;
        cend = (pos + len - 1) >> osb->s_clustersize_bits;
        wc->w_clen = cend - wc->w_cpos + 1;

        get_bh(di_bh);
        wc->w_di_bh = di_bh;

        if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
                wc->w_large_pages = 1;
        else
                wc->w_large_pages = 0;

        ocfs2_init_dealloc_ctxt(&wc->w_dealloc);

        *wcp = wc;

        return 0;
}

/*
 * If a page has any new buffers, zero them out here, and mark them uptodate
 * and dirty so they'll be written out (in order to prevent uninitialised
 * block data from leaking). And clear the new bit.
 */
static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)
{
        unsigned int block_start, block_end;
        struct buffer_head *head, *bh;

        BUG_ON(!PageLocked(page));
        if (!page_has_buffers(page))
                return;

        bh = head = page_buffers(page);
        block_start = 0;
        do {
                block_end = block_start + bh->b_size;

                if (buffer_new(bh)) {
                        if (block_end > from && block_start < to) {
                                if (!PageUptodate(page)) {
                                        unsigned start, end;

                                        start = max(from, block_start);
                                        end = min(to, block_end);

                                        zero_user_page(page, start,
                                                       end - start, KM_USER0);
                                        set_buffer_uptodate(bh);
                                }

                                clear_buffer_new(bh);
                                mark_buffer_dirty(bh);
                        }
                }

                block_start = block_end;
                bh = bh->b_this_page;
        } while (bh != head);
}

/*
 * Only called when an allocating write fails, so that zeros get written
 * over the newly allocated region.
 */
static void ocfs2_write_failure(struct inode *inode,
                                struct ocfs2_write_ctxt *wc,
                                loff_t user_pos, unsigned user_len)
{
        int i;
        unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
                 to = user_pos + user_len;
        struct page *tmppage;

        ocfs2_zero_new_buffers(wc->w_target_page, from, to);

        for (i = 0; i < wc->w_num_pages; i++) {
                tmppage = wc->w_pages[i];

                if (page_has_buffers(tmppage)) {
                        if (ocfs2_should_order_data(inode))
                                walk_page_buffers(wc->w_handle,
                                                  page_buffers(tmppage),
                                                  from, to, NULL,
                                                  ocfs2_journal_dirty_data);

                        block_commit_write(tmppage, from, to);
                }
        }
}

/*
 * Map the blocks of a single page in the current cluster, zeroing any
 * newly allocated regions that fall outside the user's data.
 */
static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
                                        struct ocfs2_write_ctxt *wc,
                                        struct page *page, u32 cpos,
                                        loff_t user_pos, unsigned user_len,
                                        int new)
{
        int ret;
        unsigned int map_from = 0, map_to = 0;
        unsigned int cluster_start, cluster_end;
        unsigned int user_data_from = 0, user_data_to = 0;

        ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,
                                        &cluster_start, &cluster_end);

        if (page == wc->w_target_page) {
                map_from = user_pos & (PAGE_CACHE_SIZE - 1);
                map_to = map_from + user_len;

                if (new)
                        ret = ocfs2_map_page_blocks(page, p_blkno, inode,
                                                    cluster_start, cluster_end,
                                                    new);
                else
                        ret = ocfs2_map_page_blocks(page, p_blkno, inode,
                                                    map_from, map_to, new);

                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                user_data_from = map_from;
                user_data_to = map_to;
                if (new) {
                        map_from = cluster_start;
                        map_to = cluster_end;
                }
        } else {
                /*
                 * If we haven't allocated the new page yet, we
                 * shouldn't be writing it out without copying user
                 * data. This is likely a math error from the caller.
                 */
                BUG_ON(!new);

                map_from = cluster_start;
                map_to = cluster_end;

                ret = ocfs2_map_page_blocks(page, p_blkno, inode,
                                            cluster_start, cluster_end, new);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }
        }

        /*
         * Parts of newly allocated pages need to be zero'd.
         *
         * Above, we have also rewritten 'to' and 'from' - as far as
         * the rest of the function is concerned, the entire cluster
         * range inside of a page needs to be written.
         *
         * We can skip this if the page is up to date - it's already
         * been zero'd from being read in as a hole.
         */
        if (new && !PageUptodate(page))
                ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),
                                         cpos, user_data_from, user_data_to);

        flush_dcache_page(page);

out:
        return ret;
}

/*
 * This function will only grab one cluster's worth of pages.
 */
static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                                      struct ocfs2_write_ctxt *wc,
                                      u32 cpos, loff_t user_pos, int new,
                                      struct page *mmap_page)
{
        int ret = 0, i;
        unsigned long start, target_index, index;
        struct inode *inode = mapping->host;

        target_index = user_pos >> PAGE_CACHE_SHIFT;

        /*
         * Figure out how many pages we'll be manipulating here. For
         * a non-allocating write, we just change the one page.
         * Otherwise, we'll need a whole cluster's worth.
         */
        if (new) {
                wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
                start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
        } else {
                wc->w_num_pages = 1;
                start = target_index;
        }

        for (i = 0; i < wc->w_num_pages; i++) {
                index = start + i;

                if (index == target_index && mmap_page) {
                        /*
                         * ocfs2_pagemkwrite() is a little different
                         * and wants us to directly use the page
                         * passed in.
                         */
                        lock_page(mmap_page);

                        if (mmap_page->mapping != mapping) {
                                unlock_page(mmap_page);
                                /*
                                 * Sanity check - the locking in
                                 * ocfs2_pagemkwrite() should ensure
                                 * that this code doesn't trigger.
                                 */
                                ret = -EINVAL;
                                mlog_errno(ret);
                                goto out;
                        }

                        page_cache_get(mmap_page);
                        wc->w_pages[i] = mmap_page;
                } else {
                        wc->w_pages[i] = find_or_create_page(mapping, index,
                                                             GFP_NOFS);
                        if (!wc->w_pages[i]) {
                                ret = -ENOMEM;
                                mlog_errno(ret);
                                goto out;
                        }
                }

                if (index == target_index)
                        wc->w_target_page = wc->w_pages[i];
        }
out:
        return ret;
}

/*
 * Prepare a single cluster for writing into the file.
 */
static int ocfs2_write_cluster(struct address_space *mapping,
                               u32 phys, unsigned int unwritten,
                               struct ocfs2_alloc_context *data_ac,
                               struct ocfs2_alloc_context *meta_ac,
                               struct ocfs2_write_ctxt *wc, u32 cpos,
                               loff_t user_pos, unsigned user_len)
{
        int ret, i, new, should_zero = 0;
        u64 v_blkno, p_blkno;
        struct inode *inode = mapping->host;

        new = phys == 0 ? 1 : 0;
        if (new || unwritten)
                should_zero = 1;

        if (new) {
                u32 tmp_pos;
                /*
                 * This is safe to call with the page locks - it won't take
                 * any additional semaphores or cluster locks.
                 */
                tmp_pos = cpos;
                ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb),
                                                 inode, &tmp_pos, 1, 0,
                                                 wc->w_di_bh,
                                                 wc->w_handle, data_ac,
                                                 meta_ac, NULL);
                /*
                 * This shouldn't happen because we must have already
                 * calculated the correct meta data allocation required. The
                 * internal tree allocation code should know how to increase
                 * transaction credits itself.
                 *
                 * If need be, we could handle -EAGAIN for a
                 * RESTART_TRANS here.
                 */
                mlog_bug_on_msg(ret == -EAGAIN,
                                "Inode %llu: EAGAIN return during allocation.\n",
                                (unsigned long long)OCFS2_I(inode)->ip_blkno);
                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }
        } else if (unwritten) {
                ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
                                                wc->w_handle, cpos, 1, phys,
                                                meta_ac, &wc->w_dealloc);
                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }
        }

        if (should_zero)
                v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
        else
                v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;

        /*
         * The only reason this should fail is due to an inability to
         * find the extent added.
         */
        ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
                                          NULL);
        if (ret < 0) {
                ocfs2_error(inode->i_sb, "Corrupting extent for inode %llu, "
                            "at logical block %llu",
                            (unsigned long long)OCFS2_I(inode)->ip_blkno,
                            (unsigned long long)v_blkno);
                goto out;
        }

        BUG_ON(p_blkno == 0);

        for (i = 0; i < wc->w_num_pages; i++) {
                int tmpret;

                tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
                                                      wc->w_pages[i], cpos,
                                                      user_pos, user_len,
                                                      should_zero);
                if (tmpret) {
                        mlog_errno(tmpret);
                        /* Remember the first error so the caller sees it. */
                        if (ret == 0)
                                ret = tmpret;
                }
        }

        /*
         * We only have cleanup to do in case of allocating write.
         */
        if (ret && new)
                ocfs2_write_failure(inode, wc, user_pos, user_len);

out:
        return ret;
}

/*
 * Write out each cluster covered by the request, one descriptor at a
 * time, making sure no single call crosses a cluster boundary.
 */
static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
                                       struct ocfs2_alloc_context *data_ac,
                                       struct ocfs2_alloc_context *meta_ac,
                                       struct ocfs2_write_ctxt *wc,
                                       loff_t pos, unsigned len)
{
        int ret, i;
        loff_t cluster_off;
        unsigned int local_len = len;
        struct ocfs2_write_cluster_desc *desc;
        struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);

        for (i = 0; i < wc->w_clen; i++) {
                desc = &wc->w_desc[i];

                /*
                 * We have to make sure that the total write passed in
                 * doesn't extend past a single cluster.
                 */
                local_len = len;
                cluster_off = pos & (osb->s_clustersize - 1);
                if ((cluster_off + local_len) > osb->s_clustersize)
                        local_len = osb->s_clustersize - cluster_off;

                ret = ocfs2_write_cluster(mapping, desc->c_phys,
                                          desc->c_unwritten, data_ac, meta_ac,
                                          wc, desc->c_cpos, pos, local_len);
                if (ret) {
                        mlog_errno(ret);
                        goto out;
                }

                len -= local_len;
                pos += local_len;
        }

        ret = 0;
out:
        return ret;
}

/*
 * ocfs2_write_end() wants to know which parts of the target page it
 * should complete the write on. It's easiest to compute them ahead of
 * time when a more complete view of the write is available.
 */
static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
                                        struct ocfs2_write_ctxt *wc,
                                        loff_t pos, unsigned len, int alloc)
{
        struct ocfs2_write_cluster_desc *desc;

        wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);
        wc->w_target_to = wc->w_target_from + len;

        if (alloc == 0)
                return;

        /*
         * Allocating write - we may have different boundaries based
         * on page size and cluster size.
         *
         * NOTE: We can no longer compute one value from the other as
         * the actual write length and user provided length may be
         * different.
         */
        if (wc->w_large_pages) {
                /*
                 * We only care about the 1st and last cluster within
                 * our range and whether they should be zero'd or not. Either
                 * value may be extended out to the start/end of a
                 * newly allocated cluster.
                 */
                desc = &wc->w_desc[0];
                if (ocfs2_should_zero_cluster(desc))
                        ocfs2_figure_cluster_boundaries(osb, desc->c_cpos,
                                                        &wc->w_target_from,
                                                        NULL);

                desc = &wc->w_desc[wc->w_clen - 1];
                if (ocfs2_should_zero_cluster(desc))
                        ocfs2_figure_cluster_boundaries(osb, desc->c_cpos,
                                                        NULL,
                                                        &wc->w_target_to);
        } else {
                wc->w_target_from = 0;
                wc->w_target_to = PAGE_CACHE_SIZE;
        }
}

/*
 * Populate each single-cluster write descriptor in the write context
 * with information about the i/o to be done.
 *
 * Returns the number of clusters that will have to be allocated, as
 * well as a worst case estimate of the number of extent records that
 * would have to be created during a write to an unwritten region.
 */
static int ocfs2_populate_write_desc(struct inode *inode,
                                     struct ocfs2_write_ctxt *wc,
                                     unsigned int *clusters_to_alloc,
                                     unsigned int *extents_to_split)
{
        int ret;
        struct ocfs2_write_cluster_desc *desc;
        unsigned int num_clusters = 0;
        unsigned int ext_flags = 0;
        u32 phys = 0;
        int i;

        *clusters_to_alloc = 0;
        *extents_to_split = 0;

        for (i = 0; i < wc->w_clen; i++) {
                desc = &wc->w_desc[i];
                desc->c_cpos = wc->w_cpos + i;

                if (num_clusters == 0) {
                        /*
                         * Need to look up the next extent record.
                         */
                        ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
                                                 &num_clusters, &ext_flags);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        /*
                         * Assume worst case - that we're writing in
                         * the middle of the extent.
                         *
                         * We can assume that the write proceeds from
                         * left to right, in which case the extent
                         * insert code is smart enough to coalesce the
                         * next splits into the previous records created.
                         */
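To make the index math above easier to follow, here is a minimal, standalone sketch of the cluster-range calculation performed by ocfs2_alloc_write_ctxt(); the page and cluster sizes and the sample offsets below are assumptions chosen for illustration, not values taken from the OCFS2 sources.

/*
 * Minimal sketch of the cluster-range arithmetic in ocfs2_alloc_write_ctxt().
 * All constants are illustrative assumptions, not part of aops.c.
 */
#include <stdio.h>

int main(void)
{
        unsigned int page_shift = 12;           /* assumed 4K pages */
        unsigned int clustersize_bits = 17;     /* assumed 128K clusters */
        unsigned long long pos = 130000;        /* byte offset of the write */
        unsigned int len = 10000;               /* length of the write */

        /* First and last cluster touched by [pos, pos + len) */
        unsigned int cpos = pos >> clustersize_bits;
        unsigned int cend = (pos + len - 1) >> clustersize_bits;
        unsigned int clen = cend - cpos + 1;

        /* Mirrors the w_large_pages test: a page spans more than one cluster */
        int large_pages = page_shift > clustersize_bits;

        printf("write covers clusters %u..%u (%u cluster(s)), large_pages=%d\n",
               cpos, cend, clen, large_pages);
        return 0;
}

With these sample values the write straddles a cluster boundary, so the program prints clusters 0..1 (2 clusters), which is exactly the w_cpos/w_clen range the write context would carry through the rest of the write path.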