file.c
	return ret;
}

static int ocfs2_remove_inode_range(struct inode *inode,
				    struct buffer_head *di_bh, u64 byte_start,
				    u64 byte_len)
{
	int ret = 0;
	u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct address_space *mapping = inode->i_mapping;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (byte_len == 0)
		return 0;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
					    byte_start + byte_len, 0);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		/*
		 * There's no need to get fancy with the page cache
		 * truncate of an inline-data inode. We're talking
		 * about less than a page here, which will be cached
		 * in the dinode buffer anyway.
		 */
		unmap_mapping_range(mapping, 0, 0, 0);
		truncate_inode_pages(mapping, 0);
		goto out;
	}

	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
	trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
	if (trunc_len >= trunc_start)
		trunc_len -= trunc_start;
	else
		trunc_len = 0;

	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     (unsigned long long)byte_start,
	     (unsigned long long)byte_len,
	     trunc_start, trunc_len);

	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	cpos = trunc_start;
	while (trunc_len) {
		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
					 &alloc_size, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (alloc_size > trunc_len)
			alloc_size = trunc_len;

		/* Only do work for non-holes */
		if (phys_cpos != 0) {
			ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
							 phys_cpos, alloc_size,
							 &dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		cpos += alloc_size;
		trunc_len -= alloc_size;
	}

	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	return ret;
}
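/*
 * Worked example of the cluster math in ocfs2_remove_inode_range() above
 * (illustrative, not part of the original file; numbers assume 4K clusters,
 * i.e. s_clustersize_bits == 12). ocfs2_clusters_for_bytes() rounds a byte
 * count up to whole clusters, while the right shift rounds down, so only
 * clusters that lie entirely inside [byte_start, byte_start + byte_len) get
 * deallocated; the partial head/tail bytes are handled separately by
 * ocfs2_zero_partial_clusters(). With byte_start == 5000, byte_len == 20000:
 *
 *	trunc_start = clusters_for_bytes(5000)    = 2
 *	trunc_len   = (25000 >> 12) - trunc_start = 6 - 2 = 4
 *
 * so clusters 2..5 (bytes 8192..24575) are removed, while bytes 5000..8191
 * and 24576..24999 are only zeroed in place.
 */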
/*
 * Parts of this function taken from xfs_change_file_space()
 */
static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
				     loff_t f_pos, unsigned int cmd,
				     struct ocfs2_space_resv *sr,
				     int change_size)
{
	int ret;
	s64 llen;
	loff_t size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct buffer_head *di_bh = NULL;
	handle_t *handle;
	unsigned long long max_off = inode->i_sb->s_maxbytes;

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
		return -EROFS;

	mutex_lock(&inode->i_mutex);

	/*
	 * This prevents concurrent writes on other nodes
	 */
	ret = ocfs2_rw_lock(inode, 1);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_inode_lock(inode, &di_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_rw_unlock;
	}

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
		ret = -EPERM;
		goto out_inode_unlock;
	}

	switch (sr->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		sr->l_start += f_pos;
		break;
	case 2: /*SEEK_END*/
		sr->l_start += i_size_read(inode);
		break;
	default:
		ret = -EINVAL;
		goto out_inode_unlock;
	}
	sr->l_whence = 0;

	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;

	if (sr->l_start < 0
	    || sr->l_start > max_off
	    || (sr->l_start + llen) < 0
	    || (sr->l_start + llen) > max_off) {
		ret = -EINVAL;
		goto out_inode_unlock;
	}
	size = sr->l_start + sr->l_len;

	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
		if (sr->l_len <= 0) {
			ret = -EINVAL;
			goto out_inode_unlock;
		}
	}

	if (file && should_remove_suid(filp_dentry(file))) {
		ret = __ocfs2_write_remove_suid(inode, di_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_inode_unlock;
		}
	}

	down_write(&OCFS2_I(inode)->ip_alloc_sem);
	switch (cmd) {
	case OCFS2_IOC_RESVSP:
	case OCFS2_IOC_RESVSP64:
		/*
		 * This takes unsigned offsets, but the signed ones we
		 * pass have been checked against overflow above.
		 */
		ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
						       sr->l_len);
		break;
	case OCFS2_IOC_UNRESVSP:
	case OCFS2_IOC_UNRESVSP64:
		ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
					       sr->l_len);
		break;
	default:
		ret = -EINVAL;
	}
	up_write(&OCFS2_I(inode)->ip_alloc_sem);
	if (ret) {
		mlog_errno(ret);
		goto out_inode_unlock;
	}

	/*
	 * We update c/mtime for these changes
	 */
	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_inode_unlock;
	}

	if (change_size && i_size_read(inode) < size)
		i_size_write(inode, size);

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);

out_inode_unlock:
	brelse(di_bh);
	ocfs2_inode_unlock(inode, 1);
out_rw_unlock:
	ocfs2_rw_unlock(inode, 1);

out:
	mutex_unlock(&inode->i_mutex);
	return ret;
}

int ocfs2_change_file_space(struct file *file, unsigned int cmd,
			    struct ocfs2_space_resv *sr)
{
	struct inode *inode = filp_dentry(file)->d_inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

	if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
	    !ocfs2_writes_unwritten_extents(osb))
		return -ENOTTY;
	else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
		 !ocfs2_sparse_alloc(osb))
		return -ENOTTY;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
}

#ifndef NO_FALLOCATE
static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
			    loff_t len)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_space_resv sr;
	int change_size = 1;

	if (!ocfs2_writes_unwritten_extents(osb))
		return -EOPNOTSUPP;

	if (S_ISDIR(inode->i_mode))
		return -ENODEV;

	if (mode & FALLOC_FL_KEEP_SIZE)
		change_size = 0;

	sr.l_whence = 0;
	sr.l_start = (s64)offset;
	sr.l_len = (s64)len;

	return __ocfs2_change_file_space(NULL, inode, offset,
					 OCFS2_IOC_RESVSP64, &sr, change_size);
}
#endif
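/*
 * Illustrative userspace sketch (an assumption for context, not part of this
 * file): the reservation and unreservation paths above are normally reached
 * through the OCFS2 space-reservation ioctls, which take the same
 * ocfs2_space_resv layout used here. Roughly:
 *
 *	struct ocfs2_space_resv sr = {
 *		.l_whence = 0,                  // interpret l_start as absolute
 *		.l_start  = 0,
 *		.l_len    = 16 * 1024 * 1024,   // reserve 16MB of unwritten extents
 *	};
 *	if (ioctl(fd, OCFS2_IOC_RESVSP64, &sr) < 0)
 *		perror("OCFS2_IOC_RESVSP64");
 *
 * OCFS2_IOC_UNRESVSP64 with the same structure punches the range back out,
 * ending up in ocfs2_remove_inode_range() above.
 */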
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
					 loff_t *ppos,
					 size_t count,
					 int appending,
					 int *direct_io)
{
	int ret = 0, meta_level = 0;
	struct inode *inode = dentry->d_inode;
	loff_t saved_pos, end;

	/*
	 * We start with a read level meta lock and only jump to an ex
	 * if we need to make modifications here.
	 */
	for(;;) {
		ret = ocfs2_inode_lock(inode, NULL, meta_level);
		if (ret < 0) {
			meta_level = -1;
			mlog_errno(ret);
			goto out;
		}

		/* Clear suid / sgid if necessary. We do this here
		 * instead of later in the write path because
		 * remove_suid() calls ->setattr without any hint that
		 * we may have already done our cluster locking. Since
		 * ocfs2_setattr() *must* take cluster locks to
		 * proceed, this will lead us to recursively lock the
		 * inode. There's also the dinode i_size state which
		 * can be lost via setattr during extending writes (we
		 * set inode->i_size at the end of a write). */
		if (should_remove_suid(dentry)) {
			if (meta_level == 0) {
				ocfs2_inode_unlock(inode, meta_level);
				meta_level = 1;
				continue;
			}

			ret = ocfs2_write_remove_suid(inode);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_unlock;
			}
		}

		/* work on a copy of ppos until we're sure that we won't have
		 * to recalculate it due to relocking. */
		if (appending) {
			saved_pos = i_size_read(inode);

			mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
		} else {
			saved_pos = *ppos;
		}

		end = saved_pos + count;

		/*
		 * Skip the O_DIRECT checks if we don't need
		 * them.
		 */
		if (!direct_io || !(*direct_io))
			break;

		/*
		 * There's no sane way to do direct writes to an inode
		 * with inline data.
		 */
		if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
			*direct_io = 0;
			break;
		}

		/*
		 * Allowing concurrent direct writes means
		 * i_size changes wouldn't be synchronized, so
		 * one node could wind up truncating another
		 * node's writes.
		 */
		if (end > i_size_read(inode)) {
			*direct_io = 0;
			break;
		}

		/*
		 * We don't fill holes during direct io, so
		 * check for them here. If any are found, the
		 * caller will have to retake some cluster
		 * locks and initiate the io as buffered.
		 */
		ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
		if (ret == 1) {
			*direct_io = 0;
			ret = 0;
		} else if (ret < 0)
			mlog_errno(ret);
		break;
	}

	if (appending)
		*ppos = saved_pos;

out_unlock:
	ocfs2_inode_unlock(inode, meta_level);

out:
	return ret;
}

static inline void
ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
{
	const struct iovec *iov = *iovp;
	size_t base = *basep;

	do {
		int copy = min(bytes, iov->iov_len - base);

		bytes -= copy;
		base += copy;
		if (iov->iov_len == base) {
			iov++;
			base = 0;
		}
	} while (bytes);

	*iovp = iov;
	*basep = base;
}

static struct page * ocfs2_get_write_source(char **ret_src_buf,
					    const struct iovec *cur_iov,
					    size_t iov_offset)
{
	int ret;
	char *buf = cur_iov->iov_base + iov_offset;
	struct page *src_page = NULL;
	unsigned long off;

	off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;

	if (!segment_eq(get_fs(), KERNEL_DS)) {
		/*
		 * Pull in the user page. We want to do this outside
		 * of the meta data locks in order to preserve locking
		 * order in case of page fault.
		 */
		ret = get_user_pages(current, current->mm,
				     (unsigned long)buf & PAGE_CACHE_MASK, 1,
				     0, 0, &src_page, NULL);
		if (ret == 1)
			*ret_src_buf = kmap(src_page) + off;
		else
			src_page = ERR_PTR(-EFAULT);
	} else {
		*ret_src_buf = buf;
	}

	return src_page;
}

static void ocfs2_put_write_source(struct page *page)
{
	if (page) {
		kunmap(page);
		page_cache_release(page);
	}
}
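/*
 * Illustrative note (not from the original source): the buffered write loop
 * below bounds each copy by three limits: the destination page, the source
 * iovec segment, and the remaining count. After each copy,
 * ocfs2_set_next_iovec() advances the (iovec, offset) cursor past the bytes
 * just consumed. For example, assuming two segments of 100 and 200 bytes and
 * a cursor at (iov[0], offset 0), consuming 150 bytes leaves the cursor at
 * (iov[1], offset 50).
 */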
static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
					 const struct iovec *iov,
					 unsigned long nr_segs,
					 size_t count,
					 ssize_t o_direct_written)
{
	int ret = 0;
	ssize_t copied, total = 0;
	size_t iov_offset = 0, bytes;
	loff_t pos;
	const struct iovec *cur_iov = iov;
	struct page *user_page, *page;
	char * uninitialized_var(buf);
	char *dst;
	void *fsdata;

	/*
	 * handle partial DIO write.  Adjust cur_iov if needed.
	 */
	ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);

	do {
		pos = *ppos;

		user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
		if (IS_ERR(user_page)) {
			ret = PTR_ERR(user_page);
			goto out;
		}

		/* Stay within our page boundaries */
		bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
			    (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
		/* Stay within the vector boundary */
		bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
		/* Stay within count */
		bytes = min(bytes, count);

		page = NULL;
		ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
					&page, &fsdata);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		dst = kmap_atomic(page, KM_USER0);
		memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes);
		kunmap_atomic(dst, KM_USER0);
		flush_dcache_page(page);
		ocfs2_put_write_source(user_page);

		copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
					 bytes, page, fsdata);
		if (copied < 0) {
			mlog_errno(copied);
			ret = copied;
			goto out;
		}

		total += copied;
		*ppos = pos + copied;
		count -= copied;

		ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
	} while (count);

out:
	return total ? total : ret;
}

static ssize_t __ocfs2_file_aio_write(struct kiocb *iocb,
				      const struct iovec *iov,
				      unsigned long nr_segs,