📄 file.c
字号:
/* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * file.c * * File open, close, extend, truncate * * Copyright (C) 2002, 2004 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */#include <linux/fs.h>#include <linux/types.h>#include <linux/slab.h>#include <linux/highmem.h>#include <linux/pagemap.h>#include <linux/uio.h>#define MLOG_MASK_PREFIX ML_INODE#include <cluster/masklog.h>#include "ocfs2.h"#include "aio.h"#include "alloc.h"#include "dir.h"#include "dlmglue.h"#include "extent_map.h"#include "file.h"#include "sysfile.h"#include "inode.h"#include "journal.h"#include "mmap.h"#include "suballoc.h"#include "super.h"#include "buffer_head_io.h"static int ocfs2_zero_extend(struct inode *inode);static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *fe_bh, u64 new_i_size);int ocfs2_sync_inode(struct inode *inode){ filemap_fdatawrite(inode->i_mapping); return sync_mapping_buffers(inode->i_mapping);}static int ocfs2_file_open(struct inode *inode, struct file *file){ int status; int mode = file->f_flags; struct ocfs2_inode_info *oi = OCFS2_I(inode); mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, file->f_dentry->d_name.len, file->f_dentry->d_name.name); spin_lock(&oi->ip_lock); /* Check that the inode hasn't been wiped from disk by another * node. If it hasn't then we're safe as long as we hold the * spin lock until our increment of open count. */ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { spin_unlock(&oi->ip_lock); status = -ENOENT; goto leave; } if (mode & O_DIRECT) oi->ip_flags |= OCFS2_INODE_OPEN_DIRECT; oi->ip_open_count++; spin_unlock(&oi->ip_lock); status = 0;leave: mlog_exit(status); return status;}static int ocfs2_file_release(struct inode *inode, struct file *file){ struct ocfs2_inode_info *oi = OCFS2_I(inode); mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, file->f_dentry->d_name.len, file->f_dentry->d_name.name); spin_lock(&oi->ip_lock);#ifdef OCFS2_DELETE_INODE_WORKAROUND /* Do the sync *before* decrementing ip_open_count as * otherwise the voting code might allow this inode to be * wiped. */ if (oi->ip_open_count == 1 && oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) { spin_unlock(&oi->ip_lock); write_inode_now(inode, 1); spin_lock(&oi->ip_lock); }#endif if (!--oi->ip_open_count) oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; spin_unlock(&oi->ip_lock); mlog_exit(0); return 0;}static int ocfs2_sync_file(struct file *file, struct dentry *dentry, int datasync){ int err = 0; journal_t *journal; struct inode *inode = dentry->d_inode; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, dentry->d_name.len, dentry->d_name.name); err = ocfs2_sync_inode(dentry->d_inode); if (err) goto bail; journal = osb->journal->j_journal; err = journal_force_commit(journal);bail: mlog_exit(err); return (err < 0) ? -EIO : 0;}static void ocfs2_update_inode_size(struct inode *inode, u64 new_size){ i_size_write(inode, new_size); inode->i_blocks = ocfs2_align_bytes_to_sectors(new_size);}void ocfs2_file_finish_extension(struct inode *inode, loff_t newsize, unsigned direct_extend){ int status; mlog(0, "inode %"MLFu64", newsize = %lld, direct_extend = %u\n", OCFS2_I(inode)->ip_blkno, (long long)newsize, direct_extend); ocfs2_update_inode_size(inode, newsize);#ifdef OCFS2_ORACORE_WORKAROUNDS if (direct_extend) { /* * This leaves dirty data in holes. * Caveat Emptor. */ OCFS2_I(inode)->ip_mmu_private = newsize; return; }#endif status = ocfs2_zero_extend(inode); /* * Don't overwrite the result of * generic_file_write */ if (status) mlog(ML_ERROR, "Unable to pre-zero extension of inode " "(%d)\n", status);}static ssize_t ocfs2_file_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos){ struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; int ret = 0; struct ocfs2_super *osb = NULL; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; struct ocfs2_write_lock_info info = {0, }; DECLARE_BUFFER_LOCK_CTXT(ctxt); mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, (unsigned int)count, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); /* happy write of zero bytes */ if (count == 0) { ret = 0; goto bail; } if (!inode) { mlog(0, "bad inode\n"); ret = -EIO; goto bail; } osb = OCFS2_SB(inode->i_sb); ret = ocfs2_write_lock_maybe_extend(filp, buf, count, ppos, &info, &ctxt); if (ret) goto bail; down_read(&OCFS2_I(inode)->ip_alloc_sem);#ifdef OCFS2_ORACORE_WORKAROUNDS if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) { unsigned int saved_flags = filp->f_flags; if (info.wl_do_direct_io) filp->f_flags |= O_DIRECT; else filp->f_flags &= ~O_DIRECT; ret = generic_file_write_nolock(filp, &local_iov, 1, ppos); filp->f_flags = saved_flags; } else#endif ret = generic_file_write_nolock(filp, &local_iov, 1, ppos); up_read(&OCFS2_I(inode)->ip_alloc_sem);bail: /* * if this write created a hole then write zeros into it.. wl_extended * is only set if we got the data lock so the buffered zero writing * will have lock coverage. This must be done before unlocking. */ if (info.wl_extended) ocfs2_file_finish_extension(inode, info.wl_newsize, info.wl_do_direct_io); if (info.wl_unlock_ctxt) ocfs2_unlock_buffer_inodes(&ctxt); if (info.wl_have_i_mutex) mutex_unlock(&inode->i_mutex); mlog_exit(ret); return ret;}static ssize_t ocfs2_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos){ int ret = 0; struct ocfs2_super *osb = NULL; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; struct ocfs2_backing_inode *target_binode; DECLARE_BUFFER_LOCK_CTXT(ctxt); mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, (unsigned int)count, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); if (!inode) { ret = -EINVAL; mlog_errno(ret); goto bail; } osb = OCFS2_SB(inode->i_sb);#ifdef OCFS2_ORACORE_WORKAROUNDS if (osb->s_mount_opt & OCFS2_MOUNT_COMPAT_OCFS) { if (filp->f_flags & O_DIRECT) { int sector_size = 1 << osb->s_sectsize_bits; if (((*ppos) & (sector_size - 1)) || (count & (sector_size - 1)) || ((unsigned long)buf & (sector_size - 1)) || (i_size_read(inode) & (sector_size -1))) { filp->f_flags &= ~O_DIRECT; } } }#endif ret = ocfs2_setup_io_locks(inode->i_sb, inode, buf, count, &ctxt, &target_binode); if (ret < 0) { mlog_errno(ret); goto bail; } target_binode->ba_lock_data_level = 0; ret = ocfs2_lock_buffer_inodes(&ctxt, NULL); if (ret < 0) { mlog_errno(ret); goto bail_unlock; } down_read(&OCFS2_I(inode)->ip_alloc_sem); ret = generic_file_read(filp, buf, count, ppos); up_read(&OCFS2_I(inode)->ip_alloc_sem); if (ret == -EINVAL) mlog(ML_ERROR, "Generic_file_read returned -EINVAL\n");bail_unlock: ocfs2_unlock_buffer_inodes(&ctxt);bail: mlog_exit(ret); return ret;}static ssize_t ocfs2_file_sendfile(struct file *in_file, loff_t *ppos, size_t count, read_actor_t actor, void *target){ int ret; struct inode *inode = in_file->f_mapping->host; DECLARE_IO_MARKER(io_marker); mlog_entry("inode %"MLFu64", ppos %lld, count = %u\n", OCFS2_I(inode)->ip_blkno, (long long) *ppos, (unsigned int) count); /* Obviously, there is no user buffer to worry about here -- * this simplifies locking, so no need to walk vmas a la * read/write. We take a simple set of cluster locks against * the inode and call generic_file_sendfile. */ ret = ocfs2_meta_lock(inode, NULL, NULL, 0); if (ret < 0) { mlog_errno(ret); goto bail; } ret = ocfs2_data_lock(inode, 0); if (ret < 0) { mlog_errno(ret); goto bail_unlock_meta; } down_read(&OCFS2_I(inode)->ip_alloc_sem); /* * We still need this so readpage doesn't throw an error. */ ocfs2_add_io_marker(inode, &io_marker); ret = generic_file_sendfile(in_file, ppos, count, actor, target); ocfs2_del_io_marker(inode, &io_marker); up_read(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_data_unlock(inode, 0);bail_unlock_meta: ocfs2_meta_unlock(inode, 0);bail: mlog_exit(ret); return ret;}struct file_operations ocfs2_fops = { .read = ocfs2_file_read, .write = ocfs2_file_write, .sendfile = ocfs2_file_sendfile, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write,};struct file_operations ocfs2_dops = { .read = generic_read_dir, .readdir = ocfs2_readdir, .fsync = ocfs2_sync_file,};int ocfs2_set_inode_size(struct ocfs2_journal_handle *handle, struct inode *inode, struct buffer_head *fe_bh, u64 new_i_size){ int status, grow; mlog_entry_void(); grow = new_i_size > inode->i_size; i_size_write(inode, new_i_size); inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size); inode->i_ctime = inode->i_mtime = CURRENT_TIME; status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); if (status < 0) { mlog_errno(status); goto bail; } /* FIXME: I think this should all be in the caller */ spin_lock(&OCFS2_I(inode)->ip_lock); if (!grow) OCFS2_I(inode)->ip_mmu_private = i_size_read(inode); spin_unlock(&OCFS2_I(inode)->ip_lock);bail: mlog_exit(status); return status;}static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *fe_bh, u64 new_i_size){ int status; struct ocfs2_journal_handle *handle; mlog_entry_void(); /* TODO: This needs to actually orphan the inode in this * transaction. */ handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out; } status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size); if (status < 0) mlog_errno(status); ocfs2_commit_trans(handle);out: mlog_exit(status); return status;}static int ocfs2_truncate_file(struct ocfs2_super *osb, u64 new_i_size, struct inode *inode){ int status = 0; struct ocfs2_dinode *fe = NULL; struct buffer_head *fe_bh = NULL; struct ocfs2_journal_handle *handle = NULL; struct ocfs2_truncate_context *tc = NULL; mlog_entry("(inode = %"MLFu64", new_i_size = %"MLFu64"\n", OCFS2_I(inode)->ip_blkno, new_i_size); truncate_inode_pages(inode->i_mapping, new_i_size); status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &fe_bh, OCFS2_BH_CACHED, inode); if (status < 0) { mlog_errno(status); goto bail; } fe = (struct ocfs2_dinode *) fe_bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); status = -EIO; goto bail; } mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), "Inode %"MLFu64", inode i_size = %lld != di " "i_size = %"MLFu64", i_flags = 0x%x\n", OCFS2_I(inode)->ip_blkno, i_size_read(inode), le64_to_cpu(fe->i_size), le32_to_cpu(fe->i_flags)); if (new_i_size > le64_to_cpu(fe->i_size)) { mlog(0, "asked to truncate file with size (%"MLFu64") " "to size (%"MLFu64")!\n", le64_to_cpu(fe->i_size), new_i_size); status = -EINVAL; mlog_errno(status); goto bail; } mlog(0, "inode %"MLFu64", i_size = %"MLFu64", new_i_size = %"MLFu64"\n", le64_to_cpu(fe->i_blkno), le64_to_cpu(fe->i_size), new_i_size); /* lets handle the simple truncate cases before doing any more * cluster locking. */ if (new_i_size == le64_to_cpu(fe->i_size)) goto bail; /* This forces other nodes to sync and drop their pages. Do * this even if we have a truncate without allocation change - * ocfs2 cluster sizes can be much greater than page size, so * we have to truncate them anyway. */ status = ocfs2_data_lock(inode, 1); if (status < 0) { mlog_errno(status); goto bail; } ocfs2_data_unlock(inode, 1); if (le32_to_cpu(fe->i_clusters) == ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", fe->i_clusters); /* No allocation change is required, so lets fast path * this truncate. */ handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; mlog_errno(status); goto bail; } status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size); if (status < 0) mlog_errno(status); goto bail; } /* alright, we're going to need to do a full blown alloc size * change. Orphan the inode so that recovery can complete the * truncate if necessary. This does the task of marking * i_size. */ status = ocfs2_orphan_for_truncate(osb, inode, fe_bh, new_i_size); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); if (status < 0) { mlog_errno(status); goto bail; } status = ocfs2_commit_truncate(osb, inode, fe_bh, tc); if (status < 0) { mlog_errno(status); goto bail; } /* TODO: orphan dir cleanup here. */bail: if (handle) ocfs2_commit_trans(handle); if (fe_bh) brelse(fe_bh); mlog_exit(status); return status;}static int ocfs2_zero_extend(struct inode *inode){ struct address_space *mapping = inode->i_mapping; struct page *page; u64 size = i_size_read(inode) - 1; unsigned int offset; int res = 0; /* Start the zeroing of blocks */ if (i_size_read(inode) > OCFS2_I(inode)->ip_mmu_private) { page = grab_cache_page(mapping,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -