fsfilt_ext3.c
来自「lustre 1.6.5 source code」· C语言 代码 · 共 1,853 行 · 第 1/5 页
C
1,853 行
* any of the matching commit_write operations, so even if we split * up to use "smaller" transactions none of them could complete until * all of them were opened. By having a single journal transaction, * we eliminate duplicate reservations for common blocks like the * superblock and group descriptors or bitmaps. * * We will start the transaction here, but each prepare_write will * add a refcount to the transaction, and each commit_write will * remove a refcount. The transaction will be closed when all of * the pages have been written. */static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, int niocount, struct niobuf_local *nb, void *desc_private, int logs){ journal_t *journal; handle_t *handle; int needed; ENTRY; LASSERT(current->journal_info == desc_private); journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; needed = fsfilt_ext3_credits_needed(objcount, fso, niocount, nb); /* The number of blocks we could _possibly_ dirty can very large. * We reduce our request if it is absurd (and we couldn't get that * many credits for a single handle anyways). * * At some point we have to limit the size of I/Os sent at one time, * increase the size of the journal, or we have to calculate the * actual journal requirements more carefully by checking all of * the blocks instead of being maximally pessimistic. It remains to * be seen if this is a real problem or not. */ if (needed > journal->j_max_transaction_buffers) { CERROR("want too many journal credits (%d) using %d instead\n", needed, journal->j_max_transaction_buffers); needed = journal->j_max_transaction_buffers; } LASSERTF(needed > 0, "can't start %d credit transaction\n", needed); lock_24kernel(); handle = fsfilt_ext3_journal_start(fso->fso_dentry->d_inode, needed); unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't get handle for %d credits: rc = %ld\n", needed, PTR_ERR(handle)); } else { LASSERT(handle->h_buffer_credits >= needed); LASSERT(current->journal_info == handle); } RETURN(handle);}static int fsfilt_ext3_extend(struct inode *inode, unsigned int nblocks,void *h){ handle_t *handle = h; /* fsfilt_extend called with nblocks = 0 for testing in special cases */ if (nblocks == 0) { handle->h_buffer_credits = 0; CWARN("setting credits of handle %p to zero by request\n", h); } if (handle->h_buffer_credits > nblocks) return 0; if (journal_extend(handle, nblocks) == 0) return 0; ext3_mark_inode_dirty(handle, inode); return journal_restart(handle, nblocks);}static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync){ int rc; handle_t *handle = h; LASSERT(current->journal_info == handle); if (force_sync) handle->h_sync = 1; /* recovery likes this */ lock_24kernel(); rc = fsfilt_ext3_journal_stop(handle); unlock_24kernel(); return rc;}static int fsfilt_ext3_commit_async(struct inode *inode, void *h, void **wait_handle){ unsigned long tid; transaction_t *transaction;#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) unsigned long rtid;#endif handle_t *handle = h; journal_t *journal; int rc; LASSERT(current->journal_info == handle); lock_24kernel(); transaction = handle->h_transaction; journal = transaction->t_journal; tid = transaction->t_tid; /* we don't want to be blocked */ handle->h_sync = 0; rc = fsfilt_ext3_journal_stop(handle); if (rc) { CERROR("error while stopping transaction: %d\n", rc); unlock_24kernel(); return rc; }#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) rtid = log_start_commit(journal, transaction); if (rtid != tid) CERROR("strange race: %lu != %lu\n", (unsigned long) tid, (unsigned long) rtid);#else log_start_commit(journal, tid);#endif unlock_24kernel(); *wait_handle = (void *) tid; CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid); return 0;}static int fsfilt_ext3_commit_wait(struct inode *inode, void *h){ journal_t *journal = EXT3_JOURNAL(inode); tid_t tid = (tid_t)(long)h; CDEBUG(D_INODE, "commit wait: %lu\n", (unsigned long) tid); if (unlikely(is_journal_aborted(journal))) return -EIO; log_wait_commit(EXT3_JOURNAL(inode), tid); if (unlikely(is_journal_aborted(journal))) return -EIO; return 0;}static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, struct iattr *iattr, int do_trunc){ struct inode *inode = dentry->d_inode; int rc = 0; lock_24kernel(); /* Avoid marking the inode dirty on the superblock list unnecessarily. * We are already writing the inode to disk as part of this * transaction and want to avoid a lot of extra inode writeout * later on. b=9828 */ if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { /* ATTR_SIZE would invoke truncate: clear it */ iattr->ia_valid &= ~ATTR_SIZE; EXT3_I(inode)->i_disksize = iattr->ia_size; i_size_write(inode, iattr->ia_size); if (iattr->ia_valid & ATTR_UID) inode->i_uid = iattr->ia_uid; if (iattr->ia_valid & ATTR_GID) inode->i_gid = iattr->ia_gid; if (iattr->ia_valid & ATTR_ATIME) inode->i_atime = iattr->ia_atime; if (iattr->ia_valid & ATTR_MTIME) inode->i_mtime = iattr->ia_mtime; if (iattr->ia_valid & ATTR_CTIME) inode->i_ctime = iattr->ia_ctime; if (iattr->ia_valid & ATTR_MODE) { inode->i_mode = iattr->ia_mode; if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) inode->i_mode &= ~S_ISGID; } inode->i_sb->s_op->dirty_inode(inode); goto out; } /* Don't allow setattr to change file type */ if (iattr->ia_valid & ATTR_MODE) iattr->ia_mode = (inode->i_mode & S_IFMT) | (iattr->ia_mode & ~S_IFMT); /* We set these flags on the client, but have already checked perms * so don't confuse inode_change_ok. */ iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); if (inode->i_op->setattr) { rc = inode->i_op->setattr(dentry, iattr); } else { rc = inode_change_ok(inode, iattr); if (!rc) rc = inode_setattr(inode, iattr); } out: unlock_24kernel(); RETURN(rc);}static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file, unsigned int cmd, unsigned long arg){ int rc = 0; ENTRY; /* FIXME: Can't do this because of nested transaction deadlock */ if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) { CERROR("can't set data journal flag on file\n"); RETURN(-EPERM); } if (inode->i_fop->ioctl) rc = inode->i_fop->ioctl(inode, file, cmd, arg); else RETURN(-ENOTTY); RETURN(rc);}static int fsfilt_ext3_set_md(struct inode *inode, void *handle, void *lmm, int lmm_size, const char *name){ int rc; LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0); lock_24kernel(); rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name, lmm, lmm_size, 0); unlock_24kernel(); if (rc && rc != -EROFS) CERROR("error adding MD data to inode %lu: rc = %d\n", inode->i_ino, rc); return rc;}/* Must be called with i_mutex held */static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size, const char *name){ int rc; LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0); lock_24kernel(); rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, lmm, lmm_size); unlock_24kernel(); /* This gives us the MD size */ if (lmm == NULL) return (rc == -ENODATA) ? 0 : rc; if (rc < 0) { CDEBUG(D_INFO, "error getting EA %d/%s from inode %lu: rc %d\n", EXT3_XATTR_INDEX_TRUSTED, name, inode->i_ino, rc); memset(lmm, 0, lmm_size); return (rc == -ENODATA) ? 0 : rc; } return rc;}#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct bio *bio){ submit_bio(rw, bio); return 0;}#elsestatic int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct kiobuf *bio){ int rc, blk_per_page; rc = brw_kiovec(rw, 1, &bio, inode->i_dev, KIOBUF_GET_BLOCKS(bio), 1 << inode->i_blkbits); /* * brw_kiovec() returns number of bytes actually written. If error * occurred after something was written, error code is returned though * kiobuf->errno. (See bug 6854.) */ blk_per_page = CFS_PAGE_SIZE >> inode->i_blkbits; if (rc != (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page) { CERROR("short write? expected %d, wrote %d (%d)\n", (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page, rc, bio->errno); } if (bio->errno != 0) { CERROR("IO error. Wrote %d of %d (%d)\n", rc, (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page, bio->errno); rc = bio->errno; } return rc;}#endifstatic ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, loff_t *off){ struct inode *inode = file->f_dentry->d_inode; int rc = 0; if (S_ISREG(inode->i_mode)) rc = file->f_op->read(file, buf, count, off); else { const int blkbits = inode->i_sb->s_blocksize_bits; const int blksize = inode->i_sb->s_blocksize; CDEBUG(D_EXT2, "reading "LPSZ" at dir %lu+%llu\n", count, inode->i_ino, *off); while (count > 0) { struct buffer_head *bh; bh = NULL; if (*off < i_size_read(inode)) { int err = 0; bh = ext3_bread(NULL, inode, *off >> blkbits, 0, &err); CDEBUG(D_EXT2, "read %u@%llu\n", blksize, *off); if (bh) { memcpy(buf, bh->b_data, blksize); brelse(bh); } else if (err) { /* XXX in theory we should just fake * this buffer and continue like ext3, * especially if this is a partial read */ CERROR("error read dir %lu+%llu: %d\n", inode->i_ino, *off, err); RETURN(err); } } if (!bh) { struct ext3_dir_entry_2 *fake = (void *)buf; CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off); memset(fake, 0, sizeof(*fake)); fake->rec_len = cpu_to_le16(blksize); } count -= blksize; buf += blksize; *off += blksize; rc += blksize; } } return rc;}static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?