fsfilt_ext3.c

来自「lustre 1.6.5 source code」· C语言 代码 · 共 1,853 行 · 第 1/5 页

C
1,853
字号
 * any of the matching commit_write operations, so even if we split * up to use "smaller" transactions none of them could complete until * all of them were opened.  By having a single journal transaction, * we eliminate duplicate reservations for common blocks like the * superblock and group descriptors or bitmaps. * * We will start the transaction here, but each prepare_write will * add a refcount to the transaction, and each commit_write will * remove a refcount.  The transaction will be closed when all of * the pages have been written. */static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,                                   int niocount, struct niobuf_local *nb,                                   void *desc_private, int logs){        journal_t *journal;        handle_t *handle;        int needed;        ENTRY;        LASSERT(current->journal_info == desc_private);        journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;        needed = fsfilt_ext3_credits_needed(objcount, fso, niocount, nb);        /* The number of blocks we could _possibly_ dirty can very large.         * We reduce our request if it is absurd (and we couldn't get that         * many credits for a single handle anyways).         *         * At some point we have to limit the size of I/Os sent at one time,         * increase the size of the journal, or we have to calculate the         * actual journal requirements more carefully by checking all of         * the blocks instead of being maximally pessimistic.  It remains to         * be seen if this is a real problem or not.         */        if (needed > journal->j_max_transaction_buffers) {                CERROR("want too many journal credits (%d) using %d instead\n",                       needed, journal->j_max_transaction_buffers);                needed = journal->j_max_transaction_buffers;        }        LASSERTF(needed > 0, "can't start %d credit transaction\n", needed);        lock_24kernel();        handle = fsfilt_ext3_journal_start(fso->fso_dentry->d_inode, needed);        unlock_24kernel();        if (IS_ERR(handle)) {                CERROR("can't get handle for %d credits: rc = %ld\n", needed,                       PTR_ERR(handle));        } else {                LASSERT(handle->h_buffer_credits >= needed);                LASSERT(current->journal_info == handle);        }        RETURN(handle);}static int fsfilt_ext3_extend(struct inode *inode, unsigned int nblocks,void *h){       handle_t *handle = h;       /* fsfilt_extend called with nblocks = 0 for testing in special cases */       if (nblocks == 0) {               handle->h_buffer_credits = 0;               CWARN("setting credits of handle %p to zero by request\n", h);       }       if (handle->h_buffer_credits > nblocks)                return 0;       if (journal_extend(handle, nblocks) == 0)                return 0;       ext3_mark_inode_dirty(handle, inode);       return journal_restart(handle, nblocks);}static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync){        int rc;        handle_t *handle = h;        LASSERT(current->journal_info == handle);        if (force_sync)                handle->h_sync = 1; /* recovery likes this */        lock_24kernel();        rc = fsfilt_ext3_journal_stop(handle);        unlock_24kernel();        return rc;}static int fsfilt_ext3_commit_async(struct inode *inode, void *h,                                    void **wait_handle){        unsigned long tid;        transaction_t *transaction;#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)        unsigned long rtid;#endif        handle_t *handle = h;        journal_t *journal;        int rc;        LASSERT(current->journal_info == handle);        lock_24kernel();        transaction = handle->h_transaction;        journal = transaction->t_journal;        tid = transaction->t_tid;        /* we don't want to be blocked */        handle->h_sync = 0;        rc = fsfilt_ext3_journal_stop(handle);        if (rc) {                CERROR("error while stopping transaction: %d\n", rc);                unlock_24kernel();                return rc;        }#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)        rtid = log_start_commit(journal, transaction);        if (rtid != tid)                CERROR("strange race: %lu != %lu\n",                       (unsigned long) tid, (unsigned long) rtid);#else        log_start_commit(journal, tid);#endif        unlock_24kernel();        *wait_handle = (void *) tid;        CDEBUG(D_INODE, "commit async: %lu\n", (unsigned long) tid);        return 0;}static int fsfilt_ext3_commit_wait(struct inode *inode, void *h){        journal_t *journal = EXT3_JOURNAL(inode);        tid_t tid = (tid_t)(long)h;        CDEBUG(D_INODE, "commit wait: %lu\n", (unsigned long) tid);        if (unlikely(is_journal_aborted(journal)))                return -EIO;        log_wait_commit(EXT3_JOURNAL(inode), tid);        if (unlikely(is_journal_aborted(journal)))                return -EIO;        return 0;}static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle,                               struct iattr *iattr, int do_trunc){        struct inode *inode = dentry->d_inode;        int rc = 0;        lock_24kernel();        /* Avoid marking the inode dirty on the superblock list unnecessarily.         * We are already writing the inode to disk as part of this         * transaction and want to avoid a lot of extra inode writeout         * later on. b=9828 */        if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {                /* ATTR_SIZE would invoke truncate: clear it */                iattr->ia_valid &= ~ATTR_SIZE;                EXT3_I(inode)->i_disksize = iattr->ia_size;                i_size_write(inode, iattr->ia_size);                if (iattr->ia_valid & ATTR_UID)                        inode->i_uid = iattr->ia_uid;                if (iattr->ia_valid & ATTR_GID)                        inode->i_gid = iattr->ia_gid;                if (iattr->ia_valid & ATTR_ATIME)                        inode->i_atime = iattr->ia_atime;                if (iattr->ia_valid & ATTR_MTIME)                        inode->i_mtime = iattr->ia_mtime;                if (iattr->ia_valid & ATTR_CTIME)                        inode->i_ctime = iattr->ia_ctime;                if (iattr->ia_valid & ATTR_MODE) {                        inode->i_mode = iattr->ia_mode;                        if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))                                inode->i_mode &= ~S_ISGID;                }                inode->i_sb->s_op->dirty_inode(inode);                goto out;        }        /* Don't allow setattr to change file type */        if (iattr->ia_valid & ATTR_MODE)                iattr->ia_mode = (inode->i_mode & S_IFMT) |                                 (iattr->ia_mode & ~S_IFMT);        /* We set these flags on the client, but have already checked perms         * so don't confuse inode_change_ok. */        iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);        if (inode->i_op->setattr) {                rc = inode->i_op->setattr(dentry, iattr);        } else {                rc = inode_change_ok(inode, iattr);                if (!rc)                        rc = inode_setattr(inode, iattr);        } out:        unlock_24kernel();        RETURN(rc);}static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file,                                 unsigned int cmd, unsigned long arg){        int rc = 0;        ENTRY;        /* FIXME: Can't do this because of nested transaction deadlock */        if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) {                CERROR("can't set data journal flag on file\n");                RETURN(-EPERM);        }        if (inode->i_fop->ioctl)                rc = inode->i_fop->ioctl(inode, file, cmd, arg);        else                RETURN(-ENOTTY);        RETURN(rc);}static int fsfilt_ext3_set_md(struct inode *inode, void *handle,                              void *lmm, int lmm_size, const char *name){        int rc;        LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);        lock_24kernel();        rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED,                                   name, lmm, lmm_size, 0);        unlock_24kernel();        if (rc && rc != -EROFS)                CERROR("error adding MD data to inode %lu: rc = %d\n",                       inode->i_ino, rc);        return rc;}/* Must be called with i_mutex held */static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size,                              const char *name){        int rc;        LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);        lock_24kernel();        rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED,                            name, lmm, lmm_size);        unlock_24kernel();        /* This gives us the MD size */        if (lmm == NULL)                return (rc == -ENODATA) ? 0 : rc;        if (rc < 0) {                CDEBUG(D_INFO, "error getting EA %d/%s from inode %lu: rc %d\n",                       EXT3_XATTR_INDEX_TRUSTED, name,                       inode->i_ino, rc);                memset(lmm, 0, lmm_size);                return (rc == -ENODATA) ? 0 : rc;        }        return rc;}#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct bio *bio){        submit_bio(rw, bio);        return 0;}#elsestatic int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct kiobuf *bio){        int rc, blk_per_page;        rc = brw_kiovec(rw, 1, &bio, inode->i_dev,                        KIOBUF_GET_BLOCKS(bio), 1 << inode->i_blkbits);        /*         * brw_kiovec() returns number of bytes actually written. If error         * occurred after something was written, error code is returned though         * kiobuf->errno. (See bug 6854.)         */        blk_per_page = CFS_PAGE_SIZE >> inode->i_blkbits;        if (rc != (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page) {                CERROR("short write?  expected %d, wrote %d (%d)\n",                       (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page,                       rc, bio->errno);        }        if (bio->errno != 0) {                CERROR("IO error. Wrote %d of %d (%d)\n",                       rc,                       (1 << inode->i_blkbits) * bio->nr_pages * blk_per_page,                       bio->errno);                rc = bio->errno;        }        return rc;}#endifstatic ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,                                    loff_t *off){        struct inode *inode = file->f_dentry->d_inode;        int rc = 0;        if (S_ISREG(inode->i_mode))                rc = file->f_op->read(file, buf, count, off);        else {                const int blkbits = inode->i_sb->s_blocksize_bits;                const int blksize = inode->i_sb->s_blocksize;                CDEBUG(D_EXT2, "reading "LPSZ" at dir %lu+%llu\n",                       count, inode->i_ino, *off);                while (count > 0) {                        struct buffer_head *bh;                        bh = NULL;                        if (*off < i_size_read(inode)) {                                int err = 0;                                bh = ext3_bread(NULL, inode, *off >> blkbits,                                                0, &err);                                CDEBUG(D_EXT2, "read %u@%llu\n", blksize, *off);                                if (bh) {                                        memcpy(buf, bh->b_data, blksize);                                        brelse(bh);                                } else if (err) {                                        /* XXX in theory we should just fake                                         * this buffer and continue like ext3,                                         * especially if this is a partial read                                         */                                        CERROR("error read dir %lu+%llu: %d\n",                                               inode->i_ino, *off, err);                                        RETURN(err);                                }                        }                        if (!bh) {                                struct ext3_dir_entry_2 *fake = (void *)buf;                                CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off);                                memset(fake, 0, sizeof(*fake));                                fake->rec_len = cpu_to_le16(blksize);                        }                        count -= blksize;                        buf += blksize;                        *off += blksize;                        rc += blksize;                }        }        return rc;}static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?