filter_io_26.c

来自「lustre 1.6.5 source code」· C语言 代码 · 共 811 行 · 第 1/3 页

C
811
字号
        /* NOTE(review): this excerpt is the tail of the direct-I/O
         * submission routine (filter_do_bio in lustre 1.6); its signature
         * and the declarations of pages/blocks/bio/frags/sector/etc. are
         * above this chunk -- confirm against the full file. */
        int            page_idx;
        int            i;
        int            rc = 0;
        ENTRY;

        LASSERT(iobuf->dr_npages == npages);
        LASSERT(total_blocks <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES);

        /* Walk every page; map each filesystem block to a disk sector and
         * pack runs of contiguous sectors into as few bios as possible. */
        for (page_idx = 0, block_idx = 0;
             page_idx < npages;
             page_idx++, block_idx += blocks_per_page) {
                page = pages[page_idx];
                LASSERT (block_idx + blocks_per_page <= total_blocks);

                for (i = 0, page_offset = 0;
                     i < blocks_per_page;
                     i += nblocks, page_offset += blocksize * nblocks) {
                        nblocks = 1;

                        if (blocks[block_idx + i] == 0) {  /* hole */
                                /* unmapped block: asserted read-only case,
                                 * so just zero-fill this slice of the page */
                                LASSERT(rw == OBD_BRW_READ);
                                memset(kmap(page) + page_offset, 0, blocksize);
                                kunmap(page);
                                continue;
                        }

                        sector = (sector_t)blocks[block_idx + i] << sector_bits;

                        /* Additional contiguous file blocks? */
                        while (i + nblocks < blocks_per_page &&
                               (sector + (nblocks << sector_bits)) ==
                               ((sector_t)blocks[block_idx + i + nblocks] <<
                                sector_bits))
                                nblocks++;

#ifdef HAVE_PAGE_CONSTANT
                        /* I only set the page to be constant only if it
                         * is mapped to a contiguous underlying disk block(s).
                         * It will then make sure the corresponding device
                         * cache of raid5 will be overwritten by this page.
                         * - jay */
                        if ((rw == OBD_BRW_WRITE) &&
                            (nblocks == blocks_per_page) &&
                            mapping_cap_page_constant_write(inode->i_mapping))
                                SetPageConstant(page);
#endif

                        /* try to append this fragment to the current bio */
                        if (bio != NULL &&
                            can_be_merged(bio, sector) &&
                            bio_add_page(bio, page,
                                         blocksize * nblocks, page_offset) != 0)
                                continue;       /* added this frag OK */

                        if (bio != NULL) {
                                request_queue_t *q =
                                        bdev_get_queue(bio->bi_bdev);

                                /* Dang! I have to fragment this I/O */
                                CDEBUG(D_INODE, "bio++ sz %d vcnt %d(%d) "
                                       "sectors %d(%d) psg %d(%d) hsg %d(%d)\n",
                                       bio->bi_size,
                                       bio->bi_vcnt, bio->bi_max_vecs,
                                       bio->bi_size >> 9, q->max_sectors,
                                       bio_phys_segments(q, bio),
                                       q->max_phys_segments,
                                       bio_hw_segments(q, bio),
                                       q->max_hw_segments);

                                /* flush the full bio before starting a new
                                 * one; on failure bail out to the wait/stats
                                 * path below */
                                record_start_io(iobuf, rw, bio->bi_size, exp);
                                rc = fsfilt_send_bio(rw, obd, inode, bio);
                                if (rc < 0) {
                                        CERROR("Can't send bio: %d\n", rc);
                                        record_finish_io(iobuf, rw, rc);
                                        goto out;
                                }
                                frags++;
                        }

                        /* allocate new bio */
                        bio = bio_alloc(GFP_NOIO,
                                        (npages - page_idx) * blocks_per_page);
                        if (bio == NULL) {
                                CERROR("Can't allocate bio %u*%u = %u pages\n",
                                       (npages - page_idx), blocks_per_page,
                                       (npages - page_idx) * blocks_per_page);
                                rc = -ENOMEM;
                                goto out;
                        }

                        bio->bi_bdev = inode->i_sb->s_bdev;
                        bio->bi_sector = sector;
                        bio->bi_end_io = dio_complete_routine;
                        bio->bi_private = iobuf;

                        /* a freshly allocated bio must accept at least this
                         * first fragment, hence the LASSERT */
                        rc = bio_add_page(bio, page,
                                          blocksize * nblocks, page_offset);
                        LASSERT (rc != 0);
                }
        }

        /* submit the final (possibly only) bio */
        if (bio != NULL) {
                record_start_io(iobuf, rw, bio->bi_size, exp);
                rc = fsfilt_send_bio(rw, obd, inode, bio);
                if (rc >= 0) {
                        frags++;
                        rc = 0;
                } else {
                        CERROR("Can't send bio: %d\n", rc);
                        record_finish_io(iobuf, rw, rc);
                }
        }

 out:
        /* wait for every in-flight bio to complete before tallying stats */
        wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0);

        /* per-obd, per-export and (when present) per-nid fragment-count and
         * I/O-time histograms, split by read/write direction */
        if (rw == OBD_BRW_READ) {
                lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_R_DIO_FRAGS], frags);
                lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_DIO_FRAGS],
                                 frags);
                lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.hist[BRW_R_IO_TIME],
                                      jiffies - start_time);
                lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_R_IO_TIME],
                                 jiffies - start_time);
                if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
                        lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_R_DIO_FRAGS],
                                         frags);
                        lprocfs_oh_tally_log2(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_R_IO_TIME],
                                              jiffies - start_time);
                }
        } else {
                lprocfs_oh_tally(&obd->u.filter.fo_filter_stats.hist[BRW_W_DIO_FRAGS], frags);
                lprocfs_oh_tally(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_DIO_FRAGS],
                                 frags);
                lprocfs_oh_tally_log2(&obd->u.filter.fo_filter_stats.hist[BRW_W_IO_TIME],
                                      jiffies - start_time);
                lprocfs_oh_tally_log2(&exp->exp_filter_data.fed_brw_stats.hist[BRW_W_IO_TIME],
                                 jiffies - start_time);
                if (exp->exp_nid_stats && exp->exp_nid_stats->nid_brw_stats) {
                        lprocfs_oh_tally(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_W_DIO_FRAGS],
                                         frags);
                        lprocfs_oh_tally_log2(&exp->exp_nid_stats->nid_brw_stats->hist[BRW_W_IO_TIME],
                                              jiffies - start_time);
                }
        }

        /* completion errors are reported asynchronously via dr_error; only
         * surface them if the submission path itself succeeded */
        if (rc == 0)
                rc = iobuf->dr_error;
        RETURN(rc);
}

/* These are our hacks to keep our directio/bh IO coherent with ext3's
 * page cache use.  Most notably ext3 reads file data into the page
 * cache when it is zeroing the tail of partial-block truncates and
 * leaves it there, sometimes generating io from it at later truncates.
 * This removes the partial page and its buffers from the page cache,
 * so it should only ever cause a wait in rare cases, as otherwise we
 * always do full-page IO to the OST.
 *
 * The call to truncate_complete_page() will call journal_invalidatepage()
 * to free the buffers and drop the page from cache.
The buffers should * not be dirty, because we already called fdatasync/fdatawait on them. */static int filter_sync_inode_data(struct inode *inode, int locked){        int rc = 0;        /* This is nearly do_fsync(), without the waiting on the inode */        /* XXX: in 2.6.16 (at least) we don't need to hold i_mutex over         * filemap_fdatawrite() and filemap_fdatawait(), so we may no longer         * need this lock here at all. */        if (!locked)                LOCK_INODE_MUTEX(inode);        if (inode->i_mapping->nrpages) {#ifdef PF_SYNCWRITE                current->flags |= PF_SYNCWRITE;#endif                rc = filemap_fdatawrite(inode->i_mapping);                if (rc == 0)                        rc = filemap_fdatawait(inode->i_mapping);#ifdef PF_SYNCWRITE                current->flags &= ~PF_SYNCWRITE;#endif        }        if (!locked)                UNLOCK_INODE_MUTEX(inode);        return rc;}/* Clear pages from the mapping before we do direct IO to that offset. * Now that the only source of such pages in the truncate path flushes * these pages to disk and then discards them, this is error condition. * If add back read cache this will happen again.  This could be disabled * until that time if we never see the below error. 
 */
static int filter_clear_page_cache(struct inode *inode,
                                   struct filter_iobuf *iobuf)
{
        struct page *page;
        int i, rc;

        /* flush dirty data and wait for it, so nothing is in flight while
         * we evict pages below */
        rc = filter_sync_inode_data(inode, 0);
        if (rc != 0)
                RETURN(rc);

        /* be careful to call this after fsync_inode_data_buffers has waited
         * for IO to complete before we evict it from the cache */
        for (i = 0; i < iobuf->dr_npages; i++) {
                page = find_lock_page(inode->i_mapping,
                                      iobuf->dr_pages[i]->index);
                if (page == NULL)
                        continue;
                if (page->mapping != NULL) {
                        /* per the comment above this function, a cached page
                         * here is unexpected -- log it, then drop it */
                        CERROR("page %lu (%d/%d) in page cache during write!\n",
                               page->index, i, iobuf->dr_npages);
                        wait_on_page_writeback(page);
                        ll_truncate_complete_page(page);
                }

                unlock_page(page);
                page_cache_release(page);
        }

        return 0;
}

/* Drop the cached partial page at the current EOF, if any, after syncing
 * the inode's data.  Caller holds i_mutex (we pass locked=1 below).
 * Returns 0 on success or a negative errno from the sync. */
int filter_clear_truncated_page(struct inode *inode)
{
        struct page *page;
        int rc;

        /* Truncate on page boundary, so nothing to flush? */
        if (!(i_size_read(inode) & ~CFS_PAGE_MASK))
                return 0;

        rc = filter_sync_inode_data(inode, 1);
        if (rc != 0)
                RETURN(rc);

        /* be careful to call this after fsync_inode_data_buffers has waited
         * for IO to complete before we evict it from the cache */
        page = find_lock_page(inode->i_mapping,
                              i_size_read(inode) >> CFS_PAGE_SHIFT);
        if (page) {
                if (page->mapping != NULL) {
                        wait_on_page_writeback(page);
                        ll_truncate_complete_page(page);
                }
                unlock_page(page);
                page_cache_release(page);
        }

        return 0;
}

/* Must be called with i_mutex taken for writes; this will drop it */
/* NOTE(review): the body of filter_direct_io continues beyond this
 * excerpt; only its prologue is visible here. */
int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
                     struct obd_export *exp, struct iattr *attr,
                     struct obd_trans_info *oti, void **wait_handle)
{
        struct obd_device *obd = exp->exp_obd;
        struct inode *inode = dchild->d_inode;
        int blocks_per_page = CFS_PAGE_SIZE >> inode->i_blkbits;
        int rc, rc2, create;
        struct semaphore *sem;
        ENTRY;

        LASSERTF(iobuf->dr_npages <= iobuf->dr_max_pages, "%d,%d\n",
                 iobuf->dr_npages, iobuf->dr_max_pages);
        LASSERT(iobuf->dr_npages <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?