filter_io_24.c

来自「lustre 1.6.5 source code」· C语言 代码 · 共 545 行 · 第 1/2 页

C
545
字号
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  linux/fs/obdfilter/filter_io.c
 *
 *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
 *   Author: Peter Braam <braam@clusterfs.com>
 *   Author: Andreas Dilger <adilger@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *
 *   This file is part of the Lustre file system, http://www.lustre.org
 *   Lustre is a trademark of Cluster File Systems, Inc.
 *
 *   You may have signed or agreed to another license before downloading
 *   this software.  If so, you are bound by the terms and conditions
 *   of that agreement, and the following does not apply to you.  See the
 *   LICENSE file included with this distribution for more information.
 *
 *   If you did not agree to a different license, then this copy of Lustre
 *   is open source software; you can redistribute it and/or modify it
 *   under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   In either case, Lustre is distributed in the hope that it will be
 *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   license text for more details.
*/#ifndef AUTOCONF_INCLUDED#include <linux/config.h>#endif#include <linux/module.h>#include <linux/pagemap.h> // XXX kill me soon#include <linux/version.h>#define DEBUG_SUBSYSTEM S_FILTER#include <linux/iobuf.h>#include <linux/locks.h>#include <obd_class.h>#include <lustre_fsfilt.h>#include "filter_internal.h"/* Bug 2254 -- this is better done in ext3_map_inode_page, but this * workaround will suffice until everyone has upgraded their kernels */static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev,                              int size){#if (LUSTRE_KERNEL_VERSION < 32)        struct buffer_head *bh;        int i;        for (i = 0; i < nr_pages; i++) {                bh = get_hash_table(dev, blocks[i], size);                if (bh == NULL)                        continue;                if (!buffer_dirty(bh)) {                        put_bh(bh);                        continue;                }                mark_buffer_clean(bh);                wait_on_buffer(bh);                clear_bit(BH_Req, &bh->b_state);                __brelse(bh);        }#endif}/* when brw_kiovec() is asked to read from block -1UL it just zeros * the page.  
this gives us a chance to verify the write mappings * as well */static int filter_cleanup_mappings(int rw, struct kiobuf *iobuf,                                   struct inode *inode){        int i, blocks_per_page_bits = CFS_PAGE_SHIFT - inode->i_blkbits;        ENTRY;        for (i = 0 ; i < iobuf->nr_pages << blocks_per_page_bits; i++) {                if (KIOBUF_GET_BLOCKS(iobuf)[i] > 0)                        continue;                if (rw == OBD_BRW_WRITE)                        RETURN(-EINVAL);                KIOBUF_GET_BLOCKS(iobuf)[i] = -1UL;        }        RETURN(0);}#if 0static void dump_page(int rw, unsigned long block, struct page *page){        char *blah = kmap(page);        CDEBUG(D_PAGE, "rw %d block %lu: %02x %02x %02x %02x\n", rw, block,                       blah[0], blah[1], blah[2], blah[3]);        kunmap(page);}#endif/* These are our hacks to keep our directio/bh IO coherent with ext3's * page cache use.  Most notably ext3 reads file data into the page * cache when it is zeroing the tail of partial-block truncates and * leaves it there, sometimes generating io from it at later truncates. * This removes the partial page and its buffers from the page cache, * so it should only ever cause a wait in rare cases, as otherwise we * always do full-page IO to the OST. * * The call to truncate_complete_page() will call journal_flushpage() to * free the buffers and drop the page from cache.  The buffers should not * be dirty, because we already called fdatasync/fdatawait on them. 
*/static int filter_sync_inode_data(struct inode *inode){        int rc, rc2;        /* This is nearly generic_osync_inode, without the waiting on the inode        rc = generic_osync_inode(inode, inode->i_mapping,                                 OSYNC_DATA|OSYNC_METADATA);         */        rc = filemap_fdatasync(inode->i_mapping);        rc2 = fsync_inode_data_buffers(inode);        if (rc == 0)                rc = rc2;        rc2 = filemap_fdatawait(inode->i_mapping);        if (rc == 0)                rc = rc2;        return rc;}static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf){        struct page *page;        int i, rc;        check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,                          inode->i_dev, 1 << inode->i_blkbits);        rc = filter_sync_inode_data(inode);        if (rc != 0)                RETURN(rc);        /* be careful to call this after fsync_inode_data_buffers has waited         * for IO to complete before we evict it from the cache */        for (i = 0; i < iobuf->nr_pages ; i++) {                page = find_lock_page(inode->i_mapping,                                      iobuf->maplist[i]->index);                if (page == NULL)                        continue;                if (page->mapping != NULL) {                        /* Now that the only source of such pages in truncate                         * path flushes these pages to disk and and then                         * discards, this is error condition */                        CERROR("Data page in page cache during write!\n");                        ll_truncate_complete_page(page);                }                unlock_page(page);                page_cache_release(page);        }        return 0;}int filter_clear_truncated_page(struct inode *inode){        struct page *page;        int rc;        /* Truncate on page boundary, so nothing to flush? 
*/        if (!(i_size_read(inode) & ~CFS_PAGE_MASK))                return 0;        rc = filter_sync_inode_data(inode);        if (rc != 0)                RETURN(rc);        /* be careful to call this after fsync_inode_data_buffers has waited         * for IO to complete before we evict it from the cache */        page = find_lock_page(inode->i_mapping,                              i_size_read(inode) >> CFS_PAGE_SHIFT);        if (page) {                if (page->mapping != NULL)                        ll_truncate_complete_page(page);                unlock_page(page);                page_cache_release(page);        }        return 0;}/* Must be called with i_sem taken for writes; this will drop it */int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf,                     struct obd_export *exp, struct iattr *attr,                     struct obd_trans_info *oti, void **wait_handle){        struct obd_device *obd = exp->exp_obd;        struct inode *inode = dchild->d_inode;        struct kiobuf *iobuf = (void *)buf;        int rc, create = (rw == OBD_BRW_WRITE), committed = 0;        int blocks_per_page = CFS_PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0;        struct semaphore *sem = NULL;        ENTRY;        LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw);        if (iobuf->nr_pages == 0)                GOTO(cleanup, rc = 0);        if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)                GOTO(cleanup, rc = -EINVAL);        if (iobuf->nr_pages * blocks_per_page >            OBDFILTER_CREATED_SCRATCHPAD_ENTRIES)                GOTO(cleanup, rc = -EINVAL);        cleanup_phase = 1;        rc = lock_kiovec(1, &iobuf, 1);        if (rc < 0)                GOTO(cleanup, rc);        cleanup_phase = 2;        if (rw == OBD_BRW_WRITE) {                create = 1;                sem = &obd->u.filter.fo_alloc_lock;        }        rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist,                             
       iobuf->nr_pages, KIOBUF_GET_BLOCKS(iobuf),                                    obdfilter_created_scratchpad, create, sem);        if (rc)                GOTO(cleanup, rc);        rc = filter_cleanup_mappings(rw, iobuf, inode);        if (rc)                GOTO(cleanup, rc);        if (rw == OBD_BRW_WRITE) {                if (rc == 0) {                        filter_tally(exp, iobuf->maplist, iobuf->nr_pages,                                     KIOBUF_GET_BLOCKS(iobuf), blocks_per_page,                                     1);                        if (attr->ia_size > i_size_read(inode))                                attr->ia_valid |= ATTR_SIZE;                        rc = fsfilt_setattr(obd, dchild,                                            oti->oti_handle, attr, 0);                        if (rc)                                GOTO(cleanup, rc);                }                up(&inode->i_sem);                cleanup_phase = 3;                rc = filter_finish_transno(exp, oti, 0, 0);                if (rc)                        GOTO(cleanup, rc);                rc = fsfilt_commit_async(obd,inode,oti->oti_handle,wait_handle);                committed = 1;                if (rc)                        GOTO(cleanup, rc);        } else {                filter_tally(exp, iobuf->maplist, iobuf->nr_pages,                             KIOBUF_GET_BLOCKS(iobuf), blocks_per_page, 0);        }        rc = filter_clear_page_cache(inode, iobuf);        if (rc < 0)                GOTO(cleanup, rc);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?