/* filter_io_24.c -- from "lustre 1.6.5 source code"; C source, 545 lines in total; this is page 1 of 2. */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * linux/fs/obdfilter/filter_io.c * * Copyright (c) 2001-2003 Cluster File Systems, Inc. * Author: Peter Braam <braam@clusterfs.com> * Author: Andreas Dilger <adilger@clusterfs.com> * Author: Phil Schwan <phil@clusterfs.com> * * This file is part of the Lustre file system, http://www.lustre.org * Lustre is a trademark of Cluster File Systems, Inc. * * You may have signed or agreed to another license before downloading * this software. If so, you are bound by the terms and conditions * of that agreement, and the following does not apply to you. See the * LICENSE file included with this distribution for more information. * * If you did not agree to a different license, then this copy of Lustre * is open source software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * In either case, Lustre is distributed in the hope that it will be * useful, but WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * license text for more details. 
*/#ifndef AUTOCONF_INCLUDED#include <linux/config.h>#endif#include <linux/module.h>#include <linux/pagemap.h> // XXX kill me soon#include <linux/version.h>#define DEBUG_SUBSYSTEM S_FILTER#include <linux/iobuf.h>#include <linux/locks.h>#include <obd_class.h>#include <lustre_fsfilt.h>#include "filter_internal.h"/* Bug 2254 -- this is better done in ext3_map_inode_page, but this * workaround will suffice until everyone has upgraded their kernels */static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev, int size){#if (LUSTRE_KERNEL_VERSION < 32) struct buffer_head *bh; int i; for (i = 0; i < nr_pages; i++) { bh = get_hash_table(dev, blocks[i], size); if (bh == NULL) continue; if (!buffer_dirty(bh)) { put_bh(bh); continue; } mark_buffer_clean(bh); wait_on_buffer(bh); clear_bit(BH_Req, &bh->b_state); __brelse(bh); }#endif}/* when brw_kiovec() is asked to read from block -1UL it just zeros * the page. this gives us a chance to verify the write mappings * as well */static int filter_cleanup_mappings(int rw, struct kiobuf *iobuf, struct inode *inode){ int i, blocks_per_page_bits = CFS_PAGE_SHIFT - inode->i_blkbits; ENTRY; for (i = 0 ; i < iobuf->nr_pages << blocks_per_page_bits; i++) { if (KIOBUF_GET_BLOCKS(iobuf)[i] > 0) continue; if (rw == OBD_BRW_WRITE) RETURN(-EINVAL); KIOBUF_GET_BLOCKS(iobuf)[i] = -1UL; } RETURN(0);}#if 0static void dump_page(int rw, unsigned long block, struct page *page){ char *blah = kmap(page); CDEBUG(D_PAGE, "rw %d block %lu: %02x %02x %02x %02x\n", rw, block, blah[0], blah[1], blah[2], blah[3]); kunmap(page);}#endif/* These are our hacks to keep our directio/bh IO coherent with ext3's * page cache use. Most notably ext3 reads file data into the page * cache when it is zeroing the tail of partial-block truncates and * leaves it there, sometimes generating io from it at later truncates. 
* This removes the partial page and its buffers from the page cache, * so it should only ever cause a wait in rare cases, as otherwise we * always do full-page IO to the OST. * * The call to truncate_complete_page() will call journal_flushpage() to * free the buffers and drop the page from cache. The buffers should not * be dirty, because we already called fdatasync/fdatawait on them. */static int filter_sync_inode_data(struct inode *inode){ int rc, rc2; /* This is nearly generic_osync_inode, without the waiting on the inode rc = generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA); */ rc = filemap_fdatasync(inode->i_mapping); rc2 = fsync_inode_data_buffers(inode); if (rc == 0) rc = rc2; rc2 = filemap_fdatawait(inode->i_mapping); if (rc == 0) rc = rc2; return rc;}static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf){ struct page *page; int i, rc; check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages, inode->i_dev, 1 << inode->i_blkbits); rc = filter_sync_inode_data(inode); if (rc != 0) RETURN(rc); /* be careful to call this after fsync_inode_data_buffers has waited * for IO to complete before we evict it from the cache */ for (i = 0; i < iobuf->nr_pages ; i++) { page = find_lock_page(inode->i_mapping, iobuf->maplist[i]->index); if (page == NULL) continue; if (page->mapping != NULL) { /* Now that the only source of such pages in truncate * path flushes these pages to disk and and then * discards, this is error condition */ CERROR("Data page in page cache during write!\n"); ll_truncate_complete_page(page); } unlock_page(page); page_cache_release(page); } return 0;}int filter_clear_truncated_page(struct inode *inode){ struct page *page; int rc; /* Truncate on page boundary, so nothing to flush? 
*/ if (!(i_size_read(inode) & ~CFS_PAGE_MASK)) return 0; rc = filter_sync_inode_data(inode); if (rc != 0) RETURN(rc); /* be careful to call this after fsync_inode_data_buffers has waited * for IO to complete before we evict it from the cache */ page = find_lock_page(inode->i_mapping, i_size_read(inode) >> CFS_PAGE_SHIFT); if (page) { if (page->mapping != NULL) ll_truncate_complete_page(page); unlock_page(page); page_cache_release(page); } return 0;}/* Must be called with i_sem taken for writes; this will drop it */int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *buf, struct obd_export *exp, struct iattr *attr, struct obd_trans_info *oti, void **wait_handle){ struct obd_device *obd = exp->exp_obd; struct inode *inode = dchild->d_inode; struct kiobuf *iobuf = (void *)buf; int rc, create = (rw == OBD_BRW_WRITE), committed = 0; int blocks_per_page = CFS_PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0; struct semaphore *sem = NULL; ENTRY; LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); if (iobuf->nr_pages == 0) GOTO(cleanup, rc = 0); if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS) GOTO(cleanup, rc = -EINVAL); if (iobuf->nr_pages * blocks_per_page > OBDFILTER_CREATED_SCRATCHPAD_ENTRIES) GOTO(cleanup, rc = -EINVAL); cleanup_phase = 1; rc = lock_kiovec(1, &iobuf, 1); if (rc < 0) GOTO(cleanup, rc); cleanup_phase = 2; if (rw == OBD_BRW_WRITE) { create = 1; sem = &obd->u.filter.fo_alloc_lock; } rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist, iobuf->nr_pages, KIOBUF_GET_BLOCKS(iobuf), obdfilter_created_scratchpad, create, sem); if (rc) GOTO(cleanup, rc); rc = filter_cleanup_mappings(rw, iobuf, inode); if (rc) GOTO(cleanup, rc); if (rw == OBD_BRW_WRITE) { if (rc == 0) { filter_tally(exp, iobuf->maplist, iobuf->nr_pages, KIOBUF_GET_BLOCKS(iobuf), blocks_per_page, 1); if (attr->ia_size > i_size_read(inode)) attr->ia_valid |= ATTR_SIZE; rc = fsfilt_setattr(obd, dchild, oti->oti_handle, attr, 0); if (rc) GOTO(cleanup, rc); 
} up(&inode->i_sem); cleanup_phase = 3; rc = filter_finish_transno(exp, oti, 0, 0); if (rc) GOTO(cleanup, rc); rc = fsfilt_commit_async(obd,inode,oti->oti_handle,wait_handle); committed = 1; if (rc) GOTO(cleanup, rc); } else { filter_tally(exp, iobuf->maplist, iobuf->nr_pages, KIOBUF_GET_BLOCKS(iobuf), blocks_per_page, 0); } rc = filter_clear_page_cache(inode, iobuf); if (rc < 0) GOTO(cleanup, rc);
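/* Keyboard shortcuts (code-viewer help text): copy code Ctrl + C; search code Ctrl + F; full-screen mode F11; increase font size Ctrl + =; decrease font size Ctrl + -; show shortcuts ? */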