⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rw.c

📁 lustre 1.6.5 source code
💻 C
📖 第 1 页 / 共 5 页
字号:
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Lustre Lite I/O page cache routines shared by different kernel revs
 *
 *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#ifndef AUTOCONF_INCLUDED
#include <linux/config.h>
#endif
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/unistd.h>
#include <linux/version.h>
#include <asm/system.h>
#include <asm/uaccess.h>

#include <linux/fs.h>
#include <linux/stat.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>

#define DEBUG_SUBSYSTEM S_LLITE

#include <lustre_lite.h>
#include "llite_internal.h"
#include <linux/lustre_compat25.h>

/* Fallback for kernels whose list.h lacks a reverse-safe iterator. */
#ifndef list_for_each_prev_safe
#define list_for_each_prev_safe(pos, n, head) \
        for (pos = (head)->prev, n = pos->prev; pos != (head); \
                pos = n, n = pos->prev )
#endif

/* Slab cache for ll_async_page structures; set up elsewhere in llite
 * (not visible in this file section). */
cfs_mem_cache_t *ll_async_page_slab = NULL;
size_t ll_async_page_slab_size = 0;

/* SYNCHRONOUS I/O to object storage for an inode.
 *
 * Issues a single-page obd_brw() for @page at the file offset implied by
 * page->index.  For a write whose page extends past i_size, only the bytes
 * up to i_size are sent (i_size % CFS_PAGE_SIZE); otherwise a full
 * CFS_PAGE_SIZE is transferred.  On success the block count carried back
 * in @oa is merged into the inode via obdo_to_inode().
 *
 * @cmd is OBD_BRW_READ or OBD_BRW_WRITE (possibly with extra flags);
 * @flags is stored into brw_page.flag for the lower layers.
 * Returns 0 on success or a negative errno from obd_brw(). */
static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
                  struct page *page, int flags)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lov_stripe_md *lsm = lli->lli_smd;
        struct obd_info oinfo = { { { 0 } } };
        struct brw_page pg;
        int rc;
        ENTRY;

        pg.pg = page;
        pg.off = ((obd_off)page->index) << CFS_PAGE_SHIFT;

        /* Clamp a tail-page write to EOF; reads always ask for a full page. */
        if ((cmd & OBD_BRW_WRITE) && (pg.off+CFS_PAGE_SIZE>i_size_read(inode)))
                pg.count = i_size_read(inode) % CFS_PAGE_SIZE;
        else
                pg.count = CFS_PAGE_SIZE;

        LL_CDEBUG_PAGE(D_PAGE, page, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
                       cmd & OBD_BRW_WRITE ? "write" : "read", pg.count,
                       inode->i_ino, pg.off, pg.off);
        /* count == 0 here means i_size landed exactly on a page boundary for
         * a write that claimed to straddle it -- log loudly but carry on. */
        if (pg.count == 0) {
                CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
                       LPU64"\n", inode->i_ino, inode, i_size_read(inode),
                       page->mapping->host, i_size_read(page->mapping->host),
                       page->index, pg.off);
        }
        pg.flag = flags;

        if (cmd & OBD_BRW_WRITE)
                ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE,
                                   pg.count);
        else
                ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ,
                           pg.count);
        oinfo.oi_oa = oa;
        oinfo.oi_md = lsm;
        rc = obd_brw(cmd, ll_i2obdexp(inode), &oinfo, 1, &pg, NULL);
        if (rc == 0)
                obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
        else if (rc != -EIO)
                CERROR("error from obd_brw: rc = %d\n", rc);
        RETURN(rc);
}

/* Punch (truncate) the object storage backing @inode down to @new_size.
 *
 * The punch extent sent to the OSTs is [new_size, OBD_OBJECT_EOF].  When
 * @srvlock is non-zero, OBD_FL_TRUNCLOCK is set in the obdo flags --
 * NOTE(review): this appears to delegate extent locking for the truncate
 * to the server; confirm against the osc/ost handling of that flag.
 *
 * On success the size/blocks/timestamps returned in the reply obdo are
 * merged back into the inode.  Returns 0 or a negative errno. */
int ll_file_punch(struct inode * inode, loff_t new_size, int srvlock)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct obd_info oinfo = { { { 0 } } };
        struct obdo oa;
        int rc;
        ENTRY;

        CDEBUG(D_INFO, "calling punch for "LPX64" (new size %Lu=%#Lx)\n",
               lli->lli_smd->lsm_object_id, new_size, new_size);

        oinfo.oi_md = lli->lli_smd;
        oinfo.oi_policy.l_extent.start = new_size;
        oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
        oinfo.oi_oa = &oa;
        oa.o_id = lli->lli_smd->lsm_object_id;
        oa.o_valid = OBD_MD_FLID;
        oa.o_flags = srvlock ? OBD_FL_TRUNCLOCK : 0;
        obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |OBD_MD_FLFID|
                        OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                        OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLGENER |
                        OBD_MD_FLBLOCKS);
        rc = obd_punch_rqset(ll_i2obdexp(inode), &oinfo, NULL);
        if (rc) {
                CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
                RETURN(rc);
        }
        obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                      OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
        RETURN(0);
}

/* this isn't where truncate starts.   roughly:
 * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate. setattr_raw grabs
 * DLM lock on [size, EOF], i_mutex, ->lli_size_sem, and WRITE_I_ALLOC_SEM to
 * avoid races.
 *
 * must be called under ->lli_size_sem -- this function releases it on every
 * path (either via out_unlock or via the explicit unlock before punching).
 * If the caller is not the recorded lli_size_sem owner the call is a no-op. */
void ll_truncate(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        int srvlock = test_bit(LLI_F_SRVLOCK, &lli->lli_flags);
        loff_t new_size;
        ENTRY;
        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %Lu=%#Lx\n",inode->i_ino,
               inode->i_generation, inode, i_size_read(inode), i_size_read(inode));
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_TRUNC, 1);
        /* Only the thread that took lli_size_sem may proceed; otherwise
         * this is a spurious ->truncate invocation and we bail out. */
        if (lli->lli_size_sem_owner != current) {
                EXIT;
                return;
        }

        if (!lli->lli_smd) {
                CDEBUG(D_INODE, "truncate on inode %lu with no objects\n",
                       inode->i_ino);
                GOTO(out_unlock, 0);
        }

        /* Sanity: the size semaphore really is held. */
        LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);

        if (!srvlock) {
                struct ost_lvb lvb;
                int rc;

                /* XXX I'm pretty sure this is a hack to paper over a more fundamental
                 * race condition. */
                lov_stripe_lock(lli->lli_smd);
                inode_init_lvb(inode, &lvb);
                rc = obd_merge_lvb(ll_i2obdexp(inode), lli->lli_smd, &lvb, 0);
                inode->i_blocks = lvb.lvb_blocks;
                /* If the merged object size already equals i_size there is
                 * nothing to punch -- skip the RPC entirely. */
                if (lvb.lvb_size == i_size_read(inode) && rc == 0) {
                        CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
                               lli->lli_smd->lsm_object_id, i_size_read(inode),
                               i_size_read(inode));
                        lov_stripe_unlock(lli->lli_smd);
                        GOTO(out_unlock, 0);
                }

                obd_adjust_kms(ll_i2obdexp(inode), lli->lli_smd,
                               i_size_read(inode), 1);
                lov_stripe_unlock(lli->lli_smd);
        }

        if (unlikely((ll_i2sbi(inode)->ll_flags & LL_SBI_LLITE_CHECKSUM) &&
                     (i_size_read(inode) & ~CFS_PAGE_MASK))) {
                /* If the truncate leaves a partial page, update its checksum */
                struct page *page = find_get_page(inode->i_mapping,
                                                  i_size_read(inode) >>
                                                  CFS_PAGE_SHIFT);
                if (page != NULL) {
                        struct ll_async_page *llap = llap_cast_private(page);
                        if (llap != NULL) {
                                char *kaddr = kmap_atomic(page, KM_USER0);
                                llap->llap_checksum =
                                        init_checksum(OSC_DEFAULT_CKSUM);
                                llap->llap_checksum =
                                        compute_checksum(llap->llap_checksum,
                                                         kaddr, CFS_PAGE_SIZE,
                                                         OSC_DEFAULT_CKSUM);
                                kunmap_atomic(kaddr, KM_USER0);
                        }
                        page_cache_release(page);
                }
        }

        /* Snapshot the size and drop the size lock BEFORE the punch RPC so
         * we don't hold lli_size_sem across a network round trip. */
        new_size = i_size_read(inode);
        ll_inode_size_unlock(inode, 0);
        if (!srvlock)
                ll_file_punch(inode, new_size, 0);

        EXIT;
        return;

 out_unlock:
        ll_inode_size_unlock(inode, 0);
} /* ll_truncate */

/* ->prepare_write() address-space hook: get @page ready for a write of
 * bytes [@from, @to) by the caller.
 *
 * Steps: probe the export with OBD_BRW_CHECK (early -EIO detection);
 * nothing more to do if the page is already uptodate; a full-page write
 * skips the read and deliberately leaves the page !uptodate until
 * commit_write; a partial write beyond KMS just zero-fills the page;
 * otherwise the old contents are read in synchronously via ll_brw().
 * Returns 0 on success or a negative errno; on success (except the
 * full-overwrite case) the page is marked uptodate. */
int ll_prepare_write(struct file *file, struct page *page, unsigned from,
                     unsigned to)
{
        struct inode *inode = page->mapping->host;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lov_stripe_md *lsm = lli->lli_smd;
        obd_off offset = ((obd_off)page->index) << CFS_PAGE_SHIFT;
        struct obd_info oinfo = { { { 0 } } };
        struct brw_page pga;
        struct obdo oa;
        struct ost_lvb lvb;
        int rc = 0;
        ENTRY;

        LASSERT(PageLocked(page));
        (void)llap_cast_private(page); /* assertion */

        /* Check to see if we should return -EIO right away */
        pga.pg = page;
        pga.off = offset;
        pga.count = CFS_PAGE_SIZE;
        pga.flag = 0;

        oa.o_mode = inode->i_mode;
        oa.o_id = lsm->lsm_object_id;
        oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
        obdo_from_inode(&oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);

        oinfo.oi_oa = &oa;
        oinfo.oi_md = lsm;
        rc = obd_brw(OBD_BRW_CHECK, ll_i2obdexp(inode), &oinfo, 1, &pga, NULL);
        if (rc)
                RETURN(rc);

        if (PageUptodate(page)) {
                LL_CDEBUG_PAGE(D_PAGE, page, "uptodate\n");
                RETURN(0);
        }

        /* We're completely overwriting an existing page, so _don't_ set it up
         * to date until commit_write */
        if (from == 0 && to == CFS_PAGE_SIZE) {
                LL_CDEBUG_PAGE(D_PAGE, page, "full page write\n");
                POISON_PAGE(page, 0x11);
                RETURN(0);
        }

        /* If are writing to a new page, no need to read old data.  The extent
         * locking will have updated the KMS, and for our purposes here we can
         * treat it like i_size. */
        lov_stripe_lock(lsm);
        inode_init_lvb(inode, &lvb);
        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
        lov_stripe_unlock(lsm);
        if (lvb.lvb_size <= offset) {
                /* Page lies entirely beyond known minimum size: zero-fill
                 * instead of reading. */
                char *kaddr = kmap_atomic(page, KM_USER0);
                LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
                               lvb.lvb_size, offset);
                memset(kaddr, 0, CFS_PAGE_SIZE);
                kunmap_atomic(kaddr, KM_USER0);
                GOTO(prepare_done, rc = 0);
        }

        /* XXX could be an async ocp read.. read-ahead? */
        rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
        if (rc == 0) {
                /* bug 1598: don't clobber blksize */
                oa.o_valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLKSZ);
                obdo_refresh_inode(inode, &oa, oa.o_valid);
        }

        EXIT;
 prepare_done:
        if (rc == 0)
                SetPageUptodate(page);
        return rc;
}

/* Async-page "make ready" callback: transition the page backing @data
 * (an llap cookie) from dirty to under-writeback so it can be put in a
 * write RPC.  Takes a page reference on success.
 * Returns 0 when the page is ready, -EAGAIN if the page lock is
 * currently held (caller should retry later). */
static int ll_ap_make_ready(void *data, int cmd)
{
        struct ll_async_page *llap;
        struct page *page;
        ENTRY;

        llap = LLAP_FROM_COOKIE(data);
        page = llap->llap_page;

        /* This path is write-only; reads never come through make_ready. */
        LASSERTF(!(cmd & OBD_BRW_READ), "cmd %x page %p ino %lu index %lu\n", cmd, page,
                 page->mapping->host->i_ino, page->index);

        /* we're trying to write, but the page is locked.. come back later */
        if (TryLockPage(page))
                RETURN(-EAGAIN);

        LASSERT(!PageWriteback(page));

        /* if we left PageDirty we might get another writepage call
         * in the future.  list walkers are bright enough
         * to check page dirty so we can leave it on whatever list
         * its on.  XXX also, we're called with the cli list so if
         * we got the page cache list we'd create a lock inversion
         * with the removepage path which gets the page lock then the
         * cli lock */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
        clear_page_dirty(page);
#else
        LASSERTF(!PageWriteback(page),"cmd %x page %p ino %lu index %lu\n", cmd, page,
                 page->mapping->host->i_ino, page->index);
        clear_page_dirty_for_io(page);

        /* This actually clears the dirty bit in the radix tree.*/
        set_page_writeback(page);
#endif

        LL_CDEBUG_PAGE(D_PAGE, page, "made ready\n");
        page_cache_get(page);

        RETURN(0);
}

/* We have two reasons for giving llite the opportunity to change the
 * write length of a given queued page as it builds the RPC containing
 * the page:
 *
 * 1) Further extending writes may have landed in the page cache
 *    since a partial write first queued this page requiring us
 *    to write more from the page cache.  (No further races are possible, since
 *    by the time this is called, the page is locked.)
 * 2) We might have raced with truncate and want to avoid performing
 *    write RPCs that are just going to be thrown away by the

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -