fsfilt_ext3.c

来自「lustre 1.6.5 source code」· C语言 代码 · 共 1,853 行 · 第 1/5 页

C
1,853
字号
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * *  lustre/lib/fsfilt_ext3.c *  Lustre filesystem abstraction routines * *  Copyright (C) 2002, 2003 Cluster File Systems, Inc. *   Author: Andreas Dilger <adilger@clusterfs.com> * *   This file is part of Lustre, http://www.lustre.org. * *   Lustre is free software; you can redistribute it and/or *   modify it under the terms of version 2 of the GNU General Public *   License as published by the Free Software Foundation. * *   Lustre is distributed in the hope that it will be useful, *   but WITHOUT ANY WARRANTY; without even the implied warranty of *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *   GNU General Public License for more details. * *   You should have received a copy of the GNU General Public License *   along with Lustre; if not, write to the Free Software *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#define DEBUG_SUBSYSTEM S_FILTER#include <linux/init.h>#include <linux/module.h>#include <linux/fs.h>#include <linux/jbd.h>#include <linux/slab.h>#include <linux/pagemap.h>#include <linux/quotaops.h>#include <linux/ext3_fs.h>#include <linux/ext3_jbd.h>#include <linux/version.h>#include <linux/bitops.h>#include <linux/quota.h>#include <linux/quotaio_v1.h>#include <linux/quotaio_v2.h>#include <linux/parser.h>#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#include <linux/ext3_xattr.h>#else#include <ext3/xattr.h>#endif#include <libcfs/kp30.h>#include <lustre_fsfilt.h>#include <obd.h>#include <lustre_quota.h>#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#include <linux/iobuf.h>#endif#include <linux/lustre_compat25.h>#include <linux/lprocfs_status.h>#ifdef EXT3_MULTIBLOCK_ALLOCATOR#include <linux/ext3_extents.h>#endif#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)#define FSFILT_DATA_TRANS_BLOCKS(sb)      EXT3_DATA_TRANS_BLOCKS#define FSFILT_DELETE_TRANS_BLOCKS(sb)    EXT3_DELETE_TRANS_BLOCKS#else#define 
FSFILT_DATA_TRANS_BLOCKS(sb)      EXT3_DATA_TRANS_BLOCKS(sb)#define FSFILT_DELETE_TRANS_BLOCKS(sb)    EXT3_DELETE_TRANS_BLOCKS(sb)#endif#ifdef EXT3_SINGLEDATA_TRANS_BLOCKS_HAS_SB/* for kernels 2.6.18 and later */#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS(sb)#else#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS#endif#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#define fsfilt_ext3_journal_start(inode, nblocks) \                                journal_start(EXT3_JOURNAL(inode),nblocks)#define fsfilt_ext3_journal_stop(handle)          journal_stop(handle)#else#define fsfilt_ext3_journal_start(inode, nblocks) ext3_journal_start(inode, nblocks)#define fsfilt_ext3_journal_stop(handle)          ext3_journal_stop(handle)#endifstatic cfs_mem_cache_t *fcb_cache;struct fsfilt_cb_data {        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */        struct obd_device *cb_obd;      /* MDS/OBD completion device */        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */        void *cb_data;                  /* MDS/OST completion function data */};#ifndef EXT3_XATTR_INDEX_TRUSTED        /* temporary until we hit l28 kernel */#define EXT3_XATTR_INDEX_TRUSTED        4#endifstatic char *fsfilt_ext3_get_label(struct super_block *sb){        return EXT3_SB(sb)->s_es->s_volume_name;}static int fsfilt_ext3_set_label(struct super_block *sb, char *label){        /* see e.g. 
fsfilt_ext3_write_record() */        journal_t *journal;        handle_t *handle;        int err;        journal = EXT3_SB(sb)->s_journal;        lock_24kernel();        handle = journal_start(journal, 1);        unlock_24kernel();        if (IS_ERR(handle)) {                CERROR("can't start transaction\n");                return(PTR_ERR(handle));        }        err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);        if (err)                goto out;        memcpy(EXT3_SB(sb)->s_es->s_volume_name, label,               sizeof(EXT3_SB(sb)->s_es->s_volume_name));        err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);out:        lock_24kernel();        journal_stop(handle);        unlock_24kernel();        return(err);}static char *fsfilt_ext3_uuid(struct super_block *sb){        return EXT3_SB(sb)->s_es->s_uuid;}#ifdef HAVE_DISK_INODE_VERSION/* * Get the 64-bit version for an inode. */static __u64 fsfilt_ext3_get_version(struct inode *inode){        return EXT3_I(inode)->i_fs_version;}/* * Set the 64-bit version and return the old version. */static __u64 fsfilt_ext3_set_version(struct inode *inode, __u64 new_version){        __u64 old_version = EXT3_I(inode)->i_fs_version;        (EXT3_I(inode))->i_fs_version = new_version;        return old_version;}#endif/* * We don't currently need any additional blocks for rmdir and * unlink transactions because we are storing the OST oa_id inside * the inode (which we will be changing anyways as part of this * transaction). 
*/static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,                               int logs){        /* For updates to the last received file */        int nblocks = FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);        journal_t *journal;        void *handle;        if (current->journal_info) {                CDEBUG(D_INODE, "increasing refcount on %p\n",                       current->journal_info);                goto journal_start;        }        switch(op) {        case FSFILT_OP_RMDIR:        case FSFILT_OP_UNLINK:                /* delete one file + create/update logs for each stripe */                nblocks += FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;                break;        case FSFILT_OP_RENAME:                /* modify additional directory */                nblocks += FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);                /* no break */        case FSFILT_OP_SYMLINK:                /* additional block + block bitmap + GDT for long symlink */                nblocks += 3;                /* no break */        case FSFILT_OP_CREATE: {#if defined(EXT3_EXTENTS_FL) && defined(EXT3_INDEX_FL)                static int warned;                if (!warned) {                        if (!test_opt(inode->i_sb, EXTENTS)) {                                warned = 1;                        } else if (((EXT3_I(inode)->i_flags &                              cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL)) ==                              cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {                                CWARN("extent-mapped directory found - contact "                                      "CFS: support@clusterfs.com\n");                                warned = 1;                        }                }#endif                /* no break */        }        case FSFILT_OP_MKDIR:        case 
FSFILT_OP_MKNOD:                /* modify one inode + block bitmap + GDT */                nblocks += 3;                /* no break */        case FSFILT_OP_LINK:                /* modify parent directory */                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +                         FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);                /* create/update logs for each stripe */                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;                break;        case FSFILT_OP_SETATTR:                /* Setattr on inode */                nblocks += 1;                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +                         FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);                /* quota chown log for each stripe */                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;                break;        case FSFILT_OP_CANCEL_UNLINK:                /* blocks for log header bitmap update OR                 * blocks for catalog header bitmap update + unlink of logs */                nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +                        FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs;                break;        case FSFILT_OP_JOIN:                /* delete 2 file(file + array id) + create 1 file (array id)                  * create/update logs for each stripe */                nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);                               /*create array log for head file*/                 nblocks += 3;                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb));                /*update head file array */                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +                         FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);                break;        default: CERROR("unknown transaction 
start op %d\n", op);                LBUG();        }        LASSERT(current->journal_info == desc_private);        journal = EXT3_SB(inode->i_sb)->s_journal;        if (nblocks > journal->j_max_transaction_buffers) {                CWARN("too many credits %d for op %ux%u using %d instead\n",                      nblocks, op, logs, journal->j_max_transaction_buffers);                nblocks = journal->j_max_transaction_buffers;        } journal_start:        LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);        lock_24kernel();        handle = fsfilt_ext3_journal_start(inode, nblocks);        unlock_24kernel();        if (!IS_ERR(handle))                LASSERT(current->journal_info == handle);        else                CERROR("error starting handle for op %u (%u credits): rc %ld\n",                       op, nblocks, PTR_ERR(handle));        return handle;}/* * Calculate the number of buffer credits needed to write multiple pages in * a single ext3 transaction.  No, this shouldn't be here, but as yet ext3 * doesn't have a nice API for calculating this sort of thing in advance. * * See comment above ext3_writepage_trans_blocks for details.  We assume * no data journaling is being done, but it does allow for all of the pages * being non-contiguous.  If we are guaranteed contiguous pages we could * reduce the number of (d)indirect blocks a lot. * * With N blocks per page and P pages, for each inode we have at most: * N*P indirect * min(N*P, blocksize/4 + 1) dindirect blocks * niocount tindirect * * For the entire filesystem, we have at most: * min(sum(nindir + P), ngroups) bitmap blocks (from the above) * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) * objcount inode blocks * 1 superblock * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files * * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update. 
*/
static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso,
                                      int niocount, struct niobuf_local *nb)
{
        struct super_block *sb = fso->fso_dentry->d_inode->i_sb;
        struct ext3_sb_info *sbi = EXT3_SB(sb);
        /* filesystem blocks per page */
        const int blockpp = 1 << (CFS_PAGE_SHIFT - sb->s_blocksize_bits);
        /* bytes of file data mapped by one indirect block */
        const __u64 indir_span = EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits;
        struct niobuf_local *obj_nb = nb;
        __u64 next_indir;
        int nbitmaps = 0;
        int ngdblocks;
        int needed;
        int i;

        /* Per-object estimate; obj_nb walks the slice of nb[] owned by
         * each object in turn. */
        for (i = 0; i < objcount; i++) {
                int last = fso[i].fso_bufcnt - 1;
                /* two or more dindirect blocks in case we cross boundary */
                int ndind = (long)((obj_nb[last].offset - obj_nb[0].offset) >>
                                   sb->s_blocksize_bits) /
                        (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb));
                int dind = ndind > 0 ? ndind : 2;

                nbitmaps += min(fso[i].fso_bufcnt, dind);

                /* leaf, indirect, tindirect blocks for first block */
                nbitmaps += blockpp + 2;

                obj_nb += fso[i].fso_bufcnt;
        }

        /* Walk every buffer after the first one: a new indirect block is
         * charged whenever the offset leaves the current indirect window
         * or does not directly follow the previous buffer. */
        next_indir = nb[0].offset + indir_span;
        for (i = 1; i < niocount; i++) {
                int new_window = nb[i].offset >= next_indir;

                if (new_window)
                        next_indir = nb[i].offset + indir_span;

                if (new_window ||
                    nb[i].offset != nb[i - 1].offset + sb->s_blocksize)
                        nbitmaps++;     /* additional indirect */

                nbitmaps += blockpp;    /* each leaf in different group? */
        }

        /* Bitmap and group descriptor blocks cannot exceed what the
         * filesystem actually has. */
        ngdblocks = nbitmaps;
        if (nbitmaps > sbi->s_groups_count)
                nbitmaps = sbi->s_groups_count;
        if (ngdblocks > sbi->s_gdb_count)
                ngdblocks = sbi->s_gdb_count;

        needed = objcount + 1;  /* inodes + superblock */
        needed += nbitmaps + ngdblocks;

        /* last_rcvd update */
        needed += FSFILT_DATA_TRANS_BLOCKS(sb);

#ifdef CONFIG_QUOTA
        /* We assume that there will be 1 bit set in s_dquot.flags for each
         * quota file that is active.  This is at least true for now.
         */
        needed += hweight32(sb_any_quota_enabled(sb)) *
                FSFILT_SINGLEDATA_TRANS_BLOCKS(sb);
#endif

        return needed;
}

/* We have to start a huge journal transaction here to hold all of the
 * metadata for the pages being written here.  This is necessitated by
 * the fact that we do lots of prepare_write operations before we do

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?