fsfilt_ext3.c
来自「lustre 1.6.5 source code」· C语言 代码 · 共 1,853 行 · 第 1/5 页
C
1,853 行
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * * lustre/lib/fsfilt_ext3.c * Lustre filesystem abstraction routines * * Copyright (C) 2002, 2003 Cluster File Systems, Inc. * Author: Andreas Dilger <adilger@clusterfs.com> * * This file is part of Lustre, http://www.lustre.org. * * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#define DEBUG_SUBSYSTEM S_FILTER#include <linux/init.h>#include <linux/module.h>#include <linux/fs.h>#include <linux/jbd.h>#include <linux/slab.h>#include <linux/pagemap.h>#include <linux/quotaops.h>#include <linux/ext3_fs.h>#include <linux/ext3_jbd.h>#include <linux/version.h>#include <linux/bitops.h>#include <linux/quota.h>#include <linux/quotaio_v1.h>#include <linux/quotaio_v2.h>#include <linux/parser.h>#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#include <linux/ext3_xattr.h>#else#include <ext3/xattr.h>#endif#include <libcfs/kp30.h>#include <lustre_fsfilt.h>#include <obd.h>#include <lustre_quota.h>#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#include <linux/iobuf.h>#endif#include <linux/lustre_compat25.h>#include <linux/lprocfs_status.h>#ifdef EXT3_MULTIBLOCK_ALLOCATOR#include <linux/ext3_extents.h>#endif#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)#define FSFILT_DATA_TRANS_BLOCKS(sb) EXT3_DATA_TRANS_BLOCKS#define FSFILT_DELETE_TRANS_BLOCKS(sb) EXT3_DELETE_TRANS_BLOCKS#else#define FSFILT_DATA_TRANS_BLOCKS(sb) 
EXT3_DATA_TRANS_BLOCKS(sb)#define FSFILT_DELETE_TRANS_BLOCKS(sb) EXT3_DELETE_TRANS_BLOCKS(sb)#endif#ifdef EXT3_SINGLEDATA_TRANS_BLOCKS_HAS_SB/* for kernels 2.6.18 and later */#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS(sb)#else#define FSFILT_SINGLEDATA_TRANS_BLOCKS(sb) EXT3_SINGLEDATA_TRANS_BLOCKS#endif#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))#define fsfilt_ext3_journal_start(inode, nblocks) \ journal_start(EXT3_JOURNAL(inode),nblocks)#define fsfilt_ext3_journal_stop(handle) journal_stop(handle)#else#define fsfilt_ext3_journal_start(inode, nblocks) ext3_journal_start(inode, nblocks)#define fsfilt_ext3_journal_stop(handle) ext3_journal_stop(handle)#endifstatic cfs_mem_cache_t *fcb_cache;struct fsfilt_cb_data { struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ fsfilt_cb_t cb_func; /* MDS/OBD completion function */ struct obd_device *cb_obd; /* MDS/OBD completion device */ __u64 cb_last_rcvd; /* MDS/OST last committed operation */ void *cb_data; /* MDS/OST completion function data */};#ifndef EXT3_XATTR_INDEX_TRUSTED /* temporary until we hit l28 kernel */#define EXT3_XATTR_INDEX_TRUSTED 4#endifstatic char *fsfilt_ext3_get_label(struct super_block *sb){ return EXT3_SB(sb)->s_es->s_volume_name;}static int fsfilt_ext3_set_label(struct super_block *sb, char *label){ /* see e.g. 
fsfilt_ext3_write_record() */ journal_t *journal; handle_t *handle; int err; journal = EXT3_SB(sb)->s_journal; lock_24kernel(); handle = journal_start(journal, 1); unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't start transaction\n"); return(PTR_ERR(handle)); } err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); if (err) goto out; memcpy(EXT3_SB(sb)->s_es->s_volume_name, label, sizeof(EXT3_SB(sb)->s_es->s_volume_name)); err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);out: lock_24kernel(); journal_stop(handle); unlock_24kernel(); return(err);}static char *fsfilt_ext3_uuid(struct super_block *sb){ return EXT3_SB(sb)->s_es->s_uuid;}#ifdef HAVE_DISK_INODE_VERSION/* * Get the 64-bit version for an inode. */static __u64 fsfilt_ext3_get_version(struct inode *inode){ return EXT3_I(inode)->i_fs_version;}/* * Set the 64-bit version and return the old version. */static __u64 fsfilt_ext3_set_version(struct inode *inode, __u64 new_version){ __u64 old_version = EXT3_I(inode)->i_fs_version; (EXT3_I(inode))->i_fs_version = new_version; return old_version;}#endif/* * We don't currently need any additional blocks for rmdir and * unlink transactions because we are storing the OST oa_id inside * the inode (which we will be changing anyways as part of this * transaction). 
*/

/*
 * Work out how many journal credits operation \a op needs and start a
 * journal handle on the filesystem of \a inode.
 *
 * \param inode         inode whose superblock/journal the handle is started on
 * \param op            one of the FSFILT_OP_* codes handled in the switch
 *                      below; any other value is logged and hits LBUG()
 * \param desc_private  asserted to equal current->journal_info on the path
 *                      where no handle was active on entry
 * \param logs          multiplier applied to the per-stripe log credits
 *
 * \retval the handle returned by fsfilt_ext3_journal_start(); it may be an
 *         error pointer (checked with IS_ERR() below), in which case the
 *         failure has already been logged
 *
 * If current->journal_info is already set on entry, the per-op computation
 * and the journal-size cap are both skipped and the handle is started with
 * only the base credit count.
 */
static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private,
                               int logs)
{
        /* For updates to the last received file */
        int nblocks = FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
        journal_t *journal;     /* only assigned on the non-nested path */
        void *handle;

        /* A handle is already active for this task: start with the base
         * credits only. */
        if (current->journal_info) {
                CDEBUG(D_INODE, "increasing refcount on %p\n",
                       current->journal_info);
                goto journal_start;
        }

        /*
         * Credits accumulate through deliberate fall-through:
         * RENAME -> SYMLINK -> CREATE -> MKDIR/MKNOD -> LINK.
         * Each label adds its own share and then continues into the next.
         */
        switch(op) {
        case FSFILT_OP_RMDIR:
        case FSFILT_OP_UNLINK:
                /* delete one file + create/update logs for each stripe */
                nblocks += FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);
                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
                break;
        case FSFILT_OP_RENAME:
                /* modify additional directory */
                nblocks += FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb);
                /* no break */
        case FSFILT_OP_SYMLINK:
                /* additional block + block bitmap + GDT for long symlink */
                nblocks += 3;
                /* no break */
        case FSFILT_OP_CREATE: {
#if defined(EXT3_EXTENTS_FL) && defined(EXT3_INDEX_FL)
                /* Diagnostic only, adds no credits.  The static flag makes
                 * the check run until it either sees the EXTENTS mount
                 * option off or has warned once. */
                static int warned;
                if (!warned) {
                        if (!test_opt(inode->i_sb, EXTENTS)) {
                                warned = 1;
                        } else if (((EXT3_I(inode)->i_flags &
                              cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL)) ==
                              cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {
                                CWARN("extent-mapped directory found - contact "
                                      "CFS: support@clusterfs.com\n");
                                warned = 1;
                        }
                }
#endif
                /* no break */
        }
        case FSFILT_OP_MKDIR:
        case FSFILT_OP_MKNOD:
                /* modify one inode + block bitmap + GDT */
                nblocks += 3;
                /* no break */
        case FSFILT_OP_LINK:
                /* modify parent directory */
                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                        FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
                /* create/update logs for each stripe */
                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
                break;
        case FSFILT_OP_SETATTR:
                /* Setattr on inode */
                nblocks += 1;
                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                        FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
                /* quota chown log for each stripe */
                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb)) * logs;
                break;
        case FSFILT_OP_CANCEL_UNLINK:
                /* blocks for log header bitmap update OR
                 * blocks for catalog header bitmap update + unlink of logs.
                 * Note: this is an assignment, not "+=", so the base credits
                 * set at the top of the function are replaced here. */
                nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) +
                        FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb) * logs;
                break;
        case FSFILT_OP_JOIN:
                /* delete 2 file(file + array id) + create 1 file (array id)
                 * create/update logs for each stripe */
                nblocks += 2 * FSFILT_DELETE_TRANS_BLOCKS(inode->i_sb);

                /* create array log for head file */
                nblocks += 3;
                nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                            FSFILT_SINGLEDATA_TRANS_BLOCKS(inode->i_sb));
                /* update head file array */
                nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS +
                        FSFILT_DATA_TRANS_BLOCKS(inode->i_sb);
                break;
        default: CERROR("unknown transaction start op %d\n", op);
                LBUG();
        }

        /* This point is reached only when journal_info was NULL at the check
         * on entry. */
        LASSERT(current->journal_info == desc_private);
        journal = EXT3_SB(inode->i_sb)->s_journal;
        /* Cap the request at the journal's per-transaction buffer limit. */
        if (nblocks > journal->j_max_transaction_buffers) {
                CWARN("too many credits %d for op %ux%u using %d instead\n",
                       nblocks, op, logs, journal->j_max_transaction_buffers);
                nblocks = journal->j_max_transaction_buffers;
        }

 journal_start:
        LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
        lock_24kernel();
        handle = fsfilt_ext3_journal_start(inode, nblocks);
        unlock_24kernel();

        /* On success the new handle must have become the task's current
         * journal handle; on failure just log and pass the error pointer on. */
        if (!IS_ERR(handle))
                LASSERT(current->journal_info == handle);
        else
                CERROR("error starting handle for op %u (%u credits): rc %ld\n",
                       op, nblocks, PTR_ERR(handle));
        return handle;
}

/*
 * Calculate the number of buffer credits needed to write multiple pages in
 * a single ext3 transaction.  No, this shouldn't be here, but as yet ext3
 * doesn't have a nice API for calculating this sort of thing in advance.
 *
 * See comment above ext3_writepage_trans_blocks for details.  We assume
 * no data journaling is being done, but it does allow for all of the pages
 * being non-contiguous.  If we are guaranteed contiguous pages we could
 * reduce the number of (d)indirect blocks a lot.
* * With N blocks per page and P pages, for each inode we have at most: * N*P indirect * min(N*P, blocksize/4 + 1) dindirect blocks * niocount tindirect * * For the entire filesystem, we have at most: * min(sum(nindir + P), ngroups) bitmap blocks (from the above) * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) * objcount inode blocks * 1 superblock * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files * * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update. */static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso, int niocount, struct niobuf_local *nb){ struct super_block *sb = fso->fso_dentry->d_inode->i_sb; __u64 next_indir; const int blockpp = 1 << (CFS_PAGE_SHIFT - sb->s_blocksize_bits); int nbitmaps = 0, ngdblocks; int needed = objcount + 1; /* inodes + superblock */ int i, j; for (i = 0, j = 0; i < objcount; i++, fso++) { /* two or more dindirect blocks in case we cross boundary */ int ndind = (long)((nb[j + fso->fso_bufcnt - 1].offset - nb[j].offset) >> sb->s_blocksize_bits) / (EXT3_ADDR_PER_BLOCK(sb) * EXT3_ADDR_PER_BLOCK(sb)); nbitmaps += min(fso->fso_bufcnt, ndind > 0 ? ndind : 2); /* leaf, indirect, tindirect blocks for first block */ nbitmaps += blockpp + 2; j += fso->fso_bufcnt; } next_indir = nb[0].offset + (EXT3_ADDR_PER_BLOCK(sb) << sb->s_blocksize_bits); for (i = 1; i < niocount; i++) { if (nb[i].offset >= next_indir) { nbitmaps++; /* additional indirect */ next_indir = nb[i].offset + (EXT3_ADDR_PER_BLOCK(sb)<<sb->s_blocksize_bits); } else if (nb[i].offset != nb[i - 1].offset + sb->s_blocksize) { nbitmaps++; /* additional indirect */ } nbitmaps += blockpp; /* each leaf in different group? 
*/ } ngdblocks = nbitmaps; if (nbitmaps > EXT3_SB(sb)->s_groups_count) nbitmaps = EXT3_SB(sb)->s_groups_count; if (ngdblocks > EXT3_SB(sb)->s_gdb_count) ngdblocks = EXT3_SB(sb)->s_gdb_count; needed += nbitmaps + ngdblocks; /* last_rcvd update */ needed += FSFILT_DATA_TRANS_BLOCKS(sb);#if defined(CONFIG_QUOTA) /* We assume that there will be 1 bit set in s_dquot.flags for each * quota file that is active. This is at least true for now. */ needed += hweight32(sb_any_quota_enabled(sb)) * FSFILT_SINGLEDATA_TRANS_BLOCKS(sb);#endif return needed;}/* We have to start a huge journal transaction here to hold all of the * metadata for the pages being written here. This is necessitated by * the fact that we do lots of prepare_write operations before we do
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?