📄 ext3-mballoc2-2.6-fc5.patch
字号:
Index: linux-2.6.16.i686/include/linux/ext3_fs.h===================================================================--- linux-2.6.16.i686.orig/include/linux/ext3_fs.h 2006-05-30 22:55:32.000000000 +0800+++ linux-2.6.16.i686/include/linux/ext3_fs.h 2006-05-30 23:02:59.000000000 +0800@@ -57,6 +57,14 @@ #define ext3_debug(f, a...) do {} while (0) #endif +#define EXT3_MULTIBLOCK_ALLOCATOR 1++#define EXT3_MB_HINT_MERGE 1+#define EXT3_MB_HINT_RESERVED 2+#define EXT3_MB_HINT_METADATA 4+#define EXT3_MB_HINT_FIRST 8+#define EXT3_MB_HINT_BEST 16+ /* * Special inodes numbers */@@ -383,6 +391,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x1000000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x2000000/* Extents debug */+#define EXT3_MOUNT_MBALLOC 0x4000000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt@@ -404,6 +413,14 @@ #define ext3_find_first_zero_bit ext2_find_first_zero_bit #define ext3_find_next_zero_bit ext2_find_next_zero_bit +#ifndef ext2_find_next_le_bit+#ifdef __LITTLE_ENDIAN+#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))+#else+#error "mballoc needs a patch for big-endian systems - CFS bug 10634"+#endif /* __LITTLE_ENDIAN */+#endif /* !ext2_find_next_le_bit */+ /* * Maximal mount counts between two filesystem checks */@@ -744,7 +753,9 @@ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,- unsigned long);+ unsigned long, int);+extern int ext3_new_block_old(handle_t *handle, struct inode *inode,+ unsigned long goal, int *errp); extern void ext3_free_blocks_sb (handle_t *, struct super_block *, unsigned long, unsigned long, int *); extern unsigned long ext3_count_free_blocks (struct super_block *);@@ -865,6 +874,17 @@ extern 
int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +/* mballoc.c */+extern long ext3_mb_stats;+extern long ext3_mb_max_to_scan;+extern int ext3_mb_init(struct super_block *, int);+extern int ext3_mb_release(struct super_block *);+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);+extern int ext3_mb_reserve_blocks(struct super_block *, int);+extern void ext3_mb_release_blocks(struct super_block *, int);+int __init init_ext3_proc(void);+void exit_ext3_proc(void);+ #endif /* __KERNEL__ */ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */Index: linux-2.6.16.i686/include/linux/ext3_fs_sb.h===================================================================--- linux-2.6.16.i686.orig/include/linux/ext3_fs_sb.h 2006-03-20 13:53:29.000000000 +0800+++ linux-2.6.16.i686/include/linux/ext3_fs_sb.h 2006-05-30 23:02:59.000000000 +0800@@ -21,8 +21,14 @@ #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h>+#include <linux/list.h> #endif #include <linux/rbtree.h>+#include <linux/proc_fs.h>++struct ext3_buddy_group_blocks;+struct ext3_mb_history;+#define EXT3_BB_MAX_BLOCKS /* * third extended-fs super-block data in memory@@ -78,6 +84,43 @@ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif++ /* for buddy allocator */+ struct ext3_group_info ***s_group_info;+ struct inode *s_buddy_cache;+ long s_blocks_reserved;+ spinlock_t s_reserve_lock;+ struct list_head s_active_transaction;+ struct list_head s_closed_transaction;+ struct list_head s_committed_transaction;+ spinlock_t s_md_lock;+ tid_t s_last_transaction;+ int s_mb_factor;+ unsigned short *s_mb_offsets, *s_mb_maxs;+ unsigned long s_stripe;++ /* history to debug policy */+ struct ext3_mb_history *s_mb_history;+ int s_mb_history_cur;+ int s_mb_history_max;+ struct proc_dir_entry *s_mb_proc;+ spinlock_t 
s_mb_history_lock;++ /* stats for buddy allocator */+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */+ atomic_t s_bal_success; /* we found long enough chunks */+ atomic_t s_bal_allocated; /* in blocks */+ atomic_t s_bal_ex_scanned; /* total extents scanned */+ atomic_t s_bal_goals; /* goal hits */+ atomic_t s_bal_breaks; /* too long searches */+ atomic_t s_bal_2orders; /* 2^order hits */+ spinlock_t s_bal_lock;+ unsigned long s_mb_buddies_generated;+ unsigned long long s_mb_generation_time; };++#define EXT3_GROUP_INFO(sb, group) \+ EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \+ [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)] #endif /* _LINUX_EXT3_FS_SB */Index: linux-2.6.16.i686/fs/ext3/super.c===================================================================--- linux-2.6.16.i686.orig/fs/ext3/super.c 2006-05-30 22:55:32.000000000 +0800+++ linux-2.6.16.i686/fs/ext3/super.c 2006-05-30 23:02:59.000000000 +0800@@ -392,6 +392,7 @@ struct ext3_super_block *es = sbi->s_es; int i; + ext3_mb_release(sb); ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal);@@ -640,6 +641,7 @@ Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug,+ Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_grpquota }; @@ -694,6 +695,9 @@ {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"},+ {Opt_mballoc, "mballoc"},+ {Opt_nomballoc, "nomballoc"},+ {Opt_stripe, "stripe=%u"}, {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"},@@ -1041,6 +1043,19 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break;+ case Opt_mballoc:+ set_opt(sbi->s_mount_opt, MBALLOC);+ break;+ case Opt_nomballoc:+ clear_opt(sbi->s_mount_opt, MBALLOC);+ break;+ case Opt_stripe:+ if (match_int(&args[0], &option))+ return 0;+ if (option < 0)+ return 0;+ sbi->s_stripe = option;+ break; default: printk (KERN_ERR "EXT3-fs: Unrecognized mount 
option \"%s\" "@@ -1766,6 +1771,7 @@ ext3_count_dirs(sb)); ext3_ext_init(sb);+ ext3_mb_init(sb, needs_recovery); lock_kernel(); return 0; @@ -2699,7 +2705,13 @@ static int __init init_ext3_fs(void) {- int err = init_ext3_xattr();+ int err;++ err = init_ext3_proc();+ if (err)+ return err;++ err = init_ext3_xattr(); if (err) return err; err = init_inodecache();@@ -2721,6 +2733,7 @@ unregister_filesystem(&ext3_fs_type); destroy_inodecache(); exit_ext3_xattr();+ exit_ext3_proc(); } int ext3_prep_san_write(struct inode *inode, long *blocks,Index: linux-2.6.16.i686/fs/ext3/extents.c===================================================================--- linux-2.6.16.i686.orig/fs/ext3/extents.c 2006-05-30 22:55:32.000000000 +0800+++ linux-2.6.16.i686/fs/ext3/extents.c 2006-05-30 23:02:59.000000000 +0800@@ -771,7 +771,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) continue;- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);+ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); } } kfree(ablocks);@@ -1428,7 +1428,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);+ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); return err; } @@ -1913,10 +1913,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh;- int i;+ int i, metadata = 0; if (IS_ERR(handle)) return PTR_ERR(handle);+ if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))+ metadata = 1; if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start;@@ -1928,7 +1930,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); }- ext3_free_blocks(handle, tree->inode, start, num);+ ext3_free_blocks(handle, tree->inode, start, 
num, metadata); } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len);Index: linux-2.6.16.i686/fs/ext3/inode.c===================================================================--- linux-2.6.16.i686.orig/fs/ext3/inode.c 2006-05-30 22:55:32.000000000 +0800+++ linux-2.6.16.i686/fs/ext3/inode.c 2006-05-30 23:02:59.000000000 +0800@@ -568,7 +568,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++)- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);+ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); return err; } @@ -1862,7 +1862,7 @@ } } - ext3_free_blocks(handle, inode, block_to_free, count);+ ext3_free_blocks(handle, inode, block_to_free, count, 1); } /**@@ -2035,7 +2035,7 @@ ext3_journal_test_restart(handle, inode); } - ext3_free_blocks(handle, inode, nr, 1);+ ext3_free_blocks(handle, inode, nr, 1, 1); if (parent_bh) { /*Index: linux-2.6.16.i686/fs/ext3/balloc.c===================================================================--- linux-2.6.16.i686.orig/fs/ext3/balloc.c 2006-03-20 13:53:29.000000000 +0800+++ linux-2.6.16.i686/fs/ext3/balloc.c 2006-05-30 23:02:59.000000000 +0800@@ -80,7 +80,7 @@ * * Return buffer_head on success or NULL in case of failure. 
*/-static struct buffer_head *+struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc;@@ -491,24 +491,6 @@ return; } -/* Free given blocks, update quota and i_blocks field */-void ext3_free_blocks(handle_t *handle, struct inode *inode,- unsigned long block, unsigned long count)-{- struct super_block * sb;- int dquot_freed_blocks;-- sb = inode->i_sb;- if (!sb) {- printk ("ext3_free_blocks: nonexistent device");- return;- }- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);- if (dquot_freed_blocks)- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);- return;-}- /* * For ext3 allocations, we must not reuse any blocks which are * allocated in the bitmap buffer's "last committed data" copy. This@@ -1154,7 +1136,7 @@ * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */-int ext3_new_block(handle_t *handle, struct inode *inode,+int ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, int *errp) { struct buffer_head *bitmap_bh = NULL;Index: linux-2.6.16.i686/fs/ext3/xattr.c===================================================================--- linux-2.6.16.i686.orig/fs/ext3/xattr.c 2006-03-20 13:53:29.000000000 +0800+++ linux-2.6.16.i686/fs/ext3/xattr.c 2006-05-30 23:02:59.000000000 +0800@@ -484,7 +484,7 @@ ea_bdebug(bh, "refcount now=0; freeing"); if (ce) mb_cache_entry_free(ce);- ext3_free_blocks(handle, inode, bh->b_blocknr, 1);+ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); get_bh(bh); ext3_forget(handle, 1, inode, bh, bh->b_blocknr); } else {@@ -804,7 +804,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed:- ext3_free_blocks(handle, inode, block, 1);+ ext3_free_blocks(handle, inode, block, 1, 1); error = -EIO; goto cleanup; }Index: linux-2.6.16.i686/fs/ext3/mballoc.c===================================================================--- linux-2.6.16.i686.orig/fs/ext3/mballoc.c 2006-05-31 
04:14:15.752410384 +0800+++ linux-2.6.16.i686/fs/ext3/mballoc.c 2006-05-30 23:03:38.000000000 +0800@@ -0,0 +1,2726 @@+/*+ * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com+ * Written by Alex Tomas <alex@clusterfs.com>+ *+ * This program is free software; you can redistribute it and/or modify+ * it under the terms of the GNU General Public License version 2 as+ * published by the Free Software Foundation.+ *+ * This program is distributed in the hope that it will be useful,+ * but WITHOUT ANY WARRANTY; without even the implied warranty of+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the+ * GNU General Public License for more details.+ *+ * You should have received a copy of the GNU General Public License+ * along with this program; if not, write to the Free Software+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA+ */+++/*+ * mballoc.c contains the multi-block allocation routines+ */++#include <linux/config.h>+#include <linux/time.h>+#include <linux/fs.h>+#include <linux/namei.h>+#include <linux/jbd.h>+#include <linux/ext3_fs.h>+#include <linux/ext3_jbd.h>+#include <linux/quotaops.h>+#include <linux/buffer_head.h>+#include <linux/module.h>+#include <linux/swap.h>+#include <linux/proc_fs.h>+#include <linux/pagemap.h>+#include <linux/seq_file.h>++/*+ * TODO:+ * - bitmap read-ahead (proposed by Oleg Drokin aka green)+ * - track min/max extents in each group for better group selection+ * - mb_mark_used() may allocate chunk right after splitting buddy+ * - special flag to advise the allocator to look for requested + N blocks+ * this may improve interaction between extents and mballoc+ * - tree of groups sorted by number of free blocks+ * - percpu reservation code (hotpath)+ * - error handling+ */++/*+ * with AGGRESSIVE_CHECK the allocator runs consistency checks over+ * its structures. These checks slow things down a lot.+ */+#define AGGRESSIVE_CHECK__++/* mb_debug() expands to printk() only when MB_DEBUG is defined; the macro+ * defined just below is MB_DEBUG__, so it is a no-op unless MB_DEBUG is set elsewhere */+#define MB_DEBUG__+#ifdef MB_DEBUG+#define mb_debug(fmt,a...)
printk(fmt, ##a)+#else+#define mb_debug(fmt,a...)+#endif++/*+ * with EXT3_MB_HISTORY mballoc stores the last N allocations in memory+ * and you can monitor them in /proc/fs/ext3/<dev>/mb_history+ */+#define EXT3_MB_HISTORY++/*+ * How long mballoc can look for a best extent (measured in found extents)+ */+long ext3_mb_max_to_scan = 500;++/*+ * How long mballoc must look for a best extent+ */+long ext3_mb_min_to_scan = 30;++/*+ * with 'ext3_mb_stats' the allocator will collect stats that will be+ * shown at umount. Collecting them has a cost, though!+ */++long ext3_mb_stats = 1;++/*+ * for which requests use 2^N search using buddies+ */+long ext3_mb_order2_reqs = 8;++#ifdef EXT3_BB_MAX_BLOCKS+#undef EXT3_BB_MAX_BLOCKS+#endif+#define EXT3_BB_MAX_BLOCKS 30 /* replaces the empty definition added to ext3_fs_sb.h by this patch */++struct ext3_free_metadata {+ unsigned short group; /* presumably a block group number - confirm with users */+ unsigned short num; /* presumably the count of valid entries in blocks[] - confirm */+ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; /* fixed capacity: EXT3_BB_MAX_BLOCKS (30) entries */+ struct list_head list; /* list linkage */+};++struct ext3_group_info {+ unsigned long bb_state;+ unsigned long bb_tid;+ struct ext3_free_metadata *bb_md_cur;+ unsigned short bb_first_free;+ unsigned short bb_free;+ unsigned short bb_fragments;+ unsigned short bb_counters[]; /* flexible array member: must stay last; length is chosen by whoever allocates the struct */+};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -