📄 ext3-mballoc3-core.patch
字号:
+ */+#define MB_DEFAULT_MIN_TO_SCAN 10++/*+ * How many groups mballoc will scan looking for the best chunk+ */+#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5++/*+ * with 'ext3_mb_stats' allocator will collect stats that will be+ * shown at umount. The collecting costs though!+ */+#define MB_DEFAULT_STATS 1++/*+ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served+ * by the stream allocator, which purpose is to pack requests+ * as close each to other as possible to produce smooth I/O traffic+ */+#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */++/*+ * for which requests use 2^N search using buddies+ */+#define MB_DEFAULT_ORDER2_REQS 8++/*+ * default stripe size = 1MB+ */+#define MB_DEFAULT_STRIPE 256++static kmem_cache_t *ext3_pspace_cachep = NULL;++#ifdef EXT3_BB_MAX_BLOCKS+#undef EXT3_BB_MAX_BLOCKS+#endif+#define EXT3_BB_MAX_BLOCKS 30++struct ext3_free_metadata {+ unsigned short group;+ unsigned short num;+ unsigned short blocks[EXT3_BB_MAX_BLOCKS];+ struct list_head list;+};++struct ext3_group_info {+ unsigned long bb_state;+ unsigned long bb_tid;+ struct ext3_free_metadata *bb_md_cur;+ unsigned short bb_first_free;+ unsigned short bb_free;+ unsigned short bb_fragments;+ struct list_head bb_prealloc_list;+#ifdef DOUBLE_CHECK+ void *bb_bitmap;+#endif+ unsigned short bb_counters[];+};++#define EXT3_GROUP_INFO_NEED_INIT_BIT 0+#define EXT3_GROUP_INFO_LOCKED_BIT 1++#define EXT3_MB_GRP_NEED_INIT(grp) \+ (test_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &(grp)->bb_state))+++struct ext3_prealloc_space {+ struct list_head pa_inode_list;+ struct list_head pa_group_list;+ union {+ struct list_head pa_tmp_list;+ struct rcu_head pa_rcu;+ } u;+ spinlock_t pa_lock;+ atomic_t pa_count;+ unsigned pa_deleted;+ unsigned long pa_pstart; /* phys. block */+ unsigned long pa_lstart; /* log. block */+ unsigned short pa_len; /* len of preallocated chunk */+ unsigned short pa_free; /* how many blocks are free */+ unsigned short pa_linear; /* consumed in one direction+ * strictly, for group prealloc */+ spinlock_t *pa_obj_lock;+ struct inode *pa_inode; /* hack, for history only */+};+++struct ext3_free_extent {+ unsigned long fe_logical;+ unsigned long fe_start;+ unsigned long fe_group;+ unsigned long fe_len;+};++/*+ * Locality group:+ * we try to group all related changes together+ * so that writeback can flush/allocate them together as well+ */+struct ext3_locality_group {+ /* for allocator */+ struct semaphore lg_sem; /* to serialize allocates */+ struct list_head lg_prealloc_list;/* list of preallocations */+ spinlock_t lg_prealloc_lock;+};++struct ext3_allocation_context {+ struct inode *ac_inode;+ struct super_block *ac_sb;++ /* original request */+ struct ext3_free_extent ac_o_ex;++ /* goal request (after normalization) */+ struct ext3_free_extent ac_g_ex;++ /* the best found extent */+ struct ext3_free_extent ac_b_ex;++ /* copy of the bext found extent taken before preallocation efforts */+ struct ext3_free_extent ac_f_ex;++ /* number of iterations done. we have to track to limit searching */+ unsigned long ac_ex_scanned;+ __u16 ac_groups_scanned;+ __u16 ac_found;+ __u16 ac_tail;+ __u16 ac_buddy;+ __u16 ac_flags; /* allocation hints */+ __u8 ac_status;+ __u8 ac_criteria;+ __u8 ac_repeats;+ __u8 ac_2order; /* if request is to allocate 2^N blocks and+ * N > 0, the field stores N, otherwise 0 */+ __u8 ac_op; /* operation, for history only */+ struct page *ac_bitmap_page;+ struct page *ac_buddy_page;+ struct ext3_prealloc_space *ac_pa;+ struct ext3_locality_group *ac_lg;+};++#define AC_STATUS_CONTINUE 1+#define AC_STATUS_FOUND 2+#define AC_STATUS_BREAK 3++struct ext3_mb_history {+ struct ext3_free_extent orig; /* orig allocation */+ struct ext3_free_extent goal; /* goal allocation */+ struct ext3_free_extent result; /* result allocation */+ unsigned pid;+ unsigned ino;+ __u16 found; /* how many extents have been found */+ __u16 groups; /* how many groups have been scanned */+ __u16 tail; /* what tail broke some buddy */+ __u16 buddy; /* buddy the tail ^^^ broke */+ __u16 flags;+ __u8 cr:3; /* which phase the result extent was found at */+ __u8 op:4;+ __u8 merged:1;+};++struct ext3_buddy {+ struct page *bd_buddy_page;+ void *bd_buddy;+ struct page *bd_bitmap_page;+ void *bd_bitmap;+ struct ext3_group_info *bd_info;+ struct super_block *bd_sb;+ __u16 bd_blkbits;+ __u16 bd_group;+};+#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bitmap)+#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_buddy)++#ifndef EXT3_MB_HISTORY+#define ext3_mb_store_history(ac)+#else+static void ext3_mb_store_history(struct ext3_allocation_context *ac);+#endif++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)++static struct proc_dir_entry *proc_root_ext3;++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);+struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);+unsigned long ext3_new_blocks_old(handle_t *handle, struct inode *inode,+ unsigned long goal, unsigned long *count, int *errp);+void ext3_mb_release_blocks(struct super_block *, int);+void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);+void ext3_mb_free_committed_blocks(struct super_block *);+void ext3_mb_generate_from_pa(struct super_block *sb, void *bitmap, int group);+void ext3_mb_free_consumed_preallocations(struct ext3_allocation_context *ac);+void ext3_mb_return_to_preallocation(struct inode *inode, struct ext3_buddy *e3b,+ sector_t block, int count);+void ext3_mb_show_ac(struct ext3_allocation_context *ac);+void ext3_mb_check_with_pa(struct ext3_buddy *e3b, int first, int count);+void ext3_mb_put_pa(struct ext3_allocation_context *, struct super_block *, struct ext3_prealloc_space *pa);+int ext3_mb_init_per_dev_proc(struct super_block *sb);+int ext3_mb_destroy_per_dev_proc(struct super_block *sb);++/*+ * Calculate the block group number and offset, given a block number+ */+static void ext3_get_group_no_and_offset(struct super_block *sb,+ unsigned long blocknr,+ unsigned long *blockgrpp,+ unsigned long *offsetp)+{+ struct ext3_super_block *es = EXT3_SB(sb)->s_es;+ unsigned long offset;++ blocknr = blocknr - le32_to_cpu(es->s_first_data_block);+ offset = blocknr % EXT3_BLOCKS_PER_GROUP(sb);+ blocknr = blocknr / EXT3_BLOCKS_PER_GROUP(sb);+ if (offsetp)+ *offsetp = offset;+ if (blockgrpp)+ *blockgrpp = blocknr;++}++static inline void+ext3_lock_group(struct super_block *sb, int group)+{+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT,+ &EXT3_GROUP_INFO(sb, group)->bb_state);+}++static inline void+ext3_unlock_group(struct super_block *sb, int group)+{+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT,+ &EXT3_GROUP_INFO(sb, group)->bb_state);+}++static inline int+ext3_is_group_locked(struct super_block *sb, int group)+{+ return bit_spin_is_locked(EXT3_GROUP_INFO_LOCKED_BIT,+ &EXT3_GROUP_INFO(sb, group)->bb_state);+}++unsigned long ext3_grp_offs_to_block(struct super_block *sb,+ struct ext3_free_extent *fex)+{+ unsigned long block;++ block = (unsigned long) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb)+ + fex->fe_start+ + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);+ return block;+}++#if BITS_PER_LONG == 64+#define mb_correct_addr_and_bit(bit,addr) \+{ \+ bit += ((unsigned long) addr & 7UL) << 3; \+ addr = (void *) ((unsigned long) addr & ~7UL); \+}+#elif BITS_PER_LONG == 32+#define mb_correct_addr_and_bit(bit,addr) \+{ \+ bit += ((unsigned long) addr & 3UL) << 3; \+ addr = (void *) ((unsigned long) addr & ~3UL); \+}+#else+#error "how many bits you are?!"+#endif++static inline int mb_test_bit(int bit, void *addr)+{+ mb_correct_addr_and_bit(bit,addr);+ return ext2_test_bit(bit, addr);+}++static inline void mb_set_bit(int bit, void *addr)+{+ mb_correct_addr_and_bit(bit,addr);+ ext2_set_bit(bit, addr);+}++static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)+{+ mb_correct_addr_and_bit(bit,addr);+ ext2_set_bit_atomic(lock, bit, addr);+}++static inline void mb_clear_bit(int bit, void *addr)+{+ mb_correct_addr_and_bit(bit,addr);+ ext2_clear_bit(bit, addr);+}++static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)+{+ mb_correct_addr_and_bit(bit,addr);+ ext2_clear_bit_atomic(lock, bit, addr);+}++static inline int mb_find_next_zero_bit(void *addr, int max, int start)+{+ int fix;+#if BITS_PER_LONG == 64+ fix = ((unsigned long) addr & 7UL) << 3;+ addr = (void *) ((unsigned long) addr & ~7UL);+#elif BITS_PER_LONG == 32+ fix = ((unsigned long) addr & 3UL) << 3;+ addr = (void *) ((unsigned long) addr & ~3UL);+#else+#error "how many bits you are?!"+#endif+ max += fix;+ start += fix;+ return ext2_find_next_zero_bit(addr, max, start) - fix;+}++static inline int mb_find_next_bit(void *addr, int max, int start)+{+ int fix;+#if BITS_PER_LONG == 64+ fix = ((unsigned long) addr & 7UL) << 3;+ addr = (void *) ((unsigned long) addr & ~7UL);+#elif BITS_PER_LONG == 32+ fix = ((unsigned long) addr & 3UL) << 3;+ addr = (void *) ((unsigned long) addr & ~3UL);+#else+#error "how many bits you are?!"+#endif+ max += fix;+ start += fix;++#ifdef __BIG_ENDIAN+#else+ return find_next_bit(addr, max, start) - fix;+#endif+}++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)+{+ char *bb;++ BUG_ON(EXT3_MB_BITMAP(e3b) == EXT3_MB_BUDDY(e3b));+ BUG_ON(max == NULL);++ if (order > e3b->bd_blkbits + 1) {+ *max = 0;+ return NULL;+ }++ /* at order 0 we see each particular block */+ *max = 1 << (e3b->bd_blkbits + 3);+ if (order == 0)+ return EXT3_MB_BITMAP(e3b);++ bb = EXT3_MB_BUDDY(e3b) + EXT3_SB(e3b->bd_sb)->s_mb_offsets[order];+ *max = EXT3_SB(e3b->bd_sb)->s_mb_maxs[order];++ return bb;+}++#ifdef DOUBLE_CHECK+void mb_free_blocks_double(struct inode *inode, struct ext3_buddy *e3b,+ int first, int count)+{+ int i;+ struct super_block *sb = e3b->bd_sb;++ if (unlikely(e3b->bd_info->bb_bitmap == NULL))+ return;+ BUG_ON(!ext3_is_group_locked(sb, e3b->bd_group));+ for (i = 0; i < count; i++) {+ if (!mb_test_bit(first + i, e3b->bd_info->bb_bitmap)) {+ unsigned long blocknr;+ blocknr = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb);+ blocknr += first + i;+ blocknr +=+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);++ ext3_error(sb, __FUNCTION__, "double-free of inode"+ " %lu's block %lu(bit %u in group %u)\n",+ inode ? inode->i_ino : 0, blocknr,+ first + i, e3b->bd_group);+ }+ mb_clear_bit(first + i, e3b->bd_info->bb_bitmap);+ }+}++void mb_mark_used_double(struct ext3_buddy *e3b, int first, int count)+{+ int i;+ if (unlikely(e3b->bd_info->bb_bitmap == NULL))+ return;+ BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group));+ for (i = 0; i < count; i++) {+ BUG_ON(mb_test_bit(first + i, e3b->bd_info->bb_bitmap));+ mb_set_bit(first + i, e3b->bd_info->bb_bitmap);+ }+}++void mb_cmp_bitmaps(struct ext3_buddy *e3b, void *bitmap)+{+ if (memcmp(e3b->bd_info->bb_bitmap, bitmap, e3b->bd_sb->s_blocksize)) {+ unsigned char *b1, *b2;+ int i;+ b1 = (unsigned char *) e3b->bd_info->bb_bitmap;+ b2 = (unsigned char *) bitmap;+ for (i = 0; i < e3b->bd_sb->s_blocksize; i++) {+ if (b1[i] != b2[i]) {+ printk("corruption in group %u at byte %u(%u): "+ "%x in copy != %x on disk/prealloc\n",+ e3b->bd_group, i, i * 8, b1[i], b2[i]);+ BUG();+ }+ }+ }+}++#else+#define mb_free_blocks_double(a,b,c,d)+#define mb_mark_used_double(a,b,c)+#define mb_cmp_bitmaps(a,b)+#endif++#ifdef AGGRESSIVE_CHECK++#define MB_CHECK_ASSERT(assert) \+do { \+ if (!(assert)) { \+ printk (KERN_EMERG \+ "Assertion failure in %s() at %s:%d: \"%s\"\n", \+ function, file, line, # assert); \+ BUG(); \+ } \+} while (0)++static int __mb_check_buddy(struct ext3_buddy *e3b, char *file,+ const char *function, int line)+{+ struct super_block *sb = e3b->bd_sb;+ int order = e3b->bd_blkbits + 1;+ int max, max2, i, j, k, count;+ struct ext3_group_info *grp;+ int fragments = 0, fstart;+ struct list_head *cur;+ void *buddy, *buddy2;++ if (!test_opt(sb, MBALLOC))+ return 0;++ {+ static int mb_check_counter = 0;+ if (mb_check_counter++ % 100 != 0)+ return 0;+ }++ while (order > 1) {+ buddy = mb_find_buddy(e3b, order, &max);+ MB_CHECK_ASSERT(buddy);+ buddy2 = mb_find_buddy(e3b, order - 1, &max2);+ MB_CHECK_ASSERT(buddy2);+ MB_CHECK_ASSERT(buddy != buddy2);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -