📄 ext3-mballoc2-2.6-fc5.patch
Font size:
+
+	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
+		unsigned long start;
+		start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
+			ex.fe_start + le32_to_cpu(es->s_first_data_block));
+		if (start % sbi->s_stripe == 0) {
+			ac->ac_found++;
+			ac->ac_b_ex = ex;
+			ext3_mb_use_best_found(ac, e3b);
+		}
+	} else if (max >= ac->ac_g_ex.fe_len) {
+		J_ASSERT(ex.fe_len > 0);
+		J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+		J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+		ac->ac_found++;
+		ac->ac_b_ex = ex;
+		ext3_mb_use_best_found(ac, e3b);
+	} else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
+		/* Sometimes, caller may want to merge even small
+		 * number of blocks to an existing extent */
+		J_ASSERT(ex.fe_len > 0);
+		J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
+		J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
+		ac->ac_found++;
+		ac->ac_b_ex = ex;
+		ext3_mb_use_best_found(ac, e3b);
+	}
+	ext3_unlock_group(ac->ac_sb, group);
+
+	ext3_mb_release_desc(e3b);
+
+	return 0;
+}
+
+/*
+ * The routine scans buddy structures (not bitmap!) from given order
+ * to max order and tries to find big enough chunk to satisfy the req
+ */
+static void ext3_mb_simple_scan_group(struct ext3_allocation_context *ac,
+					struct ext3_buddy *e3b)
+{
+	struct super_block *sb = ac->ac_sb;
+	struct ext3_group_info *grp = e3b->bd_info;
+	void *buddy;
+	int i, k, max;
+
+	J_ASSERT(ac->ac_2order > 0);
+	for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
+		if (grp->bb_counters[i] == 0)
+			continue;
+
+		buddy = mb_find_buddy(e3b, i, &max);
+		if (buddy == NULL) {
+			printk(KERN_ALERT "looking for wrong order?\n");
+			break;
+		}
+
+		k = mb_find_next_zero_bit(buddy, max, 0);
+		J_ASSERT(k < max);
+
+		ac->ac_found++;
+
+		ac->ac_b_ex.fe_len = 1 << i;
+		ac->ac_b_ex.fe_start = k << i;
+		ac->ac_b_ex.fe_group = e3b->bd_group;
+
+		ext3_mb_use_best_found(ac, e3b);
+		J_ASSERT(ac->ac_b_ex.fe_len == ac->ac_g_ex.fe_len);
+
+		if (unlikely(ext3_mb_stats))
+			atomic_inc(&EXT3_SB(sb)->s_bal_2orders);
+
+		break;
+	}
+}
+
+/*
+ * The routine scans the group and measures all found extents.
+ * In order to optimize scanning, caller must pass number of
+ * free blocks in the group, so the routine can know upper limit.
+ */
+static void ext3_mb_complex_scan_group(struct ext3_allocation_context *ac,
+					struct ext3_buddy *e3b)
+{
+	struct super_block *sb = ac->ac_sb;
+	void *bitmap = EXT3_MB_BITMAP(e3b);
+	struct ext3_free_extent ex;
+	int i, free;
+
+	free = e3b->bd_info->bb_free;
+	J_ASSERT(free > 0);
+
+	i = e3b->bd_info->bb_first_free;
+
+	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
+		i = mb_find_next_zero_bit(bitmap, EXT3_BLOCKS_PER_GROUP(sb), i);
+		if (i >= EXT3_BLOCKS_PER_GROUP(sb)) {
+			J_ASSERT(free == 0);
+			break;
+		}
+
+		mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex);
+		J_ASSERT(ex.fe_len > 0);
+		J_ASSERT(free >= ex.fe_len);
+
+		ext3_mb_measure_extent(ac, &ex, e3b);
+
+		i += ex.fe_len;
+		free -= ex.fe_len;
+	}
+}
+
+/*
+ * This is a special case for storages like raid5
+ * we try to find stripe-aligned chunks for stripe-size requests
+ */
+static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
+				 struct ext3_buddy *e3b)
+{
+	struct super_block *sb = ac->ac_sb;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	void *bitmap = EXT3_MB_BITMAP(e3b);
+	struct ext3_free_extent ex;
+	unsigned long i, max;
+
+	J_ASSERT(sbi->s_stripe != 0);
+
+	/* find first stripe-aligned block */
+	i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
+		+ le32_to_cpu(sbi->s_es->s_first_data_block);
+	i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe;
+	i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
+		% EXT3_BLOCKS_PER_GROUP(sb);
+
+	while (i < EXT3_BLOCKS_PER_GROUP(sb)) {
+		if (!mb_test_bit(i, bitmap)) {
+			max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
+			if (max >= sbi->s_stripe) {
+				ac->ac_found++;
+				ac->ac_b_ex = ex;
+				ext3_mb_use_best_found(ac, e3b);
+				break;
+			}
+		}
+		i += sbi->s_stripe;
+	}
+}
+
+static int ext3_mb_good_group(struct ext3_allocation_context *ac,
+				int group, int cr)
+{
+	struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group);
+	unsigned free, fragments, i, bits;
+
+	J_ASSERT(cr >= 0 && cr < 4);
+	J_ASSERT(!EXT3_MB_GRP_NEED_INIT(grp));
+
+	free = grp->bb_free;
+	fragments = grp->bb_fragments;
+	if (free == 0)
+		return 0;
+	if (fragments == 0)
+		return 0;
+
+	switch (cr) {
+	case 0:
+		J_ASSERT(ac->ac_2order != 0);
+		bits = ac->ac_sb->s_blocksize_bits + 1;
+		for (i = ac->ac_2order; i <= bits; i++)
+			if (grp->bb_counters[i] > 0)
+				return 1;
+		break;
+	case 1:
+		if ((free / fragments) >= ac->ac_g_ex.fe_len)
+			return 1;
+		break;
+	case 2:
+		if (free >= ac->ac_g_ex.fe_len)
+			return 1;
+		break;
+	case 3:
+		return 1;
+	default:
+		BUG();
+	}
+
+	return 0;
+}
+
+int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
+			unsigned long goal, int *len, int flags, int *errp)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext3_allocation_context ac;
+	int i, group, block, cr, err = 0;
+	struct ext3_group_desc *gdp;
+	struct ext3_super_block *es;
+	struct buffer_head *gdp_bh;
+	struct ext3_sb_info *sbi;
+	struct super_block *sb;
+	struct ext3_buddy e3b;
+
+	J_ASSERT(len != NULL);
+	J_ASSERT(*len > 0);
+
+	sb = inode->i_sb;
+	if (!sb) {
+		printk("ext3_mb_new_nblocks: nonexistent device");
+		return 0;
+	}
+
+	if (!test_opt(sb, MBALLOC)) {
+		static int ext3_mballoc_warning = 0;
+		if (ext3_mballoc_warning == 0) {
+			printk(KERN_ERR "EXT3-fs: multiblock request with "
+				"mballoc disabled!\n");
+			ext3_mballoc_warning++;
+		}
+		*len = 1;
+		err = ext3_new_block_old(handle, inode, goal, errp);
+		return err;
+	}
+
+	ext3_mb_poll_new_transaction(sb, handle);
+
+	sbi = EXT3_SB(sb);
+	es = EXT3_SB(sb)->s_es;
+
+	/*
+	 * We can't allocate > group size
+	 */
+	if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10)
+		*len = EXT3_BLOCKS_PER_GROUP(sb) - 10;
+
+	if (!(flags & EXT3_MB_HINT_RESERVED)) {
+		/* someone asks for non-reserved blocks */
+		BUG_ON(*len > 1);
+		err = ext3_mb_reserve_blocks(sb, 1);
+		if (err) {
+			*errp = err;
+			return 0;
+		}
+	}
+
+	ac.ac_buddy_page = NULL;
+	ac.ac_bitmap_page = NULL;
+
+	/*
+	 * Check quota for allocation of this blocks.
+	 */
+	while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
+		*len -= 1;
+	if (*len == 0) {
+		*errp = -EDQUOT;
+		block = 0;
+		goto out;
+	}
+
+	/* start searching from the goal */
+	if (goal < le32_to_cpu(es->s_first_data_block) ||
+	    goal >= le32_to_cpu(es->s_blocks_count))
+		goal = le32_to_cpu(es->s_first_data_block);
+	group = (goal - le32_to_cpu(es->s_first_data_block)) /
+			EXT3_BLOCKS_PER_GROUP(sb);
+	block = ((goal - le32_to_cpu(es->s_first_data_block)) %
+			EXT3_BLOCKS_PER_GROUP(sb));
+
+	/* set up allocation goals */
+	ac.ac_b_ex.fe_group = 0;
+	ac.ac_b_ex.fe_start = 0;
+	ac.ac_b_ex.fe_len = 0;
+	ac.ac_status = AC_STATUS_CONTINUE;
+	ac.ac_groups_scanned = 0;
+	ac.ac_ex_scanned = 0;
+	ac.ac_found = 0;
+	ac.ac_sb = inode->i_sb;
+	ac.ac_g_ex.fe_group = group;
+	ac.ac_g_ex.fe_start = block;
+	ac.ac_g_ex.fe_len = *len;
+	ac.ac_flags = flags;
+	ac.ac_2order = 0;
+	ac.ac_criteria = 0;
+
+	if (*len == 1 && sbi->s_stripe) {
+		/* looks like a metadata, let's use a dirty hack for raid5
+		 * move all metadata in first groups in hope to hit cached
+		 * sectors and thus avoid read-modify cycles in raid5 */
+		ac.ac_g_ex.fe_group = group = 0;
+	}
+
+	/* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
+	i = ffs(*len);
+	if (i >= ext3_mb_order2_reqs) {
+		i--;
+		if ((*len & (~(1 << i))) == 0)
+			ac.ac_2order = i;
+	}
+
+	/* first, try the goal */
+	err = ext3_mb_find_by_goal(&ac, &e3b);
+	if (err)
+		goto out_err;
+	if (ac.ac_status == AC_STATUS_FOUND)
+		goto found;
+
+	/* Let's just scan groups to find more-less suitable blocks */
+	cr = ac.ac_2order ? 0 : 1;
+repeat:
+	for (; cr < 4 && ac.ac_status == AC_STATUS_CONTINUE; cr++) {
+		ac.ac_criteria = cr;
+		for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
+			if (group == EXT3_SB(sb)->s_groups_count)
+				group = 0;
+
+			if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) {
+				/* we need full data about the group
+				 * to make a good selection */
+				err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b);
+				if (err)
+					goto out_err;
+				ext3_mb_release_desc(&e3b);
+			}
+
+			/* check is group good for our criteries */
+			if (!ext3_mb_good_group(&ac, group, cr))
+				continue;
+
+			err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b);
+			if (err)
+				goto out_err;
+
+			ext3_lock_group(sb, group);
+			if (!ext3_mb_good_group(&ac, group, cr)) {
+				/* someone did allocation from this group */
+				ext3_unlock_group(sb, group);
+				ext3_mb_release_desc(&e3b);
+				continue;
+			}
+
+			ac.ac_groups_scanned++;
+			if (cr == 0)
+				ext3_mb_simple_scan_group(&ac, &e3b);
+			else if (cr == 1 && *len == sbi->s_stripe)
+				ext3_mb_scan_aligned(&ac, &e3b);
+			else
+				ext3_mb_complex_scan_group(&ac, &e3b);
+
+			ext3_unlock_group(sb, group);
+
+			ext3_mb_release_desc(&e3b);
+
+			if (ac.ac_status != AC_STATUS_CONTINUE)
+				break;
+		}
+	}
+
+	if (ac.ac_b_ex.fe_len > 0 && ac.ac_status != AC_STATUS_FOUND &&
+	    !(ac.ac_flags & EXT3_MB_HINT_FIRST)) {
+		/*
+		 * We've been searching too long. Let's try to allocate
+		 * the best chunk we've found so far
+		 */
+
+		/*if (ac.ac_found > ext3_mb_max_to_scan)
+			printk(KERN_DEBUG "EXT3-fs: too long searching at "
+				"%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
+				ac.ac_g_ex.fe_len);*/
+		ext3_mb_try_best_found(&ac, &e3b);
+		if (ac.ac_status != AC_STATUS_FOUND) {
+			/*
+			 * Someone more lucky has already allocated it.
+			 * The only thing we can do is just take first
+			 * found block(s)
+			printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
+			 */
+			ac.ac_b_ex.fe_group = 0;
+			ac.ac_b_ex.fe_start = 0;
+			ac.ac_b_ex.fe_len = 0;
+			ac.ac_status = AC_STATUS_CONTINUE;
+			ac.ac_flags |= EXT3_MB_HINT_FIRST;
+			cr = 3;
+			goto repeat;
+		}
+	}
+
+	if (ac.ac_status != AC_STATUS_FOUND) {
+		/*
+		 * We aren't lucky definitely
+		 */
+		DQUOT_FREE_BLOCK(inode, *len);
+		*errp = -ENOSPC;
+		block = 0;
+#if 1
+		printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
+			ac.ac_status, ac.ac_flags);
+		printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
+			ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
+			ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
+		printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
+			sbi->s_blocks_reserved, ac.ac_found);
+		printk("EXT3-fs: groups: ");
+		for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
+			printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free);
+		printk("\n");
+#endif
+		goto out;
+	}
+
+found:
+	J_ASSERT(ac.ac_b_ex.fe_len > 0);
+
+	/* good news - free block(s) have been found. now it's time
+	 * to mark block(s) in good old journaled bitmap */
+	block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb)
+		+ ac.ac_b_ex.fe_start
+		+ le32_to_cpu(es->s_first_data_block);
+
+	/* we made a desicion, now mark found blocks in good old
+	 * bitmap to be journaled */
+
+	ext3_debug("using block group %d(%d)\n",
+		ac.ac_b_group.group, gdp->bg_free_blocks_count);
+
+	bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group);
+	if (!bitmap_bh) {
+		*errp = -EIO;
+		goto out_err;
+	}
+
+	err = ext3_journal_get_write_access(handle, bitmap_bh);
+	if (err) {
+		*errp = err;
+		goto out_err;
+	}
+
+	gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh);
+	if (!gdp) {
+		*errp = -EIO;
+		goto out_err;
+	}
+
+	err = ext3_journal_get_write_access(handle, gdp_bh);
+	if (err)
+		goto out_err;
+
+	block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb)
+		+ ac.ac_b_ex.fe_start
+		+ le32_to_cpu(es->s_first_data_block);
+
+	if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
+	    block == le32_to_cpu(gdp->bg_inode_bitmap) ||
+	    in_range(block, le32_to_cpu(gdp->bg_inode_table),
+		     EXT3_SB(sb)->s_itb_per_group))
+		ext3_error(sb, "ext3_new_block",
+			"Allocating block in system zone - "
+			"block = %u", block);
+#ifdef AGGRESSIVE_CHECK
+	for (i = 0; i < ac.ac_b_ex.fe_len; i++)
+		J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data));
+#endif
+	mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len);
+
+	spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
+	gdp->bg_free_blocks_count =
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
+				- ac.ac_b_ex.fe_len);
+	spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
+	percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len);
+
+	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	if (err)
+		goto out_err;
+	err = ext3_journal_dirty_metadata(handle, gdp_bh);
+	if (err)
+		goto out_err;
+
+	sb->s_dirt = 1;
+	*errp = 0;
+	brelse(bitmap_bh);
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -