📄 ext3-mballoc2-2.6-fc5.patch
字号:
++ e3b->bd_blkbits = sb->s_blocksize_bits;+ e3b->bd_info = EXT3_GROUP_INFO(sb, group);+ e3b->bd_sb = sb;+ e3b->bd_group = group;+ e3b->bd_buddy_page = NULL;+ e3b->bd_bitmap_page = NULL;++ block = group * 2;+ pnum = block / blocks_per_page;+ poff = block % blocks_per_page;++ /* we could use find_or_create_page(), but it locks page+ * what we'd like to avoid in fast path ... */+ page = find_get_page(inode->i_mapping, pnum);+ if (page == NULL || !PageUptodate(page)) {+ if (page)+ page_cache_release(page);+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);+ if (page) {+ BUG_ON(page->mapping != inode->i_mapping);+ if (!PageUptodate(page))+ ext3_mb_init_cache(page);+ unlock_page(page);+ }+ }+ if (page == NULL || !PageUptodate(page))+ goto err;+ e3b->bd_bitmap_page = page;+ e3b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);+ mark_page_accessed(page);++ block++;+ pnum = block / blocks_per_page;+ poff = block % blocks_per_page;++ page = find_get_page(inode->i_mapping, pnum);+ if (page == NULL || !PageUptodate(page)) {+ if (page)+ page_cache_release(page);+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);+ if (page) {+ BUG_ON(page->mapping != inode->i_mapping);+ if (!PageUptodate(page))+ ext3_mb_init_cache(page);+ unlock_page(page);+ }+ }+ if (page == NULL || !PageUptodate(page))+ goto err;+ e3b->bd_buddy_page = page;+ e3b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);+ mark_page_accessed(page);++ J_ASSERT(e3b->bd_bitmap_page != NULL);+ J_ASSERT(e3b->bd_buddy_page != NULL);++ return 0;++err:+ if (e3b->bd_bitmap_page)+ page_cache_release(e3b->bd_bitmap_page);+ if (e3b->bd_buddy_page)+ page_cache_release(e3b->bd_buddy_page);+ e3b->bd_buddy = NULL;+ e3b->bd_bitmap = NULL;+ return -EIO;+}++static void ext3_mb_release_desc(struct ext3_buddy *e3b)+{+ if (e3b->bd_bitmap_page)+ page_cache_release(e3b->bd_bitmap_page);+ if (e3b->bd_buddy_page)+ page_cache_release(e3b->bd_buddy_page);+}+++static inline void+ext3_lock_group(struct super_block *sb, int group)+{+ bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT,+ &EXT3_GROUP_INFO(sb, group)->bb_state);+}++static inline void+ext3_unlock_group(struct super_block *sb, int group)+{+ bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT,+ &EXT3_GROUP_INFO(sb, group)->bb_state);+}++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)+{+ int order = 1;+ void *bb;++ J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b));+ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));++ bb = EXT3_MB_BUDDY(e3b);+ while (order <= e3b->bd_blkbits + 1) {+ block = block >> 1;+ if (!mb_test_bit(block, bb)) {+ /* this block is part of buddy of order 'order' */+ return order;+ }+ bb += 1 << (e3b->bd_blkbits - order);+ order++;+ }+ return 0;+}++static inline void mb_clear_bits(void *bm, int cur, int len)+{+ __u32 *addr;++ len = cur + len;+ while (cur < len) {+ if ((cur & 31) == 0 && (len - cur) >= 32) {+ /* fast path: clear whole word at once */+ addr = bm + (cur >> 3);+ *addr = 0;+ cur += 32;+ continue;+ }+ mb_clear_bit_atomic(cur, bm);+ cur++;+ }+}++static inline void mb_set_bits(void *bm, int cur, int len)+{+ __u32 *addr;++ len = cur + len;+ while (cur < len) {+ if ((cur & 31) == 0 && (len - cur) >= 32) {+ /* fast path: clear whole word at once */+ addr = bm + (cur >> 3);+ *addr = 0xffffffff;+ cur += 32;+ continue;+ }+ mb_set_bit_atomic(cur, bm);+ cur++;+ }+}++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)+{+ int block = 0, max = 0, order;+ void *buddy, *buddy2;++ mb_check_buddy(e3b);++ e3b->bd_info->bb_free += count;+ if (first < e3b->bd_info->bb_first_free)+ e3b->bd_info->bb_first_free = first;++ /* let's maintain fragments counter */+ if (first != 0)+ block = !mb_test_bit(first - 1, EXT3_MB_BITMAP(e3b));+ if (first + count < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0])+ max = !mb_test_bit(first + count, EXT3_MB_BITMAP(e3b));+ if (block && max)+ e3b->bd_info->bb_fragments--;+ else if (!block && !max)+ e3b->bd_info->bb_fragments++;++ /* let's maintain buddy itself */+ while (count-- > 0) {+ block = first++;+ order = 0;++ J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b)));+ mb_clear_bit(block, EXT3_MB_BITMAP(e3b));+ e3b->bd_info->bb_counters[order]++;++ /* start of the buddy */+ buddy = mb_find_buddy(e3b, order, &max);++ do {+ block &= ~1UL;+ if (mb_test_bit(block, buddy) ||+ mb_test_bit(block + 1, buddy))+ break;++ /* both the buddies are free, try to coalesce them */+ buddy2 = mb_find_buddy(e3b, order + 1, &max);++ if (!buddy2)+ break;++ if (order > 0) {+ /* for special purposes, we don't set+ * free bits in bitmap */+ mb_set_bit(block, buddy);+ mb_set_bit(block + 1, buddy);+ }+ e3b->bd_info->bb_counters[order]--;+ e3b->bd_info->bb_counters[order]--;++ block = block >> 1;+ order++;+ e3b->bd_info->bb_counters[order]++;++ mb_clear_bit(block, buddy2);+ buddy = buddy2;+ } while (1);+ }+ mb_check_buddy(e3b);++ return 0;+}++static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,+ int needed, struct ext3_free_extent *ex)+{+ int next = block, max, ord;+ void *buddy;++ J_ASSERT(ex != NULL);++ buddy = mb_find_buddy(e3b, order, &max);+ J_ASSERT(buddy);+ J_ASSERT(block < max);+ if (mb_test_bit(block, buddy)) {+ ex->fe_len = 0;+ ex->fe_start = 0;+ ex->fe_group = 0;+ return 0;+ }++ if (likely(order == 0)) {+ /* find actual order */+ order = mb_find_order_for_block(e3b, block);+ block = block >> order;+ }++ ex->fe_len = 1 << order;+ ex->fe_start = block << order;+ ex->fe_group = e3b->bd_group;++ /* calc difference from given start */+ next = next - ex->fe_start;+ ex->fe_len -= next;+ ex->fe_start += next;++ while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {++ if (block + 1 >= max)+ break;++ next = (block + 1) * (1 << order);+ if (mb_test_bit(next, EXT3_MB_BITMAP(e3b)))+ break;++ ord = mb_find_order_for_block(e3b, next);++ order = ord;+ block = next >> order;+ ex->fe_len += 1 << order;+ }++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));+ return ex->fe_len;+}++static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex)+{+ int ord, mlen = 0, max = 0, cur;+ int start = ex->fe_start;+ int len = ex->fe_len;+ unsigned ret = 0;+ int len0 = len;+ void *buddy;++ mb_check_buddy(e3b);++ e3b->bd_info->bb_free -= len;+ if (e3b->bd_info->bb_first_free == start)+ e3b->bd_info->bb_first_free += len;++ /* let's maintain fragments counter */+ if (start != 0)+ mlen = !mb_test_bit(start - 1, EXT3_MB_BITMAP(e3b));+ if (start + len < EXT3_SB(e3b->bd_sb)->s_mb_maxs[0])+ max = !mb_test_bit(start + len, EXT3_MB_BITMAP(e3b));+ if (mlen && max)+ e3b->bd_info->bb_fragments++;+ else if (!mlen && !max)+ e3b->bd_info->bb_fragments--;++ /* let's maintain buddy itself */+ while (len) {+ ord = mb_find_order_for_block(e3b, start);++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {+ /* the whole chunk may be allocated at once! */+ mlen = 1 << ord;+ buddy = mb_find_buddy(e3b, ord, &max);+ J_ASSERT((start >> ord) < max);+ mb_set_bit(start >> ord, buddy);+ e3b->bd_info->bb_counters[ord]--;+ start += mlen;+ len -= mlen;+ J_ASSERT(len >= 0);+ continue;+ }++ /* store for history */+ if (ret == 0)+ ret = len | (ord << 16);++ /* we have to split large buddy */+ J_ASSERT(ord > 0);+ buddy = mb_find_buddy(e3b, ord, &max);+ mb_set_bit(start >> ord, buddy);+ e3b->bd_info->bb_counters[ord]--;++ ord--;+ cur = (start >> ord) & ~1U;+ buddy = mb_find_buddy(e3b, ord, &max);+ mb_clear_bit(cur, buddy);+ mb_clear_bit(cur + 1, buddy);+ e3b->bd_info->bb_counters[ord]++;+ e3b->bd_info->bb_counters[ord]++;+ }++ /* now drop all the bits in bitmap */+ mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0);++ mb_check_buddy(e3b);++ return ret;+}++/*+ * Must be called under group lock!+ */+static void ext3_mb_use_best_found(struct ext3_allocation_context *ac,+ struct ext3_buddy *e3b)+{+ unsigned long ret;++ ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);+ ret = mb_mark_used(e3b, &ac->ac_b_ex);++ ac->ac_status = AC_STATUS_FOUND;+ ac->ac_tail = ret & 0xffff;+ ac->ac_buddy = ret >> 16;++ /* hold in-core structures until allocated+ * blocks are marked non-free in on-disk bitmap */+ ac->ac_buddy_page = e3b->bd_buddy_page;+ page_cache_get(e3b->bd_buddy_page);+ ac->ac_bitmap_page = e3b->bd_bitmap_page;+ page_cache_get(e3b->bd_bitmap_page);+}++/*+ * The routine checks whether found extent is good enough. If it is,+ * then the extent gets marked used and flag is set to the context+ * to stop scanning. Otherwise, the extent is compared with the+ * previous found extent and if new one is better, then it's stored+ * in the context. Later, the best found extent will be used, if+ * mballoc can't find good enough extent.+ *+ * FIXME: real allocation policy is to be designed yet!+ */+static void ext3_mb_measure_extent(struct ext3_allocation_context *ac,+ struct ext3_free_extent *ex,+ struct ext3_buddy *e3b)+{+ struct ext3_free_extent *bex = &ac->ac_b_ex;+ struct ext3_free_extent *gex = &ac->ac_g_ex;++ J_ASSERT(ex->fe_len > 0);+ J_ASSERT(ex->fe_len < EXT3_BLOCKS_PER_GROUP(ac->ac_sb));+ J_ASSERT(ex->fe_start < EXT3_BLOCKS_PER_GROUP(ac->ac_sb));++ ac->ac_found++;++ /*+ * The special case - take what you catch first+ */+ if (unlikely(ac->ac_flags & EXT3_MB_HINT_FIRST)) {+ *bex = *ex;+ ext3_mb_use_best_found(ac, e3b);+ return;+ }++ /*+ * Let's check whether the chunk is good enough+ */+ if (ex->fe_len == gex->fe_len) {+ *bex = *ex;+ ext3_mb_use_best_found(ac, e3b);+ return;+ }++ /*+ * If this is first found extent, just store it in the context+ */+ if (bex->fe_len == 0) {+ *bex = *ex;+ return;+ }++ /*+ * If new found extent is better, store it in the context+ */+ if (bex->fe_len < gex->fe_len) {+ /* if the request isn't satisfied, any found extent+ * larger than previous best one is better */+ if (ex->fe_len > bex->fe_len)+ *bex = *ex;+ } else if (ex->fe_len > gex->fe_len) {+ /* if the request is satisfied, then we try to find+ * an extent that still satisfy the request, but is+ * smaller than previous one */+ *bex = *ex;+ }++ /*+ * Let's scan at least few extents and don't pick up a first one+ */+ if (bex->fe_len > gex->fe_len && ac->ac_found > ext3_mb_min_to_scan)+ ac->ac_status = AC_STATUS_BREAK;++ /*+ * We don't want to scan for a whole year+ */+ if (ac->ac_found > ext3_mb_max_to_scan)+ ac->ac_status = AC_STATUS_BREAK;+}++static int ext3_mb_try_best_found(struct ext3_allocation_context *ac,+ struct ext3_buddy *e3b)+{+ struct ext3_free_extent ex = ac->ac_b_ex;+ int group = ex.fe_group, max, err;++ J_ASSERT(ex.fe_len > 0);+ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);+ if (err)+ return err;++ ext3_lock_group(ac->ac_sb, group);+ max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex);++ if (max > 0) {+ ac->ac_b_ex = ex;+ ext3_mb_use_best_found(ac, e3b);+ }++ ext3_unlock_group(ac->ac_sb, group);++ ext3_mb_release_desc(e3b);++ return 0;+}++static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac,+ struct ext3_buddy *e3b)+{+ int group = ac->ac_g_ex.fe_group, max, err;+ struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);+ struct ext3_super_block *es = sbi->s_es;+ struct ext3_free_extent ex;++ err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);+ if (err)+ return err;++ ext3_lock_group(ac->ac_sb, group);+ max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,+ ac->ac_g_ex.fe_len, &ex);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -