inode.c
来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 2,219 行 · 第 1/5 页
C
2,219 行
{ while (p < q) if (*p++) return 0; return 1;}/** * ext3_find_shared - find the indirect blocks for partial truncation. * @inode: inode in question * @depth: depth of the affected branch * @offsets: offsets of pointers in that branch (see ext3_block_to_path) * @chain: place to store the pointers to partial indirect blocks * @top: place to the (detached) top of branch * * This is a helper function used by ext3_truncate(). * * When we do truncate() we may have to clean the ends of several * indirect blocks but leave the blocks themselves alive. Block is * partially truncated if some data below the new i_size is refered * from it (and it is on the path to the first completely truncated * data block, indeed). We have to free the top of that path along * with everything to the right of the path. Since no allocation * past the truncation point is possible until ext3_truncate() * finishes, we may safely do the latter, but top of branch may * require special attention - pageout below the truncation point * might try to populate it. * * We atomically detach the top of branch from the tree, store the * block number of its root in *@top, pointers to buffer_heads of * partially truncated blocks - in @chain[].bh and pointers to * their last elements that should not be removed - in * @chain[].p. Return value is the pointer to last filled element * of @chain. * * The work left to caller to do the actual freeing of subtrees: * a) free the subtree starting from *@top * b) free the subtrees whose roots are stored in * (@chain[i].p+1 .. end of @chain[i].bh->b_data) * c) free the subtrees growing from the inode past the @chain[0]. * (no partially truncated stuff there). */static Indirect *ext3_find_shared(struct inode *inode, int depth, int offsets[4], Indirect chain[4], __le32 *top){ Indirect *partial, *p; int k, err; *top = 0; /* Make k index the deepest non-null offest + 1 */ for (k = depth; k > 1 && !offsets[k-1]; k--) ; partial = ext3_get_branch(inode, k, offsets, chain, &err); /* Writer: pointers */ if (!partial) partial = chain + k-1; /* * If the branch acquired continuation since we've looked at it - * fine, it should all survive and (new) top doesn't belong to us. */ if (!partial->key && *partial->p) /* Writer: end */ goto no_top; for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) ; /* * OK, we've found the last block that must survive. The rest of our * branch should be detached before unlocking. However, if that rest * of branch is all ours and does not grow immediately from the inode * it's easier to cheat and just decrement partial->p. */ if (p == chain + k - 1 && p > chain) { p->p--; } else { *top = *p->p; /* Nope, don't do this in ext3. Must leave the tree intact */#if 0 *p->p = 0;#endif } /* Writer: end */ while(partial > p) { brelse(partial->bh); partial--; }no_top: return partial;}/* * Zero a number of block pointers in either an inode or an indirect block. * If we restart the transaction we must again get write access to the * indirect block for further modification. * * We release `count' blocks on disk, but (last - first) may be greater * than `count' because there can be holes in there. */static voidext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, unsigned long block_to_free, unsigned long count, __le32 *first, __le32 *last){ __le32 *p; if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ext3_journal_dirty_metadata(handle, bh); } ext3_mark_inode_dirty(handle, inode); ext3_journal_test_restart(handle, inode); if (bh) { BUFFER_TRACE(bh, "retaking write access"); ext3_journal_get_write_access(handle, bh); } } /* * Any buffers which are on the journal will be in memory. We find * them on the hash table so journal_revoke() will run journal_forget() * on them. We've already detached each block from the file, so * bforget() in journal_forget() should be safe. * * AKPM: turn on bforget in journal_forget()!!! */ for (p = first; p < last; p++) { u32 nr = le32_to_cpu(*p); if (nr) { struct buffer_head *bh; *p = 0; bh = sb_find_get_block(inode->i_sb, nr); ext3_forget(handle, 0, inode, bh, nr); } } ext3_free_blocks(handle, inode, block_to_free, count);}/** * ext3_free_data - free a list of data blocks * @handle: handle for this transaction * @inode: inode we are dealing with * @this_bh: indirect buffer_head which contains *@first and *@last * @first: array of block numbers * @last: points immediately past the end of array * * We are freeing all blocks refered from that array (numbers are stored as * little-endian 32-bit) and updating @inode->i_blocks appropriately. * * We accumulate contiguous runs of blocks to free. Conveniently, if these * blocks are contiguous then releasing them at one time will only affect one * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't * actually use a lot of journal space. * * @this_bh will be %NULL if @first and @last point into the inode's direct * block pointers. */static void ext3_free_data(handle_t *handle, struct inode *inode, struct buffer_head *this_bh, __le32 *first, __le32 *last){ unsigned long block_to_free = 0; /* Starting block # of a run */ unsigned long count = 0; /* Number of blocks in the run */ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind corresponding to block_to_free */ unsigned long nr; /* Current block # */ __le32 *p; /* Pointer into inode/ind for current block */ int err; if (this_bh) { /* For indirect block */ BUFFER_TRACE(this_bh, "get_write_access"); err = ext3_journal_get_write_access(handle, this_bh); /* Important: if we can't update the indirect pointers * to the blocks, we can't free them. */ if (err) return; } for (p = first; p < last; p++) { nr = le32_to_cpu(*p); if (nr) { /* accumulate blocks to free if they're contiguous */ if (count == 0) { block_to_free = nr; block_to_free_p = p; count = 1; } else if (nr == block_to_free + count) { count++; } else { ext3_clear_blocks(handle, inode, this_bh, block_to_free, count, block_to_free_p, p); block_to_free = nr; block_to_free_p = p; count = 1; } } } if (count > 0) ext3_clear_blocks(handle, inode, this_bh, block_to_free, count, block_to_free_p, p); if (this_bh) { BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); ext3_journal_dirty_metadata(handle, this_bh); }}/** * ext3_free_branches - free an array of branches * @handle: JBD handle for this transaction * @inode: inode we are dealing with * @parent_bh: the buffer_head which contains *@first and *@last * @first: array of block numbers * @last: pointer immediately past the end of array * @depth: depth of the branches to free * * We are freeing all blocks refered from these branches (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */static void ext3_free_branches(handle_t *handle, struct inode *inode, struct buffer_head *parent_bh, __le32 *first, __le32 *last, int depth){ unsigned long nr; __le32 *p; if (is_handle_aborted(handle)) return; if (depth--) { struct buffer_head *bh; int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); p = last; while (--p >= first) { nr = le32_to_cpu(*p); if (!nr) continue; /* A hole */ /* Go read the buffer for the next level down */ bh = sb_bread(inode->i_sb, nr); /* * A read failure? Report error and clear slot * (should be rare). */ if (!bh) { ext3_error(inode->i_sb, "ext3_free_branches", "Read failure, inode=%ld, block=%ld", inode->i_ino, nr); continue; } /* This zaps the entire block. Bottom up. */ BUFFER_TRACE(bh, "free child branches"); ext3_free_branches(handle, inode, bh, (__le32*)bh->b_data, (__le32*)bh->b_data + addr_per_block, depth); /* * We've probably journalled the indirect block several * times during the truncate. But it's no longer * needed and we now drop it from the transaction via * journal_revoke(). * * That's easy if it's exclusively part of this * transaction. But if it's part of the committing * transaction then journal_forget() will simply * brelse() it. That means that if the underlying * block is reallocated in ext3_get_block(), * unmap_underlying_metadata() will find this block * and will try to get rid of it. damn, damn. * * If this block has already been committed to the * journal, a revoke record will be written. And * revoke records must be emitted *before* clearing * this block's bit in the bitmaps. */ ext3_forget(handle, 1, inode, bh, bh->b_blocknr); /* * Everything below this this pointer has been * released. Now let this top-of-subtree go. * * We want the freeing of this indirect block to be * atomic in the journal with the updating of the * bitmap block which owns it. So make some room in * the journal. * * We zero the parent pointer *after* freeing its * pointee in the bitmaps, so if extend_transaction() * for some reason fails to put the bitmap changes and * the release into the same transaction, recovery * will merely complain about releasing a free block, * rather than leaking blocks. */ if (is_handle_aborted(handle)) return; if (try_to_extend_transaction(handle, inode)) { ext3_mark_inode_dirty(handle, inode); ext3_journal_test_restart(handle, inode); } ext3_free_blocks(handle, inode, nr, 1); if (parent_bh) { /* * The block which we have just freed is * pointed to by an indirect block: journal it */ BUFFER_TRACE(parent_bh, "get_write_access"); if (!ext3_journal_get_write_access(handle, parent_bh)){ *p = 0; BUFFER_TRACE(parent_bh, "call ext3_journal_dirty_metadata"); ext3_journal_dirty_metadata(handle, parent_bh); } } } } else { /* We have reached the bottom of the tree. */ BUFFER_TRACE(parent_bh, "free data blocks"); ext3_free_data(handle, inode, parent_bh, first, last); }}/* * ext3_truncate() * * We block out ext3_get_block() block instantiations across the entire * transaction, and VFS/VM ensures that ext3_truncate() cannot run * simultaneously on behalf of the same inode. * * As we work through the truncate and commmit bits of it to the journal there * is one core, guiding principle: the file's tree must always be consistent on * disk. We must be able to restart the truncate after a crash. * * The file's tree may be transiently inconsistent in memory (although it * probably isn't), but whenever we close off and commit a journal transaction, * the contents of (the filesystem + the journal) must be consistent and * restartable. It's pretty simple, really: bottom up, right to left (although * left-to-right works OK too). * * Note that at recovery time, journal replay occurs *before* the restart of * truncate against the orphan inode list. * * The committed inode has the new, desired i_size (which is the same as * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see * that this inode's truncate did not complete and it will again call * ext3_truncate() to have another go. So there will be instantiated blocks * to the right of the truncation point in a crashed ext3 filesystem. But * that's fine - as long as they are linked from the inode, the post-crash * ext3_truncate() run will find them and release them. */void ext3_truncate(struct inode * inode){ handle_t *handle; struct ext3_inode_info *ei = EXT3_I(inode); __le32 *i_data = ei->i_data; int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); struct address_space *mapping = inode->i_mapping; int offsets[4]; Indirect chain[4]; Indirect *partial; __le32 nr = 0; int n; long last_block; unsigned blocksize = inode->i_sb->s_blocksize; struct page *page; if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; if (ext3_inode_is_fast_symlink(inode)) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; ext3_discard_prealloc(inode); /* * We have to lock the EOF page here, because lock_page() nests * outside journal_start(). */ if ((inode->i_size & (blocksize - 1)) == 0) { /* Block boundary? Nothing to do */ page = NULL; } else { page = grab_cache_page(mapping, inode->i_size >> PAGE_CACHE_SHIFT); if (!page) return; } handle = start_transaction(inode); if (IS_ERR(handle)) { if (page) { clear_highpage(page); flush_dcache_page(page); unlock_page(page); page_cache_release(page); } return; /* AKPM: return what? */ } last_block = (inode->i_size + blocksize-1) >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); if (page) ext3_block_truncate_page(handle, page, mapping, inode->i_size); n = ext3_block_to_path(inode, last_block, offsets, NULL); if (n == 0) goto out_stop; /* error */ /* * OK. This truncate is going to happen. We add the inode to the * orphan list, so that if this truncate spans multiple transactions, * and we crash, we will resume the truncate when the filesystem * recovers. It also marks the inode dirty, to catch the new size. * * Implication: the file must always be in a sane, consistent * truncatable state while each transaction commits. */ if (ext3_orphan_add(handle, inode)) goto out_stop; /* * The orphan list entry will now protect us from any crash which * occurs before the trunc
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?