📄 inode.c
字号:
/* * linux/fs/ext3/inode.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Goal-directed block allocation by Stephen Tweedie * (sct@redhat.com), 1993, 1998 * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) * * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 */#include <linux/fs.h>#include <linux/sched.h>#include <linux/ext3_jbd.h>#include <linux/jbd.h>#include <linux/locks.h>#include <linux/smp_lock.h>#include <linux/highuid.h>#include <linux/quotaops.h>#include <linux/module.h>/* * SEARCH_FROM_ZERO forces each block allocation to search from the start * of the filesystem. This is to force rapid reallocation of recently-freed * blocks. The file fragmentation is horrendous. */#undef SEARCH_FROM_ZERO/* The ext3 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. * * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. */static int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, int blocknr){ int err; BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " "data mode %lx\n", bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); /* Never use the revoke function if we are doing full data * journaling: there is no need to, and a V1 superblock won't * support it. Otherwise, only skip the revoke on un-journaled * data blocks. 
*/ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || (!is_metadata && !ext3_should_journal_data(inode))) { if (bh) { BUFFER_TRACE(bh, "call journal_forget"); ext3_journal_forget(handle, bh); } return 0; } /* * data!=journal && (is_metadata || should_journal_data(inode)) */ BUFFER_TRACE(bh, "call ext3_journal_revoke"); err = ext3_journal_revoke(handle, blocknr, bh); if (err) ext3_abort(inode->i_sb, __FUNCTION__, "error %d when attempting revoke", err); BUFFER_TRACE(bh, "exit"); return err;}/* * Truncate transactions can be complex and absolutely huge. So we need to * be able to restart the transaction at a conventient checkpoint to make * sure we don't overflow the journal. * * start_transaction gets us a new handle for a truncate transaction, * and extend_transaction tries to extend the existing one a bit. If * extend fails, we need to propagate the failure up and restart the * transaction in the top-level truncate loop. --sct */static handle_t *start_transaction(struct inode *inode) { long needed; handle_t *result; needed = inode->i_blocks; if (needed > EXT3_MAX_TRANS_DATA) needed = EXT3_MAX_TRANS_DATA; result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed); if (!IS_ERR(result)) return result; ext3_std_error(inode->i_sb, PTR_ERR(result)); return result;}/* * Try to extend this transaction for the purposes of truncation. * * Returns 0 if we managed to create more room. If we can't create more * room, and the transaction must be restarted we return 1. */static int try_to_extend_transaction(handle_t *handle, struct inode *inode){ long needed; if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) return 0; needed = inode->i_blocks; if (needed > EXT3_MAX_TRANS_DATA) needed = EXT3_MAX_TRANS_DATA; if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed)) return 0; return 1;}/* * Restart the transaction associated with *handle. 
This does a commit, * so before we call here everything must be consistently dirtied against * this transaction. */static int ext3_journal_test_restart(handle_t *handle, struct inode *inode){ long needed = inode->i_blocks; if (needed > EXT3_MAX_TRANS_DATA) needed = EXT3_MAX_TRANS_DATA; jbd_debug(2, "restarting handle %p\n", handle); return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed);}/* * Called at each iput() */void ext3_put_inode (struct inode * inode){ ext3_discard_prealloc (inode);}/* * Called at the last iput() if i_nlink is zero. */void ext3_delete_inode (struct inode * inode){ handle_t *handle; if (is_bad_inode(inode) || inode->i_ino == EXT3_ACL_IDX_INO || inode->i_ino == EXT3_ACL_DATA_INO) goto no_delete; lock_kernel(); handle = start_transaction(inode); if (IS_ERR(handle)) { /* If we're going to skip the normal cleanup, we still * need to make sure that the in-core orphan linked list * is properly cleaned up. */ ext3_orphan_del(NULL, inode); ext3_std_error(inode->i_sb, PTR_ERR(handle)); unlock_kernel(); goto no_delete; } if (IS_SYNC(inode)) handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) ext3_truncate(inode); /* * Kill off the orphan record which ext3_truncate created. * AKPM: I think this can be inside the above `if'. * Note that ext3_orphan_del() has to be able to cope with the * deletion of a non-existent orphan - this is because we don't * know if ext3_truncate() actually created an orphan record. * (Well, we could do this if we need to, but heck - it works) */ ext3_orphan_del(handle, inode); inode->u.ext3_i.i_dtime = CURRENT_TIME; /* * One subtle ordering requirement: if anything has gone wrong * (transaction abort, IO errors, whatever), then we can still * do these next steps (the fs will already have been marked as * having errors), but we can't free the inode if the mark_dirty * fails. */ if (ext3_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. 
*/ clear_inode(inode); else ext3_free_inode(handle, inode); ext3_journal_stop(handle, inode); unlock_kernel(); return;no_delete: clear_inode(inode); /* We must guarantee clearing of inode... */}void ext3_discard_prealloc (struct inode * inode){#ifdef EXT3_PREALLOCATE lock_kernel(); /* Writer: ->i_prealloc* */ if (inode->u.ext3_i.i_prealloc_count) { unsigned short total = inode->u.ext3_i.i_prealloc_count; unsigned long block = inode->u.ext3_i.i_prealloc_block; inode->u.ext3_i.i_prealloc_count = 0; inode->u.ext3_i.i_prealloc_block = 0; /* Writer: end */ ext3_free_blocks (inode, block, total); } unlock_kernel();#endif}static int ext3_alloc_block (handle_t *handle, struct inode * inode, unsigned long goal, int *err){#ifdef EXT3FS_DEBUG static unsigned long alloc_hits = 0, alloc_attempts = 0;#endif unsigned long result;#ifdef EXT3_PREALLOCATE /* Writer: ->i_prealloc* */ if (inode->u.ext3_i.i_prealloc_count && (goal == inode->u.ext3_i.i_prealloc_block || goal + 1 == inode->u.ext3_i.i_prealloc_block)) { result = inode->u.ext3_i.i_prealloc_block++; inode->u.ext3_i.i_prealloc_count--; /* Writer: end */ ext3_debug ("preallocation hit (%lu/%lu).\n", ++alloc_hits, ++alloc_attempts); } else { ext3_discard_prealloc (inode); ext3_debug ("preallocation miss (%lu/%lu).\n", alloc_hits, ++alloc_attempts); if (S_ISREG(inode->i_mode)) result = ext3_new_block (inode, goal, &inode->u.ext3_i.i_prealloc_count, &inode->u.ext3_i.i_prealloc_block, err); else result = ext3_new_block (inode, goal, 0, 0, err); /* * AKPM: this is somewhat sticky. I'm not surprised it was * disabled in 2.2's ext3. Need to integrate b_committed_data * guarding with preallocation, if indeed preallocation is * effective. 
*/ }#else result = ext3_new_block (handle, inode, goal, 0, 0, err);#endif return result;}typedef struct { u32 *p; u32 key; struct buffer_head *bh;} Indirect;static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v){ p->key = *(p->p = v); p->bh = bh;}static inline int verify_chain(Indirect *from, Indirect *to){ while (from <= to && from->key == *from->p) from++; return (from > to);}/** * ext3_block_to_path - parse the block number into array of offsets * @inode: inode in question (we are only interested in its superblock) * @i_block: block number to be parsed * @offsets: array to store the offsets in * * To store the locations of file's data ext3 uses a data structure common * for UNIX filesystems - tree of pointers anchored in the inode, with * data blocks at leaves and indirect blocks in intermediate nodes. * This function translates the block number into path in that tree - * return value is the path length and @offsets[n] is the offset of * pointer to (n+1)th node in the nth one. If @block is out of range * (negative or too large) warning is printed and zero returned. * * Note: function doesn't find node addresses, so no IO is needed. All * we need to know is the capacity of indirect blocks (taken from the * inode->i_sb). *//* * Portability note: the last comparison (check that we fit into triple * indirect block) is spelled differently, because otherwise on an * architecture with 32-bit longs and 8Kb pages we might get into trouble * if our filesystem had 8Kb blocks. We might use long long, but that would * kill us on x86. Oh, well, at least the sign propagation does not matter - * i_block would have to be negative in the very beginning, so we would not * get there at all. 
*/

/*
 * Translate logical block @i_block into the chain of pointer offsets
 * that leads to it.  The return value is the number of entries written
 * to @offsets (1 for a direct block up to 4 for a triple-indirect one),
 * or 0 after a warning when @i_block is negative or beyond the
 * triple-indirect range.
 */
static int ext3_block_to_path(struct inode *inode, long i_block, int offsets[4])
{
	const int per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
	const int shift = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
	const long n_direct = EXT3_NDIR_BLOCKS;
	const long n_indirect = per_block;
	const long n_double = (1 << (shift * 2));

	if (i_block < 0) {
		ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
		return 0;
	}

	/* Direct pointer held in the inode itself. */
	if (i_block < n_direct) {
		offsets[0] = i_block;
		return 1;
	}

	/* Single indirect. */
	i_block -= n_direct;
	if (i_block < n_indirect) {
		offsets[0] = EXT3_IND_BLOCK;
		offsets[1] = i_block;
		return 2;
	}

	/* Double indirect. */
	i_block -= n_indirect;
	if (i_block < n_double) {
		offsets[0] = EXT3_DIND_BLOCK;
		offsets[1] = i_block >> shift;
		offsets[2] = i_block & (per_block - 1);
		return 3;
	}

	/*
	 * Triple indirect.  The range check is done on the top-level index
	 * rather than on a block count; see the portability note preceding
	 * this function.
	 */
	i_block -= n_double;
	if ((i_block >> (shift * 2)) < per_block) {
		offsets[0] = EXT3_TIND_BLOCK;
		offsets[1] = i_block >> (shift * 2);
		offsets[2] = (i_block >> shift) & (per_block - 1);
		offsets[3] = i_block & (per_block - 1);
		return 4;
	}

	ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big");
	return 0;
}

/**
 *	ext3_get_branch - read the chain of indirect blocks leading to data
 *	@inode: inode in question
 *	@depth: depth of the chain (1 - direct pointer, etc.)
 *	@offsets: offsets of pointers in inode/indirect blocks
 *	@chain: place to store the result
 *	@err: here we store the error value
 *
 *	Function fills the array of triples <key, p, bh> and returns %NULL
 *	if everything went OK or the pointer to the last filled triple
 *	(incomplete one) otherwise. Upon the return chain[i].key contains
 *	the number of (i+1)-th block in the chain (as it is stored in memory,
 *	i.e. little-endian 32-bit), chain[i].p contains the address of that
 *	number (it points into struct inode for i==0 and into the bh->b_data
 *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
 *	block for i>0 and NULL for i==0.
In other words, it holds the block * numbers of the chain, addresses they were taken from (and where we can * verify that chain did not change) and buffer_heads hosting these * numbers. * * Function stops when it stumbles upon zero pointer (absent block) * (pointer to last triple returned, *@err == 0) * or when it gets an IO error reading an indirect block * (ditto, *@err == -EIO) * or when it notices that chain had been changed while it was reading * (ditto, *@err == -EAGAIN) * or when it reads all @depth-1 indirect blocks successfully and finds * the whole chain, all way to the data (returns %NULL, *err == 0). */static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, Indirect chain[4], int *err){ struct super_block *sb = inode->i_sb; Indirect *p = chain; struct buffer_head *bh; *err = 0; /* i_data is not going away, no lock needed */ add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets); if (!p->key) goto no_block; while (--depth) { bh = sb_bread(sb, le32_to_cpu(p->key)); if (!bh) goto failure; /* Reader: pointers */ if (!verify_chain(chain, p)) goto changed; add_chain(++p, bh, (u32*)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) goto no_block; } return NULL;changed: *err = -EAGAIN; goto no_block;failure: *err = -EIO;no_block: return p;}/** * ext3_find_near - find a place for allocation with sufficient locality * @inode: owner * @ind: descriptor of indirect block. * * This function returns the prefered place for block allocation. * It is used when heuristic for sequential allocation fails. * Rules are:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -