📄 inode.c
字号:
/* * linux/fs/ext3/inode.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/inode.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Goal-directed block allocation by Stephen Tweedie * (sct@redhat.com), 1993, 1998 * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) * * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 */#include <linux/module.h>#include <linux/fs.h>#include <linux/time.h>#include <linux/ext3_jbd.h>#include <linux/jbd.h>#include <linux/highuid.h>#include <linux/pagemap.h>#include <linux/quotaops.h>#include <linux/string.h>#include <linux/buffer_head.h>#include <linux/writeback.h>#include <linux/mpage.h>#include <linux/uio.h>#include <linux/bio.h>#include "xattr.h"#include "acl.h"static int ext3_writepage_trans_blocks(struct inode *inode);/* * Test whether an inode is a fast symlink. */static int ext3_inode_is_fast_symlink(struct inode *inode){ int ea_blocks = EXT3_I(inode)->i_file_acl ? (inode->i_sb->s_blocksize >> 9) : 0; return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);}/* * The ext3 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. * * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. */int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext3_fsblk_t blocknr){ int err; might_sleep(); BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " "data mode %lx\n", bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); /* Never use the revoke function if we are doing full data * journaling: there is no need to, and a V1 superblock won't * support it. Otherwise, only skip the revoke on un-journaled * data blocks. */ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || (!is_metadata && !ext3_should_journal_data(inode))) { if (bh) { BUFFER_TRACE(bh, "call journal_forget"); return ext3_journal_forget(handle, bh); } return 0; } /* * data!=journal && (is_metadata || should_journal_data(inode)) */ BUFFER_TRACE(bh, "call ext3_journal_revoke"); err = ext3_journal_revoke(handle, blocknr, bh); if (err) ext3_abort(inode->i_sb, __FUNCTION__, "error %d when attempting revoke", err); BUFFER_TRACE(bh, "exit"); return err;}/* * Work out how many blocks we need to proceed with the next chunk of a * truncate transaction. */static unsigned long blocks_for_truncate(struct inode *inode){ unsigned long needed; needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); /* Give ourselves just enough room to cope with inodes in which * i_blocks is corrupt: we've seen disk corruptions in the past * which resulted in random data in an inode which looked enough * like a regular file for ext3 to try to delete it. Things * will go a bit crazy if that happens, but at least we should * try not to panic the whole kernel. */ if (needed < 2) needed = 2; /* But we need to bound the transaction so we don't overflow the * journal. */ if (needed > EXT3_MAX_TRANS_DATA) needed = EXT3_MAX_TRANS_DATA; return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;}/* * Truncate transactions can be complex and absolutely huge. So we need to * be able to restart the transaction at a conventient checkpoint to make * sure we don't overflow the journal. * * start_transaction gets us a new handle for a truncate transaction, * and extend_transaction tries to extend the existing one a bit. If * extend fails, we need to propagate the failure up and restart the * transaction in the top-level truncate loop. --sct */static handle_t *start_transaction(struct inode *inode){ handle_t *result; result = ext3_journal_start(inode, blocks_for_truncate(inode)); if (!IS_ERR(result)) return result; ext3_std_error(inode->i_sb, PTR_ERR(result)); return result;}/* * Try to extend this transaction for the purposes of truncation. * * Returns 0 if we managed to create more room. If we can't create more * room, and the transaction must be restarted we return 1. */static int try_to_extend_transaction(handle_t *handle, struct inode *inode){ if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) return 0; if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) return 0; return 1;}/* * Restart the transaction associated with *handle. This does a commit, * so before we call here everything must be consistently dirtied against * this transaction. */static int ext3_journal_test_restart(handle_t *handle, struct inode *inode){ jbd_debug(2, "restarting handle %p\n", handle); return ext3_journal_restart(handle, blocks_for_truncate(inode));}/* * Called at the last iput() if i_nlink is zero. */void ext3_delete_inode (struct inode * inode){ handle_t *handle; truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) goto no_delete; handle = start_transaction(inode); if (IS_ERR(handle)) { /* * If we're going to skip the normal cleanup, we still need to * make sure that the in-core orphan linked list is properly * cleaned up. */ ext3_orphan_del(NULL, inode); goto no_delete; } if (IS_SYNC(inode)) handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) ext3_truncate(inode); /* * Kill off the orphan record which ext3_truncate created. * AKPM: I think this can be inside the above `if'. * Note that ext3_orphan_del() has to be able to cope with the * deletion of a non-existent orphan - this is because we don't * know if ext3_truncate() actually created an orphan record. * (Well, we could do this if we need to, but heck - it works) */ ext3_orphan_del(handle, inode); EXT3_I(inode)->i_dtime = get_seconds(); /* * One subtle ordering requirement: if anything has gone wrong * (transaction abort, IO errors, whatever), then we can still * do these next steps (the fs will already have been marked as * having errors), but we can't free the inode if the mark_dirty * fails. */ if (ext3_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. */ clear_inode(inode); else ext3_free_inode(handle, inode); ext3_journal_stop(handle); return;no_delete: clear_inode(inode); /* We must guarantee clearing of inode... */}typedef struct { __le32 *p; __le32 key; struct buffer_head *bh;} Indirect;static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v){ p->key = *(p->p = v); p->bh = bh;}static int verify_chain(Indirect *from, Indirect *to){ while (from <= to && from->key == *from->p) from++; return (from > to);}/** * ext3_block_to_path - parse the block number into array of offsets * @inode: inode in question (we are only interested in its superblock) * @i_block: block number to be parsed * @offsets: array to store the offsets in * @boundary: set this non-zero if the referred-to block is likely to be * followed (on disk) by an indirect block. * * To store the locations of file's data ext3 uses a data structure common * for UNIX filesystems - tree of pointers anchored in the inode, with * data blocks at leaves and indirect blocks in intermediate nodes. * This function translates the block number into path in that tree - * return value is the path length and @offsets[n] is the offset of * pointer to (n+1)th node in the nth one. If @block is out of range * (negative or too large) warning is printed and zero returned. * * Note: function doesn't find node addresses, so no IO is needed. All * we need to know is the capacity of indirect blocks (taken from the * inode->i_sb). *//* * Portability note: the last comparison (check that we fit into triple * indirect block) is spelled differently, because otherwise on an * architecture with 32-bit longs and 8Kb pages we might get into trouble * if our filesystem had 8Kb blocks. We might use long long, but that would * kill us on x86. Oh, well, at least the sign propagation does not matter - * i_block would have to be negative in the very beginning, so we would not * get there at all. */static int ext3_block_to_path(struct inode *inode, long i_block, int offsets[4], int *boundary){ int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); const long direct_blocks = EXT3_NDIR_BLOCKS, indirect_blocks = ptrs, double_blocks = (1 << (ptrs_bits * 2)); int n = 0; int final = 0; if (i_block < 0) { ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); } else if (i_block < direct_blocks) { offsets[n++] = i_block; final = direct_blocks; } else if ( (i_block -= direct_blocks) < indirect_blocks) { offsets[n++] = EXT3_IND_BLOCK; offsets[n++] = i_block; final = ptrs; } else if ((i_block -= indirect_blocks) < double_blocks) { offsets[n++] = EXT3_DIND_BLOCK; offsets[n++] = i_block >> ptrs_bits; offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { offsets[n++] = EXT3_TIND_BLOCK; offsets[n++] = i_block >> (ptrs_bits * 2); offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); return n;}/** * ext3_get_branch - read the chain of indirect blocks leading to data * @inode: inode in question * @depth: depth of the chain (1 - direct pointer, etc.) * @offsets: offsets of pointers in inode/indirect blocks * @chain: place to store the result * @err: here we store the error value * * Function fills the array of triples <key, p, bh> and returns %NULL * if everything went OK or the pointer to the last filled triple * (incomplete one) otherwise. Upon the return chain[i].key contains * the number of (i+1)-th block in the chain (as it is stored in memory, * i.e. little-endian 32-bit), chain[i].p contains the address of that * number (it points into struct inode for i==0 and into the bh->b_data * for i>0) and chain[i].bh points to the buffer_head of i-th indirect * block for i>0 and NULL for i==0. In other words, it holds the block * numbers of the chain, addresses they were taken from (and where we can * verify that chain did not change) and buffer_heads hosting these * numbers. * * Function stops when it stumbles upon zero pointer (absent block) * (pointer to last triple returned, *@err == 0) * or when it gets an IO error reading an indirect block * (ditto, *@err == -EIO) * or when it notices that chain had been changed while it was reading * (ditto, *@err == -EAGAIN) * or when it reads all @depth-1 indirect blocks successfully and finds * the whole chain, all way to the data (returns %NULL, *err == 0). */static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, Indirect chain[4], int *err){ struct super_block *sb = inode->i_sb; Indirect *p = chain; struct buffer_head *bh; *err = 0; /* i_data is not going away, no lock needed */ add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); if (!p->key) goto no_block; while (--depth) { bh = sb_bread(sb, le32_to_cpu(p->key)); if (!bh) goto failure; /* Reader: pointers */ if (!verify_chain(chain, p)) goto changed; add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) goto no_block; } return NULL;changed: brelse(bh); *err = -EAGAIN; goto no_block;failure: *err = -EIO;no_block: return p;}/** * ext3_find_near - find a place for allocation with sufficient locality * @inode: owner * @ind: descriptor of indirect block. * * This function returns the prefered place for block allocation. * It is used when heuristic for sequential allocation fails. * Rules are: * + if there is a block to the left of our position - allocate near it. * + if pointer will live in indirect block - allocate near that block. * + if pointer will live in inode - allocate in the same * cylinder group. * * In the latter case we colour the starting block by the callers PID to * prevent it from clashing with concurrent allocations for a different inode * in the same block group. The PID is used here so that functionally related * files will be close-by on-disk. * * Caller must make sure that @ind is valid and will stay that way. */static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind){ struct ext3_inode_info *ei = EXT3_I(inode); __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; __le32 *p; ext3_fsblk_t bg_start; ext3_grpblk_t colour; /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { if (*p) return le32_to_cpu(*p); } /* No such thing, so let's try location of indirect block */ if (ind->bh) return ind->bh->b_blocknr; /* * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. */ bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); colour = (current->pid % 16) * (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour;}/** * ext3_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain * @goal: place to store the result. * * Normally this function find the prefered place for block allocation,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -