inode.c
来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 2,219 行 · 第 1/5 页
C
2,219 行
if (handle->h_transaction->t_state == T_LOCKED) { /* * Huge direct-io writes can hold off commits for long * periods of time. Let this commit run. */ ext3_journal_stop(handle); handle = ext3_journal_start(inode, DIO_CREDITS); if (IS_ERR(handle)) ret = PTR_ERR(handle); goto get_block; } if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) { /* * Getting low on buffer credits... */ ret = ext3_journal_extend(handle, DIO_CREDITS); if (ret > 0) { /* * Couldn't extend the transaction. Start a new one. */ ret = ext3_journal_restart(handle, DIO_CREDITS); } }get_block: if (ret == 0) ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create, 0); bh_result->b_size = (1 << inode->i_blkbits); return ret;}/* * `handle' can be NULL if create is zero */struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode, long block, int create, int * errp){ struct buffer_head dummy; int fatal = 0, err; J_ASSERT(handle != NULL || create == 0); dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); if (!*errp && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = sb_getblk(inode->i_sb, dummy.b_blocknr); if (buffer_new(&dummy)) { J_ASSERT(create != 0); J_ASSERT(handle != 0); /* Now that we do not always journal data, we should keep in mind whether this should always journal the new buffer as metadata. For now, regular file writes use ext3_get_block instead, so it's not a problem. */ lock_buffer(bh); BUFFER_TRACE(bh, "call get_create_access"); fatal = ext3_journal_get_create_access(handle, bh); if (!fatal && !buffer_uptodate(bh)) { memset(bh->b_data, 0, inode->i_sb->s_blocksize); set_buffer_uptodate(bh); } unlock_buffer(bh); BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); err = ext3_journal_dirty_metadata(handle, bh); if (!fatal) fatal = err; } else { BUFFER_TRACE(bh, "not a new buffer"); } if (fatal) { *errp = fatal; brelse(bh); bh = NULL; } return bh; } return NULL;}struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode, int block, int create, int *err){ struct buffer_head * bh; int prev_blocks; prev_blocks = inode->i_blocks; bh = ext3_getblk (handle, inode, block, create, err); if (!bh) return bh;#ifdef EXT3_PREALLOCATE /* * If the inode has grown, and this is a directory, then use a few * more of the preallocated blocks to keep directory fragmentation * down. The preallocated blocks are guaranteed to be contiguous. */ if (create && S_ISDIR(inode->i_mode) && inode->i_blocks > prev_blocks && EXT3_HAS_COMPAT_FEATURE(inode->i_sb, EXT3_FEATURE_COMPAT_DIR_PREALLOC)) { int i; struct buffer_head *tmp_bh; for (i = 1; EXT3_I(inode)->i_prealloc_count && i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; i++) { /* * ext3_getblk will zero out the contents of the * directory for us */ tmp_bh = ext3_getblk(handle, inode, block+i, create, err); if (!tmp_bh) { brelse (bh); return 0; } brelse (tmp_bh); } }#endif if (buffer_uptodate(bh)) return bh; ll_rw_block (READ, 1, &bh); wait_on_buffer (bh); if (buffer_uptodate(bh)) return bh; brelse (bh); *err = -EIO; return NULL;}static int walk_page_buffers( handle_t *handle, struct buffer_head *head, unsigned from, unsigned to, int *partial, int (*fn)( handle_t *handle, struct buffer_head *bh)){ struct buffer_head *bh; unsigned block_start, block_end; unsigned blocksize = head->b_size; int err, ret = 0; struct buffer_head *next; for ( bh = head, block_start = 0; ret == 0 && (bh != head || !block_start); block_start = block_end, bh = next) { next = bh->b_this_page; block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (partial && !buffer_uptodate(bh)) *partial = 1; continue; } err = (*fn)(handle, bh); if (!ret) ret = err; } return ret;}/* * To preserve ordering, it is essential that the hole instantiation and * the data write be encapsulated in a single transaction. We cannot * close off a transaction and start a new one between the ext3_get_block() * and the commit_write(). So doing the journal_start at the start of * prepare_write() is the right place. * * Also, this function can nest inside ext3_writepage() -> * block_write_full_page(). In that case, we *know* that ext3_writepage() * has generated enough buffer credits to do the whole page. So we won't * block on the journal in that case, which is good, because the caller may * be PF_MEMALLOC. * * By accident, ext3 can be reentered when a transaction is open via * quota file writes. If we were to commit the transaction while thus * reentered, there can be a deadlock - we would be holding a quota * lock, and the commit would never complete if another thread had a * transaction open and was blocking on the quota lock - a ranking * violation. * * So what we do is to rely on the fact that journal_stop/journal_start * will _not_ run commit under these circumstances because handle->h_ref * is elevated. We'll still have enough credits for the tiny quotafile * write. */static int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh){ if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; return ext3_journal_get_write_access(handle, bh);}static int ext3_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to){ struct inode *inode = page->mapping->host; int ret, needed_blocks = ext3_writepage_trans_blocks(inode); handle_t *handle; int retries = 0;retry: handle = ext3_journal_start(inode, needed_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; } ret = block_prepare_write(page, from, to, ext3_get_block); if (ret) goto prepare_write_failed; if (ext3_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); }prepare_write_failed: if (ret) ext3_journal_stop(handle); if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry;out: return ret;}static intext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh){ int err = journal_dirty_data(handle, bh); if (err) ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, bh, handle,err); return err;}/* For commit_write() in data=journal mode */static int commit_write_fn(handle_t *handle, struct buffer_head *bh){ if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; set_buffer_uptodate(bh); return ext3_journal_dirty_metadata(handle, bh);}/* * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). * * ext3 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. */static int ext3_ordered_commit_write(struct file *file, struct page *page, unsigned from, unsigned to){ handle_t *handle = ext3_journal_current_handle(); struct inode *inode = page->mapping->host; int ret = 0, ret2; ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, ext3_journal_dirty_data); if (ret == 0) { /* * generic_commit_write() will run mark_inode_dirty() if i_size * changes. So let's piggyback the i_disksize mark_inode_dirty * into that. */ loff_t new_i_size; new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; ret = generic_commit_write(file, page, from, to); } ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; return ret;}static int ext3_writeback_commit_write(struct file *file, struct page *page, unsigned from, unsigned to){ handle_t *handle = ext3_journal_current_handle(); struct inode *inode = page->mapping->host; int ret = 0, ret2; loff_t new_i_size; new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; ret = generic_commit_write(file, page, from, to); ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; return ret;}static int ext3_journalled_commit_write(struct file *file, struct page *page, unsigned from, unsigned to){ handle_t *handle = ext3_journal_current_handle(); struct inode *inode = page->mapping->host; int ret = 0, ret2; int partial = 0; loff_t pos; /* * Here we duplicate the generic_commit_write() functionality */ pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; ret = walk_page_buffers(handle, page_buffers(page), from, to, &partial, commit_write_fn); if (!partial) SetPageUptodate(page); if (pos > inode->i_size) i_size_write(inode, pos); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; ret2 = ext3_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; } ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; return ret;}/* * bmap() is special. It gets used by applications such as lilo and by * the swapper to find the on-disk block of a specific piece of data. * * Naturally, this is dangerous if the block concerned is still in the * journal. If somebody makes a swapfile on an ext3 data-journaling * filesystem and enables swap, then they may get a nasty shock when the * data getting swapped to that swapfile suddenly gets overwritten by * the original zero's written out previously to the journal and * awaiting writeback in the kernel's buffer cache. * * So, if we see any bmap calls here on a modified, data-journaled file, * take extra steps to flush any blocks which might be in the cache. */static sector_t ext3_bmap(struct address_space *mapping, sector_t block){ struct inode *inode = mapping->host; journal_t *journal; int err; if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: * only if we run lilo or swapon on a freshly made file * do we expect this to happen. * * (bmap requires CAP_SYS_RAWIO so this does not * represent an unprivileged user DOS attack --- we'd be * in trouble if mortal users could trigger this path at * will.) * * NB. EXT3_STATE_JDATA is not set on files other than * regular files. If somebody wants to bmap a directory * or symlink and gets confused because the buffer * hasn't yet been flushed to disk, they deserve * everything they get. */ EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; journal = EXT3_JOURNAL(inode); journal_lock_updates(journal); err = journal_flush(journal); journal_unlock_updates(journal); if (err) return 0; } return generic_block_bmap(mapping,block,ext3_get_block);}static int bget_one(handle_t *handle, struct buffer_head *bh){ get_bh(bh); return 0;}static int bput_one(handle_t *handle, struct buffer_head *bh){ put_bh(bh); return 0;}static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh){ if (buffer_mapped(bh)) return ext3_journal_dirty_data(handle, bh); return 0;}/* * Note that we always start a transaction even if we're not journalling * data. This is to preserve ordering: any hole instantiation within * __block_write_full_page -> ext3_get_block() should be journalled * along with the data so we don't crash and then get metadata which * refers to old data. * * In all journalling modes block_write_full_page() will start the I/O. * * Problem: * * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> * ext3_writepage() * * Similar for: * * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... * * Same applies to ext3_get_block(). We will deadlock on various things like * lock_journal and i_truncate_sem. * * Setting PF_MEMALLOC here doesn't work - too many internal memory * allocations fail. * * 16May01: If we're reentered then journal_current_handle() will be * non-zero. We simply *return*. * * 1 July 2001: @@@ FIXME: * In journalled data mode, a data buffer may be metadata against the
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?