📄 inode.c
字号:
	/*
	 * (tail of ext3_getblk)  A freshly-allocated block must be zeroed
	 * and journalled as metadata before being handed back, so that
	 * stale on-disk contents never become visible after a crash.
	 */
	if (buffer_new(&dummy)) {
		/* A brand-new block can only appear on a create request,
		 * and only inside an open transaction. */
		J_ASSERT(create != 0);
		J_ASSERT(handle != 0);

		/* Now that we do not always journal data, we should
		 * keep in mind whether this should always journal the
		 * new buffer as metadata.  For now, regular file writes
		 * use ext3_get_block instead, so it's not a problem. */
		lock_kernel();
		lock_buffer(bh);
		BUFFER_TRACE(bh, "call get_create_access");
		fatal = ext3_journal_get_create_access(handle, bh);
		if (!fatal) {
			/* Zero the new block's contents. */
			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
			mark_buffer_uptodate(bh, 1);
		}
		unlock_buffer(bh);
		BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
		err = ext3_journal_dirty_metadata(handle, bh);
		/* Keep the first error seen: get_create_access wins over
		 * dirty_metadata. */
		if (!fatal)
			fatal = err;
		unlock_kernel();
	} else {
		BUFFER_TRACE(bh, "not a new buffer");
	}
	if (fatal) {
		/* Journal error: report it and drop our reference. */
		*errp = fatal;
		brelse(bh);
		bh = NULL;
	}
	return bh;
	}
	return NULL;
}

/*
 * ext3_bread: return a buffer_head holding logical block @block of
 * @inode, reading it from disk if it is not already uptodate.
 *
 * @create and @err are passed through to ext3_getblk(); *err is set to
 * -EIO if the read fails.  Returns NULL on any failure.
 */
struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
			       int block, int create, int *err)
{
	struct buffer_head *bh;
	int prev_blocks;

	/* Snapshot the block count so we can tell below whether
	 * ext3_getblk() grew the inode (used by the preallocation
	 * path under EXT3_PREALLOCATE). */
	prev_blocks = inode->i_blocks;

	bh = ext3_getblk(handle, inode, block, create, err);
	if (!bh)
		return bh;
#ifdef EXT3_PREALLOCATE
	/*
	 * If the inode has grown, and this is a directory, then use a few
	 * more of the preallocated blocks to keep directory fragmentation
	 * down.  The preallocated blocks are guaranteed to be contiguous.
	 */
	if (create &&
	    S_ISDIR(inode->i_mode) &&
	    inode->i_blocks > prev_blocks &&
	    EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
				    EXT3_FEATURE_COMPAT_DIR_PREALLOC)) {
		int i;
		struct buffer_head *tmp_bh;

		/* Instantiate further contiguous blocks while the inode
		 * still has preallocated blocks, up to the superblock's
		 * s_prealloc_dir_blocks limit. */
		for (i = 1;
		     inode->u.ext3_i.i_prealloc_count &&
		     i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
		     i++) {
			/*
			 * ext3_getblk will zero out the contents of the
			 * directory for us
			 */
			tmp_bh = ext3_getblk(handle, inode,
					     block + i, create, err);
			if (!tmp_bh) {
				brelse(bh);
				return 0;
			}
			brelse(tmp_bh);
		}
	}
#endif
	/* Already valid in the buffer cache: no I/O needed. */
	if (buffer_uptodate(bh))
		return bh;

	/* Synchronously read the block and re-check. */
	ll_rw_block(READ, 1, &bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return bh;

	/* The read failed: drop the reference and report -EIO. */
	brelse(bh);
	*err = -EIO;
	return NULL;
}

/*
 * walk_page_buffers: apply @fn to every buffer of @head's page whose
 * byte range [block_start, block_end) overlaps [@from, @to).
 *
 * Buffers wholly outside the range are skipped; if @partial is non-NULL
 * and such a skipped buffer is not uptodate, *partial is set to 1 (the
 * page cannot be marked fully uptodate by the caller).  The walk stops
 * after the first buffer for which @fn returns non-zero, and that value
 * is returned; 0 if all calls succeed.
 */
static int walk_page_buffers(handle_t *handle,
			     struct buffer_head *head,
			     unsigned from,
			     unsigned to,
			     int *partial,
			     int (*fn)(handle_t *handle,
				       struct buffer_head *bh))
{
	struct buffer_head *bh;
	unsigned block_start, block_end;
	unsigned blocksize = head->b_size;
	int err, ret = 0;

	/*
	 * The page's buffers form a circular list: iterate until we come
	 * back around to @head (block_start != 0 distinguishes the
	 * return from the very first visit).
	 */
	for (bh = head, block_start = 0;
	     ret == 0 && (bh != head || !block_start);
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + blocksize;
		if (block_end <= from || block_start >= to) {
			if (partial && !buffer_uptodate(bh))
				*partial = 1;
			continue;
		}
		err = (*fn)(handle, bh);
		/* ret is 0 here (loop condition), so this records the
		 * first failure and terminates the walk. */
		if (!ret)
			ret = err;
	}
	return ret;
}

/*
 * To preserve ordering, it is essential that the hole instantiation and
 * the data write be encapsulated in a single transaction.  We cannot
 * close off a transaction and start a new one between the ext3_get_block()
 * and the commit_write().  So doing the journal_start at the start of
 * prepare_write() is the right place.
 *
 * Also, this function can nest inside ext3_writepage() ->
 * block_write_full_page().  In that case, we *know* that ext3_writepage()
 * has generated enough buffer credits to do the whole page.  So we won't
 * block on the journal in that case, which is good, because the caller may
 * be PF_MEMALLOC.
 *
 * By accident, ext3 can be reentered when a transaction is open via
 * quota file writes.
 * If we were to commit the transaction while thus
 * reentered, there can be a deadlock - we would be holding a quota
 * lock, and the commit would never complete if another thread had a
 * transaction open and was blocking on the quota lock - a ranking
 * violation.
 *
 * So what we do is to rely on the fact that journal_stop/journal_start
 * will _not_ run commit under these circumstances because handle->h_ref
 * is elevated.  We'll still have enough credits for the tiny quotafile
 * write.
 */

/* Trivial walk_page_buffers() callback: declare @bh to the journal for
 * write access. */
static int do_journal_get_write_access(handle_t *handle,
				       struct buffer_head *bh)
{
	return ext3_journal_get_write_access(handle, bh);
}

/*
 * ext3_prepare_write: open a journal handle sized for a full page of
 * block instantiation, then let block_prepare_write() map (and possibly
 * allocate) the blocks.  In data=journal mode, additionally obtain
 * journal write access for every buffer in the written range.
 *
 * On success the handle is left open; ext3_commit_write() stops it.
 * On failure the handle is stopped here and block_prepare_write()'s
 * kmap() is undone.  Returns 0 or a negative errno.
 */
static int ext3_prepare_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;
	/* NOTE(review): this initial value is overwritten unconditionally
	 * below; nesting is instead handled via handle->h_ref (see the
	 * comment above). */
	handle_t *handle = ext3_journal_current_handle();
	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);

	lock_kernel();
	handle = ext3_journal_start(inode, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}
	/* Drop the BKL across the (potentially blocking) block mapping. */
	unlock_kernel();
	ret = block_prepare_write(page, from, to, ext3_get_block);
	lock_kernel();
	if (ret != 0)
		goto prepare_write_failed;

	if (ext3_should_journal_data(inode)) {
		ret = walk_page_buffers(handle, page->buffers,
			from, to, NULL, do_journal_get_write_access);
		if (ret) {
			/*
			 * We're going to fail this prepare_write(),
			 * so commit_write() will not be called.
			 * We need to undo block_prepare_write()'s kmap().
			 * AKPM: Do we need to clear PageUptodate? I don't
			 * think so.
			 */
			kunmap(page);
		}
	}
prepare_write_failed:
	if (ret)
		ext3_journal_stop(handle, inode);
out:
	unlock_kernel();
	return ret;
}

/* walk_page_buffers() callback: journal @bh as ordered data.  The final
 * flag (0 here, 1 in the async variant below) selects the mode — see
 * ext3_journal_dirty_data() for the exact semantics. */
static int journal_dirty_sync_data(handle_t *handle, struct buffer_head *bh)
{
	return ext3_journal_dirty_data(handle, bh, 0);
}

/*
 * For ext3_writepage().  We also brelse() the buffer to account for
 * the bget() which ext3_writepage() performs.
 */
static int journal_dirty_async_data(handle_t *handle, struct buffer_head *bh)
{
	int ret = ext3_journal_dirty_data(handle, bh, 1);

	/* Drop the extra reference that ext3_writepage() took. */
	__brelse(bh);
	return ret;
}

/* For commit_write() in data=journal mode */
static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
{
	/* The buffer now holds the caller's new data: mark it uptodate
	 * and journal it as metadata (data=journal treats file data as
	 * journalled metadata). */
	set_bit(BH_Uptodate, &bh->b_state);
	return ext3_journal_dirty_metadata(handle, bh);
}

/*
 * We need to pick up the new inode size which generic_commit_write gave us
 * `file' can be NULL - eg, when called from block_symlink().
 *
 * ext3 inode->i_dirty_buffers policy:  If we're journalling data we
 * definitely don't want them to appear on the inode at all - instead
 * we need to manage them at the JBD layer and we need to intercept
 * the relevant sync operations and translate them into journal operations.
 *
 * If we're not journalling data then we can just leave the buffers
 * on ->i_dirty_buffers.  If someone writes them out for us then thanks.
 * Otherwise we'll do it in commit, if we're using ordered data.
 */
static int ext3_commit_write(struct file *file, struct page *page,
			     unsigned from, unsigned to)
{
	handle_t *handle = ext3_journal_current_handle();
	struct inode *inode = page->mapping->host;
	int ret = 0, ret2;

	lock_kernel();
	if (ext3_should_journal_data(inode)) {
		/*
		 * Here we duplicate the generic_commit_write() functionality
		 */
		int partial = 0;
		loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

		ret = walk_page_buffers(handle, page->buffers,
			from, to, &partial, commit_write_fn);
		/* Only mark the whole page uptodate if no buffer outside
		 * the written range was left !uptodate. */
		if (!partial)
			SetPageUptodate(page);
		kunmap(page);
		if (pos > inode->i_size)
			inode->i_size = pos;
		/* Record that journalled data for this inode may still be
		 * sitting in the journal — ext3_bmap() checks this flag. */
		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
	} else {
		if (ext3_should_order_data(inode)) {
			ret = walk_page_buffers(handle, page->buffers,
				from, to, NULL, journal_dirty_sync_data);
		}
		/* Be careful here if generic_commit_write becomes a
		 * required invocation after block_prepare_write.
		 */
		if (ret == 0) {
			ret = generic_commit_write(file, page, from, to);
		} else {
			/*
			 * block_prepare_write() was called, but we're not
			 * going to call generic_commit_write().  So we
			 * need to perform generic_commit_write()'s kunmap
			 * by hand.
			 */
			kunmap(page);
		}
	}
	/* Propagate any i_size extension to the on-disk size, and mark
	 * the inode dirty inside this transaction. */
	if (inode->i_size > inode->u.ext3_i.i_disksize) {
		inode->u.ext3_i.i_disksize = inode->i_size;
		ret2 = ext3_mark_inode_dirty(handle, inode);
		if (!ret)
			ret = ret2;
	}
	/* Close the handle opened by ext3_prepare_write(); preserve the
	 * first error seen. */
	ret2 = ext3_journal_stop(handle, inode);
	unlock_kernel();
	if (!ret)
		ret = ret2;
	return ret;
}

/*
 * bmap() is special.  It gets used by applications such as lilo and by
 * the swapper to find the on-disk block of a specific piece of data.
 *
 * Naturally, this is dangerous if the block concerned is still in the
 * journal.  If somebody makes a swapfile on an ext3 data-journaling
 * filesystem and enables swap, then they may get a nasty shock when the
 * data getting swapped to that swapfile suddenly gets overwritten by
 * the original zeros written out previously to the journal and
 * awaiting writeback in the kernel's buffer cache.
 *
 * So, if we see any bmap calls here on a modified, data-journaled file,
 * take extra steps to flush any blocks which might be in the cache.
 */
static int ext3_bmap(struct address_space *mapping, long block)
{
	struct inode *inode = mapping->host;
	journal_t *journal;
	int err;

	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
		/*
		 * This is a REALLY heavyweight approach, but the use of
		 * bmap on dirty files is expected to be extremely rare:
		 * only if we run lilo or swapon on a freshly made file
		 * do we expect this to happen.
		 *
		 * (bmap requires CAP_SYS_RAWIO so this does not
		 * represent an unprivileged user DOS attack --- we'd be
		 * in trouble if mortal users could trigger this path at
		 * will.)
		 *
		 * NB. EXT3_STATE_JDATA is not set on files other than
		 * regular files.  If somebody wants to bmap a directory
		 * or symlink and gets confused because the buffer
		 * hasn't yet been flushed to disk, they deserve
		 * everything they get.
		 */
		EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;
		journal = EXT3_JOURNAL(inode);

		/* Quiesce the journal and force everything out to disk so
		 * the block number we report reflects reality. */
		journal_lock_updates(journal);
		err = journal_flush(journal);
		journal_unlock_updates(journal);

		/* Flush failure: report "no block" rather than a possibly
		 * stale mapping. */
		if (err)
			return 0;
	}

	return generic_block_bmap(mapping, block, ext3_get_block);
}

/* Matches the walk_page_buffers() callback signature: take an extra
 * reference on @bh (released later via __brelse() in
 * journal_dirty_async_data()). */
static int bget_one(handle_t *handle, struct buffer_head *bh)
{
	atomic_inc(&bh->b_count);
	return 0;
}

/*
 * Note that we always start a transaction even if we're not journalling
 * data.  This is to preserve ordering: any hole instantiation within
 * __block_write_full_page -> ext3_get_block() should be journalled
 * along with the data so we don't crash and then get metadata which
 * refers to old data.
 *
 * In all journalling modes block_write_full_page() will start the I/O.
 *
 * Problem:
 *
 * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
 * ext3_writepage()
 *
 * Similar for:
 *
 * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
 *
 * Same applies to ext3_get_block().  We will deadlock on various things
 * like lock_journal and i_truncate_sem.
 *
 * Setting PF_MEMALLOC here doesn't work - too many internal memory
 * allocations fail.
 *
 * 16May01: If we're reentered then journal_current_handle() will be
 * non-zero.  We simply *return*.
 *
 * 1 July 2001: @@@ FIXME:
 * In journalled data mode, a data buffer may be metadata against the
 * current transaction.  But the same file is part of a shared mapping
 * and someone does a writepage() on it.
 *
 * We will move the buffer onto the async_data list, but *after* it has
 * been dirtied.  So there's a small window where we have dirty data on
 * BJ_Metadata.
 *
 * Note that this only applies to the last partial page in the file.  The
 * bit which block_write_full_page() uses prepare/commit for.  (That's
 * broken code anyway: it's wrong for msync()).
 *
 * It's a rare case: affects the final partial page, for journalled data
 * where the file is subject to both write() and writepage() in the same
 * transaction.  To fix it we'll need a custom block_write_full_page().
 * We'll probably need that anyway for journalling writepage() output.
 *
 * We don't honour synchronous mounts for writepage().  That would be
 * disastrous.  Any write() or metadata operation will sync the fs for
 * us.
 */
static int ext3_writepage(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *page_buffers;
	handle_t *handle = NULL;
	int ret = 0, err;
	int needed;
	int order_data;

	J_ASSERT(PageLocked(page));

	/*
	 * We give up here if we're reentered, because it might be
	 * for a different filesystem.  One *could* look for a
	 * nested transaction opportunity.
	 */
	lock_kernel();
	if (ext3_journal_current_handle())
		goto out_fail;

	needed = ext3_writepage_trans_blocks(inode);

	/* Under memory pressure we must not block on the journal (see
	 * the deadlock discussion above) — try_start presumably fails
	 * instead of sleeping; confirm against ext3_journal_try_start(). */
	if (current->flags & PF_MEMALLOC)
		handle = ext3_journal_try_start(inode, needed);
	else
		handle = ext3_journal_start(inode, needed);

	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	order_data = ext3_should_order_data(inode) ||
			ext3_should_journal_data(inode);

	unlock_kernel();

	page_buffers = NULL;	/* Purely to prevent compiler warning */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -