inode.c
	ret = walk_page_buffers(handle, page_buffers(page), from,
				to, &partial, write_end_fn);
	if (!partial)
		SetPageUptodate(page);
	if (pos+copied > inode->i_size)
		i_size_write(inode, pos+copied);
	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
	if (inode->i_size > EXT4_I(inode)->i_disksize) {
		EXT4_I(inode)->i_disksize = inode->i_size;
		ret2 = ext4_mark_inode_dirty(handle, inode);
		if (!ret)
			ret = ret2;
	}

	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	unlock_page(page);
	page_cache_release(page);

	return ret ? ret : copied;
}

/*
 * bmap() is special.  It gets used by applications such as lilo and by
 * the swapper to find the on-disk block of a specific piece of data.
 *
 * Naturally, this is dangerous if the block concerned is still in the
 * journal.  If somebody makes a swapfile on an ext4 data-journaling
 * filesystem and enables swap, then they may get a nasty shock when the
 * data getting swapped to that swapfile suddenly gets overwritten by
 * the original zeros written out previously to the journal and
 * awaiting writeback in the kernel's buffer cache.
 *
 * So, if we see any bmap calls here on a modified, data-journaled file,
 * take extra steps to flush any blocks which might be in the cache.
 */
static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	journal_t *journal;
	int err;

	if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
		/*
		 * This is a REALLY heavyweight approach, but the use of
		 * bmap on dirty files is expected to be extremely rare:
		 * only if we run lilo or swapon on a freshly made file
		 * do we expect this to happen.
		 *
		 * (bmap requires CAP_SYS_RAWIO so this does not
		 * represent an unprivileged user DOS attack --- we'd be
		 * in trouble if mortal users could trigger this path at
		 * will.)
		 *
		 * NB. EXT4_STATE_JDATA is not set on files other than
		 * regular files.  If somebody wants to bmap a directory
		 * or symlink and gets confused because the buffer
		 * hasn't yet been flushed to disk, they deserve
		 * everything they get.
		 */
		EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA;
		journal = EXT4_JOURNAL(inode);
		jbd2_journal_lock_updates(journal);
		err = jbd2_journal_flush(journal);
		jbd2_journal_unlock_updates(journal);

		if (err)
			return 0;
	}

	return generic_block_bmap(mapping, block, ext4_get_block);
}

static int bget_one(handle_t *handle, struct buffer_head *bh)
{
	get_bh(bh);
	return 0;
}

static int bput_one(handle_t *handle, struct buffer_head *bh)
{
	put_bh(bh);
	return 0;
}

static int jbd2_journal_dirty_data_fn(handle_t *handle,
				      struct buffer_head *bh)
{
	if (buffer_mapped(bh))
		return ext4_journal_dirty_data(handle, bh);
	return 0;
}
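/*
 * Illustrative userspace counterpart (not part of inode.c): ->bmap above
 * ultimately services the FIBMAP ioctl, which is how tools such as lilo
 * or swapon map a file's logical blocks to on-disk block numbers.  A
 * minimal, hedged sketch of such a caller; the file path is hypothetical
 * and, as the comment above notes, FIBMAP needs CAP_SYS_RAWIO.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FIBMAP, FIGETBSZ */

int main(void)
{
	int fd = open("/boot/vmlinuz", O_RDONLY);	/* hypothetical path */
	int blksz;
	int blk = 0;					/* logical block 0 */

	if (fd < 0)
		return 1;
	if (ioctl(fd, FIGETBSZ, &blksz) < 0)		/* filesystem block size */
		return 1;
	if (ioctl(fd, FIBMAP, &blk) < 0)		/* needs CAP_SYS_RAWIO */
		return 1;
	printf("block size %d, logical block 0 -> physical block %d\n",
	       blksz, blk);
	close(fd);
	return 0;
}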
/*
 * Note that we always start a transaction even if we're not journalling
 * data.  This is to preserve ordering: any hole instantiation within
 * __block_write_full_page -> ext4_get_block() should be journalled
 * along with the data so we don't crash and then get metadata which
 * refers to old data.
 *
 * In all journalling modes block_write_full_page() will start the I/O.
 *
 * Problem:
 *
 * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
 *		ext4_writepage()
 *
 * Similar for:
 *
 * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
 *
 * Same applies to ext4_get_block().  We will deadlock on various things like
 * lock_journal and i_truncate_mutex.
 *
 * Setting PF_MEMALLOC here doesn't work - too many internal memory
 * allocations fail.
 *
 * 16May01: If we're reentered then journal_current_handle() will be
 *	    non-zero.  We simply *return*.
 *
 * 1 July 2001: @@@ FIXME:
 *   In journalled data mode, a data buffer may be metadata against the
 *   current transaction.  But the same file is part of a shared mapping
 *   and someone does a writepage() on it.
 *
 *   We will move the buffer onto the async_data list, but *after* it has
 *   been dirtied.  So there's a small window where we have dirty data on
 *   BJ_Metadata.
 *
 *   Note that this only applies to the last partial page in the file.  The
 *   bit which block_write_full_page() uses prepare/commit for.  (That's
 *   broken code anyway: it's wrong for msync()).
 *
 *   It's a rare case: affects the final partial page, for journalled data
 *   where the file is subject to both write() and writepage() in the same
 *   transaction.  To fix it we'll need a custom block_write_full_page().
 *   We'll probably need that anyway for journalling writepage() output.
 *
 * We don't honour synchronous mounts for writepage().  That would be
 * disastrous.  Any write() or metadata operation will sync the fs for
 * us.
 *
 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
 * we don't need to open a transaction here.
 */
static int ext4_ordered_writepage(struct page *page,
				  struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *page_bufs;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	J_ASSERT(PageLocked(page));

	/*
	 * We give up here if we're reentered, because it might be for a
	 * different filesystem.
	 */
	if (ext4_journal_current_handle())
		goto out_fail;

	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));

	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	if (!page_has_buffers(page)) {
		create_empty_buffers(page, inode->i_sb->s_blocksize,
				(1 << BH_Dirty)|(1 << BH_Uptodate));
	}
	page_bufs = page_buffers(page);
	walk_page_buffers(handle, page_bufs, 0,
			PAGE_CACHE_SIZE, NULL, bget_one);

	ret = block_write_full_page(page, ext4_get_block, wbc);

	/*
	 * The page can become unlocked at any point now, and
	 * truncate can then come in and change things.  So we
	 * can't touch *page from now on.  But *page_bufs is
	 * safe due to elevated refcount.
	 */

	/*
	 * And attach them to the current transaction.  But only if
	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
	 * and generally junk.
	 */
	if (ret == 0) {
		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
					NULL, jbd2_journal_dirty_data_fn);
		if (!ret)
			ret = err;
	}
	walk_page_buffers(handle, page_bufs, 0,
			PAGE_CACHE_SIZE, NULL, bput_one);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}

static int ext4_writeback_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	if (ext4_journal_current_handle())
		goto out_fail;

	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_fail;
	}

	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
		ret = nobh_writepage(page, ext4_get_block, wbc);
	else
		ret = block_write_full_page(page, ext4_get_block, wbc);

	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return ret;
}

static int ext4_journalled_writepage(struct page *page,
				struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	handle_t *handle = NULL;
	int ret = 0;
	int err;

	if (ext4_journal_current_handle())
		goto no_write;

	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto no_write;
	}

	if (!page_has_buffers(page) || PageChecked(page)) {
		/*
		 * It's mmapped pagecache.  Add buffers and journal it.  There
		 * doesn't seem much point in redirtying the page here.
		 */
		ClearPageChecked(page);
		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
					ext4_get_block);
		if (ret != 0) {
			ext4_journal_stop(handle);
			goto out_unlock;
		}
		ret = walk_page_buffers(handle, page_buffers(page), 0,
			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);

		err = walk_page_buffers(handle, page_buffers(page), 0,
				PAGE_CACHE_SIZE, NULL, write_end_fn);
		if (ret == 0)
			ret = err;
		EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
		unlock_page(page);
	} else {
		/*
		 * It may be a page full of checkpoint-mode buffers.  We don't
		 * really know unless we go poke around in the buffer_heads.
		 * But block_write_full_page will do the right thing.
		 */
		ret = block_write_full_page(page, ext4_get_block, wbc);
	}
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
out:
	return ret;

no_write:
	redirty_page_for_writepage(wbc, page);
out_unlock:
	unlock_page(page);
	goto out;
}

static int ext4_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, ext4_get_block);
}

static int
ext4_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}

static void ext4_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);

	/*
	 * If it's a full truncate we just forget about the pending dirtying
	 */
	if (offset == 0)
		ClearPageChecked(page);

	jbd2_journal_invalidatepage(journal, page, offset);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
{
	journal_t *journal = EXT4_JOURNAL(page->mapping->host);

	WARN_ON(PageChecked(page));
	if (!page_has_buffers(page))
		return 0;
	return jbd2_journal_try_to_free_buffers(journal, page, wait);
}

/*
 * If the O_DIRECT write will extend the file then add this inode to the
 * orphan list.  So recovery will truncate it back to the original size
 * if the machine crashes during the write.
 *
 * If the O_DIRECT write is instantiating holes inside i_size and the machine
 * crashes then stale disk data _may_ be exposed inside the file.
 */
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext4_inode_info *ei = EXT4_I(inode);
	handle_t *handle = NULL;
	ssize_t ret;
	int orphan = 0;
	size_t count = iov_length(iov, nr_segs);

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		handle = ext4_journal_start(inode, DIO_CREDITS);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}
		if (final_size > inode->i_size) {
			ret = ext4_orphan_add(handle, inode);
			if (ret)
				goto out_stop;
			orphan = 1;
			ei->i_disksize = inode->i_size;
		}
	}

	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				 offset, nr_segs,
				 ext4_get_block, NULL);

	/*
	 * Reacquire the handle: ext4_get_block() can restart the transaction
	 */
	handle = ext4_journal_current_handle();

out_stop:
	if (handle) {
		int err;

		if (orphan && inode->i_nlink)
			ext4_orphan_del(handle, inode);
		if (orphan && ret > 0) {
			loff_t end = offset + ret;
			if (end > inode->i_size) {
				ei->i_disksize = end;
				i_size_write(inode, end);
				/*
				 * We're going to return a positive `ret'
				 * here due to non-zero-length I/O, so there's
				 * no way of reporting error returns from
				 * ext4_mark_inode_dirty() to userspace.  So
				 * ignore it.
				 */
				ext4_mark_inode_dirty(handle, inode);
			}
		}
		err = ext4_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	return ret;
}
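/*
 * Illustrative userspace counterpart (not part of inode.c): the kind of
 * extending O_DIRECT write that takes the orphan-add path in
 * ext4_direct_IO() above.  A minimal sketch only -- the file path is
 * hypothetical, and the buffer, offset, and length are aligned because
 * O_DIRECT requires it.
 */
#define _GNU_SOURCE		/* for O_DIRECT */
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int write_direct_example(void)
{
	void *buf;
	int fd = open("/mnt/ext4/testfile",		/* hypothetical path */
		      O_WRONLY | O_CREAT | O_DIRECT, 0644);

	if (fd < 0)
		return -1;
	/* O_DIRECT wants an aligned buffer; 4096 covers common block sizes. */
	if (posix_memalign(&buf, 4096, 4096)) {
		close(fd);
		return -1;
	}
	memset(buf, 0xab, 4096);
	/* Extending write: if the machine crashes mid-I/O, recovery
	 * truncates the orphaned inode back to its original size. */
	if (pwrite(fd, buf, 4096, 0) != 4096) {
		free(buf);
		close(fd);
		return -1;
	}
	free(buf);
	close(fd);
	return 0;
}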
/*
 * Pages can be marked dirty completely asynchronously from ext4's journalling
 * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
 * much here because ->set_page_dirty is called under VFS locks.  The page is
 * not necessarily locked.
 *
 * We cannot just dirty the page and leave attached buffers clean, because the
 * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
 * or jbddirty because all the journalling code will explode.
 *
 * So what we do is to mark the page "pending dirty" and next time writepage
 * is called, propagate that into the buffers appropriately.
 */
static int ext4_journalled_set_page_dirty(struct page *page)
{
	SetPageChecked(page);
	return __set_page_dirty_nobuffers(page);
}
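/*
 * Illustrative userspace counterpart (not part of inode.c): writing through
 * a shared mapping is exactly the asynchronous dirtying path described in
 * the comment above -- the page is dirtied by the VM, not by write(), and
 * only later does writepage() propagate that into the buffers.  Sketch only;
 * the path is hypothetical and the file is assumed to already be at least
 * one page long.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int dirty_via_mmap_example(void)
{
	int fd = open("/mnt/ext4/mapped", O_RDWR);	/* hypothetical path */
	char *p;

	if (fd < 0)
		return -1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return -1;
	}
	p[0] = 'x';			/* dirties the page via the VM */
	msync(p, 4096, MS_SYNC);	/* writeback goes through ->writepage */
	munmap(p, 4096);
	close(fd);
	return 0;
}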
static const struct address_space_operations ext4_ordered_aops = {
	.readpage	= ext4_readpage,
	.readpages	= ext4_readpages,
	.writepage	= ext4_ordered_writepage,
	.sync_page	= block_sync_page,
	.write_begin	= ext4_write_begin,
	.write_end	= ext4_ordered_write_end,
	.bmap		= ext4_bmap,
	.invalidatepage	= ext4_invalidatepage,
	.releasepage	= ext4_releasepage,
	.direct_IO	= ext4_direct_IO,
	.migratepage	= buffer_migrate_page,