inode.c
来自「Linux Kernel 2.6.9 for OMAP1710」· C语言 代码 · 共 2,219 行 · 第 1/5 页
C
2,219 行
* current transaction. But the same file is part of a shared mapping * and someone does a writepage() on it. * * We will move the buffer onto the async_data list, but *after* it has * been dirtied. So there's a small window where we have dirty data on * BJ_Metadata. * * Note that this only applies to the last partial page in the file. The * bit which block_write_full_page() uses prepare/commit for. (That's * broken code anyway: it's wrong for msync()). * * It's a rare case: affects the final partial page, for journalled data * where the file is subject to bith write() and writepage() in the same * transction. To fix it we'll need a custom block_write_full_page(). * We'll probably need that anyway for journalling writepage() output. * * We don't honour synchronous mounts for writepage(). That would be * disastrous. Any write() or metadata operation will sync the fs for * us. * * AKPM2: if all the page's buffers are mapped to disk and !data=journal, * we don't need to open a transaction here. */static int ext3_ordered_writepage(struct page *page, struct writeback_control *wbc){ struct inode *inode = page->mapping->host; struct buffer_head *page_bufs; handle_t *handle = NULL; int ret = 0; int err; J_ASSERT(PageLocked(page)); /* * We give up here if we're reentered, because it might be for a * different filesystem. */ if (ext3_journal_current_handle()) goto out_fail; handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out_fail; } if (!page_has_buffers(page)) { create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); } page_bufs = page_buffers(page); walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bget_one); ret = block_write_full_page(page, ext3_get_block, wbc); /* * The page can become unlocked at any point now, and * truncate can then come in and change things. So we * can't touch *page from now on. But *page_bufs is * safe due to elevated refcount. 
*/ /* * And attach them to the current transaction. But only if * block_write_full_page() succeeded. Otherwise they are unmapped, * and generally junk. */ if (ret == 0) { err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, journal_dirty_data_fn); if (!ret) ret = err; } walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bput_one); err = ext3_journal_stop(handle); if (!ret) ret = err; return ret;out_fail: redirty_page_for_writepage(wbc, page); unlock_page(page); return ret;}static int ext3_writeback_writepage(struct page *page, struct writeback_control *wbc){ struct inode *inode = page->mapping->host; handle_t *handle = NULL; int ret = 0; int err; if (ext3_journal_current_handle()) goto out_fail; handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out_fail; } ret = block_write_full_page(page, ext3_get_block, wbc); err = ext3_journal_stop(handle); if (!ret) ret = err; return ret;out_fail: redirty_page_for_writepage(wbc, page); unlock_page(page); return ret;}static int ext3_journalled_writepage(struct page *page, struct writeback_control *wbc){ struct inode *inode = page->mapping->host; handle_t *handle = NULL; int ret = 0; int err; if (ext3_journal_current_handle()) goto no_write; handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto no_write; } if (!page_has_buffers(page) || PageChecked(page)) { /* * It's mmapped pagecache. Add buffers and journal it. There * doesn't seem much point in redirtying the page here. 
*/ ClearPageChecked(page); ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, ext3_get_block); if (ret != 0) goto out_unlock; ret = walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); err = walk_page_buffers(handle, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, commit_write_fn); if (ret == 0) ret = err; EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; unlock_page(page); } else { /* * It may be a page full of checkpoint-mode buffers. We don't * really know unless we go poke around in the buffer_heads. * But block_write_full_page will do the right thing. */ ret = block_write_full_page(page, ext3_get_block, wbc); } err = ext3_journal_stop(handle); if (!ret) ret = err;out: return ret;no_write: redirty_page_for_writepage(wbc, page);out_unlock: unlock_page(page); goto out;}static int ext3_readpage(struct file *file, struct page *page){ return mpage_readpage(page, ext3_get_block);}static intext3_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages){ return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);}static int ext3_invalidatepage(struct page *page, unsigned long offset){ journal_t *journal = EXT3_JOURNAL(page->mapping->host); /* * If it's a full truncate we just forget about the pending dirtying */ if (offset == 0) ClearPageChecked(page); return journal_invalidatepage(journal, page, offset);}static int ext3_releasepage(struct page *page, int wait){ journal_t *journal = EXT3_JOURNAL(page->mapping->host); WARN_ON(PageChecked(page)); return journal_try_to_free_buffers(journal, page, wait);}/* * If the O_DIRECT write will extend the file then add this inode to the * orphan list. So recovery will truncate it back to the original size * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine * crashes then stale disk data _may_ be exposed inside the file. 
*/
/*
 * Returns whatever blockdev_direct_IO() returned, or a negative errno if
 * the handle could not be started or the inode could not be added to the
 * orphan list.  When the I/O result is 0, an error from
 * ext3_journal_stop() is returned instead.
 */
static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct ext3_inode_info *ei = EXT3_I(inode);
	handle_t *handle = NULL;
	ssize_t ret;
	int orphan = 0;
	size_t count = iov_length(iov, nr_segs);

	if (rw == WRITE) {
		loff_t final_size = offset + count;

		handle = ext3_journal_start(inode, DIO_CREDITS);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}
		if (final_size > inode->i_size) {
			/* Extending write: protect it with an orphan entry. */
			ret = ext3_orphan_add(handle, inode);
			if (ret)
				goto out_stop;
			orphan = 1;
			ei->i_disksize = inode->i_size;
		}
	}

	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				 offset, nr_segs,
				 ext3_direct_io_get_blocks, NULL);

	/*
	 * Reacquire the handle: ext3_direct_io_get_blocks() can restart the
	 * transaction
	 */
	handle = journal_current_handle();

out_stop:
	if (handle) {
		int err;

		if (orphan)
			ext3_orphan_del(handle, inode);
		if (orphan && ret > 0) {
			loff_t end = offset + ret;

			if (end > inode->i_size) {
				ei->i_disksize = end;
				i_size_write(inode, end);
				/*
				 * ret is positive here (bytes transferred), so
				 * an error from ext3_mark_inode_dirty() cannot
				 * be reported to the caller.  Ignore it
				 * explicitly.
				 */
				ext3_mark_inode_dirty(handle, inode);
			}
		}
		err = ext3_journal_stop(handle);
		if (ret == 0)
			ret = err;
	}
out:
	return ret;
}

/*
 * Pages can be marked dirty completely asynchronously from ext3's journalling
 * activity.  By filemap_sync_pte(), try_to_unmap_one(), etc.  We cannot do
 * much here because ->set_page_dirty is called under VFS locks.  The page is
 * not necessarily locked.
 *
 * We cannot just dirty the page and leave attached buffers clean, because the
 * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
 * or jbddirty because all the journalling code will explode.
 *
 * So what we do is to mark the page "pending dirty" and next time writepage
 * is called, propagate that into the buffers appropriately.
*/static int ext3_journalled_set_page_dirty(struct page *page){ SetPageChecked(page); return __set_page_dirty_nobuffers(page);}static struct address_space_operations ext3_ordered_aops = { .readpage = ext3_readpage, .readpages = ext3_readpages, .writepage = ext3_ordered_writepage, .sync_page = block_sync_page, .prepare_write = ext3_prepare_write, .commit_write = ext3_ordered_commit_write, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, .direct_IO = ext3_direct_IO,};static struct address_space_operations ext3_writeback_aops = { .readpage = ext3_readpage, .readpages = ext3_readpages, .writepage = ext3_writeback_writepage, .sync_page = block_sync_page, .prepare_write = ext3_prepare_write, .commit_write = ext3_writeback_commit_write, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, .direct_IO = ext3_direct_IO,};static struct address_space_operations ext3_journalled_aops = { .readpage = ext3_readpage, .readpages = ext3_readpages, .writepage = ext3_journalled_writepage, .sync_page = block_sync_page, .prepare_write = ext3_prepare_write, .commit_write = ext3_journalled_commit_write, .set_page_dirty = ext3_journalled_set_page_dirty, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage,};void ext3_set_aops(struct inode *inode){ if (ext3_should_order_data(inode)) inode->i_mapping->a_ops = &ext3_ordered_aops; else if (ext3_should_writeback_data(inode)) inode->i_mapping->a_ops = &ext3_writeback_aops; else inode->i_mapping->a_ops = &ext3_journalled_aops;}/* * ext3_block_truncate_page() zeroes out a mapping from file offset `from' * up to the end of the block which corresponds to `from'. * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. 
*/
/*
 * The page is unlocked and released (page_cache_release) on every exit
 * path.  Returns 0 on success or when there is nothing to zero (buffer
 * freed, or the block is a hole), -EIO if the block could not be read,
 * or the error returned by the journalling helpers.
 */
static int ext3_block_truncate_page(handle_t *handle, struct page *page,
		struct address_space *mapping, loff_t from)
{
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct inode *inode = mapping->host;
	struct buffer_head *bh;
	int err = 0;
	void *kaddr;

	blocksize = inode->i_sb->s_blocksize;
	/* Bytes from `offset' up to the end of its block. */
	length = blocksize - (offset & (blocksize - 1));
	/* Logical block number of the first block in this page. */
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	for (pos = blocksize; offset >= pos; pos += blocksize) {
		bh = bh->b_this_page;
		iblock++;
	}

	if (buffer_freed(bh)) {
		BUFFER_TRACE(bh, "freed: skip");
		goto unlock;
	}

	if (!buffer_mapped(bh)) {
		BUFFER_TRACE(bh, "unmapped");
		ext3_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh)) {
			BUFFER_TRACE(bh, "still unmapped");
			goto unlock;
		}
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		/* Preset the result in case the read below fails. */
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	if (ext3_should_journal_data(inode)) {
		BUFFER_TRACE(bh, "get write access");
		err = ext3_journal_get_write_access(handle, bh);
		if (err)
			goto unlock;
	}

	/* Zero the tail of the block through a temporary atomic mapping. */
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + offset, 0, length);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	BUFFER_TRACE(bh, "zeroed end of block");

	err = 0;
	if (!ext3_should_journal_data(inode)) {
		if (ext3_should_order_data(inode))
			err = ext3_journal_dirty_data(handle, bh);
		mark_buffer_dirty(bh);
	} else {
		err = ext3_journal_dirty_metadata(handle, bh);
	}

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}

/*
 * Probably it should be a library function...
search for first non-zero word * or memcmp with zero_page, whatever is better for particular architecture. * Linus? */static inline int all_zeroes(__le32 *p, __le32 *q)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?