📄 page-writeback.c
字号:
done_index = index; while (!done && (index <= end)) { int i; nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); if (nr_pages == 0) break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; /* * At this point, the page may be truncated or * invalidated (changing page->mapping to NULL), or * even swizzled back from swapper_space to tmpfs file * mapping. However, page->index will not change * because we have a reference on the page. */ if (page->index > end) { /* * can't be range_cyclic (1st pass) because * end == -1 in that case. */ done = 1; break; } done_index = page->index + 1; lock_page(page); /* * Page truncated or invalidated. We can freely skip it * then, even for data integrity operations: the page * has disappeared concurrently, so there could be no * real expectation of this data interity operation * even if there is now a new, dirty page at the same * pagecache address. */ if (unlikely(page->mapping != mapping)) {continue_unlock: unlock_page(page); continue; } if (!PageDirty(page)) { /* someone wrote it for us */ goto continue_unlock; } if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) wait_on_page_writeback(page); else goto continue_unlock; } BUG_ON(PageWriteback(page)); if (!clear_page_dirty_for_io(page)) goto continue_unlock; ret = (*writepage)(page, wbc, data); if (unlikely(ret)) { if (ret == AOP_WRITEPAGE_ACTIVATE) { unlock_page(page); ret = 0; } else { /* * done_index is set past this page, * so media errors will not choke * background writeout for the entire * file. This has consequences for * range_cyclic semantics (ie. it may * not be suitable for data integrity * writeout). */ done = 1; break; } } if (nr_to_write > 0) { nr_to_write--; if (nr_to_write == 0 && wbc->sync_mode == WB_SYNC_NONE) { /* * We stop writing back only if we are * not doing integrity sync. In case of * integrity sync we have to keep going * because someone may be concurrently * dirtying pages, and we might have * synced a lot of newly appeared dirty * pages, but have not synced all of the * old dirty pages. */ done = 1; break; } } if (wbc->nonblocking && bdi_write_congested(bdi)) { wbc->encountered_congestion = 1; done = 1; break; } } pagevec_release(&pvec); cond_resched(); } if (!cycled && !done) { /* * range_cyclic: * We hit the last page and there is more work to be done: wrap * back to the start of the file */ cycled = 1; index = 0; end = writeback_index - 1; goto retry; } if (!wbc->no_nrwrite_index_update) { if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) mapping->writeback_index = done_index; wbc->nr_to_write = nr_to_write; } return ret;}EXPORT_SYMBOL(write_cache_pages);/* * Function used by generic_writepages to call the real writepage * function and set the mapping flags on error */static int __writepage(struct page *page, struct writeback_control *wbc, void *data){ struct address_space *mapping = data; int ret = mapping->a_ops->writepage(page, wbc); mapping_set_error(mapping, ret); return ret;}/** * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * * This is a library function, which implements the writepages() * address_space_operation. */int generic_writepages(struct address_space *mapping, struct writeback_control *wbc){ /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; return write_cache_pages(mapping, wbc, __writepage, mapping);}EXPORT_SYMBOL(generic_writepages);int do_writepages(struct address_space *mapping, struct writeback_control *wbc){ int ret; if (wbc->nr_to_write <= 0) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; return ret;}/** * write_one_page - write out a single page and optionally wait on I/O * @page: the page to write * @wait: if true, wait on writeout * * The page must be locked by the caller and will be unlocked upon return. * * write_one_page() returns a negative error code if I/O failed. */int write_one_page(struct page *page, int wait){ struct address_space *mapping = page->mapping; int ret = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 1, }; BUG_ON(!PageLocked(page)); if (wait) wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { page_cache_get(page); ret = mapping->a_ops->writepage(page, &wbc); if (ret == 0 && wait) { wait_on_page_writeback(page); if (PageError(page)) ret = -EIO; } page_cache_release(page); } else { unlock_page(page); } return ret;}EXPORT_SYMBOL(write_one_page);/* * For address_spaces which do not use buffers nor write back. */int __set_page_dirty_no_writeback(struct page *page){ if (!PageDirty(page)) SetPageDirty(page); return 0;}/* * For address_spaces which do not use buffers. Just tag the page as dirty in * its radix tree. * * This is also used when a single buffer is being dirtied: we want to set the * page dirty in that case, but not all the buffers. This is a "bottom-up" * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. * * Most callers have locked the page, which pins the address_space in memory. * But zap_pte_range() does not lock the page, however in that case the * mapping is pinned by the vma's ->vm_file reference. * * We take care to handle the case where the page was truncated from the * mapping by re-checking page_mapping() inside tree_lock. */int __set_page_dirty_nobuffers(struct page *page){ if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); struct address_space *mapping2; if (!mapping) return 1; spin_lock_irq(&mapping->tree_lock); mapping2 = page_mapping(page); if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } spin_unlock_irq(&mapping->tree_lock); if (mapping->host) { /* !PageAnon && !swapper_space */ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } return 1; } return 0;}EXPORT_SYMBOL(__set_page_dirty_nobuffers);/* * When a writepage implementation decides that it doesn't want to write this * page for some reason, it should redirty the locked page via * redirty_page_for_writepage() and it should then unlock the page and return 0 */int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page){ wbc->pages_skipped++; return __set_page_dirty_nobuffers(page);}EXPORT_SYMBOL(redirty_page_for_writepage);/* * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */int set_page_dirty(struct page *page){ struct address_space *mapping = page_mapping(page); if (likely(mapping)) { int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;#ifdef CONFIG_BLOCK if (!spd) spd = __set_page_dirty_buffers;#endif return (*spd)(page); } if (!PageDirty(page)) { if (!TestSetPageDirty(page)) return 1; } return 0;}EXPORT_SYMBOL(set_page_dirty);/* * set_page_dirty() is racy if the caller has no reference against * page->mapping->host, and if the page is unlocked. This is because another * CPU could truncate the page off the mapping and then free the mapping. * * Usually, the page _is_ locked, or the caller is a user-space process which * holds a reference on the inode by having an open file. * * In other cases, the page should be locked before running set_page_dirty(). */int set_page_dirty_lock(struct page *page){ int ret; lock_page_nosync(page); ret = set_page_dirty(page); unlock_page(page); return ret;}EXPORT_SYMBOL(set_page_dirty_lock);/* * Clear a page's dirty flag, while caring for dirty memory accounting. * Returns true if the page was previously dirty. * * This is for preparing to put the page under writeout. We leave the page * tagged as dirty in the radix tree so that a concurrent write-for-sync * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage * implementation will run either set_page_writeback() or set_page_dirty(), * at which stage we bring the page's dirty flag and radix-tree dirty tag * back into sync. * * This incoherency between the page's dirty flag and radix-tree tag is * unfortunate, but it only exists while the page is locked. */int clear_page_dirty_for_io(struct page *page){ struct address_space *mapping = page_mapping(page); BUG_ON(!PageLocked(page)); ClearPageReclaim(page); if (mapping && mapping_cap_account_dirty(mapping)) { /* * Yes, Virginia, this is indeed insane. * * We use this sequence to make sure that * (a) we account for dirty stats properly * (b) we tell the low-level filesystem to * mark the whole page dirty if it was * dirty in a pagetable. Only to then * (c) clean the page again and return 1 to * cause the writeback. * * This way we avoid all nasty races with the * dirty bit in multiple places and clearing * them concurrently from different threads. * * Note! Normally the "set_page_dirty(page)" * has no effect on the actual dirty bit - since * that will already usually be set. But we * need the side effects, and it can help us * avoid races. * * We basically use the page "master dirty bit" * as a serialization point for all the different * threads doing their things. */ if (page_mkclean(page)) set_page_dirty(page); /* * We carefully synchronise fault handlers against * installing a dirty pte and marking the page dirty * at this point. We do this by having them hold the * page lock at some point after installing their * pte, but before marking the page dirty. * Pages are always locked coming in here, so we get * the desired exclusion. See mm/memory.c:do_wp_page() * for more comments. */ if (TestClearPageDirty(page)) { dec_zone_page_state(page, NR_FILE_DIRTY); dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); return 1; } return 0; } return TestClearPageDirty(page);}EXPORT_SYMBOL(clear_page_dirty_for_io);int test_clear_page_writeback(struct page *page){ struct address_space *mapping = page_mapping(page); int ret; if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; spin_lock_irqsave(&mapping->tree_lock, flags); ret = TestClearPageWriteback(page); if (ret) { radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) { __dec_bdi_stat(bdi, BDI_WRITEBACK); __bdi_writeout_inc(bdi); } } spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestClearPageWriteback(page); } if (ret) dec_zone_page_state(page, NR_WRITEBACK); return ret;}int test_set_page_writeback(struct page *page){ struct address_space *mapping = page_mapping(page); int ret; if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; spin_lock_irqsave(&mapping->tree_lock, flags); ret = TestSetPageWriteback(page); if (!ret) { radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) __inc_bdi_stat(bdi, BDI_WRITEBACK); } if (!PageDirty(page)) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestSetPageWriteback(page); } if (!ret) inc_zone_page_state(page, NR_WRITEBACK); return ret;}EXPORT_SYMBOL(test_set_page_writeback);/* * Return true if any of the pages in the mapping are marked with the * passed tag. */int mapping_tagged(struct address_space *mapping, int tag){ int ret; rcu_read_lock(); ret = radix_tree_tagged(&mapping->page_tree, tag); rcu_read_unlock(); return ret;}EXPORT_SYMBOL(mapping_tagged);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -