xfs_aops.c
STATIC void
xfs_map_buffer(
	struct buffer_head	*bh,
	xfs_iomap_t		*mp,
	xfs_off_t		offset,
	uint			block_bits)
{
	sector_t		bn;

	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);

	/*
	 * iomap_bn is in 512-byte basic blocks; shift it down to
	 * filesystem block units and add the offset of this buffer
	 * within the mapping, also in filesystem blocks.
	 */
	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
	      ((offset - mp->iomap_offset) >> block_bits);

	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}

STATIC void
xfs_map_at_offset(
	struct buffer_head	*bh,
	loff_t			offset,
	int			block_bits,
	xfs_iomap_t		*iomapp)
{
	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));

	lock_buffer(bh);
	xfs_map_buffer(bh, iomapp, offset, block_bits);
	bh->b_bdev = iomapp->iomap_target->bt_bdev;
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
}

/*
 * Look for a page at index that is suitable for clustering.
 */
STATIC unsigned int
xfs_probe_page(
	struct page		*page,
	unsigned int		pg_offset,
	int			mapped)
{
	int			ret = 0;

	if (PageWriteback(page))
		return 0;

	if (page->mapping && PageDirty(page)) {
		if (page_has_buffers(page)) {
			struct buffer_head	*bh, *head;

			bh = head = page_buffers(page);
			do {
				if (!buffer_uptodate(bh))
					break;
				if (mapped != buffer_mapped(bh))
					break;
				ret += bh->b_size;
				if (ret >= pg_offset)
					break;
			} while ((bh = bh->b_this_page) != head);
		} else
			ret = mapped ? 0 : PAGE_CACHE_SIZE;
	}

	return ret;
}

STATIC size_t
xfs_probe_cluster(
	struct inode		*inode,
	struct page		*startpage,
	struct buffer_head	*bh,
	struct buffer_head	*head,
	int			mapped)
{
	struct pagevec		pvec;
	pgoff_t			tindex, tlast, tloff;
	size_t			total = 0;
	int			done = 0, i;

	/* First sum forwards in this page */
	do {
		if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
			return total;
		total += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	/* if we reached the end of the page, sum forwards in following pages */
	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
	tindex = startpage->index + 1;

	/* Prune this back to avoid pathological behavior */
	tloff = min(tlast, startpage->index + 64);

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tloff) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			size_t pg_offset, len = 0;

			if (tindex == tlast) {
				pg_offset =
				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
				if (!pg_offset) {
					done = 1;
					break;
				}
			} else
				pg_offset = PAGE_CACHE_SIZE;

			if (page->index == tindex && !TestSetPageLocked(page)) {
				len = xfs_probe_page(page, pg_offset, mapped);
				unlock_page(page);
			}

			if (!len) {
				done = 1;
				break;
			}

			total += len;
			tindex++;
		}

		pagevec_release(&pvec);
		cond_resched();
	}

	return total;
}

/*
 * Test if a given page is suitable for writing as part of an unwritten
 * or delayed allocate extent.
 */
STATIC int
xfs_is_delayed_page(
	struct page		*page,
	unsigned int		type)
{
	if (PageWriteback(page))
		return 0;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head	*bh, *head;
		int			acceptable = 0;

		bh = head = page_buffers(page);
		do {
			if (buffer_unwritten(bh))
				acceptable = (type == IOMAP_UNWRITTEN);
			else if (buffer_delay(bh))
				acceptable = (type == IOMAP_DELAY);
			else if (buffer_dirty(bh) && buffer_mapped(bh))
				acceptable = (type == 0);
			else
				break;
		} while ((bh = bh->b_this_page) != head);

		if (acceptable)
			return 1;
	}

	return 0;
}
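/*
 * Illustrative note (not part of the original source): the walk above
 * requires each buffer it classifies to match the requested type --
 * IOMAP_UNWRITTEN for unwritten buffers, IOMAP_DELAY for delalloc, and
 * 0 for ordinary dirty+mapped overwrites.  A buffer matching none of
 * the three cases ends the walk, so "acceptable" reflects the last
 * buffer actually classified before the loop exits.
 */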
/*
 * Allocate & map buffers for page given the extent map. Write it out.
 * Except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only; for the original page it is possible
 * that the page has no mapping at all.
 */
STATIC int
xfs_convert_page(
	struct inode		*inode,
	struct page		*page,
	loff_t			tindex,
	xfs_iomap_t		*mp,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	int			startio,
	int			all_bh)
{
	struct buffer_head	*bh, *head;
	xfs_off_t		end_offset;
	unsigned long		p_offset;
	unsigned int		type;
	int			bbits = inode->i_blkbits;
	int			len, page_dirty;
	int			count = 0, done = 0, uptodate = 1;
	xfs_off_t		offset = page_offset(page);

	if (page->index != tindex)
		goto fail;
	if (TestSetPageLocked(page))
		goto fail;
	if (PageWriteback(page))
		goto fail_unlock_page;
	if (page->mapping != inode->i_mapping)
		goto fail_unlock_page;
	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
		goto fail_unlock_page;

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			i_size_read(inode));

	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
			done = 1;
			continue;
		}

		if (buffer_unwritten(bh) || buffer_delay(bh)) {
			if (buffer_unwritten(bh))
				type = IOMAP_UNWRITTEN;
			else
				type = IOMAP_DELAY;

			if (!xfs_iomap_valid(mp, offset)) {
				done = 1;
				continue;
			}

			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));

			xfs_map_at_offset(bh, offset, bbits, mp);
			if (startio) {
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
			} else {
				set_buffer_dirty(bh);
				unlock_buffer(bh);
				mark_buffer_dirty(bh);
			}
			page_dirty--;
			count++;
		} else {
			type = 0;
			if (buffer_mapped(bh) && all_bh && startio) {
				lock_buffer(bh);
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
				count++;
				page_dirty--;
			} else {
				done = 1;
			}
		}
	} while (offset += len, (bh = bh->b_this_page) != head);

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio) {
		if (count) {
			struct backing_dev_info *bdi;

			bdi = inode->i_mapping->backing_dev_info;
			wbc->nr_to_write--;
			if (bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			} else if (wbc->nr_to_write <= 0) {
				done = 1;
			}
		}
		xfs_start_page_writeback(page, wbc, !page_dirty, count);
	}

	return done;
 fail_unlock_page:
	unlock_page(page);
 fail:
	return 1;
}
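/*
 * Worked example of the page_dirty derivation above (illustrative, not
 * part of the original source): with 4k pages and 1k blocks (len =
 * 1024), a last page where i_size ends 1500 bytes in gives p_offset =
 * roundup(1500, 1024) = 2048, so page_dirty = 2 -- two buffers sit
 * before EOF.  On any page entirely below EOF the masked end_offset is
 * zero, p_offset becomes PAGE_CACHE_SIZE, and page_dirty counts all
 * four buffers on the page.
 */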
/*
 * Convert & write out a cluster of pages in the same extent as defined
 * by mp and following the start page.
 */
STATIC void
xfs_cluster_write(
	struct inode		*inode,
	pgoff_t			tindex,
	xfs_iomap_t		*iomapp,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	int			startio,
	int			all_bh,
	pgoff_t			tlast)
{
	struct pagevec		pvec;
	int			done = 0, i;

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tlast) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
					iomapp, ioendp, wbc, startio, all_bh);
			if (done)
				break;
		}

		pagevec_release(&pvec);
		cond_resched();
	}
}
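/*
 * Illustrative note (not part of the original source): each
 * pagevec_lookup() pass above pulls at most PAGEVEC_SIZE pages starting
 * at tindex, and xfs_convert_page() returns nonzero as soon as a page
 * cannot be converted (wrong index, lock contention, already under
 * writeback, different extent type) or the wbc write budget is
 * exhausted, terminating the cluster write early.
 */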
/*
 * Calling this without startio set means we are being asked to make a dirty
 * page ready for freeing its buffers.  When called with startio set then
 * we are coming from writepage.
 *
 * When called with startio set it is important that we write the WHOLE
 * page if possible.
 * The bh->b_state's cannot know if any of the blocks or which block for
 * that matter are dirty due to mmap writes, and therefore bh uptodate is
 * only valid if the page itself isn't completely uptodate.  Some layers
 * may clear the page dirty flag prior to calling write page, under the
 * assumption the entire page will be written out; by not writing out the
 * whole page the page can be reused before all valid dirty data is
 * written out.  Note: in the case of a page that has been dirtied by
 * mmap write but only partially set up by block_prepare_write, the
 * bh states will not agree and only the ones set up by BPW/BCW will
 * have valid state; thus the whole page must be written out.
 */
STATIC int
xfs_page_state_convert(
	struct inode	*inode,
	struct page	*page,
	struct writeback_control *wbc,
	int		startio,
	int		unmapped) /* also implies page uptodate */
{
	struct buffer_head	*bh, *head;
	xfs_iomap_t		iomap;
	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
	loff_t			offset;
	unsigned long		p_offset = 0;
	unsigned int		type;
	__uint64_t		end_offset;
	pgoff_t			end_index, last_index, tlast;
	ssize_t			size, len;
	int			flags, err, iomap_valid = 0, uptodate = 1;
	int			page_dirty, count = 0;
	int			trylock = 0;
	int			all_bh = unmapped;

	if (startio) {
		if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
			trylock |= BMAPI_TRYLOCK;
	}

	/* Is this page beyond the end of the file? */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_CACHE_SHIFT;
	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		if ((page->index >= end_index + 1) ||
		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
			if (startio)
				unlock_page(page);
			return 0;
		}
	}

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			offset);
	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	offset = page_offset(page);
	flags = -1;
	type = 0;

	/* TODO: cleanup count and page_dirty */

	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
			/*
			 * the iomap is actually still valid, but the ioend
			 * isn't.  shouldn't happen too often.
			 */
			iomap_valid = 0;
			continue;
		}

		if (iomap_valid)
			iomap_valid = xfs_iomap_valid(&iomap, offset);

		/*
		 * First case, map an unwritten extent and prepare for
		 * extent state conversion transaction on completion.
		 *
		 * Second case, allocate space for a delalloc buffer.
		 * We can return EAGAIN here in the release page case.
		 *
		 * Third case, an unmapped buffer was found, and we are
		 * in a path where we need to write the whole page out.
		 */
		if (buffer_unwritten(bh) || buffer_delay(bh) ||
		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
		     !buffer_mapped(bh) && (unmapped || startio))) {
			/*
			 * Make sure we don't use a read-only iomap
			 */
			if (flags == BMAPI_READ)
				iomap_valid = 0;

			if (buffer_unwritten(bh)) {
				type = IOMAP_UNWRITTEN;
				flags = BMAPI_WRITE | BMAPI_IGNSTATE;
			} else if (buffer_delay(bh)) {
				type = IOMAP_DELAY;
				flags = BMAPI_ALLOCATE | trylock;
			} else {
				type = IOMAP_NEW;
				flags = BMAPI_WRITE | BMAPI_MMAP;
			}

			if (!iomap_valid) {
				if (type == IOMAP_NEW) {
					size = xfs_probe_cluster(inode,
							page, bh, head, 0);
				} else {
					size = len;
				}
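/*
 * Sketch of the classification above (illustrative, not part of the
 * original source, which is truncated at this point): an unwritten
 * buffer is written with BMAPI_WRITE|BMAPI_IGNSTATE and converted to
 * written state on I/O completion; a delalloc buffer gets real blocks
 * via BMAPI_ALLOCATE (plus BMAPI_TRYLOCK for non-blocking writeback);
 * an uptodate but unmapped buffer on a whole-page write is mapped
 * fresh with BMAPI_WRITE|BMAPI_MMAP, and in the IOMAP_NEW case
 * xfs_probe_cluster() sizes the allocation to cover the dirty pages
 * that follow as well.
 */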