xfs_aops.c
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dir2.h"
#include "xfs_trans.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_iomap.h"
#include <linux/mpage.h>
#include <linux/writeback.h>

STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
STATIC void xfs_convert_page(struct inode *, struct page *, xfs_iomap_t *,
		struct writeback_control *wbc, void *, int, int);

#if defined(XFS_RW_TRACE)
void
xfs_page_trace(
	int		tag,
	struct inode	*inode,
	struct page	*page,
	int		mask)
{
	xfs_inode_t	*ip;
	bhv_desc_t	*bdp;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	loff_t		isize = i_size_read(inode);
	loff_t		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int		delalloc = -1, unmapped = -1, unwritten = -1;

	if (page_has_buffers(page))
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);

	bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
	ip = XFS_BHVTOI(bdp);
	if (!ip->i_rwtrace)
		return;

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)inode,
		(void *)page,
		(void *)((unsigned long)mask),
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
		(void *)((unsigned long)(isize & 0xffffffff)),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)delalloc),
		(void *)((unsigned long)unmapped),
		(void *)((unsigned long)unwritten),
		(void *)NULL,
		(void *)NULL);
}
#else
#define xfs_page_trace(tag, inode, page, mask)
#endif

/*
 * Schedule IO completion handling on a xfsdatad if this was
 * the final hold on this ioend.
 */
STATIC void
xfs_finish_ioend(
	xfs_ioend_t		*ioend)
{
	if (atomic_dec_and_test(&ioend->io_remaining))
		queue_work(xfsdatad_workqueue, &ioend->io_work);
}

STATIC void
xfs_destroy_ioend(
	xfs_ioend_t		*ioend)
{
	vn_iowake(ioend->io_vnode);
	mempool_free(ioend, xfs_ioend_pool);
}
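/*
 * Illustrative sketch (not part of the original file): the hold/release
 * pattern behind xfs_finish_ioend().  io_remaining is primed to 1 at
 * allocation time, so a completion that races with submission can never
 * free the ioend early.  A hypothetical submission path takes one extra
 * hold per buffer it issues:
 *
 *	atomic_inc(&ioend->io_remaining);
 *	submit_bh(WRITE, bh);
 *
 * and drops the initial hold once everything is in flight:
 *
 *	xfs_finish_ioend(ioend);
 *
 * Whichever caller drops the last hold - submitter or I/O completion -
 * queues the work item on the xfsdatad workqueue.
 */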
/*
 * Issue transactions to convert a buffer range from unwritten
 * to written extents.
 */
STATIC void
xfs_end_bio_unwritten(
	void			*data)
{
	xfs_ioend_t		*ioend = data;
	vnode_t			*vp = ioend->io_vnode;
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	struct buffer_head	*bh, *next;
	int			error;

	if (ioend->io_uptodate)
		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);

	/* ioend->io_buffer_head is only non-NULL for buffered I/O */
	for (bh = ioend->io_buffer_head; bh; bh = next) {
		next = bh->b_private;

		bh->b_end_io = NULL;
		clear_buffer_unwritten(bh);
		end_buffer_async_write(bh, ioend->io_uptodate);
	}

	xfs_destroy_ioend(ioend);
}

/*
 * Allocate and initialise an IO completion structure.
 * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
 * (vs. incore size).
 */
STATIC xfs_ioend_t *
xfs_alloc_ioend(
	struct inode		*inode)
{
	xfs_ioend_t		*ioend;

	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

	/*
	 * Set the count to 1 initially, which will prevent an I/O
	 * completion callback from happening before we have started
	 * all the I/O from calling the completion routine too early.
	 */
	atomic_set(&ioend->io_remaining, 1);
	ioend->io_uptodate = 1; /* cleared if any I/O fails */
	ioend->io_vnode = LINVFS_GET_VP(inode);
	ioend->io_buffer_head = NULL;
	atomic_inc(&ioend->io_vnode->v_iocount);
	ioend->io_offset = 0;
	ioend->io_size = 0;

	INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);

	return ioend;
}

void
linvfs_unwritten_done(
	struct buffer_head	*bh,
	int			uptodate)
{
	xfs_ioend_t		*ioend = bh->b_private;
	static spinlock_t	unwritten_done_lock = SPIN_LOCK_UNLOCKED;
	unsigned long		flags;

	ASSERT(buffer_unwritten(bh));
	bh->b_end_io = NULL;

	if (!uptodate)
		ioend->io_uptodate = 0;

	/*
	 * Deep magic here.  We reuse b_private in the buffer_heads to build
	 * a chain for completing the I/O from user context after we've issued
	 * a transaction to convert the unwritten extent.
	 */
	spin_lock_irqsave(&unwritten_done_lock, flags);
	bh->b_private = ioend->io_buffer_head;
	ioend->io_buffer_head = bh;
	spin_unlock_irqrestore(&unwritten_done_lock, flags);

	xfs_finish_ioend(ioend);
}

STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	ssize_t			count,
	xfs_iomap_t		*mapp,
	int			flags)
{
	vnode_t			*vp = LINVFS_GET_VP(inode);
	int			error, nmaps = 1;

	VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
		VMODIFY(vp);
	return -error;
}
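/*
 * Usage sketch (illustrative, not from the original file): VOP_BMAP
 * reports a positive XFS error code through its final argument, and
 * xfs_map_blocks() negates it because the generic address_space code
 * expects negative errno values.  A hypothetical caller can therefore
 * propagate the result directly:
 *
 *	xfs_iomap_t	iomap;
 *	int		err;
 *
 *	err = xfs_map_blocks(inode, offset, count, &iomap, BMAPI_WRITE);
 *	if (err)
 *		return err;
 */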
/*
 * Finds the corresponding mapping in block @map array of the
 * given @offset within a @page.
 */
STATIC xfs_iomap_t *
xfs_offset_to_map(
	struct page		*page,
	xfs_iomap_t		*iomapp,
	unsigned long		offset)
{
	loff_t			full_offset;	/* offset from start of file */

	ASSERT(offset < PAGE_CACHE_SIZE);

	full_offset = page->index;		/* NB: using 64bit number */
	full_offset <<= PAGE_CACHE_SHIFT;	/* offset from file start */
	full_offset += offset;			/* offset from page start */

	if (full_offset < iomapp->iomap_offset)
		return NULL;
	if (iomapp->iomap_offset + (iomapp->iomap_bsize - 1) >= full_offset)
		return iomapp;
	return NULL;
}

STATIC void
xfs_map_at_offset(
	struct page		*page,
	struct buffer_head	*bh,
	unsigned long		offset,
	int			block_bits,
	xfs_iomap_t		*iomapp)
{
	xfs_daddr_t		bn;
	loff_t			delta;
	int			sector_shift;

	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
	ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);

	delta = page->index;
	delta <<= PAGE_CACHE_SHIFT;
	delta += offset;
	delta -= iomapp->iomap_offset;
	delta >>= block_bits;

	sector_shift = block_bits - BBSHIFT;
	bn = iomapp->iomap_bn >> sector_shift;
	bn += delta;
	BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
	ASSERT((bn << sector_shift) >= iomapp->iomap_bn);

	lock_buffer(bh);
	bh->b_blocknr = bn;
	bh->b_bdev = iomapp->iomap_target->pbr_bdev;
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
}

/*
 * Look for a page at index which is unlocked and contains our
 * unwritten extent flagged buffers at its head.  Returns page
 * locked and with an extra reference count, and length of the
 * unwritten extent component on this page that we can write,
 * in units of filesystem blocks.
 */
STATIC struct page *
xfs_probe_unwritten_page(
	struct address_space	*mapping,
	pgoff_t			index,
	xfs_iomap_t		*iomapp,
	xfs_ioend_t		*ioend,
	unsigned long		max_offset,
	unsigned long		*fsbs,
	unsigned int		bbits)
{
	struct page		*page;

	page = find_trylock_page(mapping, index);
	if (!page)
		return NULL;
	if (PageWriteback(page))
		goto out;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head	*bh, *head;
		unsigned long		p_offset = 0;

		*fsbs = 0;
		bh = head = page_buffers(page);
		do {
			if (!buffer_unwritten(bh) || !buffer_uptodate(bh))
				break;
			if (!xfs_offset_to_map(page, iomapp, p_offset))
				break;
			if (p_offset >= max_offset)
				break;
			xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
			set_buffer_unwritten_io(bh);
			bh->b_private = ioend;
			p_offset += bh->b_size;
			(*fsbs)++;
		} while ((bh = bh->b_this_page) != head);

		if (p_offset)
			return page;
	}

out:
	unlock_page(page);
	return NULL;
}
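/*
 * Illustrative note (not part of the original file): the probe helpers
 * in this file use find_trylock_page() rather than a blocking lookup.
 * Clustering is purely opportunistic - if a neighbouring page is
 * already locked by someone else, the probe returns NULL and the
 * cluster simply ends there, which avoids lock-ordering problems with
 * concurrent writers.  A hypothetical clustering loop stops at the
 * first miss:
 *
 *	for (tindex = index + 1; tindex < tlast; tindex++) {
 *		page = xfs_probe_unwritten_page(mapping, tindex, iomapp,
 *						ioend, max_offset, &fsbs,
 *						bbits);
 *		if (!page)
 *			break;
 *	}
 */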
/*
 * Look for a page at index which is unlocked and not mapped
 * yet - clustering for mmap write case.
 */
STATIC unsigned int
xfs_probe_unmapped_page(
	struct address_space	*mapping,
	pgoff_t			index,
	unsigned int		pg_offset)
{
	struct page		*page;
	int			ret = 0;

	page = find_trylock_page(mapping, index);
	if (!page)
		return 0;
	if (PageWriteback(page))
		goto out;

	if (page->mapping && PageDirty(page)) {
		if (page_has_buffers(page)) {
			struct buffer_head	*bh, *head;

			bh = head = page_buffers(page);
			do {
				if (buffer_mapped(bh) || !buffer_uptodate(bh))
					break;
				ret += bh->b_size;
				if (ret >= pg_offset)
					break;
			} while ((bh = bh->b_this_page) != head);
		} else
			ret = PAGE_CACHE_SIZE;
	}

out:
	unlock_page(page);
	return ret;
}

STATIC unsigned int
xfs_probe_unmapped_cluster(
	struct inode		*inode,
	struct page		*startpage,
	struct buffer_head	*bh,
	struct buffer_head	*head)
{
	pgoff_t			tindex, tlast, tloff;
	unsigned int		pg_offset, len, total = 0;
	struct address_space	*mapping = inode->i_mapping;

	/* First sum forwards in this page */
	do {
		if (buffer_mapped(bh) || !buffer_uptodate(bh))
			break;
		total += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	/* If we reached the end of the page, sum forwards in
	 * following pages.
	 */
	if (bh == head) {
		tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
		/* Prune this back to avoid pathological behavior */
		tloff = min(tlast, startpage->index + 64);
		for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
			len = xfs_probe_unmapped_page(mapping, tindex,
							PAGE_CACHE_SIZE);
			if (!len)
				return total;
			total += len;
		}
		if (tindex == tlast &&
		    (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
			total += xfs_probe_unmapped_page(mapping,
							tindex, pg_offset);
		}
	}
	return total;
}

/*
 * Probe for a given page (index) in the inode and test if it is delayed
 * and without unwritten buffers.  Returns page locked and with an extra
 * reference count.
 */
STATIC struct page *
xfs_probe_delalloc_page(
	struct inode		*inode,
	pgoff_t			index)
{
	struct page		*page;

	page = find_trylock_page(inode->i_mapping, index);
	if (!page)
		return NULL;
	if (PageWriteback(page))
		goto out;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head	*bh, *head;
		int			acceptable = 0;

		bh = head = page_buffers(page);
		do {
			if (buffer_unwritten(bh)) {
				acceptable = 0;
				break;
			} else if (buffer_delay(bh)) {
				acceptable = 1;
			}
		} while ((bh = bh->b_this_page) != head);

		if (acceptable)
			return page;
	}
out:
	unlock_page(page);
	return NULL;
}
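/*
 * Usage sketch (illustrative, not from the original file): per the
 * comments above, a successful probe returns the page locked and with
 * an extra reference, so a hypothetical caller is responsible for
 * unlocking and releasing it once the delayed allocation has been
 * converted:
 *
 *	page = xfs_probe_delalloc_page(inode, tindex);
 *	if (page) {
 *		...	(convert the delayed allocation on this page)
 *		unlock_page(page);
 *		page_cache_release(page);
 *	}
 */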