xfs_buf.c
	xfs_buf_t		*pb)
{
	DECLARE_WAITQUEUE	(wait, current);

	if (atomic_read(&pb->pb_pin_count) == 0)
		return;

	add_wait_queue(&pb->pb_waiters, &wait);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (atomic_read(&pb->pb_pin_count) == 0)
			break;
		if (atomic_read(&pb->pb_io_remaining))
			run_task_queue(&tq_disk);
		schedule();
	}
	remove_wait_queue(&pb->pb_waiters, &wait);
	set_current_state(TASK_RUNNING);
}

/*
 *	Buffer Utility Routines
 */

/*
 *	pagebuf_iodone
 *
 *	pagebuf_iodone marks a buffer for which I/O is in progress
 *	done with respect to that I/O.  The pb_iodone routine, if
 *	present, will be called as a side-effect.
 */
void
pagebuf_iodone_sched(
	void			*v)
{
	xfs_buf_t		*bp = (xfs_buf_t *)v;

	if (bp->pb_iodone)
		(*(bp->pb_iodone))(bp);
	else if (bp->pb_flags & PBF_ASYNC)
		xfs_buf_relse(bp);
}

void
pagebuf_iodone(
	xfs_buf_t		*pb,
	int			dataio,
	int			schedule)
{
	pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
	if (pb->pb_error == 0) {
		pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
	}

	PB_TRACE(pb, "iodone", pb->pb_iodone);

	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
		if (schedule) {
			int	daemon = CPU_TO_DAEMON(smp_processor_id());

			INIT_TQUEUE(&pb->pb_iodone_sched,
				pagebuf_iodone_sched, (void *)pb);
			queue_task(&pb->pb_iodone_sched, dataio ?
				&pagebuf_dataiodone_tq[daemon] :
				&pagebuf_logiodone_tq[daemon]);
			wake_up(dataio ?
				&pagebuf_dataiodone_wait[daemon] :
				&pagebuf_logiodone_wait[daemon]);
		} else {
			pagebuf_iodone_sched(pb);
		}
	} else {
		up(&pb->pb_iodonesema);
	}
}

/*
 *	pagebuf_ioerror
 *
 *	pagebuf_ioerror sets the error code for a buffer.
 */
void
pagebuf_ioerror(			/* mark/clear buffer error flag */
	xfs_buf_t		*pb,	/* buffer to mark		*/
	int			error)	/* error to store (0 if none)	*/
{
	ASSERT(error >= 0 && error <= 0xffff);
	pb->pb_error = (unsigned short)error;
	PB_TRACE(pb, "ioerror", (unsigned long)error);
}
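/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * completion path recording an error with pagebuf_ioerror() and then
 * finishing the buffer with pagebuf_iodone().  With dataio == 0 and
 * schedule != 0 the callback, if any, is deferred to the log-I/O daemon
 * task queue used by pagebuf_iodone() above.
 */
#if 0	/* example only */
STATIC void
example_finish_buffer(
	xfs_buf_t		*pb,
	int			error)	/* 0 on success, errno otherwise */
{
	if (error)
		pagebuf_ioerror(pb, error);	/* stored in pb->pb_error */

	pagebuf_iodone(pb, 0, 1);
}
#endif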
/*
 *	pagebuf_iostart
 *
 *	pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
 *	If necessary, it will arrange for any disk space allocation required,
 *	and it will break up the request if the block mappings require it.
 *	The pb_iodone routine in the buffer supplied will only be called
 *	when all of the subsidiary I/O requests, if any, have been completed.
 *	pagebuf_iostart calls the pagebuf_ioinitiate routine or
 *	pagebuf_iorequest, if the former routine is not defined, to start
 *	the I/O on a given low-level request.
 */
int
pagebuf_iostart(			/* start I/O on a buffer	  */
	xfs_buf_t		*pb,	/* buffer to start		  */
	page_buf_flags_t	flags)	/* PBF_LOCK, PBF_ASYNC, PBF_READ, */
					/* PBF_WRITE, PBF_DELWRI,	  */
					/* PBF_DONT_BLOCK		  */
{
	int			status = 0;

	PB_TRACE(pb, "iostart", (unsigned long)flags);

	if (flags & PBF_DELWRI) {
		pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC);
		pb->pb_flags |= flags & (PBF_DELWRI | PBF_ASYNC);
		pagebuf_delwri_queue(pb, 1);
		return status;
	}

	pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \
			PBF_READ_AHEAD | _PBF_RUN_QUEUES);
	pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
			PBF_READ_AHEAD | _PBF_RUN_QUEUES);

	BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL);

	/* For writes allow an alternate strategy routine to precede
	 * the actual I/O request (which may not be issued at all in
	 * a shutdown situation, for example).
	 */
	status = (flags & PBF_WRITE) ?
		pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);

	/* Wait for I/O if we are not an async request.
	 * Note: async I/O request completion will release the buffer,
	 * and that can already be done by this point.  So using the
	 * buffer pointer from here on, after async I/O, is invalid.
	 */
	if (!status && !(flags & PBF_ASYNC))
		status = pagebuf_iowait(pb);

	return status;
}

/*
 * Helper routines for pagebuf_iorequest (pagebuf I/O completion)
 */
STATIC __inline__ int
_pagebuf_iolocked(
	xfs_buf_t		*pb)
{
	ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));
	if (pb->pb_target->pbr_bsize < PAGE_CACHE_SIZE)
		return pb->pb_locked;
	if (pb->pb_flags & PBF_READ)
		return pb->pb_locked;
	return (pb->pb_flags & _PBF_PAGE_CACHE);
}

STATIC void
_pagebuf_iodone(
	xfs_buf_t		*pb,
	int			schedule)
{
	int			i;

	if (atomic_dec_and_test(&pb->pb_io_remaining) != 1)
		return;

	if (_pagebuf_iolocked(pb))
		for (i = 0; i < pb->pb_page_count; i++)
			unlock_page(pb->pb_pages[i]);

	pb->pb_locked = 0;
	pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule);
}

STATIC void
_end_io_pagebuf(
	struct buffer_head	*bh,
	int			uptodate,
	int			fullpage)
{
	struct page		*page = bh->b_page;
	xfs_buf_t		*pb = (xfs_buf_t *)bh->b_private;

	mark_buffer_uptodate(bh, uptodate);
	put_bh(bh);

	if (!uptodate) {
		SetPageError(page);
		pb->pb_error = EIO;
	}

	if (fullpage) {
		unlock_buffer(bh);
		_pagebuf_free_bh(bh);
		if (!PageError(page))
			SetPageUptodate(page);
	} else {
		static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
		struct buffer_head *bp;
		unsigned long	flags;

		ASSERT(PageLocked(page));
		spin_lock_irqsave(&page_uptodate_lock, flags);
		clear_buffer_async(bh);
		unlock_buffer(bh);
		for (bp = bh->b_this_page; bp != bh; bp = bp->b_this_page) {
			if (buffer_locked(bp)) {
				if (buffer_async(bp))
					break;
			} else if (!buffer_uptodate(bp))
				break;
		}
		spin_unlock_irqrestore(&page_uptodate_lock, flags);
		if (bp == bh && !PageError(page))
			SetPageUptodate(page);
	}

	_pagebuf_iodone(pb, 1);
}

STATIC void
_pagebuf_end_io_complete_pages(
	struct buffer_head	*bh,
	int			uptodate)
{
	_end_io_pagebuf(bh, uptodate, 1);
}

STATIC void
_pagebuf_end_io_partial_pages(
	struct buffer_head	*bh,
	int			uptodate)
{
	_end_io_pagebuf(bh, uptodate, 0);
}
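/*
 * Illustrative sketch, not part of the original file: driving the
 * synchronous path of pagebuf_iostart() above.  Without PBF_ASYNC the
 * call waits internally via pagebuf_iowait() and returns its status, so
 * the buffer is still valid afterwards; with PBF_ASYNC the completion
 * side may already have released the buffer.  The function name below is
 * hypothetical.
 */
#if 0	/* example only */
STATIC int
example_read_buffer_sync(
	xfs_buf_t		*pb)
{
	/* blocks until the read completes (or fails) */
	return pagebuf_iostart(pb, PBF_READ);
}
#endif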
/*
 *	Handling of buftargs.
 */

/*
 * Wait for any bufs with callbacks that have been submitted but
 * have not yet returned... walk the hash list for the target.
 */
void
xfs_wait_buftarg(
	xfs_buftarg_t	*target)
{
	xfs_buf_t	*pb, *n;
	pb_hash_t	*h;
	int		i;

	for (i = 0; i < NHASH; i++) {
		h = &pbhash[i];
again:
		spin_lock(&h->pb_hash_lock);
		list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
			if (pb->pb_target == target &&
			    !(pb->pb_flags & PBF_FS_MANAGED)) {
				spin_unlock(&h->pb_hash_lock);
				delay(100);
				goto again;
			}
		}
		spin_unlock(&h->pb_hash_lock);
	}
}

void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);
	if (external)
		xfs_blkdev_put(btp->pbr_bdev);
	iput(btp->pbr_mapping->host);
	kmem_free(btp, sizeof(*btp));
}

void
xfs_incore_relse(
	xfs_buftarg_t		*btp,
	int			delwri_only,
	int			wait)
{
	destroy_buffers(btp->pbr_kdev);
	truncate_inode_pages(btp->pbr_mapping, 0LL);
}

int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	btp->pbr_bsize = blocksize;
	btp->pbr_sshift = ffs(sectorsize) - 1;
	btp->pbr_smask = sectorsize - 1;

	if (set_blocksize(btp->pbr_kdev, sectorsize)) {
		printk(KERN_WARNING
			"XFS: Cannot set_blocksize to %u on device 0x%x\n",
			sectorsize, kdev_t_to_nr(btp->pbr_kdev));
		return EINVAL;
	}
	return 0;
}

STATIC int
xfs_mapping_buftarg(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	kdev_t			kdev;
	struct inode		*inode;
	struct address_space	*mapping;
	static struct address_space_operations mapping_aops = {
		.sync_page = block_sync_page,
	};

	kdev = to_kdev_t(bdev->bd_dev);
	inode = new_inode(bdev->bd_inode->i_sb);
	if (!inode) {
		printk(KERN_WARNING
			"XFS: Cannot allocate mapping inode for device %s\n",
			XFS_BUFTARG_NAME(btp));
		return ENOMEM;
	}
	inode->i_mode = S_IFBLK;
	inode->i_dev = kdev;
	inode->i_rdev = kdev;
	inode->i_bdev = bdev;
	mapping = &inode->i_data;
	mapping->a_ops = &mapping_aops;
	mapping->gfp_mask = GFP_KERNEL;
	btp->pbr_mapping = mapping;
	return 0;
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct block_device	*bdev)
{
	xfs_buftarg_t		*btp;
	kdev_t			kdev;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

	kdev = to_kdev_t(bdev->bd_dev);
	btp->pbr_dev = bdev->bd_dev;
	btp->pbr_kdev = kdev;
	btp->pbr_bdev = bdev;

	switch (MAJOR(btp->pbr_dev)) {
	case MD_MAJOR:
	case EVMS_MAJOR:
		btp->pbr_flags = PBR_ALIGNED_ONLY;
		break;
	case LOOP_MAJOR:
	case LVM_BLK_MAJOR:
		btp->pbr_flags = PBR_SECTOR_ONLY;
		break;
	}

	if (xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, get_hardsect_size(kdev)))
		goto error;
	if (xfs_mapping_buftarg(btp, bdev))
		goto error;

	return btp;

error:
	kmem_free(btp, sizeof(*btp));
	return NULL;
}
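/*
 * Illustrative sketch, not part of the original file: a possible buftarg
 * lifecycle using the routines above.  The 512-byte sector size and the
 * function name are hypothetical; real callers pass the geometry of the
 * filesystem being mounted.
 */
#if 0	/* example only */
STATIC int
example_setup_buftarg(
	struct block_device	*bdev)
{
	xfs_buftarg_t		*btp;

	btp = xfs_alloc_buftarg(bdev);		/* NULL on failure */
	if (!btp)
		return ENOMEM;

	/* switch from the hardware sector size to the filesystem geometry */
	if (xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, 512)) {
		xfs_free_buftarg(btp, 0);	/* flushes, then releases */
		return EINVAL;
	}
	return 0;
}
#endif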
/*
 * Initiate I/O on part of a page we are interested in
 */
STATIC int
_pagebuf_page_io(
	struct page		*page,	/* Page structure we are dealing with */
	xfs_buftarg_t		*pbr,	/* device parameters (bsz, ssz, dev) */
	xfs_buf_t		*pb,	/* pagebuf holding it, can be NULL */
	xfs_daddr_t		bn,	/* starting block number */
	size_t			pg_offset,	/* starting offset in page */
	size_t			pg_length,	/* count of data to process */
	int			rw,	/* read/write operation */
	int			flush)
{
	size_t			sector;
	size_t			blk_length = 0;
	struct buffer_head	*bh, *head, *bufferlist[MAX_BUF_PER_PAGE];
	int			sector_shift = pbr->pbr_sshift;
	int			i = 0, cnt = 0;
	int			public_bh = 0;
	int			multi_ok;

	if ((pbr->pbr_bsize < PAGE_CACHE_SIZE) &&
	    !(pb->pb_flags & _PBF_PRIVATE_BH)) {
		int		cache_ok;

		cache_ok = !((pb->pb_flags & PBF_FORCEIO) || (rw == WRITE));
		public_bh = multi_ok = 1;
		sector = 1 << sector_shift;

		ASSERT(PageLocked(page));
		if (!page_has_buffers(page))
			create_empty_buffers(page, pbr->pbr_kdev, sector);

		i = sector >> BBSHIFT;
		bn -= (pg_offset >> BBSHIFT);

		/* Find buffer_heads belonging to just this pagebuf */
		bh = head = page_buffers(page);
		do {
			if (buffer_uptodate(bh) && cache_ok)
				continue;
			if (blk_length < pg_offset)
				continue;
			if (blk_length >= pg_offset + pg_length)
				break;

			lock_buffer(bh);
			get_bh(bh);
			bh->b_size = sector;
			bh->b_blocknr = bn;
			bufferlist[cnt++] = bh;

		} while ((bn += i),
			 (blk_length += sector),
			 (bh = bh->b_this_page) != head);

		goto request;
	}

	/* Calculate the block offsets and length we will be using */
	if (pg_offset) {
		size_t		block_offset;

		block_offset = pg_offset >> sector_shift;
		block_offset = pg_offset - (block_offset << sector_shift);
		blk_length = (pg_length + block_offset + pbr->pbr_smask) >>
								sector_shift;
	} else {
		blk_length = (pg_length + pbr->pbr_smask) >> sector_shift;
	}

	/* This will attempt to make a request bigger than the sector
	 * size if we are well aligned.
	 */
	switch (pb->pb_target->pbr_flags) {
	case 0:
		sector = blk_length << sector_shift;
		blk_length = 1;
		break;
	case PBR_ALIGNED_ONLY:
		if ((pg_offset == 0) && (pg_length == PAGE_CACHE_SIZE) &&
		    (((unsigned int) bn) & BN_ALIGN_MASK) == 0) {
			sector = blk_length << sector_shift;
			blk_length = 1;
			break;
		}
	case PBR_SECTOR_ONLY:
		/* Fallthrough, same as default */
	default:
		sector = 1 << sector_shift;
	}

	/* If we are doing I/O larger than the bh->b_size field then
	 * we need to split this request up.
	 */
	while (sector > ((1ULL << NBBY * sizeof(bh->b_size)) - 1)) {
		sector >>= 1;
		blk_length++;
	}

	multi_ok = (blk_length != 1);
	i = sector >> BBSHIFT;

	for (; blk_length > 0; bn += i, blk_length--, pg_offset += sector) {
		bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS);
		if (!bh)
			bh = _pagebuf_get_prealloc_bh();
		memset(bh, 0, sizeof(*bh));
		bh->b_blocknr = bn;
		bh->b_size = sector;
		bh->b_dev = pbr->pbr_kdev;
		set_buffer_locked(bh);
		set_bh_page(bh, page, pg_offset);
		init_waitqueue_head(&bh->b_wait);
		atomic_set(&bh->b_count, 1);
		bufferlist[cnt++] = bh;
	}

request:
	if (cnt) {
		void	(*callback)(struct buffer_head *, int);

		callback = (multi_ok && public_bh) ?
				_pagebuf_end_io_partial_pages :
				_pagebuf_end_io_complete_pages;

		/* Account for additional buffers in progress */
		atomic_add(cnt, &pb->pb_io_remaining);

#ifdef RQ_WRITE_ORDERED
		if (flush)
			set_bit(BH_Ordered_Flush, &bufferlist[cnt-1]->b_state);
#endif

		for (i = 0; i < cnt; i++) {
			bh = bufferlist[i];
			init_buffer(bh, callback, pb);
			bh->b_rdev = bh->b_dev;
			bh->b_rsector = bh->b_blocknr;
			set_buffer_mapped(bh);
			set_buffer_async(bh);
			set_buffer_req(bh);
			if (rw == WRITE)
				set_buffer_uptodate(bh);
			generic_make_request(rw, bh);
		}
		return 0;
	}

	/*
	 * We have no I/O to submit, let the caller know that
	 * we have skipped over this page entirely.
	 */
	return 1;
}

STATIC void
_pagebuf_page_apply(
	xfs_buf_t		*pb,
	loff_t			offset,
	struct page		*page,
	size_t			pg_offset,
	size_t			pg_length,
	int			last)
{
	xfs_daddr_t		bn = pb->pb_bn;
	xfs_buftarg_t		*pbr = pb->pb_target;
	loff_t			pb_offset;
	int			status, locking;

	ASSERT(page);
	ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE));

	if ((pbr->pbr_bsize == PAGE_CACHE_SIZE) &&
	    (pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
	    (pb->pb_flags & PBF_READ) && pb->pb_locked) {
		bn -= (pb->pb_offset >> BBSHIFT);
		pg_offset = 0;
		pg_length = PAGE_CACHE_SIZE;
	} else {
		pb_offset = offset - pb->pb_file_offset;
		if (pb_offset) {
			bn += (pb_offset + BBMASK) >> BBSHIFT;
		}
	}

	locking = _pagebuf_iolocked(pb);
	if (pb->pb_flags & PBF_WRITE) {
		if (locking && !pb->pb_locked)
			lock_page(page);
		status = _pagebuf_page_io(page, pbr, pb, bn,
				pg_offset, pg_length, WRITE,
				last && (pb->pb_flags & PBF_FLUSH));
	} else {
		status = _pagebuf_page_io(page, pbr, pb, bn,
				pg_offset, pg_length, READ, 0);
	}

	if (status && locking &&
	    !(pb->pb_target->pbr_bsize < PAGE_CACHE_SIZE))
		unlock_page(page);
}
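/*
 * Worked example (added for illustration) of the block-length calculation
 * in _pagebuf_page_io() above, assuming 512-byte sectors (sector_shift = 9,
 * pbr_smask = 511), pg_offset = 0 and pg_length = 2048:
 *
 *	blk_length = (2048 + 511) >> 9 = 4 sectors
 *
 * With no target restrictions (pbr_flags == 0) these are merged into a
 * single buffer_head of sector = 4 << 9 = 2048 bytes (blk_length = 1);
 * with PBR_SECTOR_ONLY the request stays split into four 512-byte
 * buffer_heads, each submitted separately via generic_make_request().
 */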
/*
 *	pagebuf_iorequest -- the core I/O request routine.
 */
int
pagebuf_iorequest(			/* start real I/O		*/
	xfs_buf_t		*pb)	/* buffer to convey to device	*/
{
	PB_TRACE(pb, "iorequest", 0);

	if (pb->pb_flags & PBF_DELWRI) {