📄 xfs_buf.c
submit_io:
	if (likely(bio->bi_size)) {
		submit_bio(rw, bio);
		if (size)
			goto next_chunk;
	} else {
		bio_put(bio);
		xfs_buf_ioerror(bp, EIO);
	}
}

int
xfs_buf_iorequest(
	xfs_buf_t		*bp)
{
	XB_TRACE(bp, "iorequest", 0);

	if (bp->b_flags & XBF_DELWRI) {
		xfs_buf_delwri_queue(bp, 1);
		return 0;
	}

	if (bp->b_flags & XBF_WRITE) {
		xfs_buf_wait_unpin(bp);
	}

	xfs_buf_hold(bp);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	_xfs_buf_ioapply(bp);
	_xfs_buf_ioend(bp, 0);

	xfs_buf_rele(bp);
	return 0;
}

/*
 * Waits for I/O to complete on the buffer supplied.
 * It returns immediately if no I/O is pending.
 * It returns the I/O error code, if any, or 0 if there was no error.
 */
int
xfs_buf_iowait(
	xfs_buf_t		*bp)
{
	XB_TRACE(bp, "iowait", 0);
	if (atomic_read(&bp->b_io_remaining))
		blk_run_address_space(bp->b_target->bt_mapping);
	down(&bp->b_iodonesema);
	XB_TRACE(bp, "iowaited", (long)bp->b_error);
	return bp->b_error;
}

xfs_caddr_t
xfs_buf_offset(
	xfs_buf_t		*bp,
	size_t			offset)
{
	struct page		*page;

	if (bp->b_flags & XBF_MAPPED)
		return XFS_BUF_PTR(bp) + offset;

	offset += bp->b_offset;
	page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
	return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
}

/*
 * Move data into or out of a buffer.
 */
void
xfs_buf_iomove(
	xfs_buf_t		*bp,	/* buffer to process		*/
	size_t			boff,	/* starting buffer offset	*/
	size_t			bsize,	/* length to copy		*/
	caddr_t			data,	/* data address			*/
	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
{
	size_t			bend, cpoff, csize;
	struct page		*page;

	bend = boff + bsize;
	while (boff < bend) {
		page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
		cpoff = xfs_buf_poff(boff + bp->b_offset);
		csize = min_t(size_t,
			      PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);

		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));

		switch (mode) {
		case XBRW_ZERO:
			memset(page_address(page) + cpoff, 0, csize);
			break;
		case XBRW_READ:
			memcpy(data, page_address(page) + cpoff, csize);
			break;
		case XBRW_WRITE:
			memcpy(page_address(page) + cpoff, data, csize);
		}

		boff += csize;
		data += csize;
	}
}

/*
 * Handling of buffer targets (buftargs).
 */

/*
 * Wait for any bufs with callbacks that have been submitted but
 * have not yet returned... walk the hash list for the target.
 */
void
xfs_wait_buftarg(
	xfs_buftarg_t	*btp)
{
	xfs_buf_t	*bp, *n;
	xfs_bufhash_t	*hash;
	uint		i;

	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		hash = &btp->bt_hash[i];
again:
		spin_lock(&hash->bh_lock);
		list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
			ASSERT(btp == bp->b_target);
			if (!(bp->b_flags & XBF_FS_MANAGED)) {
				spin_unlock(&hash->bh_lock);
				/*
				 * Catch superblock reference count leaks
				 * immediately
				 */
				BUG_ON(bp->b_bn == 0);
				delay(100);
				goto again;
			}
		}
		spin_unlock(&hash->bh_lock);
	}
}

/*
 * Allocate buffer hash table for a given target.
 * For devices containing metadata (i.e. not the log/realtime devices)
 * we need to allocate a much larger hash table.
 */
STATIC void
xfs_alloc_bufhash(
	xfs_buftarg_t		*btp,
	int			external)
{
	unsigned int		i;

	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
					sizeof(xfs_bufhash_t), KM_SLEEP);
	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		spin_lock_init(&btp->bt_hash[i].bh_lock);
		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
	}
}

STATIC void
xfs_free_bufhash(
	xfs_buftarg_t		*btp)
{
	kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t));
	btp->bt_hash = NULL;
}

/*
 * buftarg list for delwrite queue processing
 */
STATIC LIST_HEAD(xfs_buftarg_list);
STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);

STATIC void
xfs_register_buftarg(
	xfs_buftarg_t		*btp)
{
	spin_lock(&xfs_buftarg_lock);
	list_add(&btp->bt_list, &xfs_buftarg_list);
	spin_unlock(&xfs_buftarg_lock);
}

STATIC void
xfs_unregister_buftarg(
	xfs_buftarg_t		*btp)
{
	spin_lock(&xfs_buftarg_lock);
	list_del(&btp->bt_list);
	spin_unlock(&xfs_buftarg_lock);
}

void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);
	if (external)
		xfs_blkdev_put(btp->bt_bdev);
	xfs_free_bufhash(btp);
	iput(btp->bt_mapping->host);

	/* Unregister the buftarg first so that we don't get a
	 * wakeup finding a non-existent task
	 */
	xfs_unregister_buftarg(btp);
	kthread_stop(btp->bt_task);

	kmem_free(btp, sizeof(*btp));
}

STATIC int
xfs_setsize_buftarg_flags(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize,
	int			verbose)
{
	btp->bt_bsize = blocksize;
	btp->bt_sshift = ffs(sectorsize) - 1;
	btp->bt_smask = sectorsize - 1;

	if (set_blocksize(btp->bt_bdev, sectorsize)) {
		printk(KERN_WARNING
			"XFS: Cannot set_blocksize to %u on device %s\n",
			sectorsize, XFS_BUFTARG_NAME(btp));
		return EINVAL;
	}

	if (verbose &&
	    (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
		printk(KERN_WARNING
			"XFS: %u byte sectors in use on device %s.  "
			"This is suboptimal; %u or greater is ideal.\n",
			sectorsize, XFS_BUFTARG_NAME(btp),
			(unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
	}

	return 0;
}

/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */
STATIC int
xfs_setsize_buftarg_early(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	return xfs_setsize_buftarg_flags(btp,
			PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
}

int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
}

STATIC int
xfs_mapping_buftarg(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	struct backing_dev_info	*bdi;
	struct inode		*inode;
	struct address_space	*mapping;
	static struct address_space_operations mapping_aops = {
		.sync_page = block_sync_page,
		.migratepage = fail_migrate_page,
	};

	inode = new_inode(bdev->bd_inode->i_sb);
	if (!inode) {
		printk(KERN_WARNING
			"XFS: Cannot allocate mapping inode for device %s\n",
			XFS_BUFTARG_NAME(btp));
		return ENOMEM;
	}
	inode->i_mode = S_IFBLK;
	inode->i_bdev = bdev;
	inode->i_rdev = bdev->bd_dev;
	bdi = blk_get_backing_dev_info(bdev);
	if (!bdi)
		bdi = &default_backing_dev_info;
	mapping = &inode->i_data;
	mapping->a_ops = &mapping_aops;
	mapping->backing_dev_info = bdi;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	btp->bt_mapping = mapping;
	return 0;
}

STATIC int
xfs_alloc_delwrite_queue(
	xfs_buftarg_t		*btp)
{
	int			error = 0;

	INIT_LIST_HEAD(&btp->bt_list);
	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
	spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
	btp->bt_flags = 0;
	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
	if (IS_ERR(btp->bt_task)) {
		error = PTR_ERR(btp->bt_task);
		goto out_error;
	}
	xfs_register_buftarg(btp);
out_error:
	return error;
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct block_device	*bdev,
	int			external)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

	btp->bt_dev =  bdev->bd_dev;
	btp->bt_bdev = bdev;
	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error;
	if (xfs_mapping_buftarg(btp, bdev))
		goto error;
	if (xfs_alloc_delwrite_queue(btp))
		goto error;
	xfs_alloc_bufhash(btp, external);
	return btp;

error:
	kmem_free(btp, sizeof(*btp));
	return NULL;
}

/*
 * Delayed write buffer handling
 */
STATIC void
xfs_buf_delwri_queue(
	xfs_buf_t		*bp,
	int			unlock)
{
	struct list_head	*dwq = &bp->b_target->bt_delwrite_queue;
	spinlock_t		*dwlk = &bp->b_target->bt_delwrite_lock;

	XB_TRACE(bp, "delwri_q", (long)unlock);
	ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));

	spin_lock(dwlk);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&bp->b_list)) {
		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
		if (unlock)
			atomic_dec(&bp->b_hold);
		list_del(&bp->b_list);
	}

	bp->b_flags |= _XBF_DELWRI_Q;
	list_add_tail(&bp->b_list, dwq);
	bp->b_queuetime = jiffies;
	spin_unlock(dwlk);

	if (unlock)
		xfs_buf_unlock(bp);
}

void
xfs_buf_delwri_dequeue(
	xfs_buf_t		*bp)
{
	spinlock_t		*dwlk = &bp->b_target->bt_delwrite_lock;
	int			dequeued = 0;

	spin_lock(dwlk);
	if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
		ASSERT(bp->b_flags & _XBF_DELWRI_Q);
		list_del_init(&bp->b_list);
		dequeued = 1;
	}
	bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
	spin_unlock(dwlk);

	if (dequeued)
		xfs_buf_rele(bp);

	XB_TRACE(bp, "delwri_dq", (long)dequeued);
}

STATIC void
xfs_buf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}

STATIC int
xfsbufd_wakeup(
	int			priority,
	gfp_t			mask)
{
	xfs_buftarg_t		*btp;

	spin_lock(&xfs_buftarg_lock);
	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
			continue;
		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
		wake_up_process(btp->bt_task);
	}
	spin_unlock(&xfs_buftarg_lock);
	return 0;
}

STATIC int
xfsbufd(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buftarg_t		*target = (xfs_buftarg_t *)data;
	xfs_buf_t		*bp, *n;
	struct list_head	*dwq = &target->bt_delwrite_queue;
	spinlock_t		*dwlk = &target->bt_delwrite_lock;

	current->flags |= PF_MEMALLOC;

	INIT_LIST_HEAD(&tmp);
	do {
		if (unlikely(freezing(current))) {
			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
			refrigerator();
		} else {
			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
		}

		schedule_timeout_interruptible(
			xfs_buf_timer_centisecs * msecs_to_jiffies(10));

		age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
		spin_lock(dwlk);
		list_for_each_entry_safe(bp, n, dwq, b_list) {
			XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
			ASSERT(bp->b_flags & XBF_DELWRI);

			if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
				if (!test_bit(XBT_FORCE_FLUSH,
						&target->bt_flags) &&
				    time_before(jiffies,
						bp->b_queuetime + age)) {
					xfs_buf_unlock(bp);
					break;
				}

				bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
				bp->b_flags |= XBF_WRITE;
				list_move(&bp->b_list, &tmp);
			}
		}
		spin_unlock(dwlk);

		while (!list_empty(&tmp)) {
			bp = list_entry(tmp.next, xfs_buf_t, b_list);
			ASSERT(target == bp->b_target);

			list_del_init(&bp->b_list);
			xfs_buf_iostrategy(bp);

			blk_run_address_space(target->bt_mapping);
		}

		if (as_list_len > 0)
			purge_addresses();

		clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
	} while (!kthread_should_stop());

	return 0;
}

/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t		*target,
	int			wait)
{
	struct list_head	tmp;
	xfs_buf_t		*bp, *n;
	int			pincount = 0;
	struct list_head	*dwq = &target->bt_delwrite_queue;
	spinlock_t		*dwlk = &target->bt_delwrite_lock;

	xfs_buf_runall_queues(xfsdatad_workqueue);
	xfs_buf_runall_queues(xfslogd_workqueue);

	INIT_LIST_HEAD(&tmp);
	spin_lock(dwlk);
	list_for_each_entry_safe(bp, n, dwq, b_list) {
		ASSERT(bp->b_target == target);
		ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
		XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
		if (xfs_buf_ispin(bp)) {
			pincount++;
			continue;
		}

		list_move(&bp->b_list, &tmp);
	}
	spin_unlock(dwlk);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(bp, n, &tmp, b_list) {
		xfs_buf_lock(bp);
		bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
		bp->b_flags |= XBF_WRITE;
		if (wait)
			bp->b_flags &= ~XBF_ASYNC;
		else
			list_del_init(&bp->b_list);

		xfs_buf_iostrategy(bp);
	}

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		bp = list_entry(tmp.next, xfs_buf_t, b_list);

		list_del_init(&bp->b_list);
		xfs_iowait(bp);
		xfs_buf_relse(bp);
	}

	if (wait)
		blk_run_address_space(target->bt_mapping);

	return pincount;
}

int __init
xfs_buf_init(void)
{
#ifdef XFS_BUF_TRACE
	xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP);
#endif

	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
						KM_ZONE_HWALIGN, NULL);
	if (!xfs_buf_zone)
		goto out_free_trace_buf;

	xfslogd_workqueue = create_workqueue("xfslogd");
	if (!xfslogd_workqueue)
		goto out_free_buf_zone;

	xfsdatad_workqueue = create_workqueue("xfsdatad");
	if (!xfsdatad_workqueue)
		goto out_destroy_xfslogd_workqueue;

	xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup);
	if (!xfs_buf_shake)
		goto out_destroy_xfsdatad_workqueue;

	return 0;

 out_destroy_xfsdatad_workqueue:
	destroy_workqueue(xfsdatad_workqueue);
 out_destroy_xfslogd_workqueue:
	destroy_workqueue(xfslogd_workqueue);
 out_free_buf_zone:
	kmem_zone_destroy(xfs_buf_zone);
 out_free_trace_buf:
#ifdef XFS_BUF_TRACE
	ktrace_free(xfs_buf_trace_buf);
#endif
	return -ENOMEM;
}

void
xfs_buf_terminate(void)
{
	kmem_shake_deregister(xfs_buf_shake);
	destroy_workqueue(xfsdatad_workqueue);
	destroy_workqueue(xfslogd_workqueue);
	kmem_zone_destroy(xfs_buf_zone);
#ifdef XFS_BUF_TRACE
	ktrace_free(xfs_buf_trace_buf);
#endif
}
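
Note on the completion counting in xfs_buf_iorequest(): the comment explains that b_io_remaining is set to 1 before any chunk is submitted so that an early per-bio completion cannot call xfs_buf_ioend() while later chunks are still being issued; the extra reference is only dropped by the final _xfs_buf_ioend(bp, 0). The following is a minimal userspace sketch of that "bias the counter by one" pattern, not the kernel API; the names (struct request, submit_all, chunk_done) are hypothetical and the chunk completions run synchronously purely to illustrate the race the bias prevents.

/* Sketch of the submission-bias completion pattern, assuming C11 atomics. */
#include <stdatomic.h>
#include <stdio.h>

struct request {
	atomic_int io_remaining;	/* outstanding chunks + 1 submission bias */
	int        done;		/* set exactly once, by the last dropper */
};

static void complete_if_last(struct request *rq)
{
	/* Only whoever drops the counter from 1 to 0 completes the request. */
	if (atomic_fetch_sub(&rq->io_remaining, 1) == 1)
		rq->done = 1;
}

static void chunk_done(struct request *rq)	/* per-chunk "I/O done" callback */
{
	complete_if_last(rq);
}

static void submit_all(struct request *rq, int nchunks)
{
	/* Bias the counter so a callback that fires before submission is
	 * finished can never see it drop to zero prematurely. */
	atomic_store(&rq->io_remaining, 1);
	for (int i = 0; i < nchunks; i++) {
		atomic_fetch_add(&rq->io_remaining, 1);
		chunk_done(rq);		/* pretend the chunk completed at once */
	}
	complete_if_last(rq);		/* drop the submission bias */
}

int main(void)
{
	struct request rq = { .done = 0 };

	submit_all(&rq, 4);
	printf("done = %d\n", rq.done);	/* prints "done = 1" exactly once */
	return 0;
}

Without the initial bias, the first chunk_done() in the loop would bring the counter to zero and "complete" the request while three chunks were still unsubmitted, which is exactly the premature xfs_buf_ioend() call the kernel comment warns about.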
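Note on buffer aging in xfsbufd(): a delayed-write buffer is skipped while time_before(jiffies, bp->b_queuetime + age) holds, i.e. until it has sat on the queue for xfs_buf_age_centisecs, unless XBT_FORCE_FLUSH forces it out early. The sketch below shows the usual signed-difference trick behind that kind of jiffies comparison, which stays correct across counter wraparound; treat it as an illustration of the idea, with a hypothetical before() helper rather than the kernel's time_before() macro itself.

/* Sketch of a wraparound-safe "is a earlier than b" tick comparison. */
#include <stdio.h>

typedef unsigned long jiffies_t;

static int before(jiffies_t a, jiffies_t b)
{
	/* Signed difference keeps the answer right even after wraparound. */
	return (long)(a - b) < 0;
}

int main(void)
{
	jiffies_t queuetime = (jiffies_t)-10;	/* queued just before the counter wrapped */
	jiffies_t age       = 100;		/* minimum queue residency, in ticks */
	jiffies_t now       = 50;		/* counter has since wrapped past zero */

	/* Still younger than "age" ticks, so the flusher would leave it queued. */
	printf("%d\n", before(now, queuetime + age));	/* prints 1 */
	return 0;
}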