📄 xfs_buf.c
		size = 0;

		atomic_inc(&pb->pb_io_remaining);

		goto submit_io;
	}

	/* Lock down the pages which we need to for the request */
	if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
		for (i = 0; size; i++) {
			int		nbytes = PAGE_CACHE_SIZE - offset;
			struct page	*page = pb->pb_pages[i];

			if (nbytes > size)
				nbytes = size;

			lock_page(page);

			size -= nbytes;
			offset = 0;
		}
		offset = pb->pb_offset;
		size = pb->pb_count_desired;
	}

next_chunk:
	atomic_inc(&pb->pb_io_remaining);
	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
	if (nr_pages > total_nr_pages)
		nr_pages = total_nr_pages;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_bdev = pb->pb_target->pbr_bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = bio_end_io_pagebuf;
	bio->bi_private = pb;

	for (; size && nr_pages; nr_pages--, map_i++) {
		int	nbytes = PAGE_CACHE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		if (bio_add_page(bio, pb->pb_pages[map_i],
					nbytes, offset) < nbytes)
			break;

		offset = 0;
		sector += nbytes >> BBSHIFT;
		size -= nbytes;
		total_nr_pages--;
	}

submit_io:
	if (likely(bio->bi_size)) {
		submit_bio(rw, bio);
		if (size)
			goto next_chunk;
	} else {
		bio_put(bio);
		pagebuf_ioerror(pb, EIO);
	}
}

/*
 * pagebuf_iorequest -- the core I/O request routine.
 */
int
pagebuf_iorequest(			/* start real I/O		*/
	xfs_buf_t		*pb)	/* buffer to convey to device	*/
{
	PB_TRACE(pb, "iorequest", 0);

	if (pb->pb_flags & PBF_DELWRI) {
		pagebuf_delwri_queue(pb, 1);
		return 0;
	}

	if (pb->pb_flags & PBF_WRITE) {
		_pagebuf_wait_unpin(pb);
	}

	pagebuf_hold(pb);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling pagebuf_iodone too early.
	 */
	atomic_set(&pb->pb_io_remaining, 1);
	_pagebuf_ioapply(pb);
	_pagebuf_iodone(pb, 0);

	pagebuf_rele(pb);
	return 0;
}

/*
 * pagebuf_iowait
 *
 * pagebuf_iowait waits for I/O to complete on the buffer supplied.
 * It returns immediately if no I/O is pending.  In any case, it returns
 * the error code, if any, or 0 if there is no error.
 */
int
pagebuf_iowait(
	xfs_buf_t		*pb)
{
	PB_TRACE(pb, "iowait", 0);
	if (atomic_read(&pb->pb_io_remaining))
		blk_run_address_space(pb->pb_target->pbr_mapping);
	down(&pb->pb_iodonesema);
	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
	return pb->pb_error;
}

caddr_t
pagebuf_offset(
	xfs_buf_t		*pb,
	size_t			offset)
{
	struct page		*page;

	offset += pb->pb_offset;

	page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
	return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
}

/*
 * pagebuf_iomove
 *
 * Move data into or out of a buffer.
 */
void
pagebuf_iomove(
	xfs_buf_t		*pb,	/* buffer to process		*/
	size_t			boff,	/* starting buffer offset	*/
	size_t			bsize,	/* length to copy		*/
	caddr_t			data,	/* data address			*/
	page_buf_rw_t		mode)	/* read/write flag		*/
{
	size_t			bend, cpoff, csize;
	struct page		*page;

	bend = boff + bsize;
	while (boff < bend) {
		page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
		cpoff = page_buf_poff(boff + pb->pb_offset);
		csize = min_t(size_t,
			      PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);

		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));

		switch (mode) {
		case PBRW_ZERO:
			memset(page_address(page) + cpoff, 0, csize);
			break;
		case PBRW_READ:
			memcpy(data, page_address(page) + cpoff, csize);
			break;
		case PBRW_WRITE:
			memcpy(page_address(page) + cpoff, data, csize);
		}

		boff += csize;
		data += csize;
	}
}

/*
 * Handling of buftargs.
 */

/*
 * Wait for any bufs with callbacks that have been submitted but
 * have not yet returned... walk the hash list for the target.
 */
void
xfs_wait_buftarg(
	xfs_buftarg_t		*btp)
{
	xfs_buf_t		*bp, *n;
	xfs_bufhash_t		*hash;
	uint			i;

	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		hash = &btp->bt_hash[i];
again:
		spin_lock(&hash->bh_lock);
		list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
			ASSERT(btp == bp->pb_target);
			if (!(bp->pb_flags & PBF_FS_MANAGED)) {
				spin_unlock(&hash->bh_lock);
				/*
				 * Catch superblock reference count leaks
				 * immediately
				 */
				BUG_ON(bp->pb_bn == 0);
				delay(100);
				goto again;
			}
		}
		spin_unlock(&hash->bh_lock);
	}
}

/*
 * Allocate buffer hash table for a given target.
 * For devices containing metadata (i.e. not the log/realtime devices)
 * we need to allocate a much larger hash table.
 */
STATIC void
xfs_alloc_bufhash(
	xfs_buftarg_t		*btp,
	int			external)
{
	unsigned int		i;

	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
					sizeof(xfs_bufhash_t), KM_SLEEP);
	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
		spin_lock_init(&btp->bt_hash[i].bh_lock);
		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
	}
}

STATIC void
xfs_free_bufhash(
	xfs_buftarg_t		*btp)
{
	kmem_free(btp->bt_hash,
		  (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
	btp->bt_hash = NULL;
}

void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);
	if (external)
		xfs_blkdev_put(btp->pbr_bdev);
	xfs_free_bufhash(btp);
	iput(btp->pbr_mapping->host);
	kmem_free(btp, sizeof(*btp));
}

STATIC int
xfs_setsize_buftarg_flags(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize,
	int			verbose)
{
	btp->pbr_bsize = blocksize;
	btp->pbr_sshift = ffs(sectorsize) - 1;
	btp->pbr_smask = sectorsize - 1;

	if (set_blocksize(btp->pbr_bdev, sectorsize)) {
		printk(KERN_WARNING
			"XFS: Cannot set_blocksize to %u on device %s\n",
			sectorsize, XFS_BUFTARG_NAME(btp));
		return EINVAL;
	}

	if (verbose &&
	    (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
		printk(KERN_WARNING
			"XFS: %u byte sectors in use on device %s.  "
			"This is suboptimal; %u or greater is ideal.\n",
			sectorsize, XFS_BUFTARG_NAME(btp),
			(unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
	}

	return 0;
}

/*
 * When allocating the initial buffer target we have not yet
 * read in the superblock, so don't know what sized sectors
 * are being used at this early stage.  Play safe.
 */
STATIC int
xfs_setsize_buftarg_early(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	return xfs_setsize_buftarg_flags(btp,
			PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
}

int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
}

STATIC int
xfs_mapping_buftarg(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	struct backing_dev_info	*bdi;
	struct inode		*inode;
	struct address_space	*mapping;
	static struct address_space_operations mapping_aops = {
		.sync_page = block_sync_page,
	};

	inode = new_inode(bdev->bd_inode->i_sb);
	if (!inode) {
		printk(KERN_WARNING
			"XFS: Cannot allocate mapping inode for device %s\n",
			XFS_BUFTARG_NAME(btp));
		return ENOMEM;
	}
	inode->i_mode = S_IFBLK;
	inode->i_bdev = bdev;
	inode->i_rdev = bdev->bd_dev;
	bdi = blk_get_backing_dev_info(bdev);
	if (!bdi)
		bdi = &default_backing_dev_info;
	mapping = &inode->i_data;
	mapping->a_ops = &mapping_aops;
	mapping->backing_dev_info = bdi;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	btp->pbr_mapping = mapping;
	return 0;
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct block_device	*bdev,
	int			external)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

	btp->pbr_dev = bdev->bd_dev;
	btp->pbr_bdev = bdev;
	if (xfs_setsize_buftarg_early(btp, bdev))
		goto error;
	if (xfs_mapping_buftarg(btp, bdev))
		goto error;
	xfs_alloc_bufhash(btp, external);
	return btp;

error:
	kmem_free(btp, sizeof(*btp));
	return NULL;
}

/*
 * Pagebuf delayed write buffer handling
 */
STATIC LIST_HEAD(pbd_delwrite_queue);
STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);

STATIC void
pagebuf_delwri_queue(
	xfs_buf_t		*pb,
	int			unlock)
{
	PB_TRACE(pb, "delwri_q", (long)unlock);
	ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
					(PBF_DELWRI|PBF_ASYNC));

	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
		ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
		if (unlock) {
			atomic_dec(&pb->pb_hold);
		}
		list_del(&pb->pb_list);
	}

	pb->pb_flags |= _PBF_DELWRI_Q;
	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;
	spin_unlock(&pbd_delwrite_lock);

	if (unlock)
		pagebuf_unlock(pb);
}

void
pagebuf_delwri_dequeue(
	xfs_buf_t		*pb)
{
	int			dequeued = 0;

	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
		ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
	pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
	spin_unlock(&pbd_delwrite_lock);

	if (dequeued)
		pagebuf_rele(pb);

	PB_TRACE(pb, "delwri_dq", (long)dequeued);
}

STATIC void
pagebuf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}

/* Defines for pagebuf daemon */
STATIC struct task_struct *xfsbufd_task;
STATIC int xfsbufd_force_flush;
STATIC int xfsbufd_force_sleep;

STATIC int
xfsbufd_wakeup(
	int			priority,
	gfp_t			mask)
{
	if (xfsbufd_force_sleep)
		return 0;
	xfsbufd_force_flush = 1;
	barrier();
	wake_up_process(xfsbufd_task);
	return 0;
}

STATIC int
xfsbufd(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buftarg_t		*target;
	xfs_buf_t		*pb, *n;

	current->flags |= PF_MEMALLOC;

	INIT_LIST_HEAD(&tmp);
	do {
		if (unlikely(freezing(current))) {
			xfsbufd_force_sleep = 1;
			refrigerator();
		} else {
			xfsbufd_force_sleep = 0;
		}

		schedule_timeout_interruptible(
			xfs_buf_timer_centisecs * msecs_to_jiffies(10));

		age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
		spin_lock(&pbd_delwrite_lock);
		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
			ASSERT(pb->pb_flags & PBF_DELWRI);

			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
				if (!xfsbufd_force_flush &&
				    time_before(jiffies,
						pb->pb_queuetime + age)) {
					pagebuf_unlock(pb);
					break;
				}

				pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
				pb->pb_flags |= PBF_WRITE;
				list_move(&pb->pb_list, &tmp);
			}
		}
		spin_unlock(&pbd_delwrite_lock);

		while (!list_empty(&tmp)) {
			pb = list_entry(tmp.next, xfs_buf_t, pb_list);
			target = pb->pb_target;

			list_del_init(&pb->pb_list);
			pagebuf_iostrategy(pb);

			blk_run_address_space(target->pbr_mapping);
		}

		if (as_list_len > 0)
			purge_addresses();

		xfsbufd_force_flush = 0;
	} while (!kthread_should_stop());

	return 0;
}

/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t		*target,
	int			wait)
{
	struct list_head	tmp;
	xfs_buf_t		*pb, *n;
	int			pincount = 0;

	pagebuf_runall_queues(xfsdatad_workqueue);
	pagebuf_runall_queues(xfslogd_workqueue);

	INIT_LIST_HEAD(&tmp);
	spin_lock(&pbd_delwrite_lock);
	list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {

		if (pb->pb_target != target)
			continue;

		ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		if (pagebuf_ispin(pb)) {
			pincount++;
			continue;
		}

		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
		pagebuf_lock(pb);
		pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
		pb->pb_flags |= PBF_WRITE;
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);

		pagebuf_iostrategy(pb);
	}

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		pb = list_entry(tmp.next, xfs_buf_t, pb_list);

		list_del_init(&pb->pb_list);
		xfs_iowait(pb);
		xfs_buf_relse(pb);
	}

	if (wait)
		blk_run_address_space(target->pbr_mapping);

	return pincount;
}

int __init
pagebuf_init(void)
{
	int		error = -ENOMEM;

#ifdef PAGEBUF_TRACE
	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
#endif

	pagebuf_zone = kmem_zone_init(sizeof(xfs_buf_t), "xfs_buf");
	if (!pagebuf_zone)
		goto out_free_trace_buf;

	xfslogd_workqueue = create_workqueue("xfslogd");
	if (!xfslogd_workqueue)
		goto out_free_buf_zone;

	xfsdatad_workqueue = create_workqueue("xfsdatad");
	if (!xfsdatad_workqueue)
		goto out_destroy_xfslogd_workqueue;

	xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
	if (IS_ERR(xfsbufd_task)) {
		error = PTR_ERR(xfsbufd_task);
		goto out_destroy_xfsdatad_workqueue;
	}

	pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
	if (!pagebuf_shake)
		goto out_stop_xfsbufd;

	return 0;

 out_stop_xfsbufd:
	kthread_stop(xfsbufd_task);
 out_destroy_xfsdatad_workqueue:
	destroy_workqueue(xfsdatad_workqueue);
 out_destroy_xfslogd_workqueue:
	destroy_workqueue(xfslogd_workqueue);
 out_free_buf_zone:
	kmem_zone_destroy(pagebuf_zone);
 out_free_trace_buf:
#ifdef PAGEBUF_TRACE
	ktrace_free(pagebuf_trace_buf);
#endif
	return error;
}

void
pagebuf_terminate(void)
{
	kmem_shake_deregister(pagebuf_shake);
	kthread_stop(xfsbufd_task);
	destroy_workqueue(xfsdatad_workqueue);
	destroy_workqueue(xfslogd_workqueue);
	kmem_zone_destroy(pagebuf_zone);
#ifdef PAGEBUF_TRACE
	ktrace_free(pagebuf_trace_buf);
#endif
}
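Usage note: pagebuf_iorequest() and pagebuf_iowait() together give callers a simple synchronous I/O pattern, queue the request, then sleep on pb_iodonesema and pick up pb_error. The sketch below is illustrative only and is not part of the original file; example_sync_io() is a hypothetical name, and the caller is assumed to have already mapped and locked the buffer with its usual lookup path.

/*
 * Illustrative sketch only (not from xfs_buf.c): the synchronous
 * request/wait pattern built from the routines above.  The caller is
 * assumed to have already set up, mapped and locked *pb for I/O.
 */
STATIC int
example_sync_io(
	xfs_buf_t	*pb)
{
	int		error;

	error = pagebuf_iorequest(pb);	/* queue the bios (or delwri-queue) */
	if (!error)
		error = pagebuf_iowait(pb);	/* block on pb_iodonesema, return pb_error */

	return error;
}

Because pagebuf_iorequest() primes pb_io_remaining to 1 before any bios are submitted, the completion path cannot call pagebuf_iodone() until every chunk has been queued, so a waiter like the sketch above never wakes early.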