xfs_buf.c
From the “Youlong 2410 Linux 2.6.8 kernel source” collection · C code · 1,839 lines in total · part 1 of 3
STATIC int
bio_end_io_pagebuf(
	struct bio		*bio,
	unsigned int		bytes_done,
	int			error)
{
	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
	unsigned int		sectorshift = pb->pb_target->pbr_sshift;
	struct bio_vec		*bvec = bio->bi_io_vec;

	if (bio->bi_size)
		return 1;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		pb->pb_error = EIO;

	for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
		struct page	*page = bvec->bv_page;

		if (pb->pb_error) {
			SetPageError(page);
		} else if (blocksize == PAGE_CACHE_SIZE) {
			SetPageUptodate(page);
		} else if (!PagePrivate(page) &&
				(pb->pb_flags & _PBF_PAGE_CACHE)) {
			unsigned long	j, range;

			ASSERT(blocksize < PAGE_CACHE_SIZE);
			range = (bvec->bv_offset + bvec->bv_len) >> sectorshift;
			for (j = bvec->bv_offset >> sectorshift; j < range; j++)
				set_bit(j, &page->private);
			if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1))
				SetPageUptodate(page);
		}

		if (_pagebuf_iolocked(pb)) {
			unlock_page(page);
		}
	}

	_pagebuf_iodone(pb, 1);
	bio_put(bio);
	return 0;
}

void
_pagebuf_ioapply(
	xfs_buf_t		*pb)
{
	int			i, map_i, total_nr_pages, nr_pages;
	struct bio		*bio;
	int			offset = pb->pb_offset;
	int			size = pb->pb_count_desired;
	sector_t		sector = pb->pb_bn;
	unsigned int		blocksize = pb->pb_target->pbr_bsize;
	int			locking = _pagebuf_iolocked(pb);

	total_nr_pages = pb->pb_page_count;
	map_i = 0;

	/* Special code path for reading a sub page size pagebuf in --
	 * we populate up the whole page, and hence the other metadata
	 * in the same page.  This optimization is only valid when the
	 * filesystem block size and the page size are equal.
	 */
	if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) &&
	    (pb->pb_flags & PBF_READ) && locking &&
	    (blocksize == PAGE_CACHE_SIZE)) {
		bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = pb->pb_target->pbr_bdev;
		bio->bi_sector = sector - (offset >> BBSHIFT);
		bio->bi_end_io = bio_end_io_pagebuf;
		bio->bi_private = pb;

		bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0);
		size = 0;

		atomic_inc(&pb->pb_io_remaining);

		goto submit_io;
	}

	/* Lock down the pages which we need to for the request */
	if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) {
		for (i = 0; size; i++) {
			int		nbytes = PAGE_CACHE_SIZE - offset;
			struct page	*page = pb->pb_pages[i];

			if (nbytes > size)
				nbytes = size;

			lock_page(page);

			size -= nbytes;
			offset = 0;
		}
		offset = pb->pb_offset;
		size = pb->pb_count_desired;
	}

next_chunk:
	atomic_inc(&pb->pb_io_remaining);
	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
	if (nr_pages > total_nr_pages)
		nr_pages = total_nr_pages;

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_bdev = pb->pb_target->pbr_bdev;
	bio->bi_sector = sector;
	bio->bi_end_io = bio_end_io_pagebuf;
	bio->bi_private = pb;

	for (; size && nr_pages; nr_pages--, map_i++) {
		int	nbytes = PAGE_CACHE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		if (bio_add_page(bio, pb->pb_pages[map_i],
					nbytes, offset) < nbytes)
			break;

		offset = 0;
		sector += nbytes >> BBSHIFT;
		size -= nbytes;
		total_nr_pages--;
	}

submit_io:
	if (likely(bio->bi_size)) {
		submit_bio((pb->pb_flags & PBF_READ) ? READ : WRITE, bio);
		if (size)
			goto next_chunk;
	} else {
		bio_put(bio);
		pagebuf_ioerror(pb, EIO);
	}

	if (pb->pb_flags & _PBF_RUN_QUEUES) {
		pb->pb_flags &= ~_PBF_RUN_QUEUES;
		if (atomic_read(&pb->pb_io_remaining) > 1)
			blk_run_address_space(pb->pb_target->pbr_mapping);
	}
}
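/*
 * Illustrative user-space sketch, not part of the original file: it models
 * the chunking arithmetic of _pagebuf_ioapply() above. With 512-byte basic
 * blocks (BBSHIFT == 9) and 4 KiB pages (PAGE_SHIFT == 12) a page spans 8
 * basic blocks, so each bio carries at most BIO_MAX_SECTORS >> 3 pages and
 * a large buffer goes out as a chain of bios. BIO_MAX_SECTORS is assumed
 * to be 128 here, and bio_add_page() rejections are not modelled.
 */
#include <stdio.h>

#define BBSHIFT		9			/* 512-byte basic blocks */
#define PAGE_SHIFT	12			/* 4 KiB pages */
#define PAGE_SIZE	(1 << PAGE_SHIFT)
#define BIO_MAX_SECTORS	128			/* assumed 2.6-era cap */

int main(void)
{
	int		size = 20 * PAGE_SIZE;	/* bytes left to map */
	int		offset = 512;		/* offset into first page */
	long long	sector = 1000;		/* next basic block number */
	int		chunk = 0;

	while (size) {
		int nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);

		printf("bio %d: starts at sector %lld, up to %d pages\n",
				++chunk, sector, nr_pages);
		for (; size && nr_pages; nr_pages--) {
			int nbytes = PAGE_SIZE - offset;

			if (nbytes > size)
				nbytes = size;
			offset = 0;		/* later pages start at 0 */
			sector += nbytes >> BBSHIFT;
			size -= nbytes;
		}
	}
	return 0;
}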
/*
 *	pagebuf_iorequest -- the core I/O request routine.
 */
int
pagebuf_iorequest(			/* start real I/O		*/
	xfs_buf_t		*pb)	/* buffer to convey to device	*/
{
	PB_TRACE(pb, "iorequest", 0);

	if (pb->pb_flags & PBF_DELWRI) {
		pagebuf_delwri_queue(pb, 1);
		return 0;
	}

	if (pb->pb_flags & PBF_WRITE) {
		_pagebuf_wait_unpin(pb);
	}

	pagebuf_hold(pb);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling pagebuf_iodone too early.
	 */
	atomic_set(&pb->pb_io_remaining, 1);
	_pagebuf_ioapply(pb);
	_pagebuf_iodone(pb, 0);

	pagebuf_rele(pb);
	return 0;
}

/*
 *	pagebuf_iowait
 *
 *	pagebuf_iowait waits for I/O to complete on the buffer supplied.
 *	It returns immediately if no I/O is pending.  In any case, it returns
 *	the error code, if any, or 0 if there is no error.
 */
int
pagebuf_iowait(
	xfs_buf_t		*pb)
{
	PB_TRACE(pb, "iowait", 0);
	if (atomic_read(&pb->pb_io_remaining))
		blk_run_address_space(pb->pb_target->pbr_mapping);
	down(&pb->pb_iodonesema);
	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
	return pb->pb_error;
}

caddr_t
pagebuf_offset(
	xfs_buf_t		*pb,
	size_t			offset)
{
	struct page		*page;

	offset += pb->pb_offset;

	page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
	return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
}

/*
 *	pagebuf_iomove
 *
 *	Move data into or out of a buffer.
 */
void
pagebuf_iomove(
	xfs_buf_t		*pb,	/* buffer to process		*/
	size_t			boff,	/* starting buffer offset	*/
	size_t			bsize,	/* length to copy		*/
	caddr_t			data,	/* data address			*/
	page_buf_rw_t		mode)	/* read/write flag		*/
{
	size_t			bend, cpoff, csize;
	struct page		*page;

	bend = boff + bsize;
	while (boff < bend) {
		page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
		cpoff = page_buf_poff(boff + pb->pb_offset);
		csize = min_t(size_t,
			      PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);

		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));

		switch (mode) {
		case PBRW_ZERO:
			memset(page_address(page) + cpoff, 0, csize);
			break;
		case PBRW_READ:
			memcpy(data, page_address(page) + cpoff, csize);
			break;
		case PBRW_WRITE:
			memcpy(page_address(page) + cpoff, data, csize);
		}

		boff += csize;
		data += csize;
	}
}
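/*
 * Illustrative user-space sketch, not part of the original file: it shows
 * the completion-count bias that pagebuf_iorequest() above relies on. The
 * submitter pre-sets pb_io_remaining to 1, so a bio that completes while
 * later bios are still being issued cannot drive the count to zero and
 * fire the final-completion path early; only the submitter's own drop of
 * the bias can. All names below are hypothetical stand-ins.
 */
#include <stdio.h>

static int io_remaining;

static void iodone_dec(const char *who)
{
	if (--io_remaining == 0)
		printf("%s: count hit zero, run final completion\n", who);
	else
		printf("%s: count now %d, completion deferred\n",
				who, io_remaining);
}

int main(void)
{
	io_remaining = 1;		/* submitter's bias, as in iorequest */

	io_remaining++;			/* submit bio A */
	iodone_dec("bio A end_io");	/* A may finish immediately...      */
	io_remaining++;			/* ...while bio B is still issued   */
	iodone_dec("bio B end_io");

	iodone_dec("submitter");	/* drop the bias: only now -> zero  */
	return 0;
}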
/*
 *	Handling of buftargs.
 */
void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);
	if (external)
		xfs_blkdev_put(btp->pbr_bdev);
	kmem_free(btp, sizeof(*btp));
}

void
xfs_incore_relse(
	xfs_buftarg_t		*btp,
	int			delwri_only,
	int			wait)
{
	invalidate_bdev(btp->pbr_bdev, 1);
	truncate_inode_pages(btp->pbr_mapping, 0LL);
}

void
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	btp->pbr_bsize = blocksize;
	btp->pbr_sshift = ffs(sectorsize) - 1;
	btp->pbr_smask = sectorsize - 1;

	if (set_blocksize(btp->pbr_bdev, sectorsize)) {
		printk(KERN_WARNING
			"XFS: Cannot set_blocksize to %u on device %s\n",
			sectorsize, XFS_BUFTARG_NAME(btp));
	}
}

xfs_buftarg_t *
xfs_alloc_buftarg(
	struct block_device	*bdev)
{
	xfs_buftarg_t		*btp;

	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);

	btp->pbr_dev = bdev->bd_dev;
	btp->pbr_bdev = bdev;
	btp->pbr_mapping = bdev->bd_inode->i_mapping;
	xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev));

	return btp;
}

/*
 * Pagebuf delayed write buffer handling
 */

STATIC LIST_HEAD(pbd_delwrite_queue);
STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;

STATIC void
pagebuf_delwri_queue(
	xfs_buf_t		*pb,
	int			unlock)
{
	PB_TRACE(pb, "delwri_q", (long)unlock);
	ASSERT(pb->pb_flags & PBF_DELWRI);

	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
		if (unlock) {
			atomic_dec(&pb->pb_hold);
		}
		list_del(&pb->pb_list);
	}

	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;
	spin_unlock(&pbd_delwrite_lock);

	if (unlock)
		pagebuf_unlock(pb);
}

void
pagebuf_delwri_dequeue(
	xfs_buf_t		*pb)
{
	int			dequeued = 0;

	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
	pb->pb_flags &= ~PBF_DELWRI;
	spin_unlock(&pbd_delwrite_lock);

	if (dequeued)
		pagebuf_rele(pb);

	PB_TRACE(pb, "delwri_dq", (long)dequeued);
}

STATIC void
pagebuf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}

/* Defines for pagebuf daemon */
STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
STATIC struct task_struct *pagebuf_daemon_task;
STATIC int pagebuf_daemon_active;
STATIC int force_flush;

STATIC int
pagebuf_daemon_wakeup(
	int			priority,
	unsigned int		mask)
{
	force_flush = 1;
	barrier();
	wake_up_process(pagebuf_daemon_task);
	return 0;
}

STATIC int
pagebuf_daemon(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buftarg_t		*target;
	xfs_buf_t		*pb, *n;

	/* Set up the thread */
	daemonize("xfsbufd");
	current->flags |= PF_MEMALLOC;

	pagebuf_daemon_task = current;
	pagebuf_daemon_active = 1;
	barrier();

	INIT_LIST_HEAD(&tmp);
	do {
		/* swsusp */
		if (current->flags & PF_FREEZE)
			refrigerator(PF_FREEZE);

		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);

		age = (xfs_buf_age_centisecs * HZ) / 100;
		spin_lock(&pbd_delwrite_lock);
		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
			ASSERT(pb->pb_flags & PBF_DELWRI);

			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
				if (!force_flush &&
				    time_before(jiffies,
						pb->pb_queuetime + age)) {
					pagebuf_unlock(pb);
					break;
				}

				pb->pb_flags &= ~PBF_DELWRI;
				pb->pb_flags |= PBF_WRITE;
				list_move(&pb->pb_list, &tmp);
			}
		}
		spin_unlock(&pbd_delwrite_lock);

		while (!list_empty(&tmp)) {
			pb = list_entry(tmp.next, xfs_buf_t, pb_list);
			target = pb->pb_target;

			list_del_init(&pb->pb_list);
			pagebuf_iostrategy(pb);

			blk_run_address_space(target->pbr_mapping);
		}

		if (as_list_len > 0)
			purge_addresses();

		force_flush = 0;
	} while (pagebuf_daemon_active);

	complete_and_exit(&pagebuf_daemon_done, 0);
}
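/*
 * Illustrative user-space sketch, not part of the original file: it shows
 * why pagebuf_daemon() above may simply "break" at the first buffer that
 * is still too young. pagebuf_delwri_queue() always (re)inserts at the
 * tail, so the delwri list is ordered by pb_queuetime; once one entry
 * fails the age test, every later entry fails it too. The 1500-centisecond
 * age and the jiffies values are assumed for the demo, and time_before()
 * is modelled as a plain compare, ignoring jiffies wraparound.
 */
#include <stdio.h>

#define HZ 100

int main(void)
{
	/* queue times in jiffies, oldest first (tail-insert ordering) */
	unsigned long queuetime[] = { 100, 250, 400, 900 };
	unsigned long jiffies = 2000;
	unsigned long age = (1500 * HZ) / 100;	/* assumed 15s default */
	int i;

	for (i = 0; i < 4; i++) {
		if (jiffies < queuetime[i] + age) {
			printf("buffer %d too young, stop scanning\n", i);
			break;
		}
		printf("buffer %d aged out, flush it\n", i);
	}
	return 0;
}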
/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t		*target,
	int			wait)
{
	struct list_head	tmp;
	xfs_buf_t		*pb, *n;
	int			pincount = 0;

	pagebuf_runall_queues(pagebuf_dataio_workqueue);
	pagebuf_runall_queues(pagebuf_logio_workqueue);

	INIT_LIST_HEAD(&tmp);
	spin_lock(&pbd_delwrite_lock);
	list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {

		if (pb->pb_target != target)
			continue;

		ASSERT(pb->pb_flags & PBF_DELWRI);
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		if (pagebuf_ispin(pb)) {
			pincount++;
			continue;
		}

		pb->pb_flags &= ~PBF_DELWRI;
		pb->pb_flags |= PBF_WRITE;
		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);

		pagebuf_lock(pb);
		pagebuf_iostrategy(pb);
	}

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		pb = list_entry(tmp.next, xfs_buf_t, pb_list);

		list_del_init(&pb->pb_list);
		xfs_iowait(pb);
		xfs_buf_relse(pb);
	}

	if (wait)
		blk_run_address_space(target->pbr_mapping);

	return pincount;
}

STATIC int
pagebuf_daemon_start(void)
{
	int		rval;

	pagebuf_logio_workqueue = create_workqueue("xfslogd");
	if (!pagebuf_logio_workqueue)
		return -ENOMEM;

	pagebuf_dataio_workqueue = create_workqueue("xfsdatad");
	if (!pagebuf_dataio_workqueue) {
		destroy_workqueue(pagebuf_logio_workqueue);
		return -ENOMEM;
	}

	rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES);
	if (rval < 0) {
		destroy_workqueue(pagebuf_logio_workqueue);
		destroy_workqueue(pagebuf_dataio_workqueue);
	}

	return rval;
}

/*
 * pagebuf_daemon_stop
 *
 * Note: do not mark as __exit, it is called from pagebuf_terminate.
 */
STATIC void
pagebuf_daemon_stop(void)
{
	pagebuf_daemon_active = 0;
	barrier();
	wait_for_completion(&pagebuf_daemon_done);

	destroy_workqueue(pagebuf_logio_workqueue);
	destroy_workqueue(pagebuf_dataio_workqueue);
}

/*
 *	Initialization and Termination
 */

int __init
pagebuf_init(void)
{
	int		i;

	pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
			SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (pagebuf_cache == NULL) {
		printk("XFS: couldn't init xfs_buf_t cache\n");
		pagebuf_terminate();
		return -ENOMEM;
	}

#ifdef PAGEBUF_TRACE
	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
#endif

	pagebuf_daemon_start();

	pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
	if (pagebuf_shake == NULL) {
		pagebuf_terminate();
		return -ENOMEM;
	}

	for (i = 0; i < NHASH; i++) {
		spin_lock_init(&pbhash[i].pb_hash_lock);
		INIT_LIST_HEAD(&pbhash[i].pb_hash);
	}

	return 0;
}

/*
 *	pagebuf_terminate.
 *
 *	Note: do not mark as __exit, this is also called from the __init code.
 */
void
pagebuf_terminate(void)
{
	pagebuf_daemon_stop();

#ifdef PAGEBUF_TRACE
	ktrace_free(pagebuf_trace_buf);
#endif

	kmem_zone_destroy(pagebuf_cache);
	kmem_shake_deregister(pagebuf_shake);
}
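/*
 * Illustrative user-space sketch, not part of the original file: the
 * two-phase flush shape used by xfs_flush_buftarg() above. Phase 1 issues
 * every buffer's I/O without blocking; phase 2 only then waits on each in
 * turn. Submitting everything first lets the block layer merge and service
 * the writes in parallel instead of a submit/wait/submit/wait sequence.
 * The struct and helper names here are hypothetical.
 */
#include <stdio.h>

struct buf { int id; };

static void submit(struct buf *b) { printf("submit buf %d (async)\n", b->id); }
static void iowait(struct buf *b) { printf("wait for buf %d\n", b->id); }

int main(void)
{
	struct buf bufs[] = { {0}, {1}, {2} };
	int i, n = sizeof(bufs) / sizeof(bufs[0]);

	/* phase 1: issue all I/O up front so it can proceed in parallel */
	for (i = 0; i < n; i++)
		submit(&bufs[i]);

	/* phase 2: only now block on completions, one buffer at a time */
	for (i = 0; i < n; i++)
		iowait(&bufs[i]);

	return 0;
}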