📄 ll_rw_blk.c
/*
 *  linux/drivers/block/ll_rw_blk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>

/*
 * for max sense size
 */
#include <scsi/scsi_cmnd.h>

static void blk_unplug_work(void *data);
static void blk_unplug_timeout(unsigned long data);
static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);

/*
 * For the allocated request tables
 */
static kmem_cache_t *request_cachep;

/*
 * For queue allocation
 */
static kmem_cache_t *requestq_cachep;

/*
 * For io context allocations
 */
static kmem_cache_t *iocontext_cachep;

static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

unsigned long blk_max_low_pfn, blk_max_pfn;

EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);

/* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME	(HZ/50UL)

/* Number of requests a "batching" process may submit */
#define BLK_BATCH_REQ	32

/*
 * Return the threshold (number of used requests) at which the queue is
 * considered to be congested.  It includes a little hysteresis to keep the
 * context switch rate down.
 */
static inline int queue_congestion_on_threshold(struct request_queue *q)
{
	return q->nr_congestion_on;
}

/*
 * The threshold at which a queue is considered to be uncongested
 */
static inline int queue_congestion_off_threshold(struct request_queue *q)
{
	return q->nr_congestion_off;
}

static void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}

/*
 * A queue has just exited congestion.  Note this in the global counter of
 * congested queues, and wake up anyone who was waiting for requests to be
 * put back.
 */
static void clear_queue_congested(request_queue_t *q, int rw)
{
	enum bdi_state bit;
	wait_queue_head_t *wqh = &congestion_wqh[rw];

	bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
	clear_bit(bit, &q->backing_dev_info.state);
	smp_mb__after_clear_bit();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}

/*
 * A queue has just entered congestion.  Flag that in the queue's VM-visible
 * state flags and increment the global counter of congested queues.
 */
static void set_queue_congested(request_queue_t *q, int rw)
{
	enum bdi_state bit;

	bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
	set_bit(bit, &q->backing_dev_info.state);
}

/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info
 *
 * Will return NULL if the request queue cannot be located.
 */
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct backing_dev_info *ret = NULL;
	request_queue_t *q = bdev_get_queue(bdev);

	if (q)
		ret = &q->backing_dev_info;
	return ret;
}

EXPORT_SYMBOL(blk_get_backing_dev_info);

void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
{
	q->activity_fn = fn;
	q->activity_data = data;
}

EXPORT_SYMBOL(blk_queue_activity_fn);

/**
 * blk_queue_prep_rq - set a prepare_request function for queue
 * @q:		queue
 * @pfn:	prepare_request function
 *
 * It's possible for a queue to register a prepare_request callback which
 * is invoked before the request is handed to the request_fn. The goal of
 * the function is to prepare a request for I/O, it can be used to build a
 * cdb from the request data for instance.
 *
 */
void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
{
	q->prep_rq_fn = pfn;
}

EXPORT_SYMBOL(blk_queue_prep_rq);

/**
 * blk_queue_merge_bvec - set a merge_bvec function for queue
 * @q:		queue
 * @mbfn:	merge_bvec_fn
 *
 * Usually queues have static limitations on the max sectors or segments that
 * we can put in a request. Stacking drivers may have some settings that
 * are dynamic, and thus we have to query the queue whether it is ok to
 * add a new bio_vec to a bio at a given offset or not. If the block device
 * has such limitations, it needs to register a merge_bvec_fn to control
 * the size of bio's sent to it. Note that a block device *must* allow a
 * single page to be added to an empty bio. The block device driver may want
 * to use the bio_split() function to deal with these bio's. By default
 * no merge_bvec_fn is defined for a queue, and only the fixed limits are
 * honored.
 */
void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
{
	q->merge_bvec_fn = mbfn;
}

EXPORT_SYMBOL(blk_queue_merge_bvec);

/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:  the request queue for the device to be affected
 * @mfn: the alternate make_request function
 *
 * Description:
 *    The normal way for &struct bios to be passed to a device
 *    driver is for them to be collected into requests on a request
 *    queue, and then to allow the device driver to select requests
 *    off that queue when it is ready.  This works well for many block
 *    devices. However some block devices (typically virtual devices
 *    such as md or lvm) do not benefit from the processing on the
 *    request queue, and are served best by having the requests passed
 *    directly to them.  This can be achieved by providing a function
 *    to blk_queue_make_request().
 *
 * Caveat:
 *    The driver that does this *must* be able to deal appropriately
 *    with buffers in "highmemory". This can be accomplished by either calling
 *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
 *    blk_queue_bounce() to create a buffer in normal memory.
 **/
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
{
	/*
	 * set defaults
	 */
	q->nr_requests = BLKDEV_MAX_RQ;
	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
	q->make_request_fn = mfn;
	q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	q->backing_dev_info.state = 0;
	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
	blk_queue_max_sectors(q, MAX_SECTORS);
	blk_queue_hardsect_size(q, 512);
	blk_queue_dma_alignment(q, 511);
	blk_queue_congestion_threshold(q);
	q->nr_batching = BLK_BATCH_REQ;

	q->unplug_thresh = 4;		/* hmm */
	q->unplug_delay = (3 * HZ) / 1000;	/* 3 milliseconds */
	if (q->unplug_delay == 0)
		q->unplug_delay = 1;

	INIT_WORK(&q->unplug_work, blk_unplug_work, q);

	q->unplug_timer.function = blk_unplug_timeout;
	q->unplug_timer.data = (unsigned long)q;

	/*
	 * by default assume old behaviour and bounce for any highmem page
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);

	blk_queue_activity_fn(q, NULL, NULL);

	INIT_LIST_HEAD(&q->drain_list);
}

EXPORT_SYMBOL(blk_queue_make_request);

static inline void rq_init(request_queue_t *q, struct request *rq)
{
	INIT_LIST_HEAD(&rq->queuelist);

	rq->errors = 0;
	rq->rq_status = RQ_ACTIVE;
	rq->bio = rq->biotail = NULL;
	rq->ioprio = 0;
	rq->buffer = NULL;
	rq->ref_count = 1;
	rq->q = q;
	rq->waiting = NULL;
	rq->special = NULL;
	rq->data_len = 0;
	rq->data = NULL;
	rq->nr_phys_segments = 0;
	rq->sense = NULL;
	rq->end_io = NULL;
	rq->end_io_data = NULL;
}

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q:     the request queue
 * @flag:  see below
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 **/
void blk_queue_ordered(request_queue_t *q, int flag)
{
	switch (flag) {
		case QUEUE_ORDERED_NONE:
			if (q->flush_rq)
				kmem_cache_free(request_cachep, q->flush_rq);
			q->flush_rq = NULL;
			q->ordered = flag;
			break;
		case QUEUE_ORDERED_TAG:
			q->ordered = flag;
			break;
		case QUEUE_ORDERED_FLUSH:
			q->ordered = flag;
			if (!q->flush_rq)
				q->flush_rq = kmem_cache_alloc(request_cachep,
								GFP_KERNEL);
			break;
		default:
			printk("blk_queue_ordered: bad value %d\n", flag);
			break;
	}
}

EXPORT_SYMBOL(blk_queue_ordered);

/**
 * blk_queue_issue_flush_fn - set function for issuing a flush
 * @q:     the request queue
 * @iff:   the function to be called issuing the flush
 *
 * Description:
 *   If a driver supports issuing a flush command, the support is notified
 *   to the block layer by defining it through this call.
 *
 **/
void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
{
	q->issue_flush_fn = iff;
}

EXPORT_SYMBOL(blk_queue_issue_flush_fn);

/*
 * Cache flushing for ordered writes handling
 */
static void blk_pre_flush_end_io(struct request *flush_rq)
{
	struct request *rq = flush_rq->end_io_data;
	request_queue_t *q = rq->q;

	rq->flags |= REQ_BAR_PREFLUSH;

	if (!flush_rq->errors)
		elv_requeue_request(q, rq);
	else {
		q->end_flush_fn(q, flush_rq);
		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
		q->request_fn(q);
	}
}

static void blk_post_flush_end_io(struct request *flush_rq)
{
	struct request *rq = flush_rq->end_io_data;
	request_queue_t *q = rq->q;

	rq->flags |= REQ_BAR_POSTFLUSH;

	q->end_flush_fn(q, flush_rq);
	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
	q->request_fn(q);
}

struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
{
	struct request *flush_rq = q->flush_rq;

	BUG_ON(!blk_barrier_rq(rq));

	if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
		return NULL;

	rq_init(q, flush_rq);
	flush_rq->elevator_private = NULL;
	flush_rq->flags = REQ_BAR_FLUSH;
	flush_rq->rq_disk = rq->rq_disk;
	flush_rq->rl = NULL;

	/*
	 * prepare_flush returns 0 if no flush is needed, just mark both
	 * pre and post flush as done in that case
	 */
	if (!q->prepare_flush_fn(q, flush_rq)) {
		rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
		return rq;
	}

	/*
	 * some drivers dequeue requests right away, some only after io
	 * completion. make sure the request is dequeued.
	 */
	if (!list_empty(&rq->queuelist))
		blkdev_dequeue_request(rq);

	elv_deactivate_request(q, rq);

	flush_rq->end_io_data = rq;
	flush_rq->end_io = blk_pre_flush_end_io;

	__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
	return flush_rq;
}

static void blk_start_post_flush(request_queue_t *q, struct request *rq)
{
	struct request *flush_rq = q->flush_rq;

	BUG_ON(!blk_barrier_rq(rq));

	rq_init(q, flush_rq);
	flush_rq->elevator_private = NULL;
	flush_rq->flags = REQ_BAR_FLUSH;
	flush_rq->rq_disk = rq->rq_disk;
	flush_rq->rl = NULL;

	if (q->prepare_flush_fn(q, flush_rq)) {
		flush_rq->end_io_data = rq;
		flush_rq->end_io = blk_post_flush_end_io;

		__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
		q->request_fn(q);
	}
}

static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
					int sectors)
{
	if (sectors > rq->nr_sectors)
		sectors = rq->nr_sectors;

	rq->nr_sectors -= sectors;
	return rq->nr_sectors;
}

static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
				     int sectors, int queue_locked)
{
	if (q->ordered != QUEUE_ORDERED_FLUSH)
		return 0;
	if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
		return 0;
	if (blk_barrier_postflush(rq))
		return 0;

	if (!blk_check_end_barrier(q, rq, sectors)) {
		unsigned long flags = 0;

		if (!queue_locked)
			spin_lock_irqsave(q->queue_lock, flags);

		blk_start_post_flush(q, rq);

		if (!queue_locked)
			spin_unlock_irqrestore(q->queue_lock, flags);
	}

	return 1;
}

/**
 * blk_complete_barrier_rq - complete possible barrier request
 * @q:  the request queue for the device
 * @rq:  the request
 * @sectors:  number of sectors to complete
 *
 * Description:
 *   Used in driver end_io handling to determine whether to postpone
 *   completion of a barrier request until a post flush has been done. This
 *   is the unlocked variant, used if the caller doesn't already hold the
 *   queue lock.
 **/
int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
{
	return __blk_complete_barrier_rq(q, rq, sectors, 0);
}
EXPORT_SYMBOL(blk_complete_barrier_rq);

/**
 * blk_complete_barrier_rq_locked - complete possible barrier request
 * @q:  the request queue for the device
 * @rq:  the request
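The kernel-doc for blk_queue_make_request() above describes how bio-based drivers (md, lvm and similar virtual devices) bypass the request queue. As a minimal sketch, and not part of ll_rw_blk.c itself, the fragment below shows how such a driver might wire up its own make_request_fn. The names example_queue, example_make_request and example_init are hypothetical, the headers included at the top of this file are assumed to be available, and error handling is reduced to the allocation check.

static request_queue_t *example_queue;	/* hypothetical driver state */

/*
 * make_request_fn: receives bios directly, bypassing the request queue.
 * A real driver would map or remap the bio here; note the highmem caveat
 * above (__bio_kmap_atomic() or blk_queue_bounce()).  This sketch simply
 * completes the whole bio successfully.
 */
static int example_make_request(request_queue_t *q, struct bio *bio)
{
	bio_endio(bio, bio->bi_size, 0);
	return 0;
}

static int __init example_init(void)
{
	example_queue = blk_alloc_queue(GFP_KERNEL);
	if (!example_queue)
		return -ENOMEM;

	/* install the alternate make_request function */
	blk_queue_make_request(example_queue, example_make_request);
	return 0;
}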
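Similarly, a request_fn-based driver with a write-back cache can opt into the flush-based ordering implemented above. The sketch below is hypothetical (example_prepare_flush, example_end_flush and example_enable_barriers are not real kernel symbols) and assumes the queue's prepare_flush_fn/end_flush_fn hooks may be assigned directly, matching how the flush code above invokes them. The driver's completion path would additionally call blk_complete_barrier_rq(), or the _locked variant when it already holds the queue lock, before finishing a barrier write, so that the post-flush is issued first.

/*
 * prepare_flush_fn: asked to turn flush_rq into a device cache-flush
 * command.  Returning 0 tells the block layer no flush is needed.
 */
static int example_prepare_flush(request_queue_t *q, struct request *flush_rq)
{
	/* a real driver would build its device-specific flush command
	 * in flush_rq here */
	return 1;
}

/*
 * end_flush_fn: called once the flush request built above has completed.
 */
static void example_end_flush(request_queue_t *q, struct request *flush_rq)
{
	/* driver-private bookkeeping for the finished flush */
}

static void example_enable_barriers(request_queue_t *q)
{
	/* assumption: hooks assigned directly on the queue */
	q->prepare_flush_fn = example_prepare_flush;
	q->end_flush_fn = example_end_flush;

	/* allocates q->flush_rq and selects flush-based ordering */
	blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
}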