📄 cfq-iosched.c
/*
 *  linux/drivers/block/cfq-iosched.c
 *
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>
#include <linux/ioprio.h>
#include <linux/writeback.h>

/*
 * tunables
 */
static int cfq_quantum = 4;		/* max queue in one round of service */
static int cfq_queued = 8;		/* minimum rq allocate limit per-queue */
static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static int cfq_back_max = 16 * 1024;	/* maximum backwards seek, in KiB */
static int cfq_back_penalty = 2;	/* penalty of a backwards seek */

static int cfq_slice_sync = HZ / 10;
static int cfq_slice_async = HZ / 25;
static int cfq_slice_async_rq = 2;
static int cfq_slice_idle = HZ / 100;
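/*
 * (Annotation, not part of the original source.) These time tunables are
 * in jiffies, so their wall-clock values depend on the kernel's HZ.
 * Assuming HZ = 1000: cfq_slice_sync = 100ms, cfq_slice_async = 40ms,
 * cfq_slice_idle = 10ms, and the two cfq_fifo_expire slots are 250ms and
 * 125ms. Given the ASYNC = 0 / SYNC = 1 convention used throughout this
 * file, the 250ms slot most likely applies to async requests and the
 * 125ms slot to sync ones.
 */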
#define CFQ_IDLE_GRACE		(HZ / 10)
#define CFQ_SLICE_SCALE		(5)

#define CFQ_KEY_ASYNC		(0)
#define CFQ_KEY_ANY		(0xffff)

/*
 * disable queueing at the driver/hardware level
 */
static int cfq_max_depth = 2;

/*
 * for the hash of cfqq inside the cfqd
 */
#define CFQ_QHASH_SHIFT		6
#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)

/*
 * for the hash of crq inside the cfqq
 */
#define CFQ_MHASH_SHIFT		6
#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
#define CFQ_MHASH_FN(sec)	hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)

#define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
#define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)

#define RQ_DATA(rq)		(rq)->elevator_private

/*
 * rb-tree defines
 */
#define RB_NONE			(2)
#define RB_EMPTY(node)		((node)->rb_node == NULL)
#define RB_CLEAR_COLOR(node)	(node)->rb_color = RB_NONE
#define RB_CLEAR(node)		do {	\
	(node)->rb_parent = NULL;	\
	RB_CLEAR_COLOR((node));		\
	(node)->rb_right = NULL;	\
	(node)->rb_left = NULL;		\
} while (0)
#define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
#define ON_RB(node)		((node)->rb_color != RB_NONE)
#define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq)		(rq)->sector

static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static kmem_cache_t *cfq_ioc_pool;

#define CFQ_PRIO_LISTS		IOPRIO_BE_NR
#define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_be(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
#define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)

#define ASYNC			(0)
#define SYNC			(1)

#define cfq_cfqq_dispatched(cfqq)	\
	((cfqq)->on_dispatch[ASYNC] + (cfqq)->on_dispatch[SYNC])

#define cfq_cfqq_class_sync(cfqq)	((cfqq)->key != CFQ_KEY_ASYNC)

#define cfq_cfqq_sync(cfqq)		\
	(cfq_cfqq_class_sync(cfqq) || (cfqq)->on_dispatch[SYNC])

/*
 * Per block device queue structure
 */
struct cfq_data {
	atomic_t ref;
	request_queue_t *queue;

	/*
	 * rr list of queues with requests and the count of them
	 */
	struct list_head rr_list[CFQ_PRIO_LISTS];
	struct list_head busy_rr;
	struct list_head cur_rr;
	struct list_head idle_rr;
	unsigned int busy_queues;

	/*
	 * non-ordered list of empty cfqq's
	 */
	struct list_head empty_list;

	/*
	 * cfqq lookup hash
	 */
	struct hlist_head *cfq_hash;

	/*
	 * global crq hash for all queues
	 */
	struct hlist_head *crq_hash;

	unsigned int max_queued;

	mempool_t *crq_pool;

	int rq_in_driver;

	/*
	 * schedule slice state info
	 */
	/*
	 * idle window management
	 */
	struct timer_list idle_slice_timer;
	struct work_struct unplug_work;

	struct cfq_queue *active_queue;
	struct cfq_io_context *active_cic;
	int cur_prio, cur_end_prio;
	unsigned int dispatch_slice;

	struct timer_list idle_class_timer;

	sector_t last_sector;
	unsigned long last_end_request;

	unsigned int rq_starved;

	/*
	 * tunables, see top of file
	 */
	unsigned int cfq_quantum;
	unsigned int cfq_queued;
	unsigned int cfq_fifo_expire[2];
	unsigned int cfq_back_penalty;
	unsigned int cfq_back_max;
	unsigned int cfq_slice[2];
	unsigned int cfq_slice_async_rq;
	unsigned int cfq_slice_idle;
	unsigned int cfq_max_depth;
};

/*
 * Per process-grouping structure
 */
struct cfq_queue {
	/* reference count */
	atomic_t ref;
	/* parent cfq_data */
	struct cfq_data *cfqd;
	/* cfqq lookup hash */
	struct hlist_node cfq_hash;
	/* hash key */
	unsigned int key;
	/* on either rr or empty list of cfqd */
	struct list_head cfq_list;
	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct cfq_rq *next_crq;
	/* requests queued in sort_list */
	int queued[2];
	/* currently allocated requests */
	int allocated[2];
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	unsigned long slice_start;
	unsigned long slice_end;
	unsigned long slice_left;
	unsigned long service_last;

	/* number of requests that are on the dispatch list */
	int on_dispatch[2];

	/* io prio of this group */
	unsigned short ioprio, org_ioprio;
	unsigned short ioprio_class, org_ioprio_class;

	/* various state flags, see below */
	unsigned int flags;
};

struct cfq_rq {
	struct rb_node rb_node;
	sector_t rb_key;
	struct request *request;
	struct hlist_node hash;

	struct cfq_queue *cfq_queue;
	struct cfq_io_context *io_context;

	unsigned int crq_flags;
};

enum cfqq_state_flags {
	CFQ_CFQQ_FLAG_on_rr = 0,
	CFQ_CFQQ_FLAG_wait_request,
	CFQ_CFQQ_FLAG_must_alloc,
	CFQ_CFQQ_FLAG_must_alloc_slice,
	CFQ_CFQQ_FLAG_must_dispatch,
	CFQ_CFQQ_FLAG_fifo_expire,
	CFQ_CFQQ_FLAG_idle_window,
	CFQ_CFQQ_FLAG_prio_changed,
	CFQ_CFQQ_FLAG_expired,
};

#define CFQ_CFQQ_FNS(name)						\
static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq)		\
{									\
	cfqq->flags |= (1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq)	\
{									\
	cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_##name);			\
}									\
static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq)		\
{									\
	return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0;	\
}

CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
CFQ_CFQQ_FNS(expired);
#undef CFQ_CFQQ_FNS

enum cfq_rq_state_flags {
	CFQ_CRQ_FLAG_in_flight = 0,
	CFQ_CRQ_FLAG_in_driver,
	CFQ_CRQ_FLAG_is_sync,
	CFQ_CRQ_FLAG_requeued,
};

#define CFQ_CRQ_FNS(name)						\
static inline void cfq_mark_crq_##name(struct cfq_rq *crq)		\
{									\
	crq->crq_flags |= (1 << CFQ_CRQ_FLAG_##name);			\
}									\
static inline void cfq_clear_crq_##name(struct cfq_rq *crq)		\
{									\
	crq->crq_flags &= ~(1 << CFQ_CRQ_FLAG_##name);			\
}									\
static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
{									\
	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
}

CFQ_CRQ_FNS(in_flight);
CFQ_CRQ_FNS(in_driver);
CFQ_CRQ_FNS(is_sync);
CFQ_CRQ_FNS(requeued);
#undef CFQ_CRQ_FNS
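/*
 * (Annotation, not part of the original source.) Each CFQ_CFQQ_FNS(name)
 * line above stamps out three one-line helpers; e.g. CFQ_CFQQ_FNS(on_rr)
 * expands to:
 *
 *	static inline void cfq_mark_cfqq_on_rr(struct cfq_queue *cfqq)
 *	{
 *		cfqq->flags |= (1 << CFQ_CFQQ_FLAG_on_rr);
 *	}
 *	static inline void cfq_clear_cfqq_on_rr(struct cfq_queue *cfqq)
 *	{
 *		cfqq->flags &= ~(1 << CFQ_CFQQ_FLAG_on_rr);
 *	}
 *	static inline int cfq_cfqq_on_rr(const struct cfq_queue *cfqq)
 *	{
 *		return (cfqq->flags & (1 << CFQ_CFQQ_FLAG_on_rr)) != 0;
 *	}
 *
 * CFQ_CRQ_FNS generates the same mark/clear/test triple for crq_flags.
 */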
static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
static void cfq_put_cfqd(struct cfq_data *cfqd);

#define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)

/*
 * lots of deadline iosched dupes, can be abstracted later...
 */
static inline void cfq_del_crq_hash(struct cfq_rq *crq)
{
	hlist_del_init(&crq->hash);
}

static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
{
	cfq_del_crq_hash(crq);

	if (q->last_merge == crq->request)
		q->last_merge = NULL;
}

static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));

	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
}

static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
{
	struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
	struct hlist_node *entry, *next;

	hlist_for_each_safe(entry, next, hash_list) {
		struct cfq_rq *crq = list_entry_hash(entry);
		struct request *__rq = crq->request;

		if (!rq_mergeable(__rq)) {
			cfq_del_crq_hash(crq);
			continue;
		}

		if (rq_hash_key(__rq) == offset)
			return __rq;
	}

	return NULL;
}

static inline int cfq_pending_requests(struct cfq_data *cfqd)
{
	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
}

/*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
		kblockd_schedule_work(&cfqd->unplug_work);
}

static int cfq_queue_empty(request_queue_t *q)
{
	struct cfq_data *cfqd = q->elevator->elevator_data;

	return !cfq_pending_requests(cfqd);
}

/*
 * Lifted from AS - choose which of crq1 and crq2 that is best served now.
 * We choose the request that is closest to the head right now. Distance
 * behind the head are penalized and only allowed to a certain extent.
 */
static struct cfq_rq *
cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
{
	sector_t last, s1, s2, d1 = 0, d2 = 0;
	int r1_wrap = 0, r2_wrap = 0;	/* requests are behind the disk head */
	unsigned long back_max;

	if (crq1 == NULL || crq1 == crq2)
		return crq2;
	if (crq2 == NULL)
		return crq1;

	if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
		return crq1;
	else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
		return crq2;

	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
		return crq1;
	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
		return crq2;

	s1 = crq1->request->sector;
	s2 = crq2->request->sector;

	last = cfqd->last_sector;

	/*
	 * by definition, 1KiB is 2 sectors
	 */
	back_max = cfqd->cfq_back_max * 2;

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
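	/*
	 * (Annotation, not part of the original source.) Worked example,
	 * assuming the defaults cfq_back_max = 16384 KiB and
	 * cfq_back_penalty = 2: back_max = 32768 sectors. With the head
	 * at sector 100000, a request at 101000 scores d = 1000, while
	 * one at 99500 scores d = (100000 - 99500) * 2 = 1000 -- a
	 * backward seek must be half the distance of a forward one to
	 * tie. Anything more than 32768 sectors behind the head "wraps"
	 * and loses to any non-wrapped request.
	 */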
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1 + back_max >= last)
		d1 = (last - s1) * cfqd->cfq_back_penalty;
	else
		r1_wrap = 1;

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2 + back_max >= last)
		d2 = (last - s2) * cfqd->cfq_back_penalty;
	else
		r2_wrap = 1;

	/* Found required data */
	if (!r1_wrap && r2_wrap)
		return crq1;
	else if (!r2_wrap && r1_wrap)
		return crq2;
	else if (r1_wrap && r2_wrap) { /* both behind the head */
		if (s1 <= s2)
			return crq1;
		else
			return crq2;
	}

	/* Both requests in front of the head */
	if (d1 < d2)
		return crq1;
	else if (d2 < d1)
		return crq2;
	else {
		if (s1 >= s2)
			return crq1;
		else
			return crq2;
	}
}

/*
 * would be nice to take fifo expire time into account as well
 */
static struct cfq_rq *cfq_find_next_crq(struct cfq_data *cfqd,
					struct cfq_queue *cfqq,
					struct cfq_rq *last)
{
	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
	struct rb_node *rbnext, *rbprev;

	rbnext = NULL;
	if (ON_RB(&last->rb_node))
		rbnext = rb_next(&last->rb_node);
	if (!rbnext) {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext == &last->rb_node)
			rbnext = NULL;
	}

	rbprev = rb_prev(&last->rb_node);

	if (rbprev)
		crq_prev = rb_entry_crq(rbprev);
	if (rbnext)
		crq_next = rb_entry_crq(rbnext);

	return cfq_choose_req(cfqd, crq_next, crq_prev);
}

static void cfq_update_next_crq(struct cfq_rq *crq)
{
	struct cfq_queue *cfqq = crq->cfq_queue;

	if (cfqq->next_crq == crq)
		cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
}

static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
{
	struct cfq_data *cfqd = cfqq->cfqd;
	struct list_head *list, *entry;

	BUG_ON(!cfq_cfqq_on_rr(cfqq));

	list_del(&cfqq->cfq_list);

	if (cfq_class_rt(cfqq))
		list = &cfqd->cur_rr;
	else if (cfq_class_idle(cfqq))
		list = &cfqd->idle_rr;
	else {
		/*
		 * if cfqq has requests in flight, don't allow it to be
		 * found in cfq_set_active_queue before it has finished them.
		 * this is done to increase fairness between a process that
		 * has lots of io pending vs one that only generates one
		 * sporadically or synchronously
		 */
		if (cfq_cfqq_dispatched(cfqq))
			list = &cfqd->busy_rr;
		else
			list = &cfqd->rr_list[cfqq->ioprio];
	}

	/*
	 * if queue was preempted, just add to front to be fair. busy_rr
	 * isn't sorted.
	 */
	if (preempted || list == &cfqd->busy_rr) {
		list_add(&cfqq->cfq_list, list);
		return;
	}

	/*
	 * sort by when queue was last serviced
	 */
	entry = list;
	while ((entry = entry->prev) != list) {