as-iosched.c
/*
 *  linux/drivers/block/as-iosched.c
 *
 *  Anticipatory & deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 *                     Nick Piggin <piggin@cyberone.com.au>
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/interrupt.h>

#define REQ_SYNC	1
#define REQ_ASYNC	0

/*
 * See Documentation/block/as-iosched.txt
 */

/*
 * max time before a read is submitted.
 */
#define default_read_expire (HZ / 8)

/*
 * ditto for writes, these limits are not hard, even
 * if the disk is capable of satisfying them.
 */
#define default_write_expire (HZ / 4)

/*
 * read_batch_expire describes how long we will allow a stream of reads to
 * persist before looking to see whether it is time to switch over to writes.
 */
#define default_read_batch_expire (HZ / 2)

/*
 * write_batch_expire describes how long we want a stream of writes to run for.
 * This is not a hard limit, but a target we set for the auto-tuning thingy.
 * See, the problem is: we can send a lot of writes to disk cache / TCQ in
 * a short amount of time...
 */
#define default_write_batch_expire (HZ / 8)

/*
 * max time we may wait to anticipate a read (default around 6ms)
 */
#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
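/*
 * Worked example (illustration, not part of the original source): with
 * HZ == 1000, default_antic_expire above evaluates to 1000/150 = 6 jiffies,
 * i.e. the "around 6ms" mentioned in the comment.  With HZ == 100, 100/150
 * truncates to 0, so the ?: expression clamps the result to 1 jiffy.
 */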
/*
 * Keep track of up to 20ms thinktimes. We can go as big as we like here,
 * however huge values tend to interfere and not decay fast enough. A program
 * might be in a non-io phase of operation. Waiting on user input for example,
 * or doing a lengthy computation. A small penalty can be justified there, and
 * will still catch out those processes that constantly have large thinktimes.
 */
#define MAX_THINKTIME (HZ/50UL)

/* Bits in as_io_context.state */
enum as_io_states {
	AS_TASK_RUNNING=0,	/* Process has not exited */
	AS_TASK_IOSTARTED,	/* Process has started some IO */
	AS_TASK_IORUNNING,	/* Process has completed some IO */
};

enum anticipation_status {
	ANTIC_OFF=0,		/* Not anticipating (normal operation) */
	ANTIC_WAIT_REQ,		/* The last read has not yet completed */
	ANTIC_WAIT_NEXT,	/* Currently anticipating a request vs
				   last read (which has completed) */
	ANTIC_FINISHED,		/* Anticipating but have found a candidate
				 * or timed out */
};

struct as_data {
	/*
	 * run time data
	 */

	struct request_queue *q;	/* the "owner" queue */

	/*
	 * requests (as_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	struct as_rq *next_arq[2];	/* next in sort order */
	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
	struct list_head *dispatch;	/* driver dispatch queue */
	struct list_head *hash;		/* request hash */

	unsigned long exit_prob;	/* probability a task will exit while
					   being waited on */
	unsigned long new_ttime_total;	/* mean thinktime on new proc */
	unsigned long new_ttime_mean;
	u64 new_seek_total;		/* mean seek on new proc */
	sector_t new_seek_mean;

	unsigned long current_batch_expires;
	unsigned long last_check_fifo[2];
	int changed_batch;		/* 1: waiting for old batch to end */
	int new_batch;			/* 1: waiting on first read complete */
	int batch_data_dir;		/* current batch REQ_SYNC / REQ_ASYNC */
	int write_batch_count;		/* max # of reqs in a write batch */
	int current_write_count;	/* how many requests left this batch */
	int write_batch_idled;		/* has the write batch gone idle? */
	mempool_t *arq_pool;

	enum anticipation_status antic_status;
	unsigned long antic_start;	/* jiffies: when it started */
	struct timer_list antic_timer;	/* anticipatory scheduling timer */
	struct work_struct antic_work;	/* Deferred unplugging */
	struct io_context *io_context;	/* Identify the expected process */
	int ioc_finished;	/* IO associated with io_context is finished */
	int nr_dispatched;

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	unsigned long fifo_expire[2];
	unsigned long batch_expire[2];
	unsigned long antic_expire;
};

#define list_entry_fifo(ptr)	list_entry((ptr), struct as_rq, fifo)

/*
 * per-request data.
 */
enum arq_state {
	AS_RQ_NEW=0,		/* New - not referenced and not on any lists */
	AS_RQ_QUEUED,		/* In the request queue. It belongs to the
				   scheduler */
	AS_RQ_DISPATCHED,	/* On the dispatch list. It belongs to the
				   driver now */
	AS_RQ_PRESCHED,		/* Debug poisoning for requests being used */
	AS_RQ_REMOVED,
	AS_RQ_MERGED,
	AS_RQ_POSTSCHED,	/* when they shouldn't be */
};

struct as_rq {
	/*
	 * rbtree index, key is the starting offset
	 */
	struct rb_node rb_node;
	sector_t rb_key;

	struct request *request;

	struct io_context *io_context;	/* The submitting task */

	/*
	 * request hash, key is the ending offset (for back merge lookup)
	 */
	struct list_head hash;
	unsigned int on_hash;

	/*
	 * expire fifo
	 */
	struct list_head fifo;
	unsigned long expires;

	unsigned int is_sync;
	enum arq_state state;
};

#define RQ_DATA(rq)	((struct as_rq *) (rq)->elevator_private)

static kmem_cache_t *arq_pool;

/*
 * IO Context helper functions
 */

/* Called to deallocate the as_io_context */
static void free_as_io_context(struct as_io_context *aic)
{
	kfree(aic);
}

/* Called when the task exits */
static void exit_as_io_context(struct as_io_context *aic)
{
	WARN_ON(!test_bit(AS_TASK_RUNNING, &aic->state));
	clear_bit(AS_TASK_RUNNING, &aic->state);
}

static struct as_io_context *alloc_as_io_context(void)
{
	struct as_io_context *ret;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (ret) {
		ret->dtor = free_as_io_context;
		ret->exit = exit_as_io_context;
		ret->state = 1 << AS_TASK_RUNNING;
		atomic_set(&ret->nr_queued, 0);
		atomic_set(&ret->nr_dispatched, 0);
		spin_lock_init(&ret->lock);
		ret->ttime_total = 0;
		ret->ttime_samples = 0;
		ret->ttime_mean = 0;
		ret->seek_total = 0;
		ret->seek_samples = 0;
		ret->seek_mean = 0;
	}

	return ret;
}
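/*
 * Illustrative sketch (not part of the original source): one way the
 * ttime_total/ttime_samples/ttime_mean fields initialised above can be
 * maintained as a decaying fixed-point mean, so old thinktime samples fade
 * out over time.  The 7/8 decay factor and the 1.0 == 256 fixed-point scale
 * are assumptions for illustration, not the scheduler's exact constants.
 */
static inline void example_update_ttime(struct as_io_context *aic,
					unsigned long ttime)
{
	/* decay the sample count and total, then mix in the new sample */
	aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
	aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8;
	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
}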
/*
 * If the current task has no AS IO context then create one and initialise it.
 * Then take a ref on the task's io context and return it.
 */
static struct io_context *as_get_io_context(void)
{
	struct io_context *ioc = get_io_context(GFP_ATOMIC);
	if (ioc && !ioc->aic) {
		ioc->aic = alloc_as_io_context();
		if (!ioc->aic) {
			put_io_context(ioc);
			ioc = NULL;
		}
	}
	return ioc;
}

/*
 * the back merge hash support functions
 */
static const int as_hash_shift = 6;
#define AS_HASH_BLOCK(sec)	((sec) >> 3)
#define AS_HASH_FN(sec)		(hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
#define AS_HASH_ENTRIES		(1 << as_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)	list_entry((ptr), struct as_rq, hash)

static inline void __as_del_arq_hash(struct as_rq *arq)
{
	arq->on_hash = 0;
	list_del_init(&arq->hash);
}

static inline void as_del_arq_hash(struct as_rq *arq)
{
	if (arq->on_hash)
		__as_del_arq_hash(arq);
}

static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
{
	as_del_arq_hash(arq);

	if (q->last_merge == arq->request)
		q->last_merge = NULL;
}

static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
{
	struct request *rq = arq->request;

	BUG_ON(arq->on_hash);

	arq->on_hash = 1;
	list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
}

/*
 * move hot entry to front of chain
 */
static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
{
	struct request *rq = arq->request;
	struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];

	if (!arq->on_hash) {
		WARN_ON(1);
		return;
	}

	if (arq->hash.prev != head) {
		list_del(&arq->hash);
		list_add(&arq->hash, head);
	}
}

static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
{
	struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
	struct list_head *entry, *next = hash_list->next;

	while ((entry = next) != hash_list) {
		struct as_rq *arq = list_entry_hash(entry);
		struct request *__rq = arq->request;

		next = entry->next;

		BUG_ON(!arq->on_hash);

		if (!rq_mergeable(__rq)) {
			as_remove_merge_hints(ad->q, arq);
			continue;
		}

		if (rq_hash_key(__rq) == offset)
			return __rq;
	}

	return NULL;
}

/*
 * rb tree support functions
 */
#define RB_NONE		(2)
#define RB_EMPTY(root)	((root)->rb_node == NULL)
#define ON_RB(node)	((node)->rb_color != RB_NONE)
#define RB_CLEAR(node)	((node)->rb_color = RB_NONE)
#define rb_entry_arq(node)	rb_entry((node), struct as_rq, rb_node)
#define ARQ_RB_ROOT(ad, arq)	(&(ad)->sort_list[(arq)->is_sync])
#define rq_rb_key(rq)		(rq)->sector

/*
 * as_find_first_arq finds the first (lowest sector numbered) request
 * for the specified data_dir. Used to sweep back to the start of the disk
 * (1-way elevator) after we process the last (highest sector) request.
 */
static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
{
	struct rb_node *n = ad->sort_list[data_dir].rb_node;

	if (n == NULL)
		return NULL;

	for (;;) {
		if (n->rb_left == NULL)
			return rb_entry_arq(n);

		n = n->rb_left;
	}
}
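/*
 * Illustrative sketch (not part of the original source): walking one of the
 * sort_list rbtrees in ascending sector order with the generic rb_first()/
 * rb_next() iterators from linux/rbtree.h.  A debugging aid only; the printk
 * format is an assumption, not something the scheduler does.
 */
static void example_dump_sort_list(struct as_data *ad, int data_dir)
{
	struct rb_node *n;

	for (n = rb_first(&ad->sort_list[data_dir]); n; n = rb_next(n)) {
		struct as_rq *arq = rb_entry_arq(n);

		printk(KERN_DEBUG "as: %s arq at sector %llu\n",
			data_dir == REQ_SYNC ? "sync" : "async",
			(unsigned long long)arq->rb_key);
	}
}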
/*
 * Add the request to the rb tree if it is unique. If there is an alias (an
 * existing request against the same sector), which can happen when using
 * direct IO, then return the alias.
 */
static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
{
	struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
	struct rb_node *parent = NULL;
	struct as_rq *__arq;
	struct request *rq = arq->request;

	arq->rb_key = rq_rb_key(rq);

	while (*p) {
		parent = *p;
		__arq = rb_entry_arq(parent);

		if (arq->rb_key < __arq->rb_key)
			p = &(*p)->rb_left;
		else if (arq->rb_key > __arq->rb_key)
			p = &(*p)->rb_right;
		else
			return __arq;
	}

	rb_link_node(&arq->rb_node, parent, p);
	rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));

	return NULL;
}

static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
{
	if (!ON_RB(&arq->rb_node)) {
		WARN_ON(1);
		return;
	}

	rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
	RB_CLEAR(&arq->rb_node);
}

static struct request *
as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
{
	struct rb_node *n = ad->sort_list[data_dir].rb_node;
	struct as_rq *arq;

	while (n) {
		arq = rb_entry_arq(n);

		if (sector < arq->rb_key)
			n = n->rb_left;
		else if (sector > arq->rb_key)
			n = n->rb_right;
		else
			return arq->request;
	}

	return NULL;
}

/*
 * IO Scheduler proper
 */

#define MAXBACK (1024 * 1024)	/*
				 * Maximum distance the disk will go backward
				 * for a request.
				 */

#define BACK_PENALTY	2

/*
 * as_choose_req selects the preferred one of two requests of the same
 * data_dir, ignoring time - e.g. timeouts, which is the job of
 * as_dispatch_request
 */
static struct as_rq *
as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
{
	int data_dir;
	sector_t last, s1, s2, d1, d2;
	int r1_wrap=0, r2_wrap=0;	/* requests are behind the disk head */
	const sector_t maxback = MAXBACK;

	if (arq1 == NULL || arq1 == arq2)
		return arq2;
	if (arq2 == NULL)
		return arq1;

	data_dir = arq1->is_sync;

	last = ad->last_sector[data_dir];
	s1 = arq1->request->sector;
	s2 = arq2->request->sector;

	BUG_ON(data_dir != arq2->is_sync);

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1+maxback >= last)
		d1 = (last - s1)*BACK_PENALTY;
	else {
		r1_wrap = 1;
		d1 = 0; /* shut up, gcc */
	}

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2+maxback >= last)
		d2 = (last - s2)*BACK_PENALTY;
	else {
		r2_wrap = 1;
		d2 = 0;
	}

	/* Found required data */
	if (!r1_wrap && r2_wrap)
		return arq1;
	else if (!r2_wrap && r1_wrap)
		return arq2;
	else if (r1_wrap && r2_wrap) {
		/* both behind the head */
		if (s1 <= s2)
			return arq1;
		else
			return arq2;
	}

	/* Both requests in front of the head */
	if (d1 < d2)
		return arq1;
	else if (d2 < d1)
		return arq2;
	else {
		if (s1 >= s2)
			return arq1;
		else
			return arq2;
	}
}

/*
 * as_find_next_arq finds the next request after @last in elevator order.
 * This, together with as_choose_req, forms the basis for how the scheduler
 * chooses what request to process next. Anticipation works on top of this.
 */
static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
{
	const int data_dir = last->is_sync;
	struct as_rq *ret;
	struct rb_node *rbnext = rb_next(&last->rb_node);
	struct rb_node *rbprev = rb_prev(&last->rb_node);
	struct as_rq *arq_next, *arq_prev;

	BUG_ON(!ON_RB(&last->rb_node));

	if (rbprev)
		arq_prev = rb_entry_arq(rbprev);
	else
		arq_prev = NULL;

	if (rbnext)
		arq_next = rb_entry_arq(rbnext);
	else {
		arq_next = as_find_first_arq(ad, data_dir);
		if (arq_next == last)
			arq_next = NULL;
	}

	ret = as_choose_req(ad, arq_next, arq_prev);

	return ret;
}
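/*
 * Illustrative sketch (not part of the original source): the seek-distance
 * metric from as_choose_req above, pulled out as a standalone helper.  A
 * request in front of the head costs its forward distance; one up to MAXBACK
 * sectors behind the head costs BACK_PENALTY times the backward distance;
 * anything further back is flagged as "wrapped" via the out parameter and
 * only chosen when no unwrapped request exists.
 */
static inline sector_t example_seek_cost(sector_t last, sector_t s, int *wrap)
{
	*wrap = 0;
	if (s >= last)			/* forward seek */
		return s - last;
	if (s + MAXBACK >= last)	/* short backward seek, penalised 2x */
		return (last - s) * BACK_PENALTY;
	*wrap = 1;			/* too far behind the head: wrapped */
	return 0;
}

/*
 * For example, with the head at sector 1000, a forward request at 1200 and a
 * backward request at 900 both cost 200, and the tie-break in as_choose_req
 * then prefers the higher sector.
 */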