as-iosched.c
/*
 *  linux/drivers/block/as-iosched.c
 *
 *  Anticipatory & deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@suse.de>
 *                     Nick Piggin <piggin@cyberone.com.au>
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/interrupt.h>

#define REQ_SYNC	1
#define REQ_ASYNC	0

/*
 * See Documentation/block/as-iosched.txt
 */

/*
 * max time before a read is submitted.
 */
#define default_read_expire (HZ / 8)

/*
 * ditto for writes, these limits are not hard, even
 * if the disk is capable of satisfying them.
 */
#define default_write_expire (HZ / 4)

/*
 * read_batch_expire describes how long we will allow a stream of reads to
 * persist before looking to see whether it is time to switch over to writes.
 */
#define default_read_batch_expire (HZ / 2)

/*
 * write_batch_expire describes how long we want a stream of writes to run for.
 * This is not a hard limit, but a target we set for the auto-tuning thingy.
 * See, the problem is: we can send a lot of writes to disk cache / TCQ in
 * a short amount of time...
 */
#define default_write_batch_expire (HZ / 8)

/*
 * max time we may wait to anticipate a read (default around 6ms)
 */
#define default_antic_expire ((HZ / 150) ? HZ / 150 : 1)
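/*
 * Worked example (illustration, not part of the original source): with
 * HZ == 1000, default_antic_expire above evaluates to 1000/150 = 6 jiffies,
 * i.e. the "around 6ms" mentioned in the comment.  With HZ == 100, 100/150
 * truncates to 0, so the ?: expression clamps the result to 1 jiffy.
 */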
/*
 * Keep track of up to 20ms thinktimes. We can go as big as we like here,
 * however huge values tend to interfere and not decay fast enough. A program
 * might be in a non-io phase of operation. Waiting on user input for example,
 * or doing a lengthy computation. A small penalty can be justified there, and
 * will still catch out those processes that constantly have large thinktimes.
 */
#define MAX_THINKTIME (HZ/50UL)

/* Bits in as_io_context.state */
enum as_io_states {
	AS_TASK_RUNNING=0,	/* Process has not exited */
	AS_TASK_IOSTARTED,	/* Process has started some IO */
	AS_TASK_IORUNNING,	/* Process has completed some IO */
};

enum anticipation_status {
	ANTIC_OFF=0,		/* Not anticipating (normal operation) */
	ANTIC_WAIT_REQ,		/* The last read has not yet completed */
	ANTIC_WAIT_NEXT,	/* Currently anticipating a request vs
				   last read (which has completed) */
	ANTIC_FINISHED,		/* Anticipating but have found a candidate
				 * or timed out */
};

struct as_data {
	/*
	 * run time data
	 */

	struct request_queue *q;	/* the "owner" queue */

	/*
	 * requests (as_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];
	struct list_head fifo_list[2];

	struct as_rq *next_arq[2];	/* next in sort order */
	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
	struct list_head *dispatch;	/* driver dispatch queue */
	struct list_head *hash;		/* request hash */

	unsigned long exit_prob;	/* probability a task will exit while
					   being waited on */
	unsigned long new_ttime_total;	/* mean thinktime on new proc */
	unsigned long new_ttime_mean;
	u64 new_seek_total;		/* mean seek on new proc */
	sector_t new_seek_mean;

	unsigned long current_batch_expires;
	unsigned long last_check_fifo[2];
	int changed_batch;		/* 1: waiting for old batch to end */
	int new_batch;			/* 1: waiting on first read complete */
	int batch_data_dir;		/* current batch REQ_SYNC / REQ_ASYNC */
	int write_batch_count;		/* max # of reqs in a write batch */
	int current_write_count;	/* how many requests left this batch */
	int write_batch_idled;		/* has the write batch gone idle? */
	mempool_t *arq_pool;

	enum anticipation_status antic_status;
	unsigned long antic_start;	/* jiffies: when it started */
	struct timer_list antic_timer;	/* anticipatory scheduling timer */
	struct work_struct antic_work;	/* Deferred unplugging */
	struct io_context *io_context;	/* Identify the expected process */
	int ioc_finished;	/* IO associated with io_context is finished */
	int nr_dispatched;

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	unsigned long fifo_expire[2];
	unsigned long batch_expire[2];
	unsigned long antic_expire;
};

#define list_entry_fifo(ptr)	list_entry((ptr), struct as_rq, fifo)

/*
 * per-request data.
 */
enum arq_state {
	AS_RQ_NEW=0,		/* New - not referenced and not on any lists */
	AS_RQ_QUEUED,		/* In the request queue. It belongs to the
				   scheduler */
	AS_RQ_DISPATCHED,	/* On the dispatch list. It belongs to the
				   driver now */
	AS_RQ_PRESCHED,		/* Debug poisoning for requests being used */
	AS_RQ_REMOVED,
	AS_RQ_MERGED,
	AS_RQ_POSTSCHED,	/* when they shouldn't be */
};

struct as_rq {
	/*
	 * rbtree index, key is the starting offset
	 */
	struct rb_node rb_node;
	sector_t rb_key;

	struct request *request;

	struct io_context *io_context;	/* The submitting task */

	/*
	 * request hash, key is the ending offset (for back merge lookup)
	 */
	struct list_head hash;
	unsigned int on_hash;

	/*
	 * expire fifo
	 */
	struct list_head fifo;
	unsigned long expires;

	unsigned int is_sync;
	enum arq_state state;
};

#define RQ_DATA(rq)	((struct as_rq *) (rq)->elevator_private)

static kmem_cache_t *arq_pool;

/*
 * IO Context helper functions
 */

/* Called to deallocate the as_io_context */
static void free_as_io_context(struct as_io_context *aic)
{
	kfree(aic);
}

/* Called when the task exits */
static void exit_as_io_context(struct as_io_context *aic)
{
	WARN_ON(!test_bit(AS_TASK_RUNNING, &aic->state));
	clear_bit(AS_TASK_RUNNING, &aic->state);
}

static struct as_io_context *alloc_as_io_context(void)
{
	struct as_io_context *ret;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (ret) {
		ret->dtor = free_as_io_context;
		ret->exit = exit_as_io_context;
		ret->state = 1 << AS_TASK_RUNNING;
		atomic_set(&ret->nr_queued, 0);
		atomic_set(&ret->nr_dispatched, 0);
		spin_lock_init(&ret->lock);
		ret->ttime_total = 0;
		ret->ttime_samples = 0;
		ret->ttime_mean = 0;
		ret->seek_total = 0;
		ret->seek_samples = 0;
		ret->seek_mean = 0;
	}

	return ret;
}
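/*
 * Illustrative sketch (not part of the original source): one way the
 * ttime_total/ttime_samples/ttime_mean fields initialised above can be
 * maintained as a decaying fixed-point mean, so old thinktime samples fade
 * out over time.  The 7/8 decay factor and the 1.0 == 256 fixed-point scale
 * are assumptions for illustration, not the scheduler's exact constants.
 */
static inline void example_update_ttime(struct as_io_context *aic,
					unsigned long ttime)
{
	/* decay the sample count and total, then mix in the new sample */
	aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
	aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8;
	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
}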
/*
 * If the current task has no AS IO context then create one and initialise it.
 * Then take a ref on the task's io context and return it.
 */
static struct io_context *as_get_io_context(void)
{
	struct io_context *ioc = get_io_context(GFP_ATOMIC);
	if (ioc && !ioc->aic) {
		ioc->aic = alloc_as_io_context();
		if (!ioc->aic) {
			put_io_context(ioc);
			ioc = NULL;
		}
	}
	return ioc;
}

/*
 * the back merge hash support functions
 */
static const int as_hash_shift = 6;
#define AS_HASH_BLOCK(sec)	((sec) >> 3)
#define AS_HASH_FN(sec)		(hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
#define AS_HASH_ENTRIES		(1 << as_hash_shift)
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)	list_entry((ptr), struct as_rq, hash)

static inline void __as_del_arq_hash(struct as_rq *arq)
{
	arq->on_hash = 0;
	list_del_init(&arq->hash);
}

static inline void as_del_arq_hash(struct as_rq *arq)
{
	if (arq->on_hash)
		__as_del_arq_hash(arq);
}

static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
{
	as_del_arq_hash(arq);

	if (q->last_merge == arq->request)
		q->last_merge = NULL;
}

static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
{
	struct request *rq = arq->request;

	BUG_ON(arq->on_hash);

	arq->on_hash = 1;
	list_add(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
}

/*
 * move hot entry to front of chain
 */
static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
{
	struct request *rq = arq->request;
	struct list_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];

	if (!arq->on_hash) {
		WARN_ON(1);
		return;
	}

	if (arq->hash.prev != head) {
		list_del(&arq->hash);
		list_add(&arq->hash, head);
	}
}

static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
{
	struct list_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
	struct list_head *entry, *next = hash_list->next;

	while ((entry = next) != hash_list) {
		struct as_rq *arq = list_entry_hash(entry);
		struct request *__rq = arq->request;

		next = entry->next;

		BUG_ON(!arq->on_hash);

		if (!rq_mergeable(__rq)) {
			as_remove_merge_hints(ad->q, arq);
			continue;
		}

		if (rq_hash_key(__rq) == offset)
			return __rq;
	}

	return NULL;
}

/*
 * rb tree support functions
 */
#define RB_NONE		(2)
#define RB_EMPTY(root)	((root)->rb_node == NULL)
#define ON_RB(node)	((node)->rb_color != RB_NONE)
#define RB_CLEAR(node)	((node)->rb_color = RB_NONE)
#define rb_entry_arq(node)	rb_entry((node), struct as_rq, rb_node)
#define ARQ_RB_ROOT(ad, arq)	(&(ad)->sort_list[(arq)->is_sync])
#define rq_rb_key(rq)		(rq)->sector

/*
 * as_find_first_arq finds the first (lowest sector numbered) request
 * for the specified data_dir. Used to sweep back to the start of the disk
 * (1-way elevator) after we process the last (highest sector) request.
 */
static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir)
{
	struct rb_node *n = ad->sort_list[data_dir].rb_node;

	if (n == NULL)
		return NULL;

	for (;;) {
		if (n->rb_left == NULL)
			return rb_entry_arq(n);

		n = n->rb_left;
	}
}
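/*
 * Illustrative sketch (not part of the original source): walking one of the
 * sort_list rbtrees in ascending sector order with the generic rb_first()/
 * rb_next() iterators from linux/rbtree.h.  A debugging aid only; the printk
 * format is an assumption, not something the scheduler does.
 */
static void example_dump_sort_list(struct as_data *ad, int data_dir)
{
	struct rb_node *n;

	for (n = rb_first(&ad->sort_list[data_dir]); n; n = rb_next(n)) {
		struct as_rq *arq = rb_entry_arq(n);

		printk(KERN_DEBUG "as: %s arq at sector %llu\n",
			data_dir == REQ_SYNC ? "sync" : "async",
			(unsigned long long)arq->rb_key);
	}
}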
/*
 * Add the request to the rb tree if it is unique. If there is an alias (an
 * existing request against the same sector), which can happen when using
 * direct IO, then return the alias.
 */
static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
{
	struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node;
	struct rb_node *parent = NULL;
	struct as_rq *__arq;
	struct request *rq = arq->request;

	arq->rb_key = rq_rb_key(rq);

	while (*p) {
		parent = *p;
		__arq = rb_entry_arq(parent);

		if (arq->rb_key < __arq->rb_key)
			p = &(*p)->rb_left;
		else if (arq->rb_key > __arq->rb_key)
			p = &(*p)->rb_right;
		else
			return __arq;
	}

	rb_link_node(&arq->rb_node, parent, p);
	rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));

	return NULL;
}

static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
{
	if (!ON_RB(&arq->rb_node)) {
		WARN_ON(1);
		return;
	}

	rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
	RB_CLEAR(&arq->rb_node);
}

static struct request *
as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir)
{
	struct rb_node *n = ad->sort_list[data_dir].rb_node;
	struct as_rq *arq;

	while (n) {
		arq = rb_entry_arq(n);

		if (sector < arq->rb_key)
			n = n->rb_left;
		else if (sector > arq->rb_key)
			n = n->rb_right;
		else
			return arq->request;
	}

	return NULL;
}

/*
 * IO Scheduler proper
 */

#define MAXBACK (1024 * 1024)	/*
				 * Maximum distance the disk will go backward
				 * for a request.
				 */

#define BACK_PENALTY	2

/*
 * as_choose_req selects the preferred one of two requests of the same
 * data_dir, ignoring time - e.g. timeouts, which is the job of
 * as_dispatch_request
 */
static struct as_rq *
as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
{
	int data_dir;
	sector_t last, s1, s2, d1, d2;
	int r1_wrap=0, r2_wrap=0;	/* requests are behind the disk head */
	const sector_t maxback = MAXBACK;

	if (arq1 == NULL || arq1 == arq2)
		return arq2;
	if (arq2 == NULL)
		return arq1;

	data_dir = arq1->is_sync;

	last = ad->last_sector[data_dir];
	s1 = arq1->request->sector;
	s2 = arq2->request->sector;

	BUG_ON(data_dir != arq2->is_sync);

	/*
	 * Strict one way elevator _except_ in the case where we allow
	 * short backward seeks which are biased as twice the cost of a
	 * similar forward seek.
	 */
	if (s1 >= last)
		d1 = s1 - last;
	else if (s1+maxback >= last)
		d1 = (last - s1)*BACK_PENALTY;
	else {
		r1_wrap = 1;
		d1 = 0; /* shut up, gcc */
	}

	if (s2 >= last)
		d2 = s2 - last;
	else if (s2+maxback >= last)
		d2 = (last - s2)*BACK_PENALTY;
	else {
		r2_wrap = 1;
		d2 = 0;
	}

	/* Found required data */
	if (!r1_wrap && r2_wrap)
		return arq1;
	else if (!r2_wrap && r1_wrap)
		return arq2;
	else if (r1_wrap && r2_wrap) {
		/* both behind the head */
		if (s1 <= s2)
			return arq1;
		else
			return arq2;
	}

	/* Both requests in front of the head */
	if (d1 < d2)
		return arq1;
	else if (d2 < d1)
		return arq2;
	else {
		if (s1 >= s2)
			return arq1;
		else
			return arq2;
	}
}

/*
 * as_find_next_arq finds the next request after @last in elevator order.
 * This, together with as_choose_req, forms the basis for how the scheduler
 * chooses what request to process next. Anticipation works on top of this.
 */
static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last)
{
	const int data_dir = last->is_sync;
	struct as_rq *ret;
	struct rb_node *rbnext = rb_next(&last->rb_node);
	struct rb_node *rbprev = rb_prev(&last->rb_node);
	struct as_rq *arq_next, *arq_prev;

	BUG_ON(!ON_RB(&last->rb_node));

	if (rbprev)
		arq_prev = rb_entry_arq(rbprev);
	else
		arq_prev = NULL;

	if (rbnext)
		arq_next = rb_entry_arq(rbnext);
	else {
		arq_next = as_find_first_arq(ad, data_dir);
		if (arq_next == last)
			arq_next = NULL;
	}

	ret = as_choose_req(ad, arq_next, arq_prev);

	return ret;
}
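/*
 * Illustrative sketch (not part of the original source): the seek-distance
 * metric from as_choose_req above, pulled out as a standalone helper.  A
 * request in front of the head costs its forward distance; one up to MAXBACK
 * sectors behind the head costs BACK_PENALTY times the backward distance;
 * anything further back is flagged as "wrapped" via the out parameter and
 * only chosen when no unwrapped request exists.
 */
static inline sector_t example_seek_cost(sector_t last, sector_t s, int *wrap)
{
	*wrap = 0;
	if (s >= last)			/* forward seek */
		return s - last;
	if (s + MAXBACK >= last)	/* short backward seek, penalised 2x */
		return (last - s) * BACK_PENALTY;
	*wrap = 1;			/* too far behind the head: wrapped */
	return 0;
}

/*
 * For example, with the head at sector 1000, a forward request at 1200 and a
 * backward request at 900 both cost 200, and the tie-break in as_choose_req
 * then prefers the higher sector.
 */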