📄 as-iosched.c
	return ret;
}

/*
 * anticipatory scheduling functions follow
 */

/*
 * as_antic_expired tells us when we have anticipated too long.
 * The funny "absolute difference" math on the elapsed time is to handle
 * jiffy wraps, and disks which have been idle for 0x80000000 jiffies.
 */
static int as_antic_expired(struct as_data *ad)
{
	long delta_jif;

	delta_jif = jiffies - ad->antic_start;
	if (unlikely(delta_jif < 0))
		delta_jif = -delta_jif;
	if (delta_jif < ad->antic_expire)
		return 0;

	return 1;
}

/*
 * as_antic_waitnext starts anticipating that a nice request will soon be
 * submitted. See also as_antic_waitreq
 */
static void as_antic_waitnext(struct as_data *ad)
{
	unsigned long timeout;

	BUG_ON(ad->antic_status != ANTIC_OFF
			&& ad->antic_status != ANTIC_WAIT_REQ);

	timeout = ad->antic_start + ad->antic_expire;

	mod_timer(&ad->antic_timer, timeout);

	ad->antic_status = ANTIC_WAIT_NEXT;
}

/*
 * as_antic_waitreq starts anticipating. We don't start timing the anticipation
 * until the request that we're anticipating on has finished. This means we
 * are timing from when the candidate process wakes up, hopefully.
 */
static void as_antic_waitreq(struct as_data *ad)
{
	BUG_ON(ad->antic_status == ANTIC_FINISHED);
	if (ad->antic_status == ANTIC_OFF) {
		if (!ad->io_context || ad->ioc_finished)
			as_antic_waitnext(ad);
		else
			ad->antic_status = ANTIC_WAIT_REQ;
	}
}

/*
 * This is called directly by the functions in this file to stop anticipation.
 * We kill the timer and schedule a call to the request_fn asap.
 */
static void as_antic_stop(struct as_data *ad)
{
	int status = ad->antic_status;

	if (status == ANTIC_WAIT_REQ || status == ANTIC_WAIT_NEXT) {
		if (status == ANTIC_WAIT_NEXT)
			del_timer(&ad->antic_timer);
		ad->antic_status = ANTIC_FINISHED;
		/* see as_work_handler */
		kblockd_schedule_work(&ad->antic_work);
	}
}

/*
 * as_antic_timeout is the timer function set by as_antic_waitnext.
 */
static void as_antic_timeout(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;
	struct as_data *ad = q->elevator->elevator_data;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (ad->antic_status == ANTIC_WAIT_REQ
			|| ad->antic_status == ANTIC_WAIT_NEXT) {
		struct as_io_context *aic = ad->io_context->aic;

		ad->antic_status = ANTIC_FINISHED;
		kblockd_schedule_work(&ad->antic_work);

		if (aic->ttime_samples == 0) {
			/* process anticipated on has exited or timed out */
			ad->exit_prob = (7*ad->exit_prob + 256)/8;
		}
	}
	spin_unlock_irqrestore(q->queue_lock, flags);
}

/*
 * as_close_req decides if one request is considered "close" to the
 * previous one issued.
 */
static int as_close_req(struct as_data *ad, struct as_rq *arq)
{
	unsigned long delay;	/* milliseconds */
	sector_t last = ad->last_sector[ad->batch_data_dir];
	sector_t next = arq->request->sector;
	sector_t delta;		/* acceptable close offset (in sectors) */

	if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished)
		delay = 0;
	else
		delay = ((jiffies - ad->antic_start) * 1000) / HZ;

	if (delay <= 1)
		delta = 64;
	else if (delay <= 20 && delay <= ad->antic_expire)
		delta = 64 << (delay-1);
	else
		return 1;

	return (last - (delta>>1) <= next) && (next <= last + delta);
}

/*
 * as_can_break_anticipation returns true if we have been anticipating this
 * request.
 *
 * It also returns true if the process against which we are anticipating
 * submits a write - that's presumably an fsync, O_SYNC write, etc. We want to
 * dispatch it ASAP, because we know that application will not be submitting
 * any new reads.
 *
 * If the task which has submitted the request has exited, break anticipation.
 *
 * If this task has queued some other IO, do not enter anticipation.
 */
static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
{
	struct io_context *ioc;
	struct as_io_context *aic;
	sector_t s;

	ioc = ad->io_context;
	BUG_ON(!ioc);

	if (arq && ioc == arq->io_context) {
		/* request from same process */
		return 1;
	}

	if (ad->ioc_finished && as_antic_expired(ad)) {
		/*
		 * In this situation status should really be FINISHED,
		 * however the timer hasn't had the chance to run yet.
		 */
		return 1;
	}

	aic = ioc->aic;
	if (!aic)
		return 0;

	if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
		/* process anticipated on has exited */
		if (aic->ttime_samples == 0)
			ad->exit_prob = (7*ad->exit_prob + 256)/8;
		return 1;
	}

	if (atomic_read(&aic->nr_queued) > 0) {
		/* process has more requests queued */
		return 1;
	}

	if (atomic_read(&aic->nr_dispatched) > 0) {
		/* process has more requests dispatched */
		return 1;
	}

	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) {
		/*
		 * Found a close request that is not one of ours.
		 *
		 * This makes close requests from another process reset
		 * our thinktime delay. It is generally useful when there
		 * are two or more cooperating processes working in the
		 * same area.
		 */
		spin_lock(&aic->lock);
		aic->last_end_request = jiffies;
		spin_unlock(&aic->lock);
		return 1;
	}

	if (aic->ttime_samples == 0) {
		if (ad->new_ttime_mean > ad->antic_expire)
			return 1;
		if (ad->exit_prob > 128)
			return 1;
	} else if (aic->ttime_mean > ad->antic_expire) {
		/* the process thinks too much between requests */
		return 1;
	}

	if (!arq)
		return 0;

	if (ad->last_sector[REQ_SYNC] < arq->request->sector)
		s = arq->request->sector - ad->last_sector[REQ_SYNC];
	else
		s = ad->last_sector[REQ_SYNC] - arq->request->sector;

	if (aic->seek_samples == 0) {
		/*
		 * Process has just started IO. Use past statistics to
		 * gauge success possibility
		 */
		if (ad->new_seek_mean > s) {
			/* this request is better than what we're expecting */
			return 1;
		}
	} else {
		if (aic->seek_mean > s) {
			/* this request is better than what we're expecting */
			return 1;
		}
	}

	return 0;
}

/*
 * as_can_anticipate indicates whether we should either run arq
 * or keep anticipating a better request.
 */
static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
{
	if (!ad->io_context)
		/*
		 * Last request submitted was a write
		 */
		return 0;

	if (ad->antic_status == ANTIC_FINISHED)
		/*
		 * Don't restart if we have just finished. Run the next request
		 */
		return 0;

	if (as_can_break_anticipation(ad, arq))
		/*
		 * This request is a good candidate. Don't keep anticipating,
		 * run it.
		 */
		return 0;

	/*
	 * OK from here, we haven't finished, and don't have a decent request!
	 * Status is either ANTIC_OFF so start waiting,
	 * ANTIC_WAIT_REQ so continue waiting for request to finish
	 * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
	 *
	 */
	return 1;
}

static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic,
				unsigned long ttime)
{
	/* fixed point: 1.0 == 1<<8 */
	if (aic->ttime_samples == 0) {
		ad->new_ttime_total = (7*ad->new_ttime_total + 256*ttime) / 8;
		ad->new_ttime_mean = ad->new_ttime_total / 256;

		ad->exit_prob = (7*ad->exit_prob)/8;
	}
	aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
	aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8;
	aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
}

static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
				sector_t sdist)
{
	u64 total;

	if (aic->seek_samples == 0) {
		ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8;
		ad->new_seek_mean = ad->new_seek_total / 256;
	}

	/*
	 * Don't allow the seek distance to get too large from the
	 * odd fragment, pagein, etc
	 */
	if (aic->seek_samples <= 60) /* second & third seek */
		sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*1024);
	else
		sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*64);

	aic->seek_samples = (7*aic->seek_samples + 256) / 8;
	aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8;
	total = aic->seek_total + (aic->seek_samples/2);
	do_div(total, aic->seek_samples);
	aic->seek_mean = (sector_t)total;
}

/*
 * as_update_iohist keeps a decaying histogram of IO thinktimes, and
 * updates @aic->ttime_mean based on that. It is called when a new
 * request is queued.
 */
static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
				struct request *rq)
{
	struct as_rq *arq = RQ_DATA(rq);
	int data_dir = arq->is_sync;
	unsigned long thinktime;
	sector_t seek_dist;

	if (aic == NULL)
		return;

	if (data_dir == REQ_SYNC) {
		unsigned long in_flight = atomic_read(&aic->nr_queued)
					+ atomic_read(&aic->nr_dispatched);
		spin_lock(&aic->lock);
		if (test_bit(AS_TASK_IORUNNING, &aic->state) ||
			test_bit(AS_TASK_IOSTARTED, &aic->state)) {
			/* Calculate read -> read thinktime */
			if (test_bit(AS_TASK_IORUNNING, &aic->state)
							&& in_flight == 0) {
				thinktime = jiffies - aic->last_end_request;
				thinktime = min(thinktime, MAX_THINKTIME-1);
			} else
				thinktime = 0;
			as_update_thinktime(ad, aic, thinktime);

			/* Calculate read -> read seek distance */
			if (aic->last_request_pos < rq->sector)
				seek_dist = rq->sector - aic->last_request_pos;
			else
				seek_dist = aic->last_request_pos - rq->sector;
			as_update_seekdist(ad, aic, seek_dist);
		}
		aic->last_request_pos = rq->sector + rq->nr_sectors;
		set_bit(AS_TASK_IOSTARTED, &aic->state);
		spin_unlock(&aic->lock);
	}
}

/*
 * as_update_arq must be called whenever a request (arq) is added to
 * the sort_list. This function keeps caches up to date, and checks if the
 * request might be one we are "anticipating"
 */
static void as_update_arq(struct as_data *ad, struct as_rq *arq)
{
	const int data_dir = arq->is_sync;

	/* keep the next_arq cache up to date */
	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);

	/*
	 * have we been anticipating this request?
	 * or does it come from the same process as the one we are anticipating
	 * for?
	 */
	if (ad->antic_status == ANTIC_WAIT_REQ
			|| ad->antic_status == ANTIC_WAIT_NEXT) {
		if (as_can_break_anticipation(ad, arq))
			as_antic_stop(ad);
	}
}

/*
 * Gathers timings and resizes the write batch automatically
 */
static void update_write_batch(struct as_data *ad)
{
	unsigned long batch = ad->batch_expire[REQ_ASYNC];
	long write_time;

	write_time = (jiffies - ad->current_batch_expires) + batch;
	if (write_time < 0)
		write_time = 0;

	if (write_time > batch && !ad->write_batch_idled) {
		if (write_time > batch * 3)
			ad->write_batch_count /= 2;
		else
			ad->write_batch_count--;
	} else if (write_time < batch && ad->current_write_count == 0) {
		if (batch > write_time * 3)
			ad->write_batch_count *= 2;
		else
			ad->write_batch_count++;
	}

	if (ad->write_batch_count < 1)
		ad->write_batch_count = 1;
}

/*
 * as_completed_request is to be called when a request has completed and
 * returned something to the requesting process, be it an error or data.
 */
static void as_completed_request(request_queue_t *q, struct request *rq)
{
	struct as_data *ad = q->elevator->elevator_data;
	struct as_rq *arq = RQ_DATA(rq);

	WARN_ON(!list_empty(&rq->queuelist));

	if (arq->state == AS_RQ_PRESCHED) {
		WARN_ON(arq->io_context);
		goto out;
	}

	if (arq->state == AS_RQ_MERGED)
		goto out_ioc;

	if (arq->state != AS_RQ_REMOVED) {
		printk("arq->state %d\n", arq->state);
		WARN_ON(1);
		goto out;
	}

	if (!blk_fs_request(rq))
		goto out;

	if (ad->changed_batch && ad->nr_dispatched == 1) {
		kblockd_schedule_work(&ad->antic_work);
		ad->changed_batch = 0;

		if (ad->batch_data_dir == REQ_SYNC)
			ad->new_batch = 1;
	}
	WARN_ON(ad->nr_dispatched == 0);
	ad->nr_dispatched--;

	/*
	 * Start counting the batch from when a request of that direction is
	 * actually serviced. This should help devices with big TCQ windows
	 * and writeback caches
	 */
	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
		update_write_batch(ad);
		ad->current_batch_expires = jiffies +
				ad->batch_expire[REQ_SYNC];
		ad->new_batch = 0;
	}

	if (ad->io_context == arq->io_context && ad->io_context) {
		ad->antic_start = jiffies;
		ad->ioc_finished = 1;
		if (ad->antic_status == ANTIC_WAIT_REQ) {
			/*
			 * We were waiting on this request, now anticipate
			 * the next one
			 */
			as_antic_waitnext(ad);
		}
	}

out_ioc:
	if (!arq->io_context)
		goto out;

	if (arq->is_sync == REQ_SYNC) {
		struct as_io_context *aic = arq->io_context->aic;
		if (aic) {
			spin_lock(&aic->lock);
			set_bit(AS_TASK_IORUNNING, &aic->state);
			aic->last_end_request = jiffies;
			spin_unlock(&aic->lock);
		}
	}

	put_io_context(arq->io_context);
out:
	arq->state = AS_RQ_POSTSCHED;
}

/*
 * as_remove_queued_request removes a request from the pre dispatch queue
 * without updating refcounts. It is expected the caller will drop the
 * reference unless it replaces the request at some part of the elevator
 * (ie. the dispatch queue)
 */
static void as_remove_queued_request(request_queue_t *q, struct request *rq)
{
	struct as_rq *arq = RQ_DATA(rq);
	const int data_dir = arq->is_sync;
	struct as_data *ad = q->elevator->elevator_data;

	WARN_ON(arq->state != AS_RQ_QUEUED);

	if (arq->io_context && arq->io_context->aic) {
		BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued));
		atomic_dec(&arq->io_context->aic->nr_queued);
	}

	/*
	 * Update the "next_arq" cache if we are about to remove its
	 * entry
	 */
	if (ad->next_arq[data_dir] == arq)
		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);

	list_del_init(&arq->fifo);
	as_remove_merge_hints(q, arq);
	as_del_arq_rb(ad, arq);
}

/*
 * as_remove_dispatched_request is called to remove a request which has gone
 * to the dispatch list.
 */
static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
{
	struct as_rq *arq = RQ_DATA(rq);
	struct as_io_context *aic;

	if (!arq) {
		WARN_ON(1);
		return;
	}
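
/*
 * Note for readers of this listing: the decaying statistics above
 * (as_update_thinktime(), as_update_seekdist(), and the exit_prob updates)
 * all use the same 8-bit fixed-point exponentially weighted moving average,
 * where 1.0 == 1 << 8 and each new sample carries 1/8 of the weight.
 * The userspace sketch below is NOT part of as-iosched.c; it simply replays
 * that update rule on a hypothetical series of thinktimes to show how
 * ttime_mean converges and how a single large sample decays away.
 */
#include <stdio.h>

int main(void)
{
	/* fixed point: 1.0 == 1 << 8, mirroring as_update_thinktime() */
	unsigned long ttime_samples = 0;
	unsigned long ttime_total = 0;
	unsigned long ttime_mean = 0;
	/* hypothetical thinktimes in jiffies: one slow sample among fast ones */
	unsigned long ttime[] = { 4, 4, 4, 40, 4, 4, 4, 4 };
	unsigned int i;

	for (i = 0; i < sizeof(ttime) / sizeof(ttime[0]); i++) {
		/* same decay rule as the scheduler: old history keeps 7/8 weight */
		ttime_samples = (7 * ttime_samples + 256) / 8;
		ttime_total = (7 * ttime_total + 256 * ttime[i]) / 8;
		ttime_mean = (ttime_total + 128) / ttime_samples;
		printf("after sample %u (%lu jiffies): ttime_mean = %lu\n",
			i, ttime[i], ttime_mean);
	}
	return 0;
}
/*
 * Because ttime_samples saturates at 256 rather than growing without bound,
 * ttime_mean tracks recent behaviour instead of a lifetime average, which is
 * the value the anticipation heuristics in as_can_break_anticipation()
 * compare against antic_expire.
 */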