📄 026_pre_fs_c.html
字号:
<pre> generic_make_request(rw, bh);</pre>
<pre> switch (rw) {<br> case WRITE:<br> kstat.pgpgout++;<br> break;<br> default:<br> kstat.pgpgin++;<br> break;<br> }<br>}<br> </pre>
<pre> 看看如何向磁盘驱动提交申请:<br>void <font color=#0000ff>generic_make_request</font> (int rw, struct buffer_head * bh)<br>{<br> int major = MAJOR(bh->b_rdev);<br> request_queue_t *q;<br> </pre>
<pre> .....//检查读取范围是否存在于磁盘,比如超出最大扇区号</pre>
<pre> /*<br> * Resolve the mapping until finished. (drivers are<br> * still free to implement/resolve their own stacking<br> * by explicitly returning 0)<br> */<br> /* NOTE: we don't repeat the blk_size check for each new device.<br> * Stacking drivers are expected to know what they are doing.<br> */<br> do {<br> q = blk_get_queue(bh->b_rdev);<br> if (!q) {<br> printk(KERN_ERR<br> "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n",<br> kdevname(bh->b_rdev), bh->b_rsector);<br> buffer_IO_error(bh);<br> break;<br> }</pre>
<pre> }<br> while (q->make_request_fn(q, rw, bh)); /*参考blk_init_queue,初始化为 __make_request*/<br>}<br> </pre>
<pre> 这里通过一个while循环来提交一个请求,但是对于IDE,这是没有必要的.__make_request总是返回0.<br>static int<font color=#0000ff> __make_request</font>(request_queue_t * q, int rw,<br> struct buffer_head * bh)<br>{<br> unsigned int sector, count;<br> int max_segments = MAX_SEGMENTS;<br> struct request * req = NULL, *freereq = NULL;<br> int rw_ahead, max_sectors, el_ret;<br> struct list_head *head;<br> int latency;<br> elevator_t *elevator = &q->elevator;<br> </pre>
<pre> again:<br> ........</pre>
<pre> if (list_empty(head)) {<br> q-><b><font color=#0000ff>plug_device_fn</font></b>(q, bh->b_rdev); /* is atomic */<br> /*<font color=#0000ff>这个函数对IDE来讲是generic_plug_device,见blk_init_queue</font>*/<br> goto get_rq;<br> }</pre>
<pre> el_ret = elevator->elevator_merge_fn(q, &req, bh, rw,<br> &max_sectors, &max_segments);<br> switch (el_ret) {</pre>
<pre> case ELEVATOR_BACK_MERGE:<br> if (!q->back_merge_fn(q, req, bh, max_segments))<br> break;<br> req->bhtail->b_reqnext = bh;<br> req->bhtail = bh;<br> req->nr_sectors = req->hard_nr_sectors += count;<br> req->e = elevator;<br> drive_stat_acct(req->rq_dev, req->cmd, count, 0);<br> attempt_back_merge(q, req, max_sectors, max_segments);<br> goto out;</pre>
<pre> case ELEVATOR_FRONT_MERGE:<br> if (!q->front_merge_fn(q, req, bh, max_segments))<br> break;<br> bh->b_reqnext = req->bh;<br> req->bh = bh;<br> req->buffer = bh->b_data;<br> req->current_nr_sectors = count;<br> req->sector = req->hard_sector = sector;<br> req->nr_sectors = req->hard_nr_sectors += count;<br> req->e = elevator;<br> drive_stat_acct(req->rq_dev, req->cmd, count, 0);<br> attempt_front_merge(q, head, req, max_sectors, max_segments);<br> goto out;<br> /*<br> * elevator says don't/can't merge. get new request<br> */<br> case ELEVATOR_NO_MERGE:<br> break;</pre>
<pre> default:<br> printk("elevator returned crap (%d)\n", el_ret);<br> BUG();<br> }<br> </pre>
<pre> /*<br> * Grab a free request from the freelist. Read first try their<br> * own queue - if that is empty, we steal from the write list.<br> * Writes must block if the write list is empty, and read aheads<br> * are not crucial.<br> */<br>get_rq:<br> if (freereq) {<br> req = freereq;<br> freereq = NULL;<br> } else if ((req = get_request(q, rw)) == NULL) {<br> spin_unlock_irq(&io_request_lock);<br> if (rw_ahead)<br> goto end_io;</pre>
<pre> freereq = __get_request_wait(q, rw);<br> goto again;<br> }</pre>
<pre>/* fill up the request-info, and add it to the queue */<br> req->cmd = rw;<br> req->errors = 0;<br> req->hard_sector = req->sector = sector;<br> req->hard_nr_sectors = req->nr_sectors = count;<br> req->current_nr_sectors = count;<br> req->nr_segments = 1; /* Always 1 for a new request. */<br> req->nr_hw_segments = 1; /* Always 1 for a new request. */<br> req->buffer = bh->b_data;<br> req->sem = NULL;<br> req->bh = bh;<br> req->bhtail = bh;<br> req->rq_dev = bh->b_rdev;<br> req->e = elevator;<br> add_request(q, req, head, latency);<font color=#0000ff> /*提交给磁盘驱动*/</font>
out:
if (!q->plugged)
(q->request_fn)(q);<font color=#0000ff>/*见ide_init_queue,将其初始化为do_ide_request */</font>
</pre>
<pre> if (freereq)<br> blkdev_release_request(freereq);<br> spin_unlock_irq(&io_request_lock);<br> return 0;<br>end_io:<br> bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));<br> return 0;<br>}<br> <font color=#0000ff>待会儿再说q->plugged的含义.先看看do_ide_request做了什么:</font>
void do_ide_request(request_queue_t *q)
{
ide_do_request(q->queuedata, 0);
}
static void <font color=#0000ff>ide_do_request</font>(ide_hwgroup_t *hwgroup, int masked_irq)<br>{<br> ide_drive_t *drive;<br> ide_hwif_t *hwif;<br> ide_startstop_t startstop;</pre>
<pre> ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */</pre>
<pre> __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */</pre>
<pre> while (!hwgroup->busy) { <font color=#0000ff>/*hwgroup不忙的时候需要处理,否则这就是一个空函数而已*/</font>
hwgroup->busy = 1; <font color=#0000ff>/*如果busy置位,代表其他进程已经进入此循环,第一个进入此循环的<br> 线程负责处理所有连接到此hwgroup上drive的请求。一个hwgroup共享<br> 同一个中断。<br> */</font>
drive = choose_drive(hwgroup); <font color=#0000ff>/*选择一个驱动器,呵呵,处理的请求未必就是你刚刚提交的<br> 那个,甚至你读hda,这里却选中了hdc,注意<br> drive->queue.plugged ==0 才会被选中,</font><font color=#ff0000><b>plugged 置位代表<br> 这个drive的队列还在积攒请求、等待调度(要等tq_disk运行时才会unplug),<br> 这种情况下不需要这个线程通过ide_do_request去启动它;而drive一旦<br> 开始处理请求,后续则是通过中断ide_intr->ide_do_request(hwgroup, ...);<br> 来获取cpu处理请求</b></font><font color=#0000ff>
*/</font>
if (drive == NULL) {
unsigned long sleep = 0;
hwgroup->rq = NULL;
drive = hwgroup->drive;
do {
if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep)))
sleep = drive->sleep;
} while ((drive = drive->next) != hwgroup->drive);
if (sleep) {
/*
* Take a short snooze, and then wake up this hwgroup again.
* This gives other hwgroups on the same a chance to
* play fairly with us, just in case there are big differences
* in relative throughputs.. don't want to hog the cpu too much.
*/
if (0 < (signed long)(jiffies + WAIT_MIN_SLEEP - sleep))
sleep = jiffies + WAIT_MIN_SLEEP;
#if 1
if (timer_pending(&hwgroup->timer))
printk("ide_set_handler: timer already active\n");
#endif
hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */
mod_timer(&hwgroup->timer, sleep);
/* we purposely leave hwgroup->busy==1 while sleeping */
} else {
/* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */
ide_release_lock(&ide_lock); /* for atari only */
hwgroup->busy = 0;
}
return; /* no more work for this hwgroup (for now) */
}
hwif = HWIF(drive);
if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif && hwif->io_ports[IDE_CONTROL_OFFSET]) {
/* set nIEN for previous hwif */
SELECT_INTERRUPT(hwif, drive);
}
hwgroup->hwif = hwif;
hwgroup->drive = drive;
drive->sleep = 0;
drive->service_start = jiffies;</pre>
<pre> if ( drive->queue.plugged ) /* paranoia */<br> printk("%s: Huh? nuking plugged queue\n", drive->name);<br> hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head);<br> /*<br> * Some systems have trouble with IDE IRQs arriving while<br> * the driver is still setting things up. So, here we disable<br> * the IRQ used by this interface while the request is being started.<br> * This may look bad at first, but pretty much the same thing<br> * happens anyway when any interrupt comes in, IDE or otherwise<br> * -- the kernel masks the IRQ while it is being handled.<br> */<br> if (masked_irq && hwif->irq != masked_irq)<br> disable_irq_nosync(hwif->irq);<br> spin_unlock(&io_request_lock);<br> ide__sti(); /* allow other IRQs while we start this request */<br> startstop = <font color=#0000ff><b>start_request</b></font>(drive);<br> spin_lock_irq(&io_request_lock);<br> if (masked_irq && hwif->irq != masked_irq)<br> enable_irq(hwif->irq);<br> if (startstop == ide_stopped)<br> hwgroup->busy = 0;<br> }<br>}</pre>
<pre> IDE分析到这种地步,我们开始接触磁盘操作的‘核心’逻辑:<font color=#0000ff><b>__make_request,ide_do_request,plugged,ide_intr,tq_disk。<br>_</b>_make_request,tq_disk </font>主要负责调度磁盘的读写请求。<font color=#0000ff>ide_do_request,ide_intr</font>完成ide接口的操作,真正地完成读写磁盘。<font color=#0000ff>
<b>_</b>_make_request </font>第一次接到磁盘读写请求(que为空),直接将请求挂入队列,置plug,放入tq_disk(延后对<b>ide_do_request</b>的调用)。后续<br>的读写请求则首先进行调度,然后再决定是否马上向hw发起操作。当向hw请求发出后(<b>ide_do_request</b>得以执行),intr接管对<b>ide_do_request<br></b>的调用<font color=#0000ff>,</font>同时que plug位被清除,hwgroup的busy位置位 。<font color=#0000ff>(</font>当plug到tq_disk时,不会进行hw操作的,因为<b>ide_do_request</b>只选择非plug的队列<font color=#0000ff>)。<br><br> </font>intr接管对<b>ide_do_request</b>的调用之后,也不见得会将所有的读写请求处理完,这要看磁盘级别的调度结果,<b>ide_do_request</b>负责在磁盘<br>之间调度。这里注意一下head_active,对于ide,此位总是 1,这代表在对读写请求调度时,如果处于unplug状态,则不能操作第一个req(unplug时<br>有可能在进行io操作,即ide_intr已经在进行真正的io操作了)。<br><br> 处于plug状态的队列其实是在等待进行读写请求的调度,以便达到比较好的io吞吐率。但是也不能这样长久地等待下去。所以,如果我们搜索一下<br>tq_disk,就会发现内核有许多地方在调整着吞吐率和延迟之间的矛盾。具体细节就不再罗列了。</pre>
<pre> 真正操作ide的代码是<font color=#0000ff>start_request,drive->do_request(对于ide 硬盘是do_rw_disk):</font>
</pre>
<pre>/*<br> * do_rw_disk() issues READ and WRITE commands to a disk,<br> * using LBA if supported, or CHS otherwise, to address sectors.<br> * It also takes care of issuing special DRIVE_CMDs.<br> */<br>static ide_startstop_t <font color=#0000ff>do_rw_disk</font> (ide_drive_t *drive, struct request *rq, unsigned long block)<br>{<br> if (IDE_CONTROL_REG)<br> OUT_BYTE(drive->ctl,IDE_CONTROL_REG);<br> OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG);<br><br> if (<font color=#0000ff>drive->select.b.lba</font>) <font color=#0000ff>{ /*LBA,可以看到,2.4.0的内核还不支持48bitLBA操作,不能支持大于137G的硬盘*/</font>
#ifdef DEBUG
printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
drive->name, (rq->cmd==READ)?"read":"writ",
block, rq->nr_sectors, (unsigned long) rq->buffer);
#endif
OUT_BYTE(block,IDE_SECTOR_REG);
OUT_BYTE(block>>=8,IDE_LCYL_REG);
OUT_BYTE(block>>=8,IDE_HCYL_REG);
OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG);
<font color=#0000ff>}</font> else <font color=#0000ff>{</font>
unsigned int sect,head,cyl,track;
track = block / drive->sect;
sect = block % drive->sect + 1;
OUT_BYTE(sect,IDE_SECTOR_REG);
head = track % drive->head;
cyl = track / drive->head;
OUT_BYTE(cyl,IDE_LCYL_REG);
OUT_BYTE(cyl>>8,IDE_HCYL_REG);
OUT_BYTE(head|drive->select.all,IDE_SELECT_REG);
#ifdef DEBUG
printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
drive->name, (rq->cmd==READ)?"read":"writ", cyl,
head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
#endif
<font color=#0000ff>}</font>
#ifdef CONFIG_BLK_DEV_PDC4030
if (IS_PDC4030_DRIVE) {
extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *);
return do_pdc4030_io (drive, rq);
}
#endif /* CONFIG_BLK_DEV_PDC4030 */
<font color=#0000ff>if (rq->cmd == READ)</font> <font color=#0000ff>{</font>
#ifdef CONFIG_BLK_DEV_IDEDMA
if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
return ide_started;
#endif /* CONFIG_BLK_DEV_IDEDMA */
ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
return ide_started;
<font color=#0000ff>}</font>
if <font color=#0000ff>(rq->cmd == WRITE)</font> <font color=#0000ff>{</font>
ide_startstop_t startstop;
#ifdef CONFIG_BLK_DEV_IDEDMA
if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
return ide_started;
#endif /* CONFIG_BLK_DEV_IDEDMA */
OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name,
drive->mult_count ? "MULTWRITE" : "WRITE");
return startstop;
}
if (!drive->unmask)
__cli(); /* local CPU only */
if (drive->mult_count) {
ide_hwgroup_t *hwgroup = HWGROUP(drive);
/*
* Ugh.. this part looks ugly because we MUST set up
* the interrupt handler before outputting the first block
* of data to be written. If we hit an error (corrupted buffer list)
* in ide_multwrite(), then we need to remove the handler/timer
* before returning. Fortunately, this NEVER happens (right?).
*
* Except when you get an error it seems...
*/
hwgroup->wrq = *rq; /* scratchpad */
ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
if (ide_multwrite(drive, drive->mult_count)) {
unsigned long flags;
spin_lock_irqsave(&io_request_lock, flags);
hwgroup->handler = NULL;
del_timer(&hwgroup->timer);
spin_unlock_irqrestore(&io_request_lock, flags);
return ide_stopped;
}
} else {
ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
idedisk_output_data(drive, rq->buffer, SECTOR_WORDS);
}
return ide_started;
<font color=#0000ff>}</font>
printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
ide_end_request(0, HWGROUP(drive));
return ide_stopped;
}</pre>
<pre> </pre>
<pre><font color=#0000ff> <br></font></pre>
</td>
</tr>
</tbody>
</table></body></html>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -