📄 026_pre_fs_c.html

📁 重读linux 2.4.2o所写的笔记
💻 HTML
📖 第 1 页 / 共 2 页
字号:
上一页 12
      <pre>	generic_make_request(rw, bh);</pre>
      <pre>	switch (rw) {<br>		case WRITE:<br>			kstat.pgpgout++;<br>			break;<br>		default:<br>			kstat.pgpgin++;<br>			break;<br>	}<br>}<br>                          </pre>
      <pre>  看看如何向磁盘驱动提交申请:<br>void <font color=#0000ff>generic_make_request</font> (int rw, struct buffer_head * bh)<br>{<br>	int major = MAJOR(bh-&gt;b_rdev);<br>	request_queue_t *q;<br> </pre>
      <pre>  .....//检查读取范围是否存在于磁盘,比如超出最大扇区号</pre>
      <pre>	/*<br>	 * Resolve the mapping until finished. (drivers are<br>	 * still free to implement/resolve their own stacking<br>	 * by explicitly returning 0)<br>	 */<br>	/* NOTE: we don't repeat the blk_size check for each new device.<br>	 * Stacking drivers are expected to know what they are doing.<br>	 */<br>	do {<br>		q = blk_get_queue(bh-&gt;b_rdev);<br>		if (!q) {<br>			printk(KERN_ERR<br>			       "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n",<br>			       kdevname(bh-&gt;b_rdev), bh-&gt;b_rsector);<br>			buffer_IO_error(bh);<br>			break;<br>		}</pre>
      <pre>	}<br>	while (q-&gt;make_request_fn(q, rw, bh)); /*参考blk_init_queue,初始化为 __make_request*/<br>}<br> </pre>
      <pre>  这里通过一个while循环来提交一个请求,但是对于IDE,这是没有必要的.__make_request总是返回0.<br>static int<font color=#0000ff> __make_request</font>(request_queue_t * q, int rw,<br>				  struct buffer_head * bh)<br>{<br>	unsigned int sector, count;<br>	int max_segments = MAX_SEGMENTS;<br>	struct request * req = NULL, *freereq = NULL;<br>	int rw_ahead, max_sectors, el_ret;<br>	struct list_head *head;<br>	int latency;<br>	elevator_t *elevator = &amp;q-&gt;elevator;<br> </pre>
      <pre> again:<br>  ........</pre>
      <pre>	if (list_empty(head)) {<br>		q-&gt;<b><font color=#0000ff>plug_device_fn</font></b>(q, bh-&gt;b_rdev); /* is atomic */<br>		                                /*<font color=#0000ff>这个函数对IDE来讲是generic_plug_device,见blk_init_queue</font>*/<br>		goto get_rq;<br>	}</pre>
      <pre>	el_ret = elevator-&gt;elevator_merge_fn(q, &amp;req, bh, rw,<br>					     &amp;max_sectors, &amp;max_segments);<br>	switch (el_ret) {</pre>
      <pre>		case ELEVATOR_BACK_MERGE:<br>			if (!q-&gt;back_merge_fn(q, req, bh, max_segments))<br>				break;<br>			req-&gt;bhtail-&gt;b_reqnext = bh;<br>			req-&gt;bhtail = bh;<br>			req-&gt;nr_sectors = req-&gt;hard_nr_sectors += count;<br>			req-&gt;e = elevator;<br>			drive_stat_acct(req-&gt;rq_dev, req-&gt;cmd, count, 0);<br>			attempt_back_merge(q, req, max_sectors, max_segments);<br>			goto out;</pre>
      <pre>		case ELEVATOR_FRONT_MERGE:<br>			if (!q-&gt;front_merge_fn(q, req, bh, max_segments))<br>				break;<br>			bh-&gt;b_reqnext = req-&gt;bh;<br>			req-&gt;bh = bh;<br>			req-&gt;buffer = bh-&gt;b_data;<br>			req-&gt;current_nr_sectors = count;<br>			req-&gt;sector = req-&gt;hard_sector = sector;<br>			req-&gt;nr_sectors = req-&gt;hard_nr_sectors += count;<br>			req-&gt;e = elevator;<br>			drive_stat_acct(req-&gt;rq_dev, req-&gt;cmd, count, 0);<br>			attempt_front_merge(q, head, req, max_sectors, max_segments);<br>			goto out;<br>		/*<br>		 * elevator says don't/can't merge. get new request<br>		 */<br>		case ELEVATOR_NO_MERGE:<br>			break;</pre>
      <pre>		default:<br>			printk("elevator returned crap (%d)\n", el_ret);<br>			BUG();<br>	}<br>		</pre>
      <pre>	/*<br>	 * Grab a free request from the freelist. Read first try their<br>	 * own queue - if that is empty, we steal from the write list.<br>	 * Writes must block if the write list is empty, and read aheads<br>	 * are not crucial.<br>	 */<br>get_rq:<br>	if (freereq) {<br>		req = freereq;<br>		freereq = NULL;<br>	} else if ((req = get_request(q, rw)) == NULL) {<br>		spin_unlock_irq(&amp;io_request_lock);<br>		if (rw_ahead)<br>			goto end_io;</pre>
      <pre>		freereq = __get_request_wait(q, rw);<br>		goto again;<br>	}</pre>
      <pre>/* fill up the request-info, and add it to the queue */<br>	req-&gt;cmd = rw;<br>	req-&gt;errors = 0;<br>	req-&gt;hard_sector = req-&gt;sector = sector;<br>	req-&gt;hard_nr_sectors = req-&gt;nr_sectors = count;<br>	req-&gt;current_nr_sectors = count;<br>	req-&gt;nr_segments = 1; /* Always 1 for a new request. */<br>	req-&gt;nr_hw_segments = 1; /* Always 1 for a new request. */<br>	req-&gt;buffer = bh-&gt;b_data;<br>	req-&gt;sem = NULL;<br>	req-&gt;bh = bh;<br>	req-&gt;bhtail = bh;<br>	req-&gt;rq_dev = bh-&gt;b_rdev;<br>	req-&gt;e = elevator;<br>	add_request(q, req, head, latency);<font color=#0000ff> /*提交给磁盘驱动*/</font>
out:
	if (!q-&gt;plugged)  
  	(q-&gt;request_fn)(q);<font color=#0000ff>/*见ide_init_queue,将其初始化为do_ide_request */</font>
	</pre>
      <pre>     if (freereq)<br>	blkdev_release_request(freereq);<br>	spin_unlock_irq(&amp;io_request_lock);<br>	return 0;<br>end_io:<br>	bh-&gt;b_end_io(bh, test_bit(BH_Uptodate, &amp;bh-&gt;b_state));<br>	return 0;<br>}<br>  <font color=#0000ff>待会儿再说q-&gt;plugged的含义.先看看do_ide_request做了什么:</font>
void do_ide_request(request_queue_t *q)
{
	ide_do_request(q-&gt;queuedata, 0);
}
static void <font color=#0000ff>ide_do_request</font>(ide_hwgroup_t *hwgroup, int masked_irq)<br>{<br>	ide_drive_t	*drive;<br>	ide_hwif_t	*hwif;<br>	ide_startstop_t	startstop;</pre>
      <pre>	ide_get_lock(&amp;ide_lock, ide_intr, hwgroup);	/* for atari only: POSSIBLY BROKEN HERE(?) */</pre>
      <pre>	__cli();	/* necessary paranoia: ensure IRQs are masked on local CPU */</pre>
      <pre>	while (!hwgroup-&gt;busy) {               <font color=#0000ff>/*hwgroup不忙的时候需要处理，否则这就是一个空函数而已*/</font>
		hwgroup-&gt;busy = 1;           <font color=#0000ff>/*如果busy置位，代表其他进程已经进入此循环，第一个进入此循环的<br>		                                线程负责处理所有连接到此hwgroup上drive的请求。一个hwgroup共享<br>		                                同一个中断。<br>		                               */</font>
		drive = choose_drive(hwgroup); <font color=#0000ff>/*选择一个控制器，呵呵，处理的请求未必就是你刚刚提交的<br>		                                 那个，甚至你读hda，这里却选中了hdc，注意<br>		                                 drive-&gt;queue.plugged ==0 才会被选中，</font><font color=#ff0000><b>plugged 置位代表<br>		                                 这个drive开始处理请求，这种情况下不需要这个线程调用<br>		                                 ide_do_request而是通过中断ide_intr-&gt;ide_do_request(drive);<br>		                                 来获取cpu处理请求</b></font><font color=#0000ff>
		                               */</font>
		if (drive == NULL) {
			unsigned long sleep = 0;
			hwgroup-&gt;rq = NULL;
			drive = hwgroup-&gt;drive;
			do {
				if (drive-&gt;sleep &amp;&amp; (!sleep || 0 &lt; (signed long)(sleep - drive-&gt;sleep)))
					sleep = drive-&gt;sleep;
			} while ((drive = drive-&gt;next) != hwgroup-&gt;drive);
			if (sleep) {
				/*
				 * Take a short snooze, and then wake up this hwgroup again.
				 * This gives other hwgroups on the same a chance to
				 * play fairly with us, just in case there are big differences
				 * in relative throughputs.. don't want to hog the cpu too much.
				 */
				if (0 &lt; (signed long)(jiffies + WAIT_MIN_SLEEP - sleep)) 
					sleep = jiffies + WAIT_MIN_SLEEP;
#if 1
				if (timer_pending(&amp;hwgroup-&gt;timer))
					printk("ide_set_handler: timer already active\n");
#endif
				hwgroup-&gt;sleeping = 1;	/* so that ide_timer_expiry knows what to do */
				mod_timer(&amp;hwgroup-&gt;timer, sleep);
				/* we purposely leave hwgroup-&gt;busy==1 while sleeping */
			} else {
				/* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */
				ide_release_lock(&amp;ide_lock);	/* for atari only */
				hwgroup-&gt;busy = 0;
			}
			return;		/* no more work for this hwgroup (for now) */
		}
		hwif = HWIF(drive);
		if (hwgroup-&gt;hwif-&gt;sharing_irq &amp;&amp; hwif != hwgroup-&gt;hwif &amp;&amp; hwif-&gt;io_ports[IDE_CONTROL_OFFSET]) {
			/* set nIEN for previous hwif */
			SELECT_INTERRUPT(hwif, drive);
		}
		hwgroup-&gt;hwif = hwif;
		hwgroup-&gt;drive = drive;
		drive-&gt;sleep = 0;
		drive-&gt;service_start = jiffies;</pre>
      <pre>		if ( drive-&gt;queue.plugged )	/* paranoia */<br>			printk("%s: Huh? nuking plugged queue\n", drive-&gt;name);<br>		hwgroup-&gt;rq = blkdev_entry_next_request(&amp;drive-&gt;queue.queue_head);<br>		/*<br>		 * Some systems have trouble with IDE IRQs arriving while<br>		 * the driver is still setting things up.  So, here we disable<br>		 * the IRQ used by this interface while the request is being started.<br>		 * This may look bad at first, but pretty much the same thing<br>		 * happens anyway when any interrupt comes in, IDE or otherwise<br>		 *  -- the kernel masks the IRQ while it is being handled.<br>		 */<br>		if (masked_irq &amp;&amp; hwif-&gt;irq != masked_irq)<br>			disable_irq_nosync(hwif-&gt;irq);<br>		spin_unlock(&amp;io_request_lock);<br>		ide__sti();	/* allow other IRQs while we start this request */<br>		startstop = <font color=#0000ff><b>start_request</b></font>(drive);<br>		spin_lock_irq(&amp;io_request_lock);<br>		if (masked_irq &amp;&amp; hwif-&gt;irq != masked_irq)<br>			enable_irq(hwif-&gt;irq);<br>		if (startstop == ide_stopped)<br>			hwgroup-&gt;busy = 0;<br>	}<br>}</pre>
      <pre>   IDE分析到这种地步，我们开始接触磁盘操作的‘核心’逻辑：<font color=#0000ff><b>__make_request，ide_do_request，plugged，ide_intr，tq_disk。<br>_</b>_make_request，tq_disk </font>主要负责调度磁盘的读写请求。<font color=#0000ff>ide_do_request，ide_intr</font>完成ide接口的操作，真正地完成磁盘读写。<font color=#0000ff>
   <b>_</b>_make_request </font>第一次接到磁盘读写请求（que为空），直接将请求挂入队列，置plug，放入tq_disk(延后对<b>ide_do_request</b>的调用)。后续<br>的读写请求则首先进行调度，然后再决定是否马上向hw发起操作。当向hw请求发出后（<b>ide_do_request</b>得以执行），intr接管对<b>ide_do_request<br></b>的调用<font color=#0000ff>，</font>同时que plug位被清除，hwgroup的busy位置位。<font color=#0000ff>（</font>当plug到tq_disk时，不会进行hw操作，因为<b>ide_do_request</b>只选择非plug的队列<font color=#0000ff>）。<br><br>   </font>intr接管对<b>ide_do_request</b>的调用之后，也不见得会将所有的读写请求处理完，这要看磁盘级别的调度结果，<b>ide_do_request</b>负责在磁盘<br>之间调度。这里注意一下head_active,对于ide，此位总是 1，这代表在对读写请求调度时，如果处于unplug状态，则不能操作第一个req(unplug时<br>有可能在进行io操作，即ide_intr已经在进行真正的io操作了)。<br><br>   处于plug状态的队列其实是在等待进行读写请求的调度，以便达到比较好的io吞吐率。但是也不能这样长久地等待下去。所以，如果我们搜索一下<br>tq_disk,就会发现内核有许多地方在调整着吞吐率和延迟之间的矛盾。具体细节就不再罗列了。</pre>
      <pre>   真正操作ide的代码是<font color=#0000ff>start_request，drive-&gt;do_request（对于ide 硬盘是do_rw_disk）：</font>
</pre>
      <pre>/*<br> * do_rw_disk() issues READ and WRITE commands to a disk,<br> * using LBA if supported, or CHS otherwise, to address sectors.<br> * It also takes care of issuing special DRIVE_CMDs.<br> */<br>static ide_startstop_t <font color=#0000ff>do_rw_disk</font> (ide_drive_t *drive, struct request *rq, unsigned long block)<br>{<br>	if (IDE_CONTROL_REG)<br>		OUT_BYTE(drive-&gt;ctl,IDE_CONTROL_REG);<br>	OUT_BYTE(rq-&gt;nr_sectors,IDE_NSECTOR_REG);<br><br>	if (<font color=#0000ff>drive-&gt;select.b.lba</font>) <font color=#0000ff>{ /*LBA,可以看到，2.4.0的内核还不支持48bitLBA操作，不能支持&gt;137G的硬盘*/</font>

#ifdef DEBUG
		printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
			drive-&gt;name, (rq-&gt;cmd==READ)?"read":"writ",
			block, rq-&gt;nr_sectors, (unsigned long) rq-&gt;buffer);
#endif
		OUT_BYTE(block,IDE_SECTOR_REG);
		OUT_BYTE(block&gt;&gt;=8,IDE_LCYL_REG);
		OUT_BYTE(block&gt;&gt;=8,IDE_HCYL_REG);
		OUT_BYTE(((block&gt;&gt;8)&amp;0x0f)|drive-&gt;select.all,IDE_SELECT_REG);
	<font color=#0000ff>}</font> else <font color=#0000ff>{</font>
		unsigned int sect,head,cyl,track;
		track = block / drive-&gt;sect;
		sect  = block % drive-&gt;sect + 1;
		OUT_BYTE(sect,IDE_SECTOR_REG);
		head  = track % drive-&gt;head;
		cyl   = track / drive-&gt;head;
		OUT_BYTE(cyl,IDE_LCYL_REG);
		OUT_BYTE(cyl&gt;&gt;8,IDE_HCYL_REG);
		OUT_BYTE(head|drive-&gt;select.all,IDE_SELECT_REG);
#ifdef DEBUG
		printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
			drive-&gt;name, (rq-&gt;cmd==READ)?"read":"writ", cyl,
			head, sect, rq-&gt;nr_sectors, (unsigned long) rq-&gt;buffer);
#endif
	<font color=#0000ff>}</font>
#ifdef CONFIG_BLK_DEV_PDC4030
	if (IS_PDC4030_DRIVE) {
		extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *);
		return do_pdc4030_io (drive, rq);
	}
#endif /* CONFIG_BLK_DEV_PDC4030 */
	<font color=#0000ff>if (rq-&gt;cmd == READ)</font> <font color=#0000ff>{</font>
#ifdef CONFIG_BLK_DEV_IDEDMA
		if (drive-&gt;using_dma &amp;&amp; !(HWIF(drive)-&gt;dmaproc(ide_dma_read, drive)))
			return ide_started;
#endif /* CONFIG_BLK_DEV_IDEDMA */
		ide_set_handler(drive, &amp;read_intr, WAIT_CMD, NULL);
		OUT_BYTE(drive-&gt;mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
		return ide_started;
	<font color=#0000ff>}</font>
	if <font color=#0000ff>(rq-&gt;cmd == WRITE)</font> <font color=#0000ff>{</font>
		ide_startstop_t startstop;
#ifdef CONFIG_BLK_DEV_IDEDMA
		if (drive-&gt;using_dma &amp;&amp; !(HWIF(drive)-&gt;dmaproc(ide_dma_write, drive)))
			return ide_started;
#endif /* CONFIG_BLK_DEV_IDEDMA */
		OUT_BYTE(drive-&gt;mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
		if (ide_wait_stat(&amp;startstop, drive, DATA_READY, drive-&gt;bad_wstat, WAIT_DRQ)) {
			printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive-&gt;name,
				drive-&gt;mult_count ? "MULTWRITE" : "WRITE");
			return startstop;
		}
		if (!drive-&gt;unmask)
			__cli();	/* local CPU only */
		if (drive-&gt;mult_count) {
			ide_hwgroup_t *hwgroup = HWGROUP(drive);
			/*
			 * Ugh.. this part looks ugly because we MUST set up
			 * the interrupt handler before outputting the first block
			 * of data to be written.  If we hit an error (corrupted buffer list)
			 * in ide_multwrite(), then we need to remove the handler/timer
			 * before returning.  Fortunately, this NEVER happens (right?).
			 *
			 * Except when you get an error it seems...
			 */
			hwgroup-&gt;wrq = *rq; /* scratchpad */
			ide_set_handler (drive, &amp;multwrite_intr, WAIT_CMD, NULL);
			if (ide_multwrite(drive, drive-&gt;mult_count)) {
				unsigned long flags;
				spin_lock_irqsave(&amp;io_request_lock, flags);
				hwgroup-&gt;handler = NULL;
				del_timer(&amp;hwgroup-&gt;timer);
				spin_unlock_irqrestore(&amp;io_request_lock, flags);
				return ide_stopped;
			}
		} else {
			ide_set_handler (drive, &amp;write_intr, WAIT_CMD, NULL);
			idedisk_output_data(drive, rq-&gt;buffer, SECTOR_WORDS);
		}
		return ide_started;
	<font color=#0000ff>}</font>
	printk(KERN_ERR "%s: bad command: %d\n", drive-&gt;name, rq-&gt;cmd);
	ide_end_request(0, HWGROUP(drive));
	return ide_stopped;
}</pre>
      <pre>　</pre>
      <pre><font color=#0000ff>   <br></font></pre>
    </td>
  </tr>
  </tbody>
</table></body></html>
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -