📄 035_fs_buffer_c.html

📁 重读linux 2.4.2o所写的笔记
💻 HTML
📖 第 1 页 / 共 5 页
字号:
        &nbsp;* dirty (hopefully that will not happen until we will free that
        block ;-)<br>
        &nbsp;* We don't even need to mark it not-uptodate - nobody can expect<br>
        &nbsp;* anything from a newly allocated buffer anyway. We used to used<br>
        &nbsp;* unmap_buffer() for such invalidation, but that was wrong. We
        definitely<br>
        &nbsp;* don't want to mark the alias unmapped, for example - it would
        confuse<br>
        &nbsp;* anyone who might pick it with bread() afterwards...<br>
        &nbsp;*/<br>
        <br>
        static void<b> unmap_underlying_metadata</b>(struct buffer_head * bh)<br>
        {<br>
        &nbsp;&nbsp;&nbsp; struct buffer_head *old_bh;<br>
        <br>
        &nbsp;&nbsp;&nbsp; old_bh = get_hash_table(bh-&gt;b_dev,
        bh-&gt;b_blocknr, bh-&gt;b_size);<br>
        &nbsp;&nbsp;&nbsp; if (old_bh) {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; mark_buffer_clean(old_bh);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; wait_on_buffer(old_bh);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; clear_bit(BH_Req,
        &amp;old_bh-&gt;b_state);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; /* Here we could run brelse or
        bforget. We use<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; bforget because it
        will try to put the buffer<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; in the freelist. */<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        <font size=4><b>__bforget</b></font>(old_bh);<br>
        &nbsp;&nbsp;&nbsp; }<br>
        }<br>
        <br>
        //mapping-&gt;a_ops-&gt;commit_write -&gt;
        <font color=#006600>block_commit_write --&gt;</font>__block_commit_write<br>
        static int <font color=#006600><b>__block_commit_write</b></font>(struct
        inode *inode, struct page *page,<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; unsigned from, unsigned to)<br>
        {<br>
        &nbsp;&nbsp;<br>
        &nbsp;&nbsp;&nbsp; for(bh = head = page-&gt;buffers, block_start = 0;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; bh != head || !block_start;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; block_start=block_end, bh =
        bh-&gt;b_this_page) { <b>//遍历所有的bh</b><br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; block_end = block_start +
        blocksize;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if (block_end &lt;= from ||
        block_start &gt;= to) {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if
        (!buffer_uptodate(bh))<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        &nbsp;&nbsp;&nbsp; partial = 1;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; } else {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        set_bit(BH_Uptodate, &amp;bh-&gt;b_state);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if
        (!atomic_set_buffer_dirty(bh)) {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        &nbsp;&nbsp;&nbsp;
        <b>__mark_dirty(bh)</b>;//<b>bh加入了lru队列,不代表就是加入了buffer
        cache.加入hash才是加入buffer cache的标志</b><br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        &nbsp;&nbsp;&nbsp;<b> buffer_insert_inode_queue</b>(bh, inode);
        <b>//呵呵这里证明,文件数据的bh关联一个inode,<br>
        </b>&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        &nbsp;&nbsp;&nbsp; need_balance_dirty = 1;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
        &nbsp;&nbsp;&nbsp; }<br>
        &nbsp;&nbsp;&nbsp; .............<br>
        &nbsp;&nbsp;&nbsp; if (!partial)<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; SetPageUptodate(page);&nbsp;
        //给page标记uptodate就够了,
        通过后备任务,写入到磁盘(<font color=#006600>inode-&gt;i_dirty_buffers)</font><br>
        &nbsp;&nbsp;&nbsp; return 0;<br>
        }<br>
        写文件的时候仅仅是标记dirty,连block
        dev的io都没有启动,除非要求了sync(O_SYNC),见<b>generic_file_write</b><br>
        (if ((status &gt;= 0) &amp;&amp; (file-&gt;f_flags &amp; O_SYNC)))<br>
        这样才能速度快.<br>
        <br>
        然后看看把整个页面写入磁盘文件的函数:这个函数提供给filemap的sync和page_launder使用,所以是启动了磁盘的io操作的.不具体分析<br>
        int<span style=COLOR:#006600> <b>block_write_full_page</b>(</span>struct
        page *page, get_block_t *<b>get_block</b>)<br>
        <b>//对ext2,就是ext2_get_block,map bh到具体设备上的block</b><br>
        {<br>
        &nbsp;&nbsp;&nbsp; struct inode *inode = page-&gt;mapping-&gt;host;<br>
        &nbsp;&nbsp;&nbsp; unsigned long end_index = inode-&gt;i_size &gt;&gt;
        PAGE_CACHE_SHIFT;<br>
        &nbsp;&nbsp;&nbsp; unsigned offset;<br>
        &nbsp;&nbsp;&nbsp; int err;<br>
        <br>
        &nbsp;&nbsp;&nbsp; /* easy case */<br>
        &nbsp;&nbsp;&nbsp; if (page-&gt;index &lt; end_index)<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;return
        __block_write_full_page(inode, page, get_block); <b>//可以整个页面写入的</b><br>
        <br>
        &nbsp;&nbsp;&nbsp; /* things got complicated... */<br>
        &nbsp;&nbsp;&nbsp; offset = inode-&gt;i_size &amp; (PAGE_CACHE_SIZE-1);<br>
        &nbsp;&nbsp;&nbsp; /* OK, are we completely out? */<br>
        &nbsp;&nbsp;&nbsp; if (page-&gt;index &gt;= end_index+1 || !offset) {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;UnlockPage(page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;return -EIO;<br>
        &nbsp;&nbsp;&nbsp; }<br>
        <br>
        &nbsp;&nbsp;&nbsp; /* Sigh... will have to work, then... */<br>
        &nbsp;&nbsp;&nbsp; err = __block_prepare_write(inode, page, 0, offset,
        get_block); <b>//否则得拆分开写1部分</b><br>
        &nbsp;&nbsp;&nbsp; if (!err) {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;memset(page_address(page) +
        offset, 0, PAGE_CACHE_SIZE - offset);//clear无效部分<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;flush_dcache_page(page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;
        &nbsp;__block_commit_write(inode,page,0,offset);
        <b>//分开写和写一页，除了写了不同数量的bh，其余都类似</b><br>
        done:<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;kunmap(page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;UnlockPage(page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;return err;<br>
        &nbsp;&nbsp;&nbsp; }<br>
        &nbsp;&nbsp;&nbsp; ClearPageUptodate(page);<br>
        &nbsp;&nbsp;&nbsp; goto done;<br>
        }&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        另外一个prepare write就是为不准有空洞的文件系统准备的:<br>
        int <font color=#006600><b>cont_prepare_write</b></font>(struct page
        *page, unsigned offset, unsigned to, get_block_t *get_block, unsigned
        long *bytes) //bytes 是当前这个文件的最后一个byte的位置<br>
        {<br>
        &nbsp;&nbsp;&nbsp; .....<br>
        <br>
        &nbsp;&nbsp;&nbsp; while(page-&gt;index &gt; (pgpos =
        *bytes&gt;&gt;PAGE_CACHE_SHIFT)) {
        <b>//如果请求页超过当前最后一个byte,就要将空洞部分全部分配并填上0</b><br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; status = -ENOMEM;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; new_page =
        grab_cache_page(mapping, pgpos);&nbsp; //分配或者查找page cache<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; .....<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; zerofrom = *bytes &amp;
        ~PAGE_CACHE_MASK;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if (zerofrom &amp; (blocksize-1))
        {<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; *bytes |=
        (blocksize-1);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; (*bytes)++;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; status =
        __block_prepare_write(inode, new_page, zerofrom,<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
        PAGE_CACHE_SIZE, get_block);<b> //将中间位置的页面填 0 并写入文件</b><br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if (status)<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; goto out_unmap;<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; kaddr =
        page_address(new_page);<b>//将中间位置的页面填 0 并写入文件</b><br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; memset(kaddr+zerofrom, 0,
        PAGE_CACHE_SIZE-zerofrom);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; flush_dcache_page(new_page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; __block_commit_write(inode,
        new_page, zerofrom, PAGE_CACHE_SIZE);<b>//将中间位置的页面填 0 并写入文件</b><br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; kunmap(new_page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; UnlockPage(new_page);<br>
        &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; page_cache_release(new_page);<br>
        &nbsp;&nbsp;&nbsp; }<br>
        <br>
        &nbsp;&nbsp; &nbsp; ......&nbsp;&nbsp; //零头处理,略<br>
        &nbsp;&nbsp;&nbsp; return 0;<br>
        &nbsp;&nbsp;&nbsp; ..... //出错处理路径(out_unmap等标签),略<br>
        &nbsp;&nbsp;&nbsp; return status;<br>
        }<br>
        对比下read, read的操作都是启动了磁盘io的.<br>
        <br>
        brw_page: 提供给swap buffer 使用.<br>
        brw_kiovec: raw.c使用,以后再说吧,逻辑不复杂.
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        <div style=TEXT-ALIGN:center>
          <b>4)Buffer cache 和 Inode 的关系总结
          &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
          </b><br>
        </div>
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<br>
        &nbsp;&nbsp; 在分析<font color=#006600><b>__block_commit_write
        </b><font color=#000000>的时候,
        我们知道file的数据进入了</font></font><font color=#006600>inode-&gt;i_dirty_buffers,
        <font color=#000000>并且加入了buffer的lru队列,但是这不代表文件数据加入了buffer cache.&nbsp;
        另外一个加入</font></font><font color=#006600>inode-&gt;i_dirty_buffers</font><font color=#006600><font color=#000000>的方式是<br>
        static inline void <b>mark_buffer_dirty_inode</b>(struct buffer_head
        *bh, struct inode *inode)<br>
        {<br>
        &nbsp;&nbsp;&nbsp; mark_buffer_dirty(bh);<br>
        &nbsp;&nbsp;&nbsp; buffer_insert_inode_queue(bh, inode);<br>
        }<br>
        &nbsp; 稍微搜索一下调用者就知道,
        元数据也加入了</font></font><font color=#006600>inode-&gt;i_dirty_buffers</font><font color=#006600><font color=#000000>.<br>
        </font></font><font color=#000000>&nbsp; 好就是这样.<br>
        <br>
        &nbsp;</font><br>
        <br>
        <div style=TEXT-ALIGN:center>
          <b>5）buffer cache的老化回收:lru 队列</b><br>
        </div>
        <b><br>
        </b>bdflush进程主要负责将dirty的buffer写入磁盘,
        通过上面的分析我们知道无论是元数据还是文件数据,都通过bh进入lru队列。<br>
        <br>
        union bdflush_param {<br>
        } <font color=#006600>bdf_prm</font> = {{30, 64, 64, 256, 5*HZ, 30*HZ,
        60, 0, 0}};<br>
        /* These are the min and max parameter values that we will allow to be
        assigned */<br>
        int <font color=#006600>bdflush_min</font>[N_PARAM] = {&nbsp; 0,&nbsp;
        10,&nbsp;&nbsp;&nbsp; 5,&nbsp;&nbsp; 25,&nbsp; 0,&nbsp;&nbsp;
        1*HZ,&nbsp;&nbsp; 0, 0, 0};<br>
        int<font color=#006600> bdflush_max</font>[N_PARAM] = {100,50000, 20000,
        20000,600*HZ, 6000*HZ, 100, 0, 0};<br>
        <br>
        &nbsp; 作为buffer cache，必须有buffer_head, struct
        page，和数据区（物理内存页面），缺一不可，并且要同时(几乎都是同时的呵呵)加入lru list 和hash表，这个我们在分析page
        cache （filemap.c) 的时候就见过类似的概念了。<br>
        &nbsp;&nbsp; 另外文件数据只进入lru 队列,并不加入buffer cache,要时刻记住了.<br>
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -