📄 035_fs_buffer_c.html
字号:
* dirty (hopefully that will not happen until we will free that
block ;-)<br>
* We don't even need to mark it not-uptodate - nobody can expect<br>
* anything from a newly allocated buffer anyway. We used to use<br>
* unmap_buffer() for such invalidation, but that was wrong. We
definitely<br>
* don't want to mark the alias unmapped, for example - it would
confuse<br>
* anyone who might pick it with bread() afterwards...<br>
*/<br>
<br>
static void<b> unmap_underlying_metadata</b>(struct buffer_head * bh)<br>
{<br>
struct buffer_head *old_bh;<br>
<br>
old_bh = get_hash_table(bh->b_dev,
bh->b_blocknr, bh->b_size);<br>
if (old_bh) {<br>
mark_buffer_clean(old_bh);<br>
wait_on_buffer(old_bh);<br>
clear_bit(BH_Req,
&old_bh->b_state);<br>
/* Here we could run brelse or
bforget. We use<br>
bforget because it
will try to put the buffer<br>
in the freelist. */<br>
<font size=4><b>__bforget</b></font>(old_bh);<br>
}<br>
}<br>
<br>
//mapping->a_ops->commit_write ->
<font color=#006600>block_commit_write --></font>__block_commit_write<br>
static int <font color=#006600><b>__block_commit_write</b></font>(struct
inode *inode, struct page *page,<br>
unsigned from, unsigned to)<br>
{<br>
<br>
for(bh = head = page->buffers, block_start = 0;<br>
bh != head || !block_start;<br>
block_start=block_end, bh =
bh->b_this_page) { <b>//遍历所有的bh</b><br>
block_end = block_start +
blocksize;<br>
if (block_end <= from ||
block_start >= to) {<br>
if
(!buffer_uptodate(bh))<br>
partial = 1;<br>
} else {<br>
set_bit(BH_Uptodate, &bh->b_state);<br>
if
(!atomic_set_buffer_dirty(bh)) {<br>
<b>__mark_dirty(bh)</b>;//<b>bh加入了lru队列,不代表就是加入了buffer
cache.加入hash才是加入buffer cache的标志</b><br>
<b> buffer_insert_inode_queue</b>(bh, inode);
<b>//呵呵这里证明,文件数据的bh关联一个inode,<br>
</b>
need_balance_dirty = 1;<br>
}<br>
}<br>
}<br>
.............<br>
if (!partial)<br>
SetPageUptodate(page);
//给page标记uptodate就够了,
通过后备任务,写入到磁盘(<font color=#006600>inode->i_dirty_buffers)</font><br>
return 0;<br>
}<br>
写文件的时候仅仅是标记dirty,连block
dev的io都没有启动,除非要求了同步写(O_SYNC),见<b>generic_file_write</b><br>
(if ((status >= 0) && (file->f_flags & O_SYNC)))<br>
这样才能速度快.<br>
<br>
然后看看写整个磁盘文件的函数:这个函数提供给filemap的sync和page_launder使用,所以是启动了磁盘的io操作的.不具体分析<br>
int<span style=COLOR:#006600> <b>block_write_full_page</b>(</span>struct
page *page, get_block_t *<b>get_block</b>)<br>
<b>//对ext2,就是ext2_get_block,map bh到具体设备上的block</b><br>
{<br>
struct inode *inode = page->mapping->host;<br>
unsigned long end_index = inode->i_size >>
PAGE_CACHE_SHIFT;<br>
unsigned offset;<br>
int err;<br>
<br>
/* easy case */<br>
if (page->index < end_index)<br>
return
__block_write_full_page(inode, page, get_block); <b>//可以整个页面写入的</b><br>
<br>
/* things got complicated... */<br>
offset = inode->i_size & (PAGE_CACHE_SIZE-1);<br>
/* OK, are we completely out? */<br>
if (page->index >= end_index+1 || !offset) {<br>
UnlockPage(page);<br>
return -EIO;<br>
}<br>
<br>
/* Sigh... will have to work, then... */<br>
err = __block_prepare_write(inode, page, 0, offset,
get_block); <b>//否则得拆分开写1部分</b><br>
if (!err) {<br>
memset(page_address(page) +
offset, 0, PAGE_CACHE_SIZE - offset);//clear无效部分<br>
flush_dcache_page(page);<br>
__block_commit_write(inode,page,0,offset);
<b>//分开写和写一页,除了写了不同数量的bh,其余都类似</b><br>
done:<br>
kunmap(page);<br>
UnlockPage(page);<br>
return err;<br>
}<br>
ClearPageUptodate(page);<br>
goto done;<br>
} <br>
另外一个prepare write就是为不准有空洞的文件系统准备的:<br>
int <font color=#006600><b>cont_prepare_write</b></font>(struct page
*page, unsigned offset, unsigned to, get_block_t *get_block, unsigned
long *bytes) //bytes 是当前这个文件的最后一个byte的位置<br>
{<br>
.....<br>
<br>
while(page->index > (pgpos =
*bytes>>PAGE_CACHE_SHIFT)) {
<b>//如果请求页超过当前最后一个byte,就要将空洞部分全部分配并填上0</b><br>
status = -ENOMEM;<br>
new_page =
grab_cache_page(mapping, pgpos); //分配或者查找page cache<br>
.....<br>
zerofrom = *bytes &
~PAGE_CACHE_MASK;<br>
if (zerofrom & (blocksize-1))
{<br>
*bytes |=
(blocksize-1);<br>
(*bytes)++;<br>
}<br>
status =
__block_prepare_write(inode, new_page, zerofrom,<br>
PAGE_CACHE_SIZE, get_block);<b> //将中间位置的页面填 0 并写入文件</b><br>
if (status)<br>
goto out_unmap;<br>
kaddr =
page_address(new_page);<b>//将中间位置的页面填 0 并写入文件</b><br>
memset(kaddr+zerofrom, 0,
PAGE_CACHE_SIZE-zerofrom);<br>
flush_dcache_page(new_page);<br>
__block_commit_write(inode,
new_page, zerofrom, PAGE_CACHE_SIZE);<b>//将中间位置的页面填 0 并写入文件</b><br>
kunmap(new_page);<br>
UnlockPage(new_page);<br>
page_cache_release(new_page);<br>
}<br>
<br>
...... //零头处理,略<br>
return 0;<br>
return status;<br>
}<br>
对比下read, read的操作都是启动了磁盘io的.<br>
<br>
brw_page: 提供给swap buffer 使用.<br>
brw_kiovec: raw.c使用,以后再说吧,逻辑不复杂.
<br>
<br>
<br>
<br>
<br>
<div style=TEXT-ALIGN:center>
<b>4)Buffer cache 和 Inode 的关系总结
</b><br>
</div>
<br>
在分析<font color=#006600><b>__block_commit_write
</b><font color=#000000>的时候,
我们知道file的数据进入了</font></font><font color=#006600>inode->i_dirty_buffers,
<font color=#000000>并且加入了buffer的lru队列,但是这不代表文件数据加入了buffer cache.
另外一个加入</font></font><font color=#006600>inode->i_dirty_buffers</font><font color=#006600><font color=#000000>的方式是<br>
static inline void <b>mark_buffer_dirty_inode</b>(struct buffer_head
*bh, struct inode *inode)<br>
{<br>
mark_buffer_dirty(bh);<br>
buffer_insert_inode_queue(bh, inode);<br>
}<br>
稍微搜索一下调用者就知道,
元数据也加入了</font></font><font color=#006600>inode->i_dirty_buffers</font><font color=#006600><font color=#000000>.<br>
</font></font><font color=#000000> 好就是这样.<br>
<br>
</font><br>
<br>
<div style=TEXT-ALIGN:center>
<b>5)buffer cache的老化回收:lru 队列</b><br>
</div>
<b><br>
</b>bdflush进程的主要任务是将dirty的buffer写入磁盘,
通过上面的分析我们知道无论是元数据还是文件数据,都通过bh进入lru队列。<br>
<br>
union bdflush_param {<br>
} <font color=#006600>bdf_prm</font> = {{30, 64, 64, 256, 5*HZ, 30*HZ,
60, 0, 0}};<br>
/* These are the min and max parameter values that we will allow to be
assigned */<br>
int <font color=#006600>bdflush_min</font>[N_PARAM] = { 0,
10, 5, 25, 0,
1*HZ, 0, 0, 0};<br>
int<font color=#006600> bdflush_max</font>[N_PARAM] = {100,50000, 20000,
20000,600*HZ, 6000*HZ, 100, 0, 0};<br>
<br>
作为buffer cache,必须有buffer_head, struct
page,和数据区(物理内存页面),缺一不可,并且要同时(几乎都是同时的呵呵)加入lru list 和hash表,这个我们在分析page
cache (filemap.c) 的时候就见过类似的概念了。<br>
另外文件数据只进入lru 队列,并不加入buffer cache,要时刻记住了.<br>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -