📄 035_fs_buffer_c.html
<br>
Let's start with bdflush.<br>
sys_bdflush: just tunable parameters, skipped.<br>
From __init bdflush_init(void) we learn that two kernel threads are dedicated to reclaiming buffers: <b>bdflush</b> and <b>kupdate.</b><br>
/*<br>
* This is the actual bdflush daemon itself. It used to be started from<br>
* the syscall above, but now we launch it ourselves internally with<br>
* kernel_thread(...) directly after the first thread in init/main.c<br>
*/<br>
int <font color=#006600><b>bdflush</b></font>(void *sem)<br>
{<br>
struct task_struct *tsk = current;<br>
int flushed;<br>
....// initialization, omitted<br>
....// clear signals, omitted<br>
for (;;) { //the main job:<br>
CHECK_EMERGENCY_SYNC //we will come back to this later<br>
<br>
flushed = flush_dirty_buffers(0); <b>//flush buffers: walk the lru lists and start the disk I/O, nothing more.</b><br>
if (free_shortage()) //physical pages are running short<br>
flushed += page_launder(GFP_KERNEL, 0); <b>//try to reclaim some pages; more dirty pages will enter the buffer lru via their bh's</b><br>
<br>
/*<br>
* If there are still a lot of dirty buffers around,<br>
* skip the sleep and flush some more. Otherwise, we<br>
* go to sleep waiting a wakeup.<br>
*/<br>
set_current_state(TASK_INTERRUPTIBLE);<br>
if (!flushed || <b>balance_dirty_state</b>(NODEV) < 0) { <b>//based on how many dirty buffers there are, and whether free pages are short, decide between a synchronous flush, an asynchronous flush, or none at all.<br>
</b><b>run_task_queue(&tq_disk); //getting here means no further flushing is needed for now, so let the bh's queued on tq_disk perform their real I/O<br>
//see the earlier analysis of tq_disk: a bh can linger there indefinitely (if nobody calls this function to drive the queue)<br>
</b> schedule();<br>
}<br>
/* Remember to mark us as running otherwise<br>
the next schedule will block. */<br>
__set_current_state(TASK_RUNNING);<br>
}<br>
}<br>
<br>
/*<br>
* This is the kernel update daemon. It was used to live in userspace<br>
* but since it's need to run safely we want it unkillable by mistake.<br>
* You don't need to change your userspace configuration since<br>
* the userspace `update` will do_exit(0) at the first sys_bdflush().<br>
*/<br>
int <font color=#006600><b>kupdate</b></font>(void *sem)<br>
{<br>
....// initialization, omitted<br>
....// clear signals, omitted<br>
<br>
for (;;) {<br>
/* update interval */<br>
interval = bdf_prm.b_un.interval;<br>
if (interval) {<br>
tsk->state = TASK_INTERRUPTIBLE;<br>
schedule_timeout(interval); <b>//run at a fixed interval</b><br>
} else {<br>
stop_kupdate:<br>
tsk->state = TASK_STOPPED;<br>
schedule(); /* wait for SIGCONT */<br>
}<br>
/* check for sigstop */<br>
if (signal_pending(tsk)) {<br>
int stopped = 0;<br>
spin_lock_irq(&tsk->sigmask_lock);<br>
if (sigismember(&tsk->pending.signal, <b>SIGSTOP</b>)) { //stop running when <b>SIGSTOP</b> is received<br>
sigdelset(&tsk->pending.signal, SIGSTOP);<br>
stopped = 1;<br>
}<br>
recalc_sigpending(tsk);<br>
spin_unlock_irq(&tsk->sigmask_lock);<br>
if (stopped)<br>
goto stop_kupdate;<br>
}<br>
#ifdef DEBUG<br>
printk("kupdate() activated...\n");<br>
#endif<br>
sync_old_buffers(); <b>//the net effect: this function runs at a fixed interval</b><br>
}<br>
}<br>
/*<br>
* Here we attempt to write back old buffers. We also try to flush inodes<br>
* and supers as well, since this function is essentially "update", and<br>
* otherwise there would be no way of ensuring that these quantities ever<br>
* get written back. Ideally, we would have a timestamp on the inodes<br>
* and superblocks so that we could write back only the old ones as well<br>
*/<br>
<br>
static int <b>sync_old_buffers</b>(void)<br>
{<br>
lock_kernel();<br>
sync_supers(0); //write back superblocks<br>
sync_inodes(0); //write back the inodes themselves and their filemap pages<br>
unlock_kernel();<br>
//once the writeback is done there are even more bh's on the lru lists!!<br>
<br>
<b>flush_dirty_buffers(1)</b>; <b>//check the timestamps and only flush buffers that are old enough; same job as bdflush: start the disk I/O</b><br>
/* must really sync all the active I/O request to disk here */<br>
run_task_queue(&tq_disk); <b>//don't let bh's sleep forever on the disk request queue (no timer drives it; it only runs when called by hand like this)</b><br>
return 0;<br>
}<br>
While we are at it, let's look at tq_disk. It is a task queue, but not every task queue gets run automatically. In fact, across the code covered by this series (kernel, fs (only ext2/proc/devfs and the common fs support), mm, driver/(ide pci)) there are only three: extern task_queue tq_timer, tq_immediate, tq_disk;<br>
Of these three task queues, tq_disk, unlike the other two, is not hooked into the bottom-half processing.<br>
<div id=iwsd style="PADDING:1em 0pt; TEXT-ALIGN:left">
<div id=bmom style="PADDING:1em 0pt; TEXT-ALIGN:left">
<img src=035_fs_buffer_c_images/dcbsxfpf_17czwbbgd4.gif style="WIDTH:674px; HEIGHT:344px">
</div>
</div>
Other interface functions:<br>
int block_sync_page(struct page *page)<br>
void wakeup_bdflush(int block)<br>
<br>
<br>
<div style=TEXT-ALIGN:center>
<b>More on reclaiming buffer heads</b><br>
</div>
<br>
try_to_free_buffers is the main entry point for reclaiming both buffers and buffer heads. Whether a buffer and its bh belong to the buffer cache or merely serve as an I/O entry, the vast majority are reclaimed through the page cache lru lists. Note that pages in the buffer cache are also placed on the page cache lru lists (on the lru lists only, though; you will not find them on the page cache hash lists). In addition, flushing a page also tries to release its buffer heads;<br>
see block_flushpage (used for file truncation).<br>
<br>
<div style=TEXT-ALIGN:center>
<b>The remaining pieces: sync, invalidate, truncate</b><br>
</div>
<br>
<font size=5><b>Sync:</b></font>
A file system's dirty data is written back periodically, according to policy; sometimes, though, dirty data must reach the disk immediately, and that is what the sync support is for.<br>
Note in passing that sync_page_buffers(struct buffer_head *bh, int wait) exists for the benefit of try_to_free_buffers and has little to do with the file sync operations discussed here.<br>
Let's walk through the several flavors of sync:<br>
1) fsync and fdatasync(int fd): hopefully the man-page excerpt below is enough to understand these two operations.<br>
fdatasync() flushes all data buffers of a file to disk (before the system call returns). It resembles fsync() but is not required to update the metadata such as access time.<br>
asmlinkage long <b>sys_fsync</b>(unsigned int fd)<br>
{<br>
struct file * file;<br>
struct dentry * dentry;<br>
struct inode * inode;<br>
int err;<br>
<br>
err = -EBADF;<br>
file = fget(fd);<br>
if (!file)<br>
goto out;<br>
<br>
dentry = file->f_dentry;<br>
inode = dentry->d_inode;<br>
<br>
err = -EINVAL;<br>
if (!file->f_op || !file->f_op->fsync)<br>
goto out_putf;<br>
<br>
/* We need to protect against concurrent writers.. */<br>
down(&inode->i_sem);<br>
filemap_fdatasync(inode->i_mapping); /*<br>
<font color=#3333ff><b>int (*writepage)(struct page *) = mapping->a_ops->writepage;</b><br>
<b>ie, ext2_writepage->block_write_full_page->submit all bh's to the driver</b></font><br>
*/<br>
err = file->f_op->fsync(file, dentry, 0); <font color=#3333ff>/* basically calls file_fsync; for ext2 it is fsync_inode_buffers */</font><br>
up(&inode->i_sem);<br>
<br>
out_putf:<br>
fput(file);<br>
out:<br>
return err;<br>
}<br>