buffer.c
#define buffer_busy(bh)	((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))

#ifndef OSKIT
/*
 * try_to_free_buffers() checks if all the buffers on this particular page
 * are unused, and frees the page if so.
 *
 * Wake up bdflush() if this fails - if we're running low on memory due
 * to dirty buffers, we need to flush them out as quickly as possible.
 */
int try_to_free_buffers(struct page * page_map)
{
	struct buffer_head * tmp, * bh = page_map->buffers;

	tmp = bh;
	do {
		struct buffer_head * p = tmp;
		tmp = tmp->b_this_page;
		if (!buffer_busy(p))
			continue;

		wakeup_bdflush(0);
		return 0;
	} while (tmp != bh);

	tmp = bh;
	do {
		struct buffer_head * p = tmp;
		tmp = tmp->b_this_page;
		nr_buffers--;
		remove_from_queues(p);
		put_unused_buffer_head(p);
	} while (tmp != bh);

	/* Wake up anyone waiting for buffer heads */
	wake_up(&buffer_wait);

	/* And free the page */
	buffermem -= PAGE_SIZE;
	page_map->buffers = NULL;
	__free_page(page_map);
	return 1;
}
#endif

/* ================== Debugging =================== */
void show_buffers(void)
{
	struct buffer_head * bh;
	int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
	int protected = 0;
	int nlist;
	static char *buf_types[NR_LIST] = {"CLEAN", "LOCKED", "DIRTY"};

	printk("Buffer memory:   %6dkB\n", buffermem >> 10);
	printk("Buffer heads:    %6d\n", nr_buffer_heads);
	printk("Buffer blocks:   %6d\n", nr_buffers);
	printk("Buffer hashed:   %6d\n", nr_hashed_buffers);

	for (nlist = 0; nlist < NR_LIST; nlist++) {
		found = locked = dirty = used = lastused = protected = 0;
		bh = lru_list[nlist];
		if (!bh)
			continue;

		do {
			found++;
			if (buffer_locked(bh))
				locked++;
			if (buffer_protected(bh))
				protected++;
			if (buffer_dirty(bh))
				dirty++;
			if (bh->b_count)
				used++, lastused = found;
			bh = bh->b_next_free;
		} while (bh != lru_list[nlist]);

		printk("%8s: %d buffers, %d used (last=%d), "
		       "%d locked, %d protected, %d dirty\n",
		       buf_types[nlist], found, used, lastused,
		       locked, protected, dirty);
	}
}

/* ===================== Init ======================= */
/*
 * Allocate the hash table and init the free list.
 * Use gfp() for the hash table to decrease TLB misses; use the
 * SLAB cache for buffer heads.
 */
void __init buffer_init(unsigned long memory_size)
{
	int order;
	unsigned int nr_hash;

	/* We need to guess at the right sort of size for a buffer cache.
	   The heuristic, from working with large databases and getting
	   fsync times (ext2) manageable, is the following: */
	memory_size >>= 20;
	for (order = 5; (1UL << order) < memory_size; order++)
		;

	/* Try to allocate something until we get it, or we're asking
	   for something that is really too small. */
	do {
		nr_hash = (1UL << order) * PAGE_SIZE /
			  sizeof(struct buffer_head *);
		hash_table = (struct buffer_head **)
			__get_free_pages(GFP_ATOMIC, order);
	} while (hash_table == NULL && --order > 4);

	if (!hash_table)
		panic("Failed to allocate buffer hash table\n");
	memset(hash_table, 0, nr_hash * sizeof(struct buffer_head *));
	bh_hash_mask = nr_hash - 1;

	bh_cachep = kmem_cache_create("buffer_head",
				      sizeof(struct buffer_head),
				      0,
				      SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!bh_cachep)
		panic("Cannot create buffer head SLAB cache\n");

	/*
	 * Allocate the reserved buffer heads.
	 */
	while (nr_buffer_heads < NR_RESERVED) {
		struct buffer_head * bh;

		bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC);
		if (!bh)
			break;
		put_unused_buffer_head(bh);
		nr_buffer_heads++;
	}

	lru_list[BUF_CLEAN] = 0;
	grow_buffers(BLOCK_SIZE);
}
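To make the sizing heuristic in buffer_init() concrete, here is a minimal standalone sketch of the same loop. It is illustrative only: the 4 KB page size and 4-byte pointer width are assumptions matching i386, where the real code uses PAGE_SIZE and sizeof(struct buffer_head *).

#include <stdio.h>

/* With 128 MB of memory, the loop settles at order 7 (1UL << 7 = 128 is
 * the first power of two not below 128), so the table would get
 * (1UL << 7) * 4096 / 4 = 131072 hash slots under these assumptions.
 */
int main(void)
{
	unsigned long memory_size = 128;	/* MB, i.e. after ">>= 20" */
	int order;

	for (order = 5; (1UL << order) < memory_size; order++)
		;
	printf("order = %d, nr_hash = %lu\n",
	       order, (1UL << order) * 4096UL / 4);
	return 0;
}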
#ifndef OSKIT
/* ====================== bdflush support =================== */

/* This is a simple kernel daemon, whose job it is to provide a dynamic
 * response to dirty buffers.  Once this process is activated, we write back
 * a limited number of buffers to the disks and then go back to sleep again.
 */
static struct wait_queue * bdflush_wait = NULL;
static struct wait_queue * bdflush_done = NULL;
struct task_struct *bdflush_tsk = 0;
#endif /* OSKIT */

void wakeup_bdflush(int wait)
{
#ifdef OSKIT
	printk("WARNING: wakeup_bdflush\n");
#else
	if (current == bdflush_tsk)
		return;
	wake_up(&bdflush_wait);
	if (wait) {
		run_task_queue(&tq_disk);
		sleep_on(&bdflush_done);
	}
#endif /* OSKIT */
}

#ifndef OSKIT
/*
 * Here we attempt to write back old buffers.  We also try to flush inodes
 * and supers as well, since this function is essentially "update", and
 * otherwise there would be no way of ensuring that these quantities ever
 * get written back.  Ideally, we would have a timestamp on the inodes
 * and superblocks so that we could write back only the old ones as well.
 */
static int sync_old_buffers(void)
{
	int i;
	int ndirty, nwritten;
	int nlist;
	int ncount;
	struct buffer_head * bh, *next;

	sync_supers(0);
	sync_inodes(0);

	ncount = 0;
#ifdef DEBUG
	for (nlist = 0; nlist < NR_LIST; nlist++)
#else
	for (nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
#endif
	{
		ndirty = 0;
		nwritten = 0;
	repeat:
		bh = lru_list[nlist];
		if (bh)
			for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) {
				/* We may have stalled while waiting for
				   I/O to complete. */
				if (bh->b_list != nlist)
					goto repeat;
				next = bh->b_next_free;
				if (!lru_list[nlist]) {
					printk("Dirty list empty %d\n", i);
					break;
				}

				/* Clean buffer on dirty list?  Refile it */
				if (nlist == BUF_DIRTY && !buffer_dirty(bh) &&
				    !buffer_locked(bh)) {
					refile_buffer(bh);
					continue;
				}

				/* Unlocked buffer on locked list?  Refile it */
				if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
					refile_buffer(bh);
					continue;
				}

				if (buffer_locked(bh) || !buffer_dirty(bh))
					continue;
				ndirty++;
				if (time_before(jiffies, bh->b_flushtime))
					continue;
				nwritten++;
				next->b_count++;
				bh->b_count++;
				bh->b_flushtime = 0;
#ifdef DEBUG
				if (nlist != BUF_DIRTY)
					ncount++;
#endif
				ll_rw_block(WRITE, 1, &bh);
				bh->b_count--;
				next->b_count--;
			}
	}
	run_task_queue(&tq_disk);
#ifdef DEBUG
	if (ncount)
		printk("sync_old_buffers: %d dirty buffers not on dirty list\n",
		       ncount);
	printk("Wrote %d/%d buffers\n", nwritten, ndirty);
#endif
	run_task_queue(&tq_disk);
	return 0;
}

/* This is the interface to bdflush.  As we get more sophisticated, we can
 * pass tuning parameters to this "process", to adjust how it behaves.
 * We would want to verify each parameter, however, to make sure that it
 * is reasonable.
 */
asmlinkage int sys_bdflush(int func, long data)
{
	int i, error = -EPERM;

	lock_kernel();
	if (!capable(CAP_SYS_ADMIN))
		goto out;

	if (func == 1) {
		unlock_kernel();
		/* do_exit directly and let kupdate do its work alone. */
		do_exit(0);
	}

	/* Basically, func 2N+2 reads parameter N and func 2N+3 writes it. */
	if (func >= 2) {
		i = (func - 2) >> 1;
		error = -EINVAL;
		if (i < 0 || i >= N_PARAM)
			goto out;
		if ((func & 1) == 0) {
			error = put_user(bdf_prm.data[i], (int *)data);
			goto out;
		}
		if (data < bdflush_min[i] || data > bdflush_max[i])
			goto out;
		bdf_prm.data[i] = data;
		error = 0;
		goto out;
	}

	/* Func 0 used to launch the actual bdflush and then never
	 * return (unless explicitly killed).  We return zero here to
	 * remain semi-compatible with present update(8) programs.
	 */
	error = 0;
out:
	unlock_kernel();
	return error;
}
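The func encoding above is easy to misread, so here is a hedged userspace sketch of how an update(8)-style tool might drive this interface: func = 2*i + 2 reads bdf_prm.data[i] through the pointer passed in data, and func = 2*i + 3 writes the value passed in data. The SYS_bdflush constant and the choice of parameter 0 are assumptions about the surrounding system, not something this file guarantees.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Hedged sketch: read tuning parameter 0 (func 2), then write it back
 * unchanged (func 3).  Needs CAP_SYS_ADMIN, and assumes this libc
 * exposes the syscall number as SYS_bdflush.
 */
int main(void)
{
	int value;

	if (syscall(SYS_bdflush, 2, &value) == 0)	/* func 2: read param 0 */
		printf("param 0 = %d\n", value);
	if (syscall(SYS_bdflush, 3, (long) value) != 0)	/* func 3: write param 0 */
		perror("bdflush");
	return 0;
}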
/* This is the actual bdflush daemon itself.  It used to be started from
 * the syscall above, but now we launch it ourselves internally with
 * kernel_thread(...) directly after the first thread in init/main.c.
 */

/* To prevent deadlocks for a loop device:
 * 1) Do non-blocking writes to loop (avoids deadlock with running
 *    out of request blocks).
 * 2) But do a blocking write if the only dirty buffers are loop buffers
 *    (otherwise we go into an infinite busy-loop).
 * 3) Quit writing loop blocks if a freelist went low (avoids deadlock
 *    with running out of free buffers for loop's "real" device).
 */
int bdflush(void * unused)
{
	int i;
	int ndirty;
	int nlist;
	int ncount;
	struct buffer_head * bh, *next;
	int major;
	int wrta_cmd = WRITEA;	/* non-blocking write for LOOP */

	/*
	 * We have a bare-bones task_struct, and really should fill
	 * in a few more things so "top" and /proc/2/{exe,root,cwd}
	 * display semi-sane things.  Not real crucial though...
	 */
	current->session = 1;
	current->pgrp = 1;
	sprintf(current->comm, "kflushd");
	bdflush_tsk = current;

	/*
	 * As a kernel thread we want to tamper with system buffers
	 * and other internals and thus be subject to the SMP locking
	 * rules.  (On a uniprocessor box this does nothing).
	 */
	lock_kernel();

	for (;;) {
#ifdef DEBUG
		printk("bdflush() activated...");
#endif
		CHECK_EMERGENCY_SYNC

		ncount = 0;
#ifdef DEBUG
		for (nlist = 0; nlist < NR_LIST; nlist++)
#else
		for (nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++)
#endif
		{
			ndirty = 0;
		repeat:
			bh = lru_list[nlist];
			if (bh)
				for (i = nr_buffers_type[nlist];
				     i-- > 0 && ndirty < bdf_prm.b_un.ndirty;
				     bh = next) {
					/* We may have stalled while waiting
					   for I/O to complete. */
					if (bh->b_list != nlist)
						goto repeat;
					next = bh->b_next_free;
					if (!lru_list[nlist]) {
						printk("Dirty list empty %d\n", i);
						break;
					}

					/* Clean buffer on dirty list?  Refile it */
					if (nlist == BUF_DIRTY && !buffer_dirty(bh)) {
						refile_buffer(bh);
						continue;
					}

					/* Unlocked buffer on locked list?  Refile it */
					if (nlist == BUF_LOCKED && !buffer_locked(bh)) {
						refile_buffer(bh);
						continue;
					}

					if (buffer_locked(bh) || !buffer_dirty(bh))
						continue;
					major = MAJOR(bh->b_dev);
					/* Should we write back buffers that are
					   shared or not?  Currently dirty buffers
					   are not shared, so it does not matter. */
					next->b_count++;
					bh->b_count++;
					ndirty++;
					bh->b_flushtime = 0;
					if (major == LOOP_MAJOR) {
						ll_rw_block(wrta_cmd, 1, &bh);
						wrta_cmd = WRITEA;
						if (buffer_dirty(bh))
							--ndirty;
					} else
						ll_rw_block(WRITE, 1, &bh);
#ifdef DEBUG
					if (nlist != BUF_DIRTY)
						ncount++;
#endif
					bh->b_count--;
					next->b_count--;
				}
		}
#ifdef DEBUG
		if (ncount)
			printk("sys_bdflush: %d dirty buffers not on dirty list\n",
			       ncount);
		printk("sleeping again.\n");
#endif
		/* If we didn't write anything, but there are still
		 * dirty buffers, then make the next write to a
		 * loop device be a blocking write.
		 * This lets us block--which we _must_ do!
		 */
		if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 &&
		    wrta_cmd != WRITE) {
			wrta_cmd = WRITE;
			continue;
		}
		run_task_queue(&tq_disk);
		wake_up(&bdflush_done);

		/* If there are still a lot of dirty buffers around,
		   skip the sleep and flush some more. */
		if (ndirty == 0 ||
		    nr_buffers_type[BUF_DIRTY] <=
		    nr_buffers * bdf_prm.b_un.nfract / 100) {
			spin_lock_irq(&current->sigmask_lock);
			flush_signals(current);
			spin_unlock_irq(&current->sigmask_lock);
			interruptible_sleep_on(&bdflush_wait);
		}
	}
}
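The loop-device handling in bdflush() amounts to a small state machine: try the non-blocking WRITEA first, and escalate to a blocking WRITE only when a whole pass wrote nothing while dirty buffers remain. The sketch below models just that control flow; the enum and write_buffer() are hypothetical stand-ins for WRITEA/WRITE and ll_rw_block(), not kernel APIs.

#include <stdio.h>

enum cmd { CMD_WRITEA, CMD_WRITE };	/* stand-ins for WRITEA/WRITE */

/* Simulated write: the non-blocking variant fails (the buffer stays
 * dirty), the blocking variant always succeeds.  Purely illustrative.
 */
static int write_buffer(enum cmd c, int *dirty)
{
	if (c == CMD_WRITE)
		*dirty = 0;
	return *dirty == 0;
}

int main(void)
{
	int dirty = 1, pass = 0;
	enum cmd wrta_cmd = CMD_WRITEA;

	while (dirty) {
		int ndirty = 0;

		if (write_buffer(wrta_cmd, &dirty))
			ndirty++;		/* a buffer actually went out */
		wrta_cmd = CMD_WRITEA;		/* reset to non-blocking, as bdflush does */
		if (ndirty == 0 && dirty)
			wrta_cmd = CMD_WRITE;	/* escalate: next pass blocks */
		printf("pass %d: ndirty = %d\n", ++pass, ndirty);
	}
	return 0;
}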
/*
 * This is the kernel update daemon.  It used to live in userspace, but
 * since it needs to run safely we want it unkillable by mistake.
 * You don't need to change your userspace configuration, since
 * the userspace `update` will do_exit(0) at the first sys_bdflush().
 */
int kupdate(void * unused)
{
	struct task_struct * tsk = current;
	int interval;

	tsk->session = 1;
	tsk->pgrp = 1;
	strcpy(tsk->comm, "kupdate");
	sigfillset(&tsk->blocked);
	/* SIGCONT will wake up kupdate after interval has been set to 0 */
	sigdelset(&tsk->blocked, SIGCONT);

	lock_kernel();
	for (;;) {
		interval = bdf_prm.b_un.interval;
		if (interval) {
			tsk->state = TASK_INTERRUPTIBLE;
			schedule_timeout(interval);
		} else {
			tsk->state = TASK_STOPPED;
			schedule();	/* wait for SIGCONT */
		}
#ifdef DEBUG
		printk("kupdate() activated...\n");
#endif
		sync_old_buffers();
	}
}
#endif /* OSKIT */
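Per the SIGCONT note in kupdate(), setting the interval parameter to 0 parks the daemon in TASK_STOPPED, after which each SIGCONT triggers exactly one sync_old_buffers() pass. A hedged sketch of a tool driving that, with two loud assumptions: that interval sits at parameter index 4 (true of stock kernels of this era, but verify against your tree), and that KUPDATE_PID is hypothetical and would really be looked up via /proc.

#include <signal.h>
#include <unistd.h>
#include <sys/syscall.h>

#define KUPDATE_PID 3	/* hypothetical pid; look it up at runtime */

int main(void)
{
	/* func 2*4 + 3 = 11: write parameter 4 (assumed to be interval) */
	syscall(SYS_bdflush, 11, 0L);
	/* Each SIGCONT now forces a single flush pass. */
	kill(KUPDATE_PID, SIGCONT);
	return 0;
}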