buffer.c
/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * 'buffer.c' implements the buffer-cache functions. Race-conditions have
 * been avoided by NEVER letting an interrupt change a buffer (except for the
 * data, of course), but instead letting the caller do it.
 */

/* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */

/* Removed a lot of unnecessary code and simplified things now that
 * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
 */

/* Speed up hash, lru, and free list operations.  Use gfp() for allocating
 * hash table, use SLAB cache for buffer heads. -DaveM
 */

/* Added 32k buffer block sizes - these are required for older ARM systems.
 * - RMK
 */

/* Thread it... -DaveM */

/* async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/locks.h>
#include <linux/errno.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/sysrq.h>
#include <linux/file.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/iobuf.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/completion.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/bitops.h>
#include <asm/mmu_context.h>

#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
#define NR_RESERVED (10*MAX_BUF_PER_PAGE)
#define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
					     number of unused buffer heads */

/* Anti-deadlock ordering:
 *	lru_list_lock > hash_table_lock > unused_list_lock
 */

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers)

/*
 * Hash table gook..
 */
static unsigned int bh_hash_mask;
static unsigned int bh_hash_shift;
static struct buffer_head **hash_table;
static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED;

static struct buffer_head *lru_list[NR_LIST];
static spinlock_t lru_list_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
static int nr_buffers_type[NR_LIST];
static unsigned long size_buffers_type[NR_LIST];

static struct buffer_head * unused_list;
static int nr_unused_buffer_heads;
static spinlock_t unused_list_lock = SPIN_LOCK_UNLOCKED;
static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);

static int grow_buffers(kdev_t dev, unsigned long block, int size);
static void __refile_buffer(struct buffer_head *);

/* This is used by some architectures to estimate available memory. */
atomic_t buffermem_pages = ATOMIC_INIT(0);
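/*
 * Illustrative sketch (not in the original file): a hypothetical caller
 * that needs both the LRU list and a hash chain must take the locks in
 * the documented anti-deadlock order, outermost first. The helpers
 * __hash_unlink() and __remove_from_lru_list() are defined further down
 * in this excerpt; the real __remove_from_queues() in this file follows
 * the same shape.
 */
static void example_unlink_buffer(struct buffer_head *bh)
{
	spin_lock(&lru_list_lock);	/* outermost: lru_list_lock */
	write_lock(&hash_table_lock);	/* nested inside lru_list_lock */
	__hash_unlink(bh);		/* drop from the hash chain */
	__remove_from_lru_list(bh);	/* drop from its LRU ring */
	write_unlock(&hash_table_lock);	/* release in reverse order */
	spin_unlock(&lru_list_lock);
}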
/* Here is the parameter block for the bdflush process. If you add or
 * remove any of the parameters, make sure to update kernel/sysctl.c
 * and the documentation at linux/Documentation/sysctl/vm.txt.
 */

#define N_PARAM 9

/* The dummy values in this structure are left in there for compatibility
 * with old programs that play with the /proc entries.
 */
union bdflush_param {
	struct {
		int nfract;	/* Percentage of buffer cache dirty to
				   activate bdflush */
		int dummy1;	/* old "ndirty" */
		int dummy2;	/* old "nrefill" */
		int dummy3;	/* unused */
		int interval;	/* jiffies delay between kupdate flushes */
		int age_buffer;	/* Time for normal buffer to age before we flush it */
		int nfract_sync;/* Percentage of buffer cache dirty to activate
				   bdflush synchronously */
		int dummy4;	/* unused */
		int dummy5;	/* unused */
	} b_un;
	unsigned int data[N_PARAM];
} bdf_prm = {{40, 0, 0, 0, 5*HZ, 30*HZ, 60, 0, 0}};

/* These are the min and max parameter values that we will allow to be assigned */
int bdflush_min[N_PARAM] = {  0,  10,    5,   25,  0,   1*HZ,   0, 0, 0};
int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,10000*HZ, 6000*HZ, 100, 0, 0};

void unlock_buffer(struct buffer_head *bh)
{
	clear_bit(BH_Wait_IO, &bh->b_state);
	clear_bit(BH_launder, &bh->b_state);
	clear_bit(BH_Lock, &bh->b_state);
	smp_mb__after_clear_bit();
	if (waitqueue_active(&bh->b_wait))
		wake_up(&bh->b_wait);
}

/*
 * Rewrote the wait-routines to use the "new" wait-queue functionality,
 * and got rid of the cli-sti pairs. The wait-queue routines still
 * need cli-sti, but now it's just a couple of 386 instructions or so.
 *
 * Note that the real wait_on_buffer() is an inline function that checks
 * if 'b_wait' is set before calling this, so that the queues aren't set
 * up unnecessarily.
 */
void __wait_on_buffer(struct buffer_head * bh)
{
	struct task_struct *tsk = current;
	DECLARE_WAITQUEUE(wait, tsk);

	get_bh(bh);
	add_wait_queue(&bh->b_wait, &wait);
	do {
		run_task_queue(&tq_disk);
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		if (!buffer_locked(bh))
			break;
		schedule();
	} while (buffer_locked(bh));
	tsk->state = TASK_RUNNING;
	remove_wait_queue(&bh->b_wait, &wait);
	put_bh(bh);
}

/*
 * Default synchronous end-of-IO handler.. Just mark it up-to-date and
 * unlock the buffer. This is what ll_rw_block uses too.
 */
void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	mark_buffer_uptodate(bh, uptodate);
	unlock_buffer(bh);
	put_bh(bh);
}

/*
 * The buffers have been marked clean and locked.  Just submit the dang
 * things..
 */
static void write_locked_buffers(struct buffer_head **array, unsigned int count)
{
	do {
		struct buffer_head * bh = *array++;
		bh->b_end_io = end_buffer_io_sync;
		submit_bh(WRITE, bh);
	} while (--count);
}

/*
 * Write some buffers from the head of the dirty queue.
 *
 * This must be called with the LRU lock held, and will
 * return without it!
 */
#define NRSYNC (32)
static int write_some_buffers(kdev_t dev)
{
	struct buffer_head *next;
	struct buffer_head *array[NRSYNC];
	unsigned int count;
	int nr;

	next = lru_list[BUF_DIRTY];
	nr = nr_buffers_type[BUF_DIRTY];
	count = 0;
	while (next && --nr >= 0) {
		struct buffer_head * bh = next;
		next = bh->b_next_free;

		if (dev && bh->b_dev != dev)
			continue;
		if (test_and_set_bit(BH_Lock, &bh->b_state))
			continue;
		if (atomic_set_buffer_clean(bh)) {
			__refile_buffer(bh);
			get_bh(bh);
			array[count++] = bh;
			if (count < NRSYNC)
				continue;

			spin_unlock(&lru_list_lock);
			write_locked_buffers(array, count);
			return -EAGAIN;
		}
		unlock_buffer(bh);
		__refile_buffer(bh);
	}
	spin_unlock(&lru_list_lock);

	if (count)
		write_locked_buffers(array, count);
	return 0;
}

/*
 * Write out all buffers on the dirty list.
 */
static void write_unlocked_buffers(kdev_t dev)
{
	do {
		spin_lock(&lru_list_lock);
	} while (write_some_buffers(dev));
	run_task_queue(&tq_disk);
}
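/*
 * For context (sketch, not in the original file): end_buffer_io_sync()
 * and __wait_on_buffer() above combine into the classic synchronous
 * read pattern that bread() implements elsewhere in this file. A
 * hypothetical caller, assuming the usual 2.4 getblk()/ll_rw_block()
 * interfaces, would look like this:
 */
static struct buffer_head *example_read_block(kdev_t dev, int block, int size)
{
	struct buffer_head *bh = getblk(dev, block, size);

	if (buffer_uptodate(bh))
		return bh;
	ll_rw_block(READ, 1, &bh);	/* queue the read; completion unlocks bh */
	wait_on_buffer(bh);		/* sleeps in __wait_on_buffer() while locked */
	if (buffer_uptodate(bh))
		return bh;
	brelse(bh);			/* I/O error: drop our reference */
	return NULL;
}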
/*
 * Wait for a buffer on the proper list.
 *
 * This must be called with the LRU lock held, and
 * will return with it released.
 */
static int wait_for_buffers(kdev_t dev, int index, int refile)
{
	struct buffer_head * next;
	int nr;

	next = lru_list[index];
	nr = nr_buffers_type[index];
	while (next && --nr >= 0) {
		struct buffer_head *bh = next;
		next = bh->b_next_free;

		if (!buffer_locked(bh)) {
			if (refile)
				__refile_buffer(bh);
			continue;
		}
		if (dev && bh->b_dev != dev)
			continue;

		get_bh(bh);
		spin_unlock(&lru_list_lock);
		wait_on_buffer (bh);
		put_bh(bh);
		return -EAGAIN;
	}
	spin_unlock(&lru_list_lock);
	return 0;
}

static inline void wait_for_some_buffers(kdev_t dev)
{
	spin_lock(&lru_list_lock);
	wait_for_buffers(dev, BUF_LOCKED, 1);
}

static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
{
	do {
		spin_lock(&lru_list_lock);
	} while (wait_for_buffers(dev, index, refile));
	return 0;
}

/* Call sync_buffers with wait!=0 to ensure that the call does not
 * return until all buffer writes have completed.  Sync() may return
 * before the writes have finished; fsync() may not.
 */

/* Godamity-damn.  Some buffers (bitmaps for filesystems)
 * spontaneously dirty themselves without ever brelse being called.
 * We will ultimately want to put these in a separate list, but for
 * now we search all of the lists for dirty buffers.
 */
int sync_buffers(kdev_t dev, int wait)
{
	int err = 0;

	/* One pass for no-wait, three for wait:
	 * 0) write out all dirty, unlocked buffers;
	 * 1) wait for all dirty locked buffers;
	 * 2) write out all dirty, unlocked buffers;
	 * 3) wait for completion by waiting for all buffers to unlock.
	 */
	write_unlocked_buffers(dev);
	if (wait) {
		err = wait_for_locked_buffers(dev, BUF_DIRTY, 0);
		write_unlocked_buffers(dev);
		err |= wait_for_locked_buffers(dev, BUF_LOCKED, 1);
	}
	return err;
}

int fsync_super(struct super_block *sb)
{
	kdev_t dev = sb->s_dev;
	sync_buffers(dev, 0);

	lock_kernel();
	sync_inodes_sb(sb);
	DQUOT_SYNC(dev);
	lock_super(sb);
	if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
		sb->s_op->write_super(sb);
	unlock_super(sb);
	unlock_kernel();

	return sync_buffers(dev, 1);
}

int fsync_no_super(kdev_t dev)
{
	sync_buffers(dev, 0);
	return sync_buffers(dev, 1);
}

int fsync_dev(kdev_t dev)
{
	sync_buffers(dev, 0);

	lock_kernel();
	sync_inodes(dev);
	DQUOT_SYNC(dev);
	sync_supers(dev);
	unlock_kernel();

	return sync_buffers(dev, 1);
}

/*
 * There's no real reason to pretend we should
 * ever do anything differently
 */
void sync_dev(kdev_t dev)
{
	fsync_dev(dev);
}

asmlinkage long sys_sync(void)
{
	fsync_dev(0);
	return 0;
}

/*
 *	filp may be NULL if called via the msync of a vma.
 */

int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	struct inode * inode = dentry->d_inode;
	struct super_block * sb;
	kdev_t dev;
	int ret;

	lock_kernel();
	/* sync the inode to buffers */
	write_inode_now(inode, 0);

	/* sync the superblock to buffers */
	sb = inode->i_sb;
	lock_super(sb);
	if (sb->s_op && sb->s_op->write_super)
		sb->s_op->write_super(sb);
	unlock_super(sb);

	/* .. finally sync the buffers to disk */
	dev = inode->i_dev;
	ret = sync_buffers(dev, 1);
	unlock_kernel();
	return ret;
}
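/*
 * For reference (not in the original file): sys_fsync() below is the
 * kernel side of fsync(2). The userspace pattern it serves is simply:
 *
 *	int fd = open("data", O_WRONLY);
 *	write(fd, buf, len);
 *	if (fsync(fd) < 0)		-- enters sys_fsync() via the syscall table
 *		perror("fsync");
 *
 * fdatasync(2) reaches sys_fdatasync() instead and, per its contract,
 * need not flush metadata that is not required to retrieve the data.
 */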
asmlinkage long sys_fsync(unsigned int fd)
{
	struct file * file;
	struct dentry * dentry;
	struct inode * inode;
	int ret, err;

	ret = -EBADF;
	file = fget(fd);
	if (!file)
		goto out;

	dentry = file->f_dentry;
	inode = dentry->d_inode;

	ret = -EINVAL;
	if (!file->f_op || !file->f_op->fsync) {
		/* Why?  We can still call filemap_fdatasync */
		goto out_putf;
	}

	/* We need to protect against concurrent writers.. */
	down(&inode->i_sem);
	ret = filemap_fdatasync(inode->i_mapping);
	err = file->f_op->fsync(file, dentry, 0);
	if (err && !ret)
		ret = err;
	err = filemap_fdatawait(inode->i_mapping);
	if (err && !ret)
		ret = err;
	up(&inode->i_sem);

out_putf:
	fput(file);
out:
	return ret;
}

asmlinkage long sys_fdatasync(unsigned int fd)
{
	struct file * file;
	struct dentry * dentry;
	struct inode * inode;
	int ret, err;

	ret = -EBADF;
	file = fget(fd);
	if (!file)
		goto out;

	dentry = file->f_dentry;
	inode = dentry->d_inode;

	ret = -EINVAL;
	if (!file->f_op || !file->f_op->fsync)
		goto out_putf;

	down(&inode->i_sem);
	ret = filemap_fdatasync(inode->i_mapping);
	err = file->f_op->fsync(file, dentry, 1);
	if (err && !ret)
		ret = err;
	err = filemap_fdatawait(inode->i_mapping);
	if (err && !ret)
		ret = err;
	up(&inode->i_sem);

out_putf:
	fput(file);
out:
	return ret;
}

/* After several hours of tedious analysis, the following hash
 * function won. Do not mess with it... -DaveM
 */
#define _hashfn(dev,block)	\
	((((dev)<<(bh_hash_shift - 6)) ^ ((dev)<<(bh_hash_shift - 9))) ^ \
	 (((block)<<(bh_hash_shift - 6)) ^ ((block) >> 13) ^ \
	  ((block) << (bh_hash_shift - 12))))
#define hash(dev,block) hash_table[(_hashfn(HASHDEV(dev),block) & bh_hash_mask)]

static inline void __insert_into_hash_list(struct buffer_head *bh)
{
	struct buffer_head **head = &hash(bh->b_dev, bh->b_blocknr);
	struct buffer_head *next = *head;

	*head = bh;
	bh->b_pprev = head;
	bh->b_next = next;
	if (next != NULL)
		next->b_pprev = &bh->b_next;
}

static __inline__ void __hash_unlink(struct buffer_head *bh)
{
	struct buffer_head **pprev = bh->b_pprev;
	if (pprev) {
		struct buffer_head *next = bh->b_next;
		if (next)
			next->b_pprev = pprev;
		*pprev = next;
		bh->b_pprev = NULL;
	}
}

static void __insert_into_lru_list(struct buffer_head * bh, int blist)
{
	struct buffer_head **bhp = &lru_list[blist];

	if (bh->b_prev_free || bh->b_next_free)
		BUG();

	if(!*bhp) {
		*bhp = bh;
		bh->b_prev_free = bh;
	}
	bh->b_next_free = *bhp;
	bh->b_prev_free = (*bhp)->b_prev_free;
	(*bhp)->b_prev_free->b_next_free = bh;
	(*bhp)->b_prev_free = bh;
	nr_buffers_type[blist]++;
	size_buffers_type[blist] += bh->b_size;
}

static void __remove_from_lru_list(struct buffer_head * bh)
{
	struct buffer_head *next = bh->b_next_free;
	if (next) {
		struct buffer_head *prev = bh->b_prev_free;
		int blist = bh->b_list;

		prev->b_next_free = next;
		next->b_prev_free = prev;
		if (lru_list[blist] == bh) {
			if (next == bh)
				next = NULL;
			lru_list[blist] = next;
		}
		bh->b_next_free = NULL;
		bh->b_prev_free = NULL;
		nr_buffers_type[blist]--;
		/* the excerpt was cut off here; completed to keep the size
		   accounting symmetric with __insert_into_lru_list() */
		size_buffers_type[blist] -= bh->b_size;
	}
}
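/*
 * The lookup side of __insert_into_hash_list() (sketch, modeled on this
 * file's get_hash_table(), which appears later in the full source):
 * walk the chain under the read lock and take a reference on a match.
 */
static struct buffer_head *example_get_hash_table(kdev_t dev, int block, int size)
{
	struct buffer_head *bh, **p = &hash(dev, block);

	read_lock(&hash_table_lock);
	for (;;) {
		bh = *p;
		if (!bh)
			break;
		p = &bh->b_next;
		if (bh->b_blocknr != block)
			continue;
		if (bh->b_size != size)
			continue;
		if (bh->b_dev != dev)
			continue;
		get_bh(bh);	/* pin the buffer before dropping the lock */
		break;
	}
	read_unlock(&hash_table_lock);
	return bh;
}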