📄 xfs_buf.c
		pagebuf_delwri_queue(pb, 1);
		return 0;
	}

	if (pb->pb_flags & PBF_WRITE) {
		_pagebuf_wait_unpin(pb);
	}

	pagebuf_hold(pb);

	/* Set the count to 1 initially, this will stop an I/O
	 * completion callout which happens before we have started
	 * all the I/O from calling pagebuf_iodone too early.
	 */
	atomic_set(&pb->pb_io_remaining, 1);
	_pagebuf_ioapply(pb);
	_pagebuf_iodone(pb, 0);

	pagebuf_rele(pb);
	return 0;
}

/*
 * pagebuf_iowait
 *
 * pagebuf_iowait waits for I/O to complete on the buffer supplied.
 * It returns immediately if no I/O is pending.  In any case, it returns
 * the error code, if any, or 0 if there is no error.
 */
int
pagebuf_iowait(
	xfs_buf_t		*pb)
{
	PB_TRACE(pb, "iowait", 0);
	if (atomic_read(&pb->pb_io_remaining))
		run_task_queue(&tq_disk);
	if ((pb->pb_flags & PBF_FS_DATAIOD))
		pagebuf_runall_queues(pagebuf_dataiodone_tq);
	down(&pb->pb_iodonesema);
	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
	return pb->pb_error;
}

caddr_t
pagebuf_offset(
	xfs_buf_t		*pb,
	size_t			offset)
{
	struct page		*page;

	offset += pb->pb_offset;

	page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT];
	return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1));
}

/*
 * pagebuf_iomove
 *
 * Move data into or out of a buffer.
 */
void
pagebuf_iomove(
	xfs_buf_t		*pb,	/* buffer to process		*/
	size_t			boff,	/* starting buffer offset	*/
	size_t			bsize,	/* length to copy		*/
	caddr_t			data,	/* data address			*/
	page_buf_rw_t		mode)	/* read/write flag		*/
{
	size_t			bend, cpoff, csize;
	struct page		*page;

	bend = boff + bsize;
	while (boff < bend) {
		page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)];
		cpoff = page_buf_poff(boff + pb->pb_offset);
		csize = min_t(size_t,
			      PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff);

		ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));

		switch (mode) {
		case PBRW_ZERO:
			memset(page_address(page) + cpoff, 0, csize);
			break;
		case PBRW_READ:
			memcpy(data, page_address(page) + cpoff, csize);
			break;
		case PBRW_WRITE:
			memcpy(page_address(page) + cpoff, data, csize);
		}

		boff += csize;
		data += csize;
	}
}

/*
 * _pagebuf_ioapply
 *
 * Applies _pagebuf_page_apply to each page of the xfs_buf_t.
 */
STATIC void
_pagebuf_ioapply(			/* apply function to pages	*/
	xfs_buf_t		*pb)	/* buffer to examine		*/
{
	int			index;
	loff_t			buffer_offset = pb->pb_file_offset;
	size_t			buffer_len = pb->pb_count_desired;
	size_t			page_offset, len;
	size_t			cur_offset, cur_len;

	cur_offset = pb->pb_offset;
	cur_len = buffer_len;

	if (!pb->pb_locked && !(pb->pb_flags & PBF_DIRECTIO) &&
	    (pb->pb_target->pbr_bsize < PAGE_CACHE_SIZE)) {
		for (index = 0; index < pb->pb_page_count; index++)
			lock_page(pb->pb_pages[index]);
		pb->pb_locked = 1;
	}

	for (index = 0; index < pb->pb_page_count; index++) {
		if (cur_len == 0)
			break;
		if (cur_offset >= PAGE_CACHE_SIZE) {
			cur_offset -= PAGE_CACHE_SIZE;
			continue;
		}

		page_offset = cur_offset;
		cur_offset = 0;

		len = PAGE_CACHE_SIZE - page_offset;
		if (len > cur_len)
			len = cur_len;
		cur_len -= len;

		_pagebuf_page_apply(pb, buffer_offset,
				pb->pb_pages[index], page_offset, len,
				index + 1 == pb->pb_page_count);
		buffer_offset += len;
		buffer_len -= len;
	}

	/*
	 * Run the block device task queue here, while we have
	 * a hold on the pagebuf (important to have that hold).
	 */
	if (pb->pb_flags & _PBF_RUN_QUEUES) {
		pb->pb_flags &= ~_PBF_RUN_QUEUES;
		if (atomic_read(&pb->pb_io_remaining) > 1)
			run_task_queue(&tq_disk);
	}
}

/*
 * Delayed write buffer list handling
 */
STATIC LIST_HEAD(pbd_delwrite_queue);
STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;

STATIC void
pagebuf_delwri_queue(
	xfs_buf_t		*pb,
	int			unlock)
{
	PB_TRACE(pb, "delwri_q", (long)unlock);
	ASSERT(pb->pb_flags & PBF_DELWRI);

	spin_lock(&pbd_delwrite_lock);
	/* If already in the queue, dequeue and place at tail */
	if (!list_empty(&pb->pb_list)) {
		if (unlock)
			atomic_dec(&pb->pb_hold);
		list_del(&pb->pb_list);
	}

	list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
	pb->pb_queuetime = jiffies;
	spin_unlock(&pbd_delwrite_lock);

	if (unlock)
		pagebuf_unlock(pb);
}

void
pagebuf_delwri_dequeue(
	xfs_buf_t		*pb)
{
	int			dequeued = 0;

	spin_lock(&pbd_delwrite_lock);
	if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
		list_del_init(&pb->pb_list);
		dequeued = 1;
	}
	pb->pb_flags &= ~PBF_DELWRI;
	spin_unlock(&pbd_delwrite_lock);

	if (dequeued)
		pagebuf_rele(pb);

	PB_TRACE(pb, "delwri_dq", (long)dequeued);
}

/*
 * The pagebuf iodone daemons
 */

STATIC int
pagebuf_iodone_daemon(
	void			*__bind_cpu,
	const char		*name,
	int			pagebuf_daemons[],
	struct list_head	pagebuf_iodone_tq[],
	wait_queue_head_t	pagebuf_iodone_wait[])
{
	int			bind_cpu, cpu;
	DECLARE_WAITQUEUE	(wait, current);

	bind_cpu = (int) (long)__bind_cpu;
	cpu = CPU_TO_DAEMON(cpu_logical_map(bind_cpu));

	/* Set up the thread */
	daemonize();

	/* Avoid signals */
	sigmask_lock();
	sigfillset(&current->blocked);
	__recalc_sigpending(current);
	sigmask_unlock();

	/* Migrate to the right CPU */
	migrate_to_cpu(cpu);
#ifdef __HAVE_NEW_SCHEDULER
	if (smp_processor_id() != cpu)
		BUG();
#else
	while (smp_processor_id() != cpu)
		schedule();
#endif

	sprintf(current->comm, "%s/%d", name, bind_cpu);
	INIT_LIST_HEAD(&pagebuf_iodone_tq[cpu]);
	init_waitqueue_head(&pagebuf_iodone_wait[cpu]);
	__set_current_state(TASK_INTERRUPTIBLE);
	mb();

	pagebuf_daemons[cpu] = 1;

	for (;;) {
		add_wait_queue(&pagebuf_iodone_wait[cpu], &wait);

		if (TQ_ACTIVE(pagebuf_iodone_tq[cpu]))
			__set_task_state(current, TASK_RUNNING);
		schedule();
		remove_wait_queue(&pagebuf_iodone_wait[cpu], &wait);
		run_task_queue(&pagebuf_iodone_tq[cpu]);
		if (pagebuf_daemons[cpu] == 0)
			break;
		__set_current_state(TASK_INTERRUPTIBLE);
	}

	pagebuf_daemons[cpu] = -1;
	wake_up_interruptible(&pagebuf_iodone_wait[cpu]);
	return 0;
}

STATIC void
pagebuf_runall_queues(
	struct list_head	pagebuf_iodone_tq[])
{
	int			pcpu, cpu;

	for (cpu = 0; cpu < min(smp_num_cpus, MAX_IO_DAEMONS); cpu++) {
		pcpu = CPU_TO_DAEMON(cpu_logical_map(cpu));

		run_task_queue(&pagebuf_iodone_tq[pcpu]);
	}
}

STATIC int
pagebuf_logiodone_daemon(
	void			*__bind_cpu)
{
	return pagebuf_iodone_daemon(__bind_cpu, "xfslogd", pb_logio_daemons,
			pagebuf_logiodone_tq, pagebuf_logiodone_wait);
}

STATIC int
pagebuf_dataiodone_daemon(
	void			*__bind_cpu)
{
	return pagebuf_iodone_daemon(__bind_cpu, "xfsdatad", pb_dataio_daemons,
			pagebuf_dataiodone_tq, pagebuf_dataiodone_wait);
}

/* Defines for pagebuf daemon */
STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
STATIC struct task_struct *pagebuf_daemon_task;
STATIC int pagebuf_daemon_active;
STATIC int force_flush;

STATIC int
pagebuf_daemon_wakeup(
	int			priority,
	unsigned int		mask)
{
	force_flush = 1;
	barrier();
	wake_up_process(pagebuf_daemon_task);
	return 0;
}

STATIC int
pagebuf_daemon(
	void			*data)
{
	struct list_head	tmp;
	unsigned long		age;
	xfs_buf_t		*pb, *n;
	int			count;

	/* Set up the thread */
	daemonize();

	/* Mark it active */
	pagebuf_daemon_task = current;
	pagebuf_daemon_active = 1;
	barrier();

	/* Avoid signals */
	sigmask_lock();
	sigfillset(&current->blocked);
	__recalc_sigpending(current);
	sigmask_unlock();

	strcpy(current->comm, "xfsbufd");
	current->flags |= PF_MEMALLOC;

	INIT_LIST_HEAD(&tmp);
	do {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);

		count = 0;
		age = (xfs_buf_age_centisecs * HZ) / 100;
		spin_lock(&pbd_delwrite_lock);
		list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
			PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
			ASSERT(pb->pb_flags & PBF_DELWRI);

			if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
				if (!force_flush &&
				    time_before(jiffies,
						pb->pb_queuetime + age)) {
					pagebuf_unlock(pb);
					break;
				}

				pb->pb_flags &= ~PBF_DELWRI;
				pb->pb_flags |= PBF_WRITE;
				list_move(&pb->pb_list, &tmp);
				count++;
			}
		}
		spin_unlock(&pbd_delwrite_lock);

		while (!list_empty(&tmp)) {
			pb = list_entry(tmp.next, xfs_buf_t, pb_list);
			list_del_init(&pb->pb_list);
			pagebuf_iostrategy(pb);
		}

		if (as_list_len > 0)
			purge_addresses();
		if (count)
			run_task_queue(&tq_disk);

		force_flush = 0;
	} while (pagebuf_daemon_active);

	complete_and_exit(&pagebuf_daemon_done, 0);
}

/*
 * Go through all incore buffers, and release buffers if they belong to
 * the given device. This is used in filesystem error handling to
 * preserve the consistency of its metadata.
 */
int
xfs_flush_buftarg(
	xfs_buftarg_t		*target,
	int			wait)
{
	struct list_head	tmp;
	xfs_buf_t		*pb, *n;
	int			pincount = 0;
	int			flush_cnt = 0;

	pagebuf_runall_queues(pagebuf_dataiodone_tq);
	pagebuf_runall_queues(pagebuf_logiodone_tq);

	INIT_LIST_HEAD(&tmp);
	spin_lock(&pbd_delwrite_lock);
	list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {

		if (pb->pb_target != target)
			continue;

		ASSERT(pb->pb_flags & PBF_DELWRI);
		PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
		if (pagebuf_ispin(pb)) {
			pincount++;
			continue;
		}

		pb->pb_flags &= ~PBF_DELWRI;
		pb->pb_flags |= PBF_WRITE;
		list_move(&pb->pb_list, &tmp);
	}
	spin_unlock(&pbd_delwrite_lock);

	/*
	 * Dropped the delayed write list lock, now walk the temporary list
	 */
	list_for_each_entry_safe(pb, n, &tmp, pb_list) {
		if (wait)
			pb->pb_flags &= ~PBF_ASYNC;
		else
			list_del_init(&pb->pb_list);

		pagebuf_lock(pb);
		pagebuf_iostrategy(pb);

		if (++flush_cnt > 32) {
			run_task_queue(&tq_disk);
			flush_cnt = 0;
		}
	}

	run_task_queue(&tq_disk);

	/*
	 * Remaining list items must be flushed before returning
	 */
	while (!list_empty(&tmp)) {
		pb = list_entry(tmp.next, xfs_buf_t, pb_list);

		list_del_init(&pb->pb_list);
		xfs_iowait(pb);
		xfs_buf_relse(pb);
	}

	return pincount;
}

STATIC int
pagebuf_daemon_start(void)
{
	int			cpu, pcpu;

	kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES|CLONE_VM);

	for (cpu = 0; cpu < min(smp_num_cpus, MAX_IO_DAEMONS); cpu++) {
		pcpu = CPU_TO_DAEMON(cpu_logical_map(cpu));

		if (kernel_thread(pagebuf_logiodone_daemon,
				(void *)(long) cpu,
				CLONE_FS|CLONE_FILES|CLONE_VM) < 0) {
			printk("pagebuf_logiodone daemon failed to start\n");
		} else {
			while (!pb_logio_daemons[pcpu])
				yield();
		}
	}
	for (cpu = 0; cpu < min(smp_num_cpus, MAX_IO_DAEMONS); cpu++) {
		pcpu = CPU_TO_DAEMON(cpu_logical_map(cpu));

		if (kernel_thread(pagebuf_dataiodone_daemon,
				(void *)(long) cpu,
				CLONE_FS|CLONE_FILES|CLONE_VM) < 0) {
			printk("pagebuf_dataiodone daemon failed to start\n");
		} else {
			while (!pb_dataio_daemons[pcpu])
				yield();
		}
	}
	return 0;
}

/*
 * pagebuf_daemon_stop
 *
 * Note: do not mark as __exit, it is called from pagebuf_terminate.
 */
STATIC void
pagebuf_daemon_stop(void)
{
	int			cpu, pcpu;

	pagebuf_daemon_active = 0;
	barrier();
	wait_for_completion(&pagebuf_daemon_done);

	for (pcpu = 0; pcpu < min(smp_num_cpus, MAX_IO_DAEMONS); pcpu++) {
		cpu = CPU_TO_DAEMON(cpu_logical_map(pcpu));

		pb_logio_daemons[cpu] = 0;
		wake_up(&pagebuf_logiodone_wait[cpu]);
		wait_event_interruptible(pagebuf_logiodone_wait[cpu],
				pb_logio_daemons[cpu] == -1);

		pb_dataio_daemons[cpu] = 0;
		wake_up(&pagebuf_dataiodone_wait[cpu]);
		wait_event_interruptible(pagebuf_dataiodone_wait[cpu],
				pb_dataio_daemons[cpu] == -1);
	}
}

/*
 * Initialization and Termination
 */

int __init
pagebuf_init(void)
{
	int			i;

	pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
			SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (pagebuf_cache == NULL) {
		printk("XFS: couldn't init xfs_buf_t cache\n");
		return -ENOMEM;
	}

	if (_pagebuf_prealloc_bh(NR_RESERVED_BH) < NR_RESERVED_BH) {
		printk("XFS: couldn't allocate %d reserved buffers\n",
			NR_RESERVED_BH);
		kmem_zone_destroy(pagebuf_cache);
		return -ENOMEM;
	}
	init_waitqueue_head(&pb_resv_bh_wait);

#ifdef PAGEBUF_TRACE
	pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP);
#endif

	pagebuf_daemon_start();

	pagebuf_shake = kmem_shake_register(pagebuf_daemon_wakeup);
	if (pagebuf_shake == NULL) {
		pagebuf_terminate();
		return -ENOMEM;
	}

	for (i = 0; i < NHASH; i++) {
		spin_lock_init(&pbhash[i].pb_hash_lock);
		INIT_LIST_HEAD(&pbhash[i].pb_hash);
	}

	return 0;
}

/*
 * pagebuf_terminate.
 *
 * Note: do not mark as __exit, this is also called from the __init code.
 */
void
pagebuf_terminate(void)
{
	pagebuf_daemon_stop();

#ifdef PAGEBUF_TRACE
	ktrace_free(pagebuf_trace_buf);
#endif

	kmem_zone_destroy(pagebuf_cache);
	kmem_shake_deregister(pagebuf_shake);
}
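Note on the offset arithmetic: pagebuf_offset() and pagebuf_iomove() above both reduce a byte offset within the buffer to a (page, offset-within-page) pair by shifting and masking with PAGE_CACHE_SIZE (pagebuf_iomove() does this via the page_buf_btoct()/page_buf_poff() helpers). The fragment below is a minimal user-space sketch of that same arithmetic, not kernel code; the page size is assumed to be 4 KiB, and the names demo_page_index/demo_page_offset are invented for illustration.

#include <stdio.h>
#include <stddef.h>

/* Illustration only: assume 4 KiB pages, as PAGE_CACHE_SIZE typically is
 * on the 32-bit Linux configurations of this era. */
#define DEMO_PAGE_SHIFT	12
#define DEMO_PAGE_SIZE	(1UL << DEMO_PAGE_SHIFT)

/* Page index for a byte offset: mirrors pb_pages[offset >> PAGE_CACHE_SHIFT]. */
static size_t demo_page_index(size_t offset)
{
	return offset >> DEMO_PAGE_SHIFT;
}

/* Offset within that page: mirrors (offset & (PAGE_CACHE_SIZE - 1)). */
static size_t demo_page_offset(size_t offset)
{
	return offset & (DEMO_PAGE_SIZE - 1);
}

int main(void)
{
	size_t pb_offset = 512;		/* where the data starts in the first page */
	size_t boff = 5000;		/* byte offset within the buffer */
	size_t offset = boff + pb_offset;

	printf("buffer offset %zu -> page %zu, offset %zu within that page\n",
	       boff, demo_page_index(offset), demo_page_offset(offset));
	return 0;
}

pagebuf_iomove() loops over exactly this mapping, copying at most PAGE_CACHE_SIZE - cpoff bytes per iteration so that no single memcpy()/memset() ever crosses a page boundary.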
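Note on the delayed-write aging check: pagebuf_daemon() skips a queued buffer while time_before(jiffies, pb->pb_queuetime + age) holds, i.e. until the buffer has been on the queue for xfs_buf_age_centisecs (unless force_flush is set). The comparison stays correct across jiffies wrap-around because it works on a signed difference. Below is a small user-space sketch in the same spirit; demo_time_before and the tick values are invented for illustration and are not the kernel macro itself.

#include <stdio.h>

/* Wrap-safe "a is before b" test on an unsigned tick counter: compare the
 * signed difference rather than the raw values, as the kernel's
 * time_before() does. */
static int demo_time_before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

int main(void)
{
	unsigned long queuetime = (unsigned long)-50;	/* queued just before wrap */
	unsigned long age = 100;
	unsigned long now = 25;				/* sampled just after wrap */

	/* Only 75 ticks have elapsed, less than the 100-tick age, so the
	 * buffer is skipped even though "now" is numerically smaller. */
	if (demo_time_before(now, queuetime + age))
		printf("buffer still aging, skip it\n");
	else
		printf("buffer is old enough, write it back\n");
	return 0;
}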