rcupreempt.c
/*
 * Read-Copy Update mechanism for mutual exclusion, realtime implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2006
 *
 * Authors: Paul E. McKenney <paulmck@us.ibm.com>
 *		With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
 *		for pushing me away from locks and towards counters, and
 *		to Suparna Bhattacharya for pushing me completely away
 *		from atomic instructions on the read side.
 *
 *  - Added handling of Dynamic Ticks
 *      Copyright 2007 - Paul E. McKenney <paulmck@us.ibm.com>
 *                     - Steven Rostedt <srostedt@redhat.com>
 *
 * Papers:  http://www.rdrop.com/users/paulmck/RCU
 *
 * Design Document: http://lwn.net/Articles/253651/
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/byteorder/swabb.h>
#include <linux/cpumask.h>
#include <linux/rcupreempt_trace.h>

/*
 * Macro that prevents the compiler from reordering accesses, but does
 * absolutely -nothing- to prevent CPUs from reordering.  This is used
 * only to mediate communication between mainline code and hardware
 * interrupt and NMI handlers.
 */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/*
 * PREEMPT_RCU data structures.
 */

/*
 * GP_STAGES specifies the number of times the state machine has
 * to go through all the rcu_try_flip_states (see below)
 * in a single Grace Period.
 *
 * GP in GP_STAGES stands for Grace Period ;)
 */
#define GP_STAGES 2

struct rcu_data {
	spinlock_t	lock;		/* Protect rcu_data fields. */
	long		completed;	/* Number of last completed batch. */
	int		waitlistcount;
	struct rcu_head *nextlist;
	struct rcu_head **nexttail;
	struct rcu_head *waitlist[GP_STAGES];
	struct rcu_head **waittail[GP_STAGES];
	struct rcu_head *donelist;	/* from waitlist & waitschedlist */
	struct rcu_head **donetail;
	long rcu_flipctr[2];
	struct rcu_head *nextschedlist;
	struct rcu_head **nextschedtail;
	struct rcu_head *waitschedlist;
	struct rcu_head **waitschedtail;
	int rcu_sched_sleeping;
#ifdef CONFIG_RCU_TRACE
	struct rcupreempt_trace trace;
#endif /* #ifdef CONFIG_RCU_TRACE */
};

/*
 * States for rcu_try_flip() and friends.
 */

enum rcu_try_flip_states {

	/*
	 * Stay here if nothing is happening.  Flip the counter if something
	 * starts happening.  Denoted by "I".
	 */
	rcu_try_flip_idle_state,

	/*
	 * Wait here for all CPUs to notice that the counter has flipped.  This
	 * prevents the old set of counters from ever being incremented once
	 * we leave this state, which in turn is necessary because we cannot
	 * test any individual counter for zero -- we can only check the sum.
	 * Denoted by "A".
	 */
	rcu_try_flip_waitack_state,

	/*
	 * Wait here for the sum of the old per-CPU counters to reach zero.
	 * Denoted by "Z".
	 */
	rcu_try_flip_waitzero_state,

	/*
	 * Wait here for each of the other CPUs to execute a memory barrier.
	 * This is necessary to ensure that these other CPUs really have
	 * completed executing their RCU read-side critical sections, despite
	 * their CPUs wildly reordering memory.  Denoted by "M".
	 */
	rcu_try_flip_waitmb_state,
};
/*
 * States for rcu_ctrlblk.rcu_sched_sleep.
 */

enum rcu_sched_sleep_states {
	rcu_sched_not_sleeping,	/* Not sleeping, callbacks need GP. */
	rcu_sched_sleep_prep,	/* Thinking of sleeping, rechecking. */
	rcu_sched_sleeping,	/* Sleeping, awaken if GP needed. */
};

struct rcu_ctrlblk {
	spinlock_t	fliplock;	/* Protect state-machine transitions. */
	long		completed;	/* Number of last completed batch. */
	enum rcu_try_flip_states rcu_try_flip_state;
					/* The current state of the rcu state machine. */
	spinlock_t	schedlock;	/* Protect rcu_sched sleep state. */
	enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
	wait_queue_head_t sched_wq;	/* Place for rcu_sched to sleep. */
};

static DEFINE_PER_CPU(struct rcu_data, rcu_data);
static struct rcu_ctrlblk rcu_ctrlblk = {
	.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
	.completed = 0,
	.rcu_try_flip_state = rcu_try_flip_idle_state,
	.schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
	.sched_sleep = rcu_sched_not_sleeping,
	.sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
};

static struct task_struct *rcu_sched_grace_period_task;

#ifdef CONFIG_RCU_TRACE
static char *rcu_try_flip_state_names[] =
	{ "idle", "waitack", "waitzero", "waitmb" };
#endif /* #ifdef CONFIG_RCU_TRACE */

static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;

/*
 * Enum and per-CPU flag to determine when each CPU has seen
 * the most recent counter flip.
 */

enum rcu_flip_flag_values {
	rcu_flip_seen,		/* Steady/initial state, last flip seen. */
				/* Only GP detector can update. */
	rcu_flipped		/* Flip just completed, need confirmation. */
				/* Only corresponding CPU can update. */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
								= rcu_flip_seen;

/*
 * Enum and per-CPU flag to determine when each CPU has executed the
 * needed memory barrier to fence in memory references from its last RCU
 * read-side critical section in the just-completed grace period.
 */

enum rcu_mb_flag_values {
	rcu_mb_done,		/* Steady/initial state, no mb()s required. */
				/* Only GP detector can update. */
	rcu_mb_needed		/* Flip just completed, need an mb(). */
				/* Only corresponding CPU can update. */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
								= rcu_mb_done;

/*
 * RCU_DATA_ME: find the current CPU's rcu_data structure.
 * RCU_DATA_CPU: find the specified CPU's rcu_data structure.
 */
#define RCU_DATA_ME()		(&__get_cpu_var(rcu_data))
#define RCU_DATA_CPU(cpu)	(&per_cpu(rcu_data, cpu))

/*
 * Helper macro for tracing when the appropriate rcu_data is not
 * cached in a local variable, but where the CPU number is so cached.
 */
#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));

/*
 * Helper macro for tracing when the appropriate rcu_data is not
 * cached in a local variable.
 */
#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));

/*
 * Helper macro for tracing when the appropriate rcu_data is pointed
 * to by a local variable.
 */
#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));

#define RCU_SCHED_BATCH_TIME (HZ / 50)
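/*
 * Illustrative sketch -- NOT part of the original rcupreempt.c.  It shows,
 * in simplified form, why the comments above say that no individual
 * rcu_flipctr[] entry can be tested for zero: a reader may be preempted and
 * migrated, so it can decrement on a different CPU than it incremented on,
 * leaving individual per-CPU counters negative.  Only the sum over all
 * online CPUs of the "old" counter is meaningful.  The function name
 * example_sum_old_counters() is hypothetical.
 */
static int example_sum_old_counters(void)
{
	int cpu;
	int lastidx = !(ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1);
	int sum = 0;

	/* Sum the counters that readers stopped using at the last flip. */
	for_each_cpu_mask(cpu, rcu_cpu_online_map)
		sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];

	/* Zero means no reader still relies on the old counter set; the
	 * real grace-period state machine does this check while holding
	 * rcu_ctrlblk.fliplock (not shown in this sketch). */
	return sum;
}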
/*
 * Return the number of RCU batches processed thus far.  Useful
 * for debug and statistics.
 */
long rcu_batches_completed(void)
{
	return rcu_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

void __rcu_read_lock(void)
{
	int idx;
	struct task_struct *t = current;
	int nesting;

	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
	if (nesting != 0) {

		/* An earlier rcu_read_lock() covers us, just count it. */

		t->rcu_read_lock_nesting = nesting + 1;

	} else {
		unsigned long flags;

		/*
		 * We disable interrupts for the following reasons:
		 * - If we get scheduling clock interrupt here, and we
		 *   end up acking the counter flip, it's like a promise
		 *   that we will never increment the old counter again.
		 *   Thus we will break that promise if that
		 *   scheduling clock interrupt happens between the time
		 *   we pick the .completed field and the time that we
		 *   increment our counter.
		 *
		 * - We don't want to be preempted out here.
		 *
		 * NMIs can still occur, of course, and might themselves
		 * contain rcu_read_lock().
		 */

		local_irq_save(flags);

		/*
		 * Outermost nesting of rcu_read_lock(), so increment
		 * the current counter for the current CPU.  Use volatile
		 * casts to prevent the compiler from reordering.
		 */

		idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
		ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;

		/*
		 * Now that the per-CPU counter has been incremented, we
		 * are protected from races with rcu_read_lock() invoked
		 * from NMI handlers on this CPU.  We can therefore safely
		 * increment the nesting counter, relieving further NMIs
		 * of the need to increment the per-CPU counter.
		 */

		ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;

		/*
		 * Now that we have prevented any NMIs from storing
		 * to the ->rcu_flipctr_idx, we can safely use it to
		 * remember which counter to decrement in the matching
		 * rcu_read_unlock().
		 */

		ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

void __rcu_read_unlock(void)
{
	int idx;
	struct task_struct *t = current;
	int nesting;

	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
	if (nesting > 1) {

		/*
		 * We are still protected by the enclosing rcu_read_lock(),
		 * so simply decrement the counter.
		 */

		t->rcu_read_lock_nesting = nesting - 1;

	} else {
		unsigned long flags;

		/*
		 * Disable local interrupts to prevent the grace-period
		 * detection state machine from seeing us half-done.
		 * NMIs can still occur, of course, and might themselves
		 * contain rcu_read_lock() and rcu_read_unlock().
		 */

		local_irq_save(flags);

		/*
		 * Outermost nesting of rcu_read_unlock(), so we must
		 * decrement the current counter for the current CPU.
		 * This must be done carefully, because NMIs can
		 * occur at any point in this code, and any rcu_read_lock()
		 * and rcu_read_unlock() pairs in the NMI handlers
		 * must interact non-destructively with this code.
		 * Lots of volatile casts, and -very- careful ordering.
		 *
		 * Changes to this code, including this one, must be
		 * inspected, validated, and tested extremely carefully!!!
		 */

		/*
		 * First, pick up the index.
		 */

		idx = ACCESS_ONCE(t->rcu_flipctr_idx);

		/*
		 * Now that we have fetched the counter index, it is
		 * safe to decrement the per-task RCU nesting counter.
		 * After this, any interrupts or NMIs will increment and
		 * decrement the per-CPU counters.
		 */
		ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;

		/*
		 * It is now safe to decrement this task's nesting count.
		 * NMIs that occur after this statement will route their
		 * rcu_read_lock() calls through this "else" clause, and
		 * will thus start incrementing the per-CPU counter on
		 * their own.  They will also clobber ->rcu_flipctr_idx,
		 * but that is OK, since we have already fetched it.
		 */

		ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
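/*
 * Illustrative usage sketch -- NOT part of the original rcupreempt.c.
 * It shows how ordinary reader code reaches __rcu_read_lock() and
 * __rcu_read_unlock() above through the generic rcu_read_lock() /
 * rcu_read_unlock() / rcu_dereference() API when PREEMPT_RCU is
 * configured.  "struct example_node", "example_head" and example_reader()
 * are hypothetical names used only for illustration.
 */
struct example_node {
	int key;
	struct example_node *next;
	struct rcu_head rh;		/* used by the update-side sketch below */
};

static struct example_node *example_head;

static int example_reader(int key)
{
	struct example_node *p;
	int found = 0;

	rcu_read_lock();	/* outermost: bumps this CPU's rcu_flipctr[] */
	for (p = rcu_dereference(example_head); p != NULL;
	     p = rcu_dereference(p->next)) {
		if (p->key == key) {
			found = 1;
			break;
		}
	}
	rcu_read_unlock();	/* decrements the counter chosen at lock time */

	return found;
}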
/*
 * If a global counter flip has occurred since the last time that we
 * advanced callbacks, advance them.  Hardware interrupts must be
 * disabled when calling this function.
 */
static void __rcu_advance_callbacks(struct rcu_data *rdp)
{
	int cpu;
	int i;
	int wlc = 0;

	if (rdp->completed != rcu_ctrlblk.completed) {
		if (rdp->waitlist[GP_STAGES - 1] != NULL) {
			*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
			rdp->donetail = rdp->waittail[GP_STAGES - 1];
			RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
		}
		for (i = GP_STAGES - 2; i >= 0; i--) {
			if (rdp->waitlist[i] != NULL) {
				rdp->waitlist[i + 1] = rdp->waitlist[i];
				rdp->waittail[i + 1] = rdp->waittail[i];
				wlc++;
			} else {
				rdp->waitlist[i + 1] = NULL;
				rdp->waittail[i + 1] =
					&rdp->waitlist[i + 1];
			}
		}
		if (rdp->nextlist != NULL) {
			rdp->waitlist[0] = rdp->nextlist;
			rdp->waittail[0] = rdp->nexttail;
			wlc++;
			rdp->nextlist = NULL;
			rdp->nexttail = &rdp->nextlist;
			RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
		} else {
			rdp->waitlist[0] = NULL;
			rdp->waittail[0] = &rdp->waitlist[0];
		}
		rdp->waitlistcount = wlc;
		rdp->completed = rcu_ctrlblk.completed;
	}

	/*
	 * Check to see if this CPU needs to report that it has seen
	 * the most recent counter flip, thereby declaring that all
	 * subsequent rcu_read_lock() invocations will respect this flip.
	 */
	cpu = raw_smp_processor_id();
	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
		smp_mb();	/* Subsequent counter accesses must see new value */
		per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
		smp_mb();	/* Subsequent RCU read-side critical sections */
				/*  seen -after- acknowledgement. */
	}
}
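/*
 * Illustrative update-side sketch -- NOT part of the original rcupreempt.c.
 * A call_rcu() invocation queues the rcu_head on the calling CPU's
 * ->nextlist; __rcu_advance_callbacks() above then moves it through
 * ->waitlist[] and finally onto ->donelist as counter flips complete,
 * after which the callback is invoked.  This reuses the hypothetical
 * example_node/example_head from the reader sketch above, assumes
 * <linux/slab.h> for kfree(), and example_replace_first() /
 * example_reclaim() are likewise hypothetical names.
 */
static void example_reclaim(struct rcu_head *head)
{
	/*
	 * Recover the enclosing node and free it; every reader that could
	 * have seen it has finished by the time this callback runs.
	 */
	kfree(container_of(head, struct example_node, rh));
}

static void example_replace_first(struct example_node *new)
{
	/* A real updater would hold a lock to serialize with other updaters. */
	struct example_node *old = example_head;

	new->next = old ? old->next : NULL;
	rcu_assign_pointer(example_head, new);		/* publish the new node */
	if (old)
		call_rcu(&old->rh, example_reclaim);	/* defer the free */
}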
DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
	.dynticks = 1,
};

#ifdef CONFIG_NO_HZ
static DEFINE_PER_CPU(int, rcu_update_flag);

/**
 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
 *
 * If the CPU was idle with dynamic ticks active, this updates the
 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
 * CPU is active.
 */
void rcu_irq_enter(void)
{
	int cpu = smp_processor_id();
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	if (per_cpu(rcu_update_flag, cpu))
		per_cpu(rcu_update_flag, cpu)++;

	/*
	 * Only update if we are coming from a stopped ticks mode
	 * (rcu_dyntick_sched.dynticks is even).
	 */
	if (!in_interrupt() &&
	    (rdssp->dynticks & 0x1) == 0) {
		/*
		 * The following might seem like we could have a race
		 * with NMI/SMIs.  But this really isn't a problem.
		 * Here we do a read/modify/write, and the race happens
		 * when an NMI/SMI comes in after the read and before
		 * the write.  But NMI/SMIs will increment this counter
		 * twice before returning, so the zero bit will not
		 * be corrupted by the NMI/SMI which is the most important
		 * part.
		 *
		 * The only thing is that we would bring back the counter
		 * to a position that it was in during the NMI/SMI.
		 * But the zero bit would be set, so the rest of the
		 * counter would again be ignored.
		 *
		 * On return from the IRQ, the counter may have the zero
		 * bit be 0 and the counter the same as the return from
		 * the NMI/SMI.  If the state machine was so unlucky to
		 * see that, it still doesn't matter, since all
		 * RCU read-side critical sections on this CPU would
		 * have already completed.
		 */
		rdssp->dynticks++;

		/*
		 * The following memory barrier ensures that any
		 * rcu_read_lock() primitives in the irq handler
		 * are seen by other CPUs to follow the above
		 * increment to rcu_dyntick_sched.dynticks.  This is
		 * required in order for other CPUs to correctly
		 * determine when it is safe to advance the RCU
		 * grace-period state machine.
		 */
		smp_mb(); /* see above block comment. */
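/*
 * Illustrative sketch -- NOT part of the original rcupreempt.c, whose
 * listing above is truncated inside rcu_irq_enter().  It shows roughly how
 * the even/odd convention of rcu_dyntick_sched.dynticks lets the
 * grace-period detector skip CPUs that are (or have been) in dynticks-idle
 * mode: an even value means no irq handler is running and no RCU read-side
 * critical section is possible on that CPU.  The function name
 * example_cpu_is_quiescent() and the "snap" parameter (a ->dynticks value
 * sampled when the counter flip began) are hypothetical.
 */
static int example_cpu_is_quiescent(int cpu, long snap)
{
	long curr = per_cpu(rcu_dyntick_sched, cpu).dynticks;

	/* Idle the whole time: counter unchanged and even. */
	if (curr == snap && (curr & 0x1) == 0)
		return 1;

	/* Passed through or entered dynticks-idle since the snapshot. */
	if ((curr - snap) > 2 || (curr & 0x1) == 0)
		return 1;

	/* Otherwise this CPU must explicitly acknowledge the flip. */
	return 0;
}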