📄 xpc_main.c
字号:
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2007 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) support - standard version.
 *
 * XPC provides a message passing capability that crosses partition
 * boundaries. This module is made up of two parts:
 *
 *	partition	This part detects the presence/absence of other
 *			partitions. It provides a heartbeat and monitors
 *			the heartbeats of other partitions.
 *
 *	channel		This part manages the channels and sends/receives
 *			messages across them to/from other partitions.
 *
 * There are a couple of additional functions residing in XP, which
 * provide an interface to XPC for its users.
 *
 *
 * Caveats:
 *
 *   . We currently have no way to determine which nasid an IPI came
 *     from. Thus, xpc_IPI_send() does a remote AMO write followed by
 *     an IPI. The AMO indicates where data is to be pulled from, so
 *     after the IPI arrives, the remote partition checks the AMO word.
 *     The IPI can actually arrive before the AMO however, so other code
 *     must periodically check for this case. Also, remote AMO operations
 *     do not reliably time out. Thus we do a remote PIO read solely to
 *     know whether the remote partition is down and whether we should
 *     stop sending IPIs to it. This remote PIO read operation is set up
 *     in a special nofault region so SAL knows to ignore (and cleanup)
 *     any errors due to the remote AMO write, PIO read, and/or PIO
 *     write operations.
 *
 *     If/when new hardware solves this IPI problem, we should abandon
 *     the current approach.
 * */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/completion.h>
#include <linux/kdebug.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/uaccess.h>
#include <asm/sn/xpc.h>

/* define two XPC debug device structures to be used with dev_dbg() et al */

struct device_driver xpc_dbg_name = {
	.name = "xpc"
};

struct device xpc_part_dbg_subname = {
	.bus_id = {0},		/* set to "part" at xpc_init() time */
	.driver = &xpc_dbg_name
};

struct device xpc_chan_dbg_subname = {
	.bus_id = {0},		/* set to "chan" at xpc_init() time */
	.driver = &xpc_dbg_name
};

/* handles passed to dev_dbg() by the partition and channel halves of XPC */
struct device *xpc_part = &xpc_part_dbg_subname;
struct device *xpc_chan = &xpc_chan_dbg_subname;

/* NOTE(review): only the declaration is visible in this chunk; its consumer
 * (presumably the die/kdebug notifier path) should be confirmed against the
 * rest of the file. */
static int xpc_kdebug_ignore;

/* systune related variables for /proc/sys directories */

/* seconds between local heartbeat increments (see xpc_hb_beater()), with the
 * min/max clamp bounds enforced by proc_dointvec_minmax below */
static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
static int xpc_hb_min_interval = 1;
static int xpc_hb_max_interval = 10;

/* seconds between checks of remote heartbeats (see xpc_hb_checker()), with
 * the min/max clamp bounds enforced by proc_dointvec_minmax below */
static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
static int xpc_hb_check_min_interval = 10;
static int xpc_hb_check_max_interval = 120;

/* seconds allowed for a partition disengage request, with clamp bounds */
int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
static int xpc_disengage_request_min_timelimit = 0;
static int xpc_disengage_request_max_timelimit = 120;

/* heartbeat tunables directory (legacy pre-2.6.33 ctl_table layout) */
static ctl_table xpc_sys_xpc_hb_dir[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "hb_interval",
		.data		= &xpc_hb_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &xpc_hb_min_interval,
		.extra2		= &xpc_hb_max_interval
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "hb_check_interval",
		.data		= &xpc_hb_check_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &xpc_hb_check_min_interval,
		.extra2		= &xpc_hb_check_max_interval
	},
	{}	/* sentinel */
};
static ctl_table xpc_sys_xpc_dir[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "hb",
		.mode		= 0555,
		.child		= xpc_sys_xpc_hb_dir
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "disengage_request_timelimit",
		.data		= &xpc_disengage_request_timelimit,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &xpc_disengage_request_min_timelimit,
		.extra2		= &xpc_disengage_request_max_timelimit
	},
	{}	/* sentinel */
};
static ctl_table xpc_sys_dir[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "xpc",
		.mode		= 0555,
		.child		= xpc_sys_xpc_dir
	},
	{}	/* sentinel */
};
/* handle returned when the table above is registered; used to unregister */
static struct ctl_table_header *xpc_sysctl;

/* non-zero if any remote partition disengage request was timed out */
int xpc_disengage_request_timedout;

/* #of IRQs received */
static atomic_t xpc_act_IRQ_rcvd;

/* IRQ handler notifies this wait queue on receipt of an IRQ */
static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);

/* jiffies value at which the next remote-heartbeat check becomes due */
static unsigned long xpc_hb_check_timeout;

/* notification that the xpc_hb_checker thread has exited */
static DECLARE_COMPLETION(xpc_hb_checker_exited);

/* notification that the xpc_discovery thread has exited */
static DECLARE_COMPLETION(xpc_discovery_exited);

/* heartbeat timer; re-armed on each expiry by xpc_hb_beater() */
static struct timer_list xpc_hb_timer;

static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);

static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_reboot_notifier = {
	.notifier_call = xpc_system_reboot,
};

static int xpc_system_die(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_die_notifier = {
	.notifier_call = xpc_system_die,
};

/*
 * Timer function to enforce the timelimit on the partition disengage request.
 */
static void
xpc_timeout_partition_disengage_request(unsigned long data)
{
	struct xpc_partition *part = (struct xpc_partition *) data;

	/* the timer only fires once the deadline has passed */
	DBUG_ON(jiffies < part->disengage_request_timeout);

	(void) xpc_partition_disengaged(part);

	/* the above call is expected to have cleared the deadline and to have
	 * left no engaged bit set for this partition */
	DBUG_ON(part->disengage_request_timeout != 0);
	DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
}

/*
 * Notify the heartbeat check thread that an IRQ has been received.
 */
static irqreturn_t
xpc_act_IRQ_handler(int irq, void *dev_id)
{
	atomic_inc(&xpc_act_IRQ_rcvd);
	wake_up_interruptible(&xpc_act_IRQ_wq);
	return IRQ_HANDLED;
}

/*
 * Timer to produce the heartbeat.  The timer structures function is
 * already set when this is initially called.  A tunable is used to
 * specify when the next timeout should occur.
 */
static void
xpc_hb_beater(unsigned long dummy)
{
	xpc_vars->heartbeat++;

	/* kick the checker thread if a remote-heartbeat check is due */
	if (jiffies >= xpc_hb_check_timeout) {
		wake_up_interruptible(&xpc_act_IRQ_wq);
	}

	/* re-arm ourselves using the /proc/sys heartbeat interval tunable */
	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
	add_timer(&xpc_hb_timer);
}

/*
 * This thread is responsible for nearly all of the partition
 * activation/deactivation.
 */
static int
xpc_hb_checker(void *ignore)
{
	int last_IRQ_count = 0;	/* #of IRQs already accounted for */
	int new_IRQ_count;
	int force_IRQ=0;	/* non-zero forces a scan for IRQ senders */


	/* this thread was marked active by xpc_hb_init() */

	daemonize(XPC_HB_CHECK_THREAD_NAME);

	set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));

	/* set our heartbeating to other partitions into motion */
	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
	xpc_hb_beater(0);

	while (!(volatile int) xpc_exiting) {

		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
			"been received\n",
			(int) (xpc_hb_check_timeout - jiffies),
			atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);

		/* checking of remote heartbeats is skewed by IRQ handling */
		if (jiffies >= xpc_hb_check_timeout) {
			dev_dbg(xpc_part, "checking remote heartbeats\n");
			xpc_check_remote_hb();

			/*
			 * We need to periodically recheck to ensure no
			 * IPI/AMO pairs have been missed.  That check
			 * must always reset xpc_hb_check_timeout.
 */
			force_IRQ = 1;
		}


		/* check for outstanding IRQs */
		new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
		if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
			force_IRQ = 0;

			dev_dbg(xpc_part, "found an IRQ to process; will be "
				"resetting xpc_hb_check_timeout\n");

			last_IRQ_count += xpc_identify_act_IRQ_sender();
			if (last_IRQ_count < new_IRQ_count) {
				/* retry once to help avoid missing AMO */
				(void) xpc_identify_act_IRQ_sender();
			}
			last_IRQ_count = new_IRQ_count;

			xpc_hb_check_timeout = jiffies +
					   (xpc_hb_check_interval * HZ);
		}

		/* wait for IRQ or timeout */
		(void) wait_event_interruptible(xpc_act_IRQ_wq,
			    (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
					jiffies >= xpc_hb_check_timeout ||
						(volatile int) xpc_exiting));
	}

	dev_dbg(xpc_part, "heartbeat checker is exiting\n");

	/* mark this thread as having exited */
	complete(&xpc_hb_checker_exited);
	return 0;
}

/*
 * This thread will attempt to discover other partitions to activate
 * based on info provided by SAL.  This new thread is short lived and
 * will exit once discovery is complete.
 */
static int
xpc_initiate_discovery(void *ignore)
{
	daemonize(XPC_DISCOVERY_THREAD_NAME);

	xpc_discovery();

	dev_dbg(xpc_part, "discovery thread is exiting\n");

	/* mark this thread as having exited */
	complete(&xpc_discovery_exited);
	return 0;
}

/*
 * Establish first contact with the remote partition.  This involves pulling
 * the XPC per partition variables from the remote partition and waiting for
 * the remote partition to pull ours.
 */
static enum xpc_retval
xpc_make_first_contact(struct xpc_partition *part)
{
	enum xpc_retval ret;

	/* keep pulling until it succeeds, bailing out on any hard error */
	while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
		if (ret != xpcRetry) {
			XPC_DEACTIVATE_PARTITION(part, ret);
			return ret;
		}

		dev_dbg(xpc_chan, "waiting to make first contact with "
			"partition %d\n", XPC_PARTID(part));

		/* wait a 1/4 of a second or so */
		(void) msleep_interruptible(250);

		/* the partition may have started deactivating while we slept */
		if (part->act_state == XPC_P_DEACTIVATING) {
			return part->reason;
		}
	}

	return xpc_mark_partition_active(part);
}

/*
 * The first kthread assigned to a newly activated partition is the one
 * created by XPC HB with which it calls xpc_partition_up().  XPC hangs on to
 * that kthread until the partition is brought down, at which time that kthread
 * returns back to XPC HB.  (The return of that kthread will signify to XPC HB
 * that XPC has dismantled all communication infrastructure for the associated
 * partition.)  This kthread becomes the channel manager for that partition.
 *
 * Each active partition has a channel manager, who, besides connecting and
 * disconnecting channels, will ensure that each of the partition's connected
 * channels has the required number of assigned kthreads to get the work done.
 */
static void
xpc_channel_mgr(struct xpc_partition *part)
{
	/* loop until the partition is deactivating, all channels are idle,
	 * and the remote side has fully disengaged */
	while (part->act_state != XPC_P_DEACTIVATING ||
			atomic_read(&part->nchannels_active) > 0 ||
					!xpc_partition_disengaged(part)) {

		xpc_process_channel_activity(part);


		/*
		 * Wait until we've been requested to activate kthreads or
		 * all of the channel's message queues have been torn down or
		 * a signal is pending.
		 *
		 * The channel_mgr_requests is set to 1 after being awakened,
		 * This is done to prevent the channel mgr from making one pass
		 * through the loop for each request, since he will
		 * be servicing all the requests in one pass.  The reason it's
		 * set to 1 instead of 0 is so that other kthreads will know
		 * that the channel mgr is running and won't bother trying to
		 * wake him up.
 */
		atomic_dec(&part->channel_mgr_requests);
		(void) wait_event_interruptible(part->channel_mgr_wq,
				(atomic_read(&part->channel_mgr_requests) > 0 ||
				(volatile u64) part->local_IPI_amo != 0 ||
				((volatile u8) part->act_state ==
							XPC_P_DEACTIVATING &&
				atomic_read(&part->nchannels_active) == 0 &&
				xpc_partition_disengaged(part))));
		atomic_set(&part->channel_mgr_requests, 1);

		// >>> Does it need to wakeup periodically as well? In case we
		// >>> miscalculated the #of kthreads to wakeup or create?
	}
}

/*
 * When XPC HB determines that a partition has come up, it will create a new
 * kthread and that kthread will call this function to attempt to set up the
 * basic infrastructure used for Cross Partition Communication with the newly
 * upped partition.
 *
 * The kthread that was created by XPC HB and which setup the XPC
 * infrastructure will remain assigned to the partition until the partition
 * goes down.  At which time the kthread will teardown the XPC infrastructure
 * and then exit.
 *
 * XPC HB will put the remote partition's XPC per partition specific variables
 * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
 * calling xpc_partition_up().
 */
static void
xpc_partition_up(struct xpc_partition *part)
{
	DBUG_ON(part->channels != NULL);

	dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));

	if (xpc_setup_infrastructure(part) != xpcSuccess) {
		return;
	}

	/*
	 * The kthread that XPC HB called us with will become the
	 * channel manager for this partition.  It will not return
	 * back to XPC HB until the partition's XPC infrastructure
	 * has been dismantled.
 */

	(void) xpc_part_ref(part);	/* this will always succeed */

	if (xpc_make_first_contact(part) == xpcSuccess) {
		xpc_channel_mgr(part);
	}

	xpc_part_deref(part);

	xpc_teardown_infrastructure(part);
}

/*
 * Entry point of the kthread spawned to activate a partition.
 * NOTE(review): the remainder of this function is not visible in this
 * chunk of the file.
 */
static int
xpc_activating(void *__partid)
{
	/* the partition id is passed in the void * argument itself */
	partid_t partid = (u64) __partid;
	struct xpc_partition *part = &xpc_partitions[partid];
	unsigned long irq_flags;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	int ret;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -