/*
 * xpc_main.c — extracted from a web code viewer ("底层驱动开发" / low-level
 * driver development collection); original file is 1,063 lines, this is
 * page 1 of 2.
 */
/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. *//* * Cross Partition Communication (XPC) support - standard version. * * XPC provides a message passing capability that crosses partition * boundaries. This module is made up of two parts: * * partition This part detects the presence/absence of other * partitions. It provides a heartbeat and monitors * the heartbeats of other partitions. * * channel This part manages the channels and sends/receives * messages across them to/from other partitions. * * There are a couple of additional functions residing in XP, which * provide an interface to XPC for its users. * * * Caveats: * * . We currently have no way to determine which nasid an IPI came * from. Thus, xpc_IPI_send() does a remote AMO write followed by * an IPI. The AMO indicates where data is to be pulled from, so * after the IPI arrives, the remote partition checks the AMO word. * The IPI can actually arrive before the AMO however, so other code * must periodically check for this case. Also, remote AMO operations * do not reliably time out. Thus we do a remote PIO read solely to * know whether the remote partition is down and whether we should * stop sending IPIs to it. This remote PIO read operation is set up * in a special nofault region so SAL knows to ignore (and cleanup) * any errors due to the remote AMO write, PIO read, and/or PIO * write operations. * * If/when new hardware solves this IPI problem, we should abandon * the current approach. 
* */#include <linux/kernel.h>#include <linux/module.h>#include <linux/init.h>#include <linux/sched.h>#include <linux/syscalls.h>#include <linux/cache.h>#include <linux/interrupt.h>#include <linux/slab.h>#include <linux/delay.h>#include <asm/sn/intr.h>#include <asm/sn/sn_sal.h>#include <asm/uaccess.h>#include "xpc.h"/* define two XPC debug device structures to be used with dev_dbg() et al */struct device_driver xpc_dbg_name = { .name = "xpc"};struct device xpc_part_dbg_subname = { .bus_id = {0}, /* set to "part" at xpc_init() time */ .driver = &xpc_dbg_name};struct device xpc_chan_dbg_subname = { .bus_id = {0}, /* set to "chan" at xpc_init() time */ .driver = &xpc_dbg_name};struct device *xpc_part = &xpc_part_dbg_subname;struct device *xpc_chan = &xpc_chan_dbg_subname;/* systune related variables for /proc/sys directories */static int xpc_hb_min = 1;static int xpc_hb_max = 10;static int xpc_hb_check_min = 10;static int xpc_hb_check_max = 120;static ctl_table xpc_sys_xpc_hb_dir[] = { { 1, "hb_interval", &xpc_hb_interval, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &xpc_hb_min, &xpc_hb_max }, { 2, "hb_check_interval", &xpc_hb_check_interval, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &xpc_hb_check_min, &xpc_hb_check_max }, {0}};static ctl_table xpc_sys_xpc_dir[] = { { 1, "hb", NULL, 0, 0555, xpc_sys_xpc_hb_dir }, {0}};static ctl_table xpc_sys_dir[] = { { 1, "xpc", NULL, 0, 0555, xpc_sys_xpc_dir }, {0}};static struct ctl_table_header *xpc_sysctl;/* #of IRQs received */static atomic_t xpc_act_IRQ_rcvd;/* IRQ handler notifies this wait queue on receipt of an IRQ */static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);static unsigned long xpc_hb_check_timeout;/* xpc_hb_checker thread exited notification */static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);/* xpc_discovery thread exited notification */static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);static struct timer_list xpc_hb_timer;static void xpc_kthread_waitmsgs(struct 
xpc_partition *, struct xpc_channel *);/* * Notify the heartbeat check thread that an IRQ has been received. */static irqreturn_txpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs){ atomic_inc(&xpc_act_IRQ_rcvd); wake_up_interruptible(&xpc_act_IRQ_wq); return IRQ_HANDLED;}/* * Timer to produce the heartbeat. The timer structures function is * already set when this is initially called. A tunable is used to * specify when the next timeout should occur. */static voidxpc_hb_beater(unsigned long dummy){ xpc_vars->heartbeat++; if (jiffies >= xpc_hb_check_timeout) { wake_up_interruptible(&xpc_act_IRQ_wq); } xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); add_timer(&xpc_hb_timer);}/* * This thread is responsible for nearly all of the partition * activation/deactivation. */static intxpc_hb_checker(void *ignore){ int last_IRQ_count = 0; int new_IRQ_count; int force_IRQ=0; /* this thread was marked active by xpc_hb_init() */ daemonize(XPC_HB_CHECK_THREAD_NAME); set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU)); xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); while (!(volatile int) xpc_exiting) { /* wait for IRQ or timeout */ (void) wait_event_interruptible(xpc_act_IRQ_wq, (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) || jiffies >= xpc_hb_check_timeout || (volatile int) xpc_exiting)); dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " "been received\n", (int) (xpc_hb_check_timeout - jiffies), atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count); /* checking of remote heartbeats is skewed by IRQ handling */ if (jiffies >= xpc_hb_check_timeout) { dev_dbg(xpc_part, "checking remote heartbeats\n"); xpc_check_remote_hb(); /* * We need to periodically recheck to ensure no * IPI/AMO pairs have been missed. That check * must always reset xpc_hb_check_timeout. 
*/ force_IRQ = 1; } new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { force_IRQ = 0; dev_dbg(xpc_part, "found an IRQ to process; will be " "resetting xpc_hb_check_timeout\n"); last_IRQ_count += xpc_identify_act_IRQ_sender(); if (last_IRQ_count < new_IRQ_count) { /* retry once to help avoid missing AMO */ (void) xpc_identify_act_IRQ_sender(); } last_IRQ_count = new_IRQ_count; xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); } } dev_dbg(xpc_part, "heartbeat checker is exiting\n"); /* mark this thread as inactive */ up(&xpc_hb_checker_exited); return 0;}/* * This thread will attempt to discover other partitions to activate * based on info provided by SAL. This new thread is short lived and * will exit once discovery is complete. */static intxpc_initiate_discovery(void *ignore){ daemonize(XPC_DISCOVERY_THREAD_NAME); xpc_discovery(); dev_dbg(xpc_part, "discovery thread is exiting\n"); /* mark this thread as inactive */ up(&xpc_discovery_exited); return 0;}/* * Establish first contact with the remote partititon. This involves pulling * the XPC per partition variables from the remote partition and waiting for * the remote partition to pull ours. */static enum xpc_retvalxpc_make_first_contact(struct xpc_partition *part){ enum xpc_retval ret; while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) { if (ret != xpcRetry) { XPC_DEACTIVATE_PARTITION(part, ret); return ret; } dev_dbg(xpc_chan, "waiting to make first contact with " "partition %d\n", XPC_PARTID(part)); /* wait a 1/4 of a second or so */ msleep_interruptible(250); if (part->act_state == XPC_P_DEACTIVATING) { return part->reason; } } return xpc_mark_partition_active(part);}/* * The first kthread assigned to a newly activated partition is the one * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to * that kthread until the partition is brought down, at which time that kthread * returns back to XPC HB. 
(The return of that kthread will signify to XPC HB * that XPC has dismantled all communication infrastructure for the associated * partition.) This kthread becomes the channel manager for that partition. * * Each active partition has a channel manager, who, besides connecting and * disconnecting channels, will ensure that each of the partition's connected * channels has the required number of assigned kthreads to get the work done. */static voidxpc_channel_mgr(struct xpc_partition *part){ while (part->act_state != XPC_P_DEACTIVATING || atomic_read(&part->nchannels_active) > 0) { xpc_process_channel_activity(part); /* * Wait until we've been requested to activate kthreads or * all of the channel's message queues have been torn down or * a signal is pending. * * The channel_mgr_requests is set to 1 after being awakened, * This is done to prevent the channel mgr from making one pass * through the loop for each request, since he will * be servicing all the requests in one pass. The reason it's * set to 1 instead of 0 is so that other kthreads will know * that the channel mgr is running and won't bother trying to * wake him up. */ atomic_dec(&part->channel_mgr_requests); (void) wait_event_interruptible(part->channel_mgr_wq, (atomic_read(&part->channel_mgr_requests) > 0 || (volatile u64) part->local_IPI_amo != 0 || ((volatile u8) part->act_state == XPC_P_DEACTIVATING && atomic_read(&part->nchannels_active) == 0))); atomic_set(&part->channel_mgr_requests, 1); // >>> Does it need to wakeup periodically as well? In case we // >>> miscalculated the #of kthreads to wakeup or create? }}/* * When XPC HB determines that a partition has come up, it will create a new * kthread and that kthread will call this function to attempt to set up the * basic infrastructure used for Cross Partition Communication with the newly * upped partition. 
* * The kthread that was created by XPC HB and which setup the XPC * infrastructure will remain assigned to the partition until the partition * goes down. At which time the kthread will teardown the XPC infrastructure * and then exit. * * XPC HB will put the remote partition's XPC per partition specific variables * physical address into xpc_partitions[partid].remote_vars_part_pa prior to * calling xpc_partition_up(). */static voidxpc_partition_up(struct xpc_partition *part){ DBUG_ON(part->channels != NULL); dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part)); if (xpc_setup_infrastructure(part) != xpcSuccess) { return; } /* * The kthread that XPC HB called us with will become the * channel manager for this partition. It will not return * back to XPC HB until the partition's XPC infrastructure * has been dismantled. */ (void) xpc_part_ref(part); /* this will always succeed */ if (xpc_make_first_contact(part) == xpcSuccess) { xpc_channel_mgr(part); } xpc_part_deref(part); xpc_teardown_infrastructure(part);}static intxpc_activating(void *__partid){ partid_t partid = (u64) __partid; struct xpc_partition *part = &xpc_partitions[partid]; unsigned long irq_flags; struct sched_param param = { sched_priority: MAX_RT_PRIO - 1 }; int ret; DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS); spin_lock_irqsave(&part->act_lock, irq_flags); if (part->act_state == XPC_P_DEACTIVATING) { part->act_state = XPC_P_INACTIVE; spin_unlock_irqrestore(&part->act_lock, irq_flags); part->remote_rp_pa = 0; return 0; } /* indicate the thread is activating */ DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ); part->act_state = XPC_P_ACTIVATING; XPC_SET_REASON(part, 0, 0); spin_unlock_irqrestore(&part->act_lock, irq_flags); dev_dbg(xpc_part, "bringing partition %d up\n", partid); daemonize("xpc%02d", partid); /* * This thread needs to run at a realtime priority to prevent a * significant performance degradation. 
*/ ret = sched_setscheduler(current, SCHED_FIFO, ¶m); if (ret != 0) { dev_warn(xpc_part, "unable to set pid %d to a realtime " "priority, ret=%d\n", current->pid, ret); } /* allow this thread and its children to run on any CPU */ set_cpus_allowed(current, CPU_MASK_ALL); /* * Register the remote partition's AMOs with SAL so it can handle * and cleanup errors within that address range should the remote * partition go down. We don't unregister this range because it is * difficult to tell when outstanding writes to the remote partition * are finished and thus when it is safe to unregister. This should * not result in wasted space in the SAL xp_addr_region table because * we should get the same page for remote_amos_page_pa after module * reloads and system reboots. */ if (sn_register_xp_addr_region(part->remote_amos_page_pa, PAGE_SIZE, 1) < 0) { dev_warn(xpc_part, "xpc_partition_up(%d) failed to register " "xp_addr region\n", partid); spin_lock_irqsave(&part->act_lock, irq_flags); part->act_state = XPC_P_INACTIVE; XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__); spin_unlock_irqrestore(&part->act_lock, irq_flags); part->remote_rp_pa = 0; return 0; } XPC_ALLOW_HB(partid, xpc_vars); xpc_IPI_send_activated(part); /* * xpc_partition_up() holds this thread and marks this partition as * XPC_P_ACTIVE by calling xpc_hb_mark_active(). 
*/ (void) xpc_partition_up(part); xpc_mark_partition_inactive(part); if (part->reason == xpcReactivating) { /* interrupting ourselves results in activating partition */ xpc_IPI_send_reactivate(part); } return 0;}voidxpc_activate_partition(struct xpc_partition *part){ partid_t partid = XPC_PARTID(part); unsigned long irq_flags; pid_t pid; spin_lock_irqsave(&part->act_lock, irq_flags); pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0); DBUG_ON(part->act_state != XPC_P_INACTIVE); if (pid > 0) { part->act_state = XPC_P_ACTIVATION_REQ; XPC_SET_REASON(part, xpcCloneKThread, __LINE__); } else { XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__); } spin_unlock_irqrestore(&part->act_lock, irq_flags);}/* * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
/*
 * (Viewer chrome removed: keyboard-shortcut help panel from the web code
 * viewer — copy Ctrl+C, search Ctrl+F, fullscreen F11, font size Ctrl+=/-.
 * Not part of the original xpc_main.c source.)
 */