📄 perfmon.c
字号:
/*
 * This file implements the perfmon-2 subsystem which is used
 * to program the IA-64 Performance Monitoring Unit (PMU).
 *
 * The initial version of perfmon.c was written by
 * Ganesh Venkitachalam, IBM Corp.
 *
 * Then it was modified for perfmon-1.x by Stephane Eranian and
 * David Mosberger, Hewlett Packard Co.
 *
 * Version Perfmon-2.x is a rewrite of perfmon-1.x
 * by Stephane Eranian, Hewlett Packard Co.
 *
 * Copyright (C) 1999-2005 Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * More information about perfmon available at:
 * http://www.hpl.hp.com/research/linux/perfmon
 *
 *
 * For Xen/IA64 xenoprof
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>

#ifndef XEN
#include <asm/errno.h>
#else
#include <xen/errno.h>
#endif
#include <asm/intrinsics.h>
#include <asm/page.h>
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h>

#ifdef XEN
#include <xen/guest_access.h>
#include <asm/hw_irq.h>
#define CONFIG_PERFMON
/* Compatibility shims: map the Linux names used throughout this file onto
 * the corresponding Xen vcpu fields/helpers. */
#define pid vcpu_id
#define thread arch._thread
#define task_pt_regs vcpu_regs

/* pmc.plm privilege-level mask bits (see IA-64 PMC register layout) */
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
#define PMC_XEN_AND_GUEST ((1UL << 0) | (1UL << 1) | (1UL << 2))
#define PMC_PRIV_MONITOR (1UL << 6)

#undef ia64_set_pmc
#define ia64_set_pmc(index, val) \
do { \
	u64 __index = (index); \
	u64 __val = (val); \
	/* bad hack! \
	 * At this moment Linux perfmon knows only kernel and user \
	 * so that it sets only pmc.plm[0] and pmc.plm[3]. \
	 * On the other hand what we want is to sample on the whole \
	 * system. i.e. user, guest kernel and xen VMM. \
	 * Thus here we enable pmc.plm[2:1] too for generic pmc/pmd. \
	 * \
	 * But we can not do it genericly for the implementation \
	 * dependent pmc/pmd. \
	 * Probably such knowledge should be taught to the oprofiled or \
	 * the xenified perfmon. \
	 */ \
	if (pmu_conf != NULL && PMC_IS_COUNTING(__index) && \
	    (__val & PMC_KERNEL)) \
		__val |= PMC_XEN_AND_GUEST | PMC_PRIV_MONITOR; \
	asm volatile ("mov pmc[%0]=%1" :: \
		      "r"(__index), "r"(__val) : "memory"); \
} while (0)
#endif

#ifdef CONFIG_PERFMON
/*
 * perfmon context state
 */
#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */

#define PFM_INVALID_ACTIVATION	(~0UL)

/*
 * depth of message queue
 */
#define PFM_MAX_MSGS		32
#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)

/*
 * type of a PMU register (bitmask).
 * bitmask structure:
 *	bit0   : register implemented
 *	bit1   : end marker
 *	bit2-3 : reserved
 *	bit4   : pmc has pmc.pm
 *	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
 *	bit6-7 : register type
 *	bit8-31: reserved
 */
#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
#define PFM_REG_IMPL		0x1 /* register implemented */
#define PFM_REG_END		0x2 /* end marker */
#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER		(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */

#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)

#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)

/* i assumed unsigned */
#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))

/* XXX: these assume that register i is implemented */
#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)

#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]

#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS

#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h)		(h)->ctx_task

#define PMU_PMC_OI		5 /* position of pmc.oi bit */

/* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)

#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)

#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
#define PFM_CODE_RR	0	/* requesting code range restriction */
#define PFM_DATA_RR	1	/* requesting data range restriction */

#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)

#define RDEP(x)	(1UL<<(x))

/*
 * context protection macros
 * in SMP:
 * 	- we need to protect against CPU concurrency (spin_lock)
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 * in UP:
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 *
 * spin_lock_irqsave()/spin_lock_irqrestore():
 * 	in SMP: local_irq_disable + spin_lock
 * 	in UP : local_irq_disable
 *
 * spin_lock()/spin_unlock():
 * 	in UP : removed automatically
 * 	in SMP: protect against context accesses from other CPU. interrupts
 * 	        are not masked. This is useful for the PMU interrupt handler
 * 	        because we know we will not get PMU concurrency in that code.
 */
#define PROTECT_CTX(c, f) \
	do {  \
		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
	} while(0)

#define UNPROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

#define PROTECT_CTX_NOPRINT(c, f) \
	do {  \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
	} while(0)

#define UNPROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

#define PROTECT_CTX_NOIRQ(c) \
	do {  \
		spin_lock(&(c)->ctx_lock); \
	} while(0)

#define UNPROTECT_CTX_NOIRQ(c) \
	do { \
		spin_unlock(&(c)->ctx_lock); \
	} while(0)

#ifdef CONFIG_SMP
/* per-CPU "activation" generation counter used to detect lazy PMU context
 * switching (whether the registers on this CPU still belong to a context). */
#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()

#else /* !CONFIG_SMP */
/* NOTE(review): the UP stubs take an argument while the SMP versions of
 * GET/INC_ACTIVATION take none — inconsistent arity inherited from the
 * upstream Linux source; callers must match the variant they compile against. */
#define SET_ACTIVATION(t) 	do {} while(0)
#define GET_ACTIVATION(t) 	do {} while(0)
#define INC_ACTIVATION(t) 	do {} while(0)
#endif /* CONFIG_SMP */

#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)

#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)

#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)

/*
 * cmp0 must be the value of pmc0
 */
#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)

#define PFMFS_MAGIC 0xa0b4d889

/*
 * debugging
 */
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
#define DPRINT(a) \
	do { \
		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
	} while (0)

#define DPRINT_ovfl(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
	} while (0)
#endif

/*
 * 64-bit software counter structure
 *
 * the next_reset_type is applied to the next call to pfm_reset_regs()
 */
typedef struct {
	unsigned long	val;		/* virtual 64bit counter value */
	unsigned long	lval;		/* last reset value */
	unsigned long	long_reset;	/* reset value on sampling overflow */
	unsigned long	short_reset;    /* reset value on overflow */
	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflow */
	unsigned long	seed;		/* seed for random-number generator */
	unsigned long	mask;		/* mask for random-number generator */
	unsigned int 	flags;		/* notify/do not notify */
	unsigned long	eventid;	/* overflow event identifier */
} pfm_counter_t;

/*
 * context flags
 */
typedef struct {
	unsigned int block:1;		/* when 1, task will blocked on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
	unsigned int is_sampling:1;	/* true if using a custom format */
	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
	unsigned int no_msg:1;		/* no message sent on overflow */
	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
	unsigned int reserved:22;
} pfm_context_flags_t;

#define PFM_TRAP_REASON_NONE		0x0	/* default value */
#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */


/*
 * perfmon context: encapsulates all the state of a monitoring session
 */
typedef struct pfm_context {
	spinlock_t		ctx_lock;		/* context protection */

	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */

	struct task_struct 	*ctx_task;		/* task to which context is attached */

	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */

#ifndef XEN
	struct completion	ctx_restart_done;  	/* use for blocking notification mode */
#endif

	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */

	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */

	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */

	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */

	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */

	u64			ctx_saved_psr_up;	/* only contains psr.up value */

	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */

	int			ctx_fd;			/* file descriptor used by this context */
	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */

	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

#ifndef XEN
	wait_queue_head_t	ctx_msgq_wait;
	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
	int			ctx_msgq_head;
	int			ctx_msgq_tail;
	struct fasync_struct	*ctx_async_queue;

	wait_queue_head_t	ctx_zombieq;		/* termination cleanup wait queue */
#endif
} pfm_context_t;

/*
 * magic number used to verify that structure is really
 * a perfmon context
 */
#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)

#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)

#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v)	do {} while(0)
#define GET_LAST_CPU(ctx)	do {} while(0)
#endif

/* shorthand accessors for the bitfields inside ctx_flags */
#define ctx_fl_block		ctx_flags.block
#define ctx_fl_system		ctx_flags.system
#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
#define ctx_fl_is_sampling	ctx_flags.is_sampling
#define ctx_fl_excl_idle	ctx_flags.excl_idle
#define ctx_fl_going_zombie	ctx_flags.going_zombie
#define ctx_fl_trap_reason	ctx_flags.trap_reason
#define ctx_fl_no_msg		ctx_flags.no_msg
#define ctx_fl_can_restart	ctx_flags.can_restart

/* NOTE(review): the trailing ';' after while(0) below is inherited from the
 * upstream source; it makes "if (x) PFM_SET_WORK_PENDING(t, v); else ..."
 * a syntax error. Left as-is because callers compile against it today. */
#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking

/*
 * global information about all sessions
 * mostly used to synchronize between system wide and per-process
 */
typedef struct {
	spinlock_t		pfs_lock;		   /* lock the structure */

	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
#ifdef XEN
/* sentinel "task" marking a system-wide session owned by xenoprof */
#define XENOPROF_TASK	((struct task_struct*)1)
#endif
} pfm_session_t;

/*
 * information about a PMC or PMD.
 * dep_pmd[]: a bitmask of dependent PMD registers
 * dep_pmc[]: a bitmask of dependent PMC registers
 */
typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
typedef struct {
	unsigned int		type;
	int			pm_pos;
	unsigned long		default_value;	/* power-on default value */
	unsigned long		reserved_mask;	/* bitmask of reserved bits */
	pfm_reg_check_t		read_check;
	pfm_reg_check_t		write_check;
	unsigned long		dep_pmd[4];
	unsigned long		dep_pmc[4];
} pfm_reg_desc_t;

/* assume cnum is a valid monitor */
#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)

/*
 * This structure is initialized at boot time and contains
 * a description of the PMU main characteristics.
 *
 * If the probe function is defined, detection is based
 * on its return value:
 * 	- 0 means recognized PMU
 * 	- anything else means not supported
 * When the probe function is not defined, then the pmu_family field
 * is used and it must match the host CPU family such that:
 * 	- cpu->family & config->pmu_family != 0
 */
typedef struct {
	unsigned long  ovfl_val;	/* overflow value for counters */

	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */

	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */

	char	      *pmu_name;	/* PMU family name */
	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
	unsigned int  flags;		/* pmu specific flags */
	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
	int           (*probe)(void);   /* customized probe routine */
	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
} pmu_config_t;

/*
 * PMU specific flags
 */
#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */

/*
 * debug register related type definitions
 */
typedef struct {
	unsigned long ibr_mask:56;
	unsigned long ibr_plm:4;
	unsigned long ibr_ig:3;
	unsigned long ibr_x:1;
} ibr_mask_reg_t;

typedef struct {
	unsigned long dbr_mask:56;
	unsigned long dbr_plm:4;
	unsigned long dbr_ig:2;
	unsigned long dbr_w:1;
	unsigned long dbr_r:1;
} dbr_mask_reg_t;

typedef union {
	unsigned long  val;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -