perfmon.c
/*
 * This file implements the perfmon subsystem which is used
 * to program the IA-64 Performance Monitoring Unit (PMU).
 *
 * Originally Written by Ganesh Venkitachalam, IBM Corp.
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
 *
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 *
 * Copyright (C) 1999-2002 Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/wrapper.h>
#include <linux/mm.h>

#include <asm/bitops.h>
#include <asm/errno.h>
#include <asm/page.h>
#include <asm/pal.h>
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h> /* for ia64_get_itc() */

#ifdef CONFIG_PERFMON

/*
 * For PMUs which rely on the debug registers for some features, you must
 * enable the following flag to activate support for accessing the
 * registers via the perfmonctl() interface.
 */
#ifdef CONFIG_ITANIUM
#define PFM_PMU_USES_DBR	1
#endif

/*
 * perfmon context states
 */
#define PFM_CTX_DISABLED	0
#define PFM_CTX_ENABLED		1

/*
 * Reset register flags
 */
#define PFM_RELOAD_LONG_RESET	1
#define PFM_RELOAD_SHORT_RESET	2

/*
 * Misc macros and definitions
 */
#define PMU_FIRST_COUNTER	4

#define PFM_IS_DISABLED()	pmu_conf.pfm_is_disabled

#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_soft_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
#define PFM_FL_INHERIT_MASK	(PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)

#define PMC_IS_IMPL(i)		(i < pmu_conf.num_pmcs && pmu_conf.impl_regs[i>>6] & (1UL<<(i)%64))
#define PMD_IS_IMPL(i)		(i < pmu_conf.num_pmds && pmu_conf.impl_regs[4+(i>>6)] & (1UL<<(i)%64))
#define PMD_IS_COUNTING(i)	(i >= 0 && i < 256 && pmu_conf.counter_pmds[i>>6] & (1UL<<(i)%64))
#define PMC_IS_COUNTING(i)	PMD_IS_COUNTING(i)

#define IBR_IS_IMPL(k)		(k < pmu_conf.num_ibrs)
#define DBR_IS_IMPL(k)		(k < pmu_conf.num_dbrs)

#define PMC_IS_BTB(a)		(((pfm_monitor_t *)(a))->pmc_es == PMU_BTB_EVENT)

#define LSHIFT(x)		(1UL<<(x))
#define PMM(x)			LSHIFT(x)
#define PMC_IS_MONITOR(c)	((pmu_conf.monitor_pmcs[0] & PMM((c))) != 0)

#define CTX_IS_ENABLED(c)	((c)->ctx_flags.state == PFM_CTX_ENABLED)
#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
#define CTX_INHERIT_MODE(c)	((c)->ctx_fl_inherit)
#define CTX_HAS_SMPL(c)		((c)->ctx_psb != NULL)

#define CTX_USED_PMD(ctx,n)	(ctx)->ctx_used_pmds[(n)>>6] |= 1UL<<((n)%64)
#define CTX_USED_IBR(ctx,n)	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<<((n)%64)
#define CTX_USED_DBR(ctx,n)	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<<((n)%64)
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg == 1)

#define LOCK_CTX(ctx)		spin_lock(&(ctx)->ctx_lock)
#define UNLOCK_CTX(ctx)		spin_unlock(&(ctx)->ctx_lock)

#define SET_PMU_OWNER(t)	do { pmu_owners[smp_processor_id()].owner = (t); } while(0)
#define PMU_OWNER()		pmu_owners[smp_processor_id()].owner

#define LOCK_PFS()		spin_lock(&pfm_sessions.pfs_lock)
#define UNLOCK_PFS()		spin_unlock(&pfm_sessions.pfs_lock)

#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)

/*
 * debugging
 */
#define DBprintk(a) \
	do { \
		if (pfm_debug_mode > 0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
	} while (0)
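/*
 * Illustrative usage note, not part of the original source: because the
 * macro body expands "printk a" verbatim, the argument must itself be a
 * parenthesized printk() argument list, which is why call sites use double
 * parentheses, e.g.:
 *
 *	DBprintk(("ctx=%p cpu=%d\n", ctx, smp_processor_id()));
 *
 * When pfm_debug_mode == 0, the function/line/CPU prefix and the message
 * are both suppressed.
 */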
/*
 * These are some helpful architected PMC and IBR/DBR register layouts
 */
typedef struct {
	unsigned long pmc_plm:4;	/* privilege level mask */
	unsigned long pmc_ev:1;		/* external visibility */
	unsigned long pmc_oi:1;		/* overflow interrupt */
	unsigned long pmc_pm:1;		/* privileged monitor */
	unsigned long pmc_ig1:1;	/* reserved */
	unsigned long pmc_es:8;		/* event select */
	unsigned long pmc_ig2:48;	/* reserved */
} pfm_monitor_t;

/*
 * There is one such data structure per perfmon context. It is used to describe the
 * sampling buffer. It is to be shared among siblings whereas the pfm_context
 * is not. Therefore we maintain a refcnt which is incremented on fork().
 * This buffer is private to the kernel: only the actual sampling buffer,
 * including its header, is exposed to the user. This construct allows us to
 * export the buffer read-write, if needed, without worrying about security
 * problems.
 */
typedef struct _pfm_smpl_buffer_desc {
	spinlock_t		psb_lock;	/* protection lock */
	unsigned long		psb_refcnt;	/* how many users for the buffer */
	int			psb_flags;	/* bitvector of flags */

	void			*psb_addr;	/* points to location of first entry */
	unsigned long		psb_entries;	/* maximum number of entries */
	unsigned long		psb_size;	/* aligned size of buffer */
	unsigned long		psb_index;	/* next free entry slot XXX: must use the one in buffer */
	unsigned long		psb_entry_size;	/* size of each entry including entry header */

	perfmon_smpl_hdr_t	*psb_hdr;	/* points to sampling buffer header */

	struct _pfm_smpl_buffer_desc *psb_next;	/* next psb, used for rvfreeing of psb_hdr */
} pfm_smpl_buffer_desc_t;

#define LOCK_PSB(p)	spin_lock(&(p)->psb_lock)
#define UNLOCK_PSB(p)	spin_unlock(&(p)->psb_lock)

#define PFM_PSB_VMA	0x1	/* a VMA is describing the buffer */

/*
 * This structure is initialized at boot time and contains
 * a description of the PMU main characteristics as indicated by PAL.
 */
typedef struct {
	unsigned long pfm_is_disabled;	/* indicates if perfmon is working properly */
	unsigned long perf_ovfl_val;	/* overflow value for generic counters */
	unsigned long max_counters;	/* upper limit on counter pair (PMC/PMD) */
	unsigned long num_pmcs;		/* highest PMC implemented (may have holes) */
	unsigned long num_pmds;		/* highest PMD implemented (may have holes) */
	unsigned long impl_regs[16];	/* buffer used to hold implemented PMC/PMD mask */
	unsigned long num_ibrs;		/* number of instruction debug registers */
	unsigned long num_dbrs;		/* number of data debug registers */
	unsigned long monitor_pmcs[4];	/* which pmc are controlling monitors */
	unsigned long counter_pmds[4];	/* which pmd are used as counters */
} pmu_config_t;

/*
 * 64-bit software counter structure
 */
typedef struct {
	u64 val;	/* virtual 64bit counter value */
	u64 ival;	/* initial value from user */
	u64 long_reset;	/* reset value on sampling overflow */
	u64 short_reset;/* reset value on overflow */
	u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */
	int flags;	/* notify/do not notify */
} pfm_counter_t;
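/*
 * Worked example (illustrative, not from the original source): a hardware
 * PMD only implements the bits covered by pmu_conf.perf_ovfl_val; the bits
 * above that are emulated in software through pfm_counter_t.val. Assuming
 * a 47-bit hardware counter, i.e. perf_ovfl_val == (1UL<<47)-1, the virtual
 * 64-bit value read back by pfm_read_soft_counter() (further down in this
 * file) is:
 *
 *	ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & perf_ovfl_val)
 *
 * and pfm_write_soft_counter() splits a user-supplied 64-bit value the same
 * way: the bits above the mask go into .val, while the PMD write simply
 * ignores the unimplemented high bits.
 */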
/*
 * perfmon context. One per process, is cloned on fork() depending on
 * inheritance flags
 */
typedef struct {
	unsigned int state:1;		/* 0=disabled, 1=enabled */
	unsigned int inherit:2;		/* inherit mode */
	unsigned int block:1;		/* when 1, task will be blocked on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int frozen:1;		/* pmu must be kept frozen on ctxsw in */
	unsigned int protected:1;	/* allow access to creator of context only */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
	unsigned int reserved:24;
} pfm_context_flags_t;

/*
 * perfmon context: encapsulates all the state of a monitoring session
 * XXX: probably need to change layout
 */
typedef struct pfm_context {
	pfm_smpl_buffer_desc_t	*ctx_psb;	/* sampling buffer, if any */
	unsigned long		ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

	spinlock_t		ctx_lock;
	pfm_context_flags_t	ctx_flags;	/* block/noblock */

	struct task_struct	*ctx_notify_task; /* who to notify on overflow */
	struct task_struct	*ctx_owner;	/* pid of creator (debug) */

	unsigned long		ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
	unsigned long		ctx_smpl_regs[4]; /* which registers to record on overflow */

	struct semaphore	ctx_restart_sem; /* used for blocking notification mode */

	unsigned long		ctx_used_pmds[4];   /* bitmask of used PMD (speedup ctxsw) */
	unsigned long		ctx_saved_pmcs[4];  /* bitmask of PMC to save on ctxsw */
	unsigned long		ctx_reload_pmcs[4]; /* bitmask of PMC to reload on ctxsw (SMP) */

	unsigned long		ctx_used_ibrs[4];   /* bitmask of used IBR (speedup ctxsw) */
	unsigned long		ctx_used_dbrs[4];   /* bitmask of used DBR (speedup ctxsw) */

	pfm_counter_t		ctx_soft_pmds[IA64_NUM_PMD_REGS]; /* XXX: size should be dynamic */

	u64			ctx_saved_psr;	/* copy of psr used for lazy ctxsw */
	unsigned long		ctx_saved_cpus_allowed; /* copy of the task cpus_allowed (system wide) */
	unsigned long		ctx_cpu;	/* cpu to which perfmon is applied (system wide) */

	atomic_t		ctx_saving_in_progress; /* flag indicating actual save in progress */
	atomic_t		ctx_last_cpu;	/* CPU id of current or last CPU used */
} pfm_context_t;

#define ctx_fl_inherit		ctx_flags.inherit
#define ctx_fl_block		ctx_flags.block
#define ctx_fl_system		ctx_flags.system
#define ctx_fl_frozen		ctx_flags.frozen
#define ctx_fl_protected	ctx_flags.protected
#define ctx_fl_using_dbreg	ctx_flags.using_dbreg

/*
 * global information about all sessions
 * mostly used to synchronize between system wide and per-process
 */
typedef struct {
	spinlock_t	pfs_lock;		/* lock the structure */

	unsigned long	pfs_task_sessions;	/* number of per task sessions */
	unsigned long	pfs_sys_sessions;	/* number of per system wide sessions */
	unsigned long	pfs_sys_use_dbregs;	/* incremented when a system wide session uses debug regs */
	unsigned long	pfs_ptrace_use_dbregs;	/* incremented when a process uses debug regs */
	struct task_struct *pfs_sys_session[NR_CPUS]; /* points to task owning a system-wide session */
} pfm_session_t;
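/*
 * Worked example (illustrative, not from the original source): the
 * ctx_used_* fields in pfm_context_t above are 256-bit bitvectors stored as
 * four 64-bit words, maintained by the CTX_USED_* macros defined earlier:
 * (n)>>6 selects the word and (n)%64 the bit within it. For instance:
 *
 *	CTX_USED_PMD(ctx, 5);	sets bit 5 of ctx->ctx_used_pmds[0]
 *	CTX_USED_PMD(ctx, 70);	sets bit 6 of ctx->ctx_used_pmds[1]
 *
 * The context switch code can then restrict itself to the marked registers
 * instead of walking every implemented PMD.
 */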
/*
 * structure used to pass argument to/from remote CPU
 * using IPI to check and possibly save the PMU context on SMP systems.
 *
 * not used in UP kernels
 */
typedef struct {
	struct task_struct *task;	/* which task we are interested in */
	int retval;			/* return value of the call: 0=you can proceed, 1=need to wait for completion */
} pfm_smp_ipi_arg_t;

/*
 * perfmon command descriptions
 */
typedef struct {
	int		(*cmd_func)(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	int		cmd_flags;
	unsigned int	cmd_narg;
	size_t		cmd_argsize;
} pfm_cmd_desc_t;

#define PFM_CMD_PID		0x1	/* command requires pid argument */
#define PFM_CMD_ARG_READ	0x2	/* command must read argument(s) */
#define PFM_CMD_ARG_WRITE	0x4	/* command must write argument(s) */
#define PFM_CMD_CTX		0x8	/* command needs a perfmon context */
#define PFM_CMD_NOCHK		0x10	/* command does not need to check task's state */

#define PFM_CMD_IDX(cmd)	(cmd)

#define PFM_CMD_IS_VALID(cmd)	((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \
				 && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL)

#define PFM_CMD_USE_PID(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
#define PFM_CMD_READ_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
#define PFM_CMD_WRITE_ARG(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_WRITE) != 0)
#define PFM_CMD_USE_CTX(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
#define PFM_CMD_CHK(cmd)	((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)

#define PFM_CMD_ARG_MANY	-1	/* cannot be zero */

#define PFM_CMD_NARG(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
#define PFM_CMD_ARG_SIZE(cmd)	(pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)

/*
 * perfmon internal variables
 */
static pmu_config_t	pmu_conf;	/* PMU configuration */
static int		pfm_debug_mode;	/* 0= nodebug, >0= debug output on */
static pfm_session_t	pfm_sessions;	/* global sessions information */

static struct proc_dir_entry *perfmon_dir; /* for debug only */

static unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
static unsigned long pfm_ovfl_intr_count;		/* keep track of ovfl interrupts */
static unsigned long pfm_recorded_samples_count;

static unsigned long reset_pmcs[IA64_NUM_PMC_REGS];	/* contains PAL reset values for PMCS */

static void pfm_vm_close(struct vm_area_struct *area);

static struct vm_operations_struct pfm_vm_ops = {
	close: pfm_vm_close
};
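/*
 * Hypothetical sketch, not part of the original source (pfm_cmd_tab itself
 * is not shown in this excerpt): perfmonctl() commands are dispatched
 * through pfm_cmd_desc_t entries, which the PFM_CMD_* accessor macros above
 * decode. An entry for a command that reads a variable-length vector of
 * register arguments might look like:
 *
 *	{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ,
 *	  PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t) },
 *
 * PFM_CMD_IS_VALID() rejects out-of-range indices and NULL cmd_func, while
 * PFM_CMD_NARG()/PFM_CMD_ARG_SIZE() tell the dispatcher how much argument
 * data to copy in (PFM_CMD_ARG_READ) or back out (PFM_CMD_ARG_WRITE).
 */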
/*
 * keep track of task owning the PMU per CPU.
 */
static struct {
	struct task_struct *owner;
} ____cacheline_aligned pmu_owners[NR_CPUS];

/*
 * forward declarations
 */
static void ia64_reset_pmu(struct task_struct *);
#ifdef CONFIG_SMP
static void pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx);
#endif
static void pfm_lazy_save_regs(struct task_struct *ta);

static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
	return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.perf_ovfl_val);
}

static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
	ctx->ctx_soft_pmds[i].val = val & ~pmu_conf.perf_ovfl_val;
	/*
	 * writing to unimplemented part is ignored, so we do not need to
	 * mask off top part
	 */
	ia64_set_pmd(i, val);
}

/*
 * finds the number of PM(C|D) registers given
 * the bitvector returned by PAL
 */
static unsigned long __init
find_num_pm_regs(long *buffer)
{
	int i = 3; /* 4 words per bitvector */

	/* start from the most significant word */
	while (i >= 0 && buffer[i] == 0) i--;
	if (i < 0) {
		printk(KERN_ERR "perfmon: No bit set in pm_buffer\n");
		return 0;
	}
	return 1 + ia64_fls(buffer[i]) + 64 * i;
}

/*
 * Generates a unique (per CPU) timestamp
 */
static inline unsigned long
pfm_get_stamp(void)
{
	/*
	 * XXX: must find something more efficient
	 */
	return ia64_get_itc();
}

/*
 * Given PGD from the address space's page table, return the kernel
 * virtual mapping of the physical memory mapped at ADR.
 */
static inline unsigned long
uvirt_to_kva(pgd_t *pgd, unsigned long adr)
{
	unsigned long ret = 0UL;
	pmd_t *pmd;
	pte_t *ptep, pte;

	if (!pgd_none(*pgd)) {
		pmd = pmd_offset(pgd, adr);
		if (!pmd_none(*pmd)) {
			ptep = pte_offset(pmd, adr);
			pte = *ptep;
			if (pte_present(pte)) {
				ret = (unsigned long) page_address(pte_page(pte));
				ret |= (adr & (PAGE_SIZE - 1));
			}
		}
	}
	DBprintk(("[%d] uv2kva(%lx-->%lx)\n", current->pid, adr, ret));
	return ret;
}

/*
 * Here we want the physical address of the memory.
 * This is used when initializing the contents of the
 * area and marking the pages as reserved.
 */
static inline unsigned long
pfm_kvirt_to_pa(unsigned long adr)
{
	__u64 pa = ia64_tpa(adr);
	//DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
	return pa;
}

static void *
pfm_rvmalloc(unsigned long size)
{
	void *mem;
	unsigned long adr, page;

	mem = vmalloc(size);
	if (mem) {
		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
		memset(mem, 0, size); /* Clear the ram out, no junk to the user */
		adr = (unsigned long) mem;
		while (size > 0) {
			page = pfm_kvirt_to_pa(adr);
			mem_map_reserve(virt_to_page(__va(page)));
			adr  += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	}
	return mem;
}

static void
pfm_rvfree(void *mem, unsigned long size)
{
	unsigned long adr, page = 0;

	if (mem) {
		adr = (unsigned long) mem;
		while (size > 0) {
			page = pfm_kvirt_to_pa(adr);
			mem_map_unreserve(virt_to_page(__va(page)));
			adr  += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
		vfree(mem);
	}
	return;
}

/*
 * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
 * attached to the context AND the current task has a mapping for it, i.e., it is the original
 * creator of the context.
 *