perfmon.c
/*
 * This file contains the code to configure and read/write the ia64 performance
 * monitoring stuff.
 *
 * Originally written by Ganesh Venkitachalam, IBM Corp.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 *
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
 * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2000-2001 Stephane Eranian <eranian@hpl.hp.com>
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/wrapper.h>
#include <linux/mm.h>

#include <asm/bitops.h>
#include <asm/efi.h>
#include <asm/errno.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/pal.h>
#include <asm/perfmon.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h>	/* for ia64_get_itc() */

#ifdef CONFIG_PERFMON

#define PFM_VERSION		"0.3"
#define PFM_SMPL_HDR_VERSION	1

#define PMU_FIRST_COUNTER	4	/* first generic counter */

#define PFM_WRITE_PMCS		0xa0
#define PFM_WRITE_PMDS		0xa1
#define PFM_READ_PMDS		0xa2
#define PFM_STOP		0xa3
#define PFM_START		0xa4
#define PFM_ENABLE		0xa5	/* unfreeze only */
#define PFM_DISABLE		0xa6	/* freeze only */
#define PFM_RESTART		0xcf
#define PFM_CREATE_CONTEXT	0xa7
#define PFM_DESTROY_CONTEXT	0xa8

/*
 * Those 2 are just meant for debugging. I considered using sysctl() for
 * that but it is a little bit too pervasive. This solution is at least
 * self-contained.
 */
#define PFM_DEBUG_ON		0xe0
#define PFM_DEBUG_OFF		0xe1
#define PFM_DEBUG_BASE		PFM_DEBUG_ON

/*
 * perfmon API flags
 */
#define PFM_FL_INHERIT_NONE	 0x00	/* never inherit a context across fork (default) */
#define PFM_FL_INHERIT_ONCE	 0x01	/* clone pfm_context only once across fork() */
#define PFM_FL_INHERIT_ALL	 0x02	/* always clone pfm_context across fork() */
#define PFM_FL_SMPL_OVFL_NOBLOCK 0x04	/* do not block on sampling buffer overflow */
#define PFM_FL_SYSTEM_WIDE	 0x08	/* create a system wide context */
#define PFM_FL_EXCL_INTR	 0x10	/* exclude interrupts from system wide monitoring */

/*
 * PMC API flags
 */
#define PFM_REGFL_OVFL_NOTIFY	1	/* send notification on overflow */

/*
 * Private flags and masks
 */
#define PFM_FL_INHERIT_MASK	(PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)

#ifdef CONFIG_SMP
#define cpu_is_online(i)	(cpu_online_map & (1UL << i))
#else
#define cpu_is_online(i)	1
#endif

#define PMC_IS_IMPL(i)		(i < pmu_conf.num_pmcs && pmu_conf.impl_regs[i>>6] & (1UL << (i & (64-1))))
#define PMD_IS_IMPL(i)		(i < pmu_conf.num_pmds && pmu_conf.impl_regs[4+(i>>6)] & (1UL << (i & (64-1))))
#define PMD_IS_COUNTER(i)	(i >= PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters))
#define PMC_IS_COUNTER(i)	(i >= PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters))

/* This is the Itanium-specific PMC layout for counter config */
typedef struct {
	unsigned long pmc_plm:4;	/* privilege level mask */
	unsigned long pmc_ev:1;		/* external visibility */
	unsigned long pmc_oi:1;		/* overflow interrupt */
	unsigned long pmc_pm:1;		/* privileged monitor */
	unsigned long pmc_ig1:1;	/* reserved */
	unsigned long pmc_es:7;		/* event select */
	unsigned long pmc_ig2:1;	/* reserved */
	unsigned long pmc_umask:4;	/* unit mask */
	unsigned long pmc_thres:3;	/* threshold */
	unsigned long pmc_ig3:1;	/* reserved (missing from table on p6-17) */
	unsigned long pmc_ism:2;	/* instruction set mask */
	unsigned long pmc_ig4:38;	/* reserved */
} pmc_counter_reg_t;

/* test for EAR/BTB configuration */
#define PMU_DEAR_EVENT	0x67
#define PMU_IEAR_EVENT	0x23
#define PMU_BTB_EVENT	0x11

#define PMC_IS_DEAR(a)	(((pmc_counter_reg_t *)(a))->pmc_es == PMU_DEAR_EVENT)
#define PMC_IS_IEAR(a)	(((pmc_counter_reg_t *)(a))->pmc_es == PMU_IEAR_EVENT)
#define PMC_IS_BTB(a)	(((pmc_counter_reg_t *)(a))->pmc_es == PMU_BTB_EVENT)

/*
 * This header is at the beginning of the sampling buffer returned to the user.
 * It is exported as read-only at this point. It is directly followed by the
 * first record.
 */
typedef struct {
	int hdr_version;		/* could be used to differentiate formats */
	int hdr_reserved;
	unsigned long hdr_entry_size;	/* size of one entry in bytes */
	unsigned long hdr_count;	/* how many valid entries */
	unsigned long hdr_pmds;		/* which pmds are recorded */
} perfmon_smpl_hdr_t;

/*
 * Each entry in the buffer begins with a header as follows.
 * The header is directly followed by the PMDs to be saved, in increasing index order:
 * PMD4, PMD5, .... How many PMDs are present is determined by the tool, which must
 * keep track of it when generating the final trace file.
 */
typedef struct {
	int pid;		/* identification of process */
	int cpu;		/* which cpu was used */
	unsigned long rate;	/* initial value of this counter */
	unsigned long stamp;	/* timestamp */
	unsigned long ip;	/* where the overflow interrupt happened */
	unsigned long regs;	/* which registers overflowed (up to 64) */
} perfmon_smpl_entry_t;

/*
 * There is one such data structure per perfmon context. It is used to describe the
 * sampling buffer. It is to be shared among siblings, whereas the pfm_context isn't.
 * Therefore we maintain a refcnt which is incremented on fork().
 * This descriptor is private to the kernel; only the actual sampling buffer, including its
 * header, is exposed to the user. This construct allows us to export the buffer read-write,
 * if needed, without worrying about security problems.
 */
typedef struct {
	atomic_t psb_refcnt;		/* how many users for the buffer */
	int reserved;
	void *psb_addr;			/* points to location of first entry */
	unsigned long psb_entries;	/* maximum number of entries */
	unsigned long psb_size;		/* aligned size of buffer */
	unsigned long psb_index;	/* next free entry slot */
	unsigned long psb_entry_size;	/* size of each entry including entry header */
	perfmon_smpl_hdr_t *psb_hdr;	/* points to sampling buffer header */
} pfm_smpl_buffer_desc_t;

/*
 * This structure is initialized at boot time and contains
 * a description of the PMU main characteristics as indicated
 * by PAL.
 */
typedef struct {
	unsigned long pfm_is_disabled;	/* indicates if perfmon is working properly */
	unsigned long perf_ovfl_val;	/* overflow value for generic counters */
	unsigned long max_counters;	/* upper limit on counter pairs (PMC/PMD) */
	unsigned long num_pmcs;		/* highest PMC implemented (may have holes) */
	unsigned long num_pmds;		/* highest PMD implemented (may have holes) */
	unsigned long impl_regs[16];	/* buffer used to hold implemented PMC/PMD mask */
} pmu_config_t;

#define PERFMON_IS_DISABLED()	pmu_conf.pfm_is_disabled

typedef struct {
	__u64 val;		/* virtual 64bit counter value */
	__u64 ival;		/* initial value from user */
	__u64 smpl_rval;	/* reset value on sampling overflow */
	__u64 ovfl_rval;	/* reset value on overflow */
	int flags;		/* notify/do not notify */
} pfm_counter_t;

#define PMD_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
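
/*
 * Illustrative sketch, not part of the original driver: given the
 * perfmon_smpl_hdr_t / perfmon_smpl_entry_t layout above, a consumer that has
 * mapped the sampling buffer read-only could walk it as shown below.  The
 * function name is hypothetical and the code is kept out of the build.
 */
#if 0
static void
pfm_smpl_walk_example(const perfmon_smpl_hdr_t *hdr)
{
	const char *pos = (const char *)(hdr + 1);	/* first record follows the header */
	unsigned long npmds = hweight64(hdr->hdr_pmds);	/* PMDs recorded per entry */
	unsigned long i, j;

	for (i = 0; i < hdr->hdr_count; i++, pos += hdr->hdr_entry_size) {
		const perfmon_smpl_entry_t *ent = (const perfmon_smpl_entry_t *)pos;
		const unsigned long *pmds = (const unsigned long *)(ent + 1);

		printk("pid=%d cpu=%d ip=0x%lx regs=0x%lx", ent->pid, ent->cpu, ent->ip, ent->regs);
		for (j = 0; j < npmds; j++) printk(" pmd=0x%lx", pmds[j]);
		printk("\n");
	}
}
#endif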

/*
 * perfmon context. One per process; it is cloned on fork() depending on the inheritance flags.
 */
typedef struct {
	unsigned int inherit:2;		/* inherit mode */
	unsigned int noblock:1;		/* block/don't block on overflow with notification */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int frozen:1;		/* pmu must be kept frozen on ctxsw in */
	unsigned int exclintr:1;	/* exclude interrupts from system wide monitoring */
	unsigned int reserved:26;
} pfm_context_flags_t;

typedef struct pfm_context {
	pfm_smpl_buffer_desc_t *ctx_smpl_buf;	/* sampling buffer descriptor, if any */
	unsigned long ctx_dear_counter;		/* which PMD holds D-EAR */
	unsigned long ctx_iear_counter;		/* which PMD holds I-EAR */
	unsigned long ctx_btb_counter;		/* which PMD holds BTB */
	spinlock_t ctx_notify_lock;
	pfm_context_flags_t ctx_flags;		/* block/noblock */
	int ctx_notify_sig;			/* XXX: SIGPROF or other */
	struct task_struct *ctx_notify_task;	/* who to notify on overflow */
	struct task_struct *ctx_creator;	/* pid of creator (debug) */
	unsigned long ctx_ovfl_regs;		/* which registers just overflowed (notification) */
	unsigned long ctx_smpl_regs;		/* which registers to record on overflow */
	struct semaphore ctx_restart_sem;	/* used for blocking notification mode */
	unsigned long ctx_used_pmds[4];		/* bitmask of used PMDs (speeds up ctxsw) */
	unsigned long ctx_used_pmcs[4];		/* bitmask of used PMCs (speeds up ctxsw) */
	pfm_counter_t ctx_pmds[IA64_NUM_PMD_COUNTERS];	/* XXX: size should be dynamic */
} pfm_context_t;

#define CTX_USED_PMD(ctx,n)	(ctx)->ctx_used_pmds[(n)>>6] |= 1UL << ((n) % 64)
#define CTX_USED_PMC(ctx,n)	(ctx)->ctx_used_pmcs[(n)>>6] |= 1UL << ((n) % 64)

#define ctx_fl_inherit	ctx_flags.inherit
#define ctx_fl_noblock	ctx_flags.noblock
#define ctx_fl_system	ctx_flags.system
#define ctx_fl_frozen	ctx_flags.frozen
#define ctx_fl_exclintr	ctx_flags.exclintr

#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_noblock == 1)
#define CTX_INHERIT_MODE(c)	((c)->ctx_fl_inherit)
#define CTX_HAS_SMPL(c)		((c)->ctx_smpl_buf != NULL)

static pmu_config_t pmu_conf;

/* for debug only */
static int pfm_debug = 0;	/* 0 = no debug, >0 = debug output on */

#define DBprintk(a) \
	do { \
		if (pfm_debug > 0) { printk(__FUNCTION__" %d: ", __LINE__); printk a; } \
	} while (0)

static void ia64_reset_pmu(void);
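
/*
 * Illustrative sketch, not in the original code: ctx_used_pmds[]/ctx_used_pmcs[]
 * are indexed one word per 64 registers, exactly as CTX_USED_PMD() sets them.
 * A read-back test of the same form would look like this; the helper name is
 * hypothetical and the code is kept out of the build.
 */
#if 0
static inline int
pfm_ctx_pmd_is_used(pfm_context_t *ctx, int n)
{
	return (ctx->ctx_used_pmds[n>>6] >> (n & 63)) & 0x1;
}
#endif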

/*
 * structure used to pass information between the interrupt handler
 * and the tasklet.
 */
typedef struct {
	pid_t to_pid;		/* which process to notify */
	pid_t from_pid;		/* which process is the source of the overflow */
	int sig;		/* with which signal */
	unsigned long bitvect;	/* which counters have overflowed */
} notification_info_t;

typedef struct {
	unsigned long pfs_proc_sessions;
	unsigned long pfs_sys_session;	/* can only be 0/1 */
	unsigned long pfs_dfl_dcr;	/* XXX: hack */
	unsigned int pfs_pp;
} pfm_session_t;

struct {
	struct task_struct *owner;
} ____cacheline_aligned pmu_owners[NR_CPUS];

/*
 * helper macros
 */
#define SET_PMU_OWNER(t)	do { pmu_owners[smp_processor_id()].owner = (t); } while(0)
#define PMU_OWNER()		pmu_owners[smp_processor_id()].owner

#ifdef CONFIG_SMP
#define PFM_CAN_DO_LAZY()	(smp_num_cpus == 1 && pfs_info.pfs_sys_session == 0)
#else
#define PFM_CAN_DO_LAZY()	(pfs_info.pfs_sys_session == 0)
#endif

static void pfm_lazy_save_regs (struct task_struct *ta);

/* for debug only */
static struct proc_dir_entry *perfmon_dir;

/*
 * XXX: hack to indicate that a system wide monitoring session is active
 */
static pfm_session_t pfs_info;

/*
 * finds the number of PM(C|D) registers given
 * the bitvector returned by PAL
 */
static unsigned long __init
find_num_pm_regs(long *buffer)
{
	int i = 3; /* 4 words per bitvector */

	/* start from the most significant word */
	while (i >= 0 && buffer[i] == 0) i--;

	if (i < 0) {
		printk(KERN_ERR "perfmon: No bit set in pm_buffer\n");
		return 0;
	}
	return 1 + ia64_fls(buffer[i]) + 64 * i;
}
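
/*
 * Illustrative worked example, not in the original source, assuming the
 * 0-based most-significant-bit semantics of ia64_fls() used above: if PAL
 * reports an implementation bitvector whose only non-zero word is word 0 with
 * value 0xff, the scan stops at i = 0, ia64_fls(0xff) = 7, and the function
 * returns 1 + 7 + 64*0 = 8 registers (holes within that range remain possible).
 */
#if 0
static void __init
find_num_pm_regs_example(void)
{
	long pm_buffer[4] = { 0xff, 0, 0, 0 };	/* PAL says bits 0-7 are implemented */

	printk("num regs = %lu\n", find_num_pm_regs(pm_buffer));	/* prints 8 */
}
#endif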

/*
 * Generates a unique (per CPU) timestamp
 */
static inline unsigned long
perfmon_get_stamp(void)
{
	/*
	 * XXX: maybe find something more efficient
	 */
	return ia64_get_itc();
}

/* Given the PGD from the address space's page table, return the kernel
 * virtual mapping of the physical memory mapped at ADR.
 */
static inline unsigned long
uvirt_to_kva(pgd_t *pgd, unsigned long adr)
{
	unsigned long ret = 0UL;
	pmd_t *pmd;
	pte_t *ptep, pte;

	if (!pgd_none(*pgd)) {
		pmd = pmd_offset(pgd, adr);
		if (!pmd_none(*pmd)) {
			ptep = pte_offset(pmd, adr);
			pte = *ptep;
			if (pte_present(pte)) {
				ret = (unsigned long) page_address(pte_page(pte));
				ret |= (adr & (PAGE_SIZE - 1));
			}
		}
	}
	DBprintk(("uv2kva(%lx-->%lx)\n", adr, ret));
	return ret;
}

/* Here we want the physical address of the memory.
 * This is used when initializing the contents of the
 * area and marking the pages as reserved.
 */
static inline unsigned long
kvirt_to_pa(unsigned long adr)
{
	__u64 pa = ia64_tpa(adr);
	DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
	return pa;
}

static void *
rvmalloc(unsigned long size)
{
	void *mem;
	unsigned long adr, page;

	/* XXX: may have to revisit this part because
	 * vmalloc() does not necessarily return a page-aligned buffer.
	 * This may be a security problem when mapped at user level.
	 */
	mem = vmalloc(size);
	if (mem) {
		memset(mem, 0, size); /* Clear the ram out, no junk to the user */
		adr = (unsigned long) mem;
		while (size > 0) {
			page = kvirt_to_pa(adr);
			mem_map_reserve(virt_to_page(__va(page)));
			adr  += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
	}
	return mem;
}

static void
rvfree(void *mem, unsigned long size)
{
	unsigned long adr, page;

	if (mem) {
		adr = (unsigned long) mem;
		while (size > 0) {
			page = kvirt_to_pa(adr);
			mem_map_unreserve(virt_to_page(__va(page)));
			adr  += PAGE_SIZE;
			size -= PAGE_SIZE;
		}
		vfree(mem);
	}
}

static pfm_context_t *
pfm_context_alloc(void)
{
	pfm_context_t *pfc;

	/* allocate context descriptor */
	pfc = vmalloc(sizeof(*pfc));
	if (pfc) memset(pfc, 0, sizeof(*pfc));

	return pfc;
}

static void
pfm_context_free(pfm_context_t *pfc)
{
	if (pfc) vfree(pfc);
}

static int
pfm_remap_buffer(unsigned long buf, unsigned long addr, unsigned long size)
{
	unsigned long page;

	while (size > 0) {
		page = kvirt_to_pa(buf);
		if (remap_page_range(addr, page, PAGE_SIZE, PAGE_SHARED)) return -ENOMEM;
		addr += PAGE_SIZE;
		buf  += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	return 0;
}

/*
 * counts the number of PMDs to save per entry.
 * This code is generic enough to accommodate more than 64 PMDs when they become available.
 */
static unsigned long
pfm_smpl_entry_size(unsigned long *which, unsigned long size)
{
	unsigned long res = 0;
	int i;

	for (i = 0; i < size; i++, which++) res += hweight64(*which);

	DBprintk((" res=%ld\n", res));

	return res;
}
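
/*
 * Illustrative sketch, not part of the original file: the helpers above come
 * together when a sampling buffer is created.  Given the bitmask of PMDs to
 * record and a requested number of entries, the allocation handed to
 * rvmalloc() and later remapped with pfm_remap_buffer() would be the header
 * plus one fixed-size record per entry, rounded up to a page boundary.  The
 * function name and its exact use here are hypothetical.
 */
#if 0
static unsigned long
pfm_smpl_buf_size_example(unsigned long *smpl_regs, unsigned long entries)
{
	/* each record = entry header + 8 bytes per recorded PMD */
	unsigned long entry_size = sizeof(perfmon_smpl_entry_t)
				 + pfm_smpl_entry_size(smpl_regs, 4) * sizeof(unsigned long);

	/* header + records, rounded up so the buffer can be handled page by page */
	return PAGE_ALIGN(sizeof(perfmon_smpl_hdr_t) + entries * entry_size);
}
#endif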