📄 perf.c
字号:
/*
 *    Parisc performance counters
 *    Copyright (C) 2001 Randolph Chung <tausq@debian.org>
 *
 *    This code is derived, with permission, from HP/UX sources.
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *  Edited comment from original sources:
 *
 *  This driver programs the PCX-U/PCX-W performance counters
 *  on the PA-RISC 2.0 chips.  The driver keeps all images now
 *  internally to the kernel to hopefully eliminate the possibility
 *  of a bad image halting the CPU.  Also, there are different
 *  images for the PCX-W and later chips vs the PCX-U chips.
 *
 *  Only 1 process is allowed to access the driver at any time,
 *  so the only protection that is needed is at open and close.
 *  A variable "perf_enabled" is used to hold the state of the
 *  driver.  The spinlock "perf_lock" is used to protect the
 *  modification of the state during open/close operations so
 *  multiple processes don't get into the driver simultaneously.
 *
 *  This driver accesses the processor directly vs going through
 *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
 *  in various PDC revisions.  The code is much more maintainable
 *  and reliable this way vs having to debug on every version of PDC
 *  on every box.
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>

#include <asm/uaccess.h>
#include <asm/perf.h>
#include <asm/parisc-device.h>
#include <asm/processor.h>
#include <asm/runway.h>
#include <asm/io.h>		/* for __raw_read() */

#include "perf_images.h"

#define MAX_RDR_WORDS	24
#define PERF_VERSION	2	/* derived from hpux's PI v2 interface */

/*
 * definition of RDR regs
 *
 * One entry describes a single RDR; the per-chip tables below are indexed
 * by RDR number (0..31).
 */
struct rdr_tbl_ent {
	/* Register width; the bitmask comments below quote these as bit counts */
	uint16_t	width;
	/* NOTE(review): looks like the number of 64-bit words covering width - confirm */
	uint8_t		num_words;
	/* Non-zero: byte offset of this RDR's write mask in the bitmask array */
	uint8_t		write_control;
};

/* Processor interface in use; starts out as UNKNOWN_INTF */
static int perf_processor_interface = UNKNOWN_INTF;
/* Driver state: non-zero while the device is held open */
static int perf_enabled = 0;
/* Guards perf_enabled during open/close */
static spinlock_t perf_lock;
/* NOTE(review): not static and not referenced in the visible part of the file */
struct parisc_device *cpu_device = NULL;

/* RDRs to write for PCX-W (list ends with -1) */
static int perf_rdrs_W[] =
	{ 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 };

/* RDRs to write for PCX-U (list ends with -1) */
static int perf_rdrs_U[] =
	{ 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 };

/* RDR register descriptions for PCX-W: { width, num_words, write_control } */
static struct rdr_tbl_ent perf_rdr_tbl_W[] = {
	{ 19,	1,	8 },   /* RDR 0 */
	{ 16,	1,	16 },  /* RDR 1 */
	{ 72,	2,	0 },   /* RDR 2 */
	{ 81,	2,	0 },   /* RDR 3 */
	{ 328,	6,	0 },   /* RDR 4 */
	{ 160,	3,	0 },   /* RDR 5 */
	{ 336,	6,	0 },   /* RDR 6 */
	{ 164,	3,	0 },   /* RDR 7 */
	{ 0,	0,	0 },   /* RDR 8 */
	{ 35,	1,	0 },   /* RDR 9 */
	{ 6,	1,	0 },   /* RDR 10 */
	{ 18,	1,	0 },   /* RDR 11 */
	{ 13,	1,	0 },   /* RDR 12 */
	{ 8,	1,	0 },   /* RDR 13 */
	{ 8,	1,	0 },   /* RDR 14 */
	{ 8,	1,	0 },   /* RDR 15 */
	{ 1530,	24,	0 },   /* RDR 16 */
	{ 16,	1,	0 },   /* RDR 17 */
	{ 4,	1,	0 },   /* RDR 18 */
	{ 0,	0,	0 },   /* RDR 19 */
	{ 152,	3,	24 },  /* RDR 20 */
	{ 152,	3,	24 },  /* RDR 21 */
	{ 233,	4,	48 },  /* RDR 22 */
	{ 233,	4,	48 },  /* RDR 23 */
	{ 71,	2,	0 },   /* RDR 24 */
	{ 71,	2,	0 },   /* RDR 25 */
	{ 11,	1,	0 },   /* RDR 26 */
	{ 18,	1,	0 },   /* RDR 27 */
	{ 128,	2,	0 },   /* RDR 28 */
	{ 0,	0,	0 },   /* RDR 29 */
	{ 16,	1,	0 },   /* RDR 30 */
	{ 16,	1,	0 },   /* RDR 31 */
};

/* RDR register descriptions for PCX-U: { width, num_words, write_control } */
static struct rdr_tbl_ent perf_rdr_tbl_U[] = {
	{ 19,	1,	8 },   /* RDR 0 */
	{ 32,	1,	16 },  /* RDR 1 */
	{ 20,	1,	0 },   /* RDR 2 */
	{ 0,	0,	0 },   /* RDR 3 */
	{ 344,	6,	0 },   /* RDR 4 */
	{ 176,	3,	0 },   /* RDR 5 */
	{ 336,	6,	0 },   /* RDR 6 */
	{ 0,	0,	0 },   /* RDR 7 */
	{ 0,	0,	0 },   /* RDR 8 */
	{ 0,	0,	0 },   /* RDR 9 */
	{ 28,	1,	0 },   /* RDR 10 */
	{ 33,	1,	0 },   /* RDR 11 */
	{ 0,	0,	0 },   /* RDR 12 */
	{ 230,	4,	0 },   /* RDR 13 */
	{ 32,	1,	0 },   /* RDR 14 */
	{ 128,	2,	0 },   /* RDR 15 */
	{ 1494,	24,	0 },   /* RDR 16 */
	{ 18,	1,	0 },   /* RDR 17 */
	{ 4,	1,	0 },   /* RDR 18 */
	{ 0,	0,	0 },   /* RDR 19 */
	{ 158,	3,	24 },  /* RDR 20 */
	{ 158,	3,	24 },  /* RDR 21 */
	{ 194,	4,	48 },  /* RDR 22 */
	{ 194,	4,	48 },  /* RDR 23 */
	{ 71,	2,	0 },   /* RDR 24 */
	{ 71,	2,	0 },   /* RDR 25 */
	{ 28,	1,	0 },   /* RDR 26 */
	{ 33,	1,	0 },   /* RDR 27 */
	{ 88,	2,	0 },   /* RDR 28 */
	{ 32,	1,	0 },   /* RDR 29 */
	{ 24,	1,	0 },   /* RDR 30 */
	{ 16,	1,	0 },   /* RDR 31 */
};

/*
 * A non-zero write_control in the above tables is a byte offset into
 * this array.  With 8-byte entries, the offsets used above (8, 16, 24, 48)
 * select elements 1, 2, 3 and 6 respectively.
 */
static uint64_t perf_bitmasks[] = {
	0x0000000000000000,	/* first dbl word must be zero */
	0xfdffe00000000000,	/* RDR0 bitmask */
	0x003f000000000000,	/* RDR1 bitmask */
	0x00ffffffffffffff,	/* RDR20-RDR21 bitmask (152 bits) */
	0xffffffffffffffff,
	0xfffffffc00000000,
	0xffffffffffffffff,	/* RDR22-RDR23 bitmask (233 bits) */
	0xffffffffffffffff,
	0xfffffffffffffffc,
	0xff00000000000000
};

/*
 * Write control bitmasks for the PA-8700 processor, given that
 * some things have changed slightly.
 */
static uint64_t perf_bitmasks_piranha[] = {
	0x0000000000000000,	/* first dbl word must be zero */
	0xfdffe00000000000,	/* RDR0 bitmask */
	0x003f000000000000,	/* RDR1 bitmask */
	0x00ffffffffffffff,	/* RDR20-RDR21 bitmask (158 bits) */
	0xffffffffffffffff,
	0xfffffffc00000000,
	0xffffffffffffffff,	/* RDR22-RDR23 bitmask (210 bits) */
	0xffffffffffffffff,
	0xffffffffffffffff,
	0xfffc000000000000
};

/*
 * array of bitmasks to use
 * NOTE(review): presumably set to perf_bitmasks or perf_bitmasks_piranha
 * during initialisation, which is outside this part of the file - confirm.
 */
static uint64_t *bitmask_array;

/******************************************************************************
 * Function Prototypes
 *****************************************************************************/
/* File operations and the image download path */
static int perf_config(uint32_t *image_ptr);
static int perf_release(struct inode *inode, struct file *file);
static int perf_open(struct inode *inode, struct file *file);
static ssize_t perf_read(struct file *file, char *buf, size_t cnt, loff_t *ppos);
static ssize_t perf_write(struct file *file, const char *buf, size_t count,
	loff_t *ppos);
static int perf_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
	unsigned long arg);
/* Counter control and RDR access helpers */
static void perf_start_counters(void);
static int perf_stop_counters(uint32_t *raddr);
static struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num);
static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer);
static int perf_rdr_clear(uint32_t rdr_num);
static int perf_write_image(uint64_t *memaddr);
static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer);

/* External Assembly Routines (one shift-in/shift-out pair per chip family) */
extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width);
extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width);
extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer);
extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer);
extern void perf_intrigue_enable_perf_counters (void);
extern void perf_intrigue_disable_perf_counters (void);

/******************************************************************************
 * Function Definitions
*****************************************************************************//* * configure: * * Configure the cpu with a given data image. First turn off the counters, * then download the image, then turn the counters back on. */static int perf_config(uint32_t *image_ptr){ long error; uint32_t raddr[4]; /* Stop the counters*/ error = perf_stop_counters(raddr); if (error != 0) { printk("perf_config: perf_stop_counters = %ld\n", error); return -EINVAL; }printk("Preparing to write image\n"); /* Write the image to the chip */ error = perf_write_image((uint64_t *)image_ptr); if (error != 0) { printk("perf_config: DOWNLOAD = %ld\n", error); return -EINVAL; }printk("Preparing to start counters\n"); /* Start the counters */ perf_start_counters(); return sizeof(uint32_t);}/* * Open the device and initialize all of its memory. The device is only * opened once, but can be "queried" by multiple processes that know its * file descriptor. */static int perf_open(struct inode *inode, struct file *file){ spin_lock(&perf_lock); if (perf_enabled) { spin_unlock(&perf_lock); return -EBUSY; } perf_enabled = 1; spin_unlock(&perf_lock); return 0;}/* * Close the device. */static int perf_release(struct inode *inode, struct file *file){ spin_lock(&perf_lock); perf_enabled = 0; spin_unlock(&perf_lock); return 0;}/* * Read does nothing for this driver */static ssize_t perf_read(struct file *file, char *buf, size_t cnt, loff_t *ppos){ return 0;}/* * write: * * This routine downloads the image to the chip. It must be * called on the processor that the download should happen * on. 
*/static ssize_t perf_write(struct file *file, const char *buf, size_t count, loff_t *ppos){ int err; size_t image_size; uint32_t image_type; uint32_t interface_type; uint32_t test; if (perf_processor_interface == ONYX_INTF) image_size = PCXU_IMAGE_SIZE; else if (perf_processor_interface == CUDA_INTF) image_size = PCXW_IMAGE_SIZE; else return -EFAULT; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (count != sizeof(uint32_t)) return -EIO; if ((err = copy_from_user(&image_type, buf, sizeof(uint32_t))) != 0) return err; /* Get the interface type and test type */ interface_type = (image_type >> 16) & 0xffff; test = (image_type & 0xffff); /* Make sure everything makes sense */ /* First check the machine type is correct for the requested image */ if (((perf_processor_interface == CUDA_INTF) && (interface_type != CUDA_INTF)) || ((perf_processor_interface == ONYX_INTF) && (interface_type != ONYX_INTF))) return -EINVAL; /* Next check to make sure the requested image is valid */ if (((interface_type == CUDA_INTF) && (test >= MAX_CUDA_IMAGES)) || ((interface_type == ONYX_INTF) && (test >= MAX_ONYX_IMAGES))) return -EINVAL; /* Copy the image into the processor */ if (interface_type == CUDA_INTF) return perf_config(cuda_images[test]); else return perf_config(onyx_images[test]); return count;}/* * Patch the images that need to know the IVA addresses. */static void perf_patch_images(void){#if 0 /* FIXME!! *//* * NOTE: this routine is VERY specific to the current TLB image. * If the image is changed, this routine might also need to be changed. 
*/ extern void $i_itlb_miss_2_0(); extern void $i_dtlb_miss_2_0(); extern void PA2_0_iva(); /* * We can only use the lower 32-bits, the upper 32-bits should be 0 * anyway given this is in the kernel */ uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); uint32_t IVAaddress = (uint32_t)&PA2_0_iva; if (perf_processor_interface == ONYX_INTF) { /* clear last 2 bytes */ onyx_images[TLBMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBMISS][17] = itlb_addr; /* clear last 2 bytes */ onyx_images[TLBHANDMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBHANDMISS][17] = itlb_addr; /* clear last 2 bytes */ onyx_images[BIG_CPI][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[BIG_CPI][17] = itlb_addr; onyx_images[PANIC][15] &= 0xffffff00; /* clear last 2 bytes */ onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; } else if (perf_processor_interface == CUDA_INTF) { /* Cuda interface */ cuda_images[TLBMISS][16] = (cuda_images[TLBMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); cuda_images[TLBMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; cuda_images[TLBHANDMISS][16] = (cuda_images[TLBHANDMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -