📄 perf.c
字号:
((dtlb_addr >> 8)&0x0000ffff); cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; cuda_images[BIG_CPI][16] = (cuda_images[BIG_CPI][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); cuda_images[BIG_CPI][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; } else { /* Unknown type */ }#endif}/* * ioctl routine * All routines effect the processor that they are executed on. Thus you * must be running on the processor that you wish to change. */static int perf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg){ long error_start; uint32_t raddr[4]; switch (cmd) { case PA_PERF_ON: /* Start the counters */ perf_start_counters(); return 0; case PA_PERF_OFF: error_start = perf_stop_counters(raddr); if (error_start != 0) { printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); return -EFAULT; } /* copy out the Counters */ if (copy_to_user((void *)arg, raddr, sizeof (raddr)) != 0) { return -EFAULT; } return 0; case PA_PERF_VERSION: /* Return the version # */ return put_user(PERF_VERSION, (int *)arg); default: break; } return -ENOTTY;}static struct file_operations perf_fops = { llseek: no_llseek, read: perf_read, write: perf_write, ioctl: perf_ioctl, open: perf_open, release: perf_release}; static struct miscdevice perf_dev = { MISC_DYNAMIC_MINOR, PA_PERF_DEV, &perf_fops};/* * Initialize the module */static int __init perf_init(void){ /* Determine correct processor interface to use */ bitmask_array = perf_bitmasks; if (boot_cpu_data.cpu_type == pcxu || boot_cpu_data.cpu_type == pcxu_) { perf_processor_interface = ONYX_INTF; } else if (boot_cpu_data.cpu_type == pcxw || boot_cpu_data.cpu_type == pcxw_ || boot_cpu_data.cpu_type == pcxw2) { perf_processor_interface = CUDA_INTF; if (boot_cpu_data.cpu_type == pcxw2) bitmask_array = perf_bitmasks_piranha; } else { perf_processor_interface = UNKNOWN_INTF; printk("Performance monitoring counters not supported on this processor\n"); return -ENODEV; } /* Patch the images to match the system */ perf_patch_images(); spin_lock_init(&perf_lock); misc_register(&perf_dev); /* TODO: this only lets us access the first cpu.. what to do for SMP? */ cpu_device = cpu_data[0].dev; printk("Performance monitoring counters enabled for %s\n", cpu_data[0].dev->name); return 0;}/* * perf_start_counters(void) * * Start the counters. */static void perf_start_counters(void){ /* Enable performance monitor counters */ perf_intrigue_enable_perf_counters();}/* * perf_stop_counters * * Stop the performance counters and save counts * in a per_processor array. */static int perf_stop_counters(uint32_t *raddr){ uint64_t userbuf[MAX_RDR_WORDS]; /* Disable performance counters */ perf_intrigue_disable_perf_counters(); if (perf_processor_interface == ONYX_INTF) { uint64_t tmp64; /* * Read the counters */ if (!perf_rdr_read_ubuf(16, userbuf)) return -13; /* Counter0 is bits 1398 thru 1429 */ tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000; tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; /* OR sticky0 (bit 1430) to counter0 bit 32 */ tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; raddr[0] = (uint32_t)tmp64; /* Counter1 is bits 1431 thru 1462 */ tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff; /* OR sticky1 (bit 1463) to counter1 bit 32 */ tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; raddr[1] = (uint32_t)tmp64; /* Counter2 is bits 1464 thru 1495 */ tmp64 = (userbuf[22] << 24) & 0x00000000ff000000; tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; /* OR sticky2 (bit 1496) to counter2 bit 32 */ tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; raddr[2] = (uint32_t)tmp64; /* Counter3 is bits 1497 thru 1528 */ tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; /* OR sticky3 (bit 1529) to counter3 bit 32 */ tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; raddr[3] = (uint32_t)tmp64; /* * Zero out the counters */ /* * The counters and sticky-bits comprise the last 132 bits * (1398 - 1529) of RDR16 on a U chip. We'll zero these * out the easy way: zero out last 10 bits of dword 21, * all of dword 22 and 58 bits (plus 6 don't care bits) of * dword 23. */ userbuf[21] &= 0xfffffffffffffc00; /* 0 to last 10 bits */ userbuf[22] = 0; userbuf[23] = 0; /* * Write back the zero'ed bytes + the image given * the read was destructive. */ perf_rdr_write(16, userbuf); } else { /* * Read RDR-15 which contains the counters and sticky bits */ if (!perf_rdr_read_ubuf(15, userbuf)) { return -13; } /* * Clear out the counters */ perf_rdr_clear(15); /* * Copy the counters */ raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); } return 0;}/* * perf_rdr_get_entry * * Retrieve a pointer to the description of what this * RDR contains. */static struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num){ if (perf_processor_interface == ONYX_INTF) { return &perf_rdr_tbl_U[rdr_num]; } else { return &perf_rdr_tbl_W[rdr_num]; }}/* * perf_rdr_read_ubuf * * Read the RDR value into the buffer specified. */static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer){ uint64_t data, data_mask = 0; uint32_t width, xbits, i; struct rdr_tbl_ent *tentry; tentry = perf_rdr_get_entry(rdr_num); if ((width = tentry->width) == 0) return 0; /* Clear out buffer */ i = tentry->num_words; while (i--) { buffer[i] = 0; } /* Check for bits an even number of 64 */ if ((xbits = width & 0x03f) != 0) { data_mask = 1; data_mask <<= (64 - xbits); data_mask--; } /* Grab all of the data */ i = tentry->num_words; while (i--) { if (perf_processor_interface == ONYX_INTF) { data = perf_rdr_shift_in_U(rdr_num, width); } else { data = perf_rdr_shift_in_W(rdr_num, width); } if (xbits) { buffer[i] |= (data << (64 - xbits)); if (i) { buffer[i-1] |= ((data >> xbits) & data_mask); } } else { buffer[i] = data; } } return 1;}/* * perf_rdr_clear * * Zero out the given RDR register */static int perf_rdr_clear(uint32_t rdr_num){ struct rdr_tbl_ent *tentry; int32_t i; tentry = perf_rdr_get_entry(rdr_num); if (tentry->width == 0) { return -1; } i = tentry->num_words; while (i--) { if (perf_processor_interface == ONYX_INTF) { perf_rdr_shift_out_U(rdr_num, 0UL); } else { perf_rdr_shift_out_W(rdr_num, 0UL); } } return 0;}/* * perf_write_image * * Write the given image out to the processor */static int perf_write_image(uint64_t *memaddr){ uint64_t buffer[MAX_RDR_WORDS]; uint64_t *bptr; uint32_t dwords; uint32_t *intrigue_rdr; uint64_t *intrigue_bitmask, tmp64, proc_hpa, *ptr64; struct rdr_tbl_ent *tentry; int i; /* Clear out counters */ if (perf_processor_interface == ONYX_INTF) { perf_rdr_clear(16); /* Toggle performance monitor */ perf_intrigue_enable_perf_counters(); perf_intrigue_disable_perf_counters(); intrigue_rdr = perf_rdrs_U; } else { perf_rdr_clear(15); intrigue_rdr = perf_rdrs_W; } /* Write all RDRs */ while (*intrigue_rdr != -1) { tentry = perf_rdr_get_entry(*intrigue_rdr); perf_rdr_read_ubuf(*intrigue_rdr, buffer); bptr = &buffer[0]; dwords = tentry->num_words; if (tentry->write_control) { intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; while (dwords--) { tmp64 = *intrigue_bitmask & *memaddr++; tmp64 |= (~(*intrigue_bitmask++)) & *bptr; *bptr++ = tmp64; } } else { while (dwords--) { *bptr++ = *memaddr++; } } perf_rdr_write(*intrigue_rdr, buffer); intrigue_rdr++; } /* * Now copy out the Runway stuff which is not in RDRs */ if (cpu_device == NULL) { printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); return -1; } proc_hpa = cpu_device->hpa; /* Merge intrigue bits into Runway STATUS 0 */ ptr64 = (uint64_t *)(proc_hpa + 0x10); /* Runway STATUS 0 */ tmp64 = __raw_readq((u64 *)ptr64) & 0xffecffffffffffff; __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000), (u64 *)ptr64); /* Write RUNWAY DEBUG registers */ ptr64 = (uint64_t *)(proc_hpa + 0x40); /* Runway DEBUG 0 */ for (i = 0; i < 8; i++) { __raw_writeq(*memaddr++, (u64 *)ptr64); ptr64++; } return 0; }/* * perf_rdr_write * * Write the given RDR register with the contents * of the given buffer. */static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer){ struct rdr_tbl_ent *tentry; int32_t i;printk("perf_rdr_write\n"); tentry = perf_rdr_get_entry(rdr_num); if (tentry->width == 0) { return; } i = tentry->num_words; while (i--) { if (perf_processor_interface == ONYX_INTF) { perf_rdr_shift_out_U(rdr_num, buffer[i]); } else { perf_rdr_shift_out_W(rdr_num, buffer[i]); } }printk("perf_rdr_write done\n");}module_init(perf_init);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -