perfmon.c
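/*
 * Excerpt, apparently from the IA-64 perfmon subsystem
 * (arch/ia64/kernel/perfmon.c, 2.4-era kernel interfaces): helpers for
 * allocating the sampling buffer, mapping it read-only into the
 * monitoring task's address space, and reserving system-wide vs.
 * per-task monitoring sessions.
 */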
/*
 * Generates a unique (per CPU) timestamp
 */
static inline unsigned long
pfm_get_stamp(void)
{
        /*
         * XXX: must find something more efficient
         */
        return ia64_get_itc();
}

/* Given PGD from the address space's page table, return the kernel
 * virtual mapping of the physical memory mapped at ADR.
 */
static inline unsigned long
uvirt_to_kva(pgd_t *pgd, unsigned long adr)
{
        unsigned long ret = 0UL;
        pmd_t *pmd;
        pte_t *ptep, pte;

        if (!pgd_none(*pgd)) {
                pmd = pmd_offset(pgd, adr);
                if (!pmd_none(*pmd)) {
                        ptep = pte_offset(pmd, adr);
                        pte = *ptep;
                        if (pte_present(pte)) {
                                ret = (unsigned long) page_address(pte_page(pte));
                                ret |= (adr & (PAGE_SIZE - 1));
                        }
                }
        }
        DBprintk(("[%d] uv2kva(%lx-->%lx)\n", current->pid, adr, ret));
        return ret;
}

/* Here we want the physical address of the memory.
 * This is used when initializing the contents of the
 * area and marking the pages as reserved.
 */
static inline unsigned long
pfm_kvirt_to_pa(unsigned long adr)
{
        __u64 pa = ia64_tpa(adr);
        //DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
        return pa;
}

static void *
pfm_rvmalloc(unsigned long size)
{
        void *mem;
        unsigned long adr, page;

        mem = vmalloc(size);
        if (mem) {
                //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
                memset(mem, 0, size); /* Clear the ram out, no junk to the user */
                adr = (unsigned long) mem;
                while (size > 0) {
                        page = pfm_kvirt_to_pa(adr);
                        mem_map_reserve(virt_to_page(__va(page)));
                        adr  += PAGE_SIZE;
                        size -= PAGE_SIZE;
                }
        }
        return mem;
}

static void
pfm_rvfree(void *mem, unsigned long size)
{
        unsigned long adr, page = 0;

        if (mem) {
                adr = (unsigned long) mem;
                while (size > 0) {
                        page = pfm_kvirt_to_pa(adr);
                        mem_map_unreserve(virt_to_page(__va(page)));
                        adr  += PAGE_SIZE;
                        size -= PAGE_SIZE;
                }
                vfree(mem);
        }
        return;
}
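/*
 * Illustrative sketch (not part of the original driver, compiled out):
 * the intended pairing of the two helpers above. pfm_rvmalloc() returns
 * zeroed memory whose pages are marked reserved, which is what lets them
 * be handed to remap_page_range() later; pfm_rvfree() must be called with
 * the same page-aligned size so every reserved page is released before
 * vfree(). Names and sizes below are hypothetical.
 */
#ifdef PFM_EXAMPLE_CODE
static void
pfm_example_rvmalloc_lifecycle(void)
{
        unsigned long size = PAGE_ALIGN(3*PAGE_SIZE);   /* hypothetical size */
        void *buf = pfm_rvmalloc(size);

        if (buf == NULL) return;
        /* ... individual pages may now be remapped into user space ... */
        pfm_rvfree(buf, size);  /* same aligned size as the allocation */
}
#endif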
/*
 * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
 * attached to the context AND the current task has a mapping for it, i.e., it is the original
 * creator of the context.
 *
 * This function is used to remember the fact that the vma describing the sampling buffer
 * has now been removed. It can only be called when no other tasks share the same mm context.
 */
static void
pfm_vm_close(struct vm_area_struct *vma)
{
        pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data;

        if (psb == NULL) {
                printk("perfmon: psb is null in [%d]\n", current->pid);
                return;
        }
        /*
         * Add PSB to list of buffers to free on release_thread() when no more users
         *
         * This call is safe because, once the count is zero, it cannot be modified anymore.
         * The fact that there are no more users of the mm context does not mean that the
         * sampling buffer is not being used anymore outside of this task. In fact, it can
         * still be accessed from within the kernel by another task (such as the monitored
         * task).
         *
         * Therefore, we only move the psb into the list of buffers to free when we know
         * nobody else is using it.
         * The linked list is independent of the perfmon context, because in the case of
         * multi-threaded processes, the last thread may not have been involved with
         * monitoring; however, it will be the one removing the vma and it should therefore
         * also remove the sampling buffer. This buffer cannot be removed until the vma
         * is removed.
         *
         * This function cannot remove the buffer from here, because exit_mmap() must first
         * complete. Given that there is no other vma related callback in the generic code,
         * we have created our own with the linked list of sampling buffers to free. The list
         * is part of the thread structure. In release_thread() we check if the list is
         * empty. If not we call into perfmon to free the buffer and psb. That is the only
         * way to ensure a safe deallocation of the sampling buffer which works when
         * the buffer is shared between distinct processes or with multi-threaded programs.
         *
         * We need to lock the psb because the refcnt test and flag manipulation must
         * look like an atomic operation vis-a-vis pfm_context_exit().
         */
        LOCK_PSB(psb);

        if (psb->psb_refcnt == 0) {
                psb->psb_next = current->thread.pfm_smpl_buf_list;
                current->thread.pfm_smpl_buf_list = psb;
                DBprintk(("[%d] add smpl @%p size %lu to smpl_buf_list psb_flags=0x%x\n",
                        current->pid, psb->psb_hdr, psb->psb_size, psb->psb_flags));
        }
        DBprintk(("[%d] clearing psb_flags=0x%x smpl @%p size %lu\n",
                current->pid, psb->psb_flags, psb->psb_hdr, psb->psb_size));
        /*
         * indicate that the vma describing the sampling buffer is gone
         */
        psb->psb_flags &= ~PSB_HAS_VMA;

        UNLOCK_PSB(psb);
}

/*
 * This function is called from pfm_destroy_context() and also from pfm_inherit()
 * to explicitly remove the sampling buffer mapping from the user level address space.
 */
static int
pfm_remove_smpl_mapping(struct task_struct *task)
{
        pfm_context_t *ctx = task->thread.pfm_context;
        pfm_smpl_buffer_desc_t *psb;
        int r;

        /*
         * some sanity checks first
         */
        if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) {
                printk("perfmon: invalid context mm=%p\n", task->mm);
                return -1;
        }
        psb = ctx->ctx_psb;

        down_write(&task->mm->mmap_sem);

        r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size);

        up_write(&task->mm->mmap_sem);

        if (r != 0) {
                printk("perfmon: pid %d unable to unmap sampling buffer @0x%lx size=%ld\n",
                        task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
        }
        DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d refcnt=%lu psb_flags=0x%x\n",
                task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r, psb->psb_refcnt, psb->psb_flags));

        return 0;
}

static pfm_context_t *
pfm_context_alloc(void)
{
        pfm_context_t *ctx;

        /* allocate context descriptor */
        ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
        if (ctx) memset(ctx, 0, sizeof(pfm_context_t));

        return ctx;
}

static void
pfm_context_free(pfm_context_t *ctx)
{
        if (ctx) kfree(ctx);
}

static int
pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
{
        unsigned long page;

        DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));

        while (size > 0) {
                page = pfm_kvirt_to_pa(buf);

                if (remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;

                addr += PAGE_SIZE;
                buf  += PAGE_SIZE;
                size -= PAGE_SIZE;
        }
        return 0;
}

/*
 * counts the number of PMDS to save per entry.
 * This code is generic enough to accommodate more than 64 PMDS when they become available.
 */
static unsigned long
pfm_smpl_entry_size(unsigned long *which, unsigned long size)
{
        unsigned long res = 0;
        int i;

        for (i=0; i < size; i++, which++) res += hweight64(*which);

        DBprintk(("weight=%ld\n", res));

        return res;
}
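/*
 * Worked example (illustrative, not in the original source): with
 * which[0] == 0x5 (PMD0 and PMD2 selected) and size == 1, the loop
 * above computes hweight64(0x5) == 2, i.e. two PMD values are saved
 * per sample, so each entry in the buffer below occupies
 * sizeof(perfmon_smpl_entry_t) + 2*sizeof(u64) bytes.
 */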
/*
 * Allocates the sampling buffer and remaps it into the caller's address space
 */
static int
pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries,
                      void **user_vaddr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = NULL;
        unsigned long size, regcount;
        void *smpl_buf;
        pfm_smpl_buffer_desc_t *psb;

        /* note that regcount might be 0, in this case only the header for each
         * entry will be recorded.
         */
        regcount = pfm_smpl_entry_size(which_pmds, 1);

        if ((sizeof(perfmon_smpl_hdr_t) + entries*sizeof(perfmon_smpl_entry_t)) <= entries) {
                DBprintk(("requested entries %lu is too big\n", entries));
                return -EINVAL;
        }

        /*
         * 1 buffer hdr and for each entry a header + regcount PMDs to save
         */
        size = PAGE_ALIGN(  sizeof(perfmon_smpl_hdr_t)
                          + entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));

        DBprintk(("sampling buffer size=%lu bytes\n", size));

        /*
         * check requested size to avoid Denial-of-service attacks
         * XXX: may have to refine this test
         * Check against address space limit.
         *
         * if ((mm->total_vm << PAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur)
         *      return -ENOMEM;
         */
        if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;

        /*
         * We do the easy to undo allocations first.
         *
         * pfm_rvmalloc() clears the buffer, so there is no leak
         */
        smpl_buf = pfm_rvmalloc(size);
        if (smpl_buf == NULL) {
                DBprintk(("Can't allocate sampling buffer\n"));
                return -ENOMEM;
        }

        DBprintk(("smpl_buf @%p\n", smpl_buf));

        /* allocate sampling buffer descriptor now */
        psb = kmalloc(sizeof(*psb), GFP_KERNEL);
        if (psb == NULL) {
                DBprintk(("Can't allocate sampling buffer descriptor\n"));
                goto error_kmalloc;
        }

        /* allocate vma */
        vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
        if (!vma) {
                DBprintk(("Cannot allocate vma\n"));
                goto error_kmem;
        }
        /*
         * partially initialize the vma for the sampling buffer
         *
         * The VM_DONTCOPY flag is very important as it ensures that the mapping
         * will never be inherited for any child process (via fork()) which is always
         * what we want.
         */
        vma->vm_mm           = mm;
        vma->vm_flags        = VM_READ|VM_MAYREAD|VM_RESERVED|VM_DONTCOPY;
        vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
        vma->vm_ops          = &pfm_vm_ops; /* necessary to get the close() callback */
        vma->vm_pgoff        = 0;
        vma->vm_file         = NULL;
        vma->vm_raend        = 0;
        vma->vm_private_data = psb;  /* information needed by the pfm_vm_close() function */

        /*
         * Now we have everything we need and we can initialize
         * and connect all the data structures
         */
        psb->psb_hdr     = smpl_buf;
        psb->psb_addr    = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t); /* first entry */
        psb->psb_size    = size; /* aligned size */
        psb->psb_index   = 0;
        psb->psb_entries = entries;
        psb->psb_refcnt  = 1;
        psb->psb_flags   = PSB_HAS_VMA;

        spin_lock_init(&psb->psb_lock);

        /*
         * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and
         * multitask monitoring.
         */
        psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);

        DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p refcnt=%lu psb_flags=0x%x\n",
                  (void *)psb, psb->psb_entry_size, (void *)psb->psb_hdr,
                  (void *)psb->psb_addr, psb->psb_refcnt, psb->psb_flags));

        /* initialize some of the fields of user visible buffer header */
        psb->psb_hdr->hdr_version    = PFM_SMPL_VERSION;
        psb->psb_hdr->hdr_entry_size = psb->psb_entry_size;
        psb->psb_hdr->hdr_pmds[0]    = which_pmds[0];
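        /*
         * Resulting buffer layout as seen from user level (sketch;
         * proportions are illustrative):
         *
         *  psb_hdr  ->  +------------------------------+ <- mapped read-only
         *               |      perfmon_smpl_hdr_t      |    at vma->vm_start
         *  psb_addr ->  +------------------------------+
         *               | entry hdr | regcount x u64   |  entry 0
         *               +------------------------------+
         *               | entry hdr | regcount x u64   |  entry 1
         *               |             ...              |
         *               +------------------------------+ <- psb_hdr + psb_size
         *                                                  (PAGE_ALIGNed)
         */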
        /*
         * Let's do the difficult operations next.
         *
         * now we atomically find some area in the address space and
         * remap the buffer into it.
         */
        down_write(&current->mm->mmap_sem);

        /* find some free area in address space, must have mmap sem held */
        vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
        if (vma->vm_start == 0UL) {
                DBprintk(("Cannot find unmapped area for size %ld\n", size));
                up_write(&current->mm->mmap_sem);
                goto error;
        }
        vma->vm_end = vma->vm_start + size;

        DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start));

        /* can only be applied to current, need to have the mm semaphore held when called */
        if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
                DBprintk(("Can't remap buffer\n"));
                up_write(&current->mm->mmap_sem);
                goto error;
        }

        /*
         * now insert the vma in the vm list for the process, must be
         * done with mmap lock held
         */
        insert_vm_struct(mm, vma);

        mm->total_vm += size >> PAGE_SHIFT;

        up_write(&current->mm->mmap_sem);

        /* store which PMDS to record */
        ctx->ctx_smpl_regs[0] = which_pmds[0];

        /* link to perfmon context */
        ctx->ctx_psb = psb;

        /*
         * keep track of user level virtual address
         */
        ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;

        return 0;

error:
        kmem_cache_free(vm_area_cachep, vma);
error_kmem:
        kfree(psb);
error_kmalloc:
        pfm_rvfree(smpl_buf, size);
        return -ENOMEM;
}

static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned long cpu_mask)
{
        unsigned long m, undo_mask;
        unsigned int n, i;

        /*
         * validity checks on cpu_mask have been done upstream
         */
        LOCK_PFS();

        if (is_syswide) {
                /*
                 * cannot mix system wide and per-task sessions
                 */
                if (pfm_sessions.pfs_task_sessions > 0UL) {
                        DBprintk(("system wide not possible, %u conflicting task_sessions\n",
                                pfm_sessions.pfs_task_sessions));
                        goto abort;
                }
                m = cpu_mask; undo_mask = 0UL; n = 0;
                for (i=0; m; i++, m>>=1) {

                        if ((m & 0x1) == 0UL) continue;

                        if (pfm_sessions.pfs_sys_session[i]) goto undo;

                        DBprintk(("reserving CPU%d currently on CPU%d\n", i, smp_processor_id()));

                        pfm_sessions.pfs_sys_session[i] = task;
                        undo_mask |= 1UL << i;
                        n++;
                }
                pfm_sessions.pfs_sys_sessions += n;
        } else {
                if (pfm_sessions.pfs_sys_sessions) goto abort;
                pfm_sessions.pfs_task_sessions++;
        }
        UNLOCK_PFS();
        return 0;
undo:
        DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",
                pfm_sessions.pfs_sys_session[i]->pid, i));

        for (i=0; undo_mask; i++, undo_mask >>=1) {
                /* release only the slots this call actually reserved */
                if (undo_mask & 0x1) pfm_sessions.pfs_sys_session[i] = NULL;
        }
abort:
        UNLOCK_PFS();

        return -EBUSY;
}
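/*
 * Worked example (illustrative): with cpu_mask == 0x6, the reservation
 * loop above claims pfs_sys_session[1] and pfs_sys_session[2]. If the
 * CPU2 slot is already owned by another session, the loop jumps to undo
 * with undo_mask == 0x2, so only slot 1 is released before the call
 * fails with -EBUSY.
 */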