perfmon.c
/*
 * This function is used to remember the fact that the vma describing the sampling buffer
 * has now been removed. It can only be called when no other tasks share the same mm context.
 */
static void
pfm_vm_close(struct vm_area_struct *vma)
{
	pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data;

	if (psb == NULL) {
		printk("perfmon: psb is null in [%d]\n", current->pid);
		return;
	}
	/*
	 * Add PSB to list of buffers to free on release_thread() when no more users.
	 *
	 * This call is safe because, once the count is zero, it cannot be modified anymore.
	 * The absence of users of the mm context does not mean that the sampling buffer
	 * is not being used anymore outside of this task. In fact, it can still
	 * be accessed from within the kernel by another task (such as the monitored task).
	 *
	 * Therefore, we only move the psb into the list of buffers to free when we know
	 * nobody else is using it.
	 * The linked list is independent of the perfmon context, because in the case of
	 * multi-threaded processes, the last thread may not have been involved with
	 * monitoring; however, it will be the one removing the vma and it should therefore
	 * also remove the sampling buffer. This buffer cannot be removed until the vma
	 * is removed.
	 *
	 * This function cannot remove the buffer from here, because exit_mmap() must first
	 * complete. Given that there is no other vma-related callback in the generic code,
	 * we have created our own with the linked list of sampling buffers to free, which
	 * is part of the thread structure. In release_thread() we check if the list is
	 * empty. If not, we call into perfmon to free the buffer and psb. That is the only
	 * way to ensure a safe deallocation of the sampling buffer which works when
	 * the buffer is shared between distinct processes or with multi-threaded programs.
	 *
	 * We need to lock the psb because the refcnt test and flag manipulation must
	 * look like an atomic operation vis-a-vis pfm_context_exit().
	 */
	LOCK_PSB(psb);

	if (psb->psb_refcnt == 0) {
		psb->psb_next = current->thread.pfm_smpl_buf_list;
		current->thread.pfm_smpl_buf_list = psb;

		DBprintk(("psb for [%d] smpl @%p size %ld inserted into list\n",
			current->pid, psb->psb_hdr, psb->psb_size));
	}
	DBprintk(("psb vma flag cleared for [%d] smpl @%p size %ld\n",
		current->pid, psb->psb_hdr, psb->psb_size));

	/*
	 * indicate to pfm_context_exit() that the vma has been removed
	 */
	psb->psb_flags &= ~PFM_PSB_VMA;

	UNLOCK_PSB(psb);
}
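/*
 * For context: the comment above defers the actual freeing to release_thread().
 * A minimal sketch of that consumer side, assuming a drain helper called from
 * release_thread() (the helper name and exact shape are illustrative, not part
 * of this excerpt):
 */
static void
pfm_cleanup_smpl_buf(struct task_struct *task)
{
	pfm_smpl_buffer_desc_t *psb = task->thread.pfm_smpl_buf_list, *next;

	while (psb) {
		next = psb->psb_next;
		/* buffer and descriptor were allocated in pfm_smpl_buffer_alloc() */
		pfm_rvfree(psb->psb_hdr, psb->psb_size);
		kfree(psb);
		psb = next;
	}
	task->thread.pfm_smpl_buf_list = NULL;
}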
/*
 * This function is called from pfm_destroy_context() and also from pfm_inherit()
 * to explicitly remove the sampling buffer mapping from the user level address space.
 */
static int
pfm_remove_smpl_mapping(struct task_struct *task)
{
	pfm_context_t *ctx = task->thread.pfm_context;
	pfm_smpl_buffer_desc_t *psb;
	int r;

	/*
	 * some sanity checks first
	 */
	if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) {
		printk("perfmon: invalid context mm=%p\n", task->mm);
		return -1;
	}
	psb = ctx->ctx_psb;

	down_write(&task->mm->mmap_sem);

	r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size);

	up_write(&task->mm->mmap_sem);

	if (r != 0) {
		printk("perfmon: pid %d unable to unmap sampling buffer @0x%lx size=%ld\n",
			task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
	}
	DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d\n",
		task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r));

	/*
	 * make sure we suppress all traces of this buffer
	 * (important for pfm_inherit)
	 */
	ctx->ctx_smpl_vaddr = 0;

	return 0;
}

static pfm_context_t *
pfm_context_alloc(void)
{
	pfm_context_t *ctx;

	/* allocate context descriptor */
	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
	if (ctx) memset(ctx, 0, sizeof(pfm_context_t));

	return ctx;
}

static void
pfm_context_free(pfm_context_t *ctx)
{
	if (ctx) kfree(ctx);
}

static int
pfm_remap_buffer(unsigned long buf, unsigned long addr, unsigned long size)
{
	unsigned long page;

	DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n",
		smp_processor_id(), buf, addr, size));

	while (size > 0) {
		page = pfm_kvirt_to_pa(buf);

		if (remap_page_range(addr, page, PAGE_SIZE, PAGE_SHARED)) return -ENOMEM;

		addr += PAGE_SIZE;
		buf  += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	return 0;
}

/*
 * counts the number of PMDs to save per entry.
 * This code is generic enough to accommodate more than 64 PMDs when they become available.
 */
static unsigned long
pfm_smpl_entry_size(unsigned long *which, unsigned long size)
{
	unsigned long res = 0;
	int i;

	for (i=0; i < size; i++, which++) res += hweight64(*which);

	DBprintk(("weight=%ld\n", res));

	return res;
}
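/*
 * Worked example for the sizing done in pfm_smpl_buffer_alloc() below
 * (illustrative numbers, not from this file): with which_pmds[0] = 0xf0,
 * four PMDs are sampled per entry, so pfm_smpl_entry_size() returns
 * hweight64(0xf0) = 4. For entries = 1024 the buffer is then sized as
 *
 *	size = PAGE_ALIGN(sizeof(perfmon_smpl_hdr_t)
 *			+ 1024 * (sizeof(perfmon_smpl_entry_t) + 4*sizeof(u64)));
 *
 * i.e. one global header, then per entry a fixed header plus 32 bytes of PMD
 * values, rounded up to a page multiple before the RLIMIT_MEMLOCK check.
 */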
/*
 * Allocates the sampling buffer and remaps it into the caller's address space
 */
static int
pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries,
		      void **user_vaddr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = NULL;
	unsigned long size, regcount;
	void *smpl_buf;
	pfm_smpl_buffer_desc_t *psb;

	regcount = pfm_smpl_entry_size(which_pmds, 1);

	/* note that regcount might be 0, in this case only the header for each
	 * entry will be recorded
	 */

	/*
	 * 1 buffer hdr and for each entry a header + regcount PMDs to save
	 */
	size = PAGE_ALIGN(  sizeof(perfmon_smpl_hdr_t)
			  + entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));

	/*
	 * check requested size to avoid Denial-of-service attacks
	 * XXX: may have to refine this test
	 * Check against address space limit.
	 *
	 * if ((mm->total_vm << PAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur)
	 *	return -ENOMEM;
	 */
	if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;

	/*
	 * We do the easy to undo allocations first.
	 *
	 * pfm_rvmalloc() clears the buffer, so there is no leak
	 */
	smpl_buf = pfm_rvmalloc(size);
	if (smpl_buf == NULL) {
		DBprintk(("Can't allocate sampling buffer\n"));
		return -ENOMEM;
	}

	DBprintk(("smpl_buf @%p\n", smpl_buf));

	/* allocate sampling buffer descriptor now */
	psb = kmalloc(sizeof(*psb), GFP_KERNEL);
	if (psb == NULL) {
		DBprintk(("Can't allocate sampling buffer descriptor\n"));
		pfm_rvfree(smpl_buf, size);
		return -ENOMEM;
	}

	/* allocate vma */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma) {
		DBprintk(("Cannot allocate vma\n"));
		goto error;
	}
	/*
	 * partially initialize the vma for the sampling buffer
	 */
	vma->vm_mm	     = mm;
	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_RESERVED;
	vma->vm_page_prot    = PAGE_READONLY;	/* XXX may need to change */
	vma->vm_ops	     = &pfm_vm_ops;	/* necessary to get the close() callback */
	vma->vm_pgoff	     = 0;
	vma->vm_file	     = NULL;
	vma->vm_raend	     = 0;
	vma->vm_private_data = psb;	/* information needed by the pfm_vm_close() function */

	/*
	 * Now we have everything we need and we can initialize
	 * and connect all the data structures
	 */
	psb->psb_hdr	 = smpl_buf;
	psb->psb_addr	 = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t);	/* first entry */
	psb->psb_size	 = size;	/* aligned size */
	psb->psb_index	 = 0;
	psb->psb_entries = entries;
	psb->psb_flags	 = PFM_PSB_VMA;	/* remember that there is a vma describing the buffer */
	psb->psb_refcnt	 = 1;

	spin_lock_init(&psb->psb_lock);

	/*
	 * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and
	 * multitask monitoring
	 */
	psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);

	DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p\n",
		(void *)psb, psb->psb_entry_size, (void *)psb->psb_hdr, (void *)psb->psb_addr));

	/* initialize some of the fields of the user visible buffer header */
	psb->psb_hdr->hdr_version    = PFM_SMPL_VERSION;
	psb->psb_hdr->hdr_entry_size = psb->psb_entry_size;
	psb->psb_hdr->hdr_pmds[0]    = which_pmds[0];

	/*
	 * Let's do the difficult operations next.
	 *
	 * now we atomically find some area in the address space and
	 * remap the buffer into it.
	 */
	down_write(&current->mm->mmap_sem);

	/* find some free area in address space, must have mmap sem held */
	vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
	if (vma->vm_start == 0UL) {
		DBprintk(("Cannot find unmapped area for size %ld\n", size));
		up_write(&current->mm->mmap_sem);
		goto error;
	}
	vma->vm_end = vma->vm_start + size;

	DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start));

	/* can only be applied to current, need to have the mm semaphore held when called */
	if (pfm_remap_buffer((unsigned long)smpl_buf, vma->vm_start, size)) {
		DBprintk(("Can't remap buffer\n"));
		up_write(&current->mm->mmap_sem);
		goto error;
	}

	/*
	 * now insert the vma in the vm list for the process, must be
	 * done with mmap lock held
	 */
	insert_vm_struct(mm, vma);

	mm->total_vm += size >> PAGE_SHIFT;

	up_write(&current->mm->mmap_sem);

	/* store which PMDs to record */
	ctx->ctx_smpl_regs[0] = which_pmds[0];

	/* link to perfmon context */
	ctx->ctx_psb = psb;

	/*
	 * keep track of user level virtual address
	 */
	ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;

	return 0;

error:
	pfm_rvfree(smpl_buf, size);
	kfree(psb);
	return -ENOMEM;
}
/*
 * XXX: do something better here
 */
static int
pfm_bad_permissions(struct task_struct *task)
{
	/* stolen from bad_signal() */
	return (current->session != task->session)
	    && (current->euid ^ task->suid)
	    && (current->euid ^ task->uid)
	    && (current->uid ^ task->suid)
	    && (current->uid ^ task->uid);
}

static int
pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
{
	int ctx_flags;
	int cpu;

	/* valid signal */

	/* cannot send to process 1, 0 means do not notify */
	if (pfx->ctx_notify_pid == 1) {
		DBprintk(("invalid notify_pid %d\n", pfx->ctx_notify_pid));
		return -EINVAL;
	}
	ctx_flags = pfx->ctx_flags;

	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
		DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask));
		/*
		 * cannot block in this mode
		 */
		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
			DBprintk(("cannot use blocking mode when in system wide monitoring\n"));
			return -EINVAL;
		}
		/*
		 * must only have one bit set in the CPU mask
		 */
		if (hweight64(pfx->ctx_cpu_mask) != 1UL) {
			DBprintk(("invalid CPU mask specified\n"));
			return -EINVAL;
		}
		/*
		 * and it must be a valid CPU
		 */
		cpu = ffs(pfx->ctx_cpu_mask);
		if (cpu > smp_num_cpus) {
			DBprintk(("CPU%d is not online\n", cpu));
			return -EINVAL;
		}
		/*
		 * check for pre-existing pinning, if conflicting reject
		 */
		if (task->cpus_allowed != ~0UL && (task->cpus_allowed & (1UL<<cpu)) == 0) {
			DBprintk(("[%d] pinned on 0x%lx, mask for CPU%d\n",
				task->pid, task->cpus_allowed, cpu));
			return -EINVAL;
		}
	} else {
		/*
		 * must provide a target for the signal in blocking mode even when
		 * no counter is configured with PFM_FL_REG_OVFL_NOTIFY
		 */
		if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) return -EINVAL;
	}
	/* probably more to add here */

	return 0;
}
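/*
 * For orientation before pfm_create_context() below: user level reaches this
 * code through the ia64 perfmonctl(2) system call. A minimal sketch of a
 * caller follows; the PFM_CREATE_CONTEXT command and argument order are
 * assumed from the 2.4-era ia64 interface, and only pfarg_context_t fields
 * visible in this file are filled in:
 *
 *	pfarg_context_t ctx;
 *
 *	memset(&ctx, 0, sizeof(ctx));
 *	ctx.ctx_flags      = PFM_FL_NOTIFY_BLOCK;   block monitored task on overflow
 *	ctx.ctx_notify_pid = getpid();              notify the caller itself
 *
 *	if (perfmonctl(getpid(), PFM_CREATE_CONTEXT, &ctx, 1) == -1)
 *		perror("PFM_CREATE_CONTEXT");
 */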
static int
pfm_create_context(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
		   struct pt_regs *regs)
{
	pfarg_context_t tmp;
	void *uaddr = NULL;
	int ret, cpu = 0;
	int ctx_flags;
	pid_t notify_pid;

	/* a context has already been defined */
	if (ctx) return -EBUSY;

	/*
	 * not yet supported
	 */
	if (task != current) return -EINVAL;

	if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;

	ret = pfx_is_sane(task, &tmp);
	if (ret < 0) return ret;

	ctx_flags = tmp.ctx_flags;

	ret = -EBUSY;

	LOCK_PFS();

	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {

		/* at this point, we know there is at least one bit set */
		cpu = ffs(tmp.ctx_cpu_mask) - 1;

		DBprintk(("requesting CPU%d currently on CPU%d\n", cpu, smp_processor_id()));

		if (pfm_sessions.pfs_task_sessions > 0) {
			DBprintk(("system wide not possible, task_sessions=%ld\n",
				pfm_sessions.pfs_task_sessions));
			goto abort;
		}
		if (pfm_sessions.pfs_sys_session[cpu]) {
			DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",
				pfm_sessions.pfs_sys_session[cpu]->pid, cpu));
			goto abort;
		}
		pfm_sessions.pfs_sys_session[cpu] = task;

		/*
		 * count the number of system wide sessions
		 */
		pfm_sessions.pfs_sys_sessions++;

	} else if (pfm_sessions.pfs_sys_sessions == 0) {
		pfm_sessions.pfs_task_sessions++;
	} else {
		/* no per-process monitoring while there is a system wide session */
		goto abort;
	}

	UNLOCK_PFS();

	ret = -ENOMEM;

	ctx = pfm_context_alloc();
	if (!ctx) goto error;

	/* record the creator (important for inheritance) */
	ctx->ctx_owner = current;

	notify_pid = tmp.ctx_notify_pid;

	spin_lock_init(&ctx->ctx_lock);

	if (notify_pid == current->pid) {

		ctx->ctx_notify_task = task = current;
		current->thread.pfm_context = ctx;

	} else if (notify_pid != 0) {
		struct task_struct *notify_task;

		read_lock(&tasklist_lock);
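/*
 * For reference, the admission rule enforced above under LOCK_PFS() condenses
 * to the following predicate (a restatement for clarity, not a helper from
 * this file; the pfm_sessions fields are the ones used above):
 *
 *	system wide:	pfm_sessions.pfs_task_sessions == 0
 *			&& pfm_sessions.pfs_sys_session[cpu] == NULL
 *	per task:	pfm_sessions.pfs_sys_sessions == 0
 */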