perfmon.c

		 * task to report completion
		 *
		 * the context is protected by mutex, therefore there
		 * is no risk of being notified of completion before
		 * being actually on the waitq.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&ctx->ctx_zombieq, &wait);

		UNPROTECT_CTX(ctx, flags);

		/*
		 * XXX: check for signals :
		 *	- ok for explicit close
		 *	- not ok when coming from exit_files()
		 */
		schedule();

		PROTECT_CTX(ctx, flags);

		remove_wait_queue(&ctx->ctx_zombieq, &wait);
		set_current_state(TASK_RUNNING);

		/*
		 * context is unloaded at this point
		 */
		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
	} else if (task != current) {
#ifdef CONFIG_SMP
		/*
		 * switch context to zombie state
		 */
		ctx->ctx_state = PFM_CTX_ZOMBIE;

		DPRINT(("zombie ctx for [%d]\n", task->pid));
		/*
		 * cannot free the context on the spot. deferred until
		 * the task notices the ZOMBIE state
		 */
		free_possible = 0;
#else
		pfm_context_unload(ctx, NULL, 0, regs);
#endif
	}

doit:
	/* reload state, may have changed during opening of critical section */
	state = ctx->ctx_state;

	/*
	 * the context is still attached to a task (possibly current)
	 * we cannot destroy it right now
	 */

	/*
	 * we must free the sampling buffer right here because
	 * we cannot rely on it being cleaned up later by the
	 * monitored task. It is not possible to free vmalloc'ed
	 * memory in pfm_load_regs(). Instead, we remove the buffer
	 * now. should there be subsequent PMU overflows originally
	 * meant for sampling, they will be converted to spurious
	 * and that's fine because the monitoring tool is gone anyway.
	 */
	if (ctx->ctx_smpl_hdr) {
		smpl_buf_addr = ctx->ctx_smpl_hdr;
		smpl_buf_size = ctx->ctx_smpl_size;
		/* no more sampling */
		ctx->ctx_smpl_hdr = NULL;
		ctx->ctx_fl_is_sampling = 0;
	}

	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
		state,
		free_possible,
		smpl_buf_addr,
		smpl_buf_size));

	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);

	/*
	 * if the context is UNLOADED, the session has already been unreserved;
	 * a ZOMBIE context still holds its session, so release it here.
	 */
	if (state == PFM_CTX_ZOMBIE) {
		pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu);
	}

	/*
	 * disconnecting the file descriptor from the context must be done
	 * before we unlock.
	 */
	filp->private_data = NULL;

	/*
	 * if we free on the spot, the context is now completely unreachable
	 * from the caller's side. The monitored task side is also cut, so we
	 * can free it here.
	 *
	 * If we have a deferred free, only the caller side is disconnected.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
	 */
	if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size);

	/*
	 * return the memory used by the context
	 */
	if (free_possible) pfm_context_free(ctx);

	return 0;
}

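/*
 * pfm_no_open() backs the .open entry of pfm_file_ops below: a perfmon
 * file descriptor can only be obtained through pfm_alloc_fd() at context
 * creation time, so re-opening the pfmfs inode (e.g. via /proc) is
 * refused with -ENXIO.
 */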
static int
pfm_no_open(struct inode *irrelevant, struct file *dontcare)
{
	DPRINT(("pfm_no_open called\n"));
	return -ENXIO;
}

static struct file_operations pfm_file_ops = {
	.llseek   = no_llseek,
	.read     = pfm_read,
	.write    = pfm_write,
	.poll     = pfm_poll,
	.ioctl    = pfm_ioctl,
	.open     = pfm_no_open,	/* special open code to disallow open via /proc */
	.fasync   = pfm_fasync,
	.release  = pfm_close,
	.flush    = pfm_flush
};

static int
pfmfs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pfmfs_dentry_operations = {
	.d_delete = pfmfs_delete_dentry,
};

static int
pfm_alloc_fd(struct file **cfile)
{
	int fd, ret = 0;
	struct file *file = NULL;
	struct inode * inode;
	char name[32];
	struct qstr this;

	fd = get_unused_fd();
	if (fd < 0) return -ENFILE;

	ret = -ENFILE;

	file = get_empty_filp();
	if (!file) goto out;

	/*
	 * allocate a new inode
	 */
	inode = new_inode(pfmfs_mnt->mnt_sb);
	if (!inode) goto out;

	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));

	inode->i_sb   = pfmfs_mnt->mnt_sb;
	inode->i_mode = S_IFCHR|S_IRUGO;
	inode->i_sock = 0;
	inode->i_uid  = current->fsuid;
	inode->i_gid  = current->fsgid;

	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len  = strlen(name);
	this.hash = inode->i_ino;

	ret = -ENOMEM;

	/*
	 * allocate a new dcache entry
	 */
	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
	if (!file->f_dentry) goto out;

	file->f_dentry->d_op = &pfmfs_dentry_operations;

	d_add(file->f_dentry, inode);
	file->f_vfsmnt  = mntget(pfmfs_mnt);
	file->f_mapping = inode->i_mapping;

	file->f_op    = &pfm_file_ops;
	file->f_mode  = FMODE_READ;
	file->f_flags = O_RDONLY;
	file->f_pos   = 0;

	/*
	 * may have to delay until context is attached?
	 */
	fd_install(fd, file);

	/*
	 * the file structure we will use
	 */
	*cfile = file;

	return fd;
out:
	if (file) put_filp(file);
	put_unused_fd(fd);
	return ret;
}

static void
pfm_free_fd(int fd, struct file *file)
{
	if (file) put_filp(file);
	put_unused_fd(fd);
}

static int
pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
{
	unsigned long page;

	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));

	while (size > 0) {
		page = pfm_kvirt_to_pa(buf);

		if (pfm_remap_page_range(vma, addr, page, PAGE_SIZE, PAGE_READONLY)) return -ENOMEM;

		addr += PAGE_SIZE;
		buf  += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	return 0;
}

/*
 * allocate a sampling buffer and remap it into the user address space of the task
 */
static int
pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma = NULL;
	unsigned long size;
	void *smpl_buf;

	/*
	 * the fixed header + requested size, aligned to a page boundary
	 */
	size = PAGE_ALIGN(rsize);

	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));

	/*
	 * check requested size to avoid Denial-of-service attacks
	 * XXX: may have to refine this test
	 * Check against address space limit.
	 *
	 * if ((mm->total_vm << PAGE_SHIFT) + len > task->rlim[RLIMIT_AS].rlim_cur)
	 *	return -ENOMEM;
	 */
	if (size > task->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;

	/*
	 * We do the easy to undo allocations first.
	 *
	 * pfm_rvmalloc() clears the buffer, so there is no leak
	 */
	smpl_buf = pfm_rvmalloc(size);
	if (smpl_buf == NULL) {
		DPRINT(("Can't allocate sampling buffer\n"));
		return -ENOMEM;
	}

	DPRINT(("smpl_buf @%p\n", smpl_buf));

	/* allocate vma */
	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma) {
		DPRINT(("Cannot allocate vma\n"));
		goto error_kmem;
	}
	memset(vma, 0, sizeof(*vma));

	/*
	 * partially initialize the vma for the sampling buffer
	 *
	 * The VM_DONTCOPY flag is very important as it ensures that the mapping
	 * will never be inherited for any child process (via fork()) which is always
	 * what we want.
	 */
	vma->vm_mm        = mm;
	vma->vm_flags     = VM_READ|VM_MAYREAD|VM_RESERVED;
	vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */

	/*
	 * Now we have everything we need and we can initialize
	 * and connect all the data structures
	 */
	ctx->ctx_smpl_hdr  = smpl_buf;
	ctx->ctx_smpl_size = size; /* aligned size */

	/*
	 * Let's do the difficult operations next.
	 *
	 * now we atomically find some area in the address space and
	 * remap the buffer into it.
	 */
	down_write(&task->mm->mmap_sem);

	/* find some free area in address space, must have mmap sem held */
	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
	if (vma->vm_start == 0UL) {
		DPRINT(("Cannot find unmapped area for size %ld\n", size));
		up_write(&task->mm->mmap_sem);
		goto error;
	}
	vma->vm_end = vma->vm_start + size;

	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));

	/* can only be applied to current task, need to have the mm semaphore held when called */
	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
		DPRINT(("Can't remap buffer\n"));
		up_write(&task->mm->mmap_sem);
		goto error;
	}

	/*
	 * now insert the vma in the vm list for the process, must be
	 * done with mmap lock held
	 */
	insert_vm_struct(mm, vma);

	mm->total_vm += size >> PAGE_SHIFT;

	up_write(&task->mm->mmap_sem);

	/*
	 * keep track of user level virtual address
	 */
	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
	*(unsigned long *)user_vaddr = vma->vm_start;

	return 0;

error:
	kmem_cache_free(vm_area_cachep, vma);
error_kmem:
	pfm_rvfree(smpl_buf, size);

	return -ENOMEM;
}

/*
 * XXX: do something better here
 */
static int
pfm_bad_permissions(struct task_struct *task)
{
	/* inspired by ptrace_attach() */
	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
		current->uid,
		current->gid,
		task->euid,
		task->suid,
		task->uid,
		task->egid,
		task->sgid));

	return ((current->uid != task->euid)
	    || (current->uid != task->suid)
	    || (current->uid != task->uid)
	    || (current->gid != task->egid)
	    || (current->gid != task->sgid)
	    || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
}

static int
pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
{
	int ctx_flags;

	/* valid signal */

	ctx_flags = pfx->ctx_flags;

	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
		/*
		 * cannot block in this mode
		 */
		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
			DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
			return -EINVAL;
		}
	} else {
	}
	/* probably more to add here */
	return 0;
}

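/*
 * pfm_setup_buffer_fmt() ties a new context to a sampling buffer format:
 * the format is looked up by id and validated, then asked for the buffer
 * size it needs. If that size is non-zero, pfm_smpl_buffer_alloc() above
 * allocates the buffer and remaps it read-only into the caller's address
 * space, and the user-level address is handed back through the
 * pfarg_context_t argument before the format's init handler runs.
 */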
format\n", task->pid)); return -EINVAL; } /* * buffer argument MUST be contiguous to pfarg_context_t */ if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret)); if (ret) goto error; /* link buffer format and context */ ctx->ctx_buf_fmt = fmt; /* * check if buffer format wants to use perfmon buffer allocation/mapping service */ ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); if (ret) goto error; if (size) { /* * buffer is always remapped into the caller's address space */ ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr); if (ret) goto error; /* keep track of user address of buffer */ arg->ctx_smpl_vaddr = uaddr; } ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);error: return ret;}static voidpfm_reset_pmu_state(pfm_context_t *ctx){ int i; /* * install reset values for PMC. */ for (i=1; PMC_IS_LAST(i) == 0; i++) { if (PMC_IS_IMPL(i) == 0) continue; ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); } /* * PMD registers are set to 0UL when the context in memset() */ /* * On context switched restore, we must restore ALL pmc and ALL pmd even * when they are not actively used by the task. In UP, the incoming process * may otherwise pick up left over PMC, PMD state from the previous process. * As opposed to PMD, stale PMC can cause harm to the incoming * process because they may change what is being measured. * Therefore, we must systematically reinstall the entire * PMC state. In SMP, the same thing is possible on the * same CPU but also on between