📄 cgroup.c
		return -ENODEV;
	if (cft->read)
		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_u64)
		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_s64)
		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}

/*
 * seqfile ops/methods for returning structured data. Currently just
 * supports string->u64 maps, but can be extended in future.
 */

struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};

static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
	struct seq_file *sf = cb->state;
	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
}

static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct cgroup_seqfile_state *state = m->private;
	struct cftype *cft = state->cft;
	if (cft->read_map) {
		struct cgroup_map_cb cb = {
			.fill = cgroup_map_add,
			.state = m,
		};
		return cft->read_map(state->cgroup, cft, &cb);
	}
	return cft->read_seq_string(state->cgroup, cft, m);
}

static int cgroup_seqfile_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	kfree(seq->private);
	return single_release(inode, file);
}

static struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};

static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	cft = __d_cft(file->f_dentry);
	if (!cft)
		return -ENODEV;
	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}

static int cgroup_file_release(struct inode *inode, struct file *file)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	if (cft->release)
		return cft->release(inode, file);
	return 0;
}

/*
 * cgroup_rename - Only allow simple rename of directories in place.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}

static struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};

static struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
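/*
 * Hypothetical sketch (not part of cgroup.c): how a controller's
 * read_map handler would use the cgroup_map_cb plumbing above. The
 * handler is invoked from cgroup_seqfile_show(), and each cb->fill()
 * call emits one "key value" line via cgroup_map_add(). The stat
 * names and the example_stats() helper are made up for illustration.
 */
#if 0	/* illustration only */
static int example_read_map(struct cgroup *cgrp, struct cftype *cft,
			    struct cgroup_map_cb *cb)
{
	u64 nr_total, nr_failed;

	example_stats(cgrp, &nr_total, &nr_failed);	/* hypothetical helper */
	cb->fill(cb, "total", nr_total);
	cb->fill(cb, "failed", nr_failed);
	return 0;
}
#endif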
static int cgroup_create_file(struct dentry *dentry, int mode,
				struct super_block *sb)
{
	static struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);

		/* start with the directory inode held, so that we can
		 * populate it without racing with another mkdir */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}

/*
 * cgroup_create_dir - create a directory for an object.
 * @cgrp: the cgroup we create the directory for. It must have a valid
 *        ->parent field. And we are going to fill its ->dentry field.
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new directory.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
				int mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);
		cgrp->dentry = dentry;
		dget(dentry);
	}
	dput(dentry);

	return error;
}

int cgroup_add_file(struct cgroup *cgrp,
		       struct cgroup_subsys *subsys,
		       const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;

	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		error = cgroup_create_file(dentry, 0644 | S_IFREG,
						cgrp->root->sb);
		if (!error)
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}

int cgroup_add_files(struct cgroup *cgrp,
			struct cgroup_subsys *subsys,
			const struct cftype cft[],
			int count)
{
	int i, err;
	for (i = 0; i < count; i++) {
		err = cgroup_add_file(cgrp, subsys, &cft[i]);
		if (err)
			return err;
	}
	return 0;
}
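/*
 * Hypothetical sketch (not part of cgroup.c): how a subsystem would
 * typically expose its control files through cgroup_add_files() above,
 * usually from its populate callback. The "example" subsystem, the file
 * name and the example_weight_read() handler are made up; the read_u64
 * signature matches the handler path used by cgroup_file_read().
 */
#if 0	/* illustration only */
static u64 example_weight_read(struct cgroup *cgrp, struct cftype *cft)
{
	return 0;	/* would return the per-cgroup value */
}

static struct cftype example_files[] = {
	{
		.name = "weight",	/* shows up as "example.weight" unless ROOT_NOPREFIX */
		.read_u64 = example_weight_read,
	},
};

static int example_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	return cgroup_add_files(cgrp, ss, example_files,
				ARRAY_SIZE(example_files));
}
#endif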
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Return the number of tasks in the cgroup.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
		count += atomic_read(&link->cg->ref.refcount);
	}
	read_unlock(&css_set_lock);
	return count;
}

/*
 * Advance a list_head iterator.  The iterator should be positioned at
 * the start of a css_set
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}

/*
 * To reduce the fork() overhead for systems that are not actually
 * using their cgroups capability, we don't maintain the lists running
 * through each css_set to its tasks until we see the list actually
 * used - in other words after the first call to cgroup_iter_start().
 *
 * The tasklist_lock is not held here, as do_each_thread() and
 * while_each_thread() are protected by RCU.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}

void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}

struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;

	/* If the iterator cg is NULL, we have no tasks */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find next entry */
	l = l->next;
	if (l == &res->cgroups->tasks) {
		/* We reached the end of this task list - move on to
		 * the next cg_cgroup_link */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}

void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
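/*
 * Hypothetical sketch (not part of cgroup.c): the intended calling
 * pattern for the iterator above. css_set_lock is read-held between
 * cgroup_iter_start() and cgroup_iter_end(), so the loop body must not
 * sleep. The example_count_running() name is made up for illustration.
 */
#if 0	/* illustration only */
static int example_count_running(struct cgroup *cgrp)
{
	struct cgroup_iter it;
	struct task_struct *tsk;
	int nr_running = 0;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (tsk->state == TASK_RUNNING)
			nr_running++;
	}
	cgroup_iter_end(cgrp, &it);
	return nr_running;
}
#endif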
static inline int started_after_time(struct task_struct *t1,
				     struct timespec *time,
				     struct task_struct *t2)
{
	int start_diff = timespec_compare(&t1->start_time, time);
	if (start_diff > 0) {
		return 1;
	} else if (start_diff < 0) {
		return 0;
	} else {
		/*
		 * Arbitrarily, if two processes started at the same
		 * time, we'll say that the lower pointer value
		 * started first. Note that t2 may have exited by now
		 * so this may not be a valid pointer any longer, but
		 * that's fine - it still serves to distinguish
		 * between two tasks started (effectively) simultaneously.
		 */
		return t1 > t2;
	}
}

/*
 * This function is a callback from heap_insert() and is used to order
 * the heap.
 * In this case we order the heap in descending task start time.
 */
static inline int started_after(void *p1, void *p2)
{
	struct task_struct *t1 = p1;
	struct task_struct *t2 = p2;
	return started_after_time(t1, &t2->start_time, t2);
}

/**
 * cgroup_scan_tasks - iterate though all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * Arguments include pointers to callback functions test_task() and
 * process_task().
 * Iterate through all the tasks in a cgroup, calling test_task() for each,
 * and if it returns true, call process_task() for it also.
 * The test_task pointer may be NULL, meaning always true (select all tasks).
 * Effectively duplicates cgroup_iter_{start,next,end}()
 * but does not lock css_set_lock for the call to process_task().
 * The struct cgroup_scanner may be embedded in any structure of the caller's
 * creation.
 * It is guaranteed that process_task() will act on every task that
 * is a member of the cgroup for the duration of this call. This
 * function may or may not call process_task() for tasks that exit
 * or move to a different cgroup during the call, or are forked or
 * move into the cgroup during the call.
 *
 * Note that test_task() may be called with locks held, and may in some
 * situations be called multiple times for the same task, so it should
 * be cheap.
 * If the heap pointer in the struct cgroup_scanner is non-NULL, a heap has been
 * pre-allocated and will be used for heap operations (and its "gt" member will
 * be overwritten), else a temporary heap will be used (allocation of which
 * may cause this function to fail).
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
	 * to determine which are of interest, and using the scanner's
	 * "process_task" callback to process any of them that need an update.
	 * Since we don't want to hold any locks during the task updates,
	 * gather tasks to be processed in a heap structure.
	 * The heap is sorted by descending task start time.
	 * If the statically-sized heap fills up, we overflow tasks that
	 * started later, and in future iterations only consider tasks that
	 * started after the latest task in the previous pass. This
	 * guarantees forward progress and that we don't miss any tasks.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's callback,
		 * if he provided one
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * The new task was inserted; the heap wasn't
			 * previously full
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * The new task was inserted, and pushed out a
			 * different task
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again
		 * in case some of them were in the middle of forking
		 * children that didn't get processed.
		 * Not the most efficient way to do it, but it avoids
		 * having to take callback_mutex in the fork path
		 */
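/*
 * Hypothetical sketch (not part of cgroup.c): a caller of
 * cgroup_scan_tasks() above. The scanner's cg, test_task, process_task
 * and heap fields are the ones the function dereferences; test_task may
 * be NULL to select every task. The example_* callbacks are made up.
 */
#if 0	/* illustration only */
static int example_test(struct task_struct *tsk, struct cgroup_scanner *scan)
{
	return !(tsk->flags & PF_EXITING);	/* skip exiting tasks */
}

static void example_process(struct task_struct *tsk,
			    struct cgroup_scanner *scan)
{
	/* called without css_set_lock held, one task at a time */
}

static void example_scan(struct cgroup *cgrp)
{
	struct cgroup_scanner scan = {
		.cg = cgrp,
		.test_task = example_test,
		.process_task = example_process,
		.heap = NULL,	/* let cgroup_scan_tasks() allocate one */
	};

	cgroup_scan_tasks(&scan);
}
#endif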