From a63b9072ea4e32c1fde8b783ccf6e4288414660b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Jan 2012 11:58:09 +0800 Subject: cgroup: remove cgroup_attach_task_current_cg() It's just a wrapper of cgroup_attach_task_all(), and it's no longer used after commit 87d6a412bd1ed82c14cabd4b408003b23bbd2880 (vhost: fix attach to cgroups regression) Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e9b6021..dee53bd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -602,11 +602,6 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); -static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) -{ - return cgroup_attach_task_all(current, tsk); -} - /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works * if cgroup_subsys.use_id == true. It can be used for looking up and scanning. @@ -669,10 +664,6 @@ static inline int cgroup_attach_task_all(struct task_struct *from, { return 0; } -static inline int cgroup_attach_task_current_cg(struct task_struct *t) -{ - return 0; -} #endif /* !CONFIG_CGROUPS */ -- cgit v0.10.2 From 245282557c49754af3dbcc732316e814340d6bce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Jan 2012 11:58:43 +0800 Subject: cgroup: move struct cgroup_pidlist out from the header file It's internally used only. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index dee53bd..7da3e74 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -160,38 +160,6 @@ enum { CGRP_CLONE_CHILDREN, }; -/* which pidlist file are we talking about? */ -enum cgroup_filetype { - CGROUP_FILE_PROCS, - CGROUP_FILE_TASKS, -}; - -/* - * A pidlist is a list of pids that virtually represents the contents of one - * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, - * a pair (one each for procs, tasks) for each pid namespace that's relevant - * to the cgroup. - */ -struct cgroup_pidlist { - /* - * used to find which pidlist is wanted. doesn't change as long as - * this particular list stays in the list. - */ - struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; - /* array of xids */ - pid_t *list; - /* how many elements the above list has */ - int length; - /* how many files are using the current array */ - int use_count; - /* each of these stored in a list by its cgroup */ - struct list_head links; - /* pointer to the cgroup we belong to, for list removal purposes */ - struct cgroup *owner; - /* protects the other fields */ - struct rw_semaphore mutex; -}; - struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a5d3b53..6ca7aca 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3043,6 +3043,38 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) * */ +/* which pidlist file are we talking about? */ +enum cgroup_filetype { + CGROUP_FILE_PROCS, + CGROUP_FILE_TASKS, +}; + +/* + * A pidlist is a list of pids that virtually represents the contents of one + * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, + * a pair (one each for procs, tasks) for each pid namespace that's relevant + * to the cgroup. + */ +struct cgroup_pidlist { + /* + * used to find which pidlist is wanted. doesn't change as long as + * this particular list stays in the list. + */ + struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; + /* array of xids */ + pid_t *list; + /* how many elements the above list has */ + int length; + /* how many files are using the current array */ + int use_count; + /* each of these stored in a list by its cgroup */ + struct list_head links; + /* pointer to the cgroup we belong to, for list removal purposes */ + struct cgroup *owner; + /* protects the other fields */ + struct rw_semaphore mutex; +}; + /* * The following two functions "fix" the issue where there are more pids * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. -- cgit v0.10.2 From b78949ebfb563c29808a9d0a772e3adb5561bc80 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Tue, 3 Jan 2012 21:18:30 -0800 Subject: cgroup: simplify double-check locking in cgroup_attach_proc To keep the complexity of the double-check locking in one place, move the thread_group_leader check up into attach_task_by_pid(). This allows us to use a goto instead of returning -EAGAIN. While at it, convert a couple of returns to gotos and use rcu for the !pid case also in order to simplify the logic. Changes in V2: * https://lkml.org/lkml/2011/12/22/86 (Tejun Heo) * Use a goto instead of returning -EAGAIN Signed-off-by: Mandeep Singh Baines Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: Frederic Weisbecker Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6ca7aca..12c07e8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2104,19 +2104,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) /* prevent changes to the threadgroup list while we take a snapshot. */ read_lock(&tasklist_lock); - if (!thread_group_leader(leader)) { - /* - * a race with de_thread from another thread's exec() may strip - * us of our leadership, making while_each_thread unsafe to use - * on this task. if this happens, there is no choice but to - * throw this task away and try again (from cgroup_procs_write); - * this is "double-double-toil-and-trouble-check locking". - */ - read_unlock(&tasklist_lock); - retval = -EAGAIN; - goto out_free_group_list; - } - tsk = leader; i = 0; do { @@ -2245,22 +2232,14 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) if (!cgroup_lock_live_group(cgrp)) return -ENODEV; +retry_find_task: + rcu_read_lock(); if (pid) { - rcu_read_lock(); tsk = find_task_by_vpid(pid); if (!tsk) { rcu_read_unlock(); - cgroup_unlock(); - return -ESRCH; - } - if (threadgroup) { - /* - * RCU protects this access, since tsk was found in the - * tid map. a race with de_thread may cause group_leader - * to stop being the leader, but cgroup_attach_proc will - * detect it later. - */ - tsk = tsk->group_leader; + ret= -ESRCH; + goto out_unlock_cgroup; } /* * even if we're attaching all tasks in the thread group, we @@ -2271,29 +2250,38 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup) cred->euid != tcred->uid && cred->euid != tcred->suid) { rcu_read_unlock(); - cgroup_unlock(); - return -EACCES; + ret = -EACCES; + goto out_unlock_cgroup; } - get_task_struct(tsk); - rcu_read_unlock(); - } else { - if (threadgroup) - tsk = current->group_leader; - else - tsk = current; - get_task_struct(tsk); - } - - threadgroup_lock(tsk); + } else + tsk = current; if (threadgroup) + tsk = tsk->group_leader; + get_task_struct(tsk); + rcu_read_unlock(); + + threadgroup_lock(tsk); + if (threadgroup) { + if (!thread_group_leader(tsk)) { + /* + * a race with de_thread from another thread's exec() + * may strip us of our leadership, if this happens, + * there is no choice but to throw this task away and + * try again; this is + * "double-double-toil-and-trouble-check locking". + */ + threadgroup_unlock(tsk); + put_task_struct(tsk); + goto retry_find_task; + } ret = cgroup_attach_proc(cgrp, tsk); - else + } else ret = cgroup_attach_task(cgrp, tsk); - threadgroup_unlock(tsk); put_task_struct(tsk); +out_unlock_cgroup: cgroup_unlock(); return ret; } @@ -2305,16 +2293,7 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) { - int ret; - do { - /* - * attach_proc fails with -EAGAIN if threadgroup leadership - * changes in the middle of the operation, in which case we need - * to find the task_struct for the new leader and start over. - */ - ret = attach_task_by_pid(cgrp, tgid, true); - } while (ret == -EAGAIN); - return ret; + return attach_task_by_pid(cgrp, tgid, true); } /** -- cgit v0.10.2 From fb5d2b4cfc24963d0e8a7df57de1ecffa10a04cf Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Tue, 3 Jan 2012 21:18:31 -0800 Subject: cgroup: replace tasklist_lock with rcu_read_lock We can replace the tasklist_lock in cgroup_attach_proc with an rcu_read_lock(). Changes in V4: * https://lkml.org/lkml/2011/12/23/284 (Frederic Weisbecker) * Minimize size of rcu_read_lock critical section * Add comment * https://lkml.org/lkml/2011/12/26/136 (Li Zefan) * Split into two patches Changes in V3: * https://lkml.org/lkml/2011/12/22/419 (Frederic Weisbecker) * Add an rcu_read_lock to protect against exit Changes in V2: * https://lkml.org/lkml/2011/12/22/86 (Tejun Heo) * Use a goto instead of returning -EAGAIN Suggested-by: Frederic Weisbecker Signed-off-by: Mandeep Singh Baines Acked-by: Li Zefan Acked-by: Frederic Weisbecker Signed-off-by: Tejun Heo Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 12c07e8..1626152 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2102,10 +2102,14 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) if (retval) goto out_free_group_list; - /* prevent changes to the threadgroup list while we take a snapshot. */ - read_lock(&tasklist_lock); tsk = leader; i = 0; + /* + * Prevent freeing of tasks while we take a snapshot. Tasks that are + * already PF_EXITING could be freed from underneath us unless we + * take an rcu_read_lock. + */ + rcu_read_lock(); do { struct task_and_cgroup ent; @@ -2128,11 +2132,11 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) BUG_ON(retval != 0); i++; } while_each_thread(leader, tsk); + rcu_read_unlock(); /* remember the number of threads in the array for later. */ group_size = i; tset.tc_array = group; tset.tc_array_len = group_size; - read_unlock(&tasklist_lock); /* methods shouldn't be called if no task is actually migrating */ retval = 0; -- cgit v0.10.2 From 61d1d219c4c0761059236a46867bc49943c4d29d Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Mon, 30 Jan 2012 12:51:56 -0800 Subject: cgroup: remove extra calls to find_existing_css_set In cgroup_attach_proc, we indirectly call find_existing_css_set 3 times. It is an expensive call so we want to call it a minimum of times. This patch only calls it once and stores the result so that it can be used later on when we call cgroup_task_migrate. This required modifying cgroup_task_migrate to take the new css_set (which we obtained from find_css_set) as a parameter. The nice side effect of this is that cgroup_task_migrate is now identical for cgroup_attach_task and cgroup_attach_proc. It also now returns a void since it can never fail. Changes in V5: * https://lkml.org/lkml/2012/1/20/344 (Tejun Heo) * Remove css_set_refs Changes in V4: * https://lkml.org/lkml/2011/12/22/421 (Li Zefan) * Avoid GFP_KERNEL (sleep) in rcu_read_lock by getting css_set in a separate loop not under an rcu_read_lock Changes in V3: * https://lkml.org/lkml/2011/12/22/13 (Li Zefan) * Fixed earlier bug by creating a seperate patch to remove tasklist_lock Changes in V2: * https://lkml.org/lkml/2011/12/20/372 (Tejun Heo) * Move find_css_set call into loop which creates the flex array * Author * Kill css_set_refs and use group_size instead * Fix an off-by-one error in counting css_set refs * Add a retval check in out_list_teardown Signed-off-by: Mandeep Singh Baines Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: containers@lists.linux-foundation.org Cc: cgroups@vger.kernel.org Cc: KAMEZAWA Hiroyuki Cc: Frederic Weisbecker Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1626152..43a224f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1763,6 +1763,7 @@ EXPORT_SYMBOL_GPL(cgroup_path); struct task_and_cgroup { struct task_struct *task; struct cgroup *cgrp; + struct css_set *cg; }; struct cgroup_taskset { @@ -1843,11 +1844,10 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size); * will already exist. If not set, this function might sleep, and can fail with * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked. */ -static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, - struct task_struct *tsk, bool guarantee) +static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, + struct task_struct *tsk, struct css_set *newcg) { struct css_set *oldcg; - struct css_set *newcg; /* * We are synchronized through threadgroup_lock() against PF_EXITING @@ -1857,23 +1857,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, WARN_ON_ONCE(tsk->flags & PF_EXITING); oldcg = tsk->cgroups; - /* locate or allocate a new css_set for this task. */ - if (guarantee) { - /* we know the css_set we want already exists. */ - struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; - read_lock(&css_set_lock); - newcg = find_existing_css_set(oldcg, cgrp, template); - BUG_ON(!newcg); - get_css_set(newcg); - read_unlock(&css_set_lock); - } else { - might_sleep(); - /* find_css_set will give us newcg already referenced. */ - newcg = find_css_set(oldcg, cgrp); - if (!newcg) - return -ENOMEM; - } - task_lock(tsk); rcu_assign_pointer(tsk->cgroups, newcg); task_unlock(tsk); @@ -1892,7 +1875,6 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp, put_css_set(oldcg); set_bit(CGRP_RELEASABLE, &oldcgrp->flags); - return 0; } /** @@ -1910,6 +1892,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct cgroup *oldcgrp; struct cgroupfs_root *root = cgrp->root; struct cgroup_taskset tset = { }; + struct css_set *newcg; /* @tsk either already exited or can't exit until the end */ if (tsk->flags & PF_EXITING) @@ -1939,9 +1922,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) } } - retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false); - if (retval) + newcg = find_css_set(tsk->cgroups, cgrp); + if (!newcg) { + retval = -ENOMEM; goto out; + } + + cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg); for_each_subsys(root, ss) { if (ss->attach) @@ -1997,66 +1984,6 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) } EXPORT_SYMBOL_GPL(cgroup_attach_task_all); -/* - * cgroup_attach_proc works in two stages, the first of which prefetches all - * new css_sets needed (to make sure we have enough memory before committing - * to the move) and stores them in a list of entries of the following type. - * TODO: possible optimization: use css_set->rcu_head for chaining instead - */ -struct cg_list_entry { - struct css_set *cg; - struct list_head links; -}; - -static bool css_set_check_fetched(struct cgroup *cgrp, - struct task_struct *tsk, struct css_set *cg, - struct list_head *newcg_list) -{ - struct css_set *newcg; - struct cg_list_entry *cg_entry; - struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; - - read_lock(&css_set_lock); - newcg = find_existing_css_set(cg, cgrp, template); - read_unlock(&css_set_lock); - - /* doesn't exist at all? */ - if (!newcg) - return false; - /* see if it's already in the list */ - list_for_each_entry(cg_entry, newcg_list, links) - if (cg_entry->cg == newcg) - return true; - - /* not found */ - return false; -} - -/* - * Find the new css_set and store it in the list in preparation for moving the - * given task to the given cgroup. Returns 0 or -ENOMEM. - */ -static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg, - struct list_head *newcg_list) -{ - struct css_set *newcg; - struct cg_list_entry *cg_entry; - - /* ensure a new css_set will exist for this thread */ - newcg = find_css_set(cg, cgrp); - if (!newcg) - return -ENOMEM; - /* add it to the list */ - cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL); - if (!cg_entry) { - put_css_set(newcg); - return -ENOMEM; - } - cg_entry->cg = newcg; - list_add(&cg_entry->links, newcg_list); - return 0; -} - /** * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup * @cgrp: the cgroup to attach to @@ -2070,20 +1997,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) int retval, i, group_size; struct cgroup_subsys *ss, *failed_ss = NULL; /* guaranteed to be initialized later, but the compiler needs this */ - struct css_set *oldcg; struct cgroupfs_root *root = cgrp->root; /* threadgroup list cursor and array */ struct task_struct *tsk; struct task_and_cgroup *tc; struct flex_array *group; struct cgroup_taskset tset = { }; - /* - * we need to make sure we have css_sets for all the tasks we're - * going to move -before- we actually start moving them, so that in - * case we get an ENOMEM we can bail out before making any changes. - */ - struct list_head newcg_list; - struct cg_list_entry *cg_entry, *temp_nobe; /* * step 0: in order to do expensive, possibly blocking operations for @@ -2119,15 +2038,15 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) /* as per above, nr_threads may decrease, but not increase. */ BUG_ON(i >= group_size); - /* - * saying GFP_ATOMIC has no effect here because we did prealloc - * earlier, but it's good form to communicate our expectations. - */ ent.task = tsk; ent.cgrp = task_cgroup_from_root(tsk, root); /* nothing to do if this task is already in the cgroup */ if (ent.cgrp == cgrp) continue; + /* + * saying GFP_ATOMIC has no effect here because we did prealloc + * earlier, but it's good form to communicate our expectations. + */ retval = flex_array_put(group, i, &ent, GFP_ATOMIC); BUG_ON(retval != 0); i++; @@ -2160,17 +2079,12 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) * step 2: make sure css_sets exist for all threads to be migrated. * we use find_css_set, which allocates a new one if necessary. */ - INIT_LIST_HEAD(&newcg_list); for (i = 0; i < group_size; i++) { tc = flex_array_get(group, i); - oldcg = tc->task->cgroups; - - /* if we don't already have it in the list get a new one */ - if (!css_set_check_fetched(cgrp, tc->task, oldcg, - &newcg_list)) { - retval = css_set_prefetch(cgrp, oldcg, &newcg_list); - if (retval) - goto out_list_teardown; + tc->cg = find_css_set(tc->task->cgroups, cgrp); + if (!tc->cg) { + retval = -ENOMEM; + goto out_put_css_set_refs; } } @@ -2181,8 +2095,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for (i = 0; i < group_size; i++) { tc = flex_array_get(group, i); - retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true); - BUG_ON(retval); + cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg); } /* nothing is sensitive to fork() after this point. */ @@ -2200,15 +2113,16 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) synchronize_rcu(); cgroup_wakeup_rmdir_waiter(cgrp); retval = 0; -out_list_teardown: - /* clean up the list of prefetched css_sets. */ - list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) { - list_del(&cg_entry->links); - put_css_set(cg_entry->cg); - kfree(cg_entry); +out_put_css_set_refs: + if (retval) { + for (i = 0; i < group_size; i++) { + tc = flex_array_get(group, i); + if (!tc->cg) + break; + put_css_set(tc->cg); + } } out_cancel_attach: - /* same deal as in cgroup_attach_task */ if (retval) { for_each_subsys(root, ss) { if (ss == failed_ss) -- cgit v0.10.2 From 761b3ef50e1c2649cffbfa67a4dcb2dcdb7982ed Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 31 Jan 2012 13:47:36 +0800 Subject: cgroup: remove cgroup_subsys argument from callbacks The argument is not used at all, and it's not necessary, because a specific callback handler of course knows which subsys it belongs to. Now only ->pupulate() takes this argument, because the handlers of this callback always call cgroup_add_file()/cgroup_add_files(). So we reduce a few lines of code, though the shrinking of object size is minimal. 16 files changed, 113 insertions(+), 162 deletions(-) text data bss dec hex filename 5486240 656987 7039960 13183187 c928d3 vmlinux.o.orig 5486170 656987 7039960 13183117 c9288d vmlinux.o Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index a7c96ae..8e74980 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -558,8 +558,7 @@ Each subsystem may export the following methods. The only mandatory methods are create/destroy. Any others that are null are presumed to be successful no-ops. -struct cgroup_subsys_state *create(struct cgroup_subsys *ss, - struct cgroup *cgrp) +struct cgroup_subsys_state *create(struct cgroup *cgrp) (cgroup_mutex held by caller) Called to create a subsystem state object for a cgroup. The @@ -574,7 +573,7 @@ identified by the passed cgroup object having a NULL parent (since it's the root of the hierarchy) and may be an appropriate place for initialization code. -void destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +void destroy(struct cgroup *cgrp) (cgroup_mutex held by caller) The cgroup system is about to destroy the passed cgroup; the subsystem @@ -585,7 +584,7 @@ cgroup->parent is still valid. (Note - can also be called for a newly-created cgroup if an error occurs after this subsystem's create() method has been called for the new cgroup). -int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +int pre_destroy(struct cgroup *cgrp); Called before checking the reference count on each subsystem. This may be useful for subsystems which have some extra references even if @@ -593,8 +592,7 @@ there are not tasks in the cgroup. If pre_destroy() returns error code, rmdir() will fail with it. From this behavior, pre_destroy() can be called multiple times against a cgroup. -int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) Called prior to moving one or more tasks into a cgroup; if the @@ -615,8 +613,7 @@ fork. If this method returns 0 (success) then this should remain valid while the caller holds cgroup_mutex and it is ensured that either attach() or cancel_attach() will be called in future. -void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) Called when a task attach operation has failed after can_attach() has succeeded. @@ -625,23 +622,22 @@ function, so that the subsystem can implement a rollback. If not, not necessary. This will be called only about subsystems whose can_attach() operation have succeeded. The parameters are identical to can_attach(). -void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +void attach(struct cgroup *cgrp, struct cgroup_taskset *tset) (cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. The parameters are identical to can_attach(). -void fork(struct cgroup_subsy *ss, struct task_struct *task) +void fork(struct task_struct *task) Called when a task is forked into a cgroup. -void exit(struct cgroup_subsys *ss, struct task_struct *task) +void exit(struct task_struct *task) Called during task exit. -int populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +int populate(struct cgroup *cgrp) (cgroup_mutex held by caller) Called after creation of a cgroup to allow a subsystem to populate @@ -651,7 +647,7 @@ include/linux/cgroup.h for details). Note that although this method can return an error code, the error code is currently not always handled well. -void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) +void post_clone(struct cgroup *cgrp) (cgroup_mutex held by caller) Called during cgroup_create() to do any parameter @@ -659,7 +655,7 @@ initialization which might be required before a task could attach. For example in cpusets, no task may attach before 'cpus' and 'mems' are set up. -void bind(struct cgroup_subsys *ss, struct cgroup *root) +void bind(struct cgroup *root) (cgroup_mutex and ss->hierarchy_mutex held by caller) Called when a cgroup subsystem is rebound to a different hierarchy diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index fa8f263..1359d63 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -28,13 +28,10 @@ static LIST_HEAD(blkio_list); struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); -static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, - struct cgroup *); -static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, - struct cgroup_taskset *); -static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, - struct cgroup_taskset *); -static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); +static struct cgroup_subsys_state *blkiocg_create(struct cgroup *); +static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *); +static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *); +static void blkiocg_destroy(struct cgroup *); static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); /* for encoding cft->private value on file */ @@ -1548,7 +1545,7 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup) ARRAY_SIZE(blkio_files)); } -static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) +static void blkiocg_destroy(struct cgroup *cgroup) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); unsigned long flags; @@ -1598,8 +1595,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup) kfree(blkcg); } -static struct cgroup_subsys_state * -blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) +static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup) { struct blkio_cgroup *blkcg; struct cgroup *parent = cgroup->parent; @@ -1628,8 +1624,7 @@ done: * of the main cic data structures. For now we allow a task to change * its cgroup only if it's the only owner of its ioc. */ -static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; struct io_context *ioc; @@ -1648,8 +1643,7 @@ static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return ret; } -static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; struct io_context *ioc; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7da3e74..501adb1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -452,23 +452,18 @@ int cgroup_taskset_size(struct cgroup_taskset *tset); */ struct cgroup_subsys { - struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, - struct cgroup *cgrp); - int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset); - void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset); - void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset); - void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); - void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task); - int (*populate)(struct cgroup_subsys *ss, - struct cgroup *cgrp); - void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); - void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); + struct cgroup_subsys_state *(*create)(struct cgroup *cgrp); + int (*pre_destroy)(struct cgroup *cgrp); + void (*destroy)(struct cgroup *cgrp); + int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset); + void (*fork)(struct task_struct *task); + void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, + struct task_struct *task); + int (*populate)(struct cgroup_subsys *ss, struct cgroup *cgrp); + void (*post_clone)(struct cgroup *cgrp); + void (*bind)(struct cgroup *root); int subsys_id; int active; diff --git a/include/net/sock.h b/include/net/sock.h index bb972d2..705d1ad 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,7 +68,7 @@ struct cgroup; struct cgroup_subsys; #ifdef CONFIG_NET int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss); +void mem_cgroup_sockets_destroy(struct cgroup *cgrp); #else static inline int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) @@ -76,7 +76,7 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) return 0; } static inline -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +void mem_cgroup_sockets_destroy(struct cgroup *cgrp) { } #endif @@ -869,8 +869,7 @@ struct proto { */ int (*init_cgroup)(struct cgroup *cgrp, struct cgroup_subsys *ss); - void (*destroy_cgroup)(struct cgroup *cgrp, - struct cgroup_subsys *ss); + void (*destroy_cgroup)(struct cgroup *cgrp); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif }; diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h index 3512082..48410ff 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h @@ -13,7 +13,7 @@ struct tcp_memcontrol { struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); -void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); +void tcp_destroy_cgroup(struct cgroup *cgrp); unsigned long long tcp_max_memory(const struct mem_cgroup *memcg); void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx); #endif /* _TCP_MEMCG_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 43a224f..865d89a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -818,7 +818,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) for_each_subsys(cgrp->root, ss) if (ss->pre_destroy) { - ret = ss->pre_destroy(ss, cgrp); + ret = ss->pre_destroy(cgrp); if (ret) break; } @@ -846,7 +846,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) * Release the subsystem state objects. */ for_each_subsys(cgrp->root, ss) - ss->destroy(ss, cgrp); + ss->destroy(cgrp); cgrp->root->number_of_cgroups--; mutex_unlock(&cgroup_mutex); @@ -1015,7 +1015,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, list_move(&ss->sibling, &root->subsys_list); ss->root = root; if (ss->bind) - ss->bind(ss, cgrp); + ss->bind(cgrp); mutex_unlock(&ss->hierarchy_mutex); /* refcount was already taken, and we're keeping it */ } else if (bit & removed_bits) { @@ -1025,7 +1025,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, BUG_ON(cgrp->subsys[i]->cgroup != cgrp); mutex_lock(&ss->hierarchy_mutex); if (ss->bind) - ss->bind(ss, dummytop); + ss->bind(dummytop); dummytop->subsys[i]->cgroup = dummytop; cgrp->subsys[i] = NULL; subsys[i]->root = &rootnode; @@ -1908,7 +1908,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, &tset); + retval = ss->can_attach(cgrp, &tset); if (retval) { /* * Remember on which subsystem the can_attach() @@ -1932,7 +1932,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->attach) - ss->attach(ss, cgrp, &tset); + ss->attach(cgrp, &tset); } synchronize_rcu(); @@ -1954,7 +1954,7 @@ out: */ break; if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, &tset); + ss->cancel_attach(cgrp, &tset); } } return retval; @@ -2067,7 +2067,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, &tset); + retval = ss->can_attach(cgrp, &tset); if (retval) { failed_ss = ss; goto out_cancel_attach; @@ -2104,7 +2104,7 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) */ for_each_subsys(root, ss) { if (ss->attach) - ss->attach(ss, cgrp, &tset); + ss->attach(cgrp, &tset); } /* @@ -2128,7 +2128,7 @@ out_cancel_attach: if (ss == failed_ss) break; if (ss->cancel_attach) - ss->cancel_attach(ss, cgrp, &tset); + ss->cancel_attach(cgrp, &tset); } } out_free_group_list: @@ -3756,7 +3756,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); for_each_subsys(root, ss) { - struct cgroup_subsys_state *css = ss->create(ss, cgrp); + struct cgroup_subsys_state *css = ss->create(cgrp); if (IS_ERR(css)) { err = PTR_ERR(css); @@ -3770,7 +3770,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, } /* At error, ->destroy() callback has to free assigned ID. */ if (clone_children(parent) && ss->post_clone) - ss->post_clone(ss, cgrp); + ss->post_clone(cgrp); } cgroup_lock_hierarchy(root); @@ -3804,7 +3804,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, for_each_subsys(root, ss) { if (cgrp->subsys[ss->subsys_id]) - ss->destroy(ss, cgrp); + ss->destroy(cgrp); } mutex_unlock(&cgroup_mutex); @@ -4028,7 +4028,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) /* Create the top cgroup state for this subsystem */ list_add(&ss->sibling, &rootnode.subsys_list); ss->root = &rootnode; - css = ss->create(ss, dummytop); + css = ss->create(dummytop); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_cgroup_css(css, ss, dummytop); @@ -4117,7 +4117,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) * no ss->create seems to need anything important in the ss struct, so * this can happen first (i.e. before the rootnode attachment). */ - css = ss->create(ss, dummytop); + css = ss->create(dummytop); if (IS_ERR(css)) { /* failure case - need to deassign the subsys[] slot. */ subsys[i] = NULL; @@ -4135,7 +4135,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) int ret = cgroup_init_idr(ss, css); if (ret) { dummytop->subsys[ss->subsys_id] = NULL; - ss->destroy(ss, dummytop); + ss->destroy(dummytop); subsys[i] = NULL; mutex_unlock(&cgroup_mutex); return ret; @@ -4233,7 +4233,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) * pointer to find their state. note that this also takes care of * freeing the css_id. */ - ss->destroy(ss, dummytop); + ss->destroy(dummytop); dummytop->subsys[ss->subsys_id] = NULL; mutex_unlock(&cgroup_mutex); @@ -4509,7 +4509,7 @@ void cgroup_fork_callbacks(struct task_struct *child) for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss->fork) - ss->fork(ss, child); + ss->fork(child); } } } @@ -4611,7 +4611,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) struct cgroup *old_cgrp = rcu_dereference_raw(cg->subsys[i])->cgroup; struct cgroup *cgrp = task_cgroup(tsk, i); - ss->exit(ss, cgrp, old_cgrp, tsk); + ss->exit(cgrp, old_cgrp, tsk); } } } @@ -5066,8 +5066,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) } #ifdef CONFIG_CGROUP_DEBUG -static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, - struct cgroup *cont) +static struct cgroup_subsys_state *debug_create(struct cgroup *cont) { struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); @@ -5077,7 +5076,7 @@ static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, return css; } -static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +static void debug_destroy(struct cgroup *cont) { kfree(cont->subsys[debug_subsys_id]); } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fc0646b..f86e939 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -128,8 +128,7 @@ struct cgroup_subsys freezer_subsys; * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) * sighand->siglock */ -static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) { struct freezer *freezer; @@ -142,8 +141,7 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, return &freezer->css; } -static void freezer_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void freezer_destroy(struct cgroup *cgroup) { struct freezer *freezer = cgroup_freezer(cgroup); @@ -164,8 +162,7 @@ static bool is_task_frozen_enough(struct task_struct *task) * a write to that file racing against an attach, and hence the * can_attach() result will remain valid until the attach completes. */ -static int freezer_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, +static int freezer_can_attach(struct cgroup *new_cgroup, struct cgroup_taskset *tset) { struct freezer *freezer; @@ -185,7 +182,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss, return 0; } -static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) +static void freezer_fork(struct task_struct *task) { struct freezer *freezer; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a09ac2b..5d57583 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1399,8 +1399,7 @@ static nodemask_t cpuset_attach_nodemask_from; static nodemask_t cpuset_attach_nodemask_to; /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ -static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct cpuset *cs = cgroup_cs(cgrp); struct task_struct *task; @@ -1436,8 +1435,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return 0; } -static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct mm_struct *mm; struct task_struct *task; @@ -1833,8 +1831,7 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) * (and likewise for mems) to the new cgroup. Called with cgroup_mutex * held. */ -static void cpuset_post_clone(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void cpuset_post_clone(struct cgroup *cgroup) { struct cgroup *parent, *child; struct cpuset *cs, *parent_cs; @@ -1857,13 +1854,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, /* * cpuset_create - create a cpuset - * ss: cpuset cgroup subsystem * cont: control group that the new cpuset will be part of */ -static struct cgroup_subsys_state *cpuset_create( - struct cgroup_subsys *ss, - struct cgroup *cont) +static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) { struct cpuset *cs; struct cpuset *parent; @@ -1902,7 +1896,7 @@ static struct cgroup_subsys_state *cpuset_create( * will call async_rebuild_sched_domains(). */ -static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +static void cpuset_destroy(struct cgroup *cont) { struct cpuset *cs = cgroup_cs(cont); diff --git a/kernel/events/core.c b/kernel/events/core.c index a8f4ac0..a5d1ee9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6906,8 +6906,7 @@ unlock: device_initcall(perf_event_sysfs_init); #ifdef CONFIG_CGROUP_PERF -static struct cgroup_subsys_state *perf_cgroup_create( - struct cgroup_subsys *ss, struct cgroup *cont) +static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont) { struct perf_cgroup *jc; @@ -6924,8 +6923,7 @@ static struct cgroup_subsys_state *perf_cgroup_create( return &jc->css; } -static void perf_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void perf_cgroup_destroy(struct cgroup *cont) { struct perf_cgroup *jc; jc = container_of(cgroup_subsys_state(cont, perf_subsys_id), @@ -6941,8 +6939,7 @@ static int __perf_cgroup_move(void *info) return 0; } -static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup_taskset *tset) +static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -6950,8 +6947,8 @@ static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, task_function_call(task, __perf_cgroup_move, task); } -static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) +static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, + struct task_struct *task) { /* * cgroup_exit() is called in the copy_process() failure path. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index df00cb0..ff12f72 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7530,8 +7530,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) struct task_group, css); } -static struct cgroup_subsys_state * -cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) +static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) { struct task_group *tg, *parent; @@ -7548,15 +7547,14 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) return &tg->css; } -static void -cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cpu_cgroup_destroy(struct cgroup *cgrp) { struct task_group *tg = cgroup_tg(cgrp); sched_destroy_group(tg); } -static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, +static int cpu_cgroup_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -7574,7 +7572,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, return 0; } -static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, +static void cpu_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *task; @@ -7584,8 +7582,8 @@ static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } static void -cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) +cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, + struct task_struct *task) { /* * cgroup_exit() is called in the copy_process() failure path. @@ -7935,8 +7933,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { */ /* create a new cpu accounting group */ -static struct cgroup_subsys_state *cpuacct_create( - struct cgroup_subsys *ss, struct cgroup *cgrp) +static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) { struct cpuacct *ca; @@ -7966,8 +7963,7 @@ out: } /* destroy an existing cpu accounting group */ -static void -cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cpuacct_destroy(struct cgroup *cgrp) { struct cpuacct *ca = cgroup_ca(cgrp); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3dbff4d..ae2f0a8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4580,10 +4580,9 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) return mem_cgroup_sockets_init(cont, ss); }; -static void kmem_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void kmem_cgroup_destroy(struct cgroup *cont) { - mem_cgroup_sockets_destroy(cont, ss); + mem_cgroup_sockets_destroy(cont); } #else static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) @@ -4591,8 +4590,7 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) return 0; } -static void kmem_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void kmem_cgroup_destroy(struct cgroup *cont) { } #endif @@ -4884,7 +4882,7 @@ err_cleanup: } static struct cgroup_subsys_state * __ref -mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) +mem_cgroup_create(struct cgroup *cont) { struct mem_cgroup *memcg, *parent; long error = -ENOMEM; @@ -4946,20 +4944,18 @@ free_out: return ERR_PTR(error); } -static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static int mem_cgroup_pre_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); return mem_cgroup_force_empty(memcg, false); } -static void mem_cgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cont) +static void mem_cgroup_destroy(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - kmem_cgroup_destroy(ss, cont); + kmem_cgroup_destroy(cont); mem_cgroup_put(memcg); } @@ -5296,9 +5292,8 @@ static void mem_cgroup_clear_mc(void) mem_cgroup_end_move(from); } -static int mem_cgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { struct task_struct *p = cgroup_taskset_first(tset); int ret = 0; @@ -5336,9 +5331,8 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, return ret; } -static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { mem_cgroup_clear_mc(); } @@ -5453,9 +5447,8 @@ retry: up_read(&mm->mmap_sem); } -static void mem_cgroup_move_task(struct cgroup_subsys *ss, - struct cgroup *cont, - struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup *cont, + struct cgroup_taskset *tset) { struct task_struct *p = cgroup_taskset_first(tset); struct mm_struct *mm = get_task_mm(p); @@ -5470,20 +5463,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, mem_cgroup_clear_mc(); } #else /* !CONFIG_MMU */ -static int mem_cgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static int mem_cgroup_can_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { return 0; } -static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss, - struct cgroup *cgroup, - struct cgroup_taskset *tset) +static void mem_cgroup_cancel_attach(struct cgroup *cgroup, + struct cgroup_taskset *tset) { } -static void mem_cgroup_move_task(struct cgroup_subsys *ss, - struct cgroup *cont, - struct cgroup_taskset *tset) +static void mem_cgroup_move_task(struct cgroup *cont, + struct cgroup_taskset *tset) { } #endif diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 3a9fd48..22036ab 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -23,9 +23,8 @@ #include #include -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp); -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup *cgrp); static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); struct cgroup_subsys net_prio_subsys = { @@ -120,8 +119,7 @@ static void update_netdev_tables(void) rtnl_unlock(); } -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; int ret; @@ -145,7 +143,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, return &cs->css; } -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cgrp_destroy(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; struct net_device *dev; diff --git a/net/core/sock.c b/net/core/sock.c index 5c5af998..688037c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -160,19 +160,19 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) out: list_for_each_entry_continue_reverse(proto, &proto_list, node) if (proto->destroy_cgroup) - proto->destroy_cgroup(cgrp, ss); + proto->destroy_cgroup(cgrp); mutex_unlock(&proto_list_mutex); return ret; } -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +void mem_cgroup_sockets_destroy(struct cgroup *cgrp) { struct proto *proto; mutex_lock(&proto_list_mutex); list_for_each_entry_reverse(proto, &proto_list, node) if (proto->destroy_cgroup) - proto->destroy_cgroup(cgrp, ss); + proto->destroy_cgroup(cgrp); mutex_unlock(&proto_list_mutex); } #endif diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 4997878..e714c68 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -94,7 +94,7 @@ create_files: } EXPORT_SYMBOL(tcp_init_cgroup); -void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) +void tcp_destroy_cgroup(struct cgroup *cgrp) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct cg_proto *cg_proto; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index f84fdc3..1afaa28 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -22,9 +22,8 @@ #include #include -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp); -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup *cgrp); static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); struct cgroup_subsys net_cls_subsys = { @@ -51,8 +50,7 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p) struct cgroup_cls_state, css); } -static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, - struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) { struct cgroup_cls_state *cs; @@ -66,7 +64,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, return &cs->css; } -static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +static void cgrp_destroy(struct cgroup *cgrp) { kfree(cgrp_cls_state(cgrp)); } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 8b5b5d8..c43a332 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -61,8 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) struct cgroup_subsys devices_subsys; -static int devcgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgrp, struct cgroup_taskset *set) +static int devcgroup_can_attach(struct cgroup *new_cgrp, + struct cgroup_taskset *set) { struct task_struct *task = cgroup_taskset_first(set); @@ -156,8 +156,7 @@ remove: /* * called from kernel/cgroup.c with cgroup_lock() held. */ -static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; struct cgroup *parent_cgroup; @@ -195,8 +194,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, return &dev_cgroup->css; } -static void devcgroup_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void devcgroup_destroy(struct cgroup *cgroup) { struct dev_cgroup *dev_cgroup; struct dev_whitelist_item *wh, *tmp; -- cgit v0.10.2 From 9a4b430451bb6d8d6b7dcdfbee0e1330b7c475a6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Feb 2012 03:37:26 +0100 Subject: cgroup: Remove wrong comment on cgroup_enable_task_cg_list() Remove the stale comment about RCU protection. Many callers (all of them?) of cgroup_enable_task_cg_list() don't seem to be in an RCU read side critical section. Besides, RCU is not helpful to protect against while_each_thread(). Signed-off-by: Frederic Weisbecker Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: Mandeep Singh Baines Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul E. McKenney diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 865d89a..6e4eb43 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2701,9 +2701,6 @@ static void cgroup_advance_iter(struct cgroup *cgrp, * using their cgroups capability, we don't maintain the lists running * through each css_set to its tasks until we see the list actually * used - in other words after the first call to cgroup_iter_start(). - * - * The tasklist_lock is not held here, as do_each_thread() and - * while_each_thread() are protected by RCU. */ static void cgroup_enable_task_cg_lists(void) { -- cgit v0.10.2 From 3ce3230a0cff484e5130153f244d4fb8a56b3a8b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 8 Feb 2012 03:37:27 +0100 Subject: cgroup: Walk task list under tasklist_lock in cgroup_enable_task_cg_list Walking through the tasklist in cgroup_enable_task_cg_list() inside an RCU read side critical section is not enough because: - RCU is not (yet) safe against while_each_thread() - If we use only RCU, a forking task that has passed cgroup_post_fork() without seeing use_task_css_set_links == 1 is not guaranteed to have its child immediately visible in the tasklist if we walk through it remotely with RCU. In this case it will be missing in its css_set's task list. Thus we need to traverse the list (unfortunately) under the tasklist_lock. It makes us safe against while_each_thread() and also make sure we see all forked task that have been added to the tasklist. As a secondary effect, reading and writing use_task_css_set_links are now well ordered against tasklist traversing and modification. The new layout is: CPU 0 CPU 1 use_task_css_set_links = 1 write_lock(tasklist_lock) read_lock(tasklist_lock) add task to tasklist do_each_thread() { write_unlock(tasklist_lock) add thread to css set links if (use_task_css_set_links) } while_each_thread() add thread to css set links read_unlock(tasklist_lock) If CPU 0 traverse the list after the task has been added to the tasklist then it is correctly added to the css set links. OTOH if CPU 0 traverse the tasklist before the new task had the opportunity to be added to the tasklist because it was too early in the fork process, then CPU 1 catches up and add the task to the css set links after it added the task to the tasklist. The right value of use_task_css_set_links is guaranteed to be visible from CPU 1 due to the LOCK/UNLOCK implicit barrier properties: the read_unlock on CPU 0 makes the write on use_task_css_set_links happening and the write_lock on CPU 1 make the read of use_task_css_set_links that comes afterward to return the correct value. Signed-off-by: Frederic Weisbecker Acked-by: Li Zefan Signed-off-by: Tejun Heo Cc: Mandeep Singh Baines Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul E. McKenney diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6e4eb43..c6877fe 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2707,6 +2707,14 @@ static void cgroup_enable_task_cg_lists(void) struct task_struct *p, *g; write_lock(&css_set_lock); use_task_css_set_links = 1; + /* + * We need tasklist_lock because RCU is not safe against + * while_each_thread(). Besides, a forking task that has passed + * cgroup_post_fork() without seeing use_task_css_set_links = 1 + * is not guaranteed to have its child immediately visible in the + * tasklist if we walk through it with RCU. + */ + read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); /* @@ -2718,6 +2726,7 @@ static void cgroup_enable_task_cg_lists(void) list_add(&p->cg_list, &p->cgroups->tasks); task_unlock(p); } while_each_thread(g, p); + read_unlock(&tasklist_lock); write_unlock(&css_set_lock); } @@ -4522,6 +4531,17 @@ void cgroup_fork_callbacks(struct task_struct *child) */ void cgroup_post_fork(struct task_struct *child) { + /* + * use_task_css_set_links is set to 1 before we walk the tasklist + * under the tasklist_lock and we read it here after we added the child + * to the tasklist under the tasklist_lock as well. If the child wasn't + * yet in the tasklist when we walked through it from + * cgroup_enable_task_cg_lists(), then use_task_css_set_links value + * should be visible now due to the paired locking and barriers implied + * by LOCK/UNLOCK: it is written before the tasklist_lock unlock + * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock + * lock on fork. + */ if (use_task_css_set_links) { write_lock(&css_set_lock); if (list_empty(&child->cg_list)) { -- cgit v0.10.2