Skip to content

Commit ea15f8c

Browse files
committed
cgroup: split cgroup destruction into two steps
Split cgroup_destroy_locked() into two steps and put the latter half into cgroup_offline_fn() which is executed from a work item. The latter half is responsible for offlining the css's, removing the cgroup from internal lists, and propagating release notification to the parent. The separation is to allow using percpu refcnt for css. Note that this allows for other cgroup operations to happen between the first and second halves of destruction, including creating a new cgroup with the same name. As the target cgroup is marked DEAD in the first half and cgroup internals don't care about the names of cgroups, this should be fine. A comment explaining this will be added by the next patch which implements the actual percpu refcnting. As RCU freeing is guaranteed to happen after the second step of destruction, we can use the same work item for both. This patch renames cgroup->free_work to ->destroy_work and uses it for both purposes. INIT_WORK() is now performed right before queueing the work item. Signed-off-by: Tejun Heo <[email protected]> Acked-by: Li Zefan <[email protected]>
1 parent 455050d commit ea15f8c

File tree

2 files changed

+28
-12
lines changed

include/linux/cgroup.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ struct cgroup {
233233

234234
/* For RCU-protected deletion */
235235
struct rcu_head rcu_head;
236-
struct work_struct free_work;
236+
struct work_struct destroy_work;
237237

238238
/* List of events which userspace want to receive */
239239
struct list_head event_list;

kernel/cgroup.c

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ static struct cgroup_name root_cgroup_name = { .name = "/" };
208208
*/
209209
static int need_forkexit_callback __read_mostly;
210210

211+
static void cgroup_offline_fn(struct work_struct *work);
211212
static int cgroup_destroy_locked(struct cgroup *cgrp);
212213
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
213214
struct cftype cfts[], bool is_add);
@@ -830,7 +831,7 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
830831

831832
static void cgroup_free_fn(struct work_struct *work)
832833
{
833-
struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
834+
struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
834835
struct cgroup_subsys *ss;
835836

836837
mutex_lock(&cgroup_mutex);
@@ -875,7 +876,8 @@ static void cgroup_free_rcu(struct rcu_head *head)
875876
{
876877
struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
877878

878-
schedule_work(&cgrp->free_work);
879+
INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
880+
schedule_work(&cgrp->destroy_work);
879881
}
880882

881883
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -1407,7 +1409,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
14071409
INIT_LIST_HEAD(&cgrp->allcg_node);
14081410
INIT_LIST_HEAD(&cgrp->release_list);
14091411
INIT_LIST_HEAD(&cgrp->pidlists);
1410-
INIT_WORK(&cgrp->free_work, cgroup_free_fn);
14111412
mutex_init(&cgrp->pidlist_mutex);
14121413
INIT_LIST_HEAD(&cgrp->event_list);
14131414
spin_lock_init(&cgrp->event_list_lock);
@@ -2991,12 +2992,13 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos)
29912992
/*
29922993
* @pos could already have been removed. Once a cgroup is removed,
29932994
* its ->sibling.next is no longer updated when its next sibling
2994-
* changes. As CGRP_DEAD is set on removal which is fully
2995-
* serialized, if we see it unasserted, it's guaranteed that the
2996-
* next sibling hasn't finished its grace period even if it's
2997-
* already removed, and thus safe to dereference from this RCU
2998-
* critical section. If ->sibling.next is inaccessible,
2999-
* cgroup_is_dead() is guaranteed to be visible as %true here.
2995+
* changes. As CGRP_DEAD assertion is serialized and happens
2996+
* before the cgroup is taken off the ->sibling list, if we see it
2997+
* unasserted, it's guaranteed that the next sibling hasn't
2998+
* finished its grace period even if it's already removed, and thus
2999+
* safe to dereference from this RCU critical section. If
3000+
* ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
3001+
* to be visible as %true here.
30003002
*/
30013003
if (likely(!cgroup_is_dead(pos))) {
30023004
next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
@@ -4359,7 +4361,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
43594361
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
43604362
{
43614363
struct dentry *d = cgrp->dentry;
4362-
struct cgroup *parent = cgrp->parent;
43634364
struct cgroup_event *event, *tmp;
43644365
struct cgroup_subsys *ss;
43654366
bool empty;
@@ -4423,6 +4424,21 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
44234424
}
44244425
spin_unlock(&cgrp->event_list_lock);
44254426

4427+
INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
4428+
schedule_work(&cgrp->destroy_work);
4429+
4430+
return 0;
4431+
};
4432+
4433+
static void cgroup_offline_fn(struct work_struct *work)
4434+
{
4435+
struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
4436+
struct cgroup *parent = cgrp->parent;
4437+
struct dentry *d = cgrp->dentry;
4438+
struct cgroup_subsys *ss;
4439+
4440+
mutex_lock(&cgroup_mutex);
4441+
44264442
/* tell subsystems to initate destruction */
44274443
for_each_subsys(cgrp->root, ss)
44284444
offline_css(ss, cgrp);
@@ -4446,7 +4462,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
44464462
set_bit(CGRP_RELEASABLE, &parent->flags);
44474463
check_for_release(parent);
44484464

4449-
return 0;
4465+
mutex_unlock(&cgroup_mutex);
44504466
}
44514467

44524468
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)

0 commit comments

Comments
 (0)