@@ -1132,7 +1132,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
         if (css == &root->css)
                 break;
 
-        if (css_tryget_online(css)) {
+        if (css_tryget(css)) {
                 /*
                  * Make sure the memcg is initialized:
                  * mem_cgroup_css_online() orders the the
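The one-word change in the hunk above is what enables the rest of this patch: css_tryget() succeeds as long as the css still has references, while css_tryget_online() additionally fails once the group has been taken offline. Because charges are no longer reparented at offline time (see the removals below), an offline memcg can still own charged pages, and the reclaim iterator has to keep visiting it. Below is a minimal userspace sketch of that distinction, assuming only C11 atomics; struct group, try_get() and try_get_online() are invented stand-ins for the kernel's percpu_ref-based css refcounting, and the online check is deliberately simplified (and racy) in a way the real code is not:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Toy css: a refcount plus an "online" flag. */
    struct group {
            atomic_long refs;
            atomic_bool online;
    };

    /* Succeeds while references remain -- even after offlining. */
    static bool try_get(struct group *g)
    {
            long old = atomic_load(&g->refs);

            while (old > 0)
                    if (atomic_compare_exchange_weak(&g->refs, &old, old + 1))
                            return true;
            return false;
    }

    /* Additionally requires the group to still be online. */
    static bool try_get_online(struct group *g)
    {
            return atomic_load(&g->online) && try_get(g);
    }

    static void put(struct group *g)
    {
            atomic_fetch_sub(&g->refs, 1);
    }

    int main(void)
    {
            struct group g = { .refs = 1, .online = true };

            atomic_store(&g.online, false);                    /* offline the group */
            printf("tryget_online: %d\n", try_get_online(&g)); /* 0: skipped */
            printf("tryget:        %d\n", try_get(&g));        /* 1: still usable */
            put(&g);
            return 0;
    }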
@@ -3316,79 +3316,6 @@ static int mem_cgroup_move_account(struct page *page,
         return ret;
 }
 
-/**
- * mem_cgroup_move_parent - moves page to the parent group
- * @page: the page to move
- * @pc: page_cgroup of the page
- * @child: page's cgroup
- *
- * move charges to its parent or the root cgroup if the group has no
- * parent (aka use_hierarchy==0).
- * Although this might fail (get_page_unless_zero, isolate_lru_page or
- * mem_cgroup_move_account fails) the failure is always temporary and
- * it signals a race with a page removal/uncharge or migration. In the
- * first case the page is on the way out and it will vanish from the LRU
- * on the next attempt and the call should be retried later.
- * Isolation from the LRU fails only if page has been isolated from
- * the LRU since we looked at it and that usually means either global
- * reclaim or migration going on. The page will either get back to the
- * LRU or vanish.
- * Finaly mem_cgroup_move_account fails only if the page got uncharged
- * (!PageCgroupUsed) or moved to a different group. The page will
- * disappear in the next attempt.
- */
-static int mem_cgroup_move_parent(struct page *page,
-                                  struct page_cgroup *pc,
-                                  struct mem_cgroup *child)
-{
-        struct mem_cgroup *parent;
-        unsigned int nr_pages;
-        unsigned long uninitialized_var(flags);
-        int ret;
-
-        VM_BUG_ON(mem_cgroup_is_root(child));
-
-        ret = -EBUSY;
-        if (!get_page_unless_zero(page))
-                goto out;
-        if (isolate_lru_page(page))
-                goto put;
-
-        nr_pages = hpage_nr_pages(page);
-
-        parent = parent_mem_cgroup(child);
-        /*
-         * If no parent, move charges to root cgroup.
-         */
-        if (!parent)
-                parent = root_mem_cgroup;
-
-        if (nr_pages > 1) {
-                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-                flags = compound_lock_irqsave(page);
-        }
-
-        ret = mem_cgroup_move_account(page, nr_pages,
-                                pc, child, parent);
-        if (!ret) {
-                if (!mem_cgroup_is_root(parent))
-                        css_get_many(&parent->css, nr_pages);
-                /* Take charge off the local counters */
-                page_counter_cancel(&child->memory, nr_pages);
-                if (do_swap_account)
-                        page_counter_cancel(&child->memsw, nr_pages);
-                css_put_many(&child->css, nr_pages);
-        }
-
-        if (nr_pages > 1)
-                compound_unlock_irqrestore(page, flags);
-        putback_lru_page(page);
-put:
-        put_page(page);
-out:
-        return ret;
-}
-
 #ifdef CONFIG_MEMCG_SWAP
 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
                                        bool charge)
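Two details of the deleted helper are worth spelling out. The kernel-doc's contract is that every failure mode is transient, so the (also deleted) caller simply retries. And the success path cancels the charge only in the child's counters: under hierarchical accounting a charge is propagated to every ancestor at charge time, so once the page belongs to the parent, the parent's usage is already correct and only the child's local count needs undoing, which is what the page_counter_cancel() calls do. Here is a self-contained sketch of that invariant with a deliberately simplified, non-atomic page_counter (the real one is lockless and enforces limits); charge() and cancel() are illustrative names:

    #include <stdio.h>

    /* Simplified page_counter: usage propagates up the parent chain. */
    struct page_counter {
            long usage;
            struct page_counter *parent;
    };

    /* Charging a group charges every ancestor as well. */
    static void charge(struct page_counter *c, long nr_pages)
    {
            for (; c; c = c->parent)
                    c->usage += nr_pages;
    }

    /* Cancel undoes a charge in one counter only, not in the ancestors. */
    static void cancel(struct page_counter *c, long nr_pages)
    {
            c->usage -= nr_pages;
    }

    int main(void)
    {
            struct page_counter root = { 0, NULL };
            struct page_counter child = { 0, &root };

            charge(&child, 4);      /* child's pages are visible in root too */
            printf("child=%ld root=%ld\n", child.usage, root.usage); /* 4 4 */

            /*
             * "Move to parent": root's usage already covers the pages,
             * so only the child's local count is cancelled.
             */
            cancel(&child, 4);
            printf("child=%ld root=%ld\n", child.usage, root.usage); /* 0 4 */
            return 0;
    }

This is also why the helper never charges the parent explicitly: doing so would count the pages twice.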
@@ -3682,105 +3609,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
         return nr_reclaimed;
 }
 
-/**
- * mem_cgroup_force_empty_list - clears LRU of a group
- * @memcg: group to clear
- * @node: NUMA node
- * @zid: zone id
- * @lru: lru to to clear
- *
- * Traverse a specified page_cgroup list and try to drop them all. This doesn't
- * reclaim the pages page themselves - pages are moved to the parent (or root)
- * group.
- */
-static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
-                                int node, int zid, enum lru_list lru)
-{
-        struct lruvec *lruvec;
-        unsigned long flags;
-        struct list_head *list;
-        struct page *busy;
-        struct zone *zone;
-
-        zone = &NODE_DATA(node)->node_zones[zid];
-        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
-        list = &lruvec->lists[lru];
-
-        busy = NULL;
-        do {
-                struct page_cgroup *pc;
-                struct page *page;
-
-                spin_lock_irqsave(&zone->lru_lock, flags);
-                if (list_empty(list)) {
-                        spin_unlock_irqrestore(&zone->lru_lock, flags);
-                        break;
-                }
-                page = list_entry(list->prev, struct page, lru);
-                if (busy == page) {
-                        list_move(&page->lru, list);
-                        busy = NULL;
-                        spin_unlock_irqrestore(&zone->lru_lock, flags);
-                        continue;
-                }
-                spin_unlock_irqrestore(&zone->lru_lock, flags);
-
-                pc = lookup_page_cgroup(page);
-
-                if (mem_cgroup_move_parent(page, pc, memcg)) {
-                        /* found lock contention or "pc" is obsolete. */
-                        busy = page;
-                } else
-                        busy = NULL;
-                cond_resched();
-        } while (!list_empty(list));
-}
-
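The busy bookkeeping in the function above is a small anti-livelock device: the scan always takes the page at the tail of the LRU, and when the same page fails to move twice in a row it is rotated to the head with list_move() so the rest of the list can still make progress. A userspace sketch of that rotate-on-retry pattern follows; the hand-rolled list primitives mirror the kernel's, while struct item and move_item() are invented stand-ins for the page and mem_cgroup_move_parent(), with failures bounded so the demo terminates:

    #include <stdbool.h>
    #include <stdio.h>

    /* Minimal circular doubly-linked list, kernel-style. */
    struct list_head { struct list_head *prev, *next; };

    static void list_init(struct list_head *h) { h->prev = h->next = h; }

    static void list_del(struct list_head *e)
    {
            e->prev->next = e->next;
            e->next->prev = e->prev;
    }

    static void list_add(struct list_head *e, struct list_head *h)
    {
            e->next = h->next;
            e->prev = h;
            h->next->prev = e;
            h->next = e;
    }

    /* list_move(): unlink and re-add at the head of the list. */
    static void list_move(struct list_head *e, struct list_head *h)
    {
            list_del(e);
            list_add(e, h);
    }

    struct item {
            struct list_head lru;   /* first member, so the cast below is safe */
            int id;
            int failures_left;      /* how often moving this item still fails */
    };

    #define item_of(p) ((struct item *)(p))

    /* Stand-in for mem_cgroup_move_parent(): may fail temporarily. */
    static bool move_item(struct item *it)
    {
            if (it->failures_left > 0) {
                    it->failures_left--;
                    return false;
            }
            return true;
    }

    int main(void)
    {
            struct item items[] = {
                    { .id = 0 }, { .id = 1, .failures_left = 2 }, { .id = 2 },
            };
            struct list_head list, *busy = NULL;

            list_init(&list);
            for (int i = 0; i < 3; i++)
                    list_add(&items[i].lru, &list);

            while (list.next != &list) {
                    struct list_head *e = list.prev;        /* tail of the LRU */

                    if (e == busy) {
                            list_move(e, &list);    /* rotate repeat offender */
                            busy = NULL;
                            continue;
                    }
                    if (move_item(item_of(e))) {
                            printf("moved item %d\n", item_of(e)->id);
                            list_del(e);
                            busy = NULL;
                    } else {
                            printf("item %d busy, retrying\n", item_of(e)->id);
                            busy = e;
                    }
            }
            return 0;
    }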
-/*
- * make mem_cgroup's charge to be 0 if there is no task by moving
- * all the charges and pages to the parent.
- * This enables deleting this mem_cgroup.
- *
- * Caller is responsible for holding css reference on the memcg.
- */
-static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
-{
-        int node, zid;
-
-        do {
-                /* This is for making all *used* pages to be on LRU. */
-                lru_add_drain_all();
-                drain_all_stock_sync(memcg);
-                mem_cgroup_start_move(memcg);
-                for_each_node_state(node, N_MEMORY) {
-                        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                                enum lru_list lru;
-                                for_each_lru(lru) {
-                                        mem_cgroup_force_empty_list(memcg,
-                                                        node, zid, lru);
-                                }
-                        }
-                }
-                mem_cgroup_end_move(memcg);
-                memcg_oom_recover(memcg);
-                cond_resched();
-
-                /*
-                 * Kernel memory may not necessarily be trackable to a specific
-                 * process. So they are not migrated, and therefore we can't
-                 * expect their value to drop to 0 here.
-                 * Having res filled up with kmem only is enough.
-                 *
-                 * This is a safety check because mem_cgroup_force_empty_list
-                 * could have raced with mem_cgroup_replace_page_cache callers
-                 * so the lru seemed empty but the page could have been added
-                 * right after the check. RES_USAGE should be safe as we always
-                 * charge before adding to the LRU.
-                 */
-        } while (page_counter_read(&memcg->memory) -
-                 page_counter_read(&memcg->kmem) > 0);
-}
-
 /*
  * Test whether @memcg has children, dead or alive. Note that this
  * function doesn't care whether @memcg has use_hierarchy enabled and
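The exit condition of the deleted reparenting loop is the subtle part: kernel-memory charges are not backed by LRU pages and were never reparented, so the loop cannot wait for the memory counter to reach zero. Instead it stops once all user-page charges are gone, i.e. once memory usage minus kmem usage hits zero. A toy rendering of that termination logic, with plain longs standing in for page_counter_read() and drain_user_pages() invented for the demo:

    #include <stdio.h>

    struct counters {
            long memory;    /* all charged pages, user and kernel */
            long kmem;      /* kernel-memory subset of the above  */
    };

    /* One pass of LRU draining: only user pages can be moved away. */
    static void drain_user_pages(struct counters *c, long nr)
    {
            long user = c->memory - c->kmem;

            c->memory -= (nr < user) ? nr : user;
    }

    int main(void)
    {
            struct counters c = { .memory = 10, .kmem = 3 };
            int pass = 0;

            /* Stop once only kernel-memory charges remain. */
            while (c.memory - c.kmem > 0) {
                    drain_user_pages(&c, 4);
                    printf("pass %d: memory=%ld kmem=%ld\n",
                           ++pass, c.memory, c.kmem);
            }
            return 0;
    }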
@@ -5323,7 +5151,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
         struct mem_cgroup_event *event, *tmp;
-        struct cgroup_subsys_state *iter;
 
         /*
          * Unregister events and notify userspace.
@@ -5337,56 +5164,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
         }
         spin_unlock(&memcg->event_list_lock);
 
-        /*
-         * This requires that offlining is serialized. Right now that is
-         * guaranteed because css_killed_work_fn() holds the cgroup_mutex.
-         */
-        css_for_each_descendant_post(iter, css)
-                mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
-
         memcg_unregister_all_caches(memcg);
         vmpressure_cleanup(&memcg->vmpressure);
 }
 
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-        /*
-         * XXX: css_offline() would be where we should reparent all
-         * memory to prepare the cgroup for destruction. However,
-         * memcg does not do css_tryget_online() and page_counter charging
-         * under the same RCU lock region, which means that charging
-         * could race with offlining. Offlining only happens to
-         * cgroups with no tasks in them but charges can show up
-         * without any tasks from the swapin path when the target
-         * memcg is looked up from the swapout record and not from the
-         * current task as it usually is. A race like this can leak
-         * charges and put pages with stale cgroup pointers into
-         * circulation:
-         *
-         * #0                        #1
-         *                           lookup_swap_cgroup_id()
-         *                           rcu_read_lock()
-         *                           mem_cgroup_lookup()
-         *                           css_tryget_online()
-         *                           rcu_read_unlock()
-         * disable css_tryget_online()
-         *   call_rcu()
-         *     offline_css()
-         *       reparent_charges()
-         *                           page_counter_try_charge()
-         *                           css_put()
-         *                             css_free()
-         *                           pc->mem_cgroup = dead memcg
-         *                           add page to lru
-         *
-         * The bulk of the charges are still moved in offline_css() to
-         * avoid pinning a lot of pages in case a long-term reference
-         * like a swapout record is deferring the css_free() to long
-         * after offlining. But this makes sure we catch any charges
-         * made after offlining:
-         */
-        mem_cgroup_reparent_charges(memcg);
 
         memcg_destroy_kmem(memcg);
         __mem_cgroup_free(memcg);
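The deleted XXX comment was the justification for a second reparenting pass in css_free(): a swapin charge looks the memcg up from the swapout record rather than from a task, and because the css_tryget_online() and the later charge are not covered by a single RCU section, the charge can land after offline_css() has already emptied the group. With this patch the backstop becomes unnecessary: judging by the css_get_many()/css_put_many() pairs in the deleted mem_cgroup_move_parent(), each charged page holds a css reference, so a group that is charged after offlining simply stays alive, and remains visible to the now css_tryget()-based iterator, until its last page is uncharged. A sequential replay of the old race with toy types; struct memcg, charge() and reparent_charges() here are illustrative stand-ins, not the kernel functions:

    #include <stdbool.h>
    #include <stdio.h>

    struct memcg {
            const char *name;
            long usage;
            struct memcg *parent;
            bool online;
    };

    static void charge(struct memcg *m, long nr) { m->usage += nr; }

    /* The removed backstop: hand everything to the parent. */
    static void reparent_charges(struct memcg *m)
    {
            m->parent->usage += m->usage;
            m->usage = 0;
    }

    int main(void)
    {
            struct memcg root = { "root", 0, NULL, true };
            struct memcg dead = { "dead", 5, &root, true };

            /*
             * The interleaving from the deleted comment, replayed
             * sequentially: CPU #1 already holds a reference from
             * the swapout-record lookup but has not charged yet.
             */
            dead.online = false;            /* #0: disable css_tryget_online() */
            reparent_charges(&dead);        /* #0: offline_css() empties group */
            charge(&dead, 1);               /* #1: late swapin charge lands    */

            /* Without a css_free()-time backstop the charge is stranded: */
            printf("%s usage after offline: %ld\n", dead.name, dead.usage);

            reparent_charges(&dead);        /* the old fix, run once more */
            printf("%s usage after backstop: %ld\n", dead.name, dead.usage);
            return 0;
    }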