@@ -1270,7 +1270,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
+/* used to demote non-gigantic_huge pages as well */
 static void __destroy_compound_gigantic_page(struct page *page,
 					unsigned int order, bool demote)
 {
@@ -1293,6 +1293,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
 	__ClearPageHead(page);
 }
 
+static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+					unsigned int order)
+{
+	__destroy_compound_gigantic_page(page, order, true);
+}
+
+#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 static void destroy_compound_gigantic_page(struct page *page,
 					unsigned int order)
 {
@@ -1438,6 +1445,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
 	__remove_hugetlb_page(h, page, adjust_surplus, false);
 }
 
+static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+						bool adjust_surplus)
+{
+	__remove_hugetlb_page(h, page, adjust_surplus, true);
+}
+
 static void add_hugetlb_page(struct hstate *h, struct page *page,
 			     bool adjust_surplus)
 {
@@ -1779,6 +1792,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
 	return __prep_compound_gigantic_page(page, order, false);
 }
 
+static bool prep_compound_gigantic_page_for_demote(struct page *page,
+							unsigned int order)
+{
+	return __prep_compound_gigantic_page(page, order, true);
+}
+
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for normal or
 * transparent huge pages.  See the PageTransHuge() documentation for more
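The three `_for_demote` helpers added above all follow the same shape: an existing worker gains a `demote` bool, and a thin wrapper passes `true` so demote-specific behavior (such as skipping ref counting during compound-page teardown) is selected without duplicating the core logic. A minimal standalone sketch of the pattern; the names (`__frob_page` and friends) and the stand-in `struct page` are illustrative, not from mm/hugetlb.c:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in type so the sketch compiles outside the kernel. */
struct page { unsigned long flags; };

/* One worker carries the 'demote' flag... */
static void __frob_page(struct page *page, unsigned int order, bool demote)
{
	/*
	 * Shared logic lives here; branch on 'demote' only where the
	 * demote path actually differs (e.g. skipping ref counting).
	 */
	(void)page;
	printf("order=%u demote=%d\n", order, demote);
}

/* ...and thin wrappers pin the flag, mirroring the patch's helpers. */
static void frob_page(struct page *page, unsigned int order)
{
	__frob_page(page, order, false);
}

static void frob_page_for_demote(struct page *page, unsigned int order)
{
	__frob_page(page, order, true);
}

int main(void)
{
	struct page p = { 0 };

	frob_page(&p, 9);
	frob_page_for_demote(&p, 9);
	return 0;
}

This keeps the demote variants trivially small and makes the call sites in demote_free_huge_page() below self-documenting.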
@@ -3304,9 +3323,72 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 	return 0;
 }
 
+static int demote_free_huge_page(struct hstate *h, struct page *page)
+{
+	int i, nid = page_to_nid(page);
+	struct hstate *target_hstate;
+	int rc = 0;
+
+	target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
+
+	remove_hugetlb_page_for_demote(h, page, false);
+	spin_unlock_irq(&hugetlb_lock);
+
+	rc = alloc_huge_page_vmemmap(h, page);
+	if (rc) {
+		/* Allocation of vmemmap failed, we cannot demote the page */
+		spin_lock_irq(&hugetlb_lock);
+		set_page_refcounted(page);
+		add_hugetlb_page(h, page, false);
+		return rc;
+	}
+
+	/*
+	 * Use destroy_compound_hugetlb_page_for_demote for all huge page
+	 * sizes as it will not ref count pages.
+	 */
+	destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+
+	/*
+	 * Taking target hstate mutex synchronizes with set_max_huge_pages.
+	 * Without the mutex, pages added to target hstate could be marked
+	 * as surplus.
+	 *
+	 * Note that we already hold h->resize_lock.  To prevent deadlock,
+	 * use the convention of always taking larger size hstate mutex first.
+	 */
+	mutex_lock(&target_hstate->resize_lock);
+	for (i = 0; i < pages_per_huge_page(h);
+				i += pages_per_huge_page(target_hstate)) {
+		if (hstate_is_gigantic(target_hstate))
+			prep_compound_gigantic_page_for_demote(page + i,
+							target_hstate->order);
+		else
+			prep_compound_page(page + i, target_hstate->order);
+		set_page_private(page + i, 0);
+		set_page_refcounted(page + i);
+		prep_new_huge_page(target_hstate, page + i, nid);
+		put_page(page + i);
+	}
+	mutex_unlock(&target_hstate->resize_lock);
+
+	spin_lock_irq(&hugetlb_lock);
+
+	/*
+	 * Not absolutely necessary, but for consistency update max_huge_pages
+	 * based on pool changes for the demoted page.
+	 */
+	h->max_huge_pages--;
+	target_hstate->max_huge_pages += pages_per_huge_page(h);
+
+	return rc;
+}
+
 static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 	__must_hold(&hugetlb_lock)
 {
+	int nr_nodes, node;
+	struct page *page;
 	int rc = 0;
 
 	lockdep_assert_held(&hugetlb_lock);
@@ -3317,9 +3399,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 		return -EINVAL;		/* internal error */
 	}
 
-	/*
-	 * TODO - demote fucntionality will be added in subsequent patch
-	 */
+	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
+		if (!list_empty(&h->hugepage_freelists[node])) {
+			page = list_entry(h->hugepage_freelists[node].next,
+					struct page, lru);
+			rc = demote_free_huge_page(h, page);
+			break;
+		}
+	}
+
 	return rc;
 }
 
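To make the split loop in demote_free_huge_page() concrete, consider the common x86-64 configuration of 4 KiB base pages with 1 GiB huge pages demoted to 2 MiB. These sizes are an assumption for illustration only; the code works for any hstate with a nonzero demote_order. A small userspace sketch of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long base_sz = 4096;		/* 4 KiB base page (x86-64) */
	unsigned long src_sz  = 1UL << 30;	/* 1 GiB source huge page */
	unsigned long dst_sz  = 2UL << 20;	/* 2 MiB demote target */

	unsigned long pages_per_src = src_sz / base_sz;	/* 262144 */
	unsigned long pages_per_dst = dst_sz / base_sz;	/* 512 */

	/* Iterations of the split loop in demote_free_huge_page(): */
	printf("%lu new 2 MiB pages from one 1 GiB page\n",
	       pages_per_src / pages_per_dst);		/* prints 512 */
	return 0;
}

Each iteration preps one target-size compound page at page + i and immediately drops its reference with put_page(), which sends the freshly minted huge page to the target hstate's free list.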
@@ -3554,10 +3642,6 @@ static ssize_t demote_store(struct kobject *kobj,
 	/*
 	 * Check for available pages to demote each time through the
 	 * loop as demote_pool_huge_page will drop hugetlb_lock.
-	 *
-	 * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
-	 * but will when full demote functionality is added in a later
-	 * patch.
 	 */
 	if (nid != NUMA_NO_NODE)
 		nr_available = h->free_huge_pages_node[nid];
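For context, demote_store() above backs the per-hstate "demote" sysfs attribute introduced earlier in this series, with "demote_size" selecting the target size. A hedged usage sketch, assuming a kernel with the series applied and 1 GiB huge pages configured (adjust the hugepages-<size>kB path for your setup):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Path assumes 1 GiB huge pages (1048576 kB). */
	const char *path =
		"/sys/kernel/mm/hugepages/hugepages-1048576kB/demote";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Ask the kernel to demote two free huge pages of this size. */
	if (write(fd, "2", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}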