Skip to content

Commit 6b251fc

Browse files
aagit authored and torvalds committed
userfaultfd: call handle_userfault() for userfaultfd_missing() faults
This is where the page faults must be modified to call handle_userfault() if userfaultfd_missing() is true (so if the vma->vm_flags had VM_UFFD_MISSING set). handle_userfault() then takes care of blocking the page fault and delivering it to userland. The fault flags must also be passed as parameter so the "read|write" kind of fault can be passed to userland. Signed-off-by: Andrea Arcangeli <[email protected]> Acked-by: Pavel Emelyanov <[email protected]> Cc: Sanidhya Kashyap <[email protected]> Cc: [email protected] Cc: "Kirill A. Shutemov" <[email protected]> Cc: Andres Lagar-Cavilla <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Paolo Bonzini <[email protected]> Cc: Rik van Riel <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Peter Feiner <[email protected]> Cc: "Dr. David Alan Gilbert" <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: "Huangpeng (Peter)" <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 16ba6f8 commit 6b251fc

File tree

2 files changed

+63
-22
lines changed

2 files changed

+63
-22
lines changed

mm/huge_memory.c

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <linux/pagemap.h>
2424
#include <linux/migrate.h>
2525
#include <linux/hashtable.h>
26+
#include <linux/userfaultfd_k.h>
2627

2728
#include <asm/tlb.h>
2829
#include <asm/pgalloc.h>
@@ -717,20 +718,25 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
717718
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
718719
struct vm_area_struct *vma,
719720
unsigned long haddr, pmd_t *pmd,
720-
struct page *page, gfp_t gfp)
721+
struct page *page, gfp_t gfp,
722+
unsigned int flags)
721723
{
722724
struct mem_cgroup *memcg;
723725
pgtable_t pgtable;
724726
spinlock_t *ptl;
725727

726728
VM_BUG_ON_PAGE(!PageCompound(page), page);
727729

728-
if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
729-
return VM_FAULT_OOM;
730+
if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) {
731+
put_page(page);
732+
count_vm_event(THP_FAULT_FALLBACK);
733+
return VM_FAULT_FALLBACK;
734+
}
730735

731736
pgtable = pte_alloc_one(mm, haddr);
732737
if (unlikely(!pgtable)) {
733738
mem_cgroup_cancel_charge(page, memcg);
739+
put_page(page);
734740
return VM_FAULT_OOM;
735741
}
736742

@@ -750,6 +756,21 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
750756
pte_free(mm, pgtable);
751757
} else {
752758
pmd_t entry;
759+
760+
/* Deliver the page fault to userland */
761+
if (userfaultfd_missing(vma)) {
762+
int ret;
763+
764+
spin_unlock(ptl);
765+
mem_cgroup_cancel_charge(page, memcg);
766+
put_page(page);
767+
pte_free(mm, pgtable);
768+
ret = handle_userfault(vma, haddr, flags,
769+
VM_UFFD_MISSING);
770+
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
771+
return ret;
772+
}
773+
753774
entry = mk_huge_pmd(page, vma->vm_page_prot);
754775
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
755776
page_add_new_anon_rmap(page, vma, haddr);
@@ -760,6 +781,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
760781
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
761782
atomic_long_inc(&mm->nr_ptes);
762783
spin_unlock(ptl);
784+
count_vm_event(THP_FAULT_ALLOC);
763785
}
764786

765787
return 0;
@@ -771,19 +793,16 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
771793
}
772794

773795
/* Caller must hold page table lock. */
774-
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
796+
static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
775797
struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
776798
struct page *zero_page)
777799
{
778800
pmd_t entry;
779-
if (!pmd_none(*pmd))
780-
return false;
781801
entry = mk_pmd(zero_page, vma->vm_page_prot);
782802
entry = pmd_mkhuge(entry);
783803
pgtable_trans_huge_deposit(mm, pmd, pgtable);
784804
set_pmd_at(mm, haddr, pmd, entry);
785805
atomic_long_inc(&mm->nr_ptes);
786-
return true;
787806
}
788807

789808
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -806,6 +825,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
806825
pgtable_t pgtable;
807826
struct page *zero_page;
808827
bool set;
828+
int ret;
809829
pgtable = pte_alloc_one(mm, haddr);
810830
if (unlikely(!pgtable))
811831
return VM_FAULT_OOM;
@@ -816,29 +836,36 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
816836
return VM_FAULT_FALLBACK;
817837
}
818838
ptl = pmd_lock(mm, pmd);
819-
set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
820-
zero_page);
821-
spin_unlock(ptl);
839+
ret = 0;
840+
set = false;
841+
if (pmd_none(*pmd)) {
842+
if (userfaultfd_missing(vma)) {
843+
spin_unlock(ptl);
844+
ret = handle_userfault(vma, haddr, flags,
845+
VM_UFFD_MISSING);
846+
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
847+
} else {
848+
set_huge_zero_page(pgtable, mm, vma,
849+
haddr, pmd,
850+
zero_page);
851+
spin_unlock(ptl);
852+
set = true;
853+
}
854+
} else
855+
spin_unlock(ptl);
822856
if (!set) {
823857
pte_free(mm, pgtable);
824858
put_huge_zero_page();
825859
}
826-
return 0;
860+
return ret;
827861
}
828862
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
829863
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
830864
if (unlikely(!page)) {
831865
count_vm_event(THP_FAULT_FALLBACK);
832866
return VM_FAULT_FALLBACK;
833867
}
834-
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
835-
put_page(page);
836-
count_vm_event(THP_FAULT_FALLBACK);
837-
return VM_FAULT_FALLBACK;
838-
}
839-
840-
count_vm_event(THP_FAULT_ALLOC);
841-
return 0;
868+
return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp, flags);
842869
}
843870

844871
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -873,16 +900,14 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
873900
*/
874901
if (is_huge_zero_pmd(pmd)) {
875902
struct page *zero_page;
876-
bool set;
877903
/*
878904
* get_huge_zero_page() will never allocate a new page here,
879905
* since we already have a zero page to copy. It just takes a
880906
* reference.
881907
*/
882908
zero_page = get_huge_zero_page();
883-
set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
909+
set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
884910
zero_page);
885-
BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
886911
ret = 0;
887912
goto out_unlock;
888913
}

mm/memory.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include <linux/string.h>
6262
#include <linux/dma-debug.h>
6363
#include <linux/debugfs.h>
64+
#include <linux/userfaultfd_k.h>
6465

6566
#include <asm/io.h>
6667
#include <asm/pgalloc.h>
@@ -2685,6 +2686,12 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
26852686
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
26862687
if (!pte_none(*page_table))
26872688
goto unlock;
2689+
/* Deliver the page fault to userland, check inside PT lock */
2690+
if (userfaultfd_missing(vma)) {
2691+
pte_unmap_unlock(page_table, ptl);
2692+
return handle_userfault(vma, address, flags,
2693+
VM_UFFD_MISSING);
2694+
}
26882695
goto setpte;
26892696
}
26902697

@@ -2713,6 +2720,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
27132720
if (!pte_none(*page_table))
27142721
goto release;
27152722

2723+
/* Deliver the page fault to userland, check inside PT lock */
2724+
if (userfaultfd_missing(vma)) {
2725+
pte_unmap_unlock(page_table, ptl);
2726+
mem_cgroup_cancel_charge(page, memcg);
2727+
page_cache_release(page);
2728+
return handle_userfault(vma, address, flags,
2729+
VM_UFFD_MISSING);
2730+
}
2731+
27162732
inc_mm_counter_fast(mm, MM_ANONPAGES);
27172733
page_add_new_anon_rmap(page, vma, address);
27182734
mem_cgroup_commit_charge(page, memcg, false);

0 commit comments

Comments
 (0)