From 912efa17e5121693dfbadae29768f4144a3f9e62 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 27 Jan 2021 23:53:42 +0000
Subject: [PATCH 1/6] mm: proc: Invalidate TLB after clearing soft-dirty page state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 0758cd830494 ("asm-generic/tlb: avoid potential double flush"),
TLB invalidation is elided in tlb_finish_mmu() if no entries were batched
via the tlb_remove_*() functions. Consequently, the page-table modifications
performed by clear_refs_write() in response to a write to
/proc/<pid>/clear_refs do not perform TLB invalidation. Although this is
fine when simply aging the ptes, in the case of clearing the "soft-dirty"
state we can end up with entries where pte_write() is false, yet a writable
mapping remains in the TLB.

Fix this by avoiding the mmu_gather API altogether: manage both the
'tlb_flush_pending' flag on the 'mm_struct' and explicit TLB invalidation
for the soft-dirty path, much like mprotect() does already.

Fixes: 0758cd830494 ("asm-generic/tlb: avoid potential double flush")
Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Yu Zhao
Acked-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Link: https://lkml.kernel.org/r/20210127235347.1402-2-will@kernel.org
---
 fs/proc/task_mmu.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 602e3a52884d8..3cec6fbef725e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1210,7 +1210,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	enum clear_refs_types type;
-	struct mmu_gather tlb;
 	int itype;
 	int rv;

@@ -1249,7 +1248,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			goto out_unlock;
 		}

-		tlb_gather_mmu(&tlb, mm, 0, -1);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
 				if (!(vma->vm_flags & VM_SOFTDIRTY))
@@ -1258,15 +1256,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				vma_set_page_prot(vma);
 			}

+			inc_tlb_flush_pending(mm);
 			mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
 						0, NULL, mm, 0, -1UL);
 			mmu_notifier_invalidate_range_start(&range);
 		}
 		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
 				&cp);
-		if (type == CLEAR_REFS_SOFT_DIRTY)
+		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			mmu_notifier_invalidate_range_end(&range);
-		tlb_finish_mmu(&tlb, 0, -1);
+			flush_tlb_mm(mm);
+			dec_tlb_flush_pending(mm);
+		}
 out_unlock:
 		mmap_write_unlock(mm);
 out_mm:

From ae8eba8b5d723a4ca543024b6e51f4d0f4fb6b6b Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 27 Jan 2021 23:53:43 +0000
Subject: [PATCH 2/6] tlb: mmu_gather: Remove unused start/end arguments from tlb_finish_mmu()

Since commit 7a30df49f63a ("mm: mmu_gather: remove __tlb_reset_range() for
force flush"), the 'start' and 'end' arguments to tlb_finish_mmu() are no
longer used, since we flush the whole mm in case of a nested invalidation.

Remove the unused arguments and update all callers.
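To illustrate the resulting calling convention, a typical user of the API
now looks roughly like this (an illustrative sketch only, not an additional
change; tlb_gather_mmu() keeps its range arguments until later in this
series):

	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, mm, start, end);	/* start the gather */
	unmap_vmas(&tlb, vma, start, end);	/* batch pages and TLB ranges */
	tlb_finish_mmu(&tlb);			/* flush and free; no range here */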
Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Yu Zhao
Acked-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Link: https://lkml.kernel.org/r/20210127235347.1402-3-will@kernel.org
---
 arch/ia64/include/asm/tlb.h | 2 +-
 arch/x86/kernel/ldt.c       | 2 +-
 fs/exec.c                   | 2 +-
 include/linux/mm_types.h    | 3 +--
 mm/hugetlb.c                | 2 +-
 mm/madvise.c                | 6 +++---
 mm/memory.c                 | 4 ++--
 mm/mmap.c                   | 4 ++--
 mm/mmu_gather.c             | 5 +----
 mm/oom_kill.c               | 4 ++--
 10 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 8d9da6f08a62e..7059eb2e867a3 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -36,7 +36,7 @@
  *	    tlb_end_vma(tlb, vma);
  *	  }
  *	}
- *	tlb_finish_mmu(tlb, start, end);	// finish unmap for address space MM
+ *	tlb_finish_mmu(tlb);			// finish unmap for address space MM
  */
 #include
 #include
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b8aee71840ae5..0d4e1253c9c90 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -400,7 +400,7 @@ static void free_ldt_pgtables(struct mm_struct *mm)

 	tlb_gather_mmu(&tlb, mm, start, end);
 	free_pgd_range(&tlb, start, end, start, end);
-	tlb_finish_mmu(&tlb, start, end);
+	tlb_finish_mmu(&tlb);
 #endif
 }

diff --git a/fs/exec.c b/fs/exec.c
index 5d4d52039105c..69d89a0c35e92 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -725,7 +725,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
 	}
-	tlb_finish_mmu(&tlb, old_start, old_end);
+	tlb_finish_mmu(&tlb);

 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 07d9acb5b19c4..1fe6a51298a68 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -590,8 +590,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 				unsigned long start, unsigned long end);
-extern void tlb_finish_mmu(struct mmu_gather *tlb,
-				unsigned long start, unsigned long end);
+extern void tlb_finish_mmu(struct mmu_gather *tlb);

 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 18f6ee3179002..33db4fa62c7b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3985,7 +3985,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,

 	tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
-	tlb_finish_mmu(&tlb, tlb_start, tlb_end);
+	tlb_finish_mmu(&tlb);
 }

 /*
diff --git a/mm/madvise.c b/mm/madvise.c
index 6a660858784b8..1b68520ea3f4d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -508,7 +508,7 @@ static long madvise_cold(struct vm_area_struct *vma,
 	lru_add_drain();
 	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
 	madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
-	tlb_finish_mmu(&tlb, start_addr, end_addr);
+	tlb_finish_mmu(&tlb);

 	return 0;
 }
@@ -560,7 +560,7 @@ static long madvise_pageout(struct vm_area_struct *vma,
 	lru_add_drain();
 	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
 	madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
-	tlb_finish_mmu(&tlb, start_addr, end_addr);
+	tlb_finish_mmu(&tlb);

 	return 0;
 }
@@ -732,7 +732,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 			&madvise_free_walk_ops, &tlb);
 	tlb_end_vma(&tlb, vma);
 	mmu_notifier_invalidate_range_end(&range);
-	tlb_finish_mmu(&tlb, range.start, range.end);
+	tlb_finish_mmu(&tlb);

 	return 0;
 }

diff --git a/mm/memory.c b/mm/memory.c
index feff48e1465a6..7bd3f122bd10f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1540,7 +1540,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
 		unmap_single_vma(&tlb, vma, start, range.end, NULL);
 	mmu_notifier_invalidate_range_end(&range);
-	tlb_finish_mmu(&tlb, start, range.end);
+	tlb_finish_mmu(&tlb);
 }

 /**
@@ -1566,7 +1566,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
 	mmu_notifier_invalidate_range_start(&range);
 	unmap_single_vma(&tlb, vma, address, range.end, details);
 	mmu_notifier_invalidate_range_end(&range);
-	tlb_finish_mmu(&tlb, address, range.end);
+	tlb_finish_mmu(&tlb);
 }

 /**
diff --git a/mm/mmap.c b/mm/mmap.c
index dc7206032387c..7a9f493a4b83a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2676,7 +2676,7 @@ static void unmap_region(struct mm_struct *mm,
 	unmap_vmas(&tlb, vma, start, end);
 	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
 				 next ? next->vm_start : USER_PGTABLES_CEILING);
-	tlb_finish_mmu(&tlb, start, end);
+	tlb_finish_mmu(&tlb);
 }

 /*
@@ -3219,7 +3219,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
 	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
-	tlb_finish_mmu(&tlb, 0, -1);
+	tlb_finish_mmu(&tlb);

 	/*
 	 * Walk the list again, actually closing and freeing it,
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 03c33c93a582b..b0be5a7aa08f6 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -290,14 +290,11 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 /**
  * tlb_finish_mmu - finish an mmu_gather structure
  * @tlb: the mmu_gather structure to finish
- * @start: start of the region that will be removed from the page-table
- * @end: end of the region that will be removed from the page-table
  *
  * Called at the end of the shootdown operation to free up any resources that
  * were required.
  */
-void tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+void tlb_finish_mmu(struct mmu_gather *tlb)
 {
 	/*
 	 * If there are parallel threads are doing PTE changes on same range
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 04b19b7b5435b..757e557211fb7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -548,13 +548,13 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
 						vma->vm_end);
 			tlb_gather_mmu(&tlb, mm, range.start, range.end);
 			if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
-				tlb_finish_mmu(&tlb, range.start, range.end);
+				tlb_finish_mmu(&tlb);
 				ret = false;
 				continue;
 			}
 			unmap_page_range(&tlb, vma, range.start, range.end, NULL);
 			mmu_notifier_invalidate_range_end(&range);
-			tlb_finish_mmu(&tlb, range.start, range.end);
+			tlb_finish_mmu(&tlb);
 		}
 	}

From d8b450530b90f8845ab962af18b8a10ed77fc889 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 27 Jan 2021 23:53:44 +0000
Subject: [PATCH 3/6] tlb: mmu_gather: Introduce tlb_gather_mmu_fullmm()

Passing the range '0, -1' to tlb_gather_mmu() sets the 'fullmm' flag, which
indicates that the mm_struct being operated on is going away. In this case,
some architectures (such as arm64) can elide TLB invalidation by ensuring
that the TLB tag (ASID) associated with this mm is not immediately reclaimed.
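For reference, this is roughly how the '0, -1' convention is currently
decoded inside tlb_gather_mmu() (the check that a later patch in this series
replaces with an explicit flag):

	/* Is it from 0 to ~0? */
	tlb->fullmm = !(start | (end + 1));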
Although this behaviour is documented in asm-generic/tlb.h, it's subtle and
easily missed. Introduce tlb_gather_mmu_fullmm() to make it clearer that
this is for the entire mm, and WARN() if tlb_gather_mmu() is called with the
'fullmm' address range.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Yu Zhao
Acked-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Link: https://lkml.kernel.org/r/20210127235347.1402-4-will@kernel.org
---
 include/asm-generic/tlb.h |  6 ++++--
 include/linux/mm_types.h  |  1 +
 mm/mmap.c                 |  2 +-
 mm/mmu_gather.c           | 16 ++++++++++++++--
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 6661ee1cff479..2c68a545ffa7d 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -46,7 +46,9 @@
  *
  * The mmu_gather API consists of:
  *
- *  - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
+ *  - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu()
+ *
+ *    start and finish a mmu_gather
  *
  *    Finish in particular will issue a (final) TLB invalidate and free
  *    all (remaining) queued pages.
@@ -91,7 +93,7 @@
  *
  *  - mmu_gather::fullmm
  *
- *    A flag set by tlb_gather_mmu() to indicate we're going to free
+ *    A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free
  *    the entire mm; this allows a number of optimizations.
 *
 *    - We can ignore tlb_{start,end}_vma(); because we don't
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1fe6a51298a68..e49868bc12a75 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -590,6 +590,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 				unsigned long start, unsigned long end);
+extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_finish_mmu(struct mmu_gather *tlb);

 static inline void init_tlb_flush_pending(struct mm_struct *mm)
diff --git a/mm/mmap.c b/mm/mmap.c
index 7a9f493a4b83a..4eac7c63edbe7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3214,7 +3214,7 @@ void exit_mmap(struct mm_struct *mm)

 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb_gather_mmu(&tlb, mm, 0, -1);
+	tlb_gather_mmu_fullmm(&tlb, mm);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index b0be5a7aa08f6..5f5e45d9eb50d 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -261,8 +261,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  * respectively when @mm is without users and we're going to destroy
  * the full address space (exit/execve).
  */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
-		unsigned long start, unsigned long end)
+static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			     unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;

@@ -287,6 +287,18 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	inc_tlb_flush_pending(tlb->mm);
 }

+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		    unsigned long start, unsigned long end)
+{
+	WARN_ON(!(start | (end + 1)));	/* Use _fullmm() instead */
+	__tlb_gather_mmu(tlb, mm, start, end);
+}
+
+void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
+{
+	__tlb_gather_mmu(tlb, mm, 0, -1);
+}
+
 /**
  * tlb_finish_mmu - finish an mmu_gather structure
  * @tlb: the mmu_gather structure to finish

From a72afd873089c697053e9daa85ff343b3140d2e7 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 27 Jan 2021 23:53:45 +0000
Subject: [PATCH 4/6] tlb: mmu_gather: Remove start/end arguments from tlb_gather_mmu()

The 'start' and 'end' arguments to tlb_gather_mmu() are no longer needed
now that there is a separate function for 'fullmm' flushing.

Remove the unused arguments and update all callers.

Suggested-by: Linus Torvalds
Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Yu Zhao
Acked-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Link: https://lore.kernel.org/r/CAHk-=wjQWa14_4UpfDf=fiineNP+RH74kZeDMo_f1D35xNzq9w@mail.gmail.com
---
 arch/ia64/include/asm/tlb.h |  2 +-
 arch/x86/kernel/ldt.c       |  2 +-
 fs/exec.c                   |  2 +-
 include/linux/mm_types.h    |  3 +--
 mm/hugetlb.c                | 16 +---------------
 mm/madvise.c                |  6 +++---
 mm/memory.c                 |  4 ++--
 mm/mmap.c                   |  2 +-
 mm/mmu_gather.c             | 22 ++++++++--------------
 mm/oom_kill.c               |  2 +-
 10 files changed, 20 insertions(+), 41 deletions(-)

diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 7059eb2e867a3..a15fe0809aaed 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -23,7 +23,7 @@
  * unmapping a portion of the virtual address space, these hooks are called according to
  * the following template:
  *
- *	tlb <- tlb_gather_mmu(mm, start, end);		// start unmap for address space MM
+ *	tlb <- tlb_gather_mmu(mm);			// start unmap for address space MM
  *	{
  *	  for each vma that needs a shootdown do {
  *	    tlb_start_vma(tlb, vma);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 0d4e1253c9c90..7ad9834e0d956 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -398,7 +398,7 @@ static void free_ldt_pgtables(struct mm_struct *mm)
 	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;

-	tlb_gather_mmu(&tlb, mm, start, end);
+	tlb_gather_mmu(&tlb, mm);
 	free_pgd_range(&tlb, start, end, start, end);
 	tlb_finish_mmu(&tlb);
 #endif
diff --git a/fs/exec.c b/fs/exec.c
index 69d89a0c35e92..5a853f03c2332 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -708,7 +708,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 		return -ENOMEM;

 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, old_start, old_end);
+	tlb_gather_mmu(&tlb, mm);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e49868bc12a75..0974ad501a47c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -588,8 +588,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 }

 struct mmu_gather;
-extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
-				unsigned long start, unsigned long end);
+extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_finish_mmu(struct mmu_gather *tlb);

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 33db4fa62c7b5..89635f4072323 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3967,23 +3967,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	struct mm_struct *mm;
 	struct mmu_gather tlb;
-	unsigned long tlb_start = start;
-	unsigned long tlb_end = end;

-	/*
-	 * If shared PMDs were possibly used within this vma range, adjust
-	 * start/end for worst case tlb flushing.
-	 * Note that we can not be sure if PMDs are shared until we try to
-	 * unmap pages.  However, we want to make sure TLB flushing covers
-	 * the largest possible range.
-	 */
-	adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
-
-	mm = vma->vm_mm;
-
-	tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
+	tlb_gather_mmu(&tlb, vma->vm_mm);
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
 	tlb_finish_mmu(&tlb);
 }
diff --git a/mm/madvise.c b/mm/madvise.c
index 1b68520ea3f4d..0938fd3ad2281 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -506,7 +506,7 @@ static long madvise_cold(struct vm_area_struct *vma,
 		return -EINVAL;

 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+	tlb_gather_mmu(&tlb, mm);
 	madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
 	tlb_finish_mmu(&tlb);

@@ -558,7 +558,7 @@ static long madvise_pageout(struct vm_area_struct *vma,
 		return 0;

 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+	tlb_gather_mmu(&tlb, mm);
 	madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
 	tlb_finish_mmu(&tlb);

@@ -723,7 +723,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 				range.start, range.end);

 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, range.start, range.end);
+	tlb_gather_mmu(&tlb, mm);
 	update_hiwater_rss(mm);

 	mmu_notifier_invalidate_range_start(&range);
diff --git a/mm/memory.c b/mm/memory.c
index 7bd3f122bd10f..9e8576a831474 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1534,7 +1534,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	lru_add_drain();
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
 				start, start + size);
-	tlb_gather_mmu(&tlb, vma->vm_mm, start, range.end);
+	tlb_gather_mmu(&tlb, vma->vm_mm);
 	update_hiwater_rss(vma->vm_mm);
 	mmu_notifier_invalidate_range_start(&range);
 	for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
@@ -1561,7 +1561,7 @@ static void zap_page_range_single(struct vm_area_struct *vma, unsigned long addr
 	lru_add_drain();
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
 				address, address + size);
-	tlb_gather_mmu(&tlb, vma->vm_mm, address, range.end);
+	tlb_gather_mmu(&tlb, vma->vm_mm);
 	update_hiwater_rss(vma->vm_mm);
 	mmu_notifier_invalidate_range_start(&range);
 	unmap_single_vma(&tlb, vma, address, range.end, details);
diff --git a/mm/mmap.c b/mm/mmap.c
index 4eac7c63edbe7..90673febce6aa 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2671,7 +2671,7 @@ static void unmap_region(struct mm_struct *mm,
 	struct mmu_gather tlb;

 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, start, end);
+	tlb_gather_mmu(&tlb, mm);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end);
 	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 5f5e45d9eb50d..0dc7149b0c615 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -253,21 +253,17 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
  * @tlb: the mmu_gather structure to initialize
  * @mm: the mm_struct of the target address space
- * @start: start of the region that will be removed from the page-table
- * @end: end of the region that will be removed from the page-table
+ * @fullmm: @mm is without users and we're going to destroy the full address
+ *	    space (exit/execve)
  *
  * Called to initialize an (on-stack) mmu_gather structure for page-table
- * tear-down from @mm. The @start and @end are set to 0 and -1
- * respectively when @mm is without users and we're going to destroy
- * the full address space (exit/execve).
+ * tear-down from @mm.
  */
 static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
-			     unsigned long start, unsigned long end)
+			     bool fullmm)
 {
 	tlb->mm = mm;
-
-	/* Is it from 0 to ~0? */
-	tlb->fullmm = !(start | (end+1));
+	tlb->fullmm = fullmm;

 #ifndef CONFIG_MMU_GATHER_NO_GATHER
 	tlb->need_flush_all = 0;
@@ -287,16 +283,14 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	inc_tlb_flush_pending(tlb->mm);
 }

-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
-		    unsigned long start, unsigned long end)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
 {
-	WARN_ON(!(start | (end + 1)));	/* Use _fullmm() instead */
-	__tlb_gather_mmu(tlb, mm, start, end);
+	__tlb_gather_mmu(tlb, mm, false);
 }

 void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
 {
-	__tlb_gather_mmu(tlb, mm, 0, -1);
+	__tlb_gather_mmu(tlb, mm, true);
 }

 /**
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 757e557211fb7..c9a33ffe38b7c 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -546,7 +546,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
 			mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
 						vma, mm, vma->vm_start,
 						vma->vm_end);
-			tlb_gather_mmu(&tlb, mm, range.start, range.end);
+			tlb_gather_mmu(&tlb, mm);
 			if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
 				tlb_finish_mmu(&tlb);
 				ret = false;

From c7bd8010a335260927e3643e79360272f9aca266 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 27 Jan 2021 23:53:46 +0000
Subject: [PATCH 5/6] tlb: arch: Remove empty __tlb_remove_tlb_entry() stubs

If __tlb_remove_tlb_entry() is not defined by the architecture then we
provide an empty definition in asm-generic/tlb.h. Remove the redundant
empty definitions for sparc64 and x86.
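For reference, the generic fallback in asm-generic/tlb.h looks roughly like
this, which is why the per-architecture copies below are redundant:

	#ifndef __tlb_remove_tlb_entry
	#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
	#endif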
Suggested-by: Peter Zijlstra
Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Yu Zhao
Acked-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Link: https://lkml.kernel.org/r/20210127235347.1402-6-will@kernel.org
---
 arch/sparc/include/asm/tlb_64.h | 1 -
 arch/x86/include/asm/tlb.h      | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index e841cae544c2b..779a5a0f06080 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -24,7 +24,6 @@ void flush_tlb_pending(void);

 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 #define tlb_flush(tlb)	flush_tlb_pending()

 /*
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 820082bd68804..1bfe979bb9bcd 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -4,7 +4,6 @@
 #define tlb_start_vma(tlb, vma) do { } while (0)
 #define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)

 #define tlb_flush tlb_flush
 static inline void tlb_flush(struct mmu_gather *tlb);

From 8cf55f24ce6cf90eb8828421e15e9efcd508bd2c Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 27 Jan 2021 23:53:47 +0000
Subject: [PATCH 6/6] x86/ldt: Use tlb_gather_mmu_fullmm() when freeing LDT page-tables

free_ldt_pgtables() uses the MMU gather API for batching TLB flushes over
the call to free_pgd_range(). However, tlb_gather_mmu() expects to operate
on user addresses, so passing LDT_{BASE,END}_ADDR will confuse the range
setting logic in __tlb_adjust_range(), causing the gather to identify a
range starting at TASK_SIZE. Such a large range will be converted into a
'fullmm' flush by the low-level invalidation code, so change the caller to
invoke tlb_gather_mmu_fullmm() directly.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Yu Zhao
Acked-by: Peter Zijlstra (Intel)
Acked-by: Linus Torvalds
Link: https://lkml.kernel.org/r/20210127235347.1402-7-will@kernel.org
---
 arch/x86/kernel/ldt.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 7ad9834e0d956..aa15132228da5 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -398,7 +398,13 @@ static void free_ldt_pgtables(struct mm_struct *mm)
 	if (!boot_cpu_has(X86_FEATURE_PTI))
 		return;

-	tlb_gather_mmu(&tlb, mm);
+	/*
+	 * Although free_pgd_range() is intended for freeing user
+	 * page-tables, it also works out for kernel mappings on x86.
+	 * We use tlb_gather_mmu_fullmm() to avoid confusing the
+	 * range-tracking logic in __tlb_adjust_range().
+	 */
+	tlb_gather_mmu_fullmm(&tlb, mm);
 	free_pgd_range(&tlb, start, end, start, end);
 	tlb_finish_mmu(&tlb);
 #endif
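Taken together, the series leaves callers of the mmu_gather API looking
roughly like this (an illustrative sketch, not part of the patches):

	struct mmu_gather tlb;

	/* Ranged teardown: the gather tracks the affected range itself. */
	tlb_gather_mmu(&tlb, mm);
	unmap_vmas(&tlb, vma, start, end);
	tlb_finish_mmu(&tlb);

	/*
	 * Full address-space teardown (exit/execve): lets architectures
	 * such as arm64 elide invalidation via the ASID.
	 */
	tlb_gather_mmu_fullmm(&tlb, mm);
	unmap_vmas(&tlb, vma, 0, -1);
	tlb_finish_mmu(&tlb);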