Skip to content

Commit

Permalink
Merge branch 'akpm'
Browse files Browse the repository at this point in the history
* akpm:
  mm: madvise(MADV_DODUMP): allow hugetlbfs pages
  ocfs2: fix locking for res->tracking and dlm->tracking_list
  mm/vmscan.c: fix int overflow in callers of do_shrink_slab()
  mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly
  mm/vmstat.c: fix outdated vmstat_text
  proc: restrict kernel stack dumps to root
  mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes
  mm/migrate.c: split only transparent huge pages when allocation fails
  ipc/shm.c: use ERR_CAST() for shm_lock() error return
  mm/gup_benchmark: fix unsigned comparison to zero in __gup_benchmark_ioctl
  mm, thp: fix mlocking THP page with migration enabled
  ocfs2: fix crash in ocfs2_duplicate_clusters_by_page()
  hugetlb: take PMD sharing into account when flushing tlb/caches
  mm: migration: fix migration of huge PMD shared pages
  • Loading branch information
Greg Kroah-Hartman committed Oct 5, 2018
2 parents 5943a9b + d41aa52 commit 091a1ea
Show file tree
Hide file tree
Showing 18 changed files with 189 additions and 30 deletions.
4 changes: 2 additions & 2 deletions fs/ocfs2/dlm/dlmmaster.c
Original file line number Diff line number Diff line change
Expand Up @@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,

res->last_used = 0;

spin_lock(&dlm->spinlock);
spin_lock(&dlm->track_lock);
list_add_tail(&res->tracking, &dlm->tracking_list);
spin_unlock(&dlm->spinlock);
spin_unlock(&dlm->track_lock);

memset(res->lvb, 0, DLM_LVB_LEN);
memset(res->refmap, 0, sizeof(res->refmap));
Expand Down
16 changes: 12 additions & 4 deletions fs/ocfs2/refcounttree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (map_end & (PAGE_SIZE - 1))
to = map_end & (PAGE_SIZE - 1);

retry:
page = find_or_create_page(mapping, page_index, GFP_NOFS);
if (!page) {
ret = -ENOMEM;
Expand All @@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
}

/*
* In case PAGE_SIZE <= CLUSTER_SIZE, This page
* can't be dirtied before we CoW it out.
* In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty
* page, so write it back.
*/
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize)
BUG_ON(PageDirty(page));
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
if (PageDirty(page)) {
/*
* write_on_page will unlock the page on return
*/
ret = write_one_page(page);
goto retry;
}
}

if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block);
Expand Down
14 changes: 14 additions & 0 deletions fs/proc/base.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
unsigned long *entries;
int err;

/*
* The ability to racily run the kernel stack unwinder on a running task
* and then observe the unwinder output is scary; while it is useful for
* debugging kernel issues, it can also allow an attacker to leak kernel
* stack contents.
* Doing this in a manner that is at least safe from races would require
* some work to ensure that the remote task can not be scheduled; and
* even then, this would still expose the unwinder as local attack
* surface.
* Therefore, this interface is restricted to root.
*/
if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
return -EACCES;

entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
GFP_KERNEL);
if (!entries)
Expand Down
14 changes: 14 additions & 0 deletions include/linux/hugetlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pd(struct vm_area_struct *vma,
Expand Down Expand Up @@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}

static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
pte_t *ptep)
{
return 0;
}

static inline void adjust_range_if_pmd_sharing_possible(
struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
}

#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
Expand Down
6 changes: 6 additions & 0 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
return vma;
}

static inline bool range_in_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
return (vma && vma->vm_start <= start && end <= vma->vm_end);
}

#ifdef CONFIG_MMU
pgprot_t vm_get_page_prot(unsigned long vm_flags);
void vma_set_page_prot(struct vm_area_struct *vma);
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/asm-generic/hugetlb_encode.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/memfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/mman.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/shm.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ struct shmid_ds {
#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
Expand Down
2 changes: 1 addition & 1 deletion ipc/shm.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
* Callers of shm_lock() must validate the status of the returned ipc
* object pointer and error out as appropriate.
*/
return (void *)ipcp;
return ERR_CAST(ipcp);
}

static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
Expand Down
3 changes: 2 additions & 1 deletion mm/gup_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
struct gup_benchmark *gup)
{
ktime_t start_time, end_time;
unsigned long i, nr, nr_pages, addr, next;
unsigned long i, nr_pages, addr, next;
int nr;
struct page **pages;

nr_pages = gup->size / PAGE_SIZE;
Expand Down
2 changes: 1 addition & 1 deletion mm/huge_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -2931,7 +2931,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
else
page_add_file_rmap(new, true);
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
if (vma->vm_flags & VM_LOCKED)
if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
mlock_vma_page(new);
update_mmu_cache_pmd(vma, address, pvmw->pmd);
}
Expand Down
90 changes: 79 additions & 11 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
const unsigned long mmun_start = start; /* For mmu_notifiers */
const unsigned long mmun_end = end; /* For mmu_notifiers */
unsigned long mmun_start = start; /* For mmu_notifiers */
unsigned long mmun_end = end; /* For mmu_notifiers */

WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h));
Expand All @@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/
tlb_remove_check_page_size_change(tlb, sz);
tlb_start_vma(tlb, vma);

/*
* If sharing possible, alert mmu notifiers of worst case.
*/
adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
for (; address < end; address += sz) {
Expand All @@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, &address, ptep)) {
spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
* The caller's TLB flush range should cover this area.
*/
continue;
}

Expand Down Expand Up @@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
{
struct mm_struct *mm;
struct mmu_gather tlb;
unsigned long tlb_start = start;
unsigned long tlb_end = end;

/*
* If shared PMDs were possibly used within this vma range, adjust
* start/end for worst case tlb flushing.
* Note that we can not be sure if PMDs are shared until we try to
* unmap pages. However, we want to make sure TLB flushing covers
* the largest possible range.
*/
adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);

mm = vma->vm_mm;

tlb_gather_mmu(&tlb, mm, start, end);
tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
tlb_finish_mmu(&tlb, start, end);
tlb_finish_mmu(&tlb, tlb_start, tlb_end);
}

/*
Expand Down Expand Up @@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
unsigned long pages = 0;
unsigned long f_start = start;
unsigned long f_end = end;
bool shared_pmd = false;

/*
* In the case of shared PMDs, the area to flush could be beyond
* start/end. Set f_start/f_end to cover the maximum possible
* range if PMD sharing is possible.
*/
adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);

BUG_ON(address >= end);
flush_cache_range(vma, address, end);
flush_cache_range(vma, f_start, f_end);

mmu_notifier_invalidate_range_start(mm, start, end);
mmu_notifier_invalidate_range_start(mm, f_start, f_end);
i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl;
Expand All @@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (huge_pmd_unshare(mm, &address, ptep)) {
pages++;
spin_unlock(ptl);
shared_pmd = true;
continue;
}
pte = huge_ptep_get(ptep);
Expand Down Expand Up @@ -4348,17 +4379,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
* may have cleared our pud entry and done put_page on the page table:
* once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk.
* and that page table be reused and filled with junk. If we actually
* did unshare a page of pmds, flush the range corresponding to the pud.
*/
flush_hugetlb_tlb_range(vma, start, end);
if (shared_pmd)
flush_hugetlb_tlb_range(vma, f_start, f_end);
else
flush_hugetlb_tlb_range(vma, start, end);
/*
* No need to call mmu_notifier_invalidate_range() we are downgrading
* page table protection not changing it to point to a new page.
*
* See Documentation/vm/mmu_notifier.rst
*/
i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end);
mmu_notifier_invalidate_range_end(mm, f_start, f_end);

return pages << h->order;
}
Expand Down Expand Up @@ -4545,12 +4580,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
/*
* check on proper vm_flags and page table alignment
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
return true;
return false;
}

/*
* Determine if start,end range within vma could be mapped by shared pmd.
* If yes, adjust start and end to cover range associated with possible
* shared pmd mappings.
*/
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
unsigned long check_addr = *start;

if (!(vma->vm_flags & VM_MAYSHARE))
return;

for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
unsigned long a_start = check_addr & PUD_MASK;
unsigned long a_end = a_start + PUD_SIZE;

/*
* If sharing is possible, adjust start/end if necessary.
*/
if (range_in_vma(vma, a_start, a_end)) {
if (a_start < *start)
*start = a_start;
if (a_end > *end)
*end = a_end;
}
}
}

/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
Expand Down Expand Up @@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}

void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */

Expand Down
2 changes: 1 addition & 1 deletion mm/madvise.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
new_flags |= VM_DONTDUMP;
break;
case MADV_DODUMP:
if (new_flags & VM_SPECIAL) {
if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
error = -EINVAL;
goto out;
}
Expand Down
5 changes: 4 additions & 1 deletion mm/migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new);

if (PageTransHuge(page) && PageMlocked(page))
clear_page_mlock(page);

/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte);
}
Expand Down Expand Up @@ -1411,7 +1414,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
* we encounter them after the rest of the list
* is processed.
*/
if (PageTransHuge(page)) {
if (PageTransHuge(page) && !PageHuge(page)) {
lock_page(page);
rc = split_huge_page_to_list(page, from);
unlock_page(page);
Expand Down
Loading

0 comments on commit 091a1ea

Please sign in to comment.