Skip to content

Commit

Permalink
Merge branch 'page-refs' (page ref overflow)
Browse files Browse the repository at this point in the history
Merge page ref overflow branch.

Jann Horn reported that he can overflow the page ref count with
sufficient memory (and a filesystem that is intentionally extremely
slow).

Admittedly it's not exactly easy.  To have more than four billion
references to a page requires a minimum of 32GB of kernel memory just
for the pointers to the pages, much less any metadata to keep track of
those pointers.  Jann needed a total of 140GB of memory and a specially
crafted filesystem that leaves all reads pending (in order to not ever
free the page references and just keep adding more).

Still, we have a fairly straightforward way to limit the two obvious
user-controllable sources of page references: direct-IO like page
references gotten through get_user_pages(), and the splice pipe page
duplication.  So let's just do that.

* branch page-refs:
  fs: prevent page refcount overflow in pipe_buf_get
  mm: prevent get_user_pages() from overflowing page refcount
  mm: add 'try_get_page()' helper function
  mm: make page ref count overflow check tighter and more explicit
  • Loading branch information
Linus Torvalds committed Apr 14, 2019
2 parents 4443f8e + 15fab63 commit 6b3a707
Showing 8 changed files with 92 additions and 28 deletions.
12 changes: 6 additions & 6 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
@@ -2056,10 +2056,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;

ret = -EINVAL;
if (rem < len) {
pipe_unlock(pipe);
goto out;
}
if (rem < len)
goto out_free;

rem = len;
while (rem) {
@@ -2077,7 +2075,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
pipe_buf_get(pipe, ibuf);
if (!pipe_buf_get(pipe, ibuf))
goto out_free;

*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2100,11 +2100,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);

pipe_lock(pipe);
out_free:
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
pipe_unlock(pipe);

out:
kvfree(bufs);
return ret;
}
4 changes: 2 additions & 2 deletions fs/pipe.c
Original file line number Diff line number Diff line change
@@ -188,9 +188,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
get_page(buf->page);
return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);

12 changes: 10 additions & 2 deletions fs/splice.c
Original file line number Diff line number Diff line change
@@ -1593,7 +1593,11 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
pipe_buf_get(ipipe, ibuf);
if (!pipe_buf_get(ipipe, ibuf)) {
if (ret == 0)
ret = -EFAULT;
break;
}
*obuf = *ibuf;

/*
@@ -1667,7 +1671,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
pipe_buf_get(ipipe, ibuf);
if (!pipe_buf_get(ipipe, ibuf)) {
if (ret == 0)
ret = -EFAULT;
break;
}

obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
15 changes: 14 additions & 1 deletion include/linux/mm.h
Original file line number Diff line number Diff line change
@@ -966,15 +966,28 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
}
#endif /* CONFIG_DEV_PAGEMAP_OPS */

/* 127: arbitrary random number, small enough to assemble well */
#define page_ref_zero_or_close_to_overflow(page) \
((unsigned int) page_ref_count(page) + 127u <= 127u)

static inline void get_page(struct page *page)
{
page = compound_head(page);
/*
* Getting a normal page or the head of a compound page
* requires to already have an elevated page->_refcount.
*/
VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
page_ref_inc(page);
}

static inline __must_check bool try_get_page(struct page *page)
{
page = compound_head(page);
if (WARN_ON_ONCE(page_ref_count(page) <= 0))
return false;
page_ref_inc(page);
return true;
}

static inline void put_page(struct page *page)
10 changes: 6 additions & 4 deletions include/linux/pipe_fs_i.h
Original file line number Diff line number Diff line change
@@ -101,18 +101,20 @@ struct pipe_buf_operations {
/*
* Get a reference to the pipe buffer.
*/
void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};

/**
* pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
*
* Return: %true if the reference was successfully obtained.
*/
static inline void pipe_buf_get(struct pipe_inode_info *pipe,
static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
buf->ops->get(pipe, buf);
return buf->ops->get(pipe, buf);
}

/**
@@ -171,7 +173,7 @@ struct pipe_inode_info *alloc_pipe_info(void);
void free_pipe_info(struct pipe_inode_info *);

/* Generic pipe buffer ops functions */
void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
6 changes: 5 additions & 1 deletion kernel/trace/trace.c
Original file line number Diff line number Diff line change
@@ -7041,12 +7041,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
buf->private = 0;
}

static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;

if (ref->ref > INT_MAX/2)
return false;

ref->ref++;
return true;
}

/* Pipe buffer operations for a buffer. */
48 changes: 36 additions & 12 deletions mm/gup.c
Original file line number Diff line number Diff line change
@@ -160,8 +160,12 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
goto retry;
}

if (flags & FOLL_GET)
get_page(page);
if (flags & FOLL_GET) {
if (unlikely(!try_get_page(page))) {
page = ERR_PTR(-ENOMEM);
goto out;
}
}
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
@@ -298,7 +302,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
get_page(page);
if (unlikely(!try_get_page(page))) {
spin_unlock(ptl);
return ERR_PTR(-ENOMEM);
}
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
@@ -500,7 +507,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
if (is_device_public_page(*page))
goto unmap;
}
get_page(*page);
if (unlikely(!try_get_page(*page))) {
ret = -ENOMEM;
goto unmap;
}
out:
ret = 0;
unmap:
@@ -1545,6 +1555,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
}
}

/*
* Return the compund head page with ref appropriately incremented,
* or NULL if that failed.
*/
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
struct page *head = compound_head(page);
if (WARN_ON_ONCE(page_ref_count(head) < 0))
return NULL;
if (unlikely(!page_cache_add_speculative(head, refs)))
return NULL;
return head;
}

#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
@@ -1579,9 +1603,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,

VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
head = compound_head(page);

if (!page_cache_get_speculative(head))
head = try_get_compound_head(page, 1);
if (!head)
goto pte_unmap;

if (unlikely(pte_val(pte) != pte_val(*ptep))) {
@@ -1720,8 +1744,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

head = compound_head(pmd_page(orig));
if (!page_cache_add_speculative(head, refs)) {
head = try_get_compound_head(pmd_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
@@ -1758,8 +1782,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

head = compound_head(pud_page(orig));
if (!page_cache_add_speculative(head, refs)) {
head = try_get_compound_head(pud_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
@@ -1795,8 +1819,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

head = compound_head(pgd_page(orig));
if (!page_cache_add_speculative(head, refs)) {
head = try_get_compound_head(pgd_page(orig), refs);
if (!head) {
*nr -= refs;
return 0;
}
13 changes: 13 additions & 0 deletions mm/hugetlb.c
Original file line number Diff line number Diff line change
@@ -4299,6 +4299,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,

pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));

/*
* Instead of doing 'try_get_page()' below in the same_page
* loop, just check the count once here.
*/
if (unlikely(page_count(page) <= 0)) {
if (pages) {
spin_unlock(ptl);
remainder = 0;
err = -ENOMEM;
break;
}
}
same_page:
if (pages) {
pages[i] = mem_map_offset(page, pfn_offset);

0 comments on commit 6b3a707

Please sign in to comment.