Skip to content

Commit

Permalink
Fix incorrect user space access locking in mincore()
Browse files Browse the repository at this point in the history
Doug Chapman noticed that mincore() will doa "copy_to_user()" of the
result while holding the mmap semaphore for reading, which is a big
no-no.  While a recursive read-lock on a semaphore in the case of a page
fault happens to work, we don't actually allow them due to deadlock
schenarios with writers due to fairness issues.

Doug and Marcel sent in a patch to fix it, but I decided to just rewrite
the mess instead - not just fixing the locking problem, but making the
code smaller and (imho) much easier to understand.

Cc: Doug Chapman <dchapman@redhat.com>
Cc: Marcel Holtmann <holtmann@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Linus Torvalds committed Dec 16, 2006
1 parent 0221872 commit 2f77d10
Showing 1 changed file with 86 additions and 104 deletions.
190 changes: 86 additions & 104 deletions mm/mincore.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* linux/mm/mincore.c
*
* Copyright (C) 1994-1999 Linus Torvalds
* Copyright (C) 1994-2006 Linus Torvalds
*/

/*
Expand Down Expand Up @@ -38,46 +38,60 @@ static unsigned char mincore_page(struct vm_area_struct * vma,
return present;
}

static long mincore_vma(struct vm_area_struct * vma,
unsigned long start, unsigned long end, unsigned char __user * vec)
/*
* Do a chunk of "sys_mincore()". We've already checked
* all the arguments, we hold the mmap semaphore: we should
* just return the amount of info we're asked for.
*/
static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
{
long error, i, remaining;
unsigned char * tmp;
unsigned long i, nr, pgoff;
struct vm_area_struct *vma = find_vma(current->mm, addr);

error = -ENOMEM;
if (!vma->vm_file)
return error;

start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
if (end > vma->vm_end)
end = vma->vm_end;
end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
/*
* find_vma() didn't find anything: the address
* is above everything we have mapped.
*/
if (!vma) {
memset(vec, 0, pages);
return pages;
}

error = -EAGAIN;
tmp = (unsigned char *) __get_free_page(GFP_KERNEL);
if (!tmp)
return error;
/*
* find_vma() found something, but we might be
* below it: check for that.
*/
if (addr < vma->vm_start) {
unsigned long gap = (vma->vm_start - addr) >> PAGE_SHIFT;
if (gap > pages)
gap = pages;
memset(vec, 0, gap);
return gap;
}

/* (end - start) is # of pages, and also # of bytes in "vec */
remaining = (end - start),
/*
* Ok, got it. But check whether it's a segment we support
* mincore() on. Right now, we don't do any anonymous mappings.
*/
if (!vma->vm_file)
return -ENOMEM;

error = 0;
for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) {
int j = 0;
long thispiece = (remaining < PAGE_SIZE) ?
remaining : PAGE_SIZE;
/*
* Calculate how many pages there are left in the vma, and
* what the pgoff is for our address.
*/
nr = (vma->vm_end - addr) >> PAGE_SHIFT;
if (nr > pages)
nr = pages;

while (j < thispiece)
tmp[j++] = mincore_page(vma, start++);
pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;

if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) {
error = -EFAULT;
break;
}
}
/* And then we just fill the sucker in.. */
for (i = 0 ; i < nr; i++, pgoff++)
vec[i] = mincore_page(vma, pgoff);

free_page((unsigned long) tmp);
return error;
return nr;
}

/*
Expand Down Expand Up @@ -107,82 +121,50 @@ static long mincore_vma(struct vm_area_struct * vma,
asmlinkage long sys_mincore(unsigned long start, size_t len,
unsigned char __user * vec)
{
int index = 0;
unsigned long end, limit;
struct vm_area_struct * vma;
size_t max;
int unmapped_error = 0;
long error;

/* check the arguments */
if (start & ~PAGE_CACHE_MASK)
goto einval;

limit = TASK_SIZE;
if (start >= limit)
goto enomem;

if (!len)
return 0;
long retval;
unsigned long pages;
unsigned char *tmp;

max = limit - start;
len = PAGE_CACHE_ALIGN(len);
if (len > max || !len)
goto enomem;

end = start + len;
/* Check the start address: needs to be page-aligned.. */
if (start & ~PAGE_CACHE_MASK)
return -EINVAL;

/* check the output buffer whilst holding the lock */
error = -EFAULT;
down_read(&current->mm->mmap_sem);
/* ..and we need to be passed a valid user-space range */
if (!access_ok(VERIFY_READ, (void __user *) start, len))
return -ENOMEM;

if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT))
goto out;
/* This also avoids any overflows on PAGE_CACHE_ALIGN */
pages = len >> PAGE_SHIFT;
pages += (len & ~PAGE_MASK) != 0;

/*
* If the interval [start,end) covers some unmapped address
* ranges, just ignore them, but return -ENOMEM at the end.
*/
error = 0;

vma = find_vma(current->mm, start);
while (vma) {
/* Here start < vma->vm_end. */
if (start < vma->vm_start) {
unmapped_error = -ENOMEM;
start = vma->vm_start;
}
if (!access_ok(VERIFY_WRITE, vec, pages))
return -EFAULT;

/* Here vma->vm_start <= start < vma->vm_end. */
if (end <= vma->vm_end) {
if (start < end) {
error = mincore_vma(vma, start, end,
&vec[index]);
if (error)
goto out;
}
error = unmapped_error;
goto out;
tmp = (void *) __get_free_page(GFP_USER);
if (!tmp)
return -ENOMEM;

retval = 0;
while (pages) {
/*
* Do at most PAGE_SIZE entries per iteration, due to
* the temporary buffer size.
*/
down_read(&current->mm->mmap_sem);
retval = do_mincore(start, tmp, max(pages, PAGE_SIZE));
up_read(&current->mm->mmap_sem);

if (retval <= 0)
break;
if (copy_to_user(vec, tmp, retval)) {
retval = -EFAULT;
break;
}

/* Here vma->vm_start <= start < vma->vm_end < end. */
error = mincore_vma(vma, start, vma->vm_end, &vec[index]);
if (error)
goto out;
index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
start = vma->vm_end;
vma = vma->vm_next;
pages -= retval;
vec += retval;
start += retval << PAGE_SHIFT;
retval = 0;
}

/* we found a hole in the area queried if we arrive here */
error = -ENOMEM;

out:
up_read(&current->mm->mmap_sem);
return error;

einval:
return -EINVAL;
enomem:
return -ENOMEM;
free_page((unsigned long) tmp);
return retval;
}

0 comments on commit 2f77d10

Please sign in to comment.