From 94e4b18ac5d9d1339a8e6a0b7ea4a164542b2bb8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Thu, 29 Mar 2012 19:11:40 +0200
Subject: [PATCH]

--- yaml ---
r: 312242
b: refs/heads/master
c: 8d240dd88cca33b704adf3fe281aa64b5aac2dd8
h: refs/heads/master
v: v3
---
 [refs]                        |   2 +-
 trunk/kernel/events/uprobes.c | 361 ++++++++++++++++++----------------
 trunk/kernel/trace/ftrace.c   |   8 +-
 3 files changed, 196 insertions(+), 175 deletions(-)

diff --git a/[refs] b/[refs]
index 5df8671193a3..a446809ad801 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: e227051b13956b8f71c0abecc41ad351e64671c8
+refs/heads/master: 8d240dd88cca33b704adf3fe281aa64b5aac2dd8
diff --git a/trunk/kernel/events/uprobes.c b/trunk/kernel/events/uprobes.c
index f93532748bca..b52376d02332 100644
--- a/trunk/kernel/events/uprobes.c
+++ b/trunk/kernel/events/uprobes.c
@@ -44,23 +44,6 @@ static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
 
 #define UPROBES_HASH_SZ	13
 
-/*
- * We need separate register/unregister and mmap/munmap lock hashes because
- * of mmap_sem nesting.
- *
- * uprobe_register() needs to install probes on (potentially) all processes
- * and thus needs to acquire multiple mmap_sems (consequtively, not
- * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
- * for the particular process doing the mmap.
- *
- * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
- * because of lock order against i_mmap_mutex. This means there's a hole in
- * the register vma iteration where a mmap() can happen.
- *
- * Thus uprobe_register() can race with uprobe_mmap() and we can try and
- * install a probe where one is already installed.
- */
-
 /* serialize (un)register */
 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
 
@@ -77,6 +60,17 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
  */
 static atomic_t uprobe_events = ATOMIC_INIT(0);
 
+/*
+ * Maintain a temporary per-vma info that can be used to search if a vma
+ * has already been handled. This structure is introduced since extending
+ * vm_area_struct wasn't recommended.
+ */ +struct vma_info { + struct list_head probe_list; + struct mm_struct *mm; + loff_t vaddr; +}; + struct uprobe { struct rb_node rb_node; /* node in the rb tree */ atomic_t ref; @@ -105,8 +99,7 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) if (!is_register) return true; - if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) - == (VM_READ|VM_EXEC)) + if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC)) return true; return false; @@ -135,17 +128,33 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) { struct mm_struct *mm = vma->vm_mm; - unsigned long addr; - spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pte_t *ptep; + spinlock_t *ptl; + unsigned long addr; + int err = -EFAULT; addr = page_address_in_vma(page, vma); if (addr == -EFAULT) - return -EFAULT; + goto out; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + goto out; + + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + goto out; + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + goto out; - ptep = page_check_address(page, mm, addr, &ptl, 0); + ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); if (!ptep) - return -EAGAIN; + goto out; get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); @@ -164,8 +173,10 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct try_to_free_swap(page); put_page(page); pte_unmap_unlock(ptep, ptl); + err = 0; - return 0; +out: + return err; } /** @@ -210,8 +221,9 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; struct uprobe *uprobe; + loff_t addr; int ret; -retry: + /* Read the page with vaddr into memory */ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); if (ret <= 0) @@ -233,6 +245,10 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, if (mapping != vma->vm_file->f_mapping) goto put_out; + addr = vma_address(vma, uprobe->offset); + if (vaddr != (unsigned long)addr) + goto put_out; + ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) @@ -250,7 +266,11 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, vaddr_new = kmap_atomic(new_page); memcpy(vaddr_new, vaddr_old, PAGE_SIZE); - memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE); + + /* poke the new insn in, ASSUMES we don't cross page boundary */ + vaddr &= ~PAGE_MASK; + BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); kunmap_atomic(vaddr_new); kunmap_atomic(vaddr_old); @@ -270,8 +290,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, put_out: put_page(old_page); - if (unlikely(ret == -EAGAIN)) - goto retry; return ret; } @@ -346,9 +364,7 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { int result; - /* - * See the comment near uprobes_hash(). 
- */ + result = is_swbp_at_addr(mm, vaddr); if (result == 1) return -EEXIST; @@ -513,6 +529,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = igrab(inode); uprobe->offset = offset; init_rwsem(&uprobe->consumer_rwsem); + INIT_LIST_HEAD(&uprobe->pending_list); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); @@ -580,22 +597,20 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) } static int -__copy_insn(struct address_space *mapping, struct file *filp, char *insn, - unsigned long nbytes, loff_t offset) +__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn, + unsigned long nbytes, unsigned long offset) { + struct file *filp = vma->vm_file; struct page *page; void *vaddr; - unsigned long off; - pgoff_t idx; + unsigned long off1; + unsigned long idx; if (!filp) return -EINVAL; - if (!mapping->a_ops->readpage) - return -EIO; - - idx = offset >> PAGE_CACHE_SHIFT; - off = offset & ~PAGE_MASK; + idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); + off1 = offset &= ~PAGE_MASK; /* * Ensure that the page that has the original instruction is @@ -606,20 +621,22 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn, return PTR_ERR(page); vaddr = kmap_atomic(page); - memcpy(insn, vaddr + off, nbytes); + memcpy(insn, vaddr + off1, nbytes); kunmap_atomic(vaddr); page_cache_release(page); return 0; } -static int copy_insn(struct uprobe *uprobe, struct file *filp) +static int +copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) { struct address_space *mapping; unsigned long nbytes; int bytes; - nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK); + addr &= ~PAGE_MASK; + nbytes = PAGE_SIZE - addr; mapping = uprobe->inode->i_mapping; /* Instruction at end of binary; copy only available bytes */ @@ -630,13 +647,13 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) /* Instruction at the page-boundary; copy bytes in second page */ if (nbytes < bytes) { - int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes, - bytes - nbytes, uprobe->offset + nbytes); - if (err) - return err; + if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes, + bytes - nbytes, uprobe->offset + nbytes)) + return -ENOMEM; + bytes = nbytes; } - return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset); + return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); } /* @@ -664,8 +681,9 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp) */ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long vaddr) + struct vm_area_struct *vma, loff_t vaddr) { + unsigned long addr; int ret; /* @@ -678,22 +696,20 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, if (!uprobe->consumers) return -EEXIST; + addr = (unsigned long)vaddr; + if (!(uprobe->flags & UPROBE_COPY_INSN)) { - ret = copy_insn(uprobe, vma->vm_file); + ret = copy_insn(uprobe, vma, addr); if (ret) return ret; if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) - return -ENOTSUPP; + return -EEXIST; - ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); + ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, addr); if (ret) return ret; - /* write_opcode() assumes we don't cross page boundary */ - BUG_ON((uprobe->offset & ~PAGE_MASK) + - UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - uprobe->flags |= UPROBE_COPY_INSN; } @@ -706,7 +722,7 @@ install_breakpoint(struct 
uprobe *uprobe, struct mm_struct *mm, * Hence increment before and decrement on failure. */ atomic_inc(&mm->uprobes_state.count); - ret = set_swbp(&uprobe->arch, mm, vaddr); + ret = set_swbp(&uprobe->arch, mm, addr); if (ret) atomic_dec(&mm->uprobes_state.count); @@ -714,9 +730,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, } static void -remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) +remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr) { - if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) + if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true)) atomic_dec(&mm->uprobes_state.count); } @@ -737,135 +753,139 @@ static void delete_uprobe(struct uprobe *uprobe) atomic_dec(&uprobe_events); } -struct map_info { - struct map_info *next; - struct mm_struct *mm; - unsigned long vaddr; -}; - -static inline struct map_info *free_map_info(struct map_info *info) -{ - struct map_info *next = info->next; - kfree(info); - return next; -} - -static struct map_info * -build_map_info(struct address_space *mapping, loff_t offset, bool is_register) +static struct vma_info * +__find_next_vma_info(struct address_space *mapping, struct list_head *head, + struct vma_info *vi, loff_t offset, bool is_register) { - unsigned long pgoff = offset >> PAGE_SHIFT; struct prio_tree_iter iter; struct vm_area_struct *vma; - struct map_info *curr = NULL; - struct map_info *prev = NULL; - struct map_info *info; - int more = 0; + struct vma_info *tmpvi; + unsigned long pgoff; + int existing_vma; + loff_t vaddr; + + pgoff = offset >> PAGE_SHIFT; - again: - mutex_lock(&mapping->i_mmap_mutex); vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; - if (!prev && !more) { - /* - * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through - * reclaim. This is optimistic, no harm done if it fails. - */ - prev = kmalloc(sizeof(struct map_info), - GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (prev) - prev->next = NULL; + existing_vma = 0; + vaddr = vma_address(vma, offset); + + list_for_each_entry(tmpvi, head, probe_list) { + if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) { + existing_vma = 1; + break; + } } - if (!prev) { - more++; - continue; + + /* + * Another vma needs a probe to be installed. However skip + * installing the probe if the vma is about to be unlinked. + */ + if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) { + vi->mm = vma->vm_mm; + vi->vaddr = vaddr; + list_add(&vi->probe_list, head); + + return vi; } + } - if (!atomic_inc_not_zero(&vma->vm_mm->mm_users)) - continue; + return NULL; +} + +/* + * Iterate in the rmap prio tree and find a vma where a probe has not + * yet been inserted. 
+ */ +static struct vma_info * +find_next_vma_info(struct address_space *mapping, struct list_head *head, + loff_t offset, bool is_register) +{ + struct vma_info *vi, *retvi; - info = prev; - prev = prev->next; - info->next = curr; - curr = info; + vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); + if (!vi) + return ERR_PTR(-ENOMEM); - info->mm = vma->vm_mm; - info->vaddr = vma_address(vma, offset); - } + mutex_lock(&mapping->i_mmap_mutex); + retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); mutex_unlock(&mapping->i_mmap_mutex); - if (!more) - goto out; + if (!retvi) + kfree(vi); - prev = curr; - while (curr) { - mmput(curr->mm); - curr = curr->next; - } - - do { - info = kmalloc(sizeof(struct map_info), GFP_KERNEL); - if (!info) { - curr = ERR_PTR(-ENOMEM); - goto out; - } - info->next = prev; - prev = info; - } while (--more); - - goto again; - out: - while (prev) - prev = free_map_info(prev); - return curr; + return retvi; } static int register_for_each_vma(struct uprobe *uprobe, bool is_register) { - struct map_info *info; - int err = 0; + struct list_head try_list; + struct vm_area_struct *vma; + struct address_space *mapping; + struct vma_info *vi, *tmpvi; + struct mm_struct *mm; + loff_t vaddr; + int ret; - info = build_map_info(uprobe->inode->i_mapping, - uprobe->offset, is_register); - if (IS_ERR(info)) - return PTR_ERR(info); + mapping = uprobe->inode->i_mapping; + INIT_LIST_HEAD(&try_list); - while (info) { - struct mm_struct *mm = info->mm; - struct vm_area_struct *vma; + ret = 0; - if (err) - goto free; + for (;;) { + vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register); + if (!vi) + break; - down_write(&mm->mmap_sem); - vma = find_vma(mm, (unsigned long)info->vaddr); - if (!vma || !valid_vma(vma, is_register)) - goto unlock; + if (IS_ERR(vi)) { + ret = PTR_ERR(vi); + break; + } + mm = vi->mm; + down_write(&mm->mmap_sem); + vma = find_vma(mm, (unsigned long)vi->vaddr); + if (!vma || !valid_vma(vma, is_register)) { + list_del(&vi->probe_list); + kfree(vi); + up_write(&mm->mmap_sem); + mmput(mm); + continue; + } + vaddr = vma_address(vma, uprobe->offset); if (vma->vm_file->f_mapping->host != uprobe->inode || - vma_address(vma, uprobe->offset) != info->vaddr) - goto unlock; - - if (is_register) { - err = install_breakpoint(uprobe, mm, vma, info->vaddr); - /* - * We can race against uprobe_mmap(), see the - * comment near uprobe_hash(). 
- */ - if (err == -EEXIST) - err = 0; - } else { - remove_breakpoint(uprobe, mm, info->vaddr); + vaddr != vi->vaddr) { + list_del(&vi->probe_list); + kfree(vi); + up_write(&mm->mmap_sem); + mmput(mm); + continue; } - unlock: + + if (is_register) + ret = install_breakpoint(uprobe, mm, vma, vi->vaddr); + else + remove_breakpoint(uprobe, mm, vi->vaddr); + up_write(&mm->mmap_sem); - free: mmput(mm); - info = free_map_info(info); + if (is_register) { + if (ret && ret == -EEXIST) + ret = 0; + if (ret) + break; + } } - return err; + list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) { + list_del(&vi->probe_list); + kfree(vi); + } + + return ret; } static int __uprobe_register(struct uprobe *uprobe) @@ -1031,7 +1051,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; int ret, count; @@ -1049,9 +1069,12 @@ int uprobe_mmap(struct vm_area_struct *vma) ret = 0; count = 0; - list_for_each_entry(uprobe, &tmp_list, pending_list) { + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + loff_t vaddr; + + list_del(&uprobe->pending_list); if (!ret) { - loff_t vaddr = vma_address(vma, uprobe->offset); + vaddr = vma_address(vma, uprobe->offset); if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { put_uprobe(uprobe); @@ -1059,10 +1082,8 @@ int uprobe_mmap(struct vm_area_struct *vma) } ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - /* - * We can race against uprobe_register(), see the - * comment near uprobe_hash(). - */ + + /* Ignore double add: */ if (ret == -EEXIST) { ret = 0; @@ -1097,7 +1118,7 @@ int uprobe_mmap(struct vm_area_struct *vma) void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) @@ -1114,8 +1135,11 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, &tmp_list); - list_for_each_entry(uprobe, &tmp_list, pending_list) { - loff_t vaddr = vma_address(vma, uprobe->offset); + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { + loff_t vaddr; + + list_del(&uprobe->pending_list); + vaddr = vma_address(vma, uprobe->offset); if (vaddr >= start && vaddr < end) { /* @@ -1392,6 +1416,7 @@ static struct uprobe_task *add_utask(void) if (unlikely(!utask)) return NULL; + utask->active_uprobe = NULL; current->utask = utask; return utask; } diff --git a/trunk/kernel/trace/ftrace.c b/trunk/kernel/trace/ftrace.c index a008663d86c8..b4f20fba09fc 100644 --- a/trunk/kernel/trace/ftrace.c +++ b/trunk/kernel/trace/ftrace.c @@ -312,7 +312,7 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, static int __register_ftrace_function(struct ftrace_ops *ops) { - if (ftrace_disabled) + if (unlikely(ftrace_disabled)) return -ENODEV; if (FTRACE_WARN_ON(ops == &global_ops)) @@ -4299,16 +4299,12 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_lock); - if (unlikely(ftrace_disabled)) - goto out_unlock; - ret = __register_ftrace_function(ops); if (!ret) ret = ftrace_startup(ops, 0); - - out_unlock: mutex_unlock(&ftrace_lock); + return ret; } EXPORT_SYMBOL_GPL(register_ftrace_function);