From c2d3f25dda016d9697c5416810d4528770f0a281 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 9 Oct 2013 14:08:09 +0200 Subject: [PATCH 1/7] uprobes: Remove the wrong __weak attribute linux/uprobes.h declares arch_uprobe_skip_sstep() as a weak function. But as there is no definition of generic version so when trying to build uprobes for an architecture that doesn't yet have a arch_uprobe_skip_sstep() implementation, the vmlinux will try to call arch_uprobe_skip_sstep() somehwere in Stupidhistan leading to a system crash. We rather want a proper link error so remove arch_uprobe_skip_sstep(). Signed-off-by: Ralf Baechle Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 06f28beed7c2c..e6fba627ea451 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -123,7 +123,7 @@ extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); -extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs); extern void uprobe_clear_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { From b68e0749100e1b901bf11330f149b321c082178e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:31 +0200 Subject: [PATCH 2/7] uprobes: Change the callsite of uprobe_copy_process() Preparation for the next patches. Move the callsite of uprobe_copy_process() in copy_process() down to the succesfull return. We do not care if copy_process() fails, uprobe_free_utask() won't be called in this case so the wrong ->utask != NULL doesn't matter. OTOH, with this change we know that copy_process() can't fail when uprobe_copy_process() is called, the new task should either return to user-mode or call do_exit(). This way uprobe_copy_process() can: 1. setup p->utask != NULL if necessary 2. setup uprobes_state.xol_area 3. use task_work_add(p) Also, move the definition of uprobe_copy_process() down so that it can see get_utask(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 16 ++++++++-------- kernel/fork.c | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad8e1bdca70e4..db7a1dcb3dd68 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1344,14 +1344,6 @@ void uprobe_free_utask(struct task_struct *t) t->utask = NULL; } -/* - * Called in context of a new clone/fork from copy_process. - */ -void uprobe_copy_process(struct task_struct *t) -{ - t->utask = NULL; -} - /* * Allocate a uprobe_task object for the task if if necessary. * Called when the thread hits a breakpoint. @@ -1367,6 +1359,14 @@ static struct uprobe_task *get_utask(void) return current->utask; } +/* + * Called in context of a new clone/fork from copy_process. + */ +void uprobe_copy_process(struct task_struct *t) +{ + t->utask = NULL; +} + /* * Current area->vaddr notion assume the trampoline address is always * equal area->vaddr. diff --git a/kernel/fork.c b/kernel/fork.c index 086fe73ad6bde..d3603b81246b5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1373,7 +1373,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif - uprobe_copy_process(p); /* * sigaltstack should be cleared when sharing the same VM */ @@ -1490,6 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, perf_event_fork(p); trace_task_newtask(p, clone_flags); + uprobe_copy_process(p); return p; From 6441ec8b7c108b72789d120562b9f1d976e4aaaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:35 +0200 Subject: [PATCH 3/7] uprobes: Introduce __create_xol_area() No functional changes, preparation. Extract the code which actually allocates/installs the new area into the new helper, __create_xol_area(). While at it remove the unnecessary "ret = ENOMEM" and "ret = 0" in xol_add_vma(), they both have no effect. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 47 ++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index db7a1dcb3dd68..ad17d813e73e4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1096,16 +1096,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon } /* Slot allocation for XOL */ -static int xol_add_vma(struct xol_area *area) +static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { - struct mm_struct *mm = current->mm; int ret = -EALREADY; down_write(&mm->mmap_sem); if (mm->uprobes_state.xol_area) goto fail; - ret = -ENOMEM; /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (area->vaddr & ~PAGE_MASK) { @@ -1120,28 +1118,17 @@ static int xol_add_vma(struct xol_area *area) smp_wmb(); /* pairs with get_xol_area() */ mm->uprobes_state.xol_area = area; - ret = 0; fail: up_write(&mm->mmap_sem); return ret; } -/* - * get_xol_area - Allocate process's xol_area if necessary. - * This area will be used for storing instructions for execution out of line. - * - * Returns the allocated area or NULL. - */ -static struct xol_area *get_xol_area(void) +static struct xol_area *__create_xol_area(void) { struct mm_struct *mm = current->mm; - struct xol_area *area; uprobe_opcode_t insn = UPROBE_SWBP_INSN; - - area = mm->uprobes_state.xol_area; - if (area) - goto ret; + struct xol_area *area; area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) @@ -1155,13 +1142,13 @@ static struct xol_area *get_xol_area(void) if (!area->page) goto free_bitmap; - /* allocate first slot of task's xol_area for the return probes */ + init_waitqueue_head(&area->wq); + /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); - copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); atomic_set(&area->slot_count, 1); - init_waitqueue_head(&area->wq); + copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE); - if (!xol_add_vma(area)) + if (!xol_add_vma(mm, area)) return area; __free_page(area->page); @@ -1170,9 +1157,25 @@ static struct xol_area *get_xol_area(void) free_area: kfree(area); out: + return NULL; +} + +/* + * get_xol_area - Allocate process's xol_area if necessary. + * This area will be used for storing instructions for execution out of line. + * + * Returns the allocated area or NULL. + */ +static struct xol_area *get_xol_area(void) +{ + struct mm_struct *mm = current->mm; + struct xol_area *area; + + if (!mm->uprobes_state.xol_area) + __create_xol_area(); + area = mm->uprobes_state.xol_area; - ret: - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ return area; } From af0d95af79773f7637107cd3871aaabcb425f15a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:38 +0200 Subject: [PATCH 4/7] uprobes: Teach __create_xol_area() to accept the predefined vaddr Currently xol_add_vma() uses get_unmapped_area() for area->vaddr, but the next patches need to use the fixed address. So this patch adds the new "vaddr" argument to __create_xol_area() which should be used as area->vaddr if it is nonzero. xol_add_vma() doesn't bother to verify that the predefined addr is not used, insert_vm_struct() should fail if find_vma_links() detects the overlap with the existing vma. Also, __create_xol_area() doesn't need __GFP_ZERO to allocate area. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad17d813e73e4..7d12a45842a71 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1104,11 +1104,14 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) if (mm->uprobes_state.xol_area) goto fail; - /* Try to map as high as possible, this is only a hint. */ - area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); - if (area->vaddr & ~PAGE_MASK) { - ret = area->vaddr; - goto fail; + if (!area->vaddr) { + /* Try to map as high as possible, this is only a hint. */ + area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, + PAGE_SIZE, 0, 0); + if (area->vaddr & ~PAGE_MASK) { + ret = area->vaddr; + goto fail; + } } ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE, @@ -1124,13 +1127,13 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) return ret; } -static struct xol_area *__create_xol_area(void) +static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; - area = kzalloc(sizeof(*area), GFP_KERNEL); + area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1142,6 +1145,7 @@ static struct xol_area *__create_xol_area(void) if (!area->page) goto free_bitmap; + area->vaddr = vaddr; init_waitqueue_head(&area->wq); /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); @@ -1172,7 +1176,7 @@ static struct xol_area *get_xol_area(void) struct xol_area *area; if (!mm->uprobes_state.xol_area) - __create_xol_area(); + __create_xol_area(0); area = mm->uprobes_state.xol_area; smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ From 248d3a7b2f100078c5f6878351177859380582e9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:41 +0200 Subject: [PATCH 5/7] uprobes: Change uprobe_copy_process() to dup return_instances uprobe_copy_process() assumes that the new child doesn't need ->utask, it should be allocated by demand. But this is not true if the forking task has the pending ret- probes, the child should report them as well and thus it needs the copy of parent's ->return_instances chain. Otherwise the child crashes when it returns from the probed function. Alternatively we could cleanup the child's stack, but this needs per-arch changes and this is not what we want. At least systemtap expects a .return in the child too. Note: this change alone doesn't fix the problem, see the next change. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7d12a45842a71..1c6cda68a5558 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1366,12 +1366,55 @@ static struct uprobe_task *get_utask(void) return current->utask; } +static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) +{ + struct uprobe_task *n_utask; + struct return_instance **p, *o, *n; + + n_utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + if (!n_utask) + return -ENOMEM; + t->utask = n_utask; + + p = &n_utask->return_instances; + for (o = o_utask->return_instances; o; o = o->next) { + n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); + if (!n) + return -ENOMEM; + + *n = *o; + atomic_inc(&n->uprobe->ref); + n->next = NULL; + + *p = n; + p = &n->next; + n_utask->depth++; + } + + return 0; +} + +static void uprobe_warn(struct task_struct *t, const char *msg) +{ + pr_warn("uprobe: %s:%d failed to %s\n", + current->comm, current->pid, msg); +} + /* * Called in context of a new clone/fork from copy_process. */ void uprobe_copy_process(struct task_struct *t) { + struct uprobe_task *utask = current->utask; + struct mm_struct *mm = current->mm; + t->utask = NULL; + + if (mm == t->mm || !utask || !utask->return_instances) + return; + + if (dup_utask(t, utask)) + return uprobe_warn(t, "dup ret instances"); } /* From aa59c53fd4599c91ccf9629af0c2777b89929076 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 13 Oct 2013 21:18:44 +0200 Subject: [PATCH 6/7] uprobes: Change uprobe_copy_process() to dup xol_area This finally fixes the serious bug in uretprobes: a forked child crashes if the parent called fork() with the pending ret probe. Trivial test-case: # perf probe -x /lib/libc.so.6 __fork%return # perf record -e probe_libc:__fork perl -le 'fork || print "OK"' (the child doesn't print "OK", it is killed by SIGSEGV) If the child returns from the probed function it actually returns to trampoline_vaddr, because it got the copy of parent's stack mangled by prepare_uretprobe() when the parent entered this func. It crashes because a) this address is not mapped and b) until the previous change it doesn't have the proper->return_instances info. This means that uprobe_copy_process() has to create xol_area which has the trampoline slot, and its vaddr should be equal to parent's xol_area->vaddr. Unfortunately, uprobe_copy_process() can not simply do __create_xol_area(child, xol_area->vaddr). This could actually work but perf_event_mmap() doesn't expect the usage of foreign ->mm. So we offload this to task_work_run(), and pass the argument via not yet used utask->vaddr. We know that this vaddr is fine for install_special_mapping(), the necessary hole was recently "created" by dup_mmap() which skips the parent's VM_DONTCOPY area, and nobody else could use the new mm. Unfortunately, this also means that we can not handle the errors properly, we obviously can not abort the already completed fork(). So we simply print the warning if GFP_KERNEL allocation (the only possible reason) fails. Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- kernel/events/uprobes.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1c6cda68a5558..9f282e14925dd 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -35,6 +35,7 @@ #include /* notifier mechanism */ #include "../../mm/internal.h" /* munlock_vma_page */ #include +#include #include @@ -1400,6 +1401,17 @@ static void uprobe_warn(struct task_struct *t, const char *msg) current->comm, current->pid, msg); } +static void dup_xol_work(struct callback_head *work) +{ + kfree(work); + + if (current->flags & PF_EXITING) + return; + + if (!__create_xol_area(current->utask->vaddr)) + uprobe_warn(current, "dup xol area"); +} + /* * Called in context of a new clone/fork from copy_process. */ @@ -1407,6 +1419,8 @@ void uprobe_copy_process(struct task_struct *t) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; + struct callback_head *work; + struct xol_area *area; t->utask = NULL; @@ -1415,6 +1429,20 @@ void uprobe_copy_process(struct task_struct *t) if (dup_utask(t, utask)) return uprobe_warn(t, "dup ret instances"); + + /* The task can fork() after dup_xol_work() fails */ + area = mm->uprobes_state.xol_area; + if (!area) + return uprobe_warn(t, "dup xol area"); + + /* TODO: move it into the union in uprobe_task */ + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return uprobe_warn(t, "dup xol area"); + + utask->vaddr = area->vaddr; + init_task_work(work, dup_xol_work); + task_work_add(t, work, true); } /* From 3ab679661721b1ec2aaad99a801870ed59ab1110 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 Oct 2013 19:39:37 +0200 Subject: [PATCH 7/7] uprobes: Teach uprobe_copy_process() to handle CLONE_VFORK uprobe_copy_process() does nothing if the child shares ->mm with the forking process, but there is a special case: CLONE_VFORK. In this case it would be more correct to do dup_utask() but avoid dup_xol(). This is not that important, the child should not unwind its stack too much, this can corrupt the parent's stack, but at least we need this to allow to ret-probe __vfork() itself. Note: in theory, it would be better to check task_pt_regs(p)->sp instead of CLONE_VFORK, we need to dup_utask() if and only if the child can return from the function called by the parent. But this needs the arch-dependant helper, and I think that nobody actually does clone(same_stack, CLONE_VM). Reported-by: Martin Cermak Reported-by: David Smith Signed-off-by: Oleg Nesterov --- include/linux/uprobes.h | 4 ++-- kernel/events/uprobes.c | 10 ++++++++-- kernel/fork.c | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e6fba627ea451..9e0d5a6fe7a82 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -117,7 +117,7 @@ extern void uprobe_start_dup_mmap(void); extern void uprobe_end_dup_mmap(void); extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); -extern void uprobe_copy_process(struct task_struct *t); +extern void uprobe_copy_process(struct task_struct *t, unsigned long flags); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); extern int uprobe_post_sstep_notifier(struct pt_regs *regs); extern int uprobe_pre_sstep_notifier(struct pt_regs *regs); @@ -174,7 +174,7 @@ static inline unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) static inline void uprobe_free_utask(struct task_struct *t) { } -static inline void uprobe_copy_process(struct task_struct *t) +static inline void uprobe_copy_process(struct task_struct *t, unsigned long flags) { } static inline void uprobe_clear_state(struct mm_struct *mm) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9f282e14925dd..ae9e1d2ef256a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1415,7 +1415,7 @@ static void dup_xol_work(struct callback_head *work) /* * Called in context of a new clone/fork from copy_process. */ -void uprobe_copy_process(struct task_struct *t) +void uprobe_copy_process(struct task_struct *t, unsigned long flags) { struct uprobe_task *utask = current->utask; struct mm_struct *mm = current->mm; @@ -1424,7 +1424,10 @@ void uprobe_copy_process(struct task_struct *t) t->utask = NULL; - if (mm == t->mm || !utask || !utask->return_instances) + if (!utask || !utask->return_instances) + return; + + if (mm == t->mm && !(flags & CLONE_VFORK)) return; if (dup_utask(t, utask)) @@ -1435,6 +1438,9 @@ void uprobe_copy_process(struct task_struct *t) if (!area) return uprobe_warn(t, "dup xol area"); + if (mm == t->mm) + return; + /* TODO: move it into the union in uprobe_task */ work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) diff --git a/kernel/fork.c b/kernel/fork.c index d3603b81246b5..8531609b6a824 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1489,7 +1489,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, perf_event_fork(p); trace_task_newtask(p, clone_flags); - uprobe_copy_process(p); + uprobe_copy_process(p, clone_flags); return p;