From a9241ea5fd709fc935dade130f4e3b2612bbe9e3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 6 Feb 2015 15:01:58 -0500
Subject: [PATCH 1/8] x86/fpu: Don't reset thread.fpu_counter

The "else" branch clears ->fpu_counter as a remnant of the lazy FPU
usage counting:

  e07e23e1fd30 ("[PATCH] non lazy "sleazy" fpu implementation")

However, switch_fpu_prepare() does this now, so that "else" branch is
superfluous.

If we do use_eager_fpu(), then this has no effect. Otherwise, if we
actually wanted to prevent FPU preload after the context switch, we
would need to reset the counter unconditionally, even if
__thread_has_fpu().

Signed-off-by: Oleg Nesterov
Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/1423252925-14451-2-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/kernel/i387.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a9a4229f6161..4d0db9ed58e0 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -108,8 +108,7 @@ void unlazy_fpu(struct task_struct *tsk)
 	if (__thread_has_fpu(tsk)) {
 		__save_init_fpu(tsk);
 		__thread_fpu_end(tsk);
-	} else
-		tsk->thread.fpu_counter = 0;
+	}
 	preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);

From 1a2a7f4ec8e3a7ac582dac4d01fcc7e8acd3bb30 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 6 Feb 2015 15:01:59 -0500
Subject: [PATCH 2/8] x86/fpu: Don't do __thread_fpu_end() if use_eager_fpu()

unlazy_fpu()->__thread_fpu_end() doesn't look right if use_eager_fpu().
An unconditional __thread_fpu_end() is only correct if we know that
this thread can't return to user mode and use the FPU.

Fortunately, unlazy_fpu() has only two callers: fpu_copy() checks
use_eager_fpu(), and init_fpu(current) can only be called by the
coredumping thread via regset->get(). But unlazy_fpu() is exported to
modules, and IMO this should be fixed anyway.

And if we check use_eager_fpu() we can use __save_fpu() like fpu_copy()
and save_init_fpu() do.

- It seems that even the !use_eager_fpu() case doesn't need the
  unconditional __thread_fpu_end(); we only need it if
  __save_init_fpu() returns 0.

- It is still not clear to me whether __save_init_fpu() can safely nest
  with another save + restore from __kernel_fpu_begin(). If not, we can
  use kernel_fpu_disable() to fix the race.

Signed-off-by: Oleg Nesterov
Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/1423252925-14451-3-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/kernel/i387.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 4d0db9ed58e0..f3ced6f4b2b6 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -106,8 +106,12 @@ void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
 	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
+		if (use_eager_fpu()) {
+			__save_fpu(tsk);
+		} else {
+			__save_init_fpu(tsk);
+			__thread_fpu_end(tsk);
+		}
 	}
 	preempt_enable();
 }

From 08a744c6bfded3d5fa66f94263f81773226113d1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 6 Feb 2015 15:02:00 -0500
Subject: [PATCH 3/8] x86/fpu: Change math_error() to use unlazy_fpu(), kill
 (now) unused save_init_fpu()

math_error() calls save_init_fpu() after conditional_sti(), which means
that the caller can be preempted. If !use_eager_fpu(), we can hit the
WARN_ON_ONCE(!__thread_has_fpu(tsk)) and/or save the wrong FPU state.

Change math_error() to use unlazy_fpu() and kill save_init_fpu().
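To illustrate, the problematic ordering in math_error() before this
change looks roughly like this (a sketch, with the surrounding code
abbreviated):

	conditional_sti(regs);	/* may re-enable interrupts: from here
				 * on the task can be preempted, and a
				 * context switch saves its FPU state
				 * and clears __thread_has_fpu() */
	...
	save_init_fpu(task);	/* with !use_eager_fpu() this can hit
				 * WARN_ON_ONCE(!__thread_has_fpu(tsk))
				 * and/or save the wrong FPU state */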
Signed-off-by: Oleg Nesterov
Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/1423252925-14451-4-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/include/asm/fpu-internal.h | 18 ------------------
 arch/x86/kernel/traps.c             |  2 +-
 2 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e97622f57722..02f2e0817918 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -517,24 +517,6 @@ static inline void __save_fpu(struct task_struct *tsk)
 		fpu_fxsave(&tsk->thread.fpu);
 }
 
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-	if (use_eager_fpu()) {
-		__save_fpu(tsk);
-		return;
-	}
-
-	preempt_disable();
-	__save_init_fpu(tsk);
-	__thread_fpu_end(tsk);
-	preempt_enable();
-}
-
 /*
  * i387 state interaction
  */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 88900e288021..9d889f74e806 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -663,7 +663,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	/*
 	 * Save the info for the exception handler and clear the error.
 	 */
-	save_init_fpu(task);
+	unlazy_fpu(task);
 	task->thread.trap_nr = trapnr;
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;

From 1c927eea4cad83c439cb51e9c96ad19cb005157d Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Fri, 6 Feb 2015 15:02:01 -0500
Subject: [PATCH 4/8] x86/fpu: Move lazy restore functions up a few lines

We need another lazy-restore-related function, which will be called
from a function that sits above where the lazy restore functions are
now. It would be nice to keep all three functions grouped together.

Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Cc: Oleg Nesterov
Link: http://lkml.kernel.org/r/1423252925-14451-5-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/include/asm/fpu-internal.h | 36 ++++++++++++++---------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 02f2e0817918..217d6d7b9cb0 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,24 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+	per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+	return new == this_cpu_read_stable(fpu_owner_task) &&
+	       cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
 	return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -398,24 +416,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-	per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-	return new == this_cpu_read_stable(fpu_owner_task) &&
-	       cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
 	fpu_switch_t fpu;

From 33e03dedd759cc9396252d9641b25d01909a26bb Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Fri, 6 Feb 2015 15:02:02 -0500
Subject: [PATCH 5/8] x86/fpu: Introduce task_disable_lazy_fpu_restore()
 helper

Currently there are a few magic assignments sprinkled through the code
that disable lazy FPU state restoring, some more effective than others,
and all equally mystifying.

It would be easier to have a helper to explicitly disable lazy FPU
state restoring for a task.

Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Cc: Oleg Nesterov
Link: http://lkml.kernel.org/r/1423252925-14451-6-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/include/asm/fpu-internal.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 217d6d7b9cb0..9c27f44e1c5c 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -79,6 +79,16 @@ static inline void __cpu_disable_lazy_restore(unsigned int cpu)
 	per_cpu(fpu_owner_task, cpu) = NULL;
 }
 
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+	tsk->thread.fpu.last_cpu = ~0;
+}
+
 static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
 {
 	return new == this_cpu_read_stable(fpu_owner_task) &&

From 1361ef29c7e49ae7cf37220c25fac1904b77f71a Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Fri, 6 Feb 2015 15:02:03 -0500
Subject: [PATCH 6/8] x86/fpu: Use an explicit if/else in switch_fpu_prepare()

Use an explicit if/else branch after __save_init_fpu(old) in
switch_fpu_prepare(). This makes substituting the assignment with a
call to task_disable_lazy_fpu_restore() in the next patch easier to
review.

Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Cc: Oleg Nesterov
Link: http://lkml.kernel.org/r/1423252925-14451-7-git-send-email-riel@redhat.com
[ Space out stuff for more readability. ]
Signed-off-by: Borislav Petkov
---
 arch/x86/include/asm/fpu-internal.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 9c27f44e1c5c..04c2807aab66 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -434,13 +434,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-					     new->thread.fpu_counter > 5);
+	fpu.preload = tsk_used_math(new) &&
+		      (use_eager_fpu() || new->thread.fpu_counter > 5);
+
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
-			cpu = ~0;
-		old->thread.fpu.last_cpu = cpu;
-		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
+			old->thread.fpu.last_cpu = ~0;
+		else
+			old->thread.fpu.last_cpu = cpu;
+
+		/* But leave fpu_owner_task! */
+		old->thread.fpu.has_fpu = 0;
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {

From 6a5fe8952bd676baf382d14df21e7b32b5d8943e Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Fri, 6 Feb 2015 15:02:04 -0500
Subject: [PATCH 7/8] x86/fpu: Use task_disable_lazy_fpu_restore() helper

Replace magic assignments of fpu.last_cpu = ~0 with more explicit
task_disable_lazy_fpu_restore() calls.

Signed-off-by: Rik van Riel
Cc: Oleg Nesterov
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/1423252925-14451-8-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/include/asm/fpu-internal.h | 4 ++--
 arch/x86/kernel/i387.c              | 2 +-
 arch/x86/kernel/process.c           | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 04c2807aab66..e5f8f8eaf225 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -439,7 +439,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
-			old->thread.fpu.last_cpu = ~0;
+			task_disable_lazy_fpu_restore(old);
 		else
 			old->thread.fpu.last_cpu = cpu;
 
@@ -455,7 +455,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			stts();
 	} else {
 		old->thread.fpu_counter = 0;
-		old->thread.fpu.last_cpu = ~0;
+		task_disable_lazy_fpu_restore(old);
 		if (fpu.preload) {
 			new->thread.fpu_counter++;
 			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f3ced6f4b2b6..5722ab6c7c36 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -236,7 +236,7 @@ int init_fpu(struct task_struct *tsk)
 	if (tsk_used_math(tsk)) {
 		if (cpu_has_fpu && tsk == current)
 			unlazy_fpu(tsk);
-		tsk->thread.fpu.last_cpu = ~0;
+		task_disable_lazy_fpu_restore(tsk);
 		return 0;
 	}
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e127ddaa2d5a..ce8b10351e28 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -68,8 +68,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
 	dst->thread.fpu_counter = 0;
 	dst->thread.fpu.has_fpu = 0;
-	dst->thread.fpu.last_cpu = ~0;
 	dst->thread.fpu.state = NULL;
+	task_disable_lazy_fpu_restore(dst);
 	if (tsk_used_math(src)) {
 		int err = fpu_alloc(&dst->thread.fpu);
 		if (err)

From 728e53fef429a0f3c9dda3587c3ccc57ad268b70 Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Fri, 6 Feb 2015 15:02:05 -0500
Subject: [PATCH 8/8] x86/fpu: Also check fpu_lazy_restore() when
 use_eager_fpu()

With Oleg's patch:

  33a3ebdc077f ("x86, fpu: Don't abuse has_fpu in __kernel_fpu_begin/end()")

kernel threads no longer have an FPU state even on systems with
use_eager_fpu(). That in turn means that a task may still have its FPU
state loaded in the FPU registers, if the task only got interrupted by
kernel threads from when it went to sleep, to when it woke up again.

In that case, there is no need to restore the FPU state for this task,
since it is still in the registers.

The kernel can simply use the same logic to determine this as is used
for !use_eager_fpu() systems.
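Schematically, the scenario this optimizes looks like this (a sketch;
the task and CPU names are illustrative):

	/*
	 * Task A runs on CPU 0 and owns the FPU:
	 *	fpu_owner_task == A, A->thread.fpu.last_cpu == 0
	 *
	 * A sleeps; only kernel threads run on CPU 0, and with
	 * use_eager_fpu() they no longer clobber A's register state.
	 *
	 * A is scheduled back in on CPU 0:
	 *	fpu_lazy_restore(A, 0) is true, the registers still hold
	 *	A's state, and fpu.preload can be cleared.
	 */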
Signed-off-by: Rik van Riel
Cc: Linus Torvalds
Cc: Oleg Nesterov
Link: http://lkml.kernel.org/r/1423252925-14451-9-git-send-email-riel@redhat.com
Signed-off-by: Borislav Petkov
---
 arch/x86/include/asm/fpu-internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e5f8f8eaf225..19fb41cc4755 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -458,7 +458,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 		task_disable_lazy_fpu_restore(old);
 		if (fpu.preload) {
 			new->thread.fpu_counter++;
-			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+			if (fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);