Skip to content

Commit

Permalink
x86, fpu: lazy allocation of FPU area - v5
Browse files Browse the repository at this point in the history
Only allocate the FPU area when the application actually uses FPU, i.e., in the
first lazy FPU trap. This could save memory for non-fpu using apps.

for example: on my system after boot, there are around 300 processes, with
only 17 using FPU.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
  • Loading branch information
Suresh Siddha authored and Ingo Molnar committed Apr 19, 2008
1 parent 61c4628 commit aa283f4
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 23 deletions.
38 changes: 30 additions & 8 deletions arch/x86/kernel/i387.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <linux/module.h>
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/bootmem.h>

#include <asm/sigcontext.h>
#include <asm/processor.h>
Expand Down Expand Up @@ -63,7 +62,6 @@ void __init init_thread_xstate(void)
else
xstate_size = sizeof(struct i387_fsave_struct);
#endif
init_task.thread.xstate = alloc_bootmem(xstate_size);
}

#ifdef CONFIG_X86_64
Expand Down Expand Up @@ -93,12 +91,22 @@ void __cpuinit fpu_init(void)
* value at reset if we support XMM instructions and then
* remeber the current task has used the FPU.
*/
void init_fpu(struct task_struct *tsk)
int init_fpu(struct task_struct *tsk)
{
if (tsk_used_math(tsk)) {
if (tsk == current)
unlazy_fpu(tsk);
return;
return 0;
}

/*
* Memory allocation at the first usage of the FPU and other state.
*/
if (!tsk->thread.xstate) {
tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
GFP_KERNEL);
if (!tsk->thread.xstate)
return -ENOMEM;
}

if (cpu_has_fxsr) {
Expand All @@ -120,6 +128,7 @@ void init_fpu(struct task_struct *tsk)
* Only the device not available exception or ptrace can call init_fpu.
*/
set_stopped_child_used_math(tsk);
return 0;
}

int fpregs_active(struct task_struct *target, const struct user_regset *regset)
Expand All @@ -136,10 +145,14 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
int ret;

if (!cpu_has_fxsr)
return -ENODEV;

init_fpu(target);
ret = init_fpu(target);
if (ret)
return ret;

return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.xstate->fxsave, 0, -1);
Expand All @@ -154,7 +167,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!cpu_has_fxsr)
return -ENODEV;

init_fpu(target);
ret = init_fpu(target);
if (ret)
return ret;

set_stopped_child_used_math(target);

ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
Expand Down Expand Up @@ -312,11 +328,14 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
void *kbuf, void __user *ubuf)
{
struct user_i387_ia32_struct env;
int ret;

if (!HAVE_HWFP)
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);

init_fpu(target);
ret = init_fpu(target);
if (ret)
return ret;

if (!cpu_has_fxsr) {
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
Expand Down Expand Up @@ -344,7 +363,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!HAVE_HWFP)
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);

init_fpu(target);
ret = init_fpu(target);
if (ret)
return ret;

set_stopped_child_used_math(target);

if (!cpu_has_fxsr) {
Expand Down
28 changes: 19 additions & 9 deletions arch/x86/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,34 @@
#include <linux/slab.h>
#include <linux/sched.h>

static struct kmem_cache *task_xstate_cachep;
struct kmem_cache *task_xstate_cachep;

int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
if (!dst->thread.xstate)
return -ENOMEM;
WARN_ON((unsigned long)dst->thread.xstate & 15);
memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
if (src->thread.xstate) {
dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
GFP_KERNEL);
if (!dst->thread.xstate)
return -ENOMEM;
WARN_ON((unsigned long)dst->thread.xstate & 15);
memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
}
return 0;
}

void free_thread_info(struct thread_info *ti)
void free_thread_xstate(struct task_struct *tsk)
{
kmem_cache_free(task_xstate_cachep, ti->task->thread.xstate);
ti->task->thread.xstate = NULL;
if (tsk->thread.xstate) {
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
}


void free_thread_info(struct thread_info *ti)
{
free_thread_xstate(ti->task);
free_pages((unsigned long)(ti), get_order(THREAD_SIZE));
}

Expand Down
4 changes: 4 additions & 0 deletions arch/x86/kernel/process_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

Expand Down
4 changes: 4 additions & 0 deletions arch/x86/kernel/process_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->ss = __USER_DS;
regs->flags = 0x200;
set_fs(USER_DS);
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

Expand Down
17 changes: 15 additions & 2 deletions arch/x86/kernel/traps_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void)
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;

if (!tsk_used_math(tsk)) {
local_irq_enable();
/*
* does a slab alloc which can sleep
*/
if (init_fpu(tsk)) {
/*
* ran out of memory!
*/
do_group_exit(SIGKILL);
return;
}
local_irq_disable();
}

clts(); /* Allow maths ops (or we recurse) */
if (!tsk_used_math(tsk))
init_fpu(tsk);
restore_fpu(tsk);
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
Expand Down
19 changes: 16 additions & 3 deletions arch/x86/kernel/traps_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -1124,10 +1124,23 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
asmlinkage void math_state_restore(void)
{
struct task_struct *me = current;
clts(); /* Allow maths ops (or we recurse) */

if (!used_math())
init_fpu(me);
if (!used_math()) {
local_irq_enable();
/*
* does a slab alloc which can sleep
*/
if (init_fpu(me)) {
/*
* ran out of memory!
*/
do_group_exit(SIGKILL);
return;
}
local_irq_disable();
}

clts(); /* Allow maths ops (or we recurse) */
restore_fpu_checking(&me->thread.xstate->fxsave);
task_thread_info(me)->status |= TS_USEDFPU;
me->fpu_counter++;
Expand Down
2 changes: 1 addition & 1 deletion include/asm-x86/i387.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern void init_fpu(struct task_struct *child);
extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void);
extern void init_thread_xstate(void);

Expand Down
2 changes: 2 additions & 0 deletions include/asm-x86/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,8 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist);

extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern unsigned short num_cache_leaves;
Expand Down

0 comments on commit aa283f4

Please sign in to comment.