Skip to content

Commit

Permalink
sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
Browse files Browse the repository at this point in the history
We currently keep every task's stack around until the task_struct
itself is freed.  This means that we keep the stack allocation alive
for longer than necessary and that, under load, we free stacks in
big batches whenever RCU drops the last task reference.  Neither of
these is good for reuse of cache-hot memory, and freeing in batches
prevents us from usefully caching small numbers of vmalloced stacks.

On architectures that have thread_info on the stack, we can't easily
change this, but on architectures that set THREAD_INFO_IN_TASK, we
can free the stack as soon as the task is dead.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jann Horn <jann@thejh.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Andy Lutomirski authored and Ingo Molnar committed Sep 16, 2016
1 parent aa1f1a6 commit 68f24b0
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 2 deletions.
4 changes: 3 additions & 1 deletion include/linux/init_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,9 @@ extern struct task_group root_task_group;
#endif

#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
 * INIT_TASK_TI(tsk) expands to designated initializers for the
 * thread_info-related members of a task_struct initializer.  With
 * CONFIG_THREAD_INFO_IN_TASK it sets .thread_info and starts
 * .stack_refcount at 1; without it the macro expands to nothing.
 *
 * NOTE(review): this view shows both the single-line definition and the
 * multi-line definition of the macro; the hunk reports 3 additions and
 * 1 deletion, so presumably the single-line form is the one being
 * replaced -- confirm against the tree.
 */
# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk),
# define INIT_TASK_TI(tsk) \
.thread_info = INIT_THREAD_INFO(tsk), \
.stack_refcount = ATOMIC_INIT(1),
#else
# define INIT_TASK_TI(tsk)
#endif
Expand Down
14 changes: 14 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1936,6 +1936,10 @@ struct task_struct {
#ifdef CONFIG_VMAP_STACK
struct vm_struct *stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
/* A live task holds one reference. */
atomic_t stack_refcount;
#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/*
Expand Down Expand Up @@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p)

#endif

#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
 * Try to take a reference on @tsk's stack.
 *
 * The reference is only taken if stack_refcount is still non-zero.
 * Returns task_stack_page(tsk) on success, or NULL if the count had
 * already reached zero.  A successful call is balanced by
 * put_task_stack().
 */
static inline void *try_get_task_stack(struct task_struct *tsk)
{
	if (!atomic_inc_not_zero(&tsk->stack_refcount))
		return NULL;

	return task_stack_page(tsk);
}

/*
 * Drop one reference on @tsk's stack.  Defined in kernel/fork.c, where
 * dropping the last reference releases the stack.
 */
extern void put_task_stack(struct task_struct *tsk);
#else
/*
 * Variant used without CONFIG_THREAD_INFO_IN_TASK: there is no stack
 * refcount to take here, so this just hands back task_stack_page(tsk).
 */
static inline void *try_get_task_stack(struct task_struct *tsk)
{
	void *stack = task_stack_page(tsk);

	return stack;
}

/*
 * Variant used without CONFIG_THREAD_INFO_IN_TASK: no stack reference
 * is tracked in this configuration, so there is nothing to drop.
 */
static inline void put_task_stack(struct task_struct *tsk)
{
}
#endif

/*
 * Evaluates to non-zero when the word at end_of_stack(task) no longer
 * holds STACK_END_MAGIC.
 */
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
Expand Down
35 changes: 34 additions & 1 deletion kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}

void free_task(struct task_struct *tsk)
/*
 * Release @tsk's stack and clear the task's pointers to it.
 *
 * Called from put_task_stack() once the last stack reference is dropped
 * (CONFIG_THREAD_INFO_IN_TASK), or directly from free_task() otherwise.
 */
static void release_task_stack(struct task_struct *tsk)
{
/* NOTE(review): presumably undoes the accounting charged for this stack -- confirm. */
account_kernel_stack(tsk, -1);
arch_release_thread_stack(tsk->stack);
free_thread_stack(tsk);
/* Clear the pointers so @tsk no longer refers to the released stack. */
tsk->stack = NULL;
#ifdef CONFIG_VMAP_STACK
tsk->stack_vm_area = NULL;
#endif
}

#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
 * Drop one reference on @tsk's stack.  When the count reaches zero the
 * stack is released via release_task_stack().
 */
void put_task_stack(struct task_struct *tsk)
{
	if (!atomic_dec_and_test(&tsk->stack_refcount))
		return;

	/* That was the last reference. */
	release_task_stack(tsk);
}
#endif

void free_task(struct task_struct *tsk)
{
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
* so free both.
*/
release_task_stack(tsk);
#else
/*
* If the task had a separate stack allocation, it should be gone
* by now.
*/
WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
#endif
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
Expand Down Expand Up @@ -411,6 +440,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_VMAP_STACK
tsk->stack_vm_area = stack_vm_area;
#endif
#ifdef CONFIG_THREAD_INFO_IN_TASK
atomic_set(&tsk->stack_refcount, 1);
#endif

if (err)
goto free_stack;
Expand Down Expand Up @@ -1771,6 +1803,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
atomic_dec(&p->cred->user->processes);
exit_creds(p);
bad_fork_free:
put_task_stack(p);
free_task(p);
fork_out:
return ERR_PTR(retval);
Expand Down
4 changes: 4 additions & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2772,6 +2772,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
* task and put them back on the free list.
*/
kprobe_flush_task(prev);

/* Task is done with its stack. */
put_task_stack(prev);

put_task_struct(prev);
}

Expand Down

0 comments on commit 68f24b0

Please sign in to comment.