Skip to content

Commit

Permalink
vhost_task: Allow vhost layer to use copy_process
Browse files Browse the repository at this point in the history
Qemu will create vhost devices in the kernel which perform network, SCSI,
etc IO and management operations from worker threads created by the
kthread API. Because the kthread API does a copy_process on the kthreadd
thread, the vhost layer has to use kthread_use_mm to access the Qemu
thread's memory and cgroup_attach_task_all to add itself to the Qemu
thread's cgroups, and it bypasses the RLIMIT_NPROC limit which can result
in VMs creating more threads than the admin expected.

This patch adds a new struct vhost_task which can be used instead of
kthreads. It allows the vhost layer to use copy_process and inherit
the userspace process's mm and cgroups. The task is accounted for
under the userspace process's nproc count and can be seen in its process
tree, and other features like namespaces work and are inherited by default.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
Mike Christie authored and Christian Brauner committed Mar 23, 2023
1 parent 89c8e98 commit e297cd5
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 0 deletions.
2 changes: 2 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -22176,7 +22176,9 @@ L: virtualization@lists.linux-foundation.org
L: netdev@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
F: kernel/vhost_task.c
F: drivers/vhost/
F: include/linux/sched/vhost_task.h
F: include/linux/vhost_iotlb.h
F: include/uapi/linux/vhost.h

Expand Down
5 changes: 5 additions & 0 deletions drivers/vhost/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@ config VHOST_RING
This option is selected by any driver which needs to access
the host side of a virtio ring.

config VHOST_TASK
bool
default n

config VHOST
tristate
select VHOST_IOTLB
select VHOST_TASK
help
This option is selected by any driver which needs to access
the core of vhost.
Expand Down
23 changes: 23 additions & 0 deletions include/linux/sched/vhost_task.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VHOST_TASK_H
#define _LINUX_VHOST_TASK_H

#include <linux/completion.h>

struct task_struct;

/*
 * Bookkeeping for one vhost worker task. Allocated by vhost_task_create()
 * and freed by vhost_task_stop().
 */
struct vhost_task {
/* Worker function; called once with @data by the task's entry point. */
int (*fn)(void *data);
/* Opaque argument handed to @fn. */
void *data;
/* Completed after @fn returns; vhost_task_stop() waits on it. */
struct completion exited;
/* Bit flags; vhost_task_stop() sets the stop bit here. */
unsigned long flags;
/* The task returned by copy_process() for this worker. */
struct task_struct *task;
};

/*
 * Allocate a vhost_task that will run fn(arg). Returns NULL on failure.
 * The task does not run until vhost_task_start() is called on it.
 */
struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg,
const char *name);
/* Wake up a task obtained from vhost_task_create() for the first time. */
void vhost_task_start(struct vhost_task *vtsk);
/*
 * Request the task to stop, wait until its worker function has returned,
 * then free vtsk. vtsk must not be used after this call.
 */
void vhost_task_stop(struct vhost_task *vtsk);
/* Returns true once vhost_task_stop() has requested the task to stop. */
bool vhost_task_should_stop(struct vhost_task *vtsk);

#endif
1 change: 1 addition & 0 deletions kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ obj-y = fork.o exec_domain.o panic.o \
obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o
obj-$(CONFIG_MODULES) += kmod.o
obj-$(CONFIG_MULTIUSER) += groups.o
obj-$(CONFIG_VHOST_TASK) += vhost_task.o

ifdef CONFIG_FUNCTION_TRACER
# Do not trace internal ftrace files
Expand Down
117 changes: 117 additions & 0 deletions kernel/vhost_task.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021 Oracle Corporation
*/
#include <linux/slab.h>
#include <linux/completion.h>
#include <linux/sched/task.h>
#include <linux/sched/vhost_task.h>
#include <linux/sched/signal.h>

/*
 * Bit numbers (not masks) used with set_bit()/test_bit() on
 * vhost_task->flags.
 */
enum vhost_task_flags {
	/* Set by vhost_task_stop() to ask the worker to return. */
	VHOST_TASK_FLAGS_STOP = 0,
};

/*
 * Entry point of every vhost task: run the caller-supplied worker function,
 * announce that it has finished, then exit with its return value.
 */
static int vhost_task_fn(void *data)
{
	struct vhost_task *vtsk = data;
	int exit_code = vtsk->fn(vtsk->data);

	/*
	 * vhost_task_stop() frees vtsk once this completion fires, so vtsk
	 * must not be dereferenced past this point.
	 */
	complete(&vtsk->exited);
	do_exit(exit_code);
}

/**
* vhost_task_stop - stop a vhost_task
* @vtsk: vhost_task to stop
*
* Callers must call vhost_task_should_stop and return from their worker
* function when it returns true;
*/
void vhost_task_stop(struct vhost_task *vtsk)
{
pid_t pid = vtsk->task->pid;

set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
wake_up_process(vtsk->task);
/*
* Make sure vhost_task_fn is no longer accessing the vhost_task before
* freeing it below. If userspace crashed or exited without closing,
* then the vhost_task->task could already be marked dead so
* kernel_wait will return early.
*/
wait_for_completion(&vtsk->exited);
/*
* If we are just closing/removing a device and the parent process is
* not exiting then reap the task.
*/
kernel_wait4(pid, NULL, __WCLONE, NULL);
kfree(vtsk);
}
EXPORT_SYMBOL_GPL(vhost_task_stop);

/**
 * vhost_task_should_stop - has a stop been requested for this vhost task?
 * @vtsk: vhost_task to check
 *
 * Return: true once vhost_task_stop() has set the stop flag, meaning the
 * worker function should return; false otherwise.
 */
bool vhost_task_should_stop(struct vhost_task *vtsk)
{
	bool stop_requested;

	stop_requested = test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
	return stop_requested;
}
EXPORT_SYMBOL_GPL(vhost_task_should_stop);

/**
 * vhost_task_create - create a copy of a process to be used by the kernel
 * @fn: worker function the new task will run
 * @arg: data to be passed to @fn
 * @name: the thread's name
 *
 * Return: a specialized task for use by the vhost layer, or NULL if either
 * the allocation or copy_process() fails. The returned task is inactive;
 * the caller must fire it up through vhost_task_start().
 */
struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg,
				     const char *name)
{
	struct kernel_clone_args args = {
		.flags		= CLONE_FS | CLONE_UNTRACED | CLONE_VM,
		.exit_signal	= 0,
		.fn		= vhost_task_fn,
		.name		= name,
		.user_worker	= 1,
		.no_files	= 1,
		.ignore_signals	= 1,
	};
	struct task_struct *new_task;
	struct vhost_task *vtsk;

	vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL);
	if (!vtsk)
		return NULL;

	/* Record what vhost_task_fn needs before the task can exist. */
	vtsk->fn = fn;
	vtsk->data = arg;
	init_completion(&vtsk->exited);

	/* vhost_task_fn receives the vhost_task itself as its argument. */
	args.fn_arg = vtsk;

	new_task = copy_process(NULL, 0, NUMA_NO_NODE, &args);
	if (!IS_ERR(new_task)) {
		vtsk->task = new_task;
		return vtsk;
	}

	/* No task was created, so nothing else references vtsk. */
	kfree(vtsk);
	return NULL;
}
EXPORT_SYMBOL_GPL(vhost_task_create);

/**
 * vhost_task_start - start a vhost_task created with vhost_task_create
 * @vtsk: vhost_task to wake up
 *
 * Hands the task stored in @vtsk to wake_up_new_task().
 */
void vhost_task_start(struct vhost_task *vtsk)
{
	struct task_struct *new_task = vtsk->task;

	wake_up_new_task(new_task);
}
EXPORT_SYMBOL_GPL(vhost_task_start);

0 comments on commit e297cd5

Please sign in to comment.