Skip to content

Commit

Permalink
Make the user mode driver code a better citizen
Browse files Browse the repository at this point in the history
This is the third round of my changeset to split the user mode driver
code from the user mode helper code, and to make the code use common
facilities to get things done instead of recreating them just
for the user mode driver code.

I have split the changes into small enough pieces so they should be
easily readable and testable.

The changes lean into the preexisting interfaces in the kernel and
remove special cases for user mode driver code in favor of solutions
that don't need special cases.  This results in smaller code with fewer
bugs.

At a practical level this removes the maintenance burden of the user
mode drivers from the user mode helper code and from exec as the special
cases are removed.

Similarly the LSM interaction bugs are fixed by not having unnecessary
special cases for user mode drivers.

I have tested thes changes by booting with the code compiled in and
by killing "bpfilter_umh" and "running iptables -vnL" to restart
the userspace driver, also by running "while true; do iptables -L;rmmod
bpfilter; done" to verify the module load and unload work properly.

I have compiled tested each change with and without CONFIG_BPFILTER
enabled.

From v2 to v3 I have made two siginficant changes.
- I factored thread_group_exit out of pidfd_poll to allow the test
  to be used by the bpfilter code.
- I renamed umd.c and umd.h to usermode_driver.c and usermode_driver.h
  respectively.

I made a few very small changes from v1 to v2:
- Updated the function name in a comment when the function is renamed
- Moved some more code so that the the !CONFIG_BPFILTER case continues
  to compile when I moved the code into umd.c
- A fix for the module loading case to really flush the file descriptor.
- Removed split_argv entirely from fork_usermode_driver.
  There was nothing to split so it was just confusing.

Please let me know if you see any bugs.  Once the code review is
finished I plan to place the code in a non-rebasing branch
so I can pull it into my tree and so it can also be pulled into
the bpf-next tree.

v1: https://lkml.kernel.org/r/87pn9mgfc2.fsf_-_@x220.int.ebiederm.org
v2: https://lkml.kernel.org/r/87bll17ili.fsf_-_@x220.int.ebiederm.org

Eric W. Biederman (16):
      umh: Capture the pid in umh_pipe_setup
      umh: Move setting PF_UMH into umh_pipe_setup
      umh: Rename the user mode driver helpers for clarity
      umh: Remove call_usermodehelper_setup_file.
      umh: Separate the user mode driver and the user mode helper support
      umd: For clarity rename umh_info umd_info
      umd: Rename umd_info.cmdline umd_info.driver_name
      umd: Transform fork_usermode_blob into fork_usermode_driver
      umh: Stop calling do_execve_file
      exec: Remove do_execve_file
      bpfilter: Move bpfilter_umh back into init data
      umd: Track user space drivers with struct pid
      exit: Factor thread_group_exited out of pidfd_poll
      bpfilter: Take advantage of the facilities of struct pid
      umd: Remove exit_umh
      umd: Stop using split_argv

Link: https://lkml.kernel.org/r/87y2o1swee.fsf_-_@x220.int.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
  • Loading branch information
Eric W. Biederman committed Jul 8, 2020
2 parents b3a9e3b + 33c3260 commit f06b71f
Show file tree
Hide file tree
Showing 15 changed files with 275 additions and 260 deletions.
38 changes: 9 additions & 29 deletions fs/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1818,13 +1818,14 @@ static int exec_binprm(struct linux_binprm *bprm)
/*
* sys_execve() executes a new program.
*/
static int __do_execve_file(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags, struct file *file)
static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags)
{
char *pathbuf = NULL;
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
int retval;

Expand Down Expand Up @@ -1863,18 +1864,15 @@ static int __do_execve_file(int fd, struct filename *filename,
check_unsafe_exec(bprm);
current->in_execve = 1;

if (!file)
file = do_open_execat(fd, filename, flags);
file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;

sched_exec();

bprm->file = file;
if (!filename) {
bprm->filename = "none";
} else if (fd == AT_FDCWD || filename->name[0] == '/') {
if (fd == AT_FDCWD || filename->name[0] == '/') {
bprm->filename = filename->name;
} else {
if (filename->name[0] == '\0')
Expand Down Expand Up @@ -1935,8 +1933,7 @@ static int __do_execve_file(int fd, struct filename *filename,
task_numa_free(current, false);
free_bprm(bprm);
kfree(pathbuf);
if (filename)
putname(filename);
putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
Expand Down Expand Up @@ -1967,27 +1964,10 @@ static int __do_execve_file(int fd, struct filename *filename,
if (displaced)
reset_files_struct(displaced);
out_ret:
if (filename)
putname(filename);
putname(filename);
return retval;
}

static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags)
{
return __do_execve_file(fd, filename, argv, envp, flags, NULL);
}

int do_execve_file(struct file *file, void *__argv, void *__envp)
{
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };

return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
}

int do_execve(struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp)
Expand Down
1 change: 0 additions & 1 deletion include/linux/binfmts.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,5 @@ extern int do_execveat(int, struct filename *,
const char __user * const __user *,
const char __user * const __user *,
int);
int do_execve_file(struct file *file, void *__argv, void *__envp);

#endif /* _LINUX_BINFMTS_H */
7 changes: 4 additions & 3 deletions include/linux/bpfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@
#define _LINUX_BPFILTER_H

#include <uapi/linux/bpfilter.h>
#include <linux/umh.h>
#include <linux/usermode_driver.h>

struct sock;
int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
unsigned int optlen);
int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
void bpfilter_umh_cleanup(struct umd_info *info);

struct bpfilter_umh_ops {
struct umh_info info;
struct umd_info info;
/* since ip_getsockopt() can run in parallel, serialize access to umh */
struct mutex lock;
int (*sockopt)(struct sock *sk, int optname,
char __user *optval,
unsigned int optlen, bool is_set);
int (*start)(void);
bool stop;
};
extern struct bpfilter_umh_ops bpfilter_ops;
#endif
9 changes: 0 additions & 9 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,6 @@ extern struct pid *cad_pid;
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
Expand Down Expand Up @@ -2020,14 +2019,6 @@ static inline void rseq_execve(struct task_struct *t)

#endif

void __exit_umh(struct task_struct *tsk);

static inline void exit_umh(struct task_struct *tsk)
{
if (unlikely(tsk->flags & PF_UMH))
__exit_umh(tsk);
}

#ifdef CONFIG_DEBUG_RSEQ

void rseq_syscall(struct pt_regs *regs);
Expand Down
2 changes: 2 additions & 0 deletions include/linux/sched/signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,8 @@ static inline int thread_group_empty(struct task_struct *p)
#define delay_group_leader(p) \
(thread_group_leader(p) && !thread_group_empty(p))

extern bool thread_group_exited(struct pid *pid);

extern struct sighand_struct *__lock_task_sighand(struct task_struct *task,
unsigned long *flags);

Expand Down
15 changes: 0 additions & 15 deletions include/linux/umh.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ struct subprocess_info {
const char *path;
char **argv;
char **envp;
struct file *file;
int wait;
int retval;
pid_t pid;
int (*init)(struct subprocess_info *info, struct cred *new);
void (*cleanup)(struct subprocess_info *info);
void *data;
Expand All @@ -40,19 +38,6 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data);

struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data);
struct umh_info {
const char *cmdline;
struct file *pipe_to_umh;
struct file *pipe_from_umh;
struct list_head list;
void (*cleanup)(struct umh_info *info);
pid_t pid;
};
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);

extern int
call_usermodehelper_exec(struct subprocess_info *info, int wait);

Expand Down
18 changes: 18 additions & 0 deletions include/linux/usermode_driver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef __LINUX_USERMODE_DRIVER_H__
#define __LINUX_USERMODE_DRIVER_H__

#include <linux/umh.h>
#include <linux/path.h>

struct umd_info {
const char *driver_name;
struct file *pipe_to_umh;
struct file *pipe_from_umh;
struct path wd;
struct pid *tgid;
};
int umd_load_blob(struct umd_info *info, const void *data, size_t len);
int umd_unload_blob(struct umd_info *info);
int fork_usermode_driver(struct umd_info *info);

#endif /* __LINUX_USERMODE_DRIVER_H__ */
1 change: 1 addition & 0 deletions kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o smpboot.o ucount.o

obj-$(CONFIG_BPFILTER) += usermode_driver.o
obj-$(CONFIG_MODULES) += kmod.o
obj-$(CONFIG_MULTIUSER) += groups.o

Expand Down
25 changes: 24 additions & 1 deletion kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,6 @@ void __noreturn do_exit(long code)
exit_task_namespaces(tsk);
exit_task_work(tsk);
exit_thread(tsk);
exit_umh(tsk);

/*
* Flush inherited counters to the parent - before the parent
Expand Down Expand Up @@ -1711,6 +1710,30 @@ COMPAT_SYSCALL_DEFINE5(waitid,
}
#endif

/**
* thread_group_exited - check that a thread group has exited
* @pid: tgid of thread group to be checked.
*
* Test if the thread group represented by tgid has exited (all
* threads are zombies, dead or completely gone).
*
* Return: true if the thread group has exited. false otherwise.
*/
bool thread_group_exited(struct pid *pid)
{
struct task_struct *task;
bool exited;

rcu_read_lock();
task = pid_task(pid, PIDTYPE_PID);
exited = !task ||
(READ_ONCE(task->exit_state) && thread_group_empty(task));
rcu_read_unlock();

return exited;
}
EXPORT_SYMBOL(thread_group_exited);

__weak void abort(void)
{
BUG();
Expand Down
6 changes: 1 addition & 5 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1787,22 +1787,18 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
*/
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct task_struct *task;
struct pid *pid = file->private_data;
__poll_t poll_flags = 0;

poll_wait(file, &pid->wait_pidfd, pts);

rcu_read_lock();
task = pid_task(pid, PIDTYPE_PID);
/*
* Inform pollers only when the whole thread group exits.
* If the thread group leader exits before all other threads in the
* group, then poll(2) should block, similar to the wait(2) family.
*/
if (!task || (task->exit_state && thread_group_empty(task)))
if (thread_group_exited(pid))
poll_flags = EPOLLIN | EPOLLRDNORM;
rcu_read_unlock();

return poll_flags;
}
Expand Down
Loading

0 comments on commit f06b71f

Please sign in to comment.