From 5e787dbf659fe77d56215be74044f85e01b3920f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 22:10:35 +0200 Subject: [PATCH 1/5] devtmpfs: use do_mount() instead of ksys_mount() In devtmpfs, do_mount() can be called directly instead of complex wrapping by ksys_mount(): - the first and third arguments are const strings in the kernel, and do not need to be copied over from userspace; - the fifth argument is NULL, and therefore no page needs to be copied over from userspace; - the second and fourth argument are passed through anyway. Signed-off-by: Dominik Brodowski --- drivers/base/devtmpfs.c | 6 +++--- include/linux/device.h | 4 ++-- init/do_mounts.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 30d0523014e0d..6cdbf15312387 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -359,7 +359,7 @@ static int handle_remove(const char *nodename, struct device *dev) * If configured, or requested by the commandline, devtmpfs will be * auto-mounted after the kernel mounted the root filesystem. */ -int devtmpfs_mount(const char *mntdir) +int devtmpfs_mount(void) { int err; @@ -369,7 +369,7 @@ int devtmpfs_mount(const char *mntdir) if (!thread) return 0; - err = ksys_mount("devtmpfs", mntdir, "devtmpfs", MS_SILENT, NULL); + err = do_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL); if (err) printk(KERN_INFO "devtmpfs: error mounting %i\n", err); else @@ -394,7 +394,7 @@ static int devtmpfsd(void *p) *err = ksys_unshare(CLONE_NEWNS); if (*err) goto out; - *err = ksys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); + *err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL); if (*err) goto out; ksys_chdir("/.."); /* will traverse into overmounted root */ diff --git a/include/linux/device.h b/include/linux/device.h index e226030c1df3c..96ff76731e93d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1666,11 +1666,11 @@ extern bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS extern int devtmpfs_create_node(struct device *dev); extern int devtmpfs_delete_node(struct device *dev); -extern int devtmpfs_mount(const char *mntdir); +extern int devtmpfs_mount(void); #else static inline int devtmpfs_create_node(struct device *dev) { return 0; } static inline int devtmpfs_delete_node(struct device *dev) { return 0; } -static inline int devtmpfs_mount(const char *mountpoint) { return 0; } +static inline int devtmpfs_mount(void) { return 0; } #endif /* drivers/base/power/shutdown.c */ diff --git a/init/do_mounts.c b/init/do_mounts.c index af9cda887a23f..43f6d098c8806 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -670,7 +670,7 @@ void __init prepare_namespace(void) mount_root(); out: - devtmpfs_mount("dev"); + devtmpfs_mount(); ksys_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); } From d4440aac83d12f87df9bcc51e992b9c28c7f4fa5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 27 Nov 2019 20:24:14 +0100 Subject: [PATCH 2/5] initrd: use do_mount() instead of ksys_mount() All three calls to ksys_mount() in initrd-related kernel code can be switched over to do_mount(): - the first and third arguments are const strings in the kernel, and do not need to be copied over from userspace; - the fifth argument is NULL, and therefore no page needs to be, copied over from userspace; - the second and fourth argument are passed through anyway. Signed-off-by: Dominik Brodowski --- init/do_mounts_initrd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index a9c6cc56f505e..3bf7b74153ab7 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -54,7 +54,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new) ksys_dup(0); /* move initrd over / and chdir/chroot in initrd root */ ksys_chdir("/root"); - ksys_mount(".", "/", NULL, MS_MOVE, NULL); + do_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); ksys_setsid(); return 0; @@ -89,7 +89,7 @@ static void __init handle_initrd(void) current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - ksys_mount("..", ".", NULL, MS_MOVE, NULL); + do_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ ksys_chroot(".."); @@ -103,7 +103,7 @@ static void __init handle_initrd(void) mount_root(); printk(KERN_NOTICE "Trying to move old root to /initrd ... "); - error = ksys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); + error = do_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); if (!error) printk("okay\n"); else { From cccaa5e33525fc07f4a2ce0518e50b9ddf435e47 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 22:41:09 +0200 Subject: [PATCH 3/5] init: use do_mount() instead of ksys_mount() In prepare_namespace(), do_mount() can be used instead of ksys_mount() as the first and third argument are const strings in the kernel, the second and fourth argument are passed through anyway, and the fifth argument is NULL. In do_mount_root(), ksys_mount() is called with the first and third argument being already kernelspace strings, which do not need to be copied over from userspace to kernelspace (again). The second and fourth arguments are passed through to do_mount() anyway. The fifth argument, while already residing in kernelspace, needs to be put into a page of its own. Then, do_mount() can be used instead of ksys_mount(). Once this is done, there are no in-kernel users to ksys_mount() left, which can therefore be removed. Signed-off-by: Dominik Brodowski --- fs/namespace.c | 10 ++-------- include/linux/syscalls.h | 2 -- init/do_mounts.c | 28 ++++++++++++++++++++++------ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 2fd0c8bcb8c14..be601d3a80080 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3325,8 +3325,8 @@ struct dentry *mount_subtree(struct vfsmount *m, const char *name) } EXPORT_SYMBOL(mount_subtree); -int ksys_mount(const char __user *dev_name, const char __user *dir_name, - const char __user *type, unsigned long flags, void __user *data) +SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, + char __user *, type, unsigned long, flags, void __user *, data) { int ret; char *kernel_type; @@ -3359,12 +3359,6 @@ int ksys_mount(const char __user *dev_name, const char __user *dir_name, return ret; } -SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, - char __user *, type, unsigned long, flags, void __user *, data) -{ - return ksys_mount(dev_name, dir_name, type, flags, data); -} - /* * Create a kernel mount representation for a new, prepared superblock * (specified by fs_fd) and attach to an open_tree-like file descriptor. diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d0391cc2dae90..5262b7a76d392 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1231,8 +1231,6 @@ asmlinkage long sys_ni_syscall(void); * the ksys_xyzyyz() functions prototyped below. */ -int ksys_mount(const char __user *dev_name, const char __user *dir_name, - const char __user *type, unsigned long flags, void __user *data); int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); diff --git a/init/do_mounts.c b/init/do_mounts.c index 43f6d098c8806..f55cbd9cb8183 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -387,12 +387,25 @@ static void __init get_fs_names(char *page) *s = '\0'; } -static int __init do_mount_root(char *name, char *fs, int flags, void *data) +static int __init do_mount_root(const char *name, const char *fs, + const int flags, const void *data) { struct super_block *s; - int err = ksys_mount(name, "/root", fs, flags, data); - if (err) - return err; + char *data_page; + struct page *p; + int ret; + + /* do_mount() requires a full page as fifth argument */ + p = alloc_page(GFP_KERNEL); + if (!p) + return -ENOMEM; + + data_page = page_address(p); + strncpy(data_page, data, PAGE_SIZE - 1); + + ret = do_mount(name, "/root", fs, flags, data_page); + if (ret) + goto out; ksys_chdir("/root"); s = current->fs->pwd.dentry->d_sb; @@ -402,7 +415,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data) s->s_type->name, sb_rdonly(s) ? " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV)); - return 0; + +out: + put_page(p); + return ret; } void __init mount_block_root(char *name, int flags) @@ -671,7 +687,7 @@ void __init prepare_namespace(void) mount_root(); out: devtmpfs_mount(); - ksys_mount(".", "/", NULL, MS_MOVE, NULL); + do_mount(".", "/", NULL, MS_MOVE, NULL); ksys_chroot("."); } From b49a733d684e0096340b93e9dfd471f0e3ddc06d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 16:00:10 +0200 Subject: [PATCH 4/5] init: unify opening /dev/console as stdin/stdout/stderr Merge the two instances where /dev/console is opened as stdin/stdout/stderr. Signed-off-by: Dominik Brodowski --- include/linux/initrd.h | 2 ++ init/do_mounts_initrd.c | 5 +---- init/main.c | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/initrd.h b/include/linux/initrd.h index d77fe34fb00a0..aa59143557286 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -28,3 +28,5 @@ extern unsigned int real_root_dev; extern char __initramfs_start[]; extern unsigned long __initramfs_size; + +void console_on_rootfs(void); diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 3bf7b74153ab7..dab8b1151b569 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -48,10 +48,7 @@ early_param("initrd", early_initrd); static int init_linuxrc(struct subprocess_info *info, struct cred *new) { ksys_unshare(CLONE_FS | CLONE_FILES); - /* stdin/stdout/stderr for /linuxrc */ - ksys_open("/dev/console", O_RDWR, 0); - ksys_dup(0); - ksys_dup(0); + console_on_rootfs(); /* move initrd over / and chdir/chroot in initrd root */ ksys_chdir("/root"); do_mount(".", "/", NULL, MS_MOVE, NULL); diff --git a/init/main.c b/init/main.c index 91f6ebb30ef04..2cd736059416f 100644 --- a/init/main.c +++ b/init/main.c @@ -1155,6 +1155,17 @@ static int __ref kernel_init(void *unused) "See Linux Documentation/admin-guide/init.rst for guidance."); } +void console_on_rootfs(void) +{ + /* Open the /dev/console as stdin, this should never fail */ + if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) + pr_err("Warning: unable to open an initial console.\n"); + + /* create stdout/stderr */ + (void) ksys_dup(0); + (void) ksys_dup(0); +} + static noinline void __init kernel_init_freeable(void) { /* @@ -1190,12 +1201,8 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); - /* Open the /dev/console on the rootfs, this should never fail */ - if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - pr_err("Warning: unable to open an initial console.\n"); + console_on_rootfs(); - (void) ksys_dup(0); - (void) ksys_dup(0); /* * check if there is an early userspace init. If yes, let it do all * the work From 8243186f0cc7c57cf9d6a110cd7315c44e3e0be8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 16:24:09 +0200 Subject: [PATCH 5/5] fs: remove ksys_dup() ksys_dup() is used only at one place in the kernel, namely to duplicate fd 0 of /dev/console to stdout and stderr. The same functionality can be achieved by using functions already available within the kernel namespace. Signed-off-by: Dominik Brodowski --- fs/file.c | 7 +------ include/linux/syscalls.h | 1 - init/main.c | 26 ++++++++++++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/file.c b/fs/file.c index 3da91a112babe..2f4fcf985079d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -960,7 +960,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) return ksys_dup3(oldfd, newfd, 0); } -int ksys_dup(unsigned int fildes) +SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget_raw(fildes); @@ -975,11 +975,6 @@ int ksys_dup(unsigned int fildes) return ret; } -SYSCALL_DEFINE1(dup, unsigned int, fildes) -{ - return ksys_dup(fildes); -} - int f_dupfd(unsigned int from, struct file *file, unsigned flags) { int err; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5262b7a76d392..2960dedcfde8a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1232,7 +1232,6 @@ asmlinkage long sys_ni_syscall(void); */ int ksys_umount(char __user *name, int flags); -int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); diff --git a/init/main.c b/init/main.c index 2cd736059416f..ec3a1463ac692 100644 --- a/init/main.c +++ b/init/main.c @@ -93,6 +93,7 @@ #include #include #include +#include #include #include @@ -1157,13 +1158,26 @@ static int __ref kernel_init(void *unused) void console_on_rootfs(void) { - /* Open the /dev/console as stdin, this should never fail */ - if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - pr_err("Warning: unable to open an initial console.\n"); + struct file *file; + unsigned int i; + + /* Open /dev/console in kernelspace, this should never fail */ + file = filp_open("/dev/console", O_RDWR, 0); + if (!file) + goto err_out; + + /* create stdin/stdout/stderr, this should never fail */ + for (i = 0; i < 3; i++) { + if (f_dupfd(i, file, 0) != i) + goto err_out; + } + + return; - /* create stdout/stderr */ - (void) ksys_dup(0); - (void) ksys_dup(0); +err_out: + /* no panic -- this might not be fatal */ + pr_err("Warning: unable to open an initial console.\n"); + return; } static noinline void __init kernel_init_freeable(void)