diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst index 2a501c9ddc556..e364d6c45790b 100644 --- a/Documentation/admin-guide/sysctl/fs.rst +++ b/Documentation/admin-guide/sysctl/fs.rst @@ -48,6 +48,7 @@ Currently, these files are in /proc/sys/fs: - suid_dumpable - super-max - super-nr +- trusted_for_policy aio-nr & aio-max-nr @@ -382,3 +383,52 @@ Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes on a 64bit one. The current default value for max_user_watches is the 1/25 (4%) of the available low memory, divided for the "watch" cost in bytes. + + +trusted_for_policy +------------------ + +An interpreter can call :manpage:`trusted_for(2)` with a +``TRUSTED_FOR_EXECUTION`` usage to check that opened regular files are expected +to be executable. If the file is not identified as executable, then the +syscall returns -EACCES. This may allow a script interpreter to check +executable permission before reading commands from a file, or a dynamic linker +to only load executable shared objects. One interesting use case is to enforce +a "write xor execute" policy through interpreters. + +The ability to restrict code execution must be thought as a system-wide policy, +which first starts by restricting mount points with the ``noexec`` option. +This option is also automatically applied to special filesystems such as /proc . +This prevents files on such mount points to be directly executed by the kernel +or mapped as executable memory (e.g. libraries). With script interpreters +using :manpage:`trusted_for(2)`, the executable permission can then be checked +before reading commands from files. This makes it possible to enforce the +``noexec`` at the interpreter level, and thus propagates this security policy +to scripts. To be fully effective, these interpreters also need to handle the +other ways to execute code: command line parameters (e.g., option ``-e`` for +Perl), module loading (e.g., option ``-m`` for Python), stdin, file sourcing, +environment variables, configuration files, etc. According to the threat +model, it may be acceptable to allow some script interpreters (e.g. Bash) to +interpret commands from stdin, may it be a TTY or a pipe, because it may not be +enough to (directly) perform syscalls. + +There are two complementary security policies: enforce the ``noexec`` mount +option, and enforce executable file permission. These policies are handled by +the ``fs.trusted_for_policy`` sysctl (writable only with ``CAP_SYS_ADMIN``) as +a bitmask: + +1 - Mount restriction: checks that the mount options for the underlying VFS + mount do not prevent execution. + +2 - File permission restriction: checks that the file is marked as + executable for the current process (e.g., POSIX permissions, ACLs). + +Note that as long as a policy is enforced, checking any non-regular file with +:manpage:`trusted_for(2)` returns -EACCES (e.g. TTYs, pipe), even when such a +file is marked as executable or is on an executable mount point. + +Code samples can be found in +tools/testing/selftests/interpreter/trust_policy_test.c and interpreter patches +(for the original O_MAYEXEC) are available at +https://github.com/clipos-archive/clipos4_portage-overlay/search?q=O_MAYEXEC . +See also an overview article: https://lwn.net/Articles/820000/ . diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3515bc4f16a4f..2bf65d76e1885 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -490,3 +490,4 @@ 558 common process_mrelease sys_process_mrelease 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall +561 common trusted_for sys_trusted_for diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index ac964612d8b07..31f747a4f35cc 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -464,3 +464,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 4e65da3445c7a..14c95844f6c84 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 451 +#define __NR_compat_syscalls 452 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d0067..9bd1841e1d479 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_trusted_for 451 +__SYSCALL(__NR_trusted_for, sys_trusted_for) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 78b1d03e86e1d..dc97eab8bbea0 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -371,3 +371,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index b1f3940bc2981..fabb49d38c74c 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 820145e473501..a7aa26d5b143a 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 253ff994ed2ec..ad9a968fee638 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -389,3 +389,4 @@ 448 n32 process_mrelease sys_process_mrelease 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 n32 trusted_for sys_trusted_for diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 3f1886ad9d806..294e0ab651e7a 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -365,3 +365,4 @@ 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 n64 trusted_for sys_trusted_for diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 8f243e35a7b20..4644c7c7de6ae 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -438,3 +438,4 @@ 448 o32 process_mrelease sys_process_mrelease 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 o32 trusted_for sys_trusted_for diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 68b46fe2f17c5..208dc6eed4ca8 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -448,3 +448,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2600b4237292c..7281bb9c1cba7 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -530,3 +530,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 799147658dee2..badd1fe09c97a 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for sys_trusted_for diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 2de85c977f54f..8f92f449617b7 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4398cc6fb68dd..910fdd8d94434 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -496,3 +496,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 320480a8db4f8..47150d9e039a8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -455,3 +455,4 @@ 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node +451 i386 trusted_for sys_trusted_for diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608cd2d..29024e64fb30b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -372,6 +372,7 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 52c94ab5c2058..efc45e1a53ce8 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -421,3 +421,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common trusted_for sys_trusted_for diff --git a/fs/open.c b/fs/open.c index 9ff2f621b760b..cf4cddc4e3f59 100644 --- a/fs/open.c +++ b/fs/open.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "internal.h" @@ -481,6 +483,137 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) return do_faccessat(AT_FDCWD, filename, mode, 0); } +#define TRUST_POLICY_EXEC_MOUNT BIT(0) +#define TRUST_POLICY_EXEC_FILE BIT(1) + +static int sysctl_trusted_for_policy __read_mostly; + +#ifdef CONFIG_SYSCTL +static struct ctl_table open_sysctls[] = { + { + .procname = "trusted_for_policy", + .data = &sysctl_trusted_for_policy, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_THREE, + }, + { } +}; + +static int __init init_fs_open_sysctls(void) +{ + register_sysctl_init("fs", open_sysctls); + return 0; +} +fs_initcall(init_fs_open_sysctls); +#endif /* CONFIG_SYSCTL */ + +/** + * sys_trusted_for - Check that a FD is trusted for a specific usage + * + * @fd: File descriptor to check. + * @usage: Identify the user space usage (defined by enum trusted_for_usage) + * intended for the file descriptor (only TRUSTED_FOR_EXECUTION for + * now). + * + * @flags: Must be 0. + * + * This system call enables user space to ask the kernel: is this file + * descriptor's content trusted to be used for this purpose? The set of @usage + * currently only contains TRUSTED_FOR_EXECUTION, but other may follow (e.g. + * configuration, sensitive data). If the kernel identifies the file + * descriptor as trustworthy for this usage, this call returns 0 and the caller + * should then take this information into account. + * + * The execution usage means that the content of the file descriptor is trusted + * according to the system policy to be executed by user space, which means + * that it interprets the content or (try to) maps it as executable memory. + * + * A simple system-wide security policy can be set by the system administrator + * through a sysctl configuration consistent with the mount points or the file + * access rights: Documentation/admin-guide/sysctl/fs.rst + * + * @flags could be used in the future to do complementary checks (e.g. + * signature or integrity requirements, origin of the file). + * + * Possible returned errors are: + * + * - EINVAL: unknown @usage or unknown @flags; + * - EBADF: @fd is not a file descriptor for the calling thread; + * - EACCES: the requested usage is denied (and user space should enforce it). + */ +SYSCALL_DEFINE3(trusted_for, const int, fd, const int, usage, const u32, flags) +{ + int mask, err = -EACCES; + struct fd f; + struct inode *inode; + + if (flags) + return -EINVAL; + + /* Only handles execution for now. */ + if (usage != TRUSTED_FOR_EXECUTION) + return -EINVAL; + mask = MAY_EXEC; + + f = fdget(fd); + if (!f.file) + return -EBADF; + inode = file_inode(f.file); + + /* + * For compatibility reasons, without a defined security policy, we + * must map the execute permission to the read permission. Indeed, + * from user space point of view, being able to execute data (e.g. + * scripts) implies to be able to read this data. + */ + if ((mask & MAY_EXEC)) { + /* + * If there is a system-wide execute policy enforced, then + * forbids access to non-regular files and special superblocks. + */ + if ((sysctl_trusted_for_policy & (TRUST_POLICY_EXEC_MOUNT | + TRUST_POLICY_EXEC_FILE))) { + if (!S_ISREG(inode->i_mode)) + goto out_fd; + /* + * Denies access to pseudo filesystems that will never + * be mountable (e.g. sockfs, pipefs) but can still be + * reachable through /proc/self/fd, or memfd-like file + * descriptors, or nsfs-like files. + * + * According to the selftests, SB_NOEXEC seems to be + * only used by proc and nsfs filesystems. + */ + if ((f.file->f_path.dentry->d_sb->s_flags & + (SB_NOUSER | SB_KERNMOUNT | SB_NOEXEC))) + goto out_fd; + } + + if ((sysctl_trusted_for_policy & TRUST_POLICY_EXEC_MOUNT) && + path_noexec(&f.file->f_path)) + goto out_fd; + /* + * For compatibility reasons, if the system-wide policy doesn't + * enforce file permission checks, then replaces the execute + * permission request with a read permission request. + */ + if (!(sysctl_trusted_for_policy & TRUST_POLICY_EXEC_FILE)) + mask &= ~MAY_EXEC; + /* To be executed *by* user space, files must be readable. */ + mask |= MAY_READ; + } + + err = inode_permission(file_mnt_user_ns(f.file), inode, + mask | MAY_ACCESS); + +out_fd: + fdput(f); + return err; +} + SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 7d9cfc730bd4c..6c87c99f08562 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -26,7 +26,7 @@ static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; /* shared constants to be used in various sysctls */ -const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX, 65535 }; +const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX, 65535, 3 }; EXPORT_SYMBOL(sysctl_vals); const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX }; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a34b0f9a9972e..faa1b7dd6da4f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -458,6 +458,7 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); asmlinkage long sys_faccessat2(int dfd, const char __user *filename, int mode, int flags); +asmlinkage long sys_trusted_for(int fd, int usage, u32 flags); asmlinkage long sys_chdir(const char __user *filename); asmlinkage long sys_fchdir(unsigned int fd); asmlinkage long sys_chroot(const char __user *filename); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6353d6db69b2e..644fd53ad5f1f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -51,6 +51,7 @@ struct ctl_dir; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ #define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10]) +#define SYSCTL_THREE ((void *)&sysctl_vals[11]) extern const int sysctl_vals[]; @@ -69,6 +70,8 @@ int proc_dobool(struct ctl_table *table, int write, void *buffer, int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_minmax_sysadmin(struct ctl_table *, int, void *, size_t *, + loff_t *); int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1c48b0ae3ba30..26d1ffaca07bb 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -886,8 +886,11 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_trusted_for 451 +__SYSCALL(__NR_trusted_for, sys_trusted_for) + #undef __NR_syscalls -#define __NR_syscalls 451 +#define __NR_syscalls 452 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/trusted-for.h b/include/uapi/linux/trusted-for.h new file mode 100644 index 0000000000000..cc4f030c51033 --- /dev/null +++ b/include/uapi/linux/trusted-for.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_TRUSTED_FOR_H +#define _UAPI_LINUX_TRUSTED_FOR_H + +/** + * enum trusted_for_usage - Usage for which a file descriptor is trusted + * + * Argument of trusted_for(2). + */ +enum trusted_for_usage { + /** + * @TRUSTED_FOR_EXECUTION: Check that the data read from a file + * descriptor is trusted to be executed or interpreted (e.g. scripts). + */ + TRUSTED_FOR_EXECUTION = 1, +}; + +#endif /* _UAPI_LINUX_TRUSTED_FOR_H */ diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index c228343eeb975..c7129428ee9b9 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -11,15 +11,6 @@ static const int ten_thousand = 10000; -static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - if (write && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} - static struct ctl_table printk_sysctls[] = { { .procname = "printk", diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1cb7ca68cd4e1..6a494ab55beaf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -888,6 +888,15 @@ static int proc_taint(struct ctl_table *table, int write, return err; } +int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} + /** * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure * @min: pointer to minimum allowable value diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index 7a15910d21718..8859fc1935428 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -134,6 +134,7 @@ static int populate_ruleset( ret = 0; out_free_name: + free(path_list); free(env_path_name); return ret; } diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 32396962f04d6..7e27ce394020d 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -192,7 +192,7 @@ SYSCALL_DEFINE3(landlock_create_ruleset, return PTR_ERR(ruleset); /* Creates anonymous FD referring to the ruleset. */ - ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops, + ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops, ruleset, O_RDWR | O_CLOEXEC); if (ruleset_fd < 0) landlock_put_ruleset(ruleset); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index d08fe4cfe8115..d6a54a1d9d3a1 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -21,6 +21,7 @@ TARGETS += ftrace TARGETS += futex TARGETS += gpio TARGETS += intel_pstate +TARGETS += interpreter TARGETS += ipc TARGETS += ir TARGETS += kcmp diff --git a/tools/testing/selftests/interpreter/.gitignore b/tools/testing/selftests/interpreter/.gitignore new file mode 100644 index 0000000000000..82a4846cbc4b2 --- /dev/null +++ b/tools/testing/selftests/interpreter/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/*_test diff --git a/tools/testing/selftests/interpreter/Makefile b/tools/testing/selftests/interpreter/Makefile new file mode 100644 index 0000000000000..7402fdb6533ff --- /dev/null +++ b/tools/testing/selftests/interpreter/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -Wall -O2 -I$(khdr_dir) +LDLIBS += -lcap + +src_test := $(wildcard *_test.c) +TEST_GEN_PROGS := $(src_test:.c=) + +KSFT_KHDR_INSTALL := 1 +include ../lib.mk + +khdr_dir = $(top_srcdir)/usr/include + +$(khdr_dir)/asm-generic/unistd.h: khdr + @: + +$(khdr_dir)/linux/trusted-for.h: khdr + @: + +$(OUTPUT)/%_test: %_test.c $(khdr_dir)/asm-generic/unistd.h $(khdr_dir)/linux/trusted-for.h ../kselftest_harness.h + $(LINK.c) $< $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/interpreter/config b/tools/testing/selftests/interpreter/config new file mode 100644 index 0000000000000..dd53c266bf527 --- /dev/null +++ b/tools/testing/selftests/interpreter/config @@ -0,0 +1 @@ +CONFIG_SYSCTL=y diff --git a/tools/testing/selftests/interpreter/trust_policy_test.c b/tools/testing/selftests/interpreter/trust_policy_test.c new file mode 100644 index 0000000000000..b59f07f537ade --- /dev/null +++ b/tools/testing/selftests/interpreter/trust_policy_test.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test trusted_for(2) with fs.trusted_for_policy sysctl + * + * Copyright © 2018-2020 ANSSI + * + * Author: Mickaël Salaün + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#ifndef trusted_for +static int trusted_for(const int fd, const enum trusted_for_usage usage, + const __u32 flags) +{ + errno = 0; + return syscall(__NR_trusted_for, fd, usage, flags); +} +#endif + +static const char sysctl_path[] = "/proc/sys/fs/trusted_for_policy"; + +static const char workdir_path[] = "./test-mount"; +static const char reg_file_path[] = "./test-mount/regular_file"; +static const char dir_path[] = "./test-mount/directory"; +static const char block_dev_path[] = "./test-mount/block_device"; +static const char char_dev_path[] = "./test-mount/character_device"; +static const char fifo_path[] = "./test-mount/fifo"; + +static void ignore_dac(struct __test_metadata *_metadata, int override) +{ + cap_t caps; + const cap_value_t cap_val[2] = { + CAP_DAC_OVERRIDE, + CAP_DAC_READ_SEARCH, + }; + + caps = cap_get_proc(); + ASSERT_NE(NULL, caps); + ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_val, + override ? CAP_SET : CAP_CLEAR)); + ASSERT_EQ(0, cap_set_proc(caps)); + EXPECT_EQ(0, cap_free(caps)); +} + +static void ignore_sys_admin(struct __test_metadata *_metadata, int override) +{ + cap_t caps; + const cap_value_t cap_val[1] = { + CAP_SYS_ADMIN, + }; + + caps = cap_get_proc(); + ASSERT_NE(NULL, caps); + ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_val, + override ? CAP_SET : CAP_CLEAR)); + ASSERT_EQ(0, cap_set_proc(caps)); + EXPECT_EQ(0, cap_free(caps)); +} + +static void test_omx(struct __test_metadata *_metadata, + const char *const path, const int err_access) +{ + int flags = O_RDONLY | O_CLOEXEC; + int fd, access_ret, access_errno; + + /* Do not block on pipes. */ + if (path == fifo_path) + flags |= O_NONBLOCK; + + fd = open(path, flags); + ASSERT_LE(0, fd) { + TH_LOG("Failed to open %s: %s", path, strerror(errno)); + } + access_ret = trusted_for(fd, TRUSTED_FOR_EXECUTION, 0); + access_errno = errno; + if (err_access) { + ASSERT_EQ(err_access, access_errno) { + TH_LOG("Wrong error for trusted_for(2) with %s: %s", + path, strerror(access_errno)); + } + ASSERT_EQ(-1, access_ret); + } else { + ASSERT_EQ(0, access_ret) { + TH_LOG("Access denied for %s: %s", path, strerror(access_errno)); + } + } + + /* Tests unsupported trusted usage. */ + access_ret = trusted_for(fd, 0, 0); + ASSERT_EQ(-1, access_ret); + ASSERT_EQ(EINVAL, errno); + + access_ret = trusted_for(fd, 2, 0); + ASSERT_EQ(-1, access_ret); + ASSERT_EQ(EINVAL, errno); + + EXPECT_EQ(0, close(fd)); +} + +static void test_policy_fd(struct __test_metadata *_metadata, const int fd, + const bool has_policy) +{ + const int ret = trusted_for(fd, TRUSTED_FOR_EXECUTION, 0); + + if (has_policy) { + ASSERT_EQ(-1, ret); + ASSERT_EQ(EACCES, errno) { + TH_LOG("Wrong error for trusted_for(2) with FD: %s", strerror(errno)); + } + } else { + ASSERT_EQ(0, ret) { + TH_LOG("Access denied for FD: %s", strerror(errno)); + } + } +} + +FIXTURE(access) { + char initial_sysctl_value; + int memfd, pipefd; + int pipe_fds[2], socket_fds[2]; +}; + +static void test_file_types(struct __test_metadata *_metadata, FIXTURE_DATA(access) *self, + const int err_code, const bool has_policy) +{ + /* Tests are performed on a tmpfs mount point. */ + test_omx(_metadata, reg_file_path, err_code); + test_omx(_metadata, dir_path, has_policy ? EACCES : 0); + test_omx(_metadata, block_dev_path, has_policy ? EACCES : 0); + test_omx(_metadata, char_dev_path, has_policy ? EACCES : 0); + test_omx(_metadata, fifo_path, has_policy ? EACCES : 0); + + /* Checks that exec is denied for any socket FD. */ + test_policy_fd(_metadata, self->socket_fds[0], has_policy); + + /* Checks that exec is denied for any memfd. */ + test_policy_fd(_metadata, self->memfd, has_policy); + + /* Checks that exec is denied for any pipefs FD. */ + test_policy_fd(_metadata, self->pipefd, has_policy); +} + +static void test_files(struct __test_metadata *_metadata, FIXTURE_DATA(access) *self, + const int err_code, const bool has_policy) +{ + /* Tests as root. */ + ignore_dac(_metadata, 1); + test_file_types(_metadata, self, err_code, has_policy); + + /* Tests without bypass. */ + ignore_dac(_metadata, 0); + test_file_types(_metadata, self, err_code, has_policy); +} + +static void sysctl_write_char(struct __test_metadata *_metadata, const char value) +{ + int fd; + + fd = open(sysctl_path, O_WRONLY | O_CLOEXEC); + ASSERT_LE(0, fd); + ASSERT_EQ(1, write(fd, &value, 1)); + EXPECT_EQ(0, close(fd)); +} + +static char sysctl_read_char(struct __test_metadata *_metadata) +{ + int fd; + char sysctl_value; + + fd = open(sysctl_path, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, fd); + ASSERT_EQ(1, read(fd, &sysctl_value, 1)); + EXPECT_EQ(0, close(fd)); + return sysctl_value; +} + +FIXTURE_VARIANT(access) { + const bool mount_exec; + const bool file_exec; + const int sysctl_err_code[3]; +}; + +FIXTURE_VARIANT_ADD(access, mount_exec_file_exec) { + .mount_exec = true, + .file_exec = true, + .sysctl_err_code = {0, 0, 0}, +}; + +FIXTURE_VARIANT_ADD(access, mount_exec_file_noexec) +{ + .mount_exec = true, + .file_exec = false, + .sysctl_err_code = {0, EACCES, EACCES}, +}; + +FIXTURE_VARIANT_ADD(access, mount_noexec_file_exec) +{ + .mount_exec = false, + .file_exec = true, + .sysctl_err_code = {EACCES, 0, EACCES}, +}; + +FIXTURE_VARIANT_ADD(access, mount_noexec_file_noexec) +{ + .mount_exec = false, + .file_exec = false, + .sysctl_err_code = {EACCES, EACCES, EACCES}, +}; + +FIXTURE_SETUP(access) +{ + int procfd_path_size; + static const char path_template[] = "/proc/self/fd/%d"; + char procfd_path[sizeof(path_template) + 10]; + + /* + * Cleans previous workspace if any error previously happened (don't + * check errors). + */ + umount(workdir_path); + rmdir(workdir_path); + + /* Creates a clean mount point. */ + ASSERT_EQ(0, mkdir(workdir_path, 00700)); + ASSERT_EQ(0, mount("test", workdir_path, "tmpfs", MS_MGC_VAL | + (variant->mount_exec ? 0 : MS_NOEXEC), + "mode=0700,size=4k")); + + /* Creates a regular file. */ + ASSERT_EQ(0, mknod(reg_file_path, S_IFREG | (variant->file_exec ? 0500 : 0400), 0)); + /* Creates a directory. */ + ASSERT_EQ(0, mkdir(dir_path, variant->file_exec ? 0500 : 0400)); + /* Creates a character device: /dev/null. */ + ASSERT_EQ(0, mknod(char_dev_path, S_IFCHR | 0400, makedev(1, 3))); + /* Creates a block device: /dev/loop0 */ + ASSERT_EQ(0, mknod(block_dev_path, S_IFBLK | 0400, makedev(7, 0))); + /* Creates a fifo. */ + ASSERT_EQ(0, mknod(fifo_path, S_IFIFO | 0400, 0)); + + /* Creates a regular file without user mount point. */ + self->memfd = memfd_create("test-interpreted", MFD_CLOEXEC); + ASSERT_LE(0, self->memfd); + /* Sets mode, which must be ignored by the exec check. */ + ASSERT_EQ(0, fchmod(self->memfd, variant->file_exec ? 0500 : 0400)); + + /* Creates a pipefs file descriptor. */ + ASSERT_EQ(0, pipe(self->pipe_fds)); + procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), + path_template, self->pipe_fds[0]); + ASSERT_LT(procfd_path_size, sizeof(procfd_path)); + self->pipefd = open(procfd_path, O_RDONLY | O_CLOEXEC); + ASSERT_LE(0, self->pipefd); + ASSERT_EQ(0, fchmod(self->pipefd, variant->file_exec ? 0500 : 0400)); + + /* Creates a socket file descriptor. */ + ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, self->socket_fds)); + + /* Saves initial sysctl value. */ + self->initial_sysctl_value = sysctl_read_char(_metadata); + + /* Prepares for sysctl writes. */ + ignore_sys_admin(_metadata, 1); +} + +FIXTURE_TEARDOWN(access) +{ + EXPECT_EQ(0, close(self->memfd)); + EXPECT_EQ(0, close(self->pipefd)); + EXPECT_EQ(0, close(self->pipe_fds[0])); + EXPECT_EQ(0, close(self->pipe_fds[1])); + EXPECT_EQ(0, close(self->socket_fds[0])); + EXPECT_EQ(0, close(self->socket_fds[1])); + + /* Restores initial sysctl value. */ + sysctl_write_char(_metadata, self->initial_sysctl_value); + + /* There is no need to unlink the test files. */ + ASSERT_EQ(0, umount(workdir_path)); + ASSERT_EQ(0, rmdir(workdir_path)); +} + +TEST_F(access, sysctl_0) +{ + /* Do not enforce anything. */ + sysctl_write_char(_metadata, '0'); + test_files(_metadata, self, 0, false); +} + +TEST_F(access, sysctl_1) +{ + /* Enforces mount exec check. */ + sysctl_write_char(_metadata, '1'); + test_files(_metadata, self, variant->sysctl_err_code[0], true); +} + +TEST_F(access, sysctl_2) +{ + /* Enforces file exec check. */ + sysctl_write_char(_metadata, '2'); + test_files(_metadata, self, variant->sysctl_err_code[1], true); +} + +TEST_F(access, sysctl_3) +{ + /* Enforces mount and file exec check. */ + sysctl_write_char(_metadata, '3'); + test_files(_metadata, self, variant->sysctl_err_code[2], true); +} + +FIXTURE(cleanup) { + char initial_sysctl_value; +}; + +FIXTURE_SETUP(cleanup) +{ + /* Saves initial sysctl value. */ + self->initial_sysctl_value = sysctl_read_char(_metadata); +} + +FIXTURE_TEARDOWN(cleanup) +{ + /* Restores initial sysctl value. */ + ignore_sys_admin(_metadata, 1); + sysctl_write_char(_metadata, self->initial_sysctl_value); +} + +TEST_F(cleanup, sysctl_access_write) +{ + int fd; + ssize_t ret; + + ignore_sys_admin(_metadata, 1); + sysctl_write_char(_metadata, '0'); + + ignore_sys_admin(_metadata, 0); + fd = open(sysctl_path, O_WRONLY | O_CLOEXEC); + ASSERT_LE(0, fd); + ret = write(fd, "0", 1); + ASSERT_EQ(-1, ret); + ASSERT_EQ(EPERM, errno); + EXPECT_EQ(0, close(fd)); +} + +TEST_HARNESS_MAIN