From cb16dfed0093217a68c0faa9394fa5823927e04c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 14 Mar 2025 12:03:33 +0100
Subject: [PATCH 1/3] efi/libstub: Avoid physical address 0x0 when doing random
 allocation

Ben reports spurious EFI zboot failures on a system where physical RAM
starts at 0x0. When doing random memory allocation from the EFI stub on
such a platform, a random seed of 0x0 (which means no entropy source is
available) will result in the allocation to be placed at address 0x0 if
sufficient space is available.

When this allocation is subsequently passed on to the decompression
code, the 0x0 address is mistaken for NULL and the code complains and
gives up.

So avoid address 0x0 when doing random allocation, and set the minimum
address to the minimum alignment.

Cc: <stable@vger.kernel.org>
Reported-by: Ben Schneider <ben@bens.haus>
Tested-by: Ben Schneider <ben@bens.haus>
Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/randomalloc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 5a732018be36..fd80b2f3233a 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -75,6 +75,10 @@ efi_status_t efi_random_alloc(unsigned long size,
 	if (align < EFI_ALLOC_ALIGN)
 		align = EFI_ALLOC_ALIGN;
 
+	/* Avoid address 0x0, as it can be mistaken for NULL */
+	if (alloc_min == 0)
+		alloc_min = align;
+
 	size = round_up(size, EFI_ALLOC_ALIGN);
 
 	/* count the suitable slots in each memory map entry */

From dec1277875a5974413068bfb67df7e87e51a189b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 17 Mar 2025 08:23:11 +0100
Subject: [PATCH 2/3] efivarfs: use I_MUTEX_CHILD nested lock to traverse
 variables on resume

syzbot warns about a potential deadlock, but this is a false positive
resulting from a missing lockdep annotation: iterate_dir() locks the
parent whereas the inode_lock() it warns about locks the child, which is
guaranteed to be a different lock.

So use inode_lock_nested() instead with the appropriate lock class.

Reported-by: syzbot+019072ad24ab1d948228@syzkaller.appspotmail.com
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 fs/efivarfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 6eae8cf655c1..42295d04f08d 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -421,7 +421,7 @@ static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
 	if (err)
 		size = 0;
 
-	inode_lock(inode);
+	inode_lock_nested(inode, I_MUTEX_CHILD);
 	i_size_write(inode, size);
 	inode_unlock(inode);
 

From 11092db5b57377ac99e6339cfd16ca35ef011f3c Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Mon, 17 Mar 2025 23:06:01 -0400
Subject: [PATCH 3/3] efivarfs: fix NULL dereference on resume

LSMs often inspect the path.mnt of files in the security hooks, and this
causes a NULL deref in efivarfs_pm_notify() because the path is
constructed with a NULL path.mnt.

Fix by obtaining from vfs_kern_mount() instead, and being very careful
to ensure that deactivate_super() (potentially triggered by a racing
userspace umount) is not called directly from the notifier, because it
would deadlock when efivarfs_kill_sb() tried to unregister the notifier
chain.

[ Al notes:
	Umm...  That's probably safe, but not as a long-term solution -
	it's too intimately dependent upon fs/super.c internals. The
	reasons why you can't run into ->s_umount deadlock here are
	non-trivial... ]

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Link: https://lore.kernel.org/r/e54e6a2f-1178-4980-b771-4d9bafc2aa47@tnxip.de
Link: https://lore.kernel.org/r/3e998bf87638a442cbc6864cdcd3d8d9e08ce3e3.camel@HansenPartnership.com
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 fs/efivarfs/super.c | 50 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 42295d04f08d..0486e9b68bc6 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -474,12 +474,25 @@ static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
 	return err;
 }
 
+static void efivarfs_deactivate_super_work(struct work_struct *work)
+{
+	struct super_block *s = container_of(work, struct super_block,
+					     destroy_work);
+	/*
+	 * note: here s->destroy_work is free for reuse (which
+	 * will happen in deactivate_super)
+	 */
+	deactivate_super(s);
+}
+
+static struct file_system_type efivarfs_type;
+
 static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
 			      void *ptr)
 {
 	struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info,
 						    pm_nb);
-	struct path path = { .mnt = NULL, .dentry = sfi->sb->s_root, };
+	struct path path;
 	struct efivarfs_ctx ectx = {
 		.ctx = {
 			.actor	= efivarfs_actor,
@@ -487,6 +500,7 @@ static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
 		.sb = sfi->sb,
 	};
 	struct file *file;
+	struct super_block *s = sfi->sb;
 	static bool rescan_done = true;
 
 	if (action == PM_HIBERNATION_PREPARE) {
@@ -499,11 +513,43 @@ static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
 	if (rescan_done)
 		return NOTIFY_DONE;
 
+	/* ensure single superblock is alive and pin it */
+	if (!atomic_inc_not_zero(&s->s_active))
+		return NOTIFY_DONE;
+
 	pr_info("efivarfs: resyncing variable state\n");
 
-	/* O_NOATIME is required to prevent oops on NULL mnt */
+	path.dentry = sfi->sb->s_root;
+
+	/*
+	 * do not add SB_KERNMOUNT which a single superblock could
+	 * expose to userspace and which also causes MNT_INTERNAL, see
+	 * below
+	 */
+	path.mnt = vfs_kern_mount(&efivarfs_type, 0,
+				  efivarfs_type.name, NULL);
+	if (IS_ERR(path.mnt)) {
+		pr_err("efivarfs: internal mount failed\n");
+		/*
+		 * We may be the last pinner of the superblock but
+		 * calling efivarfs_kill_sb from within the notifier
+		 * here would deadlock trying to unregister it
+		 */
+		INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work);
+		schedule_work(&s->destroy_work);
+		return PTR_ERR(path.mnt);
+	}
+
+	/* path.mnt now has pin on superblock, so this must be above one */
+	atomic_dec(&s->s_active);
+
 	file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME,
 				current_cred());
+	/*
+	 * safe even if last put because no MNT_INTERNAL means this
+	 * will do delayed deactivate_super and not deadlock
+	 */
+	mntput(path.mnt);
 	if (IS_ERR(file))
 		return NOTIFY_DONE;